about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author primorial <primorial@yandex-team.ru> 2022-02-10 16:48:04 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:48:04 +0300
commit 6affe5bbe9dfa9a07e706fccf93328f5dbc69e18 (patch)
tree 37fca8df48e73378bc9f00b56d5798500045ae36
parent e232fc1c53d9880cb57b47c5186b8a11700944bf (diff)
download ydb-6affe5bbe9dfa9a07e706fccf93328f5dbc69e18.tar.gz
Restoring authorship annotation for <primorial@yandex-team.ru>. Commit 1 of 2.
-rw-r--r--build/rules/contrib_restricted.policy4
-rw-r--r--contrib/libs/apache/arrow/README.md50
-rw-r--r--contrib/libs/apache/arrow/cpp/CHANGELOG_PARQUET.md1002
-rw-r--r--contrib/libs/apache/arrow/cpp/README.md68
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array.h64
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/array_base.cc586
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/array_base.h488
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/array_binary.cc204
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/array_binary.h468
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/array_decimal.cc94
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/array_decimal.h100
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/array_dict.cc502
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/array_dict.h246
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/array_nested.cc1510
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/array_nested.h1046
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/array_primitive.cc198
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/array_primitive.h248
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/builder_adaptive.cc760
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/builder_adaptive.h366
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/builder_base.cc272
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/builder_base.h466
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/builder_binary.cc370
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/builder_binary.h1206
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/builder_decimal.cc140
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/builder_decimal.h126
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/builder_dict.cc406
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/builder_dict.h1070
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/builder_nested.cc528
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/builder_nested.h868
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/builder_primitive.cc270
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/builder_primitive.h880
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/builder_time.h86
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/builder_union.cc238
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/builder_union.h368
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/concatenate.cc814
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/concatenate.h84
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/data.cc614
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/data.h488
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/dict_internal.h386
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/diff.cc1568
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/diff.h152
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/util.cc1036
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/util.h134
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/validate.cc678
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/array/validate.h68
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/buffer.cc414
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/buffer.h980
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/buffer_builder.h768
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/builder.cc440
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/builder.h64
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/c/abi.h206
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/c/bridge.cc3384
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/c/bridge.h394
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/c/helpers.h234
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/c/util_internal.h170
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/chunked_array.cc532
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/chunked_array.h490
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/compare.cc1566
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/compare.h216
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/config.cc84
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/config.h112
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/datum.cc448
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/datum.h504
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/device.cc418
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/device.h452
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/extension_type.cc338
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/extension_type.h322
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/io/buffered.cc960
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/io/buffered.h328
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/io/caching.cc328
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/io/caching.h200
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/io/compressed.cc880
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/io/compressed.h230
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/io/concurrency.h526
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/io/file.cc1534
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/io/file.h440
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/io/interfaces.cc822
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/io/interfaces.h526
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/io/memory.cc762
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/io/memory.h388
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/io/mman.h338
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/io/slow.cc296
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/io/slow.h236
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/io/transform.cc298
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/io/transform.h112
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/io/type_fwd.h104
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/io/util_internal.h110
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/ipc/dictionary.cc742
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/ipc/dictionary.h346
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/ipc/feather.cc1592
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/ipc/feather.h276
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/ipc/message.cc1742
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/ipc/message.h1060
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/ipc/metadata_internal.cc2866
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/ipc/metadata_internal.h420
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/ipc/options.cc82
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/ipc/options.h190
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/ipc/reader.cc3216
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/ipc/reader.h970
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/ipc/type_fwd.h124
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/ipc/util.h82
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/ipc/writer.cc2708
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/ipc/writer.h846
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/memory_pool.cc1028
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/memory_pool.h340
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/pretty_print.cc1282
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/pretty_print.h242
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/record_batch.cc612
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/record_batch.h448
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/result.cc72
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/result.h922
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/result_internal.h44
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/scalar.cc1176
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/scalar.h1026
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/sparse_tensor.cc956
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/sparse_tensor.h1248
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/status.cc278
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/status.h874
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/stl_allocator.h306
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/symbols.map76
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/table.cc1256
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/table.h586
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/table_builder.cc220
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/table_builder.h220
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/tensor.cc512
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/tensor.h476
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/tensor/converter.h134
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/tensor/converter_internal.h176
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/tensor/coo_converter.cc660
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/tensor/csf_converter.cc576
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/tensor/csx_converter.cc480
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/type.cc4192
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/type.h3530
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/type_fwd.h1230
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/type_traits.h1846
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/align_util.h136
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/atomic_shared_ptr.h222
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/base64.h68
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/basic_decimal.cc1318
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/basic_decimal.h358
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/bit_block_counter.cc140
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/bit_block_counter.h730
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/bit_run_reader.cc106
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/bit_run_reader.h324
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/bit_util.cc142
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/bit_util.h634
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap.cc130
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap.h588
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_builders.cc144
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_builders.h86
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_generate.h198
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_ops.cc658
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_ops.h314
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_reader.h170
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_writer.h366
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/bpacking.cc342
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/bpacking.h62
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/bpacking_default.h8502
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/checked_cast.h122
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/compare.h124
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/compression.cc462
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/compression.h358
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/compression_brotli.cc480
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/compression_internal.h160
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/compression_lz4.cc900
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/compression_snappy.cc198
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/compression_zlib.cc996
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/compression_zstd.cc492
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/cpu_info.cc924
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/cpu_info.h248
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/decimal.cc1216
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/decimal.h348
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/delimiting.cc282
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/delimiting.h294
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/dispatch.h230
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/double_conversion.h64
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/formatting.cc146
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/formatting.h820
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/functional.h164
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/future.cc590
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/future.h506
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/hash_util.h132
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/hashing.h1742
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/int128_internal.h84
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/int_util.cc1712
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/int_util.h192
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/int_util_internal.h250
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/io_util.cc3034
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/io_util.h612
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/iterator.h820
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/key_value_metadata.cc538
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/key_value_metadata.h192
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/logging.cc466
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/logging.h466
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/macros.h370
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/make_unique.h84
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/memory.cc148
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/memory.h86
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/mutex.cc108
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/mutex.h126
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/optional.h56
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/parallel.h116
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/range.h310
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/simd.h100
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/sort.h156
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/string.cc326
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/string.h140
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/string_builder.cc80
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/string_builder.h168
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/string_view.h76
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/task_group.cc280
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/task_group.h142
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/thread_pool.cc614
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/thread_pool.h344
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/time.cc136
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/time.h164
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/trie.cc422
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/trie.h482
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/type_fwd.h60
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/type_traits.h90
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/ubsan.h176
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/uri.cc538
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/uri.h190
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/utf8.cc316
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/utf8.h688
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/value_parsing.cc164
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/value_parsing.h1376
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/variant.h52
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/vector.h142
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/visibility.h90
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/windows_compatibility.h84
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/windows_fixup.h90
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/vendored/base64.cpp256
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime.h52
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime/date.h15898
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime/tz.cpp7746
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime/tz.h5608
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime/tz_private.h638
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime/visibility.h52
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/vendored/musl/strptime.c474
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/vendored/portable-snippets/safe-math.h2140
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/vendored/string_view.hpp3062
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/vendored/strptime.h70
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/vendored/utfcpp/checked.h666
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/vendored/utfcpp/core.h674
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/vendored/utfcpp/cpp11.h206
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/visitor.cc332
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/visitor.h298
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/visitor_inline.h884
-rw-r--r--contrib/libs/apache/arrow/cpp/src/generated/File_generated.h400
-rw-r--r--contrib/libs/apache/arrow/cpp/src/generated/Message_generated.h1318
-rw-r--r--contrib/libs/apache/arrow/cpp/src/generated/Schema_generated.h4530
-rw-r--r--contrib/libs/apache/arrow/cpp/src/generated/SparseTensor_generated.h1826
-rw-r--r--contrib/libs/apache/arrow/cpp/src/generated/Tensor_generated.h774
-rw-r--r--contrib/libs/apache/arrow/cpp/src/generated/feather_generated.h1726
-rw-r--r--contrib/libs/apache/arrow/src/arrow/util/config.h68
-rw-r--r--contrib/libs/apache/arrow/ya.make78
257 files changed, 91264 insertions, 91264 deletions
diff --git a/build/rules/contrib_restricted.policy b/build/rules/contrib_restricted.policy
index a83ead19043..fc92d931c5b 100644
--- a/build/rules/contrib_restricted.policy
+++ b/build/rules/contrib_restricted.policy
@@ -38,9 +38,9 @@ ALLOW passport/infra -> contrib/restricted/thrift
# keyutils is LGPL: CONTRIB-2236
ALLOW passport/infra -> contrib/restricted/keyutils
-# For Apache Arrow: CONTRIB-1662
+# For Apache Arrow: CONTRIB-1662
ALLOW mds -> contrib/restricted/uriparser
-
+
# https://st.yandex-team.ru/CONTRIB-2020
ALLOW weather -> contrib/restricted/range-v3
diff --git a/contrib/libs/apache/arrow/README.md b/contrib/libs/apache/arrow/README.md
index 7d10b81c6e4..5a35fe3b014 100644
--- a/contrib/libs/apache/arrow/README.md
+++ b/contrib/libs/apache/arrow/README.md
@@ -1,35 +1,35 @@
-<!---
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
# Apache Arrow
-
+
[![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/arrow.svg)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:arrow)
[![License](http://img.shields.io/:license-Apache%202-blue.svg)](https://github.com/apache/arrow/blob/master/LICENSE.txt)
[![Twitter Follow](https://img.shields.io/twitter/follow/apachearrow.svg?style=social&label=Follow)](https://twitter.com/apachearrow)
-
+
## Powering In-Memory Analytics
-
+
Apache Arrow is a development platform for in-memory analytics. It contains a
set of technologies that enable big data systems to process and move data fast.
-
+
Major components of the project include:
-
+
- [The Arrow Columnar In-Memory Format](https://github.com/apache/arrow/blob/master/docs/source/format/Columnar.rst):
a standard and efficient in-memory representation of various datatypes, plain or nested
- [The Arrow IPC Format](https://github.com/apache/arrow/blob/master/docs/source/format/Columnar.rst#serialization-and-interprocess-communication-ipc):
@@ -52,7 +52,7 @@ Major components of the project include:
- [R libraries](https://github.com/apache/arrow/tree/master/r)
- [Ruby libraries](https://github.com/apache/arrow/tree/master/ruby)
- [Rust libraries](https://github.com/apache/arrow-rs)
-
+
Arrow is an [Apache Software Foundation](https://www.apache.org) project. Learn more at
[arrow.apache.org](https://arrow.apache.org).
diff --git a/contrib/libs/apache/arrow/cpp/CHANGELOG_PARQUET.md b/contrib/libs/apache/arrow/cpp/CHANGELOG_PARQUET.md
index 06a09c20f0e..79e305daf1c 100644
--- a/contrib/libs/apache/arrow/cpp/CHANGELOG_PARQUET.md
+++ b/contrib/libs/apache/arrow/cpp/CHANGELOG_PARQUET.md
@@ -1,501 +1,501 @@
-Parquet C++ 1.5.0
---------------------------------------------------------------------------------
-## Bug
- * [PARQUET-979] - [C++] Limit size of min, max or disable stats for long binary types
- * [PARQUET-1071] - [C++] parquet::arrow::FileWriter::Close is not idempotent
- * [PARQUET-1349] - [C++] PARQUET_RPATH_ORIGIN is not picked by the build
- * [PARQUET-1334] - [C++] memory_map parameter seems misleading in parquet file opener
- * [PARQUET-1333] - [C++] Reading of files with dictionary size 0 fails on Windows with bad_alloc
- * [PARQUET-1283] - [C++] FormatStatValue appends trailing space to string and int96
- * [PARQUET-1270] - [C++] Executable tools do not get installed
- * [PARQUET-1272] - [C++] ScanFileContents reports wrong row count for nested columns
- * [PARQUET-1268] - [C++] Conversion of Arrow null list columns fails
- * [PARQUET-1255] - [C++] Exceptions thrown in some tests
- * [PARQUET-1358] - [C++] index_page_offset should be unset as it is not supported.
- * [PARQUET-1357] - [C++] FormatStatValue truncates binary statistics on zero character
- * [PARQUET-1319] - [C++] Pass BISON_EXECUTABLE to Thrift EP for MacOS
- * [PARQUET-1313] - [C++] Compilation failure with VS2017
- * [PARQUET-1315] - [C++] ColumnChunkMetaData.has_dictionary_page() should return bool, not int64_t
- * [PARQUET-1307] - [C++] memory-test fails with latest Arrow
- * [PARQUET-1274] - [Python] SegFault in pyarrow.parquet.write_table with specific options
- * [PARQUET-1209] - locally defined symbol ... imported in function ..
- * [PARQUET-1245] - [C++] Segfault when writing Arrow table with duplicate columns
- * [PARQUET-1273] - [Python] Error writing to partitioned Parquet dataset
- * [PARQUET-1384] - [C++] Clang compiler warnings in bloom_filter-test.cc
-
-## Improvement
- * [PARQUET-1348] - [C++] Allow Arrow FileWriter To Write FileMetaData
- * [PARQUET-1346] - [C++] Protect against null values data in empty Arrow array
- * [PARQUET-1340] - [C++] Fix Travis Ci valgrind errors related to std::random_device
- * [PARQUET-1323] - [C++] Fix compiler warnings with clang-6.0
- * [PARQUET-1279] - Use ASSERT_NO_FATAL_FAILURE in C++ unit tests
- * [PARQUET-1262] - [C++] Use the same BOOST_ROOT and Boost_NAMESPACE for Thrift
- * [PARQUET-1267] - replace "unsafe" std::equal by std::memcmp
- * [PARQUET-1360] - [C++] Minor API + style changes follow up to PARQUET-1348
- * [PARQUET-1166] - [API Proposal] Add GetRecordBatchReader in parquet/arrow/reader.h
- * [PARQUET-1378] - [c++] Allow RowGroups with zero rows to be written
- * [PARQUET-1256] - [C++] Add --print-key-value-metadata option to parquet_reader tool
- * [PARQUET-1276] - [C++] Reduce the amount of memory used for writing null decimal values
-
-## New Feature
- * [PARQUET-1392] - [C++] Supply row group indices to parquet::arrow::FileReader::ReadTable
-
-## Sub-task
- * [PARQUET-1227] - Thrift crypto metadata structures
- * [PARQUET-1332] - [C++] Add bloom filter utility class
-
-## Task
- * [PARQUET-1350] - [C++] Use abstract ResizableBuffer instead of concrete PoolBuffer
- * [PARQUET-1366] - [C++] Streamline use of Arrow bit-util.h
- * [PARQUET-1308] - [C++] parquet::arrow should use thread pool, not ParallelFor
- * [PARQUET-1382] - [C++] Prepare for arrow::test namespace removal
- * [PARQUET-1372] - [C++] Add an API to allow writing RowGroups based on their size rather than num_rows
-
-
-Parquet C++ 1.4.0
---------------------------------------------------------------------------------
-## Bug
- * [PARQUET-1193] - [CPP] Implement ColumnOrder to support min_value and max_value
- * [PARQUET-1180] - C++: Fix behaviour of num_children element of primitive nodes
- * [PARQUET-1146] - C++: Add macOS-compatible sha512sum call to release verify script
- * [PARQUET-1167] - [C++] FieldToNode function should return a status when throwing an exception
- * [PARQUET-1175] - [C++] Fix usage of deprecated Arrow API
- * [PARQUET-1113] - [C++] Incorporate fix from ARROW-1601 on bitmap read path
- * [PARQUET-1111] - dev/release/verify-release-candidate has stale help
- * [PARQUET-1109] - C++: Update release verification script to SHA512
- * [PARQUET-1179] - [C++] Support Apache Thrift 0.11
- * [PARQUET-1226] - [C++] Fix new build warnings with clang 5.0
- * [PARQUET-1233] - [CPP ]Enable option to switch between stl classes and boost classes for thrift header
- * [PARQUET-1205] - Fix msvc static build
- * [PARQUET-1210] - [C++] Boost 1.66 compilation fails on Windows on linkage stage
-
-## Improvement
- * [PARQUET-1092] - [C++] Write Arrow tables with chunked columns
- * [PARQUET-1086] - [C++] Remove usage of arrow/util/compiler-util.h after 1.3.0 release
- * [PARQUET-1097] - [C++] Account for Arrow API deprecation in ARROW-1511
- * [PARQUET-1150] - C++: Hide statically linked boost symbols
- * [PARQUET-1151] - [C++] Add build options / configuration to use static runtime libraries with MSVC
- * [PARQUET-1147] - [C++] Account for API deprecation / change in ARROW-1671
- * [PARQUET-1162] - C++: Update dev/README after migration to Gitbox
- * [PARQUET-1165] - [C++] Pin clang-format version to 4.0
- * [PARQUET-1164] - [C++] Follow API changes in ARROW-1808
- * [PARQUET-1177] - [C++] Add more extensive compiler warnings when using Clang
- * [PARQUET-1110] - [C++] Release verification script for Windows
- * [PARQUET-859] - [C++] Flatten parquet/file directory
- * [PARQUET-1220] - [C++] Don't build Thrift examples and tutorials in the ExternalProject
- * [PARQUET-1219] - [C++] Update release-candidate script links to gitbox
- * [PARQUET-1196] - [C++] Provide a parquet_arrow example project incl. CMake setup
- * [PARQUET-1200] - [C++] Support reading a single Arrow column from a Parquet file
-
-## New Feature
- * [PARQUET-1095] - [C++] Read and write Arrow decimal values
- * [PARQUET-970] - Add Lz4 and Zstd compression codecs
-
-## Task
- * [PARQUET-1221] - [C++] Extend release README
- * [PARQUET-1225] - NaN values may lead to incorrect filtering under certain circumstances
-
-
-Parquet C++ 1.3.1
---------------------------------------------------------------------------------
-## Bug
- * [PARQUET-1105] - [CPP] Remove libboost_system dependency
- * [PARQUET-1138] - [C++] Fix compilation with Arrow 0.7.1
- * [PARQUET-1123] - [C++] Update parquet-cpp to use Arrow's AssertArraysEqual
- * [PARQUET-1121] - C++: DictionaryArrays of NullType cannot be written
- * [PARQUET-1139] - Add license to cmake_modules/parquet-cppConfig.cmake.in
-
-## Improvement
- * [PARQUET-1140] - [C++] Fail on RAT errors in CI
- * [PARQUET-1070] - Add CPack support to the build
-
-
-Parquet C++ 1.3.0
---------------------------------------------------------------------------------
-## Bug
- * [PARQUET-1098] - [C++] Install new header in parquet/util
- * [PARQUET-1085] - [C++] Backwards compatibility from macro cleanup in transitive dependencies in ARROW-1452
- * [PARQUET-1074] - [C++] Switch to long key ids in KEYs file
- * [PARQUET-1075] - C++: Coverage upload is broken
- * [PARQUET-1088] - [CPP] remove parquet_version.h from version control since it gets auto generated
- * [PARQUET-1002] - [C++] Compute statistics based on Logical Types
- * [PARQUET-1100] - [C++] Reading repeated types should decode number of records rather than number of values
- * [PARQUET-1090] - [C++] Fix int32 overflow in Arrow table writer, add max row group size property
- * [PARQUET-1108] - [C++] Fix Int96 comparators
-
-## Improvement
- * [PARQUET-1104] - [C++] Upgrade to Apache Arrow 0.7.0 RC0
- * [PARQUET-1072] - [C++] Add ARROW_NO_DEPRECATED_API to CI to check for deprecated API use
- * [PARQUET-1096] - C++: Update sha{1, 256, 512} checksums per latest ASF release policy
- * [PARQUET-1079] - [C++] Account for Arrow API change in ARROW-1335
- * [PARQUET-1087] - [C++] Add wrapper for ScanFileContents in parquet::arrow that catches exceptions
- * [PARQUET-1093] - C++: Improve Arrow level generation error message
- * [PARQUET-1094] - C++: Add benchmark for boolean Arrow column I/O
- * [PARQUET-1083] - [C++] Refactor core logic in parquet-scan.cc so that it can be used as a library function for benchmarking
- * [PARQUET-1037] - Allow final RowGroup to be unfilled
-
-## New Feature
- * [PARQUET-1078] - [C++] Add Arrow writer option to coerce timestamps to milliseconds or microseconds
- * [PARQUET-929] - [C++] Handle arrow::DictionaryArray when writing Arrow data
-
-
-Parquet C++ 1.2.0
---------------------------------------------------------------------------------
-## Bug
- * [PARQUET-1029] - [C++] TypedColumnReader/TypeColumnWriter symbols are no longer being exported
- * [PARQUET-997] - Fix override compiler warnings
- * [PARQUET-1033] - Mismatched Read and Write
- * [PARQUET-1007] - [C++ ] Update parquet.thrift from https://github.com/apache/parquet-format
- * [PARQUET-1039] - PARQUET-911 Breaks Arrow
- * [PARQUET-1038] - Key value metadata should be nullptr if not set
- * [PARQUET-1018] - [C++] parquet.dll has runtime dependencies on one or more libraries in the build toolchain
- * [PARQUET-1003] - [C++] Modify DEFAULT_CREATED_BY value for every new release version
- * [PARQUET-1004] - CPP Building fails on windows
- * [PARQUET-1040] - Missing writer method implementations
- * [PARQUET-1054] - [C++] Account for Arrow API changes in ARROW-1199
- * [PARQUET-1042] - C++: Compilation breaks on GCC 4.8
- * [PARQUET-1048] - [C++] Static linking of libarrow is no longer supported
- * [PARQUET-1013] - Fix ZLIB_INCLUDE_DIR
- * [PARQUET-998] - C++: Release script is not usable
- * [PARQUET-1023] - [C++] Brotli libraries are not being statically linked on Windows
- * [PARQUET-1000] - [C++] Do not build thirdparty Arrow with /WX on MSVC
- * [PARQUET-1052] - [C++] add_compiler_export_flags() throws warning with CMake >= 3.3
- * [PARQUET-1069] - C++: ./dev/release/verify-release-candidate is broken due to missing Arrow dependencies
-
-## Improvement
- * [PARQUET-996] - Improve MSVC build - ThirdpartyToolchain - Arrow
- * [PARQUET-911] - C++: Support nested structs in parquet_arrow
- * [PARQUET-986] - Improve MSVC build - ThirdpartyToolchain - Thrift
- * [PARQUET-864] - [C++] Consolidate non-Parquet-specific bit utility code into Apache Arrow
- * [PARQUET-1043] - [C++] Raise minimum supported CMake version to 3.2
- * [PARQUET-1016] - Upgrade thirdparty Arrow to 0.4.0
- * [PARQUET-858] - [C++] Flatten parquet/column directory, consolidate related code
- * [PARQUET-978] - [C++] Minimizing footer reads for small(ish) metadata
- * [PARQUET-991] - [C++] Fix compiler warnings on MSVC and build with /WX in Appveyor
- * [PARQUET-863] - [C++] Move SIMD, CPU info, hashing, and other generic utilities into Apache Arrow
- * [PARQUET-1053] - Fix unused result warnings due to unchecked Statuses
- * [PARQUET-1067] - C++: Update arrow hash to 0.5.0
- * [PARQUET-1041] - C++: Support Arrow's NullArray
- * [PARQUET-1008] - Update TypedColumnReader::ReadBatch method to accept batch_size as int64_t
- * [PARQUET-1044] - [C++] Use compression libraries from Apache Arrow
- * [PARQUET-999] - Improve MSVC build - Enable PARQUET_BUILD_BENCHMARKS
- * [PARQUET-967] - [C++] Combine libparquet/libparquet_arrow libraries
- * [PARQUET-1045] - [C++] Refactor to account for computational utility code migration in ARROW-1154
-
-## New Feature
- * [PARQUET-1035] - Write Int96 from Arrow Timestamp(ns)
-
-## Task
- * [PARQUET-994] - C++: release-candidate script should not push to master
- * [PARQUET-902] - [C++] Move compressor interfaces into Apache Arrow
-
-## Test
- * [PARQUET-706] - [C++] Create test case that uses libparquet as a 3rd party library
-
-
-Parquet C++ 1.1.0
---------------------------------------------------------------------------------
-## Bug
- * [PARQUET-898] - [C++] Change Travis CI OS X image to Xcode 6.4 and fix our thirdparty build
- * [PARQUET-976] - [C++] Pass unit test suite with MSVC, build in Appveyor
- * [PARQUET-963] - [C++] Disallow reading struct types in Arrow reader for now
- * [PARQUET-959] - [C++] Arrow thirdparty build fails on multiarch systems
- * [PARQUET-962] - [C++] GTEST_MAIN_STATIC_LIB is not defined in FindGTest.cmake
- * [PARQUET-958] - [C++] Print Parquet metadata in JSON format
- * [PARQUET-956] - C++: BUILD_BYPRODUCTS not specified anymore for gtest
- * [PARQUET-948] - [C++] Account for API changes in ARROW-782
- * [PARQUET-947] - [C++] Refactor to account for ARROW-795 Arrow core library consolidation
- * [PARQUET-965] - [C++] FIXED_LEN_BYTE_ARRAY types are unhandled in the Arrow reader
- * [PARQUET-949] - [C++] Arrow version pinning seems to not be working properly
- * [PARQUET-955] - [C++] pkg_check_modules will override $ARROW_HOME if it is set in the environment
- * [PARQUET-945] - [C++] Thrift static libraries are not used with recent patch
- * [PARQUET-943] - [C++] Overflow build error on x86
- * [PARQUET-938] - [C++] There is a typo in cmake_modules/FindSnappy.cmake comment
- * [PARQUET-936] - [C++] parquet::arrow::WriteTable can enter infinite loop if chunk_size is 0
- * [PARQUET-981] - Repair usage of *_HOME 3rd party dependencies environment variables during Windows build
- * [PARQUET-992] - [C++] parquet/compression.h leaks zlib.h
- * [PARQUET-987] - [C++] Fix regressions caused by PARQUET-981
- * [PARQUET-933] - [C++] Account for Arrow Table API changes coming in ARROW-728
- * [PARQUET-915] - Support Arrow Time Types in Schema
- * [PARQUET-914] - [C++] Throw more informative exception when user writes too many values to a column in a row group
- * [PARQUET-923] - [C++] Account for Time metadata changes in ARROW-686
- * [PARQUET-918] - FromParquetSchema API crashes on nested schemas
- * [PARQUET-925] - [C++] FindArrow.cmake sets the wrong library path after ARROW-648
- * [PARQUET-932] - [c++] Add option to build parquet library with minimal dependency
- * [PARQUET-919] - [C++] Account for API changes in ARROW-683
- * [PARQUET-995] - [C++] Int96 reader in parquet_arrow uses size of Int96Type instead of Int96
-
-## Improvement
- * [PARQUET-508] - Add ParquetFilePrinter
- * [PARQUET-595] - Add API for key-value metadata
- * [PARQUET-897] - [C++] Only use designated public headers from libarrow
- * [PARQUET-679] - [C++] Build and unit tests support for MSVC on Windows
- * [PARQUET-977] - Improve MSVC build
- * [PARQUET-957] - [C++] Add optional $PARQUET_BUILD_TOOLCHAIN environment variable option for configuring build environment
- * [PARQUET-961] - [C++] Strip debug symbols from libparquet libraries in release builds by default
- * [PARQUET-954] - C++: Use Brotli 0.6 release
- * [PARQUET-953] - [C++] Change arrow::FileWriter API to be initialized from a Schema, and provide for writing multiple tables
- * [PARQUET-941] - [C++] Stop needless Boost static library detection for CentOS 7 support
- * [PARQUET-942] - [C++] Fix wrong variable use in FindSnappy
- * [PARQUET-939] - [C++] Support Thrift_HOME CMake variable like FindSnappy does as Snappy_HOME
- * [PARQUET-940] - [C++] Fix Arrow library path detection
- * [PARQUET-937] - [C++] Support CMake < 3.4 again for Arrow detection
- * [PARQUET-935] - [C++] Set shared library version for .deb packages
- * [PARQUET-934] - [C++] Support multiarch on Debian
- * [PARQUET-984] - C++: Add abi and so version to pkg-config
- * [PARQUET-983] - C++: Update Thirdparty hash to Arrow 0.3.0
- * [PARQUET-989] - [C++] Link dynamically to libarrow in toolchain build, set LD_LIBRARY_PATH
- * [PARQUET-988] - [C++] Add Linux toolchain-based build to Travis CI
- * [PARQUET-928] - [C++] Support pkg-config
- * [PARQUET-927] - [C++] Specify shared library version of Apache Arrow
- * [PARQUET-931] - [C++] Add option to pin thirdparty Arrow version used in ExternalProject
- * [PARQUET-926] - [C++] Use pkg-config to find Apache Arrow
- * [PARQUET-917] - C++: Build parquet_arrow by default
- * [PARQUET-910] - C++: Support TIME logical type in parquet_arrow
- * [PARQUET-909] - [CPP]: Reduce buffer allocations (mallocs) on critical path
-
-## New Feature
- * [PARQUET-853] - [C++] Add option to link with shared boost libraries when building Arrow in the thirdparty toolchain
- * [PARQUET-946] - [C++] Refactoring in parquet::arrow::FileReader to be able to read a single row group
- * [PARQUET-930] - [C++] Account for all Arrow date/time types
-
-
-Parquet C++ 1.0.0
---------------------------------------------------------------------------------
-## Bug
- * [PARQUET-455] - Fix compiler warnings on OS X / Clang
- * [PARQUET-558] - Support ZSH in build scripts
- * [PARQUET-720] - Parquet-cpp fails to link when included in multiple TUs
- * [PARQUET-718] - Reading boolean pages written by parquet-cpp fails
- * [PARQUET-640] - [C++] Force the use of gcc 4.9 in conda builds
- * [PARQUET-643] - Add const modifier to schema pointer reference in ParquetFileWriter
- * [PARQUET-672] - [C++] Build testing conda artifacts in debug mode
- * [PARQUET-661] - [C++] Do not assume that perl is found in /usr/bin
- * [PARQUET-659] - [C++] Instantiated template visibility is broken on clang / OS X
- * [PARQUET-657] - [C++] Don't define DISALLOW_COPY_AND_ASSIGN if already defined
- * [PARQUET-656] - [C++] Revert PARQUET-653
- * [PARQUET-676] - MAX_VALUES_PER_LITERAL_RUN causes RLE encoding failure
- * [PARQUET-614] - C++: Remove unneeded LZ4-related code
- * [PARQUET-604] - Install writer.h headers
- * [PARQUET-621] - C++: Uninitialised DecimalMetadata is read
- * [PARQUET-620] - C++: Duplicate calls to ParquetFileWriter::Close cause duplicate metadata writes
- * [PARQUET-599] - ColumnWriter::RleEncodeLevels' size estimation might be wrong
- * [PARQUET-617] - C++: Enable conda build to work on systems with non-default C++ toolchains
- * [PARQUET-627] - Ensure that thrift headers are generated before source compilation
- * [PARQUET-745] - TypedRowGroupStatistics fails to PlainDecode min and max in ByteArrayType
- * [PARQUET-738] - Update arrow version that also supports newer Xcode
- * [PARQUET-747] - [C++] TypedRowGroupStatistics are not being exported in libparquet.so
- * [PARQUET-711] - Use metadata builders in parquet writer
- * [PARQUET-732] - Building a subset of dependencies does not work
- * [PARQUET-760] - On switching from dictionary to the fallback encoding, an incorrect encoding is set
- * [PARQUET-691] - [C++] Write ColumnChunk metadata after each column chunk in the file
- * [PARQUET-797] - [C++] Update for API changes in ARROW-418
- * [PARQUET-837] - [C++] SerializedFile::ParseMetaData uses Seek, followed by Read, and could have race conditions
- * [PARQUET-827] - [C++] Incorporate addition of arrow::MemoryPool::Reallocate
- * [PARQUET-502] - Scanner segfaults when its batch size is smaller than the number of rows
- * [PARQUET-469] - Roll back Thrift bindings to 0.9.0
- * [PARQUET-889] - Fix compilation when PARQUET_USE_SSE is on
- * [PARQUET-888] - C++ Memory leak in RowGroupSerializer
- * [PARQUET-819] - C++: Trying to install non-existing parquet/arrow/utils.h
- * [PARQUET-736] - XCode 8.0 breaks builds
- * [PARQUET-505] - Column reader: automatically handle large data pages
- * [PARQUET-615] - C++: Building static or shared libparquet should not be mutually exclusive
- * [PARQUET-658] - ColumnReader has no virtual destructor
- * [PARQUET-799] - concurrent usage of the file reader API
- * [PARQUET-513] - Valgrind errors are not failing the Travis CI build
- * [PARQUET-841] - [C++] Writing wrong format version when using ParquetVersion::PARQUET_1_0
- * [PARQUET-742] - Add missing license headers
- * [PARQUET-741] - compression_buffer_ is reused although it shouldn't
- * [PARQUET-700] - C++: Disable dictionary encoding for boolean columns
- * [PARQUET-662] - [C++] ParquetException must be explicitly exported in dynamic libraries
- * [PARQUET-704] - [C++] scan-all.h is not being installed
- * [PARQUET-865] - C++: Pass all CXXFLAGS to Thrift ExternalProject
- * [PARQUET-875] - [C++] Fix coveralls build given changes to thirdparty build procedure
- * [PARQUET-709] - [C++] Fix conda dev binary builds
- * [PARQUET-638] - [C++] Revert static linking of libstdc++ in conda builds until symbol visibility addressed
- * [PARQUET-606] - Travis coverage is broken
- * [PARQUET-880] - [CPP] Prevent destructors from throwing
- * [PARQUET-886] - [C++] Revise build documentation and requirements in README.md
- * [PARQUET-900] - C++: Fix NOTICE / LICENSE issues
- * [PARQUET-885] - [C++] Do not search for Thrift in default system paths
- * [PARQUET-879] - C++: ExternalProject compilation for Thrift fails on older CMake versions
- * [PARQUET-635] - [C++] Statically link libstdc++ on Linux in conda recipe
- * [PARQUET-710] - Remove unneeded private member variables from RowGroupReader ABI
- * [PARQUET-766] - C++: Expose ParquetFileReader through Arrow reader as const
- * [PARQUET-876] - C++: Correct snapshot version
- * [PARQUET-821] - [C++] zlib download link is broken
- * [PARQUET-818] - [C++] Refactor library to share IO, Buffer, and memory management abstractions with Apache Arrow
- * [PARQUET-537] - LocalFileSource leaks resources
- * [PARQUET-764] - [CPP] Parquet Writer does not write Boolean values correctly
- * [PARQUET-812] - [C++] Failure reading BYTE_ARRAY data from file in parquet-compatibility project
- * [PARQUET-759] - Cannot store columns consisting of empty strings
- * [PARQUET-846] - [CPP] CpuInfo::Init() is not thread safe
- * [PARQUET-694] - C++: Revert default data page size back to 1M
- * [PARQUET-842] - [C++] Impala rejects DOUBLE columns if decimal metadata is set
- * [PARQUET-708] - [C++] RleEncoder does not account for "worst case scenario" in MaxBufferSize for bit_width > 1
- * [PARQUET-639] - Do not export DCHECK in public headers
- * [PARQUET-828] - [C++] "version" field set improperly in file metadata
- * [PARQUET-891] - [C++] Do not search for Snappy in default system paths
- * [PARQUET-626] - Fix builds due to unavailable llvm.org apt mirror
- * [PARQUET-629] - RowGroupSerializer should only close itself once
- * [PARQUET-472] - Clean up InputStream ownership semantics in ColumnReader
- * [PARQUET-739] - Rle-decoding uses static buffer that is shared across threads
- * [PARQUET-561] - ParquetFileReader::Contents PIMPL missing a virtual destructor
- * [PARQUET-892] - [C++] Clean up link library targets in CMake files
- * [PARQUET-454] - Address inconsistencies in boolean decoding
- * [PARQUET-816] - [C++] Failure decoding sample dict-encoded file from parquet-compatibility project
- * [PARQUET-565] - Use PATH instead of DIRECTORY in get_filename_component to support CMake<2.8.12
- * [PARQUET-446] - Hide thrift dependency in parquet-cpp
- * [PARQUET-843] - [C++] Impala unable to read files created by parquet-cpp
- * [PARQUET-555] - Dictionary page metadata handling inconsistencies
- * [PARQUET-908] - Fix for PARQUET-890 introduces undefined symbol in libparquet_arrow.so
- * [PARQUET-793] - [CPP] Do not return incorrect statistics
- * [PARQUET-887] - C++: Fix issues in release scripts arise in RC1
-
-## Improvement
- * [PARQUET-277] - Remove boost dependency
- * [PARQUET-500] - Enable coveralls.io for apache/parquet-cpp
- * [PARQUET-497] - Decouple Parquet physical file structure from FileReader class
- * [PARQUET-597] - Add data rates to benchmark output
- * [PARQUET-522] - #include cleanup with include-what-you-use
- * [PARQUET-515] - Add "Reset" to LevelEncoder and LevelDecoder
- * [PARQUET-514] - Automate coveralls.io updates in Travis CI
- * [PARQUET-551] - Handle compiler warnings due to disabled DCHECKs in release builds
- * [PARQUET-559] - Enable InputStream as a source to the ParquetFileReader
- * [PARQUET-562] - Simplified ZSH support in build scripts
- * [PARQUET-538] - Improve ColumnReader Tests
- * [PARQUET-541] - Portable build scripts
- * [PARQUET-724] - Test more advanced properties setting
- * [PARQUET-641] - Instantiate stringstream only if needed in SerializedPageReader::NextPage
- * [PARQUET-636] - Expose selection for different encodings
- * [PARQUET-603] - Implement missing information in schema descriptor
- * [PARQUET-610] - Print ColumnMetaData for each RowGroup
- * [PARQUET-600] - Add benchmarks for RLE-Level encoding
- * [PARQUET-592] - Support compressed writes
- * [PARQUET-593] - Add API for writing Page statistics
- * [PARQUET-589] - Implement Chunked InMemoryInputStream for better memory usage
- * [PARQUET-587] - Implement BufferReader::Read(int64_t,uint8_t*)
- * [PARQUET-616] - C++: WriteBatch should accept const arrays
- * [PARQUET-630] - C++: Support link flags for older CMake versions
- * [PARQUET-634] - Consistent private linking of dependencies
- * [PARQUET-633] - Add version to WriterProperties
- * [PARQUET-625] - Improve RLE read performance
- * [PARQUET-737] - Use absolute namespace in macros
- * [PARQUET-762] - C++: Use optimistic allocation instead of Arrow Builders
- * [PARQUET-773] - C++: Check licenses with RAT in CI
- * [PARQUET-687] - C++: Switch to PLAIN encoding if dictionary grows too large
- * [PARQUET-784] - C++: Reference Spark, Kudu and FrameOfReference in LICENSE
- * [PARQUET-809] - [C++] Add API to determine if two files' schemas are compatible
- * [PARQUET-778] - Standardize the schema output to match the parquet-mr format
- * [PARQUET-463] - Add DCHECK* macros for assertions in debug builds
- * [PARQUET-471] - Use the same environment setup script for Travis CI as local sandbox development
- * [PARQUET-449] - Update to latest parquet.thrift
- * [PARQUET-496] - Fix cpplint configuration to be more restrictive
- * [PARQUET-468] - Add a cmake option to generate the Parquet thrift headers with the thriftc in the environment
- * [PARQUET-482] - Organize src code file structure to have a very clear folder with public headers.
- * [PARQUET-591] - Page size estimation during writes
- * [PARQUET-518] - Review usages of size_t and unsigned integers generally per Google style guide
- * [PARQUET-533] - Simplify RandomAccessSource API to combine Seek/Read
- * [PARQUET-767] - Add release scripts for parquet-cpp
- * [PARQUET-699] - Update parquet.thrift from https://github.com/apache/parquet-format
- * [PARQUET-653] - [C++] Re-enable -static-libstdc++ in dev artifact builds
- * [PARQUET-763] - C++: Expose ParquetFileReader through Arrow reader
- * [PARQUET-857] - [C++] Flatten parquet/encodings directory
- * [PARQUET-862] - Provide default cache size values if CPU info probing is not available
- * [PARQUET-689] - C++: Compress DataPages eagerly
- * [PARQUET-874] - [C++] Use default memory allocator from Arrow
- * [PARQUET-267] - Detach thirdparty code from build configuration.
- * [PARQUET-418] - Add a utility to print contents of a Parquet file to stdout
- * [PARQUET-519] - Disable compiler warning suppressions and fix all DEBUG build warnings
- * [PARQUET-447] - Add Debug and Release build types and associated compiler flags
- * [PARQUET-868] - C++: Build snappy with optimizations
- * [PARQUET-894] - Fix compilation warning
- * [PARQUET-883] - C++: Support non-standard gcc version strings
- * [PARQUET-607] - Public Writer header
- * [PARQUET-731] - [CPP] Add API to return metadata size and Skip reading values
- * [PARQUET-628] - Link thrift privately
- * [PARQUET-877] - C++: Update Arrow Hash, update Version in metadata.
- * [PARQUET-547] - Refactor most templates to use DataType structs rather than the Type::type enum
- * [PARQUET-882] - [CPP] Improve Application Version parsing
- * [PARQUET-448] - Add cmake option to skip building the unit tests
- * [PARQUET-721] - Performance benchmarks for reading into Arrow structures
- * [PARQUET-820] - C++: Decoders should directly emit arrays with spacing for null entries
- * [PARQUET-813] - C++: Build dependencies using CMake External project
- * [PARQUET-488] - Add SSE-related cmake options to manage compiler flags
- * [PARQUET-564] - Add option to run unit tests with valgrind --tool=memcheck
- * [PARQUET-572] - Rename parquet_cpp namespace to parquet
- * [PARQUET-829] - C++: Make use of ARROW-469
- * [PARQUET-501] - Add an OutputStream abstraction (capable of memory allocation) for Encoder public API
- * [PARQUET-744] - Clarifications on build instructions
- * [PARQUET-520] - Add version of LocalFileSource that uses memory-mapping for zero-copy reads
- * [PARQUET-556] - Extend RowGroupStatistics to include "min" "max" statistics
- * [PARQUET-671] - Improve performance of RLE/bit-packed decoding in parquet-cpp
- * [PARQUET-681] - Add tool to scan a parquet file
-
-## New Feature
- * [PARQUET-499] - Complete PlainEncoder implementation for all primitive types and test end to end
- * [PARQUET-439] - Conform all copyright headers to ASF requirements
- * [PARQUET-436] - Implement ParquetFileWriter class entry point for generating new Parquet files
- * [PARQUET-435] - Provide vectorized ColumnReader interface
- * [PARQUET-438] - Update RLE encoder/decoder modules from Impala upstream changes and adapt unit tests
- * [PARQUET-512] - Add optional google/benchmark 3rd-party dependency for performance testing
- * [PARQUET-566] - Add method to retrieve the full column path
- * [PARQUET-613] - C++: Add conda packaging recipe
- * [PARQUET-605] - Expose schema node in ColumnDescriptor
- * [PARQUET-619] - C++: Add OutputStream for local files
- * [PARQUET-583] - Implement Parquet to Thrift schema conversion
- * [PARQUET-582] - Conversion functions for Parquet enums to Thrift enums
- * [PARQUET-728] - [C++] Bring parquet::arrow up to date with API changes in arrow::io
- * [PARQUET-752] - [C++] Conform parquet_arrow to upstream API changes
- * [PARQUET-788] - [C++] Reference Impala / Apache Impala (incubating) in LICENSE
- * [PARQUET-808] - [C++] Add API to read file given externally-provided FileMetadata
- * [PARQUET-807] - [C++] Add API to read file metadata only from a file handle
- * [PARQUET-805] - C++: Read Int96 into Arrow Timestamp(ns)
- * [PARQUET-836] - [C++] Add column selection to parquet::arrow::FileReader
- * [PARQUET-835] - [C++] Add option to parquet::arrow to read columns in parallel using a thread pool
- * [PARQUET-830] - [C++] Add additional configuration options to parquet::arrow::OpenFile
- * [PARQUET-769] - C++: Add support for Brotli Compression
- * [PARQUET-489] - Add visibility macros to be used for public and internal APIs of libparquet
- * [PARQUET-542] - Support memory allocation from external memory
- * [PARQUET-844] - [C++] Consolidate encodings, schema, and compression subdirectories into fewer files
- * [PARQUET-848] - [C++] Consolidate libparquet_thrift subcomponent
- * [PARQUET-646] - [C++] Enable easier 3rd-party toolchain clang builds on Linux
- * [PARQUET-598] - [C++] Test writing all primitive data types
- * [PARQUET-442] - Convert flat SchemaElement vector to implied nested schema data structure
- * [PARQUET-867] - [C++] Support writing sliced Arrow arrays
- * [PARQUET-456] - Add zlib codec support
- * [PARQUET-834] - C++: Support r/w of arrow::ListArray
- * [PARQUET-485] - Decouple data page delimiting from column reader / scanner classes, create test fixtures
- * [PARQUET-434] - Add a ParquetFileReader class to encapsulate some low-level details of interacting with Parquet files
- * [PARQUET-666] - PLAIN_DICTIONARY write support
- * [PARQUET-437] - Incorporate googletest thirdparty dependency and add cmake tools (ADD_PARQUET_TEST) to simplify adding new unit tests
- * [PARQUET-866] - [C++] Account for API changes in ARROW-33
- * [PARQUET-545] - Improve API to support Decimal type
- * [PARQUET-579] - Add API for writing Column statistics
- * [PARQUET-494] - Implement PLAIN_DICTIONARY encoding and decoding
- * [PARQUET-618] - C++: Automatically upload conda build artifacts on commits to master
- * [PARQUET-833] - C++: Provide API to write spaced arrays (e.g. Arrow)
- * [PARQUET-903] - C++: Add option to set RPATH to ORIGIN
- * [PARQUET-451] - Add a RowGroup reader interface class
- * [PARQUET-785] - C++: List conversion for Arrow Schemas
- * [PARQUET-712] - C++: Read into Arrow memory
- * [PARQUET-890] - C++: Support I/O of DATE columns in parquet_arrow
- * [PARQUET-782] - C++: Support writing to Arrow sinks
- * [PARQUET-849] - [C++] Upgrade default Thrift in thirdparty toolchain to 0.9.3 or 0.10
- * [PARQUET-573] - C++: Create a public API for reading and writing file metadata
-
-## Task
- * [PARQUET-814] - C++: Remove Conda recipes
- * [PARQUET-503] - Re-enable parquet 2.0 encodings
- * [PARQUET-169] - Parquet-cpp: Implement support for bulk reading and writing repetition/definition levels.
- * [PARQUET-878] - C++: Remove setup_build_env from rc-verification script
- * [PARQUET-881] - C++: Update Arrow hash to 0.2.0-rc2
- * [PARQUET-771] - C++: Sync KEYS file
- * [PARQUET-901] - C++: Publish RCs in apache-parquet-VERSION in SVN
-
-## Test
- * [PARQUET-525] - Test coverage for malformed file failure modes on the read path
- * [PARQUET-703] - [C++] Validate num_values metadata for columns with nulls
- * [PARQUET-507] - Improve runtime of rle-test.cc
- * [PARQUET-549] - Add scanner and column reader tests for dictionary data pages
- * [PARQUET-457] - Add compressed data page unit tests
+Parquet C++ 1.5.0
+--------------------------------------------------------------------------------
+## Bug
+ * [PARQUET-979] - [C++] Limit size of min, max or disable stats for long binary types
+ * [PARQUET-1071] - [C++] parquet::arrow::FileWriter::Close is not idempotent
+ * [PARQUET-1349] - [C++] PARQUET_RPATH_ORIGIN is not picked by the build
+ * [PARQUET-1334] - [C++] memory_map parameter seems misleading in parquet file opener
+ * [PARQUET-1333] - [C++] Reading of files with dictionary size 0 fails on Windows with bad_alloc
+ * [PARQUET-1283] - [C++] FormatStatValue appends trailing space to string and int96
+ * [PARQUET-1270] - [C++] Executable tools do not get installed
+ * [PARQUET-1272] - [C++] ScanFileContents reports wrong row count for nested columns
+ * [PARQUET-1268] - [C++] Conversion of Arrow null list columns fails
+ * [PARQUET-1255] - [C++] Exceptions thrown in some tests
+ * [PARQUET-1358] - [C++] index_page_offset should be unset as it is not supported.
+ * [PARQUET-1357] - [C++] FormatStatValue truncates binary statistics on zero character
+ * [PARQUET-1319] - [C++] Pass BISON_EXECUTABLE to Thrift EP for MacOS
+ * [PARQUET-1313] - [C++] Compilation failure with VS2017
+ * [PARQUET-1315] - [C++] ColumnChunkMetaData.has_dictionary_page() should return bool, not int64_t
+ * [PARQUET-1307] - [C++] memory-test fails with latest Arrow
+ * [PARQUET-1274] - [Python] SegFault in pyarrow.parquet.write_table with specific options
+ * [PARQUET-1209] - locally defined symbol ... imported in function ..
+ * [PARQUET-1245] - [C++] Segfault when writing Arrow table with duplicate columns
+ * [PARQUET-1273] - [Python] Error writing to partitioned Parquet dataset
+ * [PARQUET-1384] - [C++] Clang compiler warnings in bloom_filter-test.cc
+
+## Improvement
+ * [PARQUET-1348] - [C++] Allow Arrow FileWriter To Write FileMetaData
+ * [PARQUET-1346] - [C++] Protect against null values data in empty Arrow array
+ * [PARQUET-1340] - [C++] Fix Travis Ci valgrind errors related to std::random_device
+ * [PARQUET-1323] - [C++] Fix compiler warnings with clang-6.0
+ * [PARQUET-1279] - Use ASSERT_NO_FATAL_FAILURE in C++ unit tests
+ * [PARQUET-1262] - [C++] Use the same BOOST_ROOT and Boost_NAMESPACE for Thrift
+ * [PARQUET-1267] - replace "unsafe" std::equal by std::memcmp
+ * [PARQUET-1360] - [C++] Minor API + style changes follow up to PARQUET-1348
+ * [PARQUET-1166] - [API Proposal] Add GetRecordBatchReader in parquet/arrow/reader.h
+ * [PARQUET-1378] - [c++] Allow RowGroups with zero rows to be written
+ * [PARQUET-1256] - [C++] Add --print-key-value-metadata option to parquet_reader tool
+ * [PARQUET-1276] - [C++] Reduce the amount of memory used for writing null decimal values
+
+## New Feature
+ * [PARQUET-1392] - [C++] Supply row group indices to parquet::arrow::FileReader::ReadTable
+
+## Sub-task
+ * [PARQUET-1227] - Thrift crypto metadata structures
+ * [PARQUET-1332] - [C++] Add bloom filter utility class
+
+## Task
+ * [PARQUET-1350] - [C++] Use abstract ResizableBuffer instead of concrete PoolBuffer
+ * [PARQUET-1366] - [C++] Streamline use of Arrow bit-util.h
+ * [PARQUET-1308] - [C++] parquet::arrow should use thread pool, not ParallelFor
+ * [PARQUET-1382] - [C++] Prepare for arrow::test namespace removal
+ * [PARQUET-1372] - [C++] Add an API to allow writing RowGroups based on their size rather than num_rows
+
+
+Parquet C++ 1.4.0
+--------------------------------------------------------------------------------
+## Bug
+ * [PARQUET-1193] - [CPP] Implement ColumnOrder to support min_value and max_value
+ * [PARQUET-1180] - C++: Fix behaviour of num_children element of primitive nodes
+ * [PARQUET-1146] - C++: Add macOS-compatible sha512sum call to release verify script
+ * [PARQUET-1167] - [C++] FieldToNode function should return a status when throwing an exception
+ * [PARQUET-1175] - [C++] Fix usage of deprecated Arrow API
+ * [PARQUET-1113] - [C++] Incorporate fix from ARROW-1601 on bitmap read path
+ * [PARQUET-1111] - dev/release/verify-release-candidate has stale help
+ * [PARQUET-1109] - C++: Update release verification script to SHA512
+ * [PARQUET-1179] - [C++] Support Apache Thrift 0.11
+ * [PARQUET-1226] - [C++] Fix new build warnings with clang 5.0
+ * [PARQUET-1233] - [CPP] Enable option to switch between stl classes and boost classes for thrift header
+ * [PARQUET-1205] - Fix msvc static build
+ * [PARQUET-1210] - [C++] Boost 1.66 compilation fails on Windows on linkage stage
+
+## Improvement
+ * [PARQUET-1092] - [C++] Write Arrow tables with chunked columns
+ * [PARQUET-1086] - [C++] Remove usage of arrow/util/compiler-util.h after 1.3.0 release
+ * [PARQUET-1097] - [C++] Account for Arrow API deprecation in ARROW-1511
+ * [PARQUET-1150] - C++: Hide statically linked boost symbols
+ * [PARQUET-1151] - [C++] Add build options / configuration to use static runtime libraries with MSVC
+ * [PARQUET-1147] - [C++] Account for API deprecation / change in ARROW-1671
+ * [PARQUET-1162] - C++: Update dev/README after migration to Gitbox
+ * [PARQUET-1165] - [C++] Pin clang-format version to 4.0
+ * [PARQUET-1164] - [C++] Follow API changes in ARROW-1808
+ * [PARQUET-1177] - [C++] Add more extensive compiler warnings when using Clang
+ * [PARQUET-1110] - [C++] Release verification script for Windows
+ * [PARQUET-859] - [C++] Flatten parquet/file directory
+ * [PARQUET-1220] - [C++] Don't build Thrift examples and tutorials in the ExternalProject
+ * [PARQUET-1219] - [C++] Update release-candidate script links to gitbox
+ * [PARQUET-1196] - [C++] Provide a parquet_arrow example project incl. CMake setup
+ * [PARQUET-1200] - [C++] Support reading a single Arrow column from a Parquet file
+
+## New Feature
+ * [PARQUET-1095] - [C++] Read and write Arrow decimal values
+ * [PARQUET-970] - Add Lz4 and Zstd compression codecs
+
+## Task
+ * [PARQUET-1221] - [C++] Extend release README
+ * [PARQUET-1225] - NaN values may lead to incorrect filtering under certain circumstances
+
+
+Parquet C++ 1.3.1
+--------------------------------------------------------------------------------
+## Bug
+ * [PARQUET-1105] - [CPP] Remove libboost_system dependency
+ * [PARQUET-1138] - [C++] Fix compilation with Arrow 0.7.1
+ * [PARQUET-1123] - [C++] Update parquet-cpp to use Arrow's AssertArraysEqual
+ * [PARQUET-1121] - C++: DictionaryArrays of NullType cannot be written
+ * [PARQUET-1139] - Add license to cmake_modules/parquet-cppConfig.cmake.in
+
+## Improvement
+ * [PARQUET-1140] - [C++] Fail on RAT errors in CI
+ * [PARQUET-1070] - Add CPack support to the build
+
+
+Parquet C++ 1.3.0
+--------------------------------------------------------------------------------
+## Bug
+ * [PARQUET-1098] - [C++] Install new header in parquet/util
+ * [PARQUET-1085] - [C++] Backwards compatibility from macro cleanup in transitive dependencies in ARROW-1452
+ * [PARQUET-1074] - [C++] Switch to long key ids in KEYs file
+ * [PARQUET-1075] - C++: Coverage upload is broken
+ * [PARQUET-1088] - [CPP] remove parquet_version.h from version control since it gets auto generated
+ * [PARQUET-1002] - [C++] Compute statistics based on Logical Types
+ * [PARQUET-1100] - [C++] Reading repeated types should decode number of records rather than number of values
+ * [PARQUET-1090] - [C++] Fix int32 overflow in Arrow table writer, add max row group size property
+ * [PARQUET-1108] - [C++] Fix Int96 comparators
+
+## Improvement
+ * [PARQUET-1104] - [C++] Upgrade to Apache Arrow 0.7.0 RC0
+ * [PARQUET-1072] - [C++] Add ARROW_NO_DEPRECATED_API to CI to check for deprecated API use
+ * [PARQUET-1096] - C++: Update sha{1, 256, 512} checksums per latest ASF release policy
+ * [PARQUET-1079] - [C++] Account for Arrow API change in ARROW-1335
+ * [PARQUET-1087] - [C++] Add wrapper for ScanFileContents in parquet::arrow that catches exceptions
+ * [PARQUET-1093] - C++: Improve Arrow level generation error message
+ * [PARQUET-1094] - C++: Add benchmark for boolean Arrow column I/O
+ * [PARQUET-1083] - [C++] Refactor core logic in parquet-scan.cc so that it can be used as a library function for benchmarking
+ * [PARQUET-1037] - Allow final RowGroup to be unfilled
+
+## New Feature
+ * [PARQUET-1078] - [C++] Add Arrow writer option to coerce timestamps to milliseconds or microseconds
+ * [PARQUET-929] - [C++] Handle arrow::DictionaryArray when writing Arrow data
+
+
+Parquet C++ 1.2.0
+--------------------------------------------------------------------------------
+## Bug
+ * [PARQUET-1029] - [C++] TypedColumnReader/TypeColumnWriter symbols are no longer being exported
+ * [PARQUET-997] - Fix override compiler warnings
+ * [PARQUET-1033] - Mismatched Read and Write
+ * [PARQUET-1007] - [C++] Update parquet.thrift from https://github.com/apache/parquet-format
+ * [PARQUET-1039] - PARQUET-911 Breaks Arrow
+ * [PARQUET-1038] - Key value metadata should be nullptr if not set
+ * [PARQUET-1018] - [C++] parquet.dll has runtime dependencies on one or more libraries in the build toolchain
+ * [PARQUET-1003] - [C++] Modify DEFAULT_CREATED_BY value for every new release version
+ * [PARQUET-1004] - CPP Building fails on windows
+ * [PARQUET-1040] - Missing writer method implementations
+ * [PARQUET-1054] - [C++] Account for Arrow API changes in ARROW-1199
+ * [PARQUET-1042] - C++: Compilation breaks on GCC 4.8
+ * [PARQUET-1048] - [C++] Static linking of libarrow is no longer supported
+ * [PARQUET-1013] - Fix ZLIB_INCLUDE_DIR
+ * [PARQUET-998] - C++: Release script is not usable
+ * [PARQUET-1023] - [C++] Brotli libraries are not being statically linked on Windows
+ * [PARQUET-1000] - [C++] Do not build thirdparty Arrow with /WX on MSVC
+ * [PARQUET-1052] - [C++] add_compiler_export_flags() throws warning with CMake >= 3.3
+ * [PARQUET-1069] - C++: ./dev/release/verify-release-candidate is broken due to missing Arrow dependencies
+
+## Improvement
+ * [PARQUET-996] - Improve MSVC build - ThirdpartyToolchain - Arrow
+ * [PARQUET-911] - C++: Support nested structs in parquet_arrow
+ * [PARQUET-986] - Improve MSVC build - ThirdpartyToolchain - Thrift
+ * [PARQUET-864] - [C++] Consolidate non-Parquet-specific bit utility code into Apache Arrow
+ * [PARQUET-1043] - [C++] Raise minimum supported CMake version to 3.2
+ * [PARQUET-1016] - Upgrade thirdparty Arrow to 0.4.0
+ * [PARQUET-858] - [C++] Flatten parquet/column directory, consolidate related code
+ * [PARQUET-978] - [C++] Minimizing footer reads for small(ish) metadata
+ * [PARQUET-991] - [C++] Fix compiler warnings on MSVC and build with /WX in Appveyor
+ * [PARQUET-863] - [C++] Move SIMD, CPU info, hashing, and other generic utilities into Apache Arrow
+ * [PARQUET-1053] - Fix unused result warnings due to unchecked Statuses
+ * [PARQUET-1067] - C++: Update arrow hash to 0.5.0
+ * [PARQUET-1041] - C++: Support Arrow's NullArray
+ * [PARQUET-1008] - Update TypedColumnReader::ReadBatch method to accept batch_size as int64_t
+ * [PARQUET-1044] - [C++] Use compression libraries from Apache Arrow
+ * [PARQUET-999] - Improve MSVC build - Enable PARQUET_BUILD_BENCHMARKS
+ * [PARQUET-967] - [C++] Combine libparquet/libparquet_arrow libraries
+ * [PARQUET-1045] - [C++] Refactor to account for computational utility code migration in ARROW-1154
+
+## New Feature
+ * [PARQUET-1035] - Write Int96 from Arrow Timestamp(ns)
+
+## Task
+ * [PARQUET-994] - C++: release-candidate script should not push to master
+ * [PARQUET-902] - [C++] Move compressor interfaces into Apache Arrow
+
+## Test
+ * [PARQUET-706] - [C++] Create test case that uses libparquet as a 3rd party library
+
+
+Parquet C++ 1.1.0
+--------------------------------------------------------------------------------
+## Bug
+ * [PARQUET-898] - [C++] Change Travis CI OS X image to Xcode 6.4 and fix our thirdparty build
+ * [PARQUET-976] - [C++] Pass unit test suite with MSVC, build in Appveyor
+ * [PARQUET-963] - [C++] Disallow reading struct types in Arrow reader for now
+ * [PARQUET-959] - [C++] Arrow thirdparty build fails on multiarch systems
+ * [PARQUET-962] - [C++] GTEST_MAIN_STATIC_LIB is not defined in FindGTest.cmake
+ * [PARQUET-958] - [C++] Print Parquet metadata in JSON format
+ * [PARQUET-956] - C++: BUILD_BYPRODUCTS not specified anymore for gtest
+ * [PARQUET-948] - [C++] Account for API changes in ARROW-782
+ * [PARQUET-947] - [C++] Refactor to account for ARROW-795 Arrow core library consolidation
+ * [PARQUET-965] - [C++] FIXED_LEN_BYTE_ARRAY types are unhandled in the Arrow reader
+ * [PARQUET-949] - [C++] Arrow version pinning seems to not be working properly
+ * [PARQUET-955] - [C++] pkg_check_modules will override $ARROW_HOME if it is set in the environment
+ * [PARQUET-945] - [C++] Thrift static libraries are not used with recent patch
+ * [PARQUET-943] - [C++] Overflow build error on x86
+ * [PARQUET-938] - [C++] There is a typo in cmake_modules/FindSnappy.cmake comment
+ * [PARQUET-936] - [C++] parquet::arrow::WriteTable can enter infinite loop if chunk_size is 0
+ * [PARQUET-981] - Repair usage of *_HOME 3rd party dependencies environment variables during Windows build
+ * [PARQUET-992] - [C++] parquet/compression.h leaks zlib.h
+ * [PARQUET-987] - [C++] Fix regressions caused by PARQUET-981
+ * [PARQUET-933] - [C++] Account for Arrow Table API changes coming in ARROW-728
+ * [PARQUET-915] - Support Arrow Time Types in Schema
+ * [PARQUET-914] - [C++] Throw more informative exception when user writes too many values to a column in a row group
+ * [PARQUET-923] - [C++] Account for Time metadata changes in ARROW-686
+ * [PARQUET-918] - FromParquetSchema API crashes on nested schemas
+ * [PARQUET-925] - [C++] FindArrow.cmake sets the wrong library path after ARROW-648
+ * [PARQUET-932] - [c++] Add option to build parquet library with minimal dependency
+ * [PARQUET-919] - [C++] Account for API changes in ARROW-683
+ * [PARQUET-995] - [C++] Int96 reader in parquet_arrow uses size of Int96Type instead of Int96
+
+## Improvement
+ * [PARQUET-508] - Add ParquetFilePrinter
+ * [PARQUET-595] - Add API for key-value metadata
+ * [PARQUET-897] - [C++] Only use designated public headers from libarrow
+ * [PARQUET-679] - [C++] Build and unit tests support for MSVC on Windows
+ * [PARQUET-977] - Improve MSVC build
+ * [PARQUET-957] - [C++] Add optional $PARQUET_BUILD_TOOLCHAIN environment variable option for configuring build environment
+ * [PARQUET-961] - [C++] Strip debug symbols from libparquet libraries in release builds by default
+ * [PARQUET-954] - C++: Use Brotli 0.6 release
+ * [PARQUET-953] - [C++] Change arrow::FileWriter API to be initialized from a Schema, and provide for writing multiple tables
+ * [PARQUET-941] - [C++] Stop needless Boost static library detection for CentOS 7 support
+ * [PARQUET-942] - [C++] Fix wrong variable use in FindSnappy
+ * [PARQUET-939] - [C++] Support Thrift_HOME CMake variable like FindSnappy does as Snappy_HOME
+ * [PARQUET-940] - [C++] Fix Arrow library path detection
+ * [PARQUET-937] - [C++] Support CMake < 3.4 again for Arrow detection
+ * [PARQUET-935] - [C++] Set shared library version for .deb packages
+ * [PARQUET-934] - [C++] Support multiarch on Debian
+ * [PARQUET-984] - C++: Add abi and so version to pkg-config
+ * [PARQUET-983] - C++: Update Thirdparty hash to Arrow 0.3.0
+ * [PARQUET-989] - [C++] Link dynamically to libarrow in toolchain build, set LD_LIBRARY_PATH
+ * [PARQUET-988] - [C++] Add Linux toolchain-based build to Travis CI
+ * [PARQUET-928] - [C++] Support pkg-config
+ * [PARQUET-927] - [C++] Specify shared library version of Apache Arrow
+ * [PARQUET-931] - [C++] Add option to pin thirdparty Arrow version used in ExternalProject
+ * [PARQUET-926] - [C++] Use pkg-config to find Apache Arrow
+ * [PARQUET-917] - C++: Build parquet_arrow by default
+ * [PARQUET-910] - C++: Support TIME logical type in parquet_arrow
+ * [PARQUET-909] - [CPP]: Reduce buffer allocations (mallocs) on critical path
+
+## New Feature
+ * [PARQUET-853] - [C++] Add option to link with shared boost libraries when building Arrow in the thirdparty toolchain
+ * [PARQUET-946] - [C++] Refactoring in parquet::arrow::FileReader to be able to read a single row group
+ * [PARQUET-930] - [C++] Account for all Arrow date/time types
+
+
+Parquet C++ 1.0.0
+--------------------------------------------------------------------------------
+## Bug
+ * [PARQUET-455] - Fix compiler warnings on OS X / Clang
+ * [PARQUET-558] - Support ZSH in build scripts
+ * [PARQUET-720] - Parquet-cpp fails to link when included in multiple TUs
+ * [PARQUET-718] - Reading boolean pages written by parquet-cpp fails
+ * [PARQUET-640] - [C++] Force the use of gcc 4.9 in conda builds
+ * [PARQUET-643] - Add const modifier to schema pointer reference in ParquetFileWriter
+ * [PARQUET-672] - [C++] Build testing conda artifacts in debug mode
+ * [PARQUET-661] - [C++] Do not assume that perl is found in /usr/bin
+ * [PARQUET-659] - [C++] Instantiated template visibility is broken on clang / OS X
+ * [PARQUET-657] - [C++] Don't define DISALLOW_COPY_AND_ASSIGN if already defined
+ * [PARQUET-656] - [C++] Revert PARQUET-653
+ * [PARQUET-676] - MAX_VALUES_PER_LITERAL_RUN causes RLE encoding failure
+ * [PARQUET-614] - C++: Remove unneeded LZ4-related code
+ * [PARQUET-604] - Install writer.h headers
+ * [PARQUET-621] - C++: Uninitialised DecimalMetadata is read
+ * [PARQUET-620] - C++: Duplicate calls to ParquetFileWriter::Close cause duplicate metadata writes
+ * [PARQUET-599] - ColumnWriter::RleEncodeLevels' size estimation might be wrong
+ * [PARQUET-617] - C++: Enable conda build to work on systems with non-default C++ toolchains
+ * [PARQUET-627] - Ensure that thrift headers are generated before source compilation
+ * [PARQUET-745] - TypedRowGroupStatistics fails to PlainDecode min and max in ByteArrayType
+ * [PARQUET-738] - Update arrow version that also supports newer Xcode
+ * [PARQUET-747] - [C++] TypedRowGroupStatistics are not being exported in libparquet.so
+ * [PARQUET-711] - Use metadata builders in parquet writer
+ * [PARQUET-732] - Building a subset of dependencies does not work
+ * [PARQUET-760] - On switching from dictionary to the fallback encoding, an incorrect encoding is set
+ * [PARQUET-691] - [C++] Write ColumnChunk metadata after each column chunk in the file
+ * [PARQUET-797] - [C++] Update for API changes in ARROW-418
+ * [PARQUET-837] - [C++] SerializedFile::ParseMetaData uses Seek, followed by Read, and could have race conditions
+ * [PARQUET-827] - [C++] Incorporate addition of arrow::MemoryPool::Reallocate
+ * [PARQUET-502] - Scanner segfaults when its batch size is smaller than the number of rows
+ * [PARQUET-469] - Roll back Thrift bindings to 0.9.0
+ * [PARQUET-889] - Fix compilation when PARQUET_USE_SSE is on
+ * [PARQUET-888] - C++ Memory leak in RowGroupSerializer
+ * [PARQUET-819] - C++: Trying to install non-existing parquet/arrow/utils.h
+ * [PARQUET-736] - XCode 8.0 breaks builds
+ * [PARQUET-505] - Column reader: automatically handle large data pages
+ * [PARQUET-615] - C++: Building static or shared libparquet should not be mutually exclusive
+ * [PARQUET-658] - ColumnReader has no virtual destructor
+ * [PARQUET-799] - concurrent usage of the file reader API
+ * [PARQUET-513] - Valgrind errors are not failing the Travis CI build
+ * [PARQUET-841] - [C++] Writing wrong format version when using ParquetVersion::PARQUET_1_0
+ * [PARQUET-742] - Add missing license headers
+ * [PARQUET-741] - compression_buffer_ is reused although it shouldn't
+ * [PARQUET-700] - C++: Disable dictionary encoding for boolean columns
+ * [PARQUET-662] - [C++] ParquetException must be explicitly exported in dynamic libraries
+ * [PARQUET-704] - [C++] scan-all.h is not being installed
+ * [PARQUET-865] - C++: Pass all CXXFLAGS to Thrift ExternalProject
+ * [PARQUET-875] - [C++] Fix coveralls build given changes to thirdparty build procedure
+ * [PARQUET-709] - [C++] Fix conda dev binary builds
+ * [PARQUET-638] - [C++] Revert static linking of libstdc++ in conda builds until symbol visibility addressed
+ * [PARQUET-606] - Travis coverage is broken
+ * [PARQUET-880] - [CPP] Prevent destructors from throwing
+ * [PARQUET-886] - [C++] Revise build documentation and requirements in README.md
+ * [PARQUET-900] - C++: Fix NOTICE / LICENSE issues
+ * [PARQUET-885] - [C++] Do not search for Thrift in default system paths
+ * [PARQUET-879] - C++: ExternalProject compilation for Thrift fails on older CMake versions
+ * [PARQUET-635] - [C++] Statically link libstdc++ on Linux in conda recipe
+ * [PARQUET-710] - Remove unneeded private member variables from RowGroupReader ABI
+ * [PARQUET-766] - C++: Expose ParquetFileReader through Arrow reader as const
+ * [PARQUET-876] - C++: Correct snapshot version
+ * [PARQUET-821] - [C++] zlib download link is broken
+ * [PARQUET-818] - [C++] Refactor library to share IO, Buffer, and memory management abstractions with Apache Arrow
+ * [PARQUET-537] - LocalFileSource leaks resources
+ * [PARQUET-764] - [CPP] Parquet Writer does not write Boolean values correctly
+ * [PARQUET-812] - [C++] Failure reading BYTE_ARRAY data from file in parquet-compatibility project
+ * [PARQUET-759] - Cannot store columns consisting of empty strings
+ * [PARQUET-846] - [CPP] CpuInfo::Init() is not thread safe
+ * [PARQUET-694] - C++: Revert default data page size back to 1M
+ * [PARQUET-842] - [C++] Impala rejects DOUBLE columns if decimal metadata is set
+ * [PARQUET-708] - [C++] RleEncoder does not account for "worst case scenario" in MaxBufferSize for bit_width > 1
+ * [PARQUET-639] - Do not export DCHECK in public headers
+ * [PARQUET-828] - [C++] "version" field set improperly in file metadata
+ * [PARQUET-891] - [C++] Do not search for Snappy in default system paths
+ * [PARQUET-626] - Fix builds due to unavailable llvm.org apt mirror
+ * [PARQUET-629] - RowGroupSerializer should only close itself once
+ * [PARQUET-472] - Clean up InputStream ownership semantics in ColumnReader
+ * [PARQUET-739] - Rle-decoding uses static buffer that is shared across threads
+ * [PARQUET-561] - ParquetFileReader::Contents PIMPL missing a virtual destructor
+ * [PARQUET-892] - [C++] Clean up link library targets in CMake files
+ * [PARQUET-454] - Address inconsistencies in boolean decoding
+ * [PARQUET-816] - [C++] Failure decoding sample dict-encoded file from parquet-compatibility project
+ * [PARQUET-565] - Use PATH instead of DIRECTORY in get_filename_component to support CMake<2.8.12
+ * [PARQUET-446] - Hide thrift dependency in parquet-cpp
+ * [PARQUET-843] - [C++] Impala unable to read files created by parquet-cpp
+ * [PARQUET-555] - Dictionary page metadata handling inconsistencies
+ * [PARQUET-908] - Fix for PARQUET-890 introduces undefined symbol in libparquet_arrow.so
+ * [PARQUET-793] - [CPP] Do not return incorrect statistics
+ * [PARQUET-887] - C++: Fix issues in release scripts that arose in RC1
+
+## Improvement
+ * [PARQUET-277] - Remove boost dependency
+ * [PARQUET-500] - Enable coveralls.io for apache/parquet-cpp
+ * [PARQUET-497] - Decouple Parquet physical file structure from FileReader class
+ * [PARQUET-597] - Add data rates to benchmark output
+ * [PARQUET-522] - #include cleanup with include-what-you-use
+ * [PARQUET-515] - Add "Reset" to LevelEncoder and LevelDecoder
+ * [PARQUET-514] - Automate coveralls.io updates in Travis CI
+ * [PARQUET-551] - Handle compiler warnings due to disabled DCHECKs in release builds
+ * [PARQUET-559] - Enable InputStream as a source to the ParquetFileReader
+ * [PARQUET-562] - Simplified ZSH support in build scripts
+ * [PARQUET-538] - Improve ColumnReader Tests
+ * [PARQUET-541] - Portable build scripts
+ * [PARQUET-724] - Test more advanced properties setting
+ * [PARQUET-641] - Instantiate stringstream only if needed in SerializedPageReader::NextPage
+ * [PARQUET-636] - Expose selection for different encodings
+ * [PARQUET-603] - Implement missing information in schema descriptor
+ * [PARQUET-610] - Print ColumnMetaData for each RowGroup
+ * [PARQUET-600] - Add benchmarks for RLE-Level encoding
+ * [PARQUET-592] - Support compressed writes
+ * [PARQUET-593] - Add API for writing Page statistics
+ * [PARQUET-589] - Implement Chunked InMemoryInputStream for better memory usage
+ * [PARQUET-587] - Implement BufferReader::Read(int64_t,uint8_t*)
+ * [PARQUET-616] - C++: WriteBatch should accept const arrays
+ * [PARQUET-630] - C++: Support link flags for older CMake versions
+ * [PARQUET-634] - Consistent private linking of dependencies
+ * [PARQUET-633] - Add version to WriterProperties
+ * [PARQUET-625] - Improve RLE read performance
+ * [PARQUET-737] - Use absolute namespace in macros
+ * [PARQUET-762] - C++: Use optimistic allocation instead of Arrow Builders
+ * [PARQUET-773] - C++: Check licenses with RAT in CI
+ * [PARQUET-687] - C++: Switch to PLAIN encoding if dictionary grows too large
+ * [PARQUET-784] - C++: Reference Spark, Kudu and FrameOfReference in LICENSE
+ * [PARQUET-809] - [C++] Add API to determine if two files' schemas are compatible
+ * [PARQUET-778] - Standardize the schema output to match the parquet-mr format
+ * [PARQUET-463] - Add DCHECK* macros for assertions in debug builds
+ * [PARQUET-471] - Use the same environment setup script for Travis CI as local sandbox development
+ * [PARQUET-449] - Update to latest parquet.thrift
+ * [PARQUET-496] - Fix cpplint configuration to be more restrictive
+ * [PARQUET-468] - Add a cmake option to generate the Parquet thrift headers with the thriftc in the environment
+ * [PARQUET-482] - Organize src code file structure to have a very clear folder with public headers.
+ * [PARQUET-591] - Page size estimation during writes
+ * [PARQUET-518] - Review usages of size_t and unsigned integers generally per Google style guide
+ * [PARQUET-533] - Simplify RandomAccessSource API to combine Seek/Read
+ * [PARQUET-767] - Add release scripts for parquet-cpp
+ * [PARQUET-699] - Update parquet.thrift from https://github.com/apache/parquet-format
+ * [PARQUET-653] - [C++] Re-enable -static-libstdc++ in dev artifact builds
+ * [PARQUET-763] - C++: Expose ParquetFileReader through Arrow reader
+ * [PARQUET-857] - [C++] Flatten parquet/encodings directory
+ * [PARQUET-862] - Provide default cache size values if CPU info probing is not available
+ * [PARQUET-689] - C++: Compress DataPages eagerly
+ * [PARQUET-874] - [C++] Use default memory allocator from Arrow
+ * [PARQUET-267] - Detach thirdparty code from build configuration.
+ * [PARQUET-418] - Add a utility to print contents of a Parquet file to stdout
+ * [PARQUET-519] - Disable compiler warning suppressions and fix all DEBUG build warnings
+ * [PARQUET-447] - Add Debug and Release build types and associated compiler flags
+ * [PARQUET-868] - C++: Build snappy with optimizations
+ * [PARQUET-894] - Fix compilation warning
+ * [PARQUET-883] - C++: Support non-standard gcc version strings
+ * [PARQUET-607] - Public Writer header
+ * [PARQUET-731] - [CPP] Add API to return metadata size and Skip reading values
+ * [PARQUET-628] - Link thrift privately
+ * [PARQUET-877] - C++: Update Arrow Hash, update Version in metadata.
+ * [PARQUET-547] - Refactor most templates to use DataType structs rather than the Type::type enum
+ * [PARQUET-882] - [CPP] Improve Application Version parsing
+ * [PARQUET-448] - Add cmake option to skip building the unit tests
+ * [PARQUET-721] - Performance benchmarks for reading into Arrow structures
+ * [PARQUET-820] - C++: Decoders should directly emit arrays with spacing for null entries
+ * [PARQUET-813] - C++: Build dependencies using CMake External project
+ * [PARQUET-488] - Add SSE-related cmake options to manage compiler flags
+ * [PARQUET-564] - Add option to run unit tests with valgrind --tool=memcheck
+ * [PARQUET-572] - Rename parquet_cpp namespace to parquet
+ * [PARQUET-829] - C++: Make use of ARROW-469
+ * [PARQUET-501] - Add an OutputStream abstraction (capable of memory allocation) for Encoder public API
+ * [PARQUET-744] - Clarifications on build instructions
+ * [PARQUET-520] - Add version of LocalFileSource that uses memory-mapping for zero-copy reads
+ * [PARQUET-556] - Extend RowGroupStatistics to include "min" "max" statistics
+ * [PARQUET-671] - Improve performance of RLE/bit-packed decoding in parquet-cpp
+ * [PARQUET-681] - Add tool to scan a parquet file
+
+## New Feature
+ * [PARQUET-499] - Complete PlainEncoder implementation for all primitive types and test end to end
+ * [PARQUET-439] - Conform all copyright headers to ASF requirements
+ * [PARQUET-436] - Implement ParquetFileWriter class entry point for generating new Parquet files
+ * [PARQUET-435] - Provide vectorized ColumnReader interface
+ * [PARQUET-438] - Update RLE encoder/decoder modules from Impala upstream changes and adapt unit tests
+ * [PARQUET-512] - Add optional google/benchmark 3rd-party dependency for performance testing
+ * [PARQUET-566] - Add method to retrieve the full column path
+ * [PARQUET-613] - C++: Add conda packaging recipe
+ * [PARQUET-605] - Expose schema node in ColumnDescriptor
+ * [PARQUET-619] - C++: Add OutputStream for local files
+ * [PARQUET-583] - Implement Parquet to Thrift schema conversion
+ * [PARQUET-582] - Conversion functions for Parquet enums to Thrift enums
+ * [PARQUET-728] - [C++] Bring parquet::arrow up to date with API changes in arrow::io
+ * [PARQUET-752] - [C++] Conform parquet_arrow to upstream API changes
+ * [PARQUET-788] - [C++] Reference Impala / Apache Impala (incubating) in LICENSE
+ * [PARQUET-808] - [C++] Add API to read file given externally-provided FileMetadata
+ * [PARQUET-807] - [C++] Add API to read file metadata only from a file handle
+ * [PARQUET-805] - C++: Read Int96 into Arrow Timestamp(ns)
+ * [PARQUET-836] - [C++] Add column selection to parquet::arrow::FileReader
+ * [PARQUET-835] - [C++] Add option to parquet::arrow to read columns in parallel using a thread pool
+ * [PARQUET-830] - [C++] Add additional configuration options to parquet::arrow::OpenFile
+ * [PARQUET-769] - C++: Add support for Brotli Compression
+ * [PARQUET-489] - Add visibility macros to be used for public and internal APIs of libparquet
+ * [PARQUET-542] - Support memory allocation from external memory
+ * [PARQUET-844] - [C++] Consolidate encodings, schema, and compression subdirectories into fewer files
+ * [PARQUET-848] - [C++] Consolidate libparquet_thrift subcomponent
+ * [PARQUET-646] - [C++] Enable easier 3rd-party toolchain clang builds on Linux
+ * [PARQUET-598] - [C++] Test writing all primitive data types
+ * [PARQUET-442] - Convert flat SchemaElement vector to implied nested schema data structure
+ * [PARQUET-867] - [C++] Support writing sliced Arrow arrays
+ * [PARQUET-456] - Add zlib codec support
+ * [PARQUET-834] - C++: Support r/w of arrow::ListArray
+ * [PARQUET-485] - Decouple data page delimiting from column reader / scanner classes, create test fixtures
+ * [PARQUET-434] - Add a ParquetFileReader class to encapsulate some low-level details of interacting with Parquet files
+ * [PARQUET-666] - PLAIN_DICTIONARY write support
+ * [PARQUET-437] - Incorporate googletest thirdparty dependency and add cmake tools (ADD_PARQUET_TEST) to simplify adding new unit tests
+ * [PARQUET-866] - [C++] Account for API changes in ARROW-33
+ * [PARQUET-545] - Improve API to support Decimal type
+ * [PARQUET-579] - Add API for writing Column statistics
+ * [PARQUET-494] - Implement PLAIN_DICTIONARY encoding and decoding
+ * [PARQUET-618] - C++: Automatically upload conda build artifacts on commits to master
+ * [PARQUET-833] - C++: Provide API to write spaced arrays (e.g. Arrow)
+ * [PARQUET-903] - C++: Add option to set RPATH to ORIGIN
+ * [PARQUET-451] - Add a RowGroup reader interface class
+ * [PARQUET-785] - C++: List conversion for Arrow Schemas
+ * [PARQUET-712] - C++: Read into Arrow memory
+ * [PARQUET-890] - C++: Support I/O of DATE columns in parquet_arrow
+ * [PARQUET-782] - C++: Support writing to Arrow sinks
+ * [PARQUET-849] - [C++] Upgrade default Thrift in thirdparty toolchain to 0.9.3 or 0.10
+ * [PARQUET-573] - C++: Create a public API for reading and writing file metadata
+
+## Task
+ * [PARQUET-814] - C++: Remove Conda recipes
+ * [PARQUET-503] - Re-enable parquet 2.0 encodings
+ * [PARQUET-169] - Parquet-cpp: Implement support for bulk reading and writing repetition/definition levels.
+ * [PARQUET-878] - C++: Remove setup_build_env from rc-verification script
+ * [PARQUET-881] - C++: Update Arrow hash to 0.2.0-rc2
+ * [PARQUET-771] - C++: Sync KEYS file
+ * [PARQUET-901] - C++: Publish RCs in apache-parquet-VERSION in SVN
+
+## Test
+ * [PARQUET-525] - Test coverage for malformed file failure modes on the read path
+ * [PARQUET-703] - [C++] Validate num_values metadata for columns with nulls
+ * [PARQUET-507] - Improve runtime of rle-test.cc
+ * [PARQUET-549] - Add scanner and column reader tests for dictionary data pages
+ * [PARQUET-457] - Add compressed data page unit tests
diff --git a/contrib/libs/apache/arrow/cpp/README.md b/contrib/libs/apache/arrow/cpp/README.md
index b083f3fe78e..04cc1b9072d 100644
--- a/contrib/libs/apache/arrow/cpp/README.md
+++ b/contrib/libs/apache/arrow/cpp/README.md
@@ -1,34 +1,34 @@
-<!---
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-
-# Apache Arrow C++
-
-This directory contains the code and build system for the Arrow C++ libraries,
-as well as for the C++ libraries for Apache Parquet.
-
-## Installation
-
-See https://arrow.apache.org/install/ for the latest instructions how
-to install pre-compiled binary versions of the library.
-
-## Source Builds and Development
-
-Please refer to our latest [C++ Development Documentation][1].
-
-[1]: https://github.com/apache/arrow/blob/master/docs/source/developers/cpp
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+# Apache Arrow C++
+
+This directory contains the code and build system for the Arrow C++ libraries,
+as well as for the C++ libraries for Apache Parquet.
+
+## Installation
+
+See https://arrow.apache.org/install/ for the latest instructions how
+to install pre-compiled binary versions of the library.
+
+## Source Builds and Development
+
+Please refer to our latest [C++ Development Documentation][1].
+
+[1]: https://github.com/apache/arrow/blob/master/docs/source/developers/cpp
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array.h b/contrib/libs/apache/arrow/cpp/src/arrow/array.h
index 739d65e0a5d..d63218cdc7b 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array.h
@@ -1,32 +1,32 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Kitchen-sink public API for arrow::Array data structures. C++ library code
-// (especially header files) in Apache Arrow should use more specific headers
-// unless it's a file that uses most or all Array types in which case using
-// arrow/array.h is fine.
-
-#pragma once
-
-#include "arrow/array/array_base.h" // IWYU pragma: keep
-#include "arrow/array/array_binary.h" // IWYU pragma: keep
-#include "arrow/array/array_decimal.h" // IWYU pragma: keep
-#include "arrow/array/array_dict.h" // IWYU pragma: keep
-#include "arrow/array/array_nested.h" // IWYU pragma: keep
-#include "arrow/array/array_primitive.h" // IWYU pragma: keep
-#include "arrow/array/data.h" // IWYU pragma: keep
-#include "arrow/array/util.h" // IWYU pragma: keep
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Kitchen-sink public API for arrow::Array data structures. C++ library code
+// (especially header files) in Apache Arrow should use more specific headers
+// unless it's a file that uses most or all Array types in which case using
+// arrow/array.h is fine.
+
+#pragma once
+
+#include "arrow/array/array_base.h" // IWYU pragma: keep
+#include "arrow/array/array_binary.h" // IWYU pragma: keep
+#include "arrow/array/array_decimal.h" // IWYU pragma: keep
+#include "arrow/array/array_dict.h" // IWYU pragma: keep
+#include "arrow/array/array_nested.h" // IWYU pragma: keep
+#include "arrow/array/array_primitive.h" // IWYU pragma: keep
+#include "arrow/array/data.h" // IWYU pragma: keep
+#include "arrow/array/util.h" // IWYU pragma: keep
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_base.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_base.cc
index 67c5ca84e1f..bcfedcf372f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_base.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_base.cc
@@ -1,308 +1,308 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/array/array_base.h"
-
-#include <cstdint>
-#include <memory>
-#include <sstream> // IWYU pragma: keep
-#include <string>
-#include <type_traits>
-#include <utility>
-
-#include "arrow/array/array_binary.h"
-#include "arrow/array/array_dict.h"
-#include "arrow/array/array_nested.h"
-#include "arrow/array/array_primitive.h"
-#include "arrow/array/util.h"
-#include "arrow/array/validate.h"
-#include "arrow/buffer.h"
-#include "arrow/compare.h"
-#include "arrow/pretty_print.h"
-#include "arrow/scalar.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/type_fwd.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/logging.h"
-#include "arrow/visitor.h"
-#include "arrow/visitor_inline.h"
-
-namespace arrow {
-
-class ExtensionArray;
-
-// ----------------------------------------------------------------------
-// Base array class
-
-int64_t Array::null_count() const { return data_->GetNullCount(); }
-
-namespace internal {
-
-struct ScalarFromArraySlotImpl {
- template <typename T>
- using ScalarType = typename TypeTraits<T>::ScalarType;
-
- Status Visit(const NullArray& a) {
- out_ = std::make_shared<NullScalar>();
- return Status::OK();
- }
-
- Status Visit(const BooleanArray& a) { return Finish(a.Value(index_)); }
-
- template <typename T>
- Status Visit(const NumericArray<T>& a) {
- return Finish(a.Value(index_));
- }
-
- Status Visit(const Decimal128Array& a) {
- return Finish(Decimal128(a.GetValue(index_)));
- }
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/array/array_base.h"
+
+#include <cstdint>
+#include <memory>
+#include <sstream> // IWYU pragma: keep
+#include <string>
+#include <type_traits>
+#include <utility>
+
+#include "arrow/array/array_binary.h"
+#include "arrow/array/array_dict.h"
+#include "arrow/array/array_nested.h"
+#include "arrow/array/array_primitive.h"
+#include "arrow/array/util.h"
+#include "arrow/array/validate.h"
+#include "arrow/buffer.h"
+#include "arrow/compare.h"
+#include "arrow/pretty_print.h"
+#include "arrow/scalar.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_fwd.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/logging.h"
+#include "arrow/visitor.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+class ExtensionArray;
+
+// ----------------------------------------------------------------------
+// Base array class
+
+int64_t Array::null_count() const { return data_->GetNullCount(); }
+
+namespace internal {
+
+struct ScalarFromArraySlotImpl {
+ template <typename T>
+ using ScalarType = typename TypeTraits<T>::ScalarType;
+
+ Status Visit(const NullArray& a) {
+ out_ = std::make_shared<NullScalar>();
+ return Status::OK();
+ }
+
+ Status Visit(const BooleanArray& a) { return Finish(a.Value(index_)); }
+
+ template <typename T>
+ Status Visit(const NumericArray<T>& a) {
+ return Finish(a.Value(index_));
+ }
+
+ Status Visit(const Decimal128Array& a) {
+ return Finish(Decimal128(a.GetValue(index_)));
+ }
+
Status Visit(const Decimal256Array& a) {
return Finish(Decimal256(a.GetValue(index_)));
}
- template <typename T>
- Status Visit(const BaseBinaryArray<T>& a) {
- return Finish(a.GetString(index_));
- }
-
- Status Visit(const FixedSizeBinaryArray& a) { return Finish(a.GetString(index_)); }
-
- Status Visit(const DayTimeIntervalArray& a) { return Finish(a.Value(index_)); }
-
- template <typename T>
- Status Visit(const BaseListArray<T>& a) {
- return Finish(a.value_slice(index_));
- }
-
- Status Visit(const FixedSizeListArray& a) { return Finish(a.value_slice(index_)); }
-
- Status Visit(const StructArray& a) {
- ScalarVector children;
- for (const auto& child : a.fields()) {
- children.emplace_back();
- ARROW_ASSIGN_OR_RAISE(children.back(), child->GetScalar(index_));
- }
- return Finish(std::move(children));
- }
-
- Status Visit(const SparseUnionArray& a) {
- // child array which stores the actual value
- auto arr = a.field(a.child_id(index_));
- // no need to adjust the index
- ARROW_ASSIGN_OR_RAISE(auto value, arr->GetScalar(index_));
- if (value->is_valid) {
- out_ = std::shared_ptr<Scalar>(new SparseUnionScalar(value, a.type()));
- } else {
- out_ = MakeNullScalar(a.type());
- }
- return Status::OK();
- }
-
- Status Visit(const DenseUnionArray& a) {
- // child array which stores the actual value
- auto arr = a.field(a.child_id(index_));
- // need to look up the value based on offsets
- auto offset = a.value_offset(index_);
- ARROW_ASSIGN_OR_RAISE(auto value, arr->GetScalar(offset));
- if (value->is_valid) {
- out_ = std::shared_ptr<Scalar>(new DenseUnionScalar(value, a.type()));
- } else {
- out_ = MakeNullScalar(a.type());
- }
- return Status::OK();
- }
-
- Status Visit(const DictionaryArray& a) {
- auto ty = a.type();
-
- ARROW_ASSIGN_OR_RAISE(auto index,
- MakeScalar(checked_cast<DictionaryType&>(*ty).index_type(),
- a.GetValueIndex(index_)));
-
- auto scalar = DictionaryScalar(ty);
- scalar.is_valid = a.IsValid(index_);
- scalar.value.index = index;
- scalar.value.dictionary = a.dictionary();
-
- out_ = std::make_shared<DictionaryScalar>(std::move(scalar));
- return Status::OK();
- }
-
- Status Visit(const ExtensionArray& a) {
- return Status::NotImplemented("Non-null ExtensionScalar");
- }
-
- template <typename Arg>
- Status Finish(Arg&& arg) {
- return MakeScalar(array_.type(), std::forward<Arg>(arg)).Value(&out_);
- }
-
- Status Finish(std::string arg) {
- return MakeScalar(array_.type(), Buffer::FromString(std::move(arg))).Value(&out_);
- }
-
- Result<std::shared_ptr<Scalar>> Finish() && {
- if (index_ >= array_.length()) {
- return Status::IndexError("tried to refer to element ", index_,
- " but array is only ", array_.length(), " long");
- }
-
- if (array_.IsNull(index_)) {
- auto null = MakeNullScalar(array_.type());
- if (is_dictionary(array_.type()->id())) {
- auto& dict_null = checked_cast<DictionaryScalar&>(*null);
- const auto& dict_array = checked_cast<const DictionaryArray&>(array_);
- dict_null.value.dictionary = dict_array.dictionary();
- }
- return null;
- }
-
- RETURN_NOT_OK(VisitArrayInline(array_, this));
- return std::move(out_);
- }
-
- ScalarFromArraySlotImpl(const Array& array, int64_t index)
- : array_(array), index_(index) {}
-
- const Array& array_;
- int64_t index_;
- std::shared_ptr<Scalar> out_;
-};
-
-} // namespace internal
-
-Result<std::shared_ptr<Scalar>> Array::GetScalar(int64_t i) const {
- return internal::ScalarFromArraySlotImpl{*this, i}.Finish();
-}
-
-std::string Array::Diff(const Array& other) const {
- std::stringstream diff;
- ARROW_IGNORE_EXPR(Equals(other, EqualOptions().diff_sink(&diff)));
- return diff.str();
-}
-
-bool Array::Equals(const Array& arr, const EqualOptions& opts) const {
- return ArrayEquals(*this, arr, opts);
-}
-
-bool Array::Equals(const std::shared_ptr<Array>& arr, const EqualOptions& opts) const {
- if (!arr) {
- return false;
- }
- return Equals(*arr, opts);
-}
-
-bool Array::ApproxEquals(const Array& arr, const EqualOptions& opts) const {
- return ArrayApproxEquals(*this, arr, opts);
-}
-
-bool Array::ApproxEquals(const std::shared_ptr<Array>& arr,
- const EqualOptions& opts) const {
- if (!arr) {
- return false;
- }
- return ApproxEquals(*arr, opts);
-}
-
-bool Array::RangeEquals(const Array& other, int64_t start_idx, int64_t end_idx,
+ template <typename T>
+ Status Visit(const BaseBinaryArray<T>& a) {
+ return Finish(a.GetString(index_));
+ }
+
+ Status Visit(const FixedSizeBinaryArray& a) { return Finish(a.GetString(index_)); }
+
+ Status Visit(const DayTimeIntervalArray& a) { return Finish(a.Value(index_)); }
+
+ template <typename T>
+ Status Visit(const BaseListArray<T>& a) {
+ return Finish(a.value_slice(index_));
+ }
+
+ Status Visit(const FixedSizeListArray& a) { return Finish(a.value_slice(index_)); }
+
+ Status Visit(const StructArray& a) {
+ ScalarVector children;
+ for (const auto& child : a.fields()) {
+ children.emplace_back();
+ ARROW_ASSIGN_OR_RAISE(children.back(), child->GetScalar(index_));
+ }
+ return Finish(std::move(children));
+ }
+
+ Status Visit(const SparseUnionArray& a) {
+ // child array which stores the actual value
+ auto arr = a.field(a.child_id(index_));
+ // no need to adjust the index
+ ARROW_ASSIGN_OR_RAISE(auto value, arr->GetScalar(index_));
+ if (value->is_valid) {
+ out_ = std::shared_ptr<Scalar>(new SparseUnionScalar(value, a.type()));
+ } else {
+ out_ = MakeNullScalar(a.type());
+ }
+ return Status::OK();
+ }
+
+ Status Visit(const DenseUnionArray& a) {
+ // child array which stores the actual value
+ auto arr = a.field(a.child_id(index_));
+ // need to look up the value based on offsets
+ auto offset = a.value_offset(index_);
+ ARROW_ASSIGN_OR_RAISE(auto value, arr->GetScalar(offset));
+ if (value->is_valid) {
+ out_ = std::shared_ptr<Scalar>(new DenseUnionScalar(value, a.type()));
+ } else {
+ out_ = MakeNullScalar(a.type());
+ }
+ return Status::OK();
+ }
+
+ Status Visit(const DictionaryArray& a) {
+ auto ty = a.type();
+
+ ARROW_ASSIGN_OR_RAISE(auto index,
+ MakeScalar(checked_cast<DictionaryType&>(*ty).index_type(),
+ a.GetValueIndex(index_)));
+
+ auto scalar = DictionaryScalar(ty);
+ scalar.is_valid = a.IsValid(index_);
+ scalar.value.index = index;
+ scalar.value.dictionary = a.dictionary();
+
+ out_ = std::make_shared<DictionaryScalar>(std::move(scalar));
+ return Status::OK();
+ }
+
+ Status Visit(const ExtensionArray& a) {
+ return Status::NotImplemented("Non-null ExtensionScalar");
+ }
+
+ template <typename Arg>
+ Status Finish(Arg&& arg) {
+ return MakeScalar(array_.type(), std::forward<Arg>(arg)).Value(&out_);
+ }
+
+ Status Finish(std::string arg) {
+ return MakeScalar(array_.type(), Buffer::FromString(std::move(arg))).Value(&out_);
+ }
+
+ Result<std::shared_ptr<Scalar>> Finish() && {
+ if (index_ >= array_.length()) {
+ return Status::IndexError("tried to refer to element ", index_,
+ " but array is only ", array_.length(), " long");
+ }
+
+ if (array_.IsNull(index_)) {
+ auto null = MakeNullScalar(array_.type());
+ if (is_dictionary(array_.type()->id())) {
+ auto& dict_null = checked_cast<DictionaryScalar&>(*null);
+ const auto& dict_array = checked_cast<const DictionaryArray&>(array_);
+ dict_null.value.dictionary = dict_array.dictionary();
+ }
+ return null;
+ }
+
+ RETURN_NOT_OK(VisitArrayInline(array_, this));
+ return std::move(out_);
+ }
+
+ ScalarFromArraySlotImpl(const Array& array, int64_t index)
+ : array_(array), index_(index) {}
+
+ const Array& array_;
+ int64_t index_;
+ std::shared_ptr<Scalar> out_;
+};
+
+} // namespace internal
+
+Result<std::shared_ptr<Scalar>> Array::GetScalar(int64_t i) const {
+ return internal::ScalarFromArraySlotImpl{*this, i}.Finish();
+}
+
+std::string Array::Diff(const Array& other) const {
+ std::stringstream diff;
+ ARROW_IGNORE_EXPR(Equals(other, EqualOptions().diff_sink(&diff)));
+ return diff.str();
+}
+
+bool Array::Equals(const Array& arr, const EqualOptions& opts) const {
+ return ArrayEquals(*this, arr, opts);
+}
+
+bool Array::Equals(const std::shared_ptr<Array>& arr, const EqualOptions& opts) const {
+ if (!arr) {
+ return false;
+ }
+ return Equals(*arr, opts);
+}
+
+bool Array::ApproxEquals(const Array& arr, const EqualOptions& opts) const {
+ return ArrayApproxEquals(*this, arr, opts);
+}
+
+bool Array::ApproxEquals(const std::shared_ptr<Array>& arr,
+ const EqualOptions& opts) const {
+ if (!arr) {
+ return false;
+ }
+ return ApproxEquals(*arr, opts);
+}
+
+bool Array::RangeEquals(const Array& other, int64_t start_idx, int64_t end_idx,
int64_t other_start_idx, const EqualOptions& opts) const {
return ArrayRangeEquals(*this, other, start_idx, end_idx, other_start_idx, opts);
-}
-
-bool Array::RangeEquals(const std::shared_ptr<Array>& other, int64_t start_idx,
+}
+
+bool Array::RangeEquals(const std::shared_ptr<Array>& other, int64_t start_idx,
int64_t end_idx, int64_t other_start_idx,
const EqualOptions& opts) const {
- if (!other) {
- return false;
- }
+ if (!other) {
+ return false;
+ }
return ArrayRangeEquals(*this, *other, start_idx, end_idx, other_start_idx, opts);
-}
-
-bool Array::RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx,
+}
+
+bool Array::RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx,
const Array& other, const EqualOptions& opts) const {
return ArrayRangeEquals(*this, other, start_idx, end_idx, other_start_idx, opts);
-}
-
-bool Array::RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx,
+}
+
+bool Array::RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx,
const std::shared_ptr<Array>& other,
const EqualOptions& opts) const {
- if (!other) {
- return false;
- }
+ if (!other) {
+ return false;
+ }
return ArrayRangeEquals(*this, *other, start_idx, end_idx, other_start_idx, opts);
-}
-
-std::shared_ptr<Array> Array::Slice(int64_t offset, int64_t length) const {
- return MakeArray(data_->Slice(offset, length));
-}
-
-std::shared_ptr<Array> Array::Slice(int64_t offset) const {
- int64_t slice_length = data_->length - offset;
- return Slice(offset, slice_length);
-}
-
-Result<std::shared_ptr<Array>> Array::SliceSafe(int64_t offset, int64_t length) const {
- ARROW_ASSIGN_OR_RAISE(auto sliced_data, data_->SliceSafe(offset, length));
- return MakeArray(std::move(sliced_data));
-}
-
-Result<std::shared_ptr<Array>> Array::SliceSafe(int64_t offset) const {
- if (offset < 0) {
- // Avoid UBSAN in subtraction below
- return Status::Invalid("Negative buffer slice offset");
- }
- return SliceSafe(offset, data_->length - offset);
-}
-
-std::string Array::ToString() const {
- std::stringstream ss;
- ARROW_CHECK_OK(PrettyPrint(*this, 0, &ss));
- return ss.str();
-}
-
-Result<std::shared_ptr<Array>> Array::View(
- const std::shared_ptr<DataType>& out_type) const {
- ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ArrayData> result,
- internal::GetArrayView(data_, out_type));
- return MakeArray(result);
-}
-
-// ----------------------------------------------------------------------
-// NullArray
-
-NullArray::NullArray(int64_t length) {
- SetData(ArrayData::Make(null(), length, {nullptr}, length));
-}
-
-// ----------------------------------------------------------------------
-// Implement Array::Accept as inline visitor
-
-Status Array::Accept(ArrayVisitor* visitor) const {
- return VisitArrayInline(*this, visitor);
-}
-
-Status Array::Validate() const { return internal::ValidateArray(*this); }
-
-Status Array::ValidateFull() const {
- RETURN_NOT_OK(internal::ValidateArray(*this));
+}
+
+std::shared_ptr<Array> Array::Slice(int64_t offset, int64_t length) const {
+ return MakeArray(data_->Slice(offset, length));
+}
+
+std::shared_ptr<Array> Array::Slice(int64_t offset) const {
+ int64_t slice_length = data_->length - offset;
+ return Slice(offset, slice_length);
+}
+
+Result<std::shared_ptr<Array>> Array::SliceSafe(int64_t offset, int64_t length) const {
+ ARROW_ASSIGN_OR_RAISE(auto sliced_data, data_->SliceSafe(offset, length));
+ return MakeArray(std::move(sliced_data));
+}
+
+Result<std::shared_ptr<Array>> Array::SliceSafe(int64_t offset) const {
+ if (offset < 0) {
+ // Avoid UBSAN in subtraction below
+ return Status::Invalid("Negative buffer slice offset");
+ }
+ return SliceSafe(offset, data_->length - offset);
+}
+
+std::string Array::ToString() const {
+ std::stringstream ss;
+ ARROW_CHECK_OK(PrettyPrint(*this, 0, &ss));
+ return ss.str();
+}
+
+Result<std::shared_ptr<Array>> Array::View(
+ const std::shared_ptr<DataType>& out_type) const {
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ArrayData> result,
+ internal::GetArrayView(data_, out_type));
+ return MakeArray(result);
+}
+
+// ----------------------------------------------------------------------
+// NullArray
+
+NullArray::NullArray(int64_t length) {
+ SetData(ArrayData::Make(null(), length, {nullptr}, length));
+}
+
+// ----------------------------------------------------------------------
+// Implement Array::Accept as inline visitor
+
+Status Array::Accept(ArrayVisitor* visitor) const {
+ return VisitArrayInline(*this, visitor);
+}
+
+Status Array::Validate() const { return internal::ValidateArray(*this); }
+
+Status Array::ValidateFull() const {
+ RETURN_NOT_OK(internal::ValidateArray(*this));
return internal::ValidateArrayFull(*this);
-}
-
-} // namespace arrow
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_base.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_base.h
index 2add572e7a4..a459b1f1d3f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_base.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_base.h
@@ -1,260 +1,260 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <iosfwd>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/array/data.h"
-#include "arrow/buffer.h"
-#include "arrow/compare.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-#include "arrow/visitor.h"
-
-namespace arrow {
-
-// ----------------------------------------------------------------------
-// User array accessor types
-
-/// \brief Array base type
-/// Immutable data array with some logical type and some length.
-///
-/// Any memory is owned by the respective Buffer instance (or its parents).
-///
-/// The base class is only required to have a null bitmap buffer if the null
-/// count is greater than 0
-///
-/// If known, the null count can be provided in the base Array constructor. If
-/// the null count is not known, pass -1 to indicate that the null count is to
-/// be computed on the first call to null_count()
-class ARROW_EXPORT Array {
- public:
- virtual ~Array() = default;
-
- /// \brief Return true if value at index is null. Does not boundscheck
- bool IsNull(int64_t i) const {
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <iosfwd>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/array/data.h"
+#include "arrow/buffer.h"
+#include "arrow/compare.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+#include "arrow/visitor.h"
+
+namespace arrow {
+
+// ----------------------------------------------------------------------
+// User array accessor types
+
+/// \brief Array base type
+/// Immutable data array with some logical type and some length.
+///
+/// Any memory is owned by the respective Buffer instance (or its parents).
+///
+/// The base class is only required to have a null bitmap buffer if the null
+/// count is greater than 0
+///
+/// If known, the null count can be provided in the base Array constructor. If
+/// the null count is not known, pass -1 to indicate that the null count is to
+/// be computed on the first call to null_count()
+class ARROW_EXPORT Array {
+ public:
+ virtual ~Array() = default;
+
+ /// \brief Return true if value at index is null. Does not boundscheck
+ bool IsNull(int64_t i) const {
return null_bitmap_data_ != NULLPTR
? !BitUtil::GetBit(null_bitmap_data_, i + data_->offset)
: data_->null_count == data_->length;
- }
-
- /// \brief Return true if value at index is valid (not null). Does not
- /// boundscheck
- bool IsValid(int64_t i) const {
+ }
+
+ /// \brief Return true if value at index is valid (not null). Does not
+ /// boundscheck
+ bool IsValid(int64_t i) const {
return null_bitmap_data_ != NULLPTR
? BitUtil::GetBit(null_bitmap_data_, i + data_->offset)
: data_->null_count != data_->length;
- }
-
- /// \brief Return a Scalar containing the value of this array at i
- Result<std::shared_ptr<Scalar>> GetScalar(int64_t i) const;
-
- /// Size in the number of elements this array contains.
- int64_t length() const { return data_->length; }
-
- /// A relative position into another array's data, to enable zero-copy
- /// slicing. This value defaults to zero
- int64_t offset() const { return data_->offset; }
-
- /// The number of null entries in the array. If the null count was not known
- /// at time of construction (and set to a negative value), then the null
- /// count will be computed and cached on the first invocation of this
- /// function
- int64_t null_count() const;
-
- std::shared_ptr<DataType> type() const { return data_->type; }
- Type::type type_id() const { return data_->type->id(); }
-
- /// Buffer for the validity (null) bitmap, if any. Note that Union types
- /// never have a null bitmap.
- ///
- /// Note that for `null_count == 0` or for null type, this will be null.
- /// This buffer does not account for any slice offset
+ }
+
+ /// \brief Return a Scalar containing the value of this array at i
+ Result<std::shared_ptr<Scalar>> GetScalar(int64_t i) const;
+
+ /// Size in the number of elements this array contains.
+ int64_t length() const { return data_->length; }
+
+ /// A relative position into another array's data, to enable zero-copy
+ /// slicing. This value defaults to zero
+ int64_t offset() const { return data_->offset; }
+
+ /// The number of null entries in the array. If the null count was not known
+ /// at time of construction (and set to a negative value), then the null
+ /// count will be computed and cached on the first invocation of this
+ /// function
+ int64_t null_count() const;
+
+ std::shared_ptr<DataType> type() const { return data_->type; }
+ Type::type type_id() const { return data_->type->id(); }
+
+ /// Buffer for the validity (null) bitmap, if any. Note that Union types
+ /// never have a null bitmap.
+ ///
+ /// Note that for `null_count == 0` or for null type, this will be null.
+ /// This buffer does not account for any slice offset
const std::shared_ptr<Buffer>& null_bitmap() const { return data_->buffers[0]; }
-
- /// Raw pointer to the null bitmap.
- ///
- /// Note that for `null_count == 0` or for null type, this will be null.
- /// This buffer does not account for any slice offset
- const uint8_t* null_bitmap_data() const { return null_bitmap_data_; }
-
- /// Equality comparison with another array
- bool Equals(const Array& arr, const EqualOptions& = EqualOptions::Defaults()) const;
- bool Equals(const std::shared_ptr<Array>& arr,
- const EqualOptions& = EqualOptions::Defaults()) const;
-
- /// \brief Return the formatted unified diff of arrow::Diff between this
- /// Array and another Array
- std::string Diff(const Array& other) const;
-
- /// Approximate equality comparison with another array
- ///
- /// epsilon is only used if this is FloatArray or DoubleArray
- bool ApproxEquals(const std::shared_ptr<Array>& arr,
- const EqualOptions& = EqualOptions::Defaults()) const;
- bool ApproxEquals(const Array& arr,
- const EqualOptions& = EqualOptions::Defaults()) const;
-
- /// Compare if the range of slots specified are equal for the given array and
- /// this array. end_idx exclusive. This methods does not bounds check.
- bool RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx,
+
+ /// Raw pointer to the null bitmap.
+ ///
+ /// Note that for `null_count == 0` or for null type, this will be null.
+ /// This buffer does not account for any slice offset
+ const uint8_t* null_bitmap_data() const { return null_bitmap_data_; }
+
+ /// Equality comparison with another array
+ bool Equals(const Array& arr, const EqualOptions& = EqualOptions::Defaults()) const;
+ bool Equals(const std::shared_ptr<Array>& arr,
+ const EqualOptions& = EqualOptions::Defaults()) const;
+
+ /// \brief Return the formatted unified diff of arrow::Diff between this
+ /// Array and another Array
+ std::string Diff(const Array& other) const;
+
+ /// Approximate equality comparison with another array
+ ///
+ /// epsilon is only used if this is FloatArray or DoubleArray
+ bool ApproxEquals(const std::shared_ptr<Array>& arr,
+ const EqualOptions& = EqualOptions::Defaults()) const;
+ bool ApproxEquals(const Array& arr,
+ const EqualOptions& = EqualOptions::Defaults()) const;
+
+ /// Compare if the range of slots specified are equal for the given array and
+ /// this array. end_idx exclusive. This methods does not bounds check.
+ bool RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx,
const Array& other,
const EqualOptions& = EqualOptions::Defaults()) const;
- bool RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx,
+ bool RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx,
const std::shared_ptr<Array>& other,
const EqualOptions& = EqualOptions::Defaults()) const;
- bool RangeEquals(const Array& other, int64_t start_idx, int64_t end_idx,
+ bool RangeEquals(const Array& other, int64_t start_idx, int64_t end_idx,
int64_t other_start_idx,
const EqualOptions& = EqualOptions::Defaults()) const;
- bool RangeEquals(const std::shared_ptr<Array>& other, int64_t start_idx,
+ bool RangeEquals(const std::shared_ptr<Array>& other, int64_t start_idx,
int64_t end_idx, int64_t other_start_idx,
const EqualOptions& = EqualOptions::Defaults()) const;
-
- Status Accept(ArrayVisitor* visitor) const;
-
- /// Construct a zero-copy view of this array with the given type.
- ///
- /// This method checks if the types are layout-compatible.
- /// Nested types are traversed in depth-first order. Data buffers must have
- /// the same item sizes, even though the logical types may be different.
- /// An error is returned if the types are not layout-compatible.
- Result<std::shared_ptr<Array>> View(const std::shared_ptr<DataType>& type) const;
-
- /// Construct a zero-copy slice of the array with the indicated offset and
- /// length
- ///
- /// \param[in] offset the position of the first element in the constructed
- /// slice
- /// \param[in] length the length of the slice. If there are not enough
- /// elements in the array, the length will be adjusted accordingly
- ///
- /// \return a new object wrapped in std::shared_ptr<Array>
- std::shared_ptr<Array> Slice(int64_t offset, int64_t length) const;
-
- /// Slice from offset until end of the array
- std::shared_ptr<Array> Slice(int64_t offset) const;
-
- /// Input-checking variant of Array::Slice
- Result<std::shared_ptr<Array>> SliceSafe(int64_t offset, int64_t length) const;
- /// Input-checking variant of Array::Slice
- Result<std::shared_ptr<Array>> SliceSafe(int64_t offset) const;
-
+
+ Status Accept(ArrayVisitor* visitor) const;
+
+ /// Construct a zero-copy view of this array with the given type.
+ ///
+ /// This method checks if the types are layout-compatible.
+ /// Nested types are traversed in depth-first order. Data buffers must have
+ /// the same item sizes, even though the logical types may be different.
+ /// An error is returned if the types are not layout-compatible.
+ Result<std::shared_ptr<Array>> View(const std::shared_ptr<DataType>& type) const;
+
+ /// Construct a zero-copy slice of the array with the indicated offset and
+ /// length
+ ///
+ /// \param[in] offset the position of the first element in the constructed
+ /// slice
+ /// \param[in] length the length of the slice. If there are not enough
+ /// elements in the array, the length will be adjusted accordingly
+ ///
+ /// \return a new object wrapped in std::shared_ptr<Array>
+ std::shared_ptr<Array> Slice(int64_t offset, int64_t length) const;
+
+ /// Slice from offset until end of the array
+ std::shared_ptr<Array> Slice(int64_t offset) const;
+
+ /// Input-checking variant of Array::Slice
+ Result<std::shared_ptr<Array>> SliceSafe(int64_t offset, int64_t length) const;
+ /// Input-checking variant of Array::Slice
+ Result<std::shared_ptr<Array>> SliceSafe(int64_t offset) const;
+
const std::shared_ptr<ArrayData>& data() const { return data_; }
-
- int num_fields() const { return static_cast<int>(data_->child_data.size()); }
-
- /// \return PrettyPrint representation of array suitable for debugging
- std::string ToString() const;
-
- /// \brief Perform cheap validation checks to determine obvious inconsistencies
- /// within the array's internal data.
- ///
- /// This is O(k) where k is the number of descendents.
- ///
- /// \return Status
- Status Validate() const;
-
- /// \brief Perform extensive validation checks to determine inconsistencies
- /// within the array's internal data.
- ///
- /// This is potentially O(k*n) where k is the number of descendents and n
- /// is the array length.
- ///
- /// \return Status
- Status ValidateFull() const;
-
- protected:
- Array() : null_bitmap_data_(NULLPTR) {}
-
- std::shared_ptr<ArrayData> data_;
- const uint8_t* null_bitmap_data_;
-
- /// Protected method for constructors
- void SetData(const std::shared_ptr<ArrayData>& data) {
- if (data->buffers.size() > 0) {
- null_bitmap_data_ = data->GetValuesSafe<uint8_t>(0, /*offset=*/0);
- } else {
- null_bitmap_data_ = NULLPTR;
- }
- data_ = data;
- }
-
- private:
- ARROW_DISALLOW_COPY_AND_ASSIGN(Array);
-};
-
-static inline std::ostream& operator<<(std::ostream& os, const Array& x) {
- os << x.ToString();
- return os;
-}
-
-/// Base class for non-nested arrays
-class ARROW_EXPORT FlatArray : public Array {
- protected:
- using Array::Array;
-};
-
-/// Base class for arrays of fixed-size logical types
-class ARROW_EXPORT PrimitiveArray : public FlatArray {
- public:
- PrimitiveArray(const std::shared_ptr<DataType>& type, int64_t length,
- const std::shared_ptr<Buffer>& data,
- const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
- int64_t null_count = kUnknownNullCount, int64_t offset = 0);
-
- /// Does not account for any slice offset
- std::shared_ptr<Buffer> values() const { return data_->buffers[1]; }
-
- protected:
- PrimitiveArray() : raw_values_(NULLPTR) {}
-
- void SetData(const std::shared_ptr<ArrayData>& data) {
- this->Array::SetData(data);
- raw_values_ = data->GetValuesSafe<uint8_t>(1, /*offset=*/0);
- }
-
- explicit PrimitiveArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }
-
- const uint8_t* raw_values_;
-};
-
-/// Degenerate null type Array
-class ARROW_EXPORT NullArray : public FlatArray {
- public:
- using TypeClass = NullType;
-
- explicit NullArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }
- explicit NullArray(int64_t length);
-
- private:
- void SetData(const std::shared_ptr<ArrayData>& data) {
- null_bitmap_data_ = NULLPTR;
- data->null_count = data->length;
- data_ = data;
- }
-};
-
-} // namespace arrow
+
+ int num_fields() const { return static_cast<int>(data_->child_data.size()); }
+
+ /// \return PrettyPrint representation of array suitable for debugging
+ std::string ToString() const;
+
+ /// \brief Perform cheap validation checks to determine obvious inconsistencies
+ /// within the array's internal data.
+ ///
+ /// This is O(k) where k is the number of descendents.
+ ///
+ /// \return Status
+ Status Validate() const;
+
+ /// \brief Perform extensive validation checks to determine inconsistencies
+ /// within the array's internal data.
+ ///
+ /// This is potentially O(k*n) where k is the number of descendents and n
+ /// is the array length.
+ ///
+ /// \return Status
+ Status ValidateFull() const;
+
+ protected:
+ Array() : null_bitmap_data_(NULLPTR) {}
+
+ std::shared_ptr<ArrayData> data_;
+ const uint8_t* null_bitmap_data_;
+
+ /// Protected method for constructors
+ void SetData(const std::shared_ptr<ArrayData>& data) {
+ if (data->buffers.size() > 0) {
+ null_bitmap_data_ = data->GetValuesSafe<uint8_t>(0, /*offset=*/0);
+ } else {
+ null_bitmap_data_ = NULLPTR;
+ }
+ data_ = data;
+ }
+
+ private:
+ ARROW_DISALLOW_COPY_AND_ASSIGN(Array);
+};
+
+static inline std::ostream& operator<<(std::ostream& os, const Array& x) {
+ os << x.ToString();
+ return os;
+}
+
+/// Base class for non-nested arrays
+class ARROW_EXPORT FlatArray : public Array {
+ protected:
+ using Array::Array;
+};
+
+/// Base class for arrays of fixed-size logical types
+class ARROW_EXPORT PrimitiveArray : public FlatArray {
+ public:
+ PrimitiveArray(const std::shared_ptr<DataType>& type, int64_t length,
+ const std::shared_ptr<Buffer>& data,
+ const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
+ int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
+ /// Does not account for any slice offset
+ std::shared_ptr<Buffer> values() const { return data_->buffers[1]; }
+
+ protected:
+ PrimitiveArray() : raw_values_(NULLPTR) {}
+
+ void SetData(const std::shared_ptr<ArrayData>& data) {
+ this->Array::SetData(data);
+ raw_values_ = data->GetValuesSafe<uint8_t>(1, /*offset=*/0);
+ }
+
+ explicit PrimitiveArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }
+
+ const uint8_t* raw_values_;
+};
+
+/// Degenerate null type Array
+class ARROW_EXPORT NullArray : public FlatArray {
+ public:
+ using TypeClass = NullType;
+
+ explicit NullArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }
+ explicit NullArray(int64_t length);
+
+ private:
+ void SetData(const std::shared_ptr<ArrayData>& data) {
+ null_bitmap_data_ = NULLPTR;
+ data->null_count = data->length;
+ data_ = data;
+ }
+};
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_binary.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_binary.cc
index 9466b5a48f9..396c22892f9 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_binary.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_binary.cc
@@ -1,108 +1,108 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/array/array_binary.h"
-
-#include <cstdint>
-#include <memory>
-
-#include "arrow/array/array_base.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/array/array_binary.h"
+
+#include <cstdint>
+#include <memory>
+
+#include "arrow/array/array_base.h"
#include "arrow/array/validate.h"
-#include "arrow/type.h"
+#include "arrow/type.h"
#include "arrow/type_traits.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/logging.h"
-
-namespace arrow {
-
-using internal::checked_cast;
-
-BinaryArray::BinaryArray(const std::shared_ptr<ArrayData>& data) {
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+
+BinaryArray::BinaryArray(const std::shared_ptr<ArrayData>& data) {
ARROW_CHECK(is_binary_like(data->type->id()));
- SetData(data);
-}
-
-BinaryArray::BinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
- const std::shared_ptr<Buffer>& data,
- const std::shared_ptr<Buffer>& null_bitmap, int64_t null_count,
- int64_t offset) {
- SetData(ArrayData::Make(binary(), length, {null_bitmap, value_offsets, data},
- null_count, offset));
-}
-
-LargeBinaryArray::LargeBinaryArray(const std::shared_ptr<ArrayData>& data) {
+ SetData(data);
+}
+
+BinaryArray::BinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
+ const std::shared_ptr<Buffer>& data,
+ const std::shared_ptr<Buffer>& null_bitmap, int64_t null_count,
+ int64_t offset) {
+ SetData(ArrayData::Make(binary(), length, {null_bitmap, value_offsets, data},
+ null_count, offset));
+}
+
+LargeBinaryArray::LargeBinaryArray(const std::shared_ptr<ArrayData>& data) {
ARROW_CHECK(is_large_binary_like(data->type->id()));
- SetData(data);
-}
-
-LargeBinaryArray::LargeBinaryArray(int64_t length,
- const std::shared_ptr<Buffer>& value_offsets,
- const std::shared_ptr<Buffer>& data,
- const std::shared_ptr<Buffer>& null_bitmap,
- int64_t null_count, int64_t offset) {
- SetData(ArrayData::Make(large_binary(), length, {null_bitmap, value_offsets, data},
- null_count, offset));
-}
-
-StringArray::StringArray(const std::shared_ptr<ArrayData>& data) {
- ARROW_CHECK_EQ(data->type->id(), Type::STRING);
- SetData(data);
-}
-
-StringArray::StringArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
- const std::shared_ptr<Buffer>& data,
- const std::shared_ptr<Buffer>& null_bitmap, int64_t null_count,
- int64_t offset) {
- SetData(ArrayData::Make(utf8(), length, {null_bitmap, value_offsets, data}, null_count,
- offset));
-}
-
+ SetData(data);
+}
+
+LargeBinaryArray::LargeBinaryArray(int64_t length,
+ const std::shared_ptr<Buffer>& value_offsets,
+ const std::shared_ptr<Buffer>& data,
+ const std::shared_ptr<Buffer>& null_bitmap,
+ int64_t null_count, int64_t offset) {
+ SetData(ArrayData::Make(large_binary(), length, {null_bitmap, value_offsets, data},
+ null_count, offset));
+}
+
+StringArray::StringArray(const std::shared_ptr<ArrayData>& data) {
+ ARROW_CHECK_EQ(data->type->id(), Type::STRING);
+ SetData(data);
+}
+
+StringArray::StringArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
+ const std::shared_ptr<Buffer>& data,
+ const std::shared_ptr<Buffer>& null_bitmap, int64_t null_count,
+ int64_t offset) {
+ SetData(ArrayData::Make(utf8(), length, {null_bitmap, value_offsets, data}, null_count,
+ offset));
+}
+
Status StringArray::ValidateUTF8() const { return internal::ValidateUTF8(*data_); }
-
-LargeStringArray::LargeStringArray(const std::shared_ptr<ArrayData>& data) {
- ARROW_CHECK_EQ(data->type->id(), Type::LARGE_STRING);
- SetData(data);
-}
-
-LargeStringArray::LargeStringArray(int64_t length,
- const std::shared_ptr<Buffer>& value_offsets,
- const std::shared_ptr<Buffer>& data,
- const std::shared_ptr<Buffer>& null_bitmap,
- int64_t null_count, int64_t offset) {
- SetData(ArrayData::Make(large_utf8(), length, {null_bitmap, value_offsets, data},
- null_count, offset));
-}
-
+
+LargeStringArray::LargeStringArray(const std::shared_ptr<ArrayData>& data) {
+ ARROW_CHECK_EQ(data->type->id(), Type::LARGE_STRING);
+ SetData(data);
+}
+
+LargeStringArray::LargeStringArray(int64_t length,
+ const std::shared_ptr<Buffer>& value_offsets,
+ const std::shared_ptr<Buffer>& data,
+ const std::shared_ptr<Buffer>& null_bitmap,
+ int64_t null_count, int64_t offset) {
+ SetData(ArrayData::Make(large_utf8(), length, {null_bitmap, value_offsets, data},
+ null_count, offset));
+}
+
Status LargeStringArray::ValidateUTF8() const { return internal::ValidateUTF8(*data_); }
-
-FixedSizeBinaryArray::FixedSizeBinaryArray(const std::shared_ptr<ArrayData>& data) {
- SetData(data);
-}
-
-FixedSizeBinaryArray::FixedSizeBinaryArray(const std::shared_ptr<DataType>& type,
- int64_t length,
- const std::shared_ptr<Buffer>& data,
- const std::shared_ptr<Buffer>& null_bitmap,
- int64_t null_count, int64_t offset)
- : PrimitiveArray(type, length, data, null_bitmap, null_count, offset),
- byte_width_(checked_cast<const FixedSizeBinaryType&>(*type).byte_width()) {}
-
-const uint8_t* FixedSizeBinaryArray::GetValue(int64_t i) const {
- return raw_values_ + (i + data_->offset) * byte_width_;
-}
-
-} // namespace arrow
+
+FixedSizeBinaryArray::FixedSizeBinaryArray(const std::shared_ptr<ArrayData>& data) {
+ SetData(data);
+}
+
+FixedSizeBinaryArray::FixedSizeBinaryArray(const std::shared_ptr<DataType>& type,
+ int64_t length,
+ const std::shared_ptr<Buffer>& data,
+ const std::shared_ptr<Buffer>& null_bitmap,
+ int64_t null_count, int64_t offset)
+ : PrimitiveArray(type, length, data, null_bitmap, null_count, offset),
+ byte_width_(checked_cast<const FixedSizeBinaryType&>(*type).byte_width()) {}
+
+const uint8_t* FixedSizeBinaryArray::GetValue(int64_t i) const {
+ return raw_values_ + (i + data_->offset) * byte_width_;
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_binary.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_binary.h
index f8e8c4f8a44..58d631b8d50 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_binary.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_binary.h
@@ -1,76 +1,76 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Array accessor classes for Binary, LargeBinart, String, LargeString,
-// FixedSizeBinary
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/array/array_base.h"
-#include "arrow/array/data.h"
-#include "arrow/buffer.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Array accessor classes for Binary, LargeBinart, String, LargeString,
+// FixedSizeBinary
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/data.h"
+#include "arrow/buffer.h"
#include "arrow/stl_iterator.h"
-#include "arrow/type.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/string_view.h" // IWYU pragma: export
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-// ----------------------------------------------------------------------
-// Binary and String
-
-/// Base class for variable-sized binary arrays, regardless of offset size
-/// and logical interpretation.
-template <typename TYPE>
-class BaseBinaryArray : public FlatArray {
- public:
- using TypeClass = TYPE;
- using offset_type = typename TypeClass::offset_type;
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/string_view.h" // IWYU pragma: export
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+// ----------------------------------------------------------------------
+// Binary and String
+
+/// Base class for variable-sized binary arrays, regardless of offset size
+/// and logical interpretation.
+template <typename TYPE>
+class BaseBinaryArray : public FlatArray {
+ public:
+ using TypeClass = TYPE;
+ using offset_type = typename TypeClass::offset_type;
using IteratorType = stl::ArrayIterator<BaseBinaryArray<TYPE>>;
-
- /// Return the pointer to the given elements bytes
- // XXX should GetValue(int64_t i) return a string_view?
- const uint8_t* GetValue(int64_t i, offset_type* out_length) const {
- // Account for base offset
- i += data_->offset;
- const offset_type pos = raw_value_offsets_[i];
- *out_length = raw_value_offsets_[i + 1] - pos;
- return raw_data_ + pos;
- }
-
- /// \brief Get binary value as a string_view
- ///
- /// \param i the value index
- /// \return the view over the selected value
- util::string_view GetView(int64_t i) const {
- // Account for base offset
- i += data_->offset;
- const offset_type pos = raw_value_offsets_[i];
- return util::string_view(reinterpret_cast<const char*>(raw_data_ + pos),
- raw_value_offsets_[i + 1] - pos);
- }
-
+
+ /// Return the pointer to the given elements bytes
+ // XXX should GetValue(int64_t i) return a string_view?
+ const uint8_t* GetValue(int64_t i, offset_type* out_length) const {
+ // Account for base offset
+ i += data_->offset;
+ const offset_type pos = raw_value_offsets_[i];
+ *out_length = raw_value_offsets_[i + 1] - pos;
+ return raw_data_ + pos;
+ }
+
+ /// \brief Get binary value as a string_view
+ ///
+ /// \param i the value index
+ /// \return the view over the selected value
+ util::string_view GetView(int64_t i) const {
+ // Account for base offset
+ i += data_->offset;
+ const offset_type pos = raw_value_offsets_[i];
+ return util::string_view(reinterpret_cast<const char*>(raw_data_ + pos),
+ raw_value_offsets_[i + 1] - pos);
+ }
+
/// \brief Get binary value as a string_view
/// Provided for consistency with other arrays.
///
@@ -78,178 +78,178 @@ class BaseBinaryArray : public FlatArray {
/// \return the view over the selected value
util::string_view Value(int64_t i) const { return GetView(i); }
- /// \brief Get binary value as a std::string
- ///
- /// \param i the value index
- /// \return the value copied into a std::string
- std::string GetString(int64_t i) const { return std::string(GetView(i)); }
-
- /// Note that this buffer does not account for any slice offset
- std::shared_ptr<Buffer> value_offsets() const { return data_->buffers[1]; }
-
- /// Note that this buffer does not account for any slice offset
- std::shared_ptr<Buffer> value_data() const { return data_->buffers[2]; }
-
- const offset_type* raw_value_offsets() const {
- return raw_value_offsets_ + data_->offset;
- }
-
- const uint8_t* raw_data() const { return raw_data_; }
-
- /// \brief Return the data buffer absolute offset of the data for the value
- /// at the passed index.
- ///
- /// Does not perform boundschecking
- offset_type value_offset(int64_t i) const {
- return raw_value_offsets_[i + data_->offset];
- }
-
- /// \brief Return the length of the data for the value at the passed index.
- ///
- /// Does not perform boundschecking
- offset_type value_length(int64_t i) const {
- i += data_->offset;
- return raw_value_offsets_[i + 1] - raw_value_offsets_[i];
- }
-
- /// \brief Return the total length of the memory in the data buffer
- /// referenced by this array. If the array has been sliced then this may be
- /// less than the size of the data buffer (data_->buffers[2]).
- offset_type total_values_length() const {
- if (data_->length > 0) {
- return raw_value_offsets_[data_->length + data_->offset] -
- raw_value_offsets_[data_->offset];
- } else {
- return 0;
- }
- }
-
+ /// \brief Get binary value as a std::string
+ ///
+ /// \param i the value index
+ /// \return the value copied into a std::string
+ std::string GetString(int64_t i) const { return std::string(GetView(i)); }
+
+ /// Note that this buffer does not account for any slice offset
+ std::shared_ptr<Buffer> value_offsets() const { return data_->buffers[1]; }
+
+ /// Note that this buffer does not account for any slice offset
+ std::shared_ptr<Buffer> value_data() const { return data_->buffers[2]; }
+
+ const offset_type* raw_value_offsets() const {
+ return raw_value_offsets_ + data_->offset;
+ }
+
+ const uint8_t* raw_data() const { return raw_data_; }
+
+ /// \brief Return the data buffer absolute offset of the data for the value
+ /// at the passed index.
+ ///
+ /// Does not perform boundschecking
+ offset_type value_offset(int64_t i) const {
+ return raw_value_offsets_[i + data_->offset];
+ }
+
+ /// \brief Return the length of the data for the value at the passed index.
+ ///
+ /// Does not perform boundschecking
+ offset_type value_length(int64_t i) const {
+ i += data_->offset;
+ return raw_value_offsets_[i + 1] - raw_value_offsets_[i];
+ }
+
+ /// \brief Return the total length of the memory in the data buffer
+ /// referenced by this array. If the array has been sliced then this may be
+ /// less than the size of the data buffer (data_->buffers[2]).
+ offset_type total_values_length() const {
+ if (data_->length > 0) {
+ return raw_value_offsets_[data_->length + data_->offset] -
+ raw_value_offsets_[data_->offset];
+ } else {
+ return 0;
+ }
+ }
+
IteratorType begin() const { return IteratorType(*this); }
IteratorType end() const { return IteratorType(*this, length()); }
- protected:
- // For subclasses
+ protected:
+ // For subclasses
BaseBinaryArray() = default;
-
- // Protected method for constructors
- void SetData(const std::shared_ptr<ArrayData>& data) {
- this->Array::SetData(data);
- raw_value_offsets_ = data->GetValuesSafe<offset_type>(1, /*offset=*/0);
- raw_data_ = data->GetValuesSafe<uint8_t>(2, /*offset=*/0);
- }
-
+
+ // Protected method for constructors
+ void SetData(const std::shared_ptr<ArrayData>& data) {
+ this->Array::SetData(data);
+ raw_value_offsets_ = data->GetValuesSafe<offset_type>(1, /*offset=*/0);
+ raw_data_ = data->GetValuesSafe<uint8_t>(2, /*offset=*/0);
+ }
+
const offset_type* raw_value_offsets_ = NULLPTR;
const uint8_t* raw_data_ = NULLPTR;
-};
-
-/// Concrete Array class for variable-size binary data
-class ARROW_EXPORT BinaryArray : public BaseBinaryArray<BinaryType> {
- public:
- explicit BinaryArray(const std::shared_ptr<ArrayData>& data);
-
- BinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
- const std::shared_ptr<Buffer>& data,
- const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
- int64_t null_count = kUnknownNullCount, int64_t offset = 0);
-
- protected:
- // For subclasses such as StringArray
- BinaryArray() : BaseBinaryArray() {}
-};
-
-/// Concrete Array class for variable-size string (utf-8) data
-class ARROW_EXPORT StringArray : public BinaryArray {
- public:
- using TypeClass = StringType;
-
- explicit StringArray(const std::shared_ptr<ArrayData>& data);
-
- StringArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
- const std::shared_ptr<Buffer>& data,
- const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
- int64_t null_count = kUnknownNullCount, int64_t offset = 0);
-
- /// \brief Validate that this array contains only valid UTF8 entries
- ///
- /// This check is also implied by ValidateFull()
- Status ValidateUTF8() const;
-};
-
-/// Concrete Array class for large variable-size binary data
-class ARROW_EXPORT LargeBinaryArray : public BaseBinaryArray<LargeBinaryType> {
- public:
- explicit LargeBinaryArray(const std::shared_ptr<ArrayData>& data);
-
- LargeBinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
- const std::shared_ptr<Buffer>& data,
- const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
- int64_t null_count = kUnknownNullCount, int64_t offset = 0);
-
- protected:
- // For subclasses such as LargeStringArray
- LargeBinaryArray() : BaseBinaryArray() {}
-};
-
-/// Concrete Array class for large variable-size string (utf-8) data
-class ARROW_EXPORT LargeStringArray : public LargeBinaryArray {
- public:
- using TypeClass = LargeStringType;
-
- explicit LargeStringArray(const std::shared_ptr<ArrayData>& data);
-
- LargeStringArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
- const std::shared_ptr<Buffer>& data,
- const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
- int64_t null_count = kUnknownNullCount, int64_t offset = 0);
-
- /// \brief Validate that this array contains only valid UTF8 entries
- ///
- /// This check is also implied by ValidateFull()
- Status ValidateUTF8() const;
-};
-
-// ----------------------------------------------------------------------
-// Fixed width binary
-
-/// Concrete Array class for fixed-size binary data
-class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray {
- public:
- using TypeClass = FixedSizeBinaryType;
+};
+
+/// Concrete Array class for variable-size binary data
+class ARROW_EXPORT BinaryArray : public BaseBinaryArray<BinaryType> {
+ public:
+ explicit BinaryArray(const std::shared_ptr<ArrayData>& data);
+
+ BinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
+ const std::shared_ptr<Buffer>& data,
+ const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
+ int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
+ protected:
+ // For subclasses such as StringArray
+ BinaryArray() : BaseBinaryArray() {}
+};
+
+/// Concrete Array class for variable-size string (utf-8) data
+class ARROW_EXPORT StringArray : public BinaryArray {
+ public:
+ using TypeClass = StringType;
+
+ explicit StringArray(const std::shared_ptr<ArrayData>& data);
+
+ StringArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
+ const std::shared_ptr<Buffer>& data,
+ const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
+ int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
+ /// \brief Validate that this array contains only valid UTF8 entries
+ ///
+ /// This check is also implied by ValidateFull()
+ Status ValidateUTF8() const;
+};
+
+/// Concrete Array class for large variable-size binary data
+class ARROW_EXPORT LargeBinaryArray : public BaseBinaryArray<LargeBinaryType> {
+ public:
+ explicit LargeBinaryArray(const std::shared_ptr<ArrayData>& data);
+
+ LargeBinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
+ const std::shared_ptr<Buffer>& data,
+ const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
+ int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
+ protected:
+ // For subclasses such as LargeStringArray
+ LargeBinaryArray() : BaseBinaryArray() {}
+};
+
+/// Concrete Array class for large variable-size string (utf-8) data
+class ARROW_EXPORT LargeStringArray : public LargeBinaryArray {
+ public:
+ using TypeClass = LargeStringType;
+
+ explicit LargeStringArray(const std::shared_ptr<ArrayData>& data);
+
+ LargeStringArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
+ const std::shared_ptr<Buffer>& data,
+ const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
+ int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
+ /// \brief Validate that this array contains only valid UTF8 entries
+ ///
+ /// This check is also implied by ValidateFull()
+ Status ValidateUTF8() const;
+};
+
+// ----------------------------------------------------------------------
+// Fixed width binary
+
+/// Concrete Array class for fixed-size binary data
+class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray {
+ public:
+ using TypeClass = FixedSizeBinaryType;
using IteratorType = stl::ArrayIterator<FixedSizeBinaryArray>;
-
- explicit FixedSizeBinaryArray(const std::shared_ptr<ArrayData>& data);
-
- FixedSizeBinaryArray(const std::shared_ptr<DataType>& type, int64_t length,
- const std::shared_ptr<Buffer>& data,
- const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
- int64_t null_count = kUnknownNullCount, int64_t offset = 0);
-
- const uint8_t* GetValue(int64_t i) const;
- const uint8_t* Value(int64_t i) const { return GetValue(i); }
-
- util::string_view GetView(int64_t i) const {
- return util::string_view(reinterpret_cast<const char*>(GetValue(i)), byte_width());
- }
-
- std::string GetString(int64_t i) const { return std::string(GetView(i)); }
-
- int32_t byte_width() const { return byte_width_; }
-
- const uint8_t* raw_values() const { return raw_values_ + data_->offset * byte_width_; }
-
+
+ explicit FixedSizeBinaryArray(const std::shared_ptr<ArrayData>& data);
+
+ FixedSizeBinaryArray(const std::shared_ptr<DataType>& type, int64_t length,
+ const std::shared_ptr<Buffer>& data,
+ const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
+ int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
+ const uint8_t* GetValue(int64_t i) const;
+ const uint8_t* Value(int64_t i) const { return GetValue(i); }
+
+ util::string_view GetView(int64_t i) const {
+ return util::string_view(reinterpret_cast<const char*>(GetValue(i)), byte_width());
+ }
+
+ std::string GetString(int64_t i) const { return std::string(GetView(i)); }
+
+ int32_t byte_width() const { return byte_width_; }
+
+ const uint8_t* raw_values() const { return raw_values_ + data_->offset * byte_width_; }
+
IteratorType begin() const { return IteratorType(*this); }
IteratorType end() const { return IteratorType(*this, length()); }
- protected:
- void SetData(const std::shared_ptr<ArrayData>& data) {
- this->PrimitiveArray::SetData(data);
- byte_width_ =
- internal::checked_cast<const FixedSizeBinaryType&>(*type()).byte_width();
- }
-
- int32_t byte_width_;
-};
-
-} // namespace arrow
+ protected:
+ void SetData(const std::shared_ptr<ArrayData>& data) {
+ this->PrimitiveArray::SetData(data);
+ byte_width_ =
+ internal::checked_cast<const FixedSizeBinaryType&>(*type()).byte_width();
+ }
+
+ int32_t byte_width_;
+};
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_decimal.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_decimal.cc
index d65f6ee5356..408a6476484 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_decimal.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_decimal.cc
@@ -1,51 +1,51 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/array/array_decimal.h"
-
-#include <cstdint>
-#include <memory>
-#include <string>
-
-#include "arrow/array/array_binary.h"
-#include "arrow/array/data.h"
-#include "arrow/type.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/decimal.h"
-#include "arrow/util/logging.h"
-
-namespace arrow {
-
-using internal::checked_cast;
-
-// ----------------------------------------------------------------------
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/array/array_decimal.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+#include "arrow/array/array_binary.h"
+#include "arrow/array/data.h"
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/decimal.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+
+// ----------------------------------------------------------------------
// Decimal128
-
-Decimal128Array::Decimal128Array(const std::shared_ptr<ArrayData>& data)
- : FixedSizeBinaryArray(data) {
+
+Decimal128Array::Decimal128Array(const std::shared_ptr<ArrayData>& data)
+ : FixedSizeBinaryArray(data) {
ARROW_CHECK_EQ(data->type->id(), Type::DECIMAL128);
-}
-
-std::string Decimal128Array::FormatValue(int64_t i) const {
- const auto& type_ = checked_cast<const Decimal128Type&>(*type());
- const Decimal128 value(GetValue(i));
- return value.ToString(type_.scale());
-}
-
+}
+
+std::string Decimal128Array::FormatValue(int64_t i) const {
+ const auto& type_ = checked_cast<const Decimal128Type&>(*type());
+ const Decimal128 value(GetValue(i));
+ return value.ToString(type_.scale());
+}
+
// ----------------------------------------------------------------------
// Decimal256
@@ -60,4 +60,4 @@ std::string Decimal256Array::FormatValue(int64_t i) const {
return value.ToString(type_.scale());
}
-} // namespace arrow
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_decimal.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_decimal.h
index 8d7d1c59cd0..6aa93cc2723 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_decimal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_decimal.h
@@ -1,52 +1,52 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <string>
-
-#include "arrow/array/array_binary.h"
-#include "arrow/array/data.h"
-#include "arrow/type.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-// ----------------------------------------------------------------------
-// Decimal128Array
-
-/// Concrete Array class for 128-bit decimal data
-class ARROW_EXPORT Decimal128Array : public FixedSizeBinaryArray {
- public:
- using TypeClass = Decimal128Type;
-
- using FixedSizeBinaryArray::FixedSizeBinaryArray;
-
- /// \brief Construct Decimal128Array from ArrayData instance
- explicit Decimal128Array(const std::shared_ptr<ArrayData>& data);
-
- std::string FormatValue(int64_t i) const;
-};
-
-// Backward compatibility
-using DecimalArray = Decimal128Array;
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+#include "arrow/array/array_binary.h"
+#include "arrow/array/data.h"
+#include "arrow/type.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+// ----------------------------------------------------------------------
+// Decimal128Array
+
+/// Concrete Array class for 128-bit decimal data
+class ARROW_EXPORT Decimal128Array : public FixedSizeBinaryArray {
+ public:
+ using TypeClass = Decimal128Type;
+
+ using FixedSizeBinaryArray::FixedSizeBinaryArray;
+
+ /// \brief Construct Decimal128Array from ArrayData instance
+ explicit Decimal128Array(const std::shared_ptr<ArrayData>& data);
+
+ std::string FormatValue(int64_t i) const;
+};
+
+// Backward compatibility
+using DecimalArray = Decimal128Array;
+
// ----------------------------------------------------------------------
// Decimal256Array
@@ -63,4 +63,4 @@ class ARROW_EXPORT Decimal256Array : public FixedSizeBinaryArray {
std::string FormatValue(int64_t i) const;
};
-} // namespace arrow
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_dict.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_dict.cc
index 2fa95e9a176..a6f917a6a30 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_dict.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_dict.cc
@@ -1,151 +1,151 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/array/array_dict.h"
-
-#include <algorithm>
-#include <climits>
-#include <cstdint>
-#include <limits>
-#include <utility>
-#include <vector>
-
-#include "arrow/array/array_primitive.h"
-#include "arrow/array/data.h"
-#include "arrow/array/dict_internal.h"
-#include "arrow/array/util.h"
-#include "arrow/buffer.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/array/array_dict.h"
+
+#include <algorithm>
+#include <climits>
+#include <cstdint>
+#include <limits>
+#include <utility>
+#include <vector>
+
+#include "arrow/array/array_primitive.h"
+#include "arrow/array/data.h"
+#include "arrow/array/dict_internal.h"
+#include "arrow/array/util.h"
+#include "arrow/buffer.h"
#include "arrow/chunked_array.h"
#include "arrow/datum.h"
-#include "arrow/status.h"
+#include "arrow/status.h"
#include "arrow/table.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/bitmap_ops.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/int_util.h"
-#include "arrow/util/logging.h"
-#include "arrow/visitor_inline.h"
-
-namespace arrow {
-
-using internal::checked_cast;
-using internal::CopyBitmap;
-
-// ----------------------------------------------------------------------
-// DictionaryArray
-
-std::shared_ptr<Array> DictionaryArray::indices() const { return indices_; }
-
-int64_t DictionaryArray::GetValueIndex(int64_t i) const {
- const uint8_t* indices_data = data_->buffers[1]->data();
- // If the value is non-negative then we can use the unsigned path
- switch (indices_->type_id()) {
- case Type::UINT8:
- case Type::INT8:
- return static_cast<int64_t>(indices_data[data_->offset + i]);
- case Type::UINT16:
- case Type::INT16:
- return static_cast<int64_t>(
- reinterpret_cast<const uint16_t*>(indices_data)[data_->offset + i]);
- case Type::UINT32:
- case Type::INT32:
- return static_cast<int64_t>(
- reinterpret_cast<const uint32_t*>(indices_data)[data_->offset + i]);
- case Type::UINT64:
- case Type::INT64:
- return static_cast<int64_t>(
- reinterpret_cast<const uint64_t*>(indices_data)[data_->offset + i]);
- default:
- ARROW_CHECK(false) << "unreachable";
- return -1;
- }
-}
-
-DictionaryArray::DictionaryArray(const std::shared_ptr<ArrayData>& data)
- : dict_type_(checked_cast<const DictionaryType*>(data->type.get())) {
- ARROW_CHECK_EQ(data->type->id(), Type::DICTIONARY);
- ARROW_CHECK_NE(data->dictionary, nullptr);
- SetData(data);
-}
-
-void DictionaryArray::SetData(const std::shared_ptr<ArrayData>& data) {
- this->Array::SetData(data);
- auto indices_data = data_->Copy();
- indices_data->type = dict_type_->index_type();
- indices_data->dictionary = nullptr;
- indices_ = MakeArray(indices_data);
-}
-
-DictionaryArray::DictionaryArray(const std::shared_ptr<DataType>& type,
- const std::shared_ptr<Array>& indices,
- const std::shared_ptr<Array>& dictionary)
- : dict_type_(checked_cast<const DictionaryType*>(type.get())) {
- ARROW_CHECK_EQ(type->id(), Type::DICTIONARY);
- ARROW_CHECK_EQ(indices->type_id(), dict_type_->index_type()->id());
- ARROW_CHECK_EQ(dict_type_->value_type()->id(), dictionary->type()->id());
- DCHECK(dict_type_->value_type()->Equals(*dictionary->type()));
- auto data = indices->data()->Copy();
- data->type = type;
- data->dictionary = dictionary->data();
- SetData(data);
-}
-
-std::shared_ptr<Array> DictionaryArray::dictionary() const {
- if (!dictionary_) {
- dictionary_ = MakeArray(data_->dictionary);
- }
- return dictionary_;
-}
-
-Result<std::shared_ptr<Array>> DictionaryArray::FromArrays(
- const std::shared_ptr<DataType>& type, const std::shared_ptr<Array>& indices,
- const std::shared_ptr<Array>& dictionary) {
- if (type->id() != Type::DICTIONARY) {
- return Status::TypeError("Expected a dictionary type");
- }
- const auto& dict = checked_cast<const DictionaryType&>(*type);
- if (indices->type_id() != dict.index_type()->id()) {
- return Status::TypeError(
- "Dictionary type's index type does not match "
- "indices array's type");
- }
- RETURN_NOT_OK(internal::CheckIndexBounds(*indices->data(),
- static_cast<uint64_t>(dictionary->length())));
- return std::make_shared<DictionaryArray>(type, indices, dictionary);
-}
-
-bool DictionaryArray::CanCompareIndices(const DictionaryArray& other) const {
- DCHECK(dictionary()->type()->Equals(other.dictionary()->type()))
- << "dictionaries have differing type " << *dictionary()->type() << " vs "
- << *other.dictionary()->type();
-
- if (!indices()->type()->Equals(other.indices()->type())) {
- return false;
- }
-
- auto min_length = std::min(dictionary()->length(), other.dictionary()->length());
- return dictionary()->RangeEquals(other.dictionary(), 0, min_length, 0);
-}
-
-// ----------------------------------------------------------------------
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/int_util.h"
+#include "arrow/util/logging.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+using internal::CopyBitmap;
+
+// ----------------------------------------------------------------------
+// DictionaryArray
+
+std::shared_ptr<Array> DictionaryArray::indices() const { return indices_; }
+
+int64_t DictionaryArray::GetValueIndex(int64_t i) const {
+ const uint8_t* indices_data = data_->buffers[1]->data();
+ // If the value is non-negative then we can use the unsigned path
+ switch (indices_->type_id()) {
+ case Type::UINT8:
+ case Type::INT8:
+ return static_cast<int64_t>(indices_data[data_->offset + i]);
+ case Type::UINT16:
+ case Type::INT16:
+ return static_cast<int64_t>(
+ reinterpret_cast<const uint16_t*>(indices_data)[data_->offset + i]);
+ case Type::UINT32:
+ case Type::INT32:
+ return static_cast<int64_t>(
+ reinterpret_cast<const uint32_t*>(indices_data)[data_->offset + i]);
+ case Type::UINT64:
+ case Type::INT64:
+ return static_cast<int64_t>(
+ reinterpret_cast<const uint64_t*>(indices_data)[data_->offset + i]);
+ default:
+ ARROW_CHECK(false) << "unreachable";
+ return -1;
+ }
+}
+
+DictionaryArray::DictionaryArray(const std::shared_ptr<ArrayData>& data)
+ : dict_type_(checked_cast<const DictionaryType*>(data->type.get())) {
+ ARROW_CHECK_EQ(data->type->id(), Type::DICTIONARY);
+ ARROW_CHECK_NE(data->dictionary, nullptr);
+ SetData(data);
+}
+
+void DictionaryArray::SetData(const std::shared_ptr<ArrayData>& data) {
+ this->Array::SetData(data);
+ auto indices_data = data_->Copy();
+ indices_data->type = dict_type_->index_type();
+ indices_data->dictionary = nullptr;
+ indices_ = MakeArray(indices_data);
+}
+
+DictionaryArray::DictionaryArray(const std::shared_ptr<DataType>& type,
+ const std::shared_ptr<Array>& indices,
+ const std::shared_ptr<Array>& dictionary)
+ : dict_type_(checked_cast<const DictionaryType*>(type.get())) {
+ ARROW_CHECK_EQ(type->id(), Type::DICTIONARY);
+ ARROW_CHECK_EQ(indices->type_id(), dict_type_->index_type()->id());
+ ARROW_CHECK_EQ(dict_type_->value_type()->id(), dictionary->type()->id());
+ DCHECK(dict_type_->value_type()->Equals(*dictionary->type()));
+ auto data = indices->data()->Copy();
+ data->type = type;
+ data->dictionary = dictionary->data();
+ SetData(data);
+}
+
+std::shared_ptr<Array> DictionaryArray::dictionary() const {
+ if (!dictionary_) {
+ dictionary_ = MakeArray(data_->dictionary);
+ }
+ return dictionary_;
+}
+
+Result<std::shared_ptr<Array>> DictionaryArray::FromArrays(
+ const std::shared_ptr<DataType>& type, const std::shared_ptr<Array>& indices,
+ const std::shared_ptr<Array>& dictionary) {
+ if (type->id() != Type::DICTIONARY) {
+ return Status::TypeError("Expected a dictionary type");
+ }
+ const auto& dict = checked_cast<const DictionaryType&>(*type);
+ if (indices->type_id() != dict.index_type()->id()) {
+ return Status::TypeError(
+ "Dictionary type's index type does not match "
+ "indices array's type");
+ }
+ RETURN_NOT_OK(internal::CheckIndexBounds(*indices->data(),
+ static_cast<uint64_t>(dictionary->length())));
+ return std::make_shared<DictionaryArray>(type, indices, dictionary);
+}
+
+bool DictionaryArray::CanCompareIndices(const DictionaryArray& other) const {
+ DCHECK(dictionary()->type()->Equals(other.dictionary()->type()))
+ << "dictionaries have differing type " << *dictionary()->type() << " vs "
+ << *other.dictionary()->type();
+
+ if (!indices()->type()->Equals(other.indices()->type())) {
+ return false;
+ }
+
+ auto min_length = std::min(dictionary()->length(), other.dictionary()->length());
+ return dictionary()->RangeEquals(other.dictionary(), 0, min_length, 0);
+}
+
+// ----------------------------------------------------------------------
// Dictionary transposition
-
+
namespace {
inline bool IsTrivialTransposition(const int32_t* transpose_map,
@@ -226,68 +226,68 @@ Result<std::shared_ptr<Array>> DictionaryArray::Transpose(
namespace {
-template <typename T>
-class DictionaryUnifierImpl : public DictionaryUnifier {
- public:
- using ArrayType = typename TypeTraits<T>::ArrayType;
- using DictTraits = typename internal::DictionaryTraits<T>;
- using MemoTableType = typename DictTraits::MemoTableType;
-
- DictionaryUnifierImpl(MemoryPool* pool, std::shared_ptr<DataType> value_type)
- : pool_(pool), value_type_(value_type), memo_table_(pool) {}
-
- Status Unify(const Array& dictionary, std::shared_ptr<Buffer>* out) override {
- if (dictionary.null_count() > 0) {
- return Status::Invalid("Cannot yet unify dictionaries with nulls");
- }
- if (!dictionary.type()->Equals(*value_type_)) {
- return Status::Invalid("Dictionary type different from unifier: ",
- dictionary.type()->ToString());
- }
- const ArrayType& values = checked_cast<const ArrayType&>(dictionary);
- if (out != nullptr) {
- ARROW_ASSIGN_OR_RAISE(auto result,
- AllocateBuffer(dictionary.length() * sizeof(int32_t), pool_));
- auto result_raw = reinterpret_cast<int32_t*>(result->mutable_data());
- for (int64_t i = 0; i < values.length(); ++i) {
- RETURN_NOT_OK(memo_table_.GetOrInsert(values.GetView(i), &result_raw[i]));
- }
- *out = std::move(result);
- } else {
- for (int64_t i = 0; i < values.length(); ++i) {
- int32_t unused_memo_index;
- RETURN_NOT_OK(memo_table_.GetOrInsert(values.GetView(i), &unused_memo_index));
- }
- }
- return Status::OK();
- }
-
- Status Unify(const Array& dictionary) override { return Unify(dictionary, nullptr); }
-
- Status GetResult(std::shared_ptr<DataType>* out_type,
- std::shared_ptr<Array>* out_dict) override {
- int64_t dict_length = memo_table_.size();
- std::shared_ptr<DataType> index_type;
- if (dict_length <= std::numeric_limits<int8_t>::max()) {
- index_type = int8();
- } else if (dict_length <= std::numeric_limits<int16_t>::max()) {
- index_type = int16();
- } else if (dict_length <= std::numeric_limits<int32_t>::max()) {
- index_type = int32();
- } else {
- index_type = int64();
- }
- // Build unified dictionary type with the right index type
- *out_type = arrow::dictionary(index_type, value_type_);
-
- // Build unified dictionary array
- std::shared_ptr<ArrayData> data;
- RETURN_NOT_OK(DictTraits::GetDictionaryArrayData(pool_, value_type_, memo_table_,
- 0 /* start_offset */, &data));
- *out_dict = MakeArray(data);
- return Status::OK();
- }
-
+template <typename T>
+class DictionaryUnifierImpl : public DictionaryUnifier {
+ public:
+ using ArrayType = typename TypeTraits<T>::ArrayType;
+ using DictTraits = typename internal::DictionaryTraits<T>;
+ using MemoTableType = typename DictTraits::MemoTableType;
+
+ DictionaryUnifierImpl(MemoryPool* pool, std::shared_ptr<DataType> value_type)
+ : pool_(pool), value_type_(value_type), memo_table_(pool) {}
+
+ Status Unify(const Array& dictionary, std::shared_ptr<Buffer>* out) override {
+ if (dictionary.null_count() > 0) {
+ return Status::Invalid("Cannot yet unify dictionaries with nulls");
+ }
+ if (!dictionary.type()->Equals(*value_type_)) {
+ return Status::Invalid("Dictionary type different from unifier: ",
+ dictionary.type()->ToString());
+ }
+ const ArrayType& values = checked_cast<const ArrayType&>(dictionary);
+ if (out != nullptr) {
+ ARROW_ASSIGN_OR_RAISE(auto result,
+ AllocateBuffer(dictionary.length() * sizeof(int32_t), pool_));
+ auto result_raw = reinterpret_cast<int32_t*>(result->mutable_data());
+ for (int64_t i = 0; i < values.length(); ++i) {
+ RETURN_NOT_OK(memo_table_.GetOrInsert(values.GetView(i), &result_raw[i]));
+ }
+ *out = std::move(result);
+ } else {
+ for (int64_t i = 0; i < values.length(); ++i) {
+ int32_t unused_memo_index;
+ RETURN_NOT_OK(memo_table_.GetOrInsert(values.GetView(i), &unused_memo_index));
+ }
+ }
+ return Status::OK();
+ }
+
+ Status Unify(const Array& dictionary) override { return Unify(dictionary, nullptr); }
+
+ Status GetResult(std::shared_ptr<DataType>* out_type,
+ std::shared_ptr<Array>* out_dict) override {
+ int64_t dict_length = memo_table_.size();
+ std::shared_ptr<DataType> index_type;
+ if (dict_length <= std::numeric_limits<int8_t>::max()) {
+ index_type = int8();
+ } else if (dict_length <= std::numeric_limits<int16_t>::max()) {
+ index_type = int16();
+ } else if (dict_length <= std::numeric_limits<int32_t>::max()) {
+ index_type = int32();
+ } else {
+ index_type = int64();
+ }
+ // Build unified dictionary type with the right index type
+ *out_type = arrow::dictionary(index_type, value_type_);
+
+ // Build unified dictionary array
+ std::shared_ptr<ArrayData> data;
+ RETURN_NOT_OK(DictTraits::GetDictionaryArrayData(pool_, value_type_, memo_table_,
+ 0 /* start_offset */, &data));
+ *out_dict = MakeArray(data);
+ return Status::OK();
+ }
+
Status GetResultWithIndexType(const std::shared_ptr<DataType>& index_type,
std::shared_ptr<Array>* out_dict) override {
int64_t dict_length = memo_table_.size();
@@ -305,48 +305,48 @@ class DictionaryUnifierImpl : public DictionaryUnifier {
return Status::OK();
}
- private:
- MemoryPool* pool_;
- std::shared_ptr<DataType> value_type_;
- MemoTableType memo_table_;
-};
-
-struct MakeUnifier {
- MemoryPool* pool;
- std::shared_ptr<DataType> value_type;
- std::unique_ptr<DictionaryUnifier> result;
-
- MakeUnifier(MemoryPool* pool, std::shared_ptr<DataType> value_type)
- : pool(pool), value_type(value_type) {}
-
- template <typename T>
- enable_if_no_memoize<T, Status> Visit(const T&) {
- // Default implementation for non-dictionary-supported datatypes
+ private:
+ MemoryPool* pool_;
+ std::shared_ptr<DataType> value_type_;
+ MemoTableType memo_table_;
+};
+
+struct MakeUnifier {
+ MemoryPool* pool;
+ std::shared_ptr<DataType> value_type;
+ std::unique_ptr<DictionaryUnifier> result;
+
+ MakeUnifier(MemoryPool* pool, std::shared_ptr<DataType> value_type)
+ : pool(pool), value_type(value_type) {}
+
+ template <typename T>
+ enable_if_no_memoize<T, Status> Visit(const T&) {
+ // Default implementation for non-dictionary-supported datatypes
return Status::NotImplemented("Unification of ", *value_type,
- " dictionaries is not implemented");
- }
-
- template <typename T>
- enable_if_memoize<T, Status> Visit(const T&) {
- result.reset(new DictionaryUnifierImpl<T>(pool, value_type));
- return Status::OK();
- }
-};
-
+ " dictionaries is not implemented");
+ }
+
+ template <typename T>
+ enable_if_memoize<T, Status> Visit(const T&) {
+ result.reset(new DictionaryUnifierImpl<T>(pool, value_type));
+ return Status::OK();
+ }
+};
+
struct RecursiveUnifier {
MemoryPool* pool;
-
+
// Return true if any of the arrays was changed (including descendents)
Result<bool> Unify(std::shared_ptr<DataType> type, ArrayDataVector* chunks) {
DCHECK(!chunks->empty());
bool changed = false;
std::shared_ptr<DataType> ext_type = nullptr;
-
+
if (type->id() == Type::EXTENSION) {
ext_type = std::move(type);
type = checked_cast<const ExtensionType&>(*ext_type).storage_type();
}
-
+
// Unify all child dictionaries (if any)
if (type->num_fields() > 0) {
ArrayDataVector children(chunks->size());
@@ -365,8 +365,8 @@ struct RecursiveUnifier {
changed = true;
}
}
- }
-
+ }
+
// Unify this dictionary
if (type->id() == Type::DICTIONARY) {
const auto& dict_type = checked_cast<const DictionaryType&>(*type);
@@ -396,26 +396,26 @@ struct RecursiveUnifier {
}
changed = true;
}
-
+
return changed;
- }
+ }
};
-
+
} // namespace
-
+
Result<std::unique_ptr<DictionaryUnifier>> DictionaryUnifier::Make(
std::shared_ptr<DataType> value_type, MemoryPool* pool) {
MakeUnifier maker(pool, value_type);
RETURN_NOT_OK(VisitTypeInline(*value_type, &maker));
return std::move(maker.result);
}
-
+
Result<std::shared_ptr<ChunkedArray>> DictionaryUnifier::UnifyChunkedArray(
const std::shared_ptr<ChunkedArray>& array, MemoryPool* pool) {
if (array->num_chunks() <= 1) {
return array;
- }
-
+ }
+
ArrayDataVector data_chunks(array->num_chunks());
std::transform(array->chunks().begin(), array->chunks().end(), data_chunks.begin(),
[](const std::shared_ptr<Array>& array) { return array->data(); });
@@ -423,20 +423,20 @@ Result<std::shared_ptr<ChunkedArray>> DictionaryUnifier::UnifyChunkedArray(
RecursiveUnifier{pool}.Unify(array->type(), &data_chunks));
if (!changed) {
return array;
- }
+ }
ArrayVector chunks(array->num_chunks());
std::transform(data_chunks.begin(), data_chunks.end(), chunks.begin(),
[](const std::shared_ptr<ArrayData>& data) { return MakeArray(data); });
return std::make_shared<ChunkedArray>(std::move(chunks), array->type());
}
-
+
Result<std::shared_ptr<Table>> DictionaryUnifier::UnifyTable(const Table& table,
MemoryPool* pool) {
ChunkedArrayVector columns = table.columns();
for (auto& col : columns) {
ARROW_ASSIGN_OR_RAISE(col, DictionaryUnifier::UnifyChunkedArray(col, pool));
- }
+ }
return Table::Make(table.schema(), std::move(columns), table.num_rows());
-}
-
-} // namespace arrow
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_dict.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_dict.h
index 8791eaa07db..ee7bc551436 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_dict.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_dict.h
@@ -1,125 +1,125 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-
-#include "arrow/array/array_base.h"
-#include "arrow/array/data.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-// ----------------------------------------------------------------------
-// DictionaryArray
-
-/// \brief Array type for dictionary-encoded data with a
-/// data-dependent dictionary
-///
-/// A dictionary array contains an array of non-negative integers (the
-/// "dictionary indices") along with a data type containing a "dictionary"
-/// corresponding to the distinct values represented in the data.
-///
-/// For example, the array
-///
-/// ["foo", "bar", "foo", "bar", "foo", "bar"]
-///
-/// with dictionary ["bar", "foo"], would have dictionary array representation
-///
-/// indices: [1, 0, 1, 0, 1, 0]
-/// dictionary: ["bar", "foo"]
-///
-/// The indices in principle may be any integer type.
-class ARROW_EXPORT DictionaryArray : public Array {
- public:
- using TypeClass = DictionaryType;
-
- explicit DictionaryArray(const std::shared_ptr<ArrayData>& data);
-
- DictionaryArray(const std::shared_ptr<DataType>& type,
- const std::shared_ptr<Array>& indices,
- const std::shared_ptr<Array>& dictionary);
-
- /// \brief Construct DictionaryArray from dictionary and indices
- /// array and validate
- ///
- /// This function does the validation of the indices and input type. It checks if
- /// all indices are non-negative and smaller than the size of the dictionary.
- ///
- /// \param[in] type a dictionary type
- /// \param[in] dictionary the dictionary with same value type as the
- /// type object
- /// \param[in] indices an array of non-negative integers smaller than the
- /// size of the dictionary
- static Result<std::shared_ptr<Array>> FromArrays(
- const std::shared_ptr<DataType>& type, const std::shared_ptr<Array>& indices,
- const std::shared_ptr<Array>& dictionary);
-
- static Result<std::shared_ptr<Array>> FromArrays(
- const std::shared_ptr<Array>& indices, const std::shared_ptr<Array>& dictionary) {
- return FromArrays(::arrow::dictionary(indices->type(), dictionary->type()), indices,
- dictionary);
- }
-
- /// \brief Transpose this DictionaryArray
- ///
- /// This method constructs a new dictionary array with the given dictionary
- /// type, transposing indices using the transpose map. The type and the
- /// transpose map are typically computed using DictionaryUnifier.
- ///
- /// \param[in] type the new type object
- /// \param[in] dictionary the new dictionary
- /// \param[in] transpose_map transposition array of this array's indices
- /// into the target array's indices
- /// \param[in] pool a pool to allocate the array data from
- Result<std::shared_ptr<Array>> Transpose(
- const std::shared_ptr<DataType>& type, const std::shared_ptr<Array>& dictionary,
- const int32_t* transpose_map, MemoryPool* pool = default_memory_pool()) const;
-
- /// \brief Determine whether dictionary arrays may be compared without unification
- bool CanCompareIndices(const DictionaryArray& other) const;
-
- /// \brief Return the dictionary for this array, which is stored as
- /// a member of the ArrayData internal structure
- std::shared_ptr<Array> dictionary() const;
- std::shared_ptr<Array> indices() const;
-
- /// \brief Return the ith value of indices, cast to int64_t. Not recommended
- /// for use in performance-sensitive code. Does not validate whether the
- /// value is null or out-of-bounds.
- int64_t GetValueIndex(int64_t i) const;
-
- const DictionaryType* dict_type() const { return dict_type_; }
-
- private:
- void SetData(const std::shared_ptr<ArrayData>& data);
- const DictionaryType* dict_type_;
- std::shared_ptr<Array> indices_;
-
- // Lazily initialized when invoking dictionary()
- mutable std::shared_ptr<Array> dictionary_;
-};
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/data.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+// ----------------------------------------------------------------------
+// DictionaryArray
+
+/// \brief Array type for dictionary-encoded data with a
+/// data-dependent dictionary
+///
+/// A dictionary array contains an array of non-negative integers (the
+/// "dictionary indices") along with a data type containing a "dictionary"
+/// corresponding to the distinct values represented in the data.
+///
+/// For example, the array
+///
+/// ["foo", "bar", "foo", "bar", "foo", "bar"]
+///
+/// with dictionary ["bar", "foo"], would have dictionary array representation
+///
+/// indices: [1, 0, 1, 0, 1, 0]
+/// dictionary: ["bar", "foo"]
+///
+/// The indices in principle may be any integer type.
+class ARROW_EXPORT DictionaryArray : public Array {
+ public:
+ using TypeClass = DictionaryType;
+
+ explicit DictionaryArray(const std::shared_ptr<ArrayData>& data);
+
+ DictionaryArray(const std::shared_ptr<DataType>& type,
+ const std::shared_ptr<Array>& indices,
+ const std::shared_ptr<Array>& dictionary);
+
+ /// \brief Construct DictionaryArray from dictionary and indices
+ /// array and validate
+ ///
+ /// This function does the validation of the indices and input type. It checks if
+ /// all indices are non-negative and smaller than the size of the dictionary.
+ ///
+ /// \param[in] type a dictionary type
+ /// \param[in] dictionary the dictionary with same value type as the
+ /// type object
+ /// \param[in] indices an array of non-negative integers smaller than the
+ /// size of the dictionary
+ static Result<std::shared_ptr<Array>> FromArrays(
+ const std::shared_ptr<DataType>& type, const std::shared_ptr<Array>& indices,
+ const std::shared_ptr<Array>& dictionary);
+
+ static Result<std::shared_ptr<Array>> FromArrays(
+ const std::shared_ptr<Array>& indices, const std::shared_ptr<Array>& dictionary) {
+ return FromArrays(::arrow::dictionary(indices->type(), dictionary->type()), indices,
+ dictionary);
+ }
+
+ /// \brief Transpose this DictionaryArray
+ ///
+ /// This method constructs a new dictionary array with the given dictionary
+ /// type, transposing indices using the transpose map. The type and the
+ /// transpose map are typically computed using DictionaryUnifier.
+ ///
+ /// \param[in] type the new type object
+ /// \param[in] dictionary the new dictionary
+ /// \param[in] transpose_map transposition array of this array's indices
+ /// into the target array's indices
+ /// \param[in] pool a pool to allocate the array data from
+ Result<std::shared_ptr<Array>> Transpose(
+ const std::shared_ptr<DataType>& type, const std::shared_ptr<Array>& dictionary,
+ const int32_t* transpose_map, MemoryPool* pool = default_memory_pool()) const;
+
+ /// \brief Determine whether dictionary arrays may be compared without unification
+ bool CanCompareIndices(const DictionaryArray& other) const;
+
+ /// \brief Return the dictionary for this array, which is stored as
+ /// a member of the ArrayData internal structure
+ std::shared_ptr<Array> dictionary() const;
+ std::shared_ptr<Array> indices() const;
+
+ /// \brief Return the ith value of indices, cast to int64_t. Not recommended
+ /// for use in performance-sensitive code. Does not validate whether the
+ /// value is null or out-of-bounds.
+ int64_t GetValueIndex(int64_t i) const;
+
+ const DictionaryType* dict_type() const { return dict_type_; }
+
+ private:
+ void SetData(const std::shared_ptr<ArrayData>& data);
+ const DictionaryType* dict_type_;
+ std::shared_ptr<Array> indices_;
+
+ // Lazily initialized when invoking dictionary()
+ mutable std::shared_ptr<Array> dictionary_;
+};
+
/// \brief Helper class for incremental dictionary unification
class ARROW_EXPORT DictionaryUnifier {
public:
@@ -177,4 +177,4 @@ class ARROW_EXPORT DictionaryUnifier {
std::shared_ptr<Array>* out_dict) = 0;
};
-} // namespace arrow
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_nested.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_nested.cc
index f967127c5f1..816c2d0745f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_nested.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_nested.cc
@@ -1,757 +1,757 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/array/array_nested.h"
-
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "arrow/array/array_base.h"
-#include "arrow/array/array_primitive.h"
-#include "arrow/array/concatenate.h"
-#include "arrow/array/util.h"
-#include "arrow/buffer.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/type_fwd.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/atomic_shared_ptr.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/bitmap_ops.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/logging.h"
-
-namespace arrow {
-
-using internal::BitmapAnd;
-using internal::checked_cast;
-using internal::checked_pointer_cast;
-using internal::CopyBitmap;
-
-// ----------------------------------------------------------------------
-// ListArray / LargeListArray
-
-namespace {
-
-template <typename TYPE>
-Status CleanListOffsets(const Array& offsets, MemoryPool* pool,
- std::shared_ptr<Buffer>* offset_buf_out,
- std::shared_ptr<Buffer>* validity_buf_out) {
- using offset_type = typename TYPE::offset_type;
- using OffsetArrowType = typename CTypeTraits<offset_type>::ArrowType;
- using OffsetArrayType = typename TypeTraits<OffsetArrowType>::ArrayType;
-
- const auto& typed_offsets = checked_cast<const OffsetArrayType&>(offsets);
- const int64_t num_offsets = offsets.length();
-
- if (offsets.null_count() > 0) {
- if (!offsets.IsValid(num_offsets - 1)) {
- return Status::Invalid("Last list offset should be non-null");
- }
-
- ARROW_ASSIGN_OR_RAISE(auto clean_offsets,
- AllocateBuffer(num_offsets * sizeof(offset_type), pool));
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/array/array_nested.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/array_primitive.h"
+#include "arrow/array/concatenate.h"
+#include "arrow/array/util.h"
+#include "arrow/buffer.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_fwd.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/atomic_shared_ptr.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+using internal::BitmapAnd;
+using internal::checked_cast;
+using internal::checked_pointer_cast;
+using internal::CopyBitmap;
+
+// ----------------------------------------------------------------------
+// ListArray / LargeListArray
+
+namespace {
+
+template <typename TYPE>
+Status CleanListOffsets(const Array& offsets, MemoryPool* pool,
+ std::shared_ptr<Buffer>* offset_buf_out,
+ std::shared_ptr<Buffer>* validity_buf_out) {
+ using offset_type = typename TYPE::offset_type;
+ using OffsetArrowType = typename CTypeTraits<offset_type>::ArrowType;
+ using OffsetArrayType = typename TypeTraits<OffsetArrowType>::ArrayType;
+
+ const auto& typed_offsets = checked_cast<const OffsetArrayType&>(offsets);
+ const int64_t num_offsets = offsets.length();
+
+ if (offsets.null_count() > 0) {
+ if (!offsets.IsValid(num_offsets - 1)) {
+ return Status::Invalid("Last list offset should be non-null");
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto clean_offsets,
+ AllocateBuffer(num_offsets * sizeof(offset_type), pool));
+
// Copy valid bits, ignoring the final offset (since for a length N list array,
// we have N + 1 offsets)
- ARROW_ASSIGN_OR_RAISE(
- auto clean_valid_bits,
- offsets.null_bitmap()->CopySlice(0, BitUtil::BytesForBits(num_offsets - 1)));
- *validity_buf_out = clean_valid_bits;
-
- const offset_type* raw_offsets = typed_offsets.raw_values();
- auto clean_raw_offsets =
- reinterpret_cast<offset_type*>(clean_offsets->mutable_data());
-
- // Must work backwards so we can tell how many values were in the last non-null value
- offset_type current_offset = raw_offsets[num_offsets - 1];
- for (int64_t i = num_offsets - 1; i >= 0; --i) {
- if (offsets.IsValid(i)) {
- current_offset = raw_offsets[i];
- }
- clean_raw_offsets[i] = current_offset;
- }
-
- *offset_buf_out = std::move(clean_offsets);
- } else {
- *validity_buf_out = offsets.null_bitmap();
- *offset_buf_out = typed_offsets.values();
- }
-
- return Status::OK();
-}
-
-template <typename TYPE>
-Result<std::shared_ptr<typename TypeTraits<TYPE>::ArrayType>> ListArrayFromArrays(
- const Array& offsets, const Array& values, MemoryPool* pool) {
- using offset_type = typename TYPE::offset_type;
- using ArrayType = typename TypeTraits<TYPE>::ArrayType;
- using OffsetArrowType = typename CTypeTraits<offset_type>::ArrowType;
-
- if (offsets.length() == 0) {
- return Status::Invalid("List offsets must have non-zero length");
- }
-
- if (offsets.type_id() != OffsetArrowType::type_id) {
- return Status::TypeError("List offsets must be ", OffsetArrowType::type_name());
- }
-
- std::shared_ptr<Buffer> offset_buf, validity_buf;
- RETURN_NOT_OK(CleanListOffsets<TYPE>(offsets, pool, &offset_buf, &validity_buf));
- BufferVector buffers = {validity_buf, offset_buf};
-
- auto list_type = std::make_shared<TYPE>(values.type());
- auto internal_data =
- ArrayData::Make(list_type, offsets.length() - 1, std::move(buffers),
- offsets.null_count(), offsets.offset());
- internal_data->child_data.push_back(values.data());
-
- return std::make_shared<ArrayType>(internal_data);
-}
-
-static std::shared_ptr<Array> SliceArrayWithOffsets(const Array& array, int64_t begin,
- int64_t end) {
- return array.Slice(begin, end - begin);
-}
-
-template <typename ListArrayT>
-Result<std::shared_ptr<Array>> FlattenListArray(const ListArrayT& list_array,
- MemoryPool* memory_pool) {
- const int64_t list_array_length = list_array.length();
- std::shared_ptr<arrow::Array> value_array = list_array.values();
-
- // Shortcut: if a ListArray does not contain nulls, then simply slice its
- // value array with the first and the last offsets.
- if (list_array.null_count() == 0) {
- return SliceArrayWithOffsets(*value_array, list_array.value_offset(0),
- list_array.value_offset(list_array_length));
- }
-
- // The ListArray contains nulls: there may be a non-empty sub-list behind
- // a null and it must not be contained in the result.
- std::vector<std::shared_ptr<Array>> non_null_fragments;
- int64_t valid_begin = 0;
- while (valid_begin < list_array_length) {
- int64_t valid_end = valid_begin;
- while (valid_end < list_array_length &&
- (list_array.IsValid(valid_end) || list_array.value_length(valid_end) == 0)) {
- ++valid_end;
- }
- if (valid_begin < valid_end) {
- non_null_fragments.push_back(
- SliceArrayWithOffsets(*value_array, list_array.value_offset(valid_begin),
- list_array.value_offset(valid_end)));
- }
- valid_begin = valid_end + 1; // skip null entry
- }
-
- // Final attempt to avoid invoking Concatenate().
- if (non_null_fragments.size() == 1) {
- return non_null_fragments[0];
- }
-
- return Concatenate(non_null_fragments, memory_pool);
-}
-
-} // namespace
-
-namespace internal {
-
-template <typename TYPE>
-inline void SetListData(BaseListArray<TYPE>* self, const std::shared_ptr<ArrayData>& data,
- Type::type expected_type_id) {
- ARROW_CHECK_EQ(data->buffers.size(), 2);
- ARROW_CHECK_EQ(data->type->id(), expected_type_id);
- ARROW_CHECK_EQ(data->child_data.size(), 1);
-
- self->Array::SetData(data);
-
- self->list_type_ = checked_cast<const TYPE*>(data->type.get());
- self->raw_value_offsets_ =
- data->GetValuesSafe<typename TYPE::offset_type>(1, /*offset=*/0);
-
- ARROW_CHECK_EQ(self->list_type_->value_type()->id(), data->child_data[0]->type->id());
- DCHECK(self->list_type_->value_type()->Equals(data->child_data[0]->type));
- self->values_ = MakeArray(self->data_->child_data[0]);
-}
-
-} // namespace internal
-
-ListArray::ListArray(std::shared_ptr<ArrayData> data) { SetData(std::move(data)); }
-
-LargeListArray::LargeListArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }
-
-ListArray::ListArray(std::shared_ptr<DataType> type, int64_t length,
- std::shared_ptr<Buffer> value_offsets, std::shared_ptr<Array> values,
- std::shared_ptr<Buffer> null_bitmap, int64_t null_count,
- int64_t offset) {
- ARROW_CHECK_EQ(type->id(), Type::LIST);
- auto internal_data = ArrayData::Make(
- std::move(type), length,
- BufferVector{std::move(null_bitmap), std::move(value_offsets)}, null_count, offset);
- internal_data->child_data.emplace_back(values->data());
- SetData(std::move(internal_data));
-}
-
-void ListArray::SetData(const std::shared_ptr<ArrayData>& data) {
- internal::SetListData(this, data);
-}
-
-LargeListArray::LargeListArray(const std::shared_ptr<DataType>& type, int64_t length,
- const std::shared_ptr<Buffer>& value_offsets,
- const std::shared_ptr<Array>& values,
- const std::shared_ptr<Buffer>& null_bitmap,
- int64_t null_count, int64_t offset) {
- ARROW_CHECK_EQ(type->id(), Type::LARGE_LIST);
- auto internal_data =
- ArrayData::Make(type, length, {null_bitmap, value_offsets}, null_count, offset);
- internal_data->child_data.emplace_back(values->data());
- SetData(internal_data);
-}
-
-void LargeListArray::SetData(const std::shared_ptr<ArrayData>& data) {
- internal::SetListData(this, data);
-}
-
-Result<std::shared_ptr<ListArray>> ListArray::FromArrays(const Array& offsets,
- const Array& values,
- MemoryPool* pool) {
- return ListArrayFromArrays<ListType>(offsets, values, pool);
-}
-
-Result<std::shared_ptr<LargeListArray>> LargeListArray::FromArrays(const Array& offsets,
- const Array& values,
- MemoryPool* pool) {
- return ListArrayFromArrays<LargeListType>(offsets, values, pool);
-}
-
-Result<std::shared_ptr<Array>> ListArray::Flatten(MemoryPool* memory_pool) const {
- return FlattenListArray(*this, memory_pool);
-}
-
-Result<std::shared_ptr<Array>> LargeListArray::Flatten(MemoryPool* memory_pool) const {
- return FlattenListArray(*this, memory_pool);
-}
-
-static std::shared_ptr<Array> BoxOffsets(const std::shared_ptr<DataType>& boxed_type,
- const ArrayData& data) {
- std::vector<std::shared_ptr<Buffer>> buffers = {nullptr, data.buffers[1]};
- auto offsets_data =
- std::make_shared<ArrayData>(boxed_type, data.length + 1, std::move(buffers),
- /*null_count=*/0, data.offset);
- return MakeArray(offsets_data);
-}
-
-std::shared_ptr<Array> ListArray::offsets() const { return BoxOffsets(int32(), *data_); }
-
-std::shared_ptr<Array> LargeListArray::offsets() const {
- return BoxOffsets(int64(), *data_);
-}
-
-// ----------------------------------------------------------------------
-// MapArray
-
-MapArray::MapArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }
-
-MapArray::MapArray(const std::shared_ptr<DataType>& type, int64_t length,
- const std::shared_ptr<Buffer>& offsets,
- const std::shared_ptr<Array>& values,
- const std::shared_ptr<Buffer>& null_bitmap, int64_t null_count,
- int64_t offset) {
- SetData(ArrayData::Make(type, length, {null_bitmap, offsets}, {values->data()},
- null_count, offset));
-}
-
-MapArray::MapArray(const std::shared_ptr<DataType>& type, int64_t length,
- const std::shared_ptr<Buffer>& offsets,
- const std::shared_ptr<Array>& keys,
- const std::shared_ptr<Array>& items,
- const std::shared_ptr<Buffer>& null_bitmap, int64_t null_count,
- int64_t offset) {
- auto pair_data = ArrayData::Make(type->fields()[0]->type(), keys->data()->length,
- {nullptr}, {keys->data(), items->data()}, 0, offset);
- auto map_data = ArrayData::Make(type, length, {null_bitmap, offsets}, {pair_data},
- null_count, offset);
- SetData(map_data);
-}
-
-Result<std::shared_ptr<Array>> MapArray::FromArraysInternal(
- std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
- const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
- MemoryPool* pool) {
- using offset_type = typename MapType::offset_type;
- using OffsetArrowType = typename CTypeTraits<offset_type>::ArrowType;
-
- if (offsets->length() == 0) {
- return Status::Invalid("Map offsets must have non-zero length");
- }
-
- if (offsets->type_id() != OffsetArrowType::type_id) {
- return Status::TypeError("Map offsets must be ", OffsetArrowType::type_name());
- }
-
- if (keys->null_count() != 0) {
- return Status::Invalid("Map can not contain NULL valued keys");
- }
-
- if (keys->length() != items->length()) {
- return Status::Invalid("Map key and item arrays must be equal length");
- }
-
- std::shared_ptr<Buffer> offset_buf, validity_buf;
- RETURN_NOT_OK(CleanListOffsets<MapType>(*offsets, pool, &offset_buf, &validity_buf));
-
- return std::make_shared<MapArray>(type, offsets->length() - 1, offset_buf, keys, items,
- validity_buf, offsets->null_count(),
- offsets->offset());
-}
-
-Result<std::shared_ptr<Array>> MapArray::FromArrays(const std::shared_ptr<Array>& offsets,
- const std::shared_ptr<Array>& keys,
- const std::shared_ptr<Array>& items,
- MemoryPool* pool) {
- return FromArraysInternal(std::make_shared<MapType>(keys->type(), items->type()),
- offsets, keys, items, pool);
-}
-
-Result<std::shared_ptr<Array>> MapArray::FromArrays(std::shared_ptr<DataType> type,
- const std::shared_ptr<Array>& offsets,
- const std::shared_ptr<Array>& keys,
- const std::shared_ptr<Array>& items,
- MemoryPool* pool) {
- if (type->id() != Type::MAP) {
- return Status::TypeError("Expected map type, got ", type->ToString());
- }
- const auto& map_type = checked_cast<const MapType&>(*type);
- if (!map_type.key_type()->Equals(keys->type())) {
- return Status::TypeError("Mismatching map keys type");
- }
- if (!map_type.item_type()->Equals(items->type())) {
- return Status::TypeError("Mismatching map items type");
- }
- return FromArraysInternal(std::move(type), offsets, keys, items, pool);
-}
-
-Status MapArray::ValidateChildData(
- const std::vector<std::shared_ptr<ArrayData>>& child_data) {
- if (child_data.size() != 1) {
- return Status::Invalid("Expected one child array for map array");
- }
- const auto& pair_data = child_data[0];
- if (pair_data->type->id() != Type::STRUCT) {
- return Status::Invalid("Map array child array should have struct type");
- }
- if (pair_data->null_count != 0) {
- return Status::Invalid("Map array child array should have no nulls");
- }
- if (pair_data->child_data.size() != 2) {
- return Status::Invalid("Map array child array should have two fields");
- }
- if (pair_data->child_data[0]->null_count != 0) {
- return Status::Invalid("Map array keys array should have no nulls");
- }
- return Status::OK();
-}
-
-void MapArray::SetData(const std::shared_ptr<ArrayData>& data) {
- ARROW_CHECK_OK(ValidateChildData(data->child_data));
-
- internal::SetListData(this, data, Type::MAP);
- map_type_ = checked_cast<const MapType*>(data->type.get());
- const auto& pair_data = data->child_data[0];
- keys_ = MakeArray(pair_data->child_data[0]);
- items_ = MakeArray(pair_data->child_data[1]);
-}
-
-// ----------------------------------------------------------------------
-// FixedSizeListArray
-
-FixedSizeListArray::FixedSizeListArray(const std::shared_ptr<ArrayData>& data) {
- SetData(data);
-}
-
-FixedSizeListArray::FixedSizeListArray(const std::shared_ptr<DataType>& type,
- int64_t length,
- const std::shared_ptr<Array>& values,
- const std::shared_ptr<Buffer>& null_bitmap,
- int64_t null_count, int64_t offset) {
- auto internal_data = ArrayData::Make(type, length, {null_bitmap}, null_count, offset);
- internal_data->child_data.emplace_back(values->data());
- SetData(internal_data);
-}
-
-void FixedSizeListArray::SetData(const std::shared_ptr<ArrayData>& data) {
- ARROW_CHECK_EQ(data->type->id(), Type::FIXED_SIZE_LIST);
- this->Array::SetData(data);
-
- ARROW_CHECK_EQ(list_type()->value_type()->id(), data->child_data[0]->type->id());
- DCHECK(list_type()->value_type()->Equals(data->child_data[0]->type));
- list_size_ = list_type()->list_size();
-
- ARROW_CHECK_EQ(data_->child_data.size(), 1);
- values_ = MakeArray(data_->child_data[0]);
-}
-
-const FixedSizeListType* FixedSizeListArray::list_type() const {
- return checked_cast<const FixedSizeListType*>(data_->type.get());
-}
-
-std::shared_ptr<DataType> FixedSizeListArray::value_type() const {
- return list_type()->value_type();
-}
-
-std::shared_ptr<Array> FixedSizeListArray::values() const { return values_; }
-
-Result<std::shared_ptr<Array>> FixedSizeListArray::FromArrays(
- const std::shared_ptr<Array>& values, int32_t list_size) {
- if (list_size <= 0) {
- return Status::Invalid("list_size needs to be a strict positive integer");
- }
-
- if ((values->length() % list_size) != 0) {
- return Status::Invalid(
- "The length of the values Array needs to be a multiple of the list_size");
- }
- int64_t length = values->length() / list_size;
- auto list_type = std::make_shared<FixedSizeListType>(values->type(), list_size);
- std::shared_ptr<Buffer> validity_buf;
-
- return std::make_shared<FixedSizeListArray>(list_type, length, values, validity_buf,
- /*null_count=*/0, /*offset=*/0);
-}
-
-// ----------------------------------------------------------------------
-// Struct
-
-StructArray::StructArray(const std::shared_ptr<ArrayData>& data) {
- ARROW_CHECK_EQ(data->type->id(), Type::STRUCT);
- SetData(data);
- boxed_fields_.resize(data->child_data.size());
-}
-
-StructArray::StructArray(const std::shared_ptr<DataType>& type, int64_t length,
- const std::vector<std::shared_ptr<Array>>& children,
- std::shared_ptr<Buffer> null_bitmap, int64_t null_count,
- int64_t offset) {
- ARROW_CHECK_EQ(type->id(), Type::STRUCT);
- SetData(ArrayData::Make(type, length, {null_bitmap}, null_count, offset));
- for (const auto& child : children) {
- data_->child_data.push_back(child->data());
- }
- boxed_fields_.resize(children.size());
-}
-
-Result<std::shared_ptr<StructArray>> StructArray::Make(
- const std::vector<std::shared_ptr<Array>>& children,
- const std::vector<std::shared_ptr<Field>>& fields,
- std::shared_ptr<Buffer> null_bitmap, int64_t null_count, int64_t offset) {
- if (children.size() != fields.size()) {
- return Status::Invalid("Mismatching number of fields and child arrays");
- }
- int64_t length = 0;
- if (children.size() == 0) {
- return Status::Invalid("Can't infer struct array length with 0 child arrays");
- }
- length = children.front()->length();
- for (const auto& child : children) {
- if (length != child->length()) {
- return Status::Invalid("Mismatching child array lengths");
- }
- }
- if (offset > length) {
- return Status::IndexError("Offset greater than length of child arrays");
- }
- if (null_bitmap == nullptr) {
- if (null_count > 0) {
- return Status::Invalid("null_count = ", null_count, " but no null bitmap given");
- }
- null_count = 0;
- }
- return std::make_shared<StructArray>(struct_(fields), length - offset, children,
- null_bitmap, null_count, offset);
-}
-
-Result<std::shared_ptr<StructArray>> StructArray::Make(
- const std::vector<std::shared_ptr<Array>>& children,
- const std::vector<std::string>& field_names, std::shared_ptr<Buffer> null_bitmap,
- int64_t null_count, int64_t offset) {
- if (children.size() != field_names.size()) {
- return Status::Invalid("Mismatching number of field names and child arrays");
- }
- std::vector<std::shared_ptr<Field>> fields(children.size());
- for (size_t i = 0; i < children.size(); ++i) {
- fields[i] = ::arrow::field(field_names[i], children[i]->type());
- }
- return Make(children, fields, std::move(null_bitmap), null_count, offset);
-}
-
-const StructType* StructArray::struct_type() const {
- return checked_cast<const StructType*>(data_->type.get());
-}
-
-const ArrayVector& StructArray::fields() const {
- for (int i = 0; i < num_fields(); ++i) {
- (void)field(i);
- }
- return boxed_fields_;
-}
-
-std::shared_ptr<Array> StructArray::field(int i) const {
- std::shared_ptr<Array> result = internal::atomic_load(&boxed_fields_[i]);
- if (!result) {
- std::shared_ptr<ArrayData> field_data;
- if (data_->offset != 0 || data_->child_data[i]->length != data_->length) {
- field_data = data_->child_data[i]->Slice(data_->offset, data_->length);
- } else {
- field_data = data_->child_data[i];
- }
- result = MakeArray(field_data);
- internal::atomic_store(&boxed_fields_[i], result);
- }
- return result;
-}
-
-std::shared_ptr<Array> StructArray::GetFieldByName(const std::string& name) const {
- int i = struct_type()->GetFieldIndex(name);
- return i == -1 ? nullptr : field(i);
-}
-
-Result<ArrayVector> StructArray::Flatten(MemoryPool* pool) const {
- ArrayVector flattened;
- flattened.reserve(data_->child_data.size());
- std::shared_ptr<Buffer> null_bitmap = data_->buffers[0];
-
- for (const auto& child_data_ptr : data_->child_data) {
- auto child_data = child_data_ptr->Copy();
-
- std::shared_ptr<Buffer> flattened_null_bitmap;
- int64_t flattened_null_count = kUnknownNullCount;
-
- // Need to adjust for parent offset
- if (data_->offset != 0 || data_->length != child_data->length) {
- child_data = child_data->Slice(data_->offset, data_->length);
- }
- std::shared_ptr<Buffer> child_null_bitmap = child_data->buffers[0];
- const int64_t child_offset = child_data->offset;
-
- // The validity of a flattened datum is the logical AND of the struct
- // element's validity and the individual field element's validity.
- if (null_bitmap && child_null_bitmap) {
- ARROW_ASSIGN_OR_RAISE(
- flattened_null_bitmap,
- BitmapAnd(pool, child_null_bitmap->data(), child_offset, null_bitmap_data_,
- data_->offset, data_->length, child_offset));
- } else if (child_null_bitmap) {
- flattened_null_bitmap = child_null_bitmap;
- flattened_null_count = child_data->null_count;
- } else if (null_bitmap) {
- if (child_offset == data_->offset) {
- flattened_null_bitmap = null_bitmap;
- } else {
- ARROW_ASSIGN_OR_RAISE(
- flattened_null_bitmap,
- CopyBitmap(pool, null_bitmap_data_, data_->offset, data_->length));
- }
- flattened_null_count = data_->null_count;
- } else {
- flattened_null_count = 0;
- }
-
- auto flattened_data = child_data->Copy();
- flattened_data->buffers[0] = flattened_null_bitmap;
- flattened_data->null_count = flattened_null_count;
-
- flattened.push_back(MakeArray(flattened_data));
- }
-
- return flattened;
-}
-
-// ----------------------------------------------------------------------
-// UnionArray
-
-void UnionArray::SetData(std::shared_ptr<ArrayData> data) {
- this->Array::SetData(std::move(data));
-
- union_type_ = checked_cast<const UnionType*>(data_->type.get());
-
- ARROW_CHECK_GE(data_->buffers.size(), 2);
- raw_type_codes_ = data->GetValuesSafe<int8_t>(1, /*offset=*/0);
- boxed_fields_.resize(data_->child_data.size());
-}
-
-void SparseUnionArray::SetData(std::shared_ptr<ArrayData> data) {
- this->UnionArray::SetData(std::move(data));
- ARROW_CHECK_EQ(data_->type->id(), Type::SPARSE_UNION);
- ARROW_CHECK_EQ(data_->buffers.size(), 2);
-
- // No validity bitmap
- ARROW_CHECK_EQ(data_->buffers[0], nullptr);
-}
-
-void DenseUnionArray::SetData(const std::shared_ptr<ArrayData>& data) {
- this->UnionArray::SetData(std::move(data));
-
- ARROW_CHECK_EQ(data_->type->id(), Type::DENSE_UNION);
- ARROW_CHECK_EQ(data_->buffers.size(), 3);
-
- // No validity bitmap
- ARROW_CHECK_EQ(data_->buffers[0], nullptr);
-
- raw_value_offsets_ = data->GetValuesSafe<int32_t>(2, /*offset=*/0);
-}
-
-SparseUnionArray::SparseUnionArray(std::shared_ptr<ArrayData> data) {
- SetData(std::move(data));
-}
-
-SparseUnionArray::SparseUnionArray(std::shared_ptr<DataType> type, int64_t length,
- ArrayVector children,
- std::shared_ptr<Buffer> type_codes, int64_t offset) {
- auto internal_data = ArrayData::Make(std::move(type), length,
- BufferVector{nullptr, std::move(type_codes)},
- /*null_count=*/0, offset);
- for (const auto& child : children) {
- internal_data->child_data.push_back(child->data());
- }
- SetData(std::move(internal_data));
-}
-
-DenseUnionArray::DenseUnionArray(const std::shared_ptr<ArrayData>& data) {
- SetData(data);
-}
-
-DenseUnionArray::DenseUnionArray(std::shared_ptr<DataType> type, int64_t length,
- ArrayVector children, std::shared_ptr<Buffer> type_ids,
- std::shared_ptr<Buffer> value_offsets, int64_t offset) {
- auto internal_data = ArrayData::Make(
- std::move(type), length,
- BufferVector{nullptr, std::move(type_ids), std::move(value_offsets)},
- /*null_count=*/0, offset);
- for (const auto& child : children) {
- internal_data->child_data.push_back(child->data());
- }
- SetData(internal_data);
-}
-
-Result<std::shared_ptr<Array>> DenseUnionArray::Make(
- const Array& type_ids, const Array& value_offsets, ArrayVector children,
- std::vector<std::string> field_names, std::vector<type_code_t> type_codes) {
- if (value_offsets.length() == 0) {
- return Status::Invalid("UnionArray offsets must have non-zero length");
- }
-
- if (value_offsets.type_id() != Type::INT32) {
- return Status::TypeError("UnionArray offsets must be signed int32");
- }
-
- if (type_ids.type_id() != Type::INT8) {
- return Status::TypeError("UnionArray type_ids must be signed int8");
- }
-
- if (type_ids.null_count() != 0) {
- return Status::Invalid("Union type ids may not have nulls");
- }
-
- if (value_offsets.null_count() != 0) {
- return Status::Invalid("Make does not allow nulls in value_offsets");
- }
-
- if (field_names.size() > 0 && field_names.size() != children.size()) {
- return Status::Invalid("field_names must have the same length as children");
- }
-
- if (type_codes.size() > 0 && type_codes.size() != children.size()) {
- return Status::Invalid("type_codes must have the same length as children");
- }
-
- BufferVector buffers = {nullptr, checked_cast<const Int8Array&>(type_ids).values(),
- checked_cast<const Int32Array&>(value_offsets).values()};
-
- auto union_type = dense_union(children, std::move(field_names), std::move(type_codes));
- auto internal_data =
- ArrayData::Make(std::move(union_type), type_ids.length(), std::move(buffers),
- /*null_count=*/0, type_ids.offset());
- for (const auto& child : children) {
- internal_data->child_data.push_back(child->data());
- }
- return std::make_shared<DenseUnionArray>(std::move(internal_data));
-}
-
-Result<std::shared_ptr<Array>> SparseUnionArray::Make(
- const Array& type_ids, ArrayVector children, std::vector<std::string> field_names,
- std::vector<int8_t> type_codes) {
- if (type_ids.type_id() != Type::INT8) {
- return Status::TypeError("UnionArray type_ids must be signed int8");
- }
-
- if (type_ids.null_count() != 0) {
- return Status::Invalid("Union type ids may not have nulls");
- }
-
- if (field_names.size() > 0 && field_names.size() != children.size()) {
- return Status::Invalid("field_names must have the same length as children");
- }
-
- if (type_codes.size() > 0 && type_codes.size() != children.size()) {
- return Status::Invalid("type_codes must have the same length as children");
- }
-
- BufferVector buffers = {nullptr, checked_cast<const Int8Array&>(type_ids).values()};
- auto union_type = sparse_union(children, std::move(field_names), std::move(type_codes));
- auto internal_data =
- ArrayData::Make(std::move(union_type), type_ids.length(), std::move(buffers),
- /*null_count=*/0, type_ids.offset());
- for (const auto& child : children) {
- internal_data->child_data.push_back(child->data());
- if (child->length() != type_ids.length()) {
- return Status::Invalid(
- "Sparse UnionArray must have len(child) == len(type_ids) for all children");
- }
- }
- return std::make_shared<SparseUnionArray>(std::move(internal_data));
-}
-
-std::shared_ptr<Array> UnionArray::child(int i) const { return field(i); }
-
-std::shared_ptr<Array> UnionArray::field(int i) const {
- if (i < 0 ||
- static_cast<decltype(boxed_fields_)::size_type>(i) >= boxed_fields_.size()) {
- return nullptr;
- }
- std::shared_ptr<Array> result = internal::atomic_load(&boxed_fields_[i]);
- if (!result) {
- std::shared_ptr<ArrayData> child_data = data_->child_data[i]->Copy();
- if (mode() == UnionMode::SPARSE) {
- // Sparse union: need to adjust child if union is sliced
- // (for dense unions, the need to lookup through the offsets
- // makes this unnecessary)
- if (data_->offset != 0 || child_data->length > data_->length) {
- child_data = child_data->Slice(data_->offset, data_->length);
- }
- }
- result = MakeArray(child_data);
- internal::atomic_store(&boxed_fields_[i], result);
- }
- return result;
-}
-
-} // namespace arrow
+ ARROW_ASSIGN_OR_RAISE(
+ auto clean_valid_bits,
+ offsets.null_bitmap()->CopySlice(0, BitUtil::BytesForBits(num_offsets - 1)));
+ *validity_buf_out = clean_valid_bits;
+
+ const offset_type* raw_offsets = typed_offsets.raw_values();
+ auto clean_raw_offsets =
+ reinterpret_cast<offset_type*>(clean_offsets->mutable_data());
+
+ // Must work backwards so we can tell how many values were in the last non-null value
+ offset_type current_offset = raw_offsets[num_offsets - 1];
+ for (int64_t i = num_offsets - 1; i >= 0; --i) {
+ if (offsets.IsValid(i)) {
+ current_offset = raw_offsets[i];
+ }
+ clean_raw_offsets[i] = current_offset;
+ }
+
+ *offset_buf_out = std::move(clean_offsets);
+ } else {
+ *validity_buf_out = offsets.null_bitmap();
+ *offset_buf_out = typed_offsets.values();
+ }
+
+ return Status::OK();
+}
+
+template <typename TYPE>
+Result<std::shared_ptr<typename TypeTraits<TYPE>::ArrayType>> ListArrayFromArrays(
+ const Array& offsets, const Array& values, MemoryPool* pool) {
+ using offset_type = typename TYPE::offset_type;
+ using ArrayType = typename TypeTraits<TYPE>::ArrayType;
+ using OffsetArrowType = typename CTypeTraits<offset_type>::ArrowType;
+
+ if (offsets.length() == 0) {
+ return Status::Invalid("List offsets must have non-zero length");
+ }
+
+ if (offsets.type_id() != OffsetArrowType::type_id) {
+ return Status::TypeError("List offsets must be ", OffsetArrowType::type_name());
+ }
+
+ std::shared_ptr<Buffer> offset_buf, validity_buf;
+ RETURN_NOT_OK(CleanListOffsets<TYPE>(offsets, pool, &offset_buf, &validity_buf));
+ BufferVector buffers = {validity_buf, offset_buf};
+
+ auto list_type = std::make_shared<TYPE>(values.type());
+ auto internal_data =
+ ArrayData::Make(list_type, offsets.length() - 1, std::move(buffers),
+ offsets.null_count(), offsets.offset());
+ internal_data->child_data.push_back(values.data());
+
+ return std::make_shared<ArrayType>(internal_data);
+}
+
+static std::shared_ptr<Array> SliceArrayWithOffsets(const Array& array, int64_t begin,
+ int64_t end) {
+ return array.Slice(begin, end - begin);
+}
+
+template <typename ListArrayT>
+Result<std::shared_ptr<Array>> FlattenListArray(const ListArrayT& list_array,
+ MemoryPool* memory_pool) {
+ const int64_t list_array_length = list_array.length();
+ std::shared_ptr<arrow::Array> value_array = list_array.values();
+
+ // Shortcut: if a ListArray does not contain nulls, then simply slice its
+ // value array with the first and the last offsets.
+ if (list_array.null_count() == 0) {
+ return SliceArrayWithOffsets(*value_array, list_array.value_offset(0),
+ list_array.value_offset(list_array_length));
+ }
+
+ // The ListArray contains nulls: there may be a non-empty sub-list behind
+ // a null and it must not be contained in the result.
+ std::vector<std::shared_ptr<Array>> non_null_fragments;
+ int64_t valid_begin = 0;
+ while (valid_begin < list_array_length) {
+ int64_t valid_end = valid_begin;
+ while (valid_end < list_array_length &&
+ (list_array.IsValid(valid_end) || list_array.value_length(valid_end) == 0)) {
+ ++valid_end;
+ }
+ if (valid_begin < valid_end) {
+ non_null_fragments.push_back(
+ SliceArrayWithOffsets(*value_array, list_array.value_offset(valid_begin),
+ list_array.value_offset(valid_end)));
+ }
+ valid_begin = valid_end + 1; // skip null entry
+ }
+
+ // Final attempt to avoid invoking Concatenate().
+ if (non_null_fragments.size() == 1) {
+ return non_null_fragments[0];
+ }
+
+ return Concatenate(non_null_fragments, memory_pool);
+}
+
+} // namespace
+
+namespace internal {
+
+template <typename TYPE>
+inline void SetListData(BaseListArray<TYPE>* self, const std::shared_ptr<ArrayData>& data,
+ Type::type expected_type_id) {
+ ARROW_CHECK_EQ(data->buffers.size(), 2);
+ ARROW_CHECK_EQ(data->type->id(), expected_type_id);
+ ARROW_CHECK_EQ(data->child_data.size(), 1);
+
+ self->Array::SetData(data);
+
+ self->list_type_ = checked_cast<const TYPE*>(data->type.get());
+ self->raw_value_offsets_ =
+ data->GetValuesSafe<typename TYPE::offset_type>(1, /*offset=*/0);
+
+ ARROW_CHECK_EQ(self->list_type_->value_type()->id(), data->child_data[0]->type->id());
+ DCHECK(self->list_type_->value_type()->Equals(data->child_data[0]->type));
+ self->values_ = MakeArray(self->data_->child_data[0]);
+}
+
+} // namespace internal
+
+ListArray::ListArray(std::shared_ptr<ArrayData> data) { SetData(std::move(data)); }
+
+LargeListArray::LargeListArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }
+
+ListArray::ListArray(std::shared_ptr<DataType> type, int64_t length,
+ std::shared_ptr<Buffer> value_offsets, std::shared_ptr<Array> values,
+ std::shared_ptr<Buffer> null_bitmap, int64_t null_count,
+ int64_t offset) {
+ ARROW_CHECK_EQ(type->id(), Type::LIST);
+ auto internal_data = ArrayData::Make(
+ std::move(type), length,
+ BufferVector{std::move(null_bitmap), std::move(value_offsets)}, null_count, offset);
+ internal_data->child_data.emplace_back(values->data());
+ SetData(std::move(internal_data));
+}
+
+void ListArray::SetData(const std::shared_ptr<ArrayData>& data) {
+ internal::SetListData(this, data);
+}
+
+LargeListArray::LargeListArray(const std::shared_ptr<DataType>& type, int64_t length,
+ const std::shared_ptr<Buffer>& value_offsets,
+ const std::shared_ptr<Array>& values,
+ const std::shared_ptr<Buffer>& null_bitmap,
+ int64_t null_count, int64_t offset) {
+ ARROW_CHECK_EQ(type->id(), Type::LARGE_LIST);
+ auto internal_data =
+ ArrayData::Make(type, length, {null_bitmap, value_offsets}, null_count, offset);
+ internal_data->child_data.emplace_back(values->data());
+ SetData(internal_data);
+}
+
+void LargeListArray::SetData(const std::shared_ptr<ArrayData>& data) {
+ internal::SetListData(this, data);
+}
+
+Result<std::shared_ptr<ListArray>> ListArray::FromArrays(const Array& offsets,
+ const Array& values,
+ MemoryPool* pool) {
+ return ListArrayFromArrays<ListType>(offsets, values, pool);
+}
+
+Result<std::shared_ptr<LargeListArray>> LargeListArray::FromArrays(const Array& offsets,
+ const Array& values,
+ MemoryPool* pool) {
+ return ListArrayFromArrays<LargeListType>(offsets, values, pool);
+}
+
+Result<std::shared_ptr<Array>> ListArray::Flatten(MemoryPool* memory_pool) const {
+ return FlattenListArray(*this, memory_pool);
+}
+
+Result<std::shared_ptr<Array>> LargeListArray::Flatten(MemoryPool* memory_pool) const {
+ return FlattenListArray(*this, memory_pool);
+}
+
+static std::shared_ptr<Array> BoxOffsets(const std::shared_ptr<DataType>& boxed_type,
+ const ArrayData& data) {
+ std::vector<std::shared_ptr<Buffer>> buffers = {nullptr, data.buffers[1]};
+ auto offsets_data =
+ std::make_shared<ArrayData>(boxed_type, data.length + 1, std::move(buffers),
+ /*null_count=*/0, data.offset);
+ return MakeArray(offsets_data);
+}
+
+std::shared_ptr<Array> ListArray::offsets() const { return BoxOffsets(int32(), *data_); }
+
+std::shared_ptr<Array> LargeListArray::offsets() const {
+ return BoxOffsets(int64(), *data_);
+}
+
+// ----------------------------------------------------------------------
+// MapArray
+
+MapArray::MapArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }
+
+MapArray::MapArray(const std::shared_ptr<DataType>& type, int64_t length,
+ const std::shared_ptr<Buffer>& offsets,
+ const std::shared_ptr<Array>& values,
+ const std::shared_ptr<Buffer>& null_bitmap, int64_t null_count,
+ int64_t offset) {
+ SetData(ArrayData::Make(type, length, {null_bitmap, offsets}, {values->data()},
+ null_count, offset));
+}
+
+MapArray::MapArray(const std::shared_ptr<DataType>& type, int64_t length,
+ const std::shared_ptr<Buffer>& offsets,
+ const std::shared_ptr<Array>& keys,
+ const std::shared_ptr<Array>& items,
+ const std::shared_ptr<Buffer>& null_bitmap, int64_t null_count,
+ int64_t offset) {
+ auto pair_data = ArrayData::Make(type->fields()[0]->type(), keys->data()->length,
+ {nullptr}, {keys->data(), items->data()}, 0, offset);
+ auto map_data = ArrayData::Make(type, length, {null_bitmap, offsets}, {pair_data},
+ null_count, offset);
+ SetData(map_data);
+}
+
+Result<std::shared_ptr<Array>> MapArray::FromArraysInternal(
+ std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
+ const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
+ MemoryPool* pool) {
+ using offset_type = typename MapType::offset_type;
+ using OffsetArrowType = typename CTypeTraits<offset_type>::ArrowType;
+
+ if (offsets->length() == 0) {
+ return Status::Invalid("Map offsets must have non-zero length");
+ }
+
+ if (offsets->type_id() != OffsetArrowType::type_id) {
+ return Status::TypeError("Map offsets must be ", OffsetArrowType::type_name());
+ }
+
+ if (keys->null_count() != 0) {
+ return Status::Invalid("Map can not contain NULL valued keys");
+ }
+
+ if (keys->length() != items->length()) {
+ return Status::Invalid("Map key and item arrays must be equal length");
+ }
+
+ std::shared_ptr<Buffer> offset_buf, validity_buf;
+ RETURN_NOT_OK(CleanListOffsets<MapType>(*offsets, pool, &offset_buf, &validity_buf));
+
+ return std::make_shared<MapArray>(type, offsets->length() - 1, offset_buf, keys, items,
+ validity_buf, offsets->null_count(),
+ offsets->offset());
+}
+
+Result<std::shared_ptr<Array>> MapArray::FromArrays(const std::shared_ptr<Array>& offsets,
+ const std::shared_ptr<Array>& keys,
+ const std::shared_ptr<Array>& items,
+ MemoryPool* pool) {
+ return FromArraysInternal(std::make_shared<MapType>(keys->type(), items->type()),
+ offsets, keys, items, pool);
+}
+
+Result<std::shared_ptr<Array>> MapArray::FromArrays(std::shared_ptr<DataType> type,
+ const std::shared_ptr<Array>& offsets,
+ const std::shared_ptr<Array>& keys,
+ const std::shared_ptr<Array>& items,
+ MemoryPool* pool) {
+ if (type->id() != Type::MAP) {
+ return Status::TypeError("Expected map type, got ", type->ToString());
+ }
+ const auto& map_type = checked_cast<const MapType&>(*type);
+ if (!map_type.key_type()->Equals(keys->type())) {
+ return Status::TypeError("Mismatching map keys type");
+ }
+ if (!map_type.item_type()->Equals(items->type())) {
+ return Status::TypeError("Mismatching map items type");
+ }
+ return FromArraysInternal(std::move(type), offsets, keys, items, pool);
+}
+
+Status MapArray::ValidateChildData(
+ const std::vector<std::shared_ptr<ArrayData>>& child_data) {
+ if (child_data.size() != 1) {
+ return Status::Invalid("Expected one child array for map array");
+ }
+ const auto& pair_data = child_data[0];
+ if (pair_data->type->id() != Type::STRUCT) {
+ return Status::Invalid("Map array child array should have struct type");
+ }
+ if (pair_data->null_count != 0) {
+ return Status::Invalid("Map array child array should have no nulls");
+ }
+ if (pair_data->child_data.size() != 2) {
+ return Status::Invalid("Map array child array should have two fields");
+ }
+ if (pair_data->child_data[0]->null_count != 0) {
+ return Status::Invalid("Map array keys array should have no nulls");
+ }
+ return Status::OK();
+}
+
+void MapArray::SetData(const std::shared_ptr<ArrayData>& data) {
+ ARROW_CHECK_OK(ValidateChildData(data->child_data));
+
+ internal::SetListData(this, data, Type::MAP);
+ map_type_ = checked_cast<const MapType*>(data->type.get());
+ const auto& pair_data = data->child_data[0];
+ keys_ = MakeArray(pair_data->child_data[0]);
+ items_ = MakeArray(pair_data->child_data[1]);
+}
+
+// ----------------------------------------------------------------------
+// FixedSizeListArray
+
+FixedSizeListArray::FixedSizeListArray(const std::shared_ptr<ArrayData>& data) {
+ SetData(data);
+}
+
+FixedSizeListArray::FixedSizeListArray(const std::shared_ptr<DataType>& type,
+ int64_t length,
+ const std::shared_ptr<Array>& values,
+ const std::shared_ptr<Buffer>& null_bitmap,
+ int64_t null_count, int64_t offset) {
+ auto internal_data = ArrayData::Make(type, length, {null_bitmap}, null_count, offset);
+ internal_data->child_data.emplace_back(values->data());
+ SetData(internal_data);
+}
+
+void FixedSizeListArray::SetData(const std::shared_ptr<ArrayData>& data) {
+ ARROW_CHECK_EQ(data->type->id(), Type::FIXED_SIZE_LIST);
+ this->Array::SetData(data);
+
+ ARROW_CHECK_EQ(list_type()->value_type()->id(), data->child_data[0]->type->id());
+ DCHECK(list_type()->value_type()->Equals(data->child_data[0]->type));
+ list_size_ = list_type()->list_size();
+
+ ARROW_CHECK_EQ(data_->child_data.size(), 1);
+ values_ = MakeArray(data_->child_data[0]);
+}
+
+const FixedSizeListType* FixedSizeListArray::list_type() const {
+ return checked_cast<const FixedSizeListType*>(data_->type.get());
+}
+
+std::shared_ptr<DataType> FixedSizeListArray::value_type() const {
+ return list_type()->value_type();
+}
+
+std::shared_ptr<Array> FixedSizeListArray::values() const { return values_; }
+
+Result<std::shared_ptr<Array>> FixedSizeListArray::FromArrays(
+ const std::shared_ptr<Array>& values, int32_t list_size) {
+ if (list_size <= 0) {
+ return Status::Invalid("list_size needs to be a strict positive integer");
+ }
+
+ if ((values->length() % list_size) != 0) {
+ return Status::Invalid(
+ "The length of the values Array needs to be a multiple of the list_size");
+ }
+ int64_t length = values->length() / list_size;
+ auto list_type = std::make_shared<FixedSizeListType>(values->type(), list_size);
+ std::shared_ptr<Buffer> validity_buf;
+
+ return std::make_shared<FixedSizeListArray>(list_type, length, values, validity_buf,
+ /*null_count=*/0, /*offset=*/0);
+}
+
+// ----------------------------------------------------------------------
+// Struct
+
+StructArray::StructArray(const std::shared_ptr<ArrayData>& data) {
+ ARROW_CHECK_EQ(data->type->id(), Type::STRUCT);
+ SetData(data);
+ boxed_fields_.resize(data->child_data.size());
+}
+
+StructArray::StructArray(const std::shared_ptr<DataType>& type, int64_t length,
+ const std::vector<std::shared_ptr<Array>>& children,
+ std::shared_ptr<Buffer> null_bitmap, int64_t null_count,
+ int64_t offset) {
+ ARROW_CHECK_EQ(type->id(), Type::STRUCT);
+ SetData(ArrayData::Make(type, length, {null_bitmap}, null_count, offset));
+ for (const auto& child : children) {
+ data_->child_data.push_back(child->data());
+ }
+ boxed_fields_.resize(children.size());
+}
+
+Result<std::shared_ptr<StructArray>> StructArray::Make(
+ const std::vector<std::shared_ptr<Array>>& children,
+ const std::vector<std::shared_ptr<Field>>& fields,
+ std::shared_ptr<Buffer> null_bitmap, int64_t null_count, int64_t offset) {
+ if (children.size() != fields.size()) {
+ return Status::Invalid("Mismatching number of fields and child arrays");
+ }
+ int64_t length = 0;
+ if (children.size() == 0) {
+ return Status::Invalid("Can't infer struct array length with 0 child arrays");
+ }
+ length = children.front()->length();
+ for (const auto& child : children) {
+ if (length != child->length()) {
+ return Status::Invalid("Mismatching child array lengths");
+ }
+ }
+ if (offset > length) {
+ return Status::IndexError("Offset greater than length of child arrays");
+ }
+ if (null_bitmap == nullptr) {
+ if (null_count > 0) {
+ return Status::Invalid("null_count = ", null_count, " but no null bitmap given");
+ }
+ null_count = 0;
+ }
+ return std::make_shared<StructArray>(struct_(fields), length - offset, children,
+ null_bitmap, null_count, offset);
+}
+
+Result<std::shared_ptr<StructArray>> StructArray::Make(
+ const std::vector<std::shared_ptr<Array>>& children,
+ const std::vector<std::string>& field_names, std::shared_ptr<Buffer> null_bitmap,
+ int64_t null_count, int64_t offset) {
+ if (children.size() != field_names.size()) {
+ return Status::Invalid("Mismatching number of field names and child arrays");
+ }
+ std::vector<std::shared_ptr<Field>> fields(children.size());
+ for (size_t i = 0; i < children.size(); ++i) {
+ fields[i] = ::arrow::field(field_names[i], children[i]->type());
+ }
+ return Make(children, fields, std::move(null_bitmap), null_count, offset);
+}
+
+const StructType* StructArray::struct_type() const {
+ return checked_cast<const StructType*>(data_->type.get());
+}
+
+const ArrayVector& StructArray::fields() const {
+ for (int i = 0; i < num_fields(); ++i) {
+ (void)field(i);
+ }
+ return boxed_fields_;
+}
+
+std::shared_ptr<Array> StructArray::field(int i) const {
+ std::shared_ptr<Array> result = internal::atomic_load(&boxed_fields_[i]);
+ if (!result) {
+ std::shared_ptr<ArrayData> field_data;
+ if (data_->offset != 0 || data_->child_data[i]->length != data_->length) {
+ field_data = data_->child_data[i]->Slice(data_->offset, data_->length);
+ } else {
+ field_data = data_->child_data[i];
+ }
+ result = MakeArray(field_data);
+ internal::atomic_store(&boxed_fields_[i], result);
+ }
+ return result;
+}
+
+std::shared_ptr<Array> StructArray::GetFieldByName(const std::string& name) const {
+ int i = struct_type()->GetFieldIndex(name);
+ return i == -1 ? nullptr : field(i);
+}
+
+Result<ArrayVector> StructArray::Flatten(MemoryPool* pool) const {
+ ArrayVector flattened;
+ flattened.reserve(data_->child_data.size());
+ std::shared_ptr<Buffer> null_bitmap = data_->buffers[0];
+
+ for (const auto& child_data_ptr : data_->child_data) {
+ auto child_data = child_data_ptr->Copy();
+
+ std::shared_ptr<Buffer> flattened_null_bitmap;
+ int64_t flattened_null_count = kUnknownNullCount;
+
+ // Need to adjust for parent offset
+ if (data_->offset != 0 || data_->length != child_data->length) {
+ child_data = child_data->Slice(data_->offset, data_->length);
+ }
+ std::shared_ptr<Buffer> child_null_bitmap = child_data->buffers[0];
+ const int64_t child_offset = child_data->offset;
+
+ // The validity of a flattened datum is the logical AND of the struct
+ // element's validity and the individual field element's validity.
+ if (null_bitmap && child_null_bitmap) {
+ ARROW_ASSIGN_OR_RAISE(
+ flattened_null_bitmap,
+ BitmapAnd(pool, child_null_bitmap->data(), child_offset, null_bitmap_data_,
+ data_->offset, data_->length, child_offset));
+ } else if (child_null_bitmap) {
+ flattened_null_bitmap = child_null_bitmap;
+ flattened_null_count = child_data->null_count;
+ } else if (null_bitmap) {
+ if (child_offset == data_->offset) {
+ flattened_null_bitmap = null_bitmap;
+ } else {
+ ARROW_ASSIGN_OR_RAISE(
+ flattened_null_bitmap,
+ CopyBitmap(pool, null_bitmap_data_, data_->offset, data_->length));
+ }
+ flattened_null_count = data_->null_count;
+ } else {
+ flattened_null_count = 0;
+ }
+
+ auto flattened_data = child_data->Copy();
+ flattened_data->buffers[0] = flattened_null_bitmap;
+ flattened_data->null_count = flattened_null_count;
+
+ flattened.push_back(MakeArray(flattened_data));
+ }
+
+ return flattened;
+}
+
+// ----------------------------------------------------------------------
+// UnionArray
+
+void UnionArray::SetData(std::shared_ptr<ArrayData> data) {
+ this->Array::SetData(std::move(data));
+
+ union_type_ = checked_cast<const UnionType*>(data_->type.get());
+
+ ARROW_CHECK_GE(data_->buffers.size(), 2);
+ raw_type_codes_ = data->GetValuesSafe<int8_t>(1, /*offset=*/0);
+ boxed_fields_.resize(data_->child_data.size());
+}
+
+void SparseUnionArray::SetData(std::shared_ptr<ArrayData> data) {
+ this->UnionArray::SetData(std::move(data));
+ ARROW_CHECK_EQ(data_->type->id(), Type::SPARSE_UNION);
+ ARROW_CHECK_EQ(data_->buffers.size(), 2);
+
+ // No validity bitmap
+ ARROW_CHECK_EQ(data_->buffers[0], nullptr);
+}
+
+void DenseUnionArray::SetData(const std::shared_ptr<ArrayData>& data) {
+ this->UnionArray::SetData(std::move(data));
+
+ ARROW_CHECK_EQ(data_->type->id(), Type::DENSE_UNION);
+ ARROW_CHECK_EQ(data_->buffers.size(), 3);
+
+ // No validity bitmap
+ ARROW_CHECK_EQ(data_->buffers[0], nullptr);
+
+ raw_value_offsets_ = data->GetValuesSafe<int32_t>(2, /*offset=*/0);
+}
+
+SparseUnionArray::SparseUnionArray(std::shared_ptr<ArrayData> data) {
+ SetData(std::move(data));
+}
+
+SparseUnionArray::SparseUnionArray(std::shared_ptr<DataType> type, int64_t length,
+ ArrayVector children,
+ std::shared_ptr<Buffer> type_codes, int64_t offset) {
+ auto internal_data = ArrayData::Make(std::move(type), length,
+ BufferVector{nullptr, std::move(type_codes)},
+ /*null_count=*/0, offset);
+ for (const auto& child : children) {
+ internal_data->child_data.push_back(child->data());
+ }
+ SetData(std::move(internal_data));
+}
+
+DenseUnionArray::DenseUnionArray(const std::shared_ptr<ArrayData>& data) {
+ SetData(data);
+}
+
+DenseUnionArray::DenseUnionArray(std::shared_ptr<DataType> type, int64_t length,
+ ArrayVector children, std::shared_ptr<Buffer> type_ids,
+ std::shared_ptr<Buffer> value_offsets, int64_t offset) {
+ auto internal_data = ArrayData::Make(
+ std::move(type), length,
+ BufferVector{nullptr, std::move(type_ids), std::move(value_offsets)},
+ /*null_count=*/0, offset);
+ for (const auto& child : children) {
+ internal_data->child_data.push_back(child->data());
+ }
+ SetData(internal_data);
+}
+
+Result<std::shared_ptr<Array>> DenseUnionArray::Make(
+ const Array& type_ids, const Array& value_offsets, ArrayVector children,
+ std::vector<std::string> field_names, std::vector<type_code_t> type_codes) {
+ if (value_offsets.length() == 0) {
+ return Status::Invalid("UnionArray offsets must have non-zero length");
+ }
+
+ if (value_offsets.type_id() != Type::INT32) {
+ return Status::TypeError("UnionArray offsets must be signed int32");
+ }
+
+ if (type_ids.type_id() != Type::INT8) {
+ return Status::TypeError("UnionArray type_ids must be signed int8");
+ }
+
+ if (type_ids.null_count() != 0) {
+ return Status::Invalid("Union type ids may not have nulls");
+ }
+
+ if (value_offsets.null_count() != 0) {
+ return Status::Invalid("Make does not allow nulls in value_offsets");
+ }
+
+ if (field_names.size() > 0 && field_names.size() != children.size()) {
+ return Status::Invalid("field_names must have the same length as children");
+ }
+
+ if (type_codes.size() > 0 && type_codes.size() != children.size()) {
+ return Status::Invalid("type_codes must have the same length as children");
+ }
+
+ BufferVector buffers = {nullptr, checked_cast<const Int8Array&>(type_ids).values(),
+ checked_cast<const Int32Array&>(value_offsets).values()};
+
+ auto union_type = dense_union(children, std::move(field_names), std::move(type_codes));
+ auto internal_data =
+ ArrayData::Make(std::move(union_type), type_ids.length(), std::move(buffers),
+ /*null_count=*/0, type_ids.offset());
+ for (const auto& child : children) {
+ internal_data->child_data.push_back(child->data());
+ }
+ return std::make_shared<DenseUnionArray>(std::move(internal_data));
+}
+
+Result<std::shared_ptr<Array>> SparseUnionArray::Make(
+ const Array& type_ids, ArrayVector children, std::vector<std::string> field_names,
+ std::vector<int8_t> type_codes) {
+ if (type_ids.type_id() != Type::INT8) {
+ return Status::TypeError("UnionArray type_ids must be signed int8");
+ }
+
+ if (type_ids.null_count() != 0) {
+ return Status::Invalid("Union type ids may not have nulls");
+ }
+
+ if (field_names.size() > 0 && field_names.size() != children.size()) {
+ return Status::Invalid("field_names must have the same length as children");
+ }
+
+ if (type_codes.size() > 0 && type_codes.size() != children.size()) {
+ return Status::Invalid("type_codes must have the same length as children");
+ }
+
+ BufferVector buffers = {nullptr, checked_cast<const Int8Array&>(type_ids).values()};
+ auto union_type = sparse_union(children, std::move(field_names), std::move(type_codes));
+ auto internal_data =
+ ArrayData::Make(std::move(union_type), type_ids.length(), std::move(buffers),
+ /*null_count=*/0, type_ids.offset());
+ for (const auto& child : children) {
+ internal_data->child_data.push_back(child->data());
+ if (child->length() != type_ids.length()) {
+ return Status::Invalid(
+ "Sparse UnionArray must have len(child) == len(type_ids) for all children");
+ }
+ }
+ return std::make_shared<SparseUnionArray>(std::move(internal_data));
+}
+
+std::shared_ptr<Array> UnionArray::child(int i) const { return field(i); }
+
+std::shared_ptr<Array> UnionArray::field(int i) const {
+ if (i < 0 ||
+ static_cast<decltype(boxed_fields_)::size_type>(i) >= boxed_fields_.size()) {
+ return nullptr;
+ }
+ std::shared_ptr<Array> result = internal::atomic_load(&boxed_fields_[i]);
+ if (!result) {
+ std::shared_ptr<ArrayData> child_data = data_->child_data[i]->Copy();
+ if (mode() == UnionMode::SPARSE) {
+ // Sparse union: need to adjust child if union is sliced
+ // (for dense unions, the need to lookup through the offsets
+ // makes this unnecessary)
+ if (data_->offset != 0 || child_data->length > data_->length) {
+ child_data = child_data->Slice(data_->offset, data_->length);
+ }
+ }
+ result = MakeArray(child_data);
+ internal::atomic_store(&boxed_fields_[i], result);
+ }
+ return result;
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_nested.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_nested.h
index d39f33f4702..25a85134a57 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_nested.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_nested.h
@@ -1,523 +1,523 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Array accessor classes for List, LargeList, FixedSizeList, Map, Struct, and
-// Union
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "arrow/array/array_base.h"
-#include "arrow/array/data.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-// ----------------------------------------------------------------------
-// ListArray
-
-template <typename TYPE>
-class BaseListArray;
-
-namespace internal {
-
-// Private helper for ListArray::SetData.
-// Unfortunately, trying to define BaseListArray::SetData outside of this header
-// doesn't play well with MSVC.
-template <typename TYPE>
-void SetListData(BaseListArray<TYPE>* self, const std::shared_ptr<ArrayData>& data,
- Type::type expected_type_id = TYPE::type_id);
-
-} // namespace internal
-
-/// Base class for variable-sized list arrays, regardless of offset size.
-template <typename TYPE>
-class BaseListArray : public Array {
- public:
- using TypeClass = TYPE;
- using offset_type = typename TypeClass::offset_type;
-
- const TypeClass* list_type() const { return list_type_; }
-
- /// \brief Return array object containing the list's values
- std::shared_ptr<Array> values() const { return values_; }
-
- /// Note that this buffer does not account for any slice offset
- std::shared_ptr<Buffer> value_offsets() const { return data_->buffers[1]; }
-
- std::shared_ptr<DataType> value_type() const { return list_type_->value_type(); }
-
- /// Return pointer to raw value offsets accounting for any slice offset
- const offset_type* raw_value_offsets() const {
- return raw_value_offsets_ + data_->offset;
- }
-
- // The following functions will not perform boundschecking
- offset_type value_offset(int64_t i) const {
- return raw_value_offsets_[i + data_->offset];
- }
- offset_type value_length(int64_t i) const {
- i += data_->offset;
- return raw_value_offsets_[i + 1] - raw_value_offsets_[i];
- }
- std::shared_ptr<Array> value_slice(int64_t i) const {
- return values_->Slice(value_offset(i), value_length(i));
- }
-
- protected:
- friend void internal::SetListData<TYPE>(BaseListArray<TYPE>* self,
- const std::shared_ptr<ArrayData>& data,
- Type::type expected_type_id);
-
- const TypeClass* list_type_ = NULLPTR;
- std::shared_ptr<Array> values_;
- const offset_type* raw_value_offsets_ = NULLPTR;
-};
-
-/// Concrete Array class for list data
-class ARROW_EXPORT ListArray : public BaseListArray<ListType> {
- public:
- explicit ListArray(std::shared_ptr<ArrayData> data);
-
- ListArray(std::shared_ptr<DataType> type, int64_t length,
- std::shared_ptr<Buffer> value_offsets, std::shared_ptr<Array> values,
- std::shared_ptr<Buffer> null_bitmap = NULLPTR,
- int64_t null_count = kUnknownNullCount, int64_t offset = 0);
-
- /// \brief Construct ListArray from array of offsets and child value array
- ///
- /// This function does the bare minimum of validation of the offsets and
- /// input types, and will allocate a new offsets array if necessary (i.e. if
- /// the offsets contain any nulls). If the offsets do not have nulls, they
- /// are assumed to be well-formed
- ///
- /// \param[in] offsets Array containing n + 1 offsets encoding length and
- /// size. Must be of int32 type
- /// \param[in] values Array containing list values
- /// \param[in] pool MemoryPool in case new offsets array needs to be
- /// allocated because of null values
- static Result<std::shared_ptr<ListArray>> FromArrays(
- const Array& offsets, const Array& values,
- MemoryPool* pool = default_memory_pool());
-
- /// \brief Return an Array that is a concatenation of the lists in this array.
- ///
- /// Note that it's different from `values()` in that it takes into
- /// consideration of this array's offsets as well as null elements backed
- /// by non-empty lists (they are skipped, thus copying may be needed).
- Result<std::shared_ptr<Array>> Flatten(
- MemoryPool* memory_pool = default_memory_pool()) const;
-
- /// \brief Return list offsets as an Int32Array
- std::shared_ptr<Array> offsets() const;
-
- protected:
- // This constructor defers SetData to a derived array class
- ListArray() = default;
-
- void SetData(const std::shared_ptr<ArrayData>& data);
-};
-
-/// Concrete Array class for large list data (with 64-bit offsets)
-class ARROW_EXPORT LargeListArray : public BaseListArray<LargeListType> {
- public:
- explicit LargeListArray(const std::shared_ptr<ArrayData>& data);
-
- LargeListArray(const std::shared_ptr<DataType>& type, int64_t length,
- const std::shared_ptr<Buffer>& value_offsets,
- const std::shared_ptr<Array>& values,
- const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
- int64_t null_count = kUnknownNullCount, int64_t offset = 0);
-
- /// \brief Construct LargeListArray from array of offsets and child value array
- ///
- /// This function does the bare minimum of validation of the offsets and
- /// input types, and will allocate a new offsets array if necessary (i.e. if
- /// the offsets contain any nulls). If the offsets do not have nulls, they
- /// are assumed to be well-formed
- ///
- /// \param[in] offsets Array containing n + 1 offsets encoding length and
- /// size. Must be of int64 type
- /// \param[in] values Array containing list values
- /// \param[in] pool MemoryPool in case new offsets array needs to be
- /// allocated because of null values
- static Result<std::shared_ptr<LargeListArray>> FromArrays(
- const Array& offsets, const Array& values,
- MemoryPool* pool = default_memory_pool());
-
- /// \brief Return an Array that is a concatenation of the lists in this array.
- ///
- /// Note that it's different from `values()` in that it takes into
- /// consideration of this array's offsets as well as null elements backed
- /// by non-empty lists (they are skipped, thus copying may be needed).
- Result<std::shared_ptr<Array>> Flatten(
- MemoryPool* memory_pool = default_memory_pool()) const;
-
- /// \brief Return list offsets as an Int64Array
- std::shared_ptr<Array> offsets() const;
-
- protected:
- void SetData(const std::shared_ptr<ArrayData>& data);
-};
-
-// ----------------------------------------------------------------------
-// MapArray
-
-/// Concrete Array class for map data
-///
-/// NB: "value" in this context refers to a pair of a key and the corresponding item
-class ARROW_EXPORT MapArray : public ListArray {
- public:
- using TypeClass = MapType;
-
- explicit MapArray(const std::shared_ptr<ArrayData>& data);
-
- MapArray(const std::shared_ptr<DataType>& type, int64_t length,
- const std::shared_ptr<Buffer>& value_offsets,
- const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
- const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
- int64_t null_count = kUnknownNullCount, int64_t offset = 0);
-
- MapArray(const std::shared_ptr<DataType>& type, int64_t length,
- const std::shared_ptr<Buffer>& value_offsets,
- const std::shared_ptr<Array>& values,
- const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
- int64_t null_count = kUnknownNullCount, int64_t offset = 0);
-
- /// \brief Construct MapArray from array of offsets and child key, item arrays
- ///
- /// This function does the bare minimum of validation of the offsets and
- /// input types, and will allocate a new offsets array if necessary (i.e. if
- /// the offsets contain any nulls). If the offsets do not have nulls, they
- /// are assumed to be well-formed
- ///
- /// \param[in] offsets Array containing n + 1 offsets encoding length and
- /// size. Must be of int32 type
- /// \param[in] keys Array containing key values
- /// \param[in] items Array containing item values
- /// \param[in] pool MemoryPool in case new offsets array needs to be
- /// allocated because of null values
- static Result<std::shared_ptr<Array>> FromArrays(
- const std::shared_ptr<Array>& offsets, const std::shared_ptr<Array>& keys,
- const std::shared_ptr<Array>& items, MemoryPool* pool = default_memory_pool());
-
- static Result<std::shared_ptr<Array>> FromArrays(
- std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
- const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
- MemoryPool* pool = default_memory_pool());
-
- const MapType* map_type() const { return map_type_; }
-
- /// \brief Return array object containing all map keys
- std::shared_ptr<Array> keys() const { return keys_; }
-
- /// \brief Return array object containing all mapped items
- std::shared_ptr<Array> items() const { return items_; }
-
- /// Validate child data before constructing the actual MapArray.
- static Status ValidateChildData(
- const std::vector<std::shared_ptr<ArrayData>>& child_data);
-
- protected:
- void SetData(const std::shared_ptr<ArrayData>& data);
-
- static Result<std::shared_ptr<Array>> FromArraysInternal(
- std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
- const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
- MemoryPool* pool);
-
- private:
- const MapType* map_type_;
- std::shared_ptr<Array> keys_, items_;
-};
-
-// ----------------------------------------------------------------------
-// FixedSizeListArray
-
-/// Concrete Array class for fixed size list data
-class ARROW_EXPORT FixedSizeListArray : public Array {
- public:
- using TypeClass = FixedSizeListType;
- using offset_type = TypeClass::offset_type;
-
- explicit FixedSizeListArray(const std::shared_ptr<ArrayData>& data);
-
- FixedSizeListArray(const std::shared_ptr<DataType>& type, int64_t length,
- const std::shared_ptr<Array>& values,
- const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
- int64_t null_count = kUnknownNullCount, int64_t offset = 0);
-
- const FixedSizeListType* list_type() const;
-
- /// \brief Return array object containing the list's values
- std::shared_ptr<Array> values() const;
-
- std::shared_ptr<DataType> value_type() const;
-
- // The following functions will not perform boundschecking
- int32_t value_offset(int64_t i) const {
- i += data_->offset;
- return static_cast<int32_t>(list_size_ * i);
- }
- int32_t value_length(int64_t i = 0) const {
- ARROW_UNUSED(i);
- return list_size_;
- }
- std::shared_ptr<Array> value_slice(int64_t i) const {
- return values_->Slice(value_offset(i), value_length(i));
- }
-
- /// \brief Construct FixedSizeListArray from child value array and value_length
- ///
- /// \param[in] values Array containing list values
- /// \param[in] list_size The fixed length of each list
- /// \return Will have length equal to values.length() / list_size
- static Result<std::shared_ptr<Array>> FromArrays(const std::shared_ptr<Array>& values,
- int32_t list_size);
-
- protected:
- void SetData(const std::shared_ptr<ArrayData>& data);
- int32_t list_size_;
-
- private:
- std::shared_ptr<Array> values_;
-};
-
-// ----------------------------------------------------------------------
-// Struct
-
-/// Concrete Array class for struct data
-class ARROW_EXPORT StructArray : public Array {
- public:
- using TypeClass = StructType;
-
- explicit StructArray(const std::shared_ptr<ArrayData>& data);
-
- StructArray(const std::shared_ptr<DataType>& type, int64_t length,
- const std::vector<std::shared_ptr<Array>>& children,
- std::shared_ptr<Buffer> null_bitmap = NULLPTR,
- int64_t null_count = kUnknownNullCount, int64_t offset = 0);
-
- /// \brief Return a StructArray from child arrays and field names.
- ///
- /// The length and data type are automatically inferred from the arguments.
- /// There should be at least one child array.
- static Result<std::shared_ptr<StructArray>> Make(
- const ArrayVector& children, const std::vector<std::string>& field_names,
- std::shared_ptr<Buffer> null_bitmap = NULLPTR,
- int64_t null_count = kUnknownNullCount, int64_t offset = 0);
-
- /// \brief Return a StructArray from child arrays and fields.
- ///
- /// The length is automatically inferred from the arguments.
- /// There should be at least one child array. This method does not
- /// check that field types and child array types are consistent.
- static Result<std::shared_ptr<StructArray>> Make(
- const ArrayVector& children, const FieldVector& fields,
- std::shared_ptr<Buffer> null_bitmap = NULLPTR,
- int64_t null_count = kUnknownNullCount, int64_t offset = 0);
-
- const StructType* struct_type() const;
-
- // Return a shared pointer in case the requestor desires to share ownership
- // with this array. The returned array has its offset, length and null
- // count adjusted.
- std::shared_ptr<Array> field(int pos) const;
-
- const ArrayVector& fields() const;
-
- /// Returns null if name not found
- std::shared_ptr<Array> GetFieldByName(const std::string& name) const;
-
- /// \brief Flatten this array as a vector of arrays, one for each field
- ///
- /// \param[in] pool The pool to allocate null bitmaps from, if necessary
- Result<ArrayVector> Flatten(MemoryPool* pool = default_memory_pool()) const;
-
- private:
- // For caching boxed child data
- // XXX This is not handled in a thread-safe manner.
- mutable ArrayVector boxed_fields_;
-};
-
-// ----------------------------------------------------------------------
-// Union
-
-/// Base class for SparseUnionArray and DenseUnionArray
-class ARROW_EXPORT UnionArray : public Array {
- public:
- using type_code_t = int8_t;
-
- /// Note that this buffer does not account for any slice offset
- std::shared_ptr<Buffer> type_codes() const { return data_->buffers[1]; }
-
- const type_code_t* raw_type_codes() const { return raw_type_codes_ + data_->offset; }
-
- /// The physical child id containing value at index.
- int child_id(int64_t i) const {
- return union_type_->child_ids()[raw_type_codes_[i + data_->offset]];
- }
-
- const UnionType* union_type() const { return union_type_; }
-
- UnionMode::type mode() const { return union_type_->mode(); }
-
- // Return the given field as an individual array.
- // For sparse unions, the returned array has its offset, length and null
- // count adjusted.
- ARROW_DEPRECATED("Deprecated in 1.0.0. Use field(pos)")
- std::shared_ptr<Array> child(int pos) const;
-
- /// \brief Return the given field as an individual array.
- ///
- /// For sparse unions, the returned array has its offset, length and null
- /// count adjusted.
- std::shared_ptr<Array> field(int pos) const;
-
- protected:
- void SetData(std::shared_ptr<ArrayData> data);
-
- const type_code_t* raw_type_codes_;
- const UnionType* union_type_;
-
- // For caching boxed child data
- mutable std::vector<std::shared_ptr<Array>> boxed_fields_;
-};
-
-/// Concrete Array class for sparse union data
-class ARROW_EXPORT SparseUnionArray : public UnionArray {
- public:
- using TypeClass = SparseUnionType;
-
- explicit SparseUnionArray(std::shared_ptr<ArrayData> data);
-
- SparseUnionArray(std::shared_ptr<DataType> type, int64_t length, ArrayVector children,
- std::shared_ptr<Buffer> type_ids, int64_t offset = 0);
-
- /// \brief Construct SparseUnionArray from type_ids and children
- ///
- /// This function does the bare minimum of validation of the input types.
- ///
- /// \param[in] type_ids An array of logical type ids for the union type
- /// \param[in] children Vector of children Arrays containing the data for each type.
- /// \param[in] type_codes Vector of type codes.
- static Result<std::shared_ptr<Array>> Make(const Array& type_ids, ArrayVector children,
- std::vector<type_code_t> type_codes) {
- return Make(std::move(type_ids), std::move(children), std::vector<std::string>{},
- std::move(type_codes));
- }
-
- /// \brief Construct SparseUnionArray with custom field names from type_ids and children
- ///
- /// This function does the bare minimum of validation of the input types.
- ///
- /// \param[in] type_ids An array of logical type ids for the union type
- /// \param[in] children Vector of children Arrays containing the data for each type.
- /// \param[in] field_names Vector of strings containing the name of each field.
- /// \param[in] type_codes Vector of type codes.
- static Result<std::shared_ptr<Array>> Make(const Array& type_ids, ArrayVector children,
- std::vector<std::string> field_names = {},
- std::vector<type_code_t> type_codes = {});
-
- const SparseUnionType* union_type() const {
- return internal::checked_cast<const SparseUnionType*>(union_type_);
- }
-
- protected:
- void SetData(std::shared_ptr<ArrayData> data);
-};
-
-/// \brief Concrete Array class for dense union data
-///
-/// Note that union types do not have a validity bitmap
-class ARROW_EXPORT DenseUnionArray : public UnionArray {
- public:
- using TypeClass = DenseUnionType;
-
- explicit DenseUnionArray(const std::shared_ptr<ArrayData>& data);
-
- DenseUnionArray(std::shared_ptr<DataType> type, int64_t length, ArrayVector children,
- std::shared_ptr<Buffer> type_ids,
- std::shared_ptr<Buffer> value_offsets = NULLPTR, int64_t offset = 0);
-
- /// \brief Construct DenseUnionArray from type_ids, value_offsets, and children
- ///
- /// This function does the bare minimum of validation of the offsets and
- /// input types.
- ///
- /// \param[in] type_ids An array of logical type ids for the union type
- /// \param[in] value_offsets An array of signed int32 values indicating the
- /// relative offset into the respective child array for the type in a given slot.
- /// The respective offsets for each child value array must be in order / increasing.
- /// \param[in] children Vector of children Arrays containing the data for each type.
- /// \param[in] type_codes Vector of type codes.
- static Result<std::shared_ptr<Array>> Make(const Array& type_ids,
- const Array& value_offsets,
- ArrayVector children,
- std::vector<type_code_t> type_codes) {
- return Make(type_ids, value_offsets, std::move(children), std::vector<std::string>{},
- std::move(type_codes));
- }
-
- /// \brief Construct DenseUnionArray with custom field names from type_ids,
- /// value_offsets, and children
- ///
- /// This function does the bare minimum of validation of the offsets and
- /// input types.
- ///
- /// \param[in] type_ids An array of logical type ids for the union type
- /// \param[in] value_offsets An array of signed int32 values indicating the
- /// relative offset into the respective child array for the type in a given slot.
- /// The respective offsets for each child value array must be in order / increasing.
- /// \param[in] children Vector of children Arrays containing the data for each type.
- /// \param[in] field_names Vector of strings containing the name of each field.
- /// \param[in] type_codes Vector of type codes.
- static Result<std::shared_ptr<Array>> Make(const Array& type_ids,
- const Array& value_offsets,
- ArrayVector children,
- std::vector<std::string> field_names = {},
- std::vector<type_code_t> type_codes = {});
-
- const DenseUnionType* union_type() const {
- return internal::checked_cast<const DenseUnionType*>(union_type_);
- }
-
- /// Note that this buffer does not account for any slice offset
- std::shared_ptr<Buffer> value_offsets() const { return data_->buffers[2]; }
-
- int32_t value_offset(int64_t i) const { return raw_value_offsets_[i + data_->offset]; }
-
- const int32_t* raw_value_offsets() const { return raw_value_offsets_ + data_->offset; }
-
- protected:
- const int32_t* raw_value_offsets_;
-
- void SetData(const std::shared_ptr<ArrayData>& data);
-};
-
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Array accessor classes for List, LargeList, FixedSizeList, Map, Struct, and
+// Union
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/data.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+// ----------------------------------------------------------------------
+// ListArray
+
+template <typename TYPE>
+class BaseListArray;
+
+namespace internal {
+
+// Private helper for ListArray::SetData (declared a friend of BaseListArray).
+// It is declared here because defining BaseListArray::SetData outside of this
+// header doesn't play well with MSVC.
+template <typename TYPE>
+void SetListData(BaseListArray<TYPE>* self, const std::shared_ptr<ArrayData>& data,
+ Type::type expected_type_id = TYPE::type_id);  // expected id defaults to TYPE's own type_id
+
+} // namespace internal
+
+/// Base class for variable-sized list arrays, regardless of offset size (offset_type comes from TYPE).
+template <typename TYPE>
+class BaseListArray : public Array {
+ public:
+ using TypeClass = TYPE;
+ using offset_type = typename TypeClass::offset_type;
+
+ const TypeClass* list_type() const { return list_type_; }
+
+ /// \brief Return array object containing the list's values
+ std::shared_ptr<Array> values() const { return values_; }
+
+ /// Return the offsets buffer (buffers[1]). Note that this buffer does not account for any slice offset
+ std::shared_ptr<Buffer> value_offsets() const { return data_->buffers[1]; }
+
+ std::shared_ptr<DataType> value_type() const { return list_type_->value_type(); }
+
+ /// Return pointer to raw value offsets accounting for any slice offset
+ const offset_type* raw_value_offsets() const {
+ return raw_value_offsets_ + data_->offset;
+ }
+
+ // The following functions do not perform bounds checking
+ offset_type value_offset(int64_t i) const {
+ return raw_value_offsets_[i + data_->offset];
+ }
+ offset_type value_length(int64_t i) const {
+ i += data_->offset;
+ return raw_value_offsets_[i + 1] - raw_value_offsets_[i];  // difference of consecutive offsets
+ }
+ std::shared_ptr<Array> value_slice(int64_t i) const {
+ return values_->Slice(value_offset(i), value_length(i));  // slice of values_ covering list i
+ }
+
+ protected:
+ friend void internal::SetListData<TYPE>(BaseListArray<TYPE>* self,
+ const std::shared_ptr<ArrayData>& data,
+ Type::type expected_type_id);
+
+ const TypeClass* list_type_ = NULLPTR;
+ std::shared_ptr<Array> values_;
+ const offset_type* raw_value_offsets_ = NULLPTR;  // unsliced base pointer; accessors add data_->offset
+};
+
+/// Concrete Array class for list data (ListType)
+class ARROW_EXPORT ListArray : public BaseListArray<ListType> {
+ public:
+ explicit ListArray(std::shared_ptr<ArrayData> data);
+
+ ListArray(std::shared_ptr<DataType> type, int64_t length,
+ std::shared_ptr<Buffer> value_offsets, std::shared_ptr<Array> values,
+ std::shared_ptr<Buffer> null_bitmap = NULLPTR,
+ int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
+ /// \brief Construct ListArray from array of offsets and child value array
+ ///
+ /// This function does the bare minimum of validation of the offsets and
+ /// input types, and will allocate a new offsets array if necessary (i.e. if
+ /// the offsets contain any nulls). If the offsets do not have nulls, they
+ /// are assumed to be well-formed.
+ ///
+ /// \param[in] offsets Array containing n + 1 offsets encoding length and
+ /// size. Must be of int32 type
+ /// \param[in] values Array containing list values
+ /// \param[in] pool MemoryPool in case new offsets array needs to be
+ /// allocated because of null values
+ static Result<std::shared_ptr<ListArray>> FromArrays(
+ const Array& offsets, const Array& values,
+ MemoryPool* pool = default_memory_pool());
+
+ /// \brief Return an Array that is a concatenation of the lists in this array.
+ ///
+ /// Note that it's different from `values()` in that it takes into
+ /// consideration this array's offsets as well as null elements backed
+ /// by non-empty lists (they are skipped, thus copying may be needed).
+ Result<std::shared_ptr<Array>> Flatten(
+ MemoryPool* memory_pool = default_memory_pool()) const;
+
+ /// \brief Return list offsets as an Int32Array
+ std::shared_ptr<Array> offsets() const;
+
+ protected:
+ // This constructor leaves the array unset; a derived array class is expected to call SetData
+ ListArray() = default;
+
+ void SetData(const std::shared_ptr<ArrayData>& data);
+};
+
+/// Concrete Array class for large list data (with 64-bit offsets)
+class ARROW_EXPORT LargeListArray : public BaseListArray<LargeListType> {
+ public:
+ explicit LargeListArray(const std::shared_ptr<ArrayData>& data);
+
+ LargeListArray(const std::shared_ptr<DataType>& type, int64_t length,
+ const std::shared_ptr<Buffer>& value_offsets,
+ const std::shared_ptr<Array>& values,
+ const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
+ int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
+ /// \brief Construct LargeListArray from array of offsets and child value array
+ ///
+ /// This function does the bare minimum of validation of the offsets and
+ /// input types, and will allocate a new offsets array if necessary (i.e. if
+ /// the offsets contain any nulls). If the offsets do not have nulls, they
+ /// are assumed to be well-formed.
+ ///
+ /// \param[in] offsets Array containing n + 1 offsets encoding length and
+ /// size. Must be of int64 type
+ /// \param[in] values Array containing list values
+ /// \param[in] pool MemoryPool in case new offsets array needs to be
+ /// allocated because of null values
+ static Result<std::shared_ptr<LargeListArray>> FromArrays(
+ const Array& offsets, const Array& values,
+ MemoryPool* pool = default_memory_pool());
+
+ /// \brief Return an Array that is a concatenation of the lists in this array.
+ ///
+ /// Note that it's different from `values()` in that it takes into
+ /// consideration this array's offsets as well as null elements backed
+ /// by non-empty lists (they are skipped, thus copying may be needed).
+ Result<std::shared_ptr<Array>> Flatten(
+ MemoryPool* memory_pool = default_memory_pool()) const;
+
+ /// \brief Return list offsets as an Int64Array
+ std::shared_ptr<Array> offsets() const;
+
+ protected:
+ void SetData(const std::shared_ptr<ArrayData>& data);
+};
+
+// ----------------------------------------------------------------------
+// MapArray
+
+/// Concrete Array class for map data (a ListArray subclass)
+///
+/// NB: "value" in this context refers to a pair of a key and the corresponding item
+class ARROW_EXPORT MapArray : public ListArray {
+ public:
+ using TypeClass = MapType;
+
+ explicit MapArray(const std::shared_ptr<ArrayData>& data);
+
+ MapArray(const std::shared_ptr<DataType>& type, int64_t length,  // built from separate key and item arrays
+ const std::shared_ptr<Buffer>& value_offsets,
+ const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
+ const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
+ int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
+ MapArray(const std::shared_ptr<DataType>& type, int64_t length,  // built from a single values array
+ const std::shared_ptr<Buffer>& value_offsets,
+ const std::shared_ptr<Array>& values,
+ const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
+ int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
+ /// \brief Construct MapArray from array of offsets and child key, item arrays
+ ///
+ /// This function does the bare minimum of validation of the offsets and
+ /// input types, and will allocate a new offsets array if necessary (i.e. if
+ /// the offsets contain any nulls). If the offsets do not have nulls, they
+ /// are assumed to be well-formed.
+ ///
+ /// \param[in] offsets Array containing n + 1 offsets encoding length and
+ /// size. Must be of int32 type
+ /// \param[in] keys Array containing key values
+ /// \param[in] items Array containing item values
+ /// \param[in] pool MemoryPool in case new offsets array needs to be
+ /// allocated because of null values
+ static Result<std::shared_ptr<Array>> FromArrays(
+ const std::shared_ptr<Array>& offsets, const std::shared_ptr<Array>& keys,
+ const std::shared_ptr<Array>& items, MemoryPool* pool = default_memory_pool());
+
+ static Result<std::shared_ptr<Array>> FromArrays(  // overload taking an explicit `type`
+ std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
+ const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
+ MemoryPool* pool = default_memory_pool());
+
+ const MapType* map_type() const { return map_type_; }
+
+ /// \brief Return array object containing all map keys
+ std::shared_ptr<Array> keys() const { return keys_; }
+
+ /// \brief Return array object containing all mapped items
+ std::shared_ptr<Array> items() const { return items_; }
+
+ /// Validate child data before constructing the actual MapArray.
+ static Status ValidateChildData(
+ const std::vector<std::shared_ptr<ArrayData>>& child_data);
+
+ protected:
+ void SetData(const std::shared_ptr<ArrayData>& data);
+
+ static Result<std::shared_ptr<Array>> FromArraysInternal(  // no default pool here, unlike the public overloads
+ std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
+ const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
+ MemoryPool* pool);
+
+ private:
+ const MapType* map_type_;  // NOTE(review): no default initializer; presumably assigned in SetData — confirm
+ std::shared_ptr<Array> keys_, items_;
+};
+
+// ----------------------------------------------------------------------
+// FixedSizeListArray
+
+/// Concrete Array class for fixed size list data
+class ARROW_EXPORT FixedSizeListArray : public Array {
+ public:
+ using TypeClass = FixedSizeListType;
+ using offset_type = TypeClass::offset_type;
+
+ explicit FixedSizeListArray(const std::shared_ptr<ArrayData>& data);
+
+ FixedSizeListArray(const std::shared_ptr<DataType>& type, int64_t length,
+ const std::shared_ptr<Array>& values,
+ const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
+ int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
+ const FixedSizeListType* list_type() const;
+
+ /// \brief Return array object containing the list's values
+ std::shared_ptr<Array> values() const;
+
+ std::shared_ptr<DataType> value_type() const;
+
+ // The following functions do not perform bounds checking
+ int32_t value_offset(int64_t i) const {
+ i += data_->offset;
+ return static_cast<int32_t>(list_size_ * i);  // NOTE(review): 64-bit product is narrowed to int32_t — confirm it cannot overflow
+ }
+ int32_t value_length(int64_t i = 0) const {
+ ARROW_UNUSED(i);  // every list has the same length, so the index is ignored
+ return list_size_;
+ }
+ std::shared_ptr<Array> value_slice(int64_t i) const {
+ return values_->Slice(value_offset(i), value_length(i));
+ }
+
+ /// \brief Construct FixedSizeListArray from child value array and list_size
+ ///
+ /// \param[in] values Array containing list values
+ /// \param[in] list_size The fixed length of each list
+ /// \return Will have length equal to values.length() / list_size
+ static Result<std::shared_ptr<Array>> FromArrays(const std::shared_ptr<Array>& values,
+ int32_t list_size);
+
+ protected:
+ void SetData(const std::shared_ptr<ArrayData>& data);
+ int32_t list_size_;  // NOTE(review): no default initializer; presumably assigned in SetData — confirm
+
+ private:
+ std::shared_ptr<Array> values_;
+};
+
+// ----------------------------------------------------------------------
+// Struct
+
+/// Concrete Array class for struct data
+class ARROW_EXPORT StructArray : public Array {
+ public:
+ using TypeClass = StructType;
+
+ explicit StructArray(const std::shared_ptr<ArrayData>& data);
+
+ StructArray(const std::shared_ptr<DataType>& type, int64_t length,
+ const std::vector<std::shared_ptr<Array>>& children,
+ std::shared_ptr<Buffer> null_bitmap = NULLPTR,
+ int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
+ /// \brief Return a StructArray from child arrays and field names.
+ ///
+ /// The length and data type are automatically inferred from the arguments.
+ /// There should be at least one child array.
+ static Result<std::shared_ptr<StructArray>> Make(
+ const ArrayVector& children, const std::vector<std::string>& field_names,
+ std::shared_ptr<Buffer> null_bitmap = NULLPTR,
+ int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
+ /// \brief Return a StructArray from child arrays and fields.
+ ///
+ /// The length is automatically inferred from the arguments.
+ /// There should be at least one child array. This method does not
+ /// check that field types and child array types are consistent.
+ static Result<std::shared_ptr<StructArray>> Make(
+ const ArrayVector& children, const FieldVector& fields,
+ std::shared_ptr<Buffer> null_bitmap = NULLPTR,
+ int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
+ const StructType* struct_type() const;
+
+ // Return a shared pointer in case the requestor desires to share ownership
+ // with this array. The returned array has its offset, length and null
+ // count adjusted.
+ std::shared_ptr<Array> field(int pos) const;
+
+ const ArrayVector& fields() const;
+
+ /// Returns null if name not found
+ std::shared_ptr<Array> GetFieldByName(const std::string& name) const;
+
+ /// \brief Flatten this array as a vector of arrays, one for each field
+ ///
+ /// \param[in] pool The pool to allocate null bitmaps from, if necessary
+ Result<ArrayVector> Flatten(MemoryPool* pool = default_memory_pool()) const;
+
+ private:
+ // For caching boxed child data
+ // XXX This is not handled in a thread-safe manner.
+ mutable ArrayVector boxed_fields_;
+};
+
+// ----------------------------------------------------------------------
+// Union
+
+/// Base class for SparseUnionArray and DenseUnionArray
+class ARROW_EXPORT UnionArray : public Array {
+ public:
+ using type_code_t = int8_t;
+
+ /// Note that this buffer does not account for any slice offset
+ std::shared_ptr<Buffer> type_codes() const { return data_->buffers[1]; }
+
+ const type_code_t* raw_type_codes() const { return raw_type_codes_ + data_->offset; }
+
+ /// The physical child id containing value at index.
+ int child_id(int64_t i) const {
+ return union_type_->child_ids()[raw_type_codes_[i + data_->offset]];
+ }
+
+ const UnionType* union_type() const { return union_type_; }
+
+ UnionMode::type mode() const { return union_type_->mode(); }
+
+ // Return the given field as an individual array.
+ // For sparse unions, the returned array has its offset, length and null
+ // count adjusted.
+ ARROW_DEPRECATED("Deprecated in 1.0.0. Use field(pos)")
+ std::shared_ptr<Array> child(int pos) const;
+
+ /// \brief Return the given field as an individual array.
+ ///
+ /// For sparse unions, the returned array has its offset, length and null
+ /// count adjusted.
+ std::shared_ptr<Array> field(int pos) const;
+
+ protected:
+ void SetData(std::shared_ptr<ArrayData> data);
+
+ const type_code_t* raw_type_codes_;
+ const UnionType* union_type_;
+
+ // For caching boxed child data
+ mutable std::vector<std::shared_ptr<Array>> boxed_fields_;
+};
+
+/// Concrete Array class for sparse union data
+class ARROW_EXPORT SparseUnionArray : public UnionArray {
+ public:
+ using TypeClass = SparseUnionType;
+
+ explicit SparseUnionArray(std::shared_ptr<ArrayData> data);
+
+ SparseUnionArray(std::shared_ptr<DataType> type, int64_t length, ArrayVector children,
+ std::shared_ptr<Buffer> type_ids, int64_t offset = 0);
+
+ /// \brief Construct SparseUnionArray from type_ids and children
+ ///
+ /// This function does the bare minimum of validation of the input types.
+ ///
+ /// \param[in] type_ids An array of logical type ids for the union type
+ /// \param[in] children Vector of children Arrays containing the data for each type.
+ /// \param[in] type_codes Vector of type codes.
+ static Result<std::shared_ptr<Array>> Make(const Array& type_ids, ArrayVector children,
+ std::vector<type_code_t> type_codes) {
+ return Make(std::move(type_ids), std::move(children), std::vector<std::string>{},
+ std::move(type_codes));
+ }
+
+ /// \brief Construct SparseUnionArray with custom field names from type_ids and children
+ ///
+ /// This function does the bare minimum of validation of the input types.
+ ///
+ /// \param[in] type_ids An array of logical type ids for the union type
+ /// \param[in] children Vector of children Arrays containing the data for each type.
+ /// \param[in] field_names Vector of strings containing the name of each field.
+ /// \param[in] type_codes Vector of type codes.
+ static Result<std::shared_ptr<Array>> Make(const Array& type_ids, ArrayVector children,
+ std::vector<std::string> field_names = {},
+ std::vector<type_code_t> type_codes = {});
+
+ const SparseUnionType* union_type() const {
+ return internal::checked_cast<const SparseUnionType*>(union_type_);
+ }
+
+ protected:
+ void SetData(std::shared_ptr<ArrayData> data);
+};
+
+/// \brief Concrete Array class for dense union data
+///
+/// Note that union types do not have a validity bitmap
+class ARROW_EXPORT DenseUnionArray : public UnionArray {
+ public:
+ using TypeClass = DenseUnionType;
+
+ explicit DenseUnionArray(const std::shared_ptr<ArrayData>& data);
+
+ DenseUnionArray(std::shared_ptr<DataType> type, int64_t length, ArrayVector children,
+ std::shared_ptr<Buffer> type_ids,
+ std::shared_ptr<Buffer> value_offsets = NULLPTR, int64_t offset = 0);
+
+ /// \brief Construct DenseUnionArray from type_ids, value_offsets, and children
+ ///
+ /// This function does the bare minimum of validation of the offsets and
+ /// input types.
+ ///
+ /// \param[in] type_ids An array of logical type ids for the union type
+ /// \param[in] value_offsets An array of signed int32 values indicating the
+ /// relative offset into the respective child array for the type in a given slot.
+ /// The respective offsets for each child value array must be in order / increasing.
+ /// \param[in] children Vector of children Arrays containing the data for each type.
+ /// \param[in] type_codes Vector of type codes.
+ static Result<std::shared_ptr<Array>> Make(const Array& type_ids,
+ const Array& value_offsets,
+ ArrayVector children,
+ std::vector<type_code_t> type_codes) {
+ return Make(type_ids, value_offsets, std::move(children), std::vector<std::string>{},
+ std::move(type_codes));
+ }
+
+ /// \brief Construct DenseUnionArray with custom field names from type_ids,
+ /// value_offsets, and children
+ ///
+ /// This function does the bare minimum of validation of the offsets and
+ /// input types.
+ ///
+ /// \param[in] type_ids An array of logical type ids for the union type
+ /// \param[in] value_offsets An array of signed int32 values indicating the
+ /// relative offset into the respective child array for the type in a given slot.
+ /// The respective offsets for each child value array must be in order / increasing.
+ /// \param[in] children Vector of children Arrays containing the data for each type.
+ /// \param[in] field_names Vector of strings containing the name of each field.
+ /// \param[in] type_codes Vector of type codes.
+ static Result<std::shared_ptr<Array>> Make(const Array& type_ids,
+ const Array& value_offsets,
+ ArrayVector children,
+ std::vector<std::string> field_names = {},
+ std::vector<type_code_t> type_codes = {});
+
+ const DenseUnionType* union_type() const {
+ return internal::checked_cast<const DenseUnionType*>(union_type_);
+ }
+
+ /// Note that this buffer does not account for any slice offset
+ std::shared_ptr<Buffer> value_offsets() const { return data_->buffers[2]; }
+
+ int32_t value_offset(int64_t i) const { return raw_value_offsets_[i + data_->offset]; }
+
+ const int32_t* raw_value_offsets() const { return raw_value_offsets_ + data_->offset; }
+
+ protected:
+ const int32_t* raw_value_offsets_;
+
+ void SetData(const std::shared_ptr<ArrayData>& data);
+};
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_primitive.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_primitive.cc
index a1aff933af4..bf990cba716 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_primitive.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_primitive.cc
@@ -1,99 +1,99 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/array/array_primitive.h"
-
-#include <cstdint>
-#include <memory>
-
-#include "arrow/array/array_base.h"
-#include "arrow/type.h"
-#include "arrow/util/bit_block_counter.h"
-#include "arrow/util/bitmap_ops.h"
-#include "arrow/util/logging.h"
-
-namespace arrow {
-
-// ----------------------------------------------------------------------
-// Primitive array base
-
-PrimitiveArray::PrimitiveArray(const std::shared_ptr<DataType>& type, int64_t length,
- const std::shared_ptr<Buffer>& data,
- const std::shared_ptr<Buffer>& null_bitmap,
- int64_t null_count, int64_t offset) {
- SetData(ArrayData::Make(type, length, {null_bitmap, data}, null_count, offset));
-}
-
-// ----------------------------------------------------------------------
-// BooleanArray
-
-BooleanArray::BooleanArray(const std::shared_ptr<ArrayData>& data)
- : PrimitiveArray(data) {
- ARROW_CHECK_EQ(data->type->id(), Type::BOOL);
-}
-
-BooleanArray::BooleanArray(int64_t length, const std::shared_ptr<Buffer>& data,
- const std::shared_ptr<Buffer>& null_bitmap, int64_t null_count,
- int64_t offset)
- : PrimitiveArray(boolean(), length, data, null_bitmap, null_count, offset) {}
-
-int64_t BooleanArray::false_count() const {
- return this->length() - this->null_count() - this->true_count();
-}
-
-int64_t BooleanArray::true_count() const {
- if (data_->null_count.load() != 0) {
- DCHECK(data_->buffers[0]);
- internal::BinaryBitBlockCounter bit_counter(data_->buffers[0]->data(), data_->offset,
- data_->buffers[1]->data(), data_->offset,
- data_->length);
- int64_t count = 0;
- while (true) {
- internal::BitBlockCount block = bit_counter.NextAndWord();
- if (block.length == 0) {
- break;
- }
- count += block.popcount;
- }
- return count;
- } else {
- return internal::CountSetBits(data_->buffers[1]->data(), data_->offset,
- data_->length);
- }
-}
-
-// ----------------------------------------------------------------------
-// Day time interval
-
-DayTimeIntervalArray::DayTimeIntervalArray(const std::shared_ptr<ArrayData>& data) {
- SetData(data);
-}
-
-DayTimeIntervalArray::DayTimeIntervalArray(const std::shared_ptr<DataType>& type,
- int64_t length,
- const std::shared_ptr<Buffer>& data,
- const std::shared_ptr<Buffer>& null_bitmap,
- int64_t null_count, int64_t offset)
- : PrimitiveArray(type, length, data, null_bitmap, null_count, offset) {}
-
-DayTimeIntervalType::DayMilliseconds DayTimeIntervalArray::GetValue(int64_t i) const {
- DCHECK(i < length());
- return *reinterpret_cast<const DayTimeIntervalType::DayMilliseconds*>(
- raw_values_ + (i + data_->offset) * byte_width());
-}
-
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/array/array_primitive.h"
+
+#include <cstdint>
+#include <memory>
+
+#include "arrow/array/array_base.h"
+#include "arrow/type.h"
+#include "arrow/util/bit_block_counter.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+// ----------------------------------------------------------------------
+// Primitive array base
+
+PrimitiveArray::PrimitiveArray(const std::shared_ptr<DataType>& type, int64_t length,
+ const std::shared_ptr<Buffer>& data,
+ const std::shared_ptr<Buffer>& null_bitmap,
+ int64_t null_count, int64_t offset) {
+ SetData(ArrayData::Make(type, length, {null_bitmap, data}, null_count, offset));
+}
+
+// ----------------------------------------------------------------------
+// BooleanArray
+
+BooleanArray::BooleanArray(const std::shared_ptr<ArrayData>& data)
+ : PrimitiveArray(data) {
+ ARROW_CHECK_EQ(data->type->id(), Type::BOOL);
+}
+
+BooleanArray::BooleanArray(int64_t length, const std::shared_ptr<Buffer>& data,
+ const std::shared_ptr<Buffer>& null_bitmap, int64_t null_count,
+ int64_t offset)
+ : PrimitiveArray(boolean(), length, data, null_bitmap, null_count, offset) {}
+
+int64_t BooleanArray::false_count() const {
+ return this->length() - this->null_count() - this->true_count();
+}
+
+int64_t BooleanArray::true_count() const {
+ if (data_->null_count.load() != 0) {
+ DCHECK(data_->buffers[0]);
+ internal::BinaryBitBlockCounter bit_counter(data_->buffers[0]->data(), data_->offset,
+ data_->buffers[1]->data(), data_->offset,
+ data_->length);
+ int64_t count = 0;
+ while (true) {
+ internal::BitBlockCount block = bit_counter.NextAndWord();
+ if (block.length == 0) {
+ break;
+ }
+ count += block.popcount;
+ }
+ return count;
+ } else {
+ return internal::CountSetBits(data_->buffers[1]->data(), data_->offset,
+ data_->length);
+ }
+}
+
+// ----------------------------------------------------------------------
+// Day time interval
+
+DayTimeIntervalArray::DayTimeIntervalArray(const std::shared_ptr<ArrayData>& data) {
+ SetData(data);
+}
+
+DayTimeIntervalArray::DayTimeIntervalArray(const std::shared_ptr<DataType>& type,
+ int64_t length,
+ const std::shared_ptr<Buffer>& data,
+ const std::shared_ptr<Buffer>& null_bitmap,
+ int64_t null_count, int64_t offset)
+ : PrimitiveArray(type, length, data, null_bitmap, null_count, offset) {}
+
+DayTimeIntervalType::DayMilliseconds DayTimeIntervalArray::GetValue(int64_t i) const {
+ DCHECK(i < length());
+ return *reinterpret_cast<const DayTimeIntervalType::DayMilliseconds*>(
+ raw_values_ + (i + data_->offset) * byte_width());
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_primitive.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_primitive.h
index b601eb770c3..46c02167ca3 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_primitive.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_primitive.h
@@ -1,135 +1,135 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Array accessor types for primitive/C-type-based arrays, such as numbers,
-// boolean, and temporal types.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-
-#include "arrow/array/array_base.h"
-#include "arrow/array/data.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Array accessor types for primitive/C-type-based arrays, such as numbers,
+// boolean, and temporal types.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/data.h"
#include "arrow/stl_iterator.h"
-#include "arrow/type.h"
-#include "arrow/type_fwd.h" // IWYU pragma: export
-#include "arrow/type_traits.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-/// Concrete Array class for numeric data.
-template <typename TYPE>
-class NumericArray : public PrimitiveArray {
- public:
- using TypeClass = TYPE;
- using value_type = typename TypeClass::c_type;
+#include "arrow/type.h"
+#include "arrow/type_fwd.h" // IWYU pragma: export
+#include "arrow/type_traits.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+/// Concrete Array class for numeric data.
+template <typename TYPE>
+class NumericArray : public PrimitiveArray {
+ public:
+ using TypeClass = TYPE;
+ using value_type = typename TypeClass::c_type;
using IteratorType = stl::ArrayIterator<NumericArray<TYPE>>;
-
- explicit NumericArray(const std::shared_ptr<ArrayData>& data) : PrimitiveArray(data) {}
-
- // Only enable this constructor without a type argument for types without additional
- // metadata
- template <typename T1 = TYPE>
- NumericArray(enable_if_parameter_free<T1, int64_t> length,
- const std::shared_ptr<Buffer>& data,
- const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
- int64_t null_count = kUnknownNullCount, int64_t offset = 0)
- : PrimitiveArray(TypeTraits<T1>::type_singleton(), length, data, null_bitmap,
- null_count, offset) {}
-
- const value_type* raw_values() const {
- return reinterpret_cast<const value_type*>(raw_values_) + data_->offset;
- }
-
- value_type Value(int64_t i) const { return raw_values()[i]; }
-
- // For API compatibility with BinaryArray etc.
- value_type GetView(int64_t i) const { return Value(i); }
-
+
+ explicit NumericArray(const std::shared_ptr<ArrayData>& data) : PrimitiveArray(data) {}
+
+ // Only enable this constructor without a type argument for types without additional
+ // metadata
+ template <typename T1 = TYPE>
+ NumericArray(enable_if_parameter_free<T1, int64_t> length,
+ const std::shared_ptr<Buffer>& data,
+ const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
+ int64_t null_count = kUnknownNullCount, int64_t offset = 0)
+ : PrimitiveArray(TypeTraits<T1>::type_singleton(), length, data, null_bitmap,
+ null_count, offset) {}
+
+ const value_type* raw_values() const {
+ return reinterpret_cast<const value_type*>(raw_values_) + data_->offset;
+ }
+
+ value_type Value(int64_t i) const { return raw_values()[i]; }
+
+ // For API compatibility with BinaryArray etc.
+ value_type GetView(int64_t i) const { return Value(i); }
+
IteratorType begin() const { return IteratorType(*this); }
IteratorType end() const { return IteratorType(*this, length()); }
- protected:
- using PrimitiveArray::PrimitiveArray;
-};
-
-/// Concrete Array class for boolean data
-class ARROW_EXPORT BooleanArray : public PrimitiveArray {
- public:
- using TypeClass = BooleanType;
+ protected:
+ using PrimitiveArray::PrimitiveArray;
+};
+
+/// Concrete Array class for boolean data
+class ARROW_EXPORT BooleanArray : public PrimitiveArray {
+ public:
+ using TypeClass = BooleanType;
using IteratorType = stl::ArrayIterator<BooleanArray>;
-
- explicit BooleanArray(const std::shared_ptr<ArrayData>& data);
-
- BooleanArray(int64_t length, const std::shared_ptr<Buffer>& data,
- const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
- int64_t null_count = kUnknownNullCount, int64_t offset = 0);
-
- bool Value(int64_t i) const {
- return BitUtil::GetBit(reinterpret_cast<const uint8_t*>(raw_values_),
- i + data_->offset);
- }
-
- bool GetView(int64_t i) const { return Value(i); }
-
- /// \brief Return the number of false (0) values among the valid
- /// values. Result is not cached.
- int64_t false_count() const;
-
- /// \brief Return the number of true (1) values among the valid
- /// values. Result is not cached.
- int64_t true_count() const;
-
+
+ explicit BooleanArray(const std::shared_ptr<ArrayData>& data);
+
+ BooleanArray(int64_t length, const std::shared_ptr<Buffer>& data,
+ const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
+ int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
+ bool Value(int64_t i) const {
+ return BitUtil::GetBit(reinterpret_cast<const uint8_t*>(raw_values_),
+ i + data_->offset);
+ }
+
+ bool GetView(int64_t i) const { return Value(i); }
+
+ /// \brief Return the number of false (0) values among the valid
+ /// values. Result is not cached.
+ int64_t false_count() const;
+
+ /// \brief Return the number of true (1) values among the valid
+ /// values. Result is not cached.
+ int64_t true_count() const;
+
IteratorType begin() const { return IteratorType(*this); }
IteratorType end() const { return IteratorType(*this, length()); }
- protected:
- using PrimitiveArray::PrimitiveArray;
-};
-
-/// DayTimeArray
-/// ---------------------
-/// \brief Array of Day and Millisecond values.
-class ARROW_EXPORT DayTimeIntervalArray : public PrimitiveArray {
- public:
- using TypeClass = DayTimeIntervalType;
-
- explicit DayTimeIntervalArray(const std::shared_ptr<ArrayData>& data);
-
- DayTimeIntervalArray(const std::shared_ptr<DataType>& type, int64_t length,
- const std::shared_ptr<Buffer>& data,
- const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
- int64_t null_count = kUnknownNullCount, int64_t offset = 0);
-
- TypeClass::DayMilliseconds GetValue(int64_t i) const;
- TypeClass::DayMilliseconds Value(int64_t i) const { return GetValue(i); }
-
- // For compatibility with Take kernel.
- TypeClass::DayMilliseconds GetView(int64_t i) const { return GetValue(i); }
-
- int32_t byte_width() const { return sizeof(TypeClass::DayMilliseconds); }
-
- const uint8_t* raw_values() const { return raw_values_ + data_->offset * byte_width(); }
-};
-
-} // namespace arrow
+ protected:
+ using PrimitiveArray::PrimitiveArray;
+};
+
+/// DayTimeArray
+/// ---------------------
+/// \brief Array of Day and Millisecond values.
+class ARROW_EXPORT DayTimeIntervalArray : public PrimitiveArray {
+ public:
+ using TypeClass = DayTimeIntervalType;
+
+ explicit DayTimeIntervalArray(const std::shared_ptr<ArrayData>& data);
+
+ DayTimeIntervalArray(const std::shared_ptr<DataType>& type, int64_t length,
+ const std::shared_ptr<Buffer>& data,
+ const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
+ int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
+ TypeClass::DayMilliseconds GetValue(int64_t i) const;
+ TypeClass::DayMilliseconds Value(int64_t i) const { return GetValue(i); }
+
+ // For compatibility with Take kernel.
+ TypeClass::DayMilliseconds GetView(int64_t i) const { return GetValue(i); }
+
+ int32_t byte_width() const { return sizeof(TypeClass::DayMilliseconds); }
+
+ const uint8_t* raw_values() const { return raw_values_ + data_->offset * byte_width(); }
+};
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_adaptive.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_adaptive.cc
index 36e5546a749..fa784dcd864 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_adaptive.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_adaptive.cc
@@ -1,380 +1,380 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/array/builder_adaptive.h"
-
-#include <algorithm>
-#include <cstdint>
-
-#include "arrow/array/data.h"
-#include "arrow/buffer.h"
-#include "arrow/buffer_builder.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/util/int_util.h"
-#include "arrow/util/logging.h"
-
-namespace arrow {
-
-using internal::AdaptiveIntBuilderBase;
-
-AdaptiveIntBuilderBase::AdaptiveIntBuilderBase(uint8_t start_int_size, MemoryPool* pool)
- : ArrayBuilder(pool), start_int_size_(start_int_size), int_size_(start_int_size) {}
-
-void AdaptiveIntBuilderBase::Reset() {
- ArrayBuilder::Reset();
- data_.reset();
- raw_data_ = nullptr;
- pending_pos_ = 0;
- pending_has_nulls_ = false;
- int_size_ = start_int_size_;
-}
-
-Status AdaptiveIntBuilderBase::Resize(int64_t capacity) {
- RETURN_NOT_OK(CheckCapacity(capacity));
- capacity = std::max(capacity, kMinBuilderCapacity);
-
- int64_t nbytes = capacity * int_size_;
- if (capacity_ == 0) {
- ARROW_ASSIGN_OR_RAISE(data_, AllocateResizableBuffer(nbytes, pool_));
- } else {
- RETURN_NOT_OK(data_->Resize(nbytes));
- }
- raw_data_ = reinterpret_cast<uint8_t*>(data_->mutable_data());
-
- return ArrayBuilder::Resize(capacity);
-}
-
-template <typename new_type, typename old_type>
-typename std::enable_if<sizeof(old_type) >= sizeof(new_type), Status>::type
-AdaptiveIntBuilderBase::ExpandIntSizeInternal() {
- return Status::OK();
-}
-
-template <typename new_type, typename old_type>
-typename std::enable_if<(sizeof(old_type) < sizeof(new_type)), Status>::type
-AdaptiveIntBuilderBase::ExpandIntSizeInternal() {
- int_size_ = sizeof(new_type);
- RETURN_NOT_OK(Resize(data_->size() / sizeof(old_type)));
-
- const old_type* src = reinterpret_cast<old_type*>(raw_data_);
- new_type* dst = reinterpret_cast<new_type*>(raw_data_);
- // By doing the backward copy, we ensure that no element is overridden during
- // the copy process while the copy stays in-place.
- std::copy_backward(src, src + length_, dst + length_);
-
- return Status::OK();
-}
-
-std::shared_ptr<DataType> AdaptiveUIntBuilder::type() const {
- auto int_size = int_size_;
- if (pending_pos_ != 0) {
- const uint8_t* valid_bytes = pending_has_nulls_ ? pending_valid_ : nullptr;
- int_size =
- internal::DetectUIntWidth(pending_data_, valid_bytes, pending_pos_, int_size_);
- }
- switch (int_size) {
- case 1:
- return uint8();
- case 2:
- return uint16();
- case 4:
- return uint32();
- case 8:
- return uint64();
- default:
- DCHECK(false);
- }
- return nullptr;
-}
-
-std::shared_ptr<DataType> AdaptiveIntBuilder::type() const {
- auto int_size = int_size_;
- if (pending_pos_ != 0) {
- const uint8_t* valid_bytes = pending_has_nulls_ ? pending_valid_ : nullptr;
- int_size = internal::DetectIntWidth(reinterpret_cast<const int64_t*>(pending_data_),
- valid_bytes, pending_pos_, int_size_);
- }
- switch (int_size) {
- case 1:
- return int8();
- case 2:
- return int16();
- case 4:
- return int32();
- case 8:
- return int64();
- default:
- DCHECK(false);
- }
- return nullptr;
-}
-
-AdaptiveIntBuilder::AdaptiveIntBuilder(uint8_t start_int_size, MemoryPool* pool)
- : AdaptiveIntBuilderBase(start_int_size, pool) {}
-
-Status AdaptiveIntBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
- RETURN_NOT_OK(CommitPendingData());
-
- std::shared_ptr<Buffer> null_bitmap;
- RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
- RETURN_NOT_OK(TrimBuffer(length_ * int_size_, data_.get()));
-
- *out = ArrayData::Make(type(), length_, {null_bitmap, data_}, null_count_);
-
- data_ = nullptr;
- capacity_ = length_ = null_count_ = 0;
- return Status::OK();
-}
-
-Status AdaptiveIntBuilder::CommitPendingData() {
- if (pending_pos_ == 0) {
- return Status::OK();
- }
- RETURN_NOT_OK(Reserve(pending_pos_));
- const uint8_t* valid_bytes = pending_has_nulls_ ? pending_valid_ : nullptr;
- RETURN_NOT_OK(AppendValuesInternal(reinterpret_cast<const int64_t*>(pending_data_),
- pending_pos_, valid_bytes));
- pending_has_nulls_ = false;
- pending_pos_ = 0;
- return Status::OK();
-}
-
-static constexpr int64_t kAdaptiveIntChunkSize = 8192;
-
-Status AdaptiveIntBuilder::AppendValuesInternal(const int64_t* values, int64_t length,
- const uint8_t* valid_bytes) {
- if (pending_pos_ > 0) {
- // UnsafeAppendToBitmap expects length_ to be the pre-update value, satisfy it
- DCHECK_EQ(length, pending_pos_) << "AppendValuesInternal called while data pending";
- length_ -= pending_pos_;
- }
-
- while (length > 0) {
- // In case `length` is very large, we don't want to trash the cache by
- // scanning it twice (first to detect int width, second to copy the data).
- // Instead, process data in L2-cacheable chunks.
- const int64_t chunk_size = std::min(length, kAdaptiveIntChunkSize);
-
- uint8_t new_int_size;
- new_int_size = internal::DetectIntWidth(values, valid_bytes, chunk_size, int_size_);
-
- DCHECK_GE(new_int_size, int_size_);
- if (new_int_size > int_size_) {
- // This updates int_size_
- RETURN_NOT_OK(ExpandIntSize(new_int_size));
- }
-
- switch (int_size_) {
- case 1:
- internal::DowncastInts(values, reinterpret_cast<int8_t*>(raw_data_) + length_,
- chunk_size);
- break;
- case 2:
- internal::DowncastInts(values, reinterpret_cast<int16_t*>(raw_data_) + length_,
- chunk_size);
- break;
- case 4:
- internal::DowncastInts(values, reinterpret_cast<int32_t*>(raw_data_) + length_,
- chunk_size);
- break;
- case 8:
- internal::DowncastInts(values, reinterpret_cast<int64_t*>(raw_data_) + length_,
- chunk_size);
- break;
- default:
- DCHECK(false);
- }
-
- // UnsafeAppendToBitmap increments length_ by chunk_size
- ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, chunk_size);
- values += chunk_size;
- if (valid_bytes != nullptr) {
- valid_bytes += chunk_size;
- }
- length -= chunk_size;
- }
-
- return Status::OK();
-}
-
-Status AdaptiveUIntBuilder::CommitPendingData() {
- if (pending_pos_ == 0) {
- return Status::OK();
- }
- RETURN_NOT_OK(Reserve(pending_pos_));
- const uint8_t* valid_bytes = pending_has_nulls_ ? pending_valid_ : nullptr;
- RETURN_NOT_OK(AppendValuesInternal(pending_data_, pending_pos_, valid_bytes));
- pending_has_nulls_ = false;
- pending_pos_ = 0;
- return Status::OK();
-}
-
-Status AdaptiveIntBuilder::AppendValues(const int64_t* values, int64_t length,
- const uint8_t* valid_bytes) {
- RETURN_NOT_OK(CommitPendingData());
- RETURN_NOT_OK(Reserve(length));
-
- return AppendValuesInternal(values, length, valid_bytes);
-}
-
-template <typename new_type>
-Status AdaptiveIntBuilder::ExpandIntSizeN() {
- switch (int_size_) {
- case 1:
- return ExpandIntSizeInternal<new_type, int8_t>();
- case 2:
- return ExpandIntSizeInternal<new_type, int16_t>();
- case 4:
- return ExpandIntSizeInternal<new_type, int32_t>();
- case 8:
- return ExpandIntSizeInternal<new_type, int64_t>();
- default:
- DCHECK(false);
- }
- return Status::OK();
-}
-
-Status AdaptiveIntBuilder::ExpandIntSize(uint8_t new_int_size) {
- switch (new_int_size) {
- case 1:
- return ExpandIntSizeN<int8_t>();
- case 2:
- return ExpandIntSizeN<int16_t>();
- case 4:
- return ExpandIntSizeN<int32_t>();
- case 8:
- return ExpandIntSizeN<int64_t>();
- default:
- DCHECK(false);
- }
- return Status::OK();
-}
-
-AdaptiveUIntBuilder::AdaptiveUIntBuilder(uint8_t start_int_size, MemoryPool* pool)
- : AdaptiveIntBuilderBase(start_int_size, pool) {}
-
-Status AdaptiveUIntBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
- RETURN_NOT_OK(CommitPendingData());
-
- std::shared_ptr<Buffer> null_bitmap;
- RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
- RETURN_NOT_OK(TrimBuffer(length_ * int_size_, data_.get()));
-
- *out = ArrayData::Make(type(), length_, {null_bitmap, data_}, null_count_);
-
- data_ = nullptr;
- capacity_ = length_ = null_count_ = 0;
- return Status::OK();
-}
-
-Status AdaptiveUIntBuilder::AppendValuesInternal(const uint64_t* values, int64_t length,
- const uint8_t* valid_bytes) {
- if (pending_pos_ > 0) {
- // UnsafeAppendToBitmap expects length_ to be the pre-update value, satisfy it
- DCHECK_EQ(length, pending_pos_) << "AppendValuesInternal called while data pending";
- length_ -= pending_pos_;
- }
-
- while (length > 0) {
- // See AdaptiveIntBuilder::AppendValuesInternal
- const int64_t chunk_size = std::min(length, kAdaptiveIntChunkSize);
-
- uint8_t new_int_size;
- new_int_size = internal::DetectUIntWidth(values, valid_bytes, chunk_size, int_size_);
-
- DCHECK_GE(new_int_size, int_size_);
- if (new_int_size > int_size_) {
- // This updates int_size_
- RETURN_NOT_OK(ExpandIntSize(new_int_size));
- }
-
- switch (int_size_) {
- case 1:
- internal::DowncastUInts(values, reinterpret_cast<uint8_t*>(raw_data_) + length_,
- chunk_size);
- break;
- case 2:
- internal::DowncastUInts(values, reinterpret_cast<uint16_t*>(raw_data_) + length_,
- chunk_size);
- break;
- case 4:
- internal::DowncastUInts(values, reinterpret_cast<uint32_t*>(raw_data_) + length_,
- chunk_size);
- break;
- case 8:
- internal::DowncastUInts(values, reinterpret_cast<uint64_t*>(raw_data_) + length_,
- chunk_size);
- break;
- default:
- DCHECK(false);
- }
-
- // UnsafeAppendToBitmap increments length_ by chunk_size
- ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, chunk_size);
- values += chunk_size;
- if (valid_bytes != nullptr) {
- valid_bytes += chunk_size;
- }
- length -= chunk_size;
- }
-
- return Status::OK();
-}
-
-Status AdaptiveUIntBuilder::AppendValues(const uint64_t* values, int64_t length,
- const uint8_t* valid_bytes) {
- RETURN_NOT_OK(Reserve(length));
-
- return AppendValuesInternal(values, length, valid_bytes);
-}
-
-template <typename new_type>
-Status AdaptiveUIntBuilder::ExpandIntSizeN() {
- switch (int_size_) {
- case 1:
- return ExpandIntSizeInternal<new_type, uint8_t>();
- case 2:
- return ExpandIntSizeInternal<new_type, uint16_t>();
- case 4:
- return ExpandIntSizeInternal<new_type, uint32_t>();
- case 8:
- return ExpandIntSizeInternal<new_type, uint64_t>();
- default:
- DCHECK(false);
- }
- return Status::OK();
-}
-
-Status AdaptiveUIntBuilder::ExpandIntSize(uint8_t new_int_size) {
- switch (new_int_size) {
- case 1:
- return ExpandIntSizeN<uint8_t>();
- case 2:
- return ExpandIntSizeN<uint16_t>();
- case 4:
- return ExpandIntSizeN<uint32_t>();
- case 8:
- return ExpandIntSizeN<uint64_t>();
- default:
- DCHECK(false);
- }
- return Status::OK();
-}
-
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/array/builder_adaptive.h"
+
+#include <algorithm>
+#include <cstdint>
+
+#include "arrow/array/data.h"
+#include "arrow/buffer.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/int_util.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+using internal::AdaptiveIntBuilderBase;
+
+AdaptiveIntBuilderBase::AdaptiveIntBuilderBase(uint8_t start_int_size, MemoryPool* pool)
+ : ArrayBuilder(pool), start_int_size_(start_int_size), int_size_(start_int_size) {}
+
+void AdaptiveIntBuilderBase::Reset() {
+ ArrayBuilder::Reset();
+ data_.reset();
+ raw_data_ = nullptr;
+ pending_pos_ = 0;
+ pending_has_nulls_ = false;
+ int_size_ = start_int_size_;
+}
+
+Status AdaptiveIntBuilderBase::Resize(int64_t capacity) {
+ RETURN_NOT_OK(CheckCapacity(capacity));
+ capacity = std::max(capacity, kMinBuilderCapacity);
+
+ int64_t nbytes = capacity * int_size_;
+ if (capacity_ == 0) {
+ ARROW_ASSIGN_OR_RAISE(data_, AllocateResizableBuffer(nbytes, pool_));
+ } else {
+ RETURN_NOT_OK(data_->Resize(nbytes));
+ }
+ raw_data_ = reinterpret_cast<uint8_t*>(data_->mutable_data());
+
+ return ArrayBuilder::Resize(capacity);
+}
+
+template <typename new_type, typename old_type>
+typename std::enable_if<sizeof(old_type) >= sizeof(new_type), Status>::type
+AdaptiveIntBuilderBase::ExpandIntSizeInternal() {
+ return Status::OK();
+}
+
+template <typename new_type, typename old_type>
+typename std::enable_if<(sizeof(old_type) < sizeof(new_type)), Status>::type
+AdaptiveIntBuilderBase::ExpandIntSizeInternal() {
+ int_size_ = sizeof(new_type);
+ RETURN_NOT_OK(Resize(data_->size() / sizeof(old_type)));
+
+ const old_type* src = reinterpret_cast<old_type*>(raw_data_);
+ new_type* dst = reinterpret_cast<new_type*>(raw_data_);
+ // By doing the backward copy, we ensure that no element is overridden during
+ // the copy process while the copy stays in-place.
+ std::copy_backward(src, src + length_, dst + length_);
+
+ return Status::OK();
+}
+
+std::shared_ptr<DataType> AdaptiveUIntBuilder::type() const {
+ auto int_size = int_size_;
+ if (pending_pos_ != 0) {
+ const uint8_t* valid_bytes = pending_has_nulls_ ? pending_valid_ : nullptr;
+ int_size =
+ internal::DetectUIntWidth(pending_data_, valid_bytes, pending_pos_, int_size_);
+ }
+ switch (int_size) {
+ case 1:
+ return uint8();
+ case 2:
+ return uint16();
+ case 4:
+ return uint32();
+ case 8:
+ return uint64();
+ default:
+ DCHECK(false);
+ }
+ return nullptr;
+}
+
+std::shared_ptr<DataType> AdaptiveIntBuilder::type() const {
+ auto int_size = int_size_;
+ if (pending_pos_ != 0) {
+ const uint8_t* valid_bytes = pending_has_nulls_ ? pending_valid_ : nullptr;
+ int_size = internal::DetectIntWidth(reinterpret_cast<const int64_t*>(pending_data_),
+ valid_bytes, pending_pos_, int_size_);
+ }
+ switch (int_size) {
+ case 1:
+ return int8();
+ case 2:
+ return int16();
+ case 4:
+ return int32();
+ case 8:
+ return int64();
+ default:
+ DCHECK(false);
+ }
+ return nullptr;
+}
+
+AdaptiveIntBuilder::AdaptiveIntBuilder(uint8_t start_int_size, MemoryPool* pool)
+ : AdaptiveIntBuilderBase(start_int_size, pool) {}
+
+Status AdaptiveIntBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+ RETURN_NOT_OK(CommitPendingData());
+
+ std::shared_ptr<Buffer> null_bitmap;
+ RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
+ RETURN_NOT_OK(TrimBuffer(length_ * int_size_, data_.get()));
+
+ *out = ArrayData::Make(type(), length_, {null_bitmap, data_}, null_count_);
+
+ data_ = nullptr;
+ capacity_ = length_ = null_count_ = 0;
+ return Status::OK();
+}
+
+Status AdaptiveIntBuilder::CommitPendingData() {
+ if (pending_pos_ == 0) {
+ return Status::OK();
+ }
+ RETURN_NOT_OK(Reserve(pending_pos_));
+ const uint8_t* valid_bytes = pending_has_nulls_ ? pending_valid_ : nullptr;
+ RETURN_NOT_OK(AppendValuesInternal(reinterpret_cast<const int64_t*>(pending_data_),
+ pending_pos_, valid_bytes));
+ pending_has_nulls_ = false;
+ pending_pos_ = 0;
+ return Status::OK();
+}
+
+static constexpr int64_t kAdaptiveIntChunkSize = 8192;
+
+Status AdaptiveIntBuilder::AppendValuesInternal(const int64_t* values, int64_t length,
+ const uint8_t* valid_bytes) {
+ if (pending_pos_ > 0) {
+ // UnsafeAppendToBitmap expects length_ to be the pre-update value, satisfy it
+ DCHECK_EQ(length, pending_pos_) << "AppendValuesInternal called while data pending";
+ length_ -= pending_pos_;
+ }
+
+ while (length > 0) {
+ // In case `length` is very large, we don't want to trash the cache by
+ // scanning it twice (first to detect int width, second to copy the data).
+ // Instead, process data in L2-cacheable chunks.
+ const int64_t chunk_size = std::min(length, kAdaptiveIntChunkSize);
+
+ uint8_t new_int_size;
+ new_int_size = internal::DetectIntWidth(values, valid_bytes, chunk_size, int_size_);
+
+ DCHECK_GE(new_int_size, int_size_);
+ if (new_int_size > int_size_) {
+ // This updates int_size_
+ RETURN_NOT_OK(ExpandIntSize(new_int_size));
+ }
+
+ switch (int_size_) {
+ case 1:
+ internal::DowncastInts(values, reinterpret_cast<int8_t*>(raw_data_) + length_,
+ chunk_size);
+ break;
+ case 2:
+ internal::DowncastInts(values, reinterpret_cast<int16_t*>(raw_data_) + length_,
+ chunk_size);
+ break;
+ case 4:
+ internal::DowncastInts(values, reinterpret_cast<int32_t*>(raw_data_) + length_,
+ chunk_size);
+ break;
+ case 8:
+ internal::DowncastInts(values, reinterpret_cast<int64_t*>(raw_data_) + length_,
+ chunk_size);
+ break;
+ default:
+ DCHECK(false);
+ }
+
+ // UnsafeAppendToBitmap increments length_ by chunk_size
+ ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, chunk_size);
+ values += chunk_size;
+ if (valid_bytes != nullptr) {
+ valid_bytes += chunk_size;
+ }
+ length -= chunk_size;
+ }
+
+ return Status::OK();
+}
+
+Status AdaptiveUIntBuilder::CommitPendingData() {
+ if (pending_pos_ == 0) {
+ return Status::OK();
+ }
+ RETURN_NOT_OK(Reserve(pending_pos_));
+ const uint8_t* valid_bytes = pending_has_nulls_ ? pending_valid_ : nullptr;
+ RETURN_NOT_OK(AppendValuesInternal(pending_data_, pending_pos_, valid_bytes));
+ pending_has_nulls_ = false;
+ pending_pos_ = 0;
+ return Status::OK();
+}
+
+Status AdaptiveIntBuilder::AppendValues(const int64_t* values, int64_t length,
+ const uint8_t* valid_bytes) {
+ RETURN_NOT_OK(CommitPendingData());
+ RETURN_NOT_OK(Reserve(length));
+
+ return AppendValuesInternal(values, length, valid_bytes);
+}
+
+template <typename new_type>
+Status AdaptiveIntBuilder::ExpandIntSizeN() {
+ switch (int_size_) {
+ case 1:
+ return ExpandIntSizeInternal<new_type, int8_t>();
+ case 2:
+ return ExpandIntSizeInternal<new_type, int16_t>();
+ case 4:
+ return ExpandIntSizeInternal<new_type, int32_t>();
+ case 8:
+ return ExpandIntSizeInternal<new_type, int64_t>();
+ default:
+ DCHECK(false);
+ }
+ return Status::OK();
+}
+
+Status AdaptiveIntBuilder::ExpandIntSize(uint8_t new_int_size) {
+ switch (new_int_size) {
+ case 1:
+ return ExpandIntSizeN<int8_t>();
+ case 2:
+ return ExpandIntSizeN<int16_t>();
+ case 4:
+ return ExpandIntSizeN<int32_t>();
+ case 8:
+ return ExpandIntSizeN<int64_t>();
+ default:
+ DCHECK(false);
+ }
+ return Status::OK();
+}
+
+AdaptiveUIntBuilder::AdaptiveUIntBuilder(uint8_t start_int_size, MemoryPool* pool)
+ : AdaptiveIntBuilderBase(start_int_size, pool) {}
+
+Status AdaptiveUIntBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+ RETURN_NOT_OK(CommitPendingData());
+
+ std::shared_ptr<Buffer> null_bitmap;
+ RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
+ RETURN_NOT_OK(TrimBuffer(length_ * int_size_, data_.get()));
+
+ *out = ArrayData::Make(type(), length_, {null_bitmap, data_}, null_count_);
+
+ data_ = nullptr;
+ capacity_ = length_ = null_count_ = 0;
+ return Status::OK();
+}
+
+Status AdaptiveUIntBuilder::AppendValuesInternal(const uint64_t* values, int64_t length,
+ const uint8_t* valid_bytes) {
+ if (pending_pos_ > 0) {
+ // UnsafeAppendToBitmap expects length_ to be the pre-update value, satisfy it
+ DCHECK_EQ(length, pending_pos_) << "AppendValuesInternal called while data pending";
+ length_ -= pending_pos_;
+ }
+
+ while (length > 0) {
+ // See AdaptiveIntBuilder::AppendValuesInternal
+ const int64_t chunk_size = std::min(length, kAdaptiveIntChunkSize);
+
+ uint8_t new_int_size;
+ new_int_size = internal::DetectUIntWidth(values, valid_bytes, chunk_size, int_size_);
+
+ DCHECK_GE(new_int_size, int_size_);
+ if (new_int_size > int_size_) {
+ // This updates int_size_
+ RETURN_NOT_OK(ExpandIntSize(new_int_size));
+ }
+
+ switch (int_size_) {
+ case 1:
+ internal::DowncastUInts(values, reinterpret_cast<uint8_t*>(raw_data_) + length_,
+ chunk_size);
+ break;
+ case 2:
+ internal::DowncastUInts(values, reinterpret_cast<uint16_t*>(raw_data_) + length_,
+ chunk_size);
+ break;
+ case 4:
+ internal::DowncastUInts(values, reinterpret_cast<uint32_t*>(raw_data_) + length_,
+ chunk_size);
+ break;
+ case 8:
+ internal::DowncastUInts(values, reinterpret_cast<uint64_t*>(raw_data_) + length_,
+ chunk_size);
+ break;
+ default:
+ DCHECK(false);
+ }
+
+ // UnsafeAppendToBitmap increments length_ by chunk_size
+ ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, chunk_size);
+ values += chunk_size;
+ if (valid_bytes != nullptr) {
+ valid_bytes += chunk_size;
+ }
+ length -= chunk_size;
+ }
+
+ return Status::OK();
+}
+
+Status AdaptiveUIntBuilder::AppendValues(const uint64_t* values, int64_t length,
+ const uint8_t* valid_bytes) {
+ RETURN_NOT_OK(Reserve(length));
+
+ return AppendValuesInternal(values, length, valid_bytes);
+}
+
+template <typename new_type>
+Status AdaptiveUIntBuilder::ExpandIntSizeN() {
+ switch (int_size_) {
+ case 1:
+ return ExpandIntSizeInternal<new_type, uint8_t>();
+ case 2:
+ return ExpandIntSizeInternal<new_type, uint16_t>();
+ case 4:
+ return ExpandIntSizeInternal<new_type, uint32_t>();
+ case 8:
+ return ExpandIntSizeInternal<new_type, uint64_t>();
+ default:
+ DCHECK(false);
+ }
+ return Status::OK();
+}
+
+Status AdaptiveUIntBuilder::ExpandIntSize(uint8_t new_int_size) {
+ switch (new_int_size) {
+ case 1:
+ return ExpandIntSizeN<uint8_t>();
+ case 2:
+ return ExpandIntSizeN<uint16_t>();
+ case 4:
+ return ExpandIntSizeN<uint32_t>();
+ case 8:
+ return ExpandIntSizeN<uint64_t>();
+ default:
+ DCHECK(false);
+ }
+ return Status::OK();
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_adaptive.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_adaptive.h
index c0df797256d..ebc9a9dba68 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_adaptive.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_adaptive.h
@@ -1,69 +1,69 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <cstring>
-#include <memory>
-#include <type_traits>
-
-#include "arrow/array/builder_base.h"
-#include "arrow/buffer.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-namespace internal {
-
-class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder {
- public:
- AdaptiveIntBuilderBase(uint8_t start_int_size, MemoryPool* pool);
-
- explicit AdaptiveIntBuilderBase(MemoryPool* pool)
- : AdaptiveIntBuilderBase(sizeof(uint8_t), pool) {}
-
- /// \brief Append multiple nulls
- /// \param[in] length the number of nulls to append
- Status AppendNulls(int64_t length) final {
- ARROW_RETURN_NOT_OK(CommitPendingData());
- ARROW_RETURN_NOT_OK(Reserve(length));
- memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length);
- UnsafeSetNull(length);
- return Status::OK();
- }
-
- Status AppendNull() final {
- pending_data_[pending_pos_] = 0;
- pending_valid_[pending_pos_] = 0;
- pending_has_nulls_ = true;
- ++pending_pos_;
- ++length_;
- ++null_count_;
-
- if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) {
- return CommitPendingData();
- }
- return Status::OK();
- }
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <type_traits>
+
+#include "arrow/array/builder_base.h"
+#include "arrow/buffer.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+namespace internal {
+
+class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder {
+ public:
+ AdaptiveIntBuilderBase(uint8_t start_int_size, MemoryPool* pool);
+
+ explicit AdaptiveIntBuilderBase(MemoryPool* pool)
+ : AdaptiveIntBuilderBase(sizeof(uint8_t), pool) {}
+
+ /// \brief Append multiple nulls
+ /// \param[in] length the number of nulls to append
+ Status AppendNulls(int64_t length) final {
+ ARROW_RETURN_NOT_OK(CommitPendingData());
+ ARROW_RETURN_NOT_OK(Reserve(length));
+ memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length);
+ UnsafeSetNull(length);
+ return Status::OK();
+ }
+
+ Status AppendNull() final {
+ pending_data_[pending_pos_] = 0;
+ pending_valid_[pending_pos_] = 0;
+ pending_has_nulls_ = true;
+ ++pending_pos_;
+ ++length_;
+ ++null_count_;
+
+ if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) {
+ return CommitPendingData();
+ }
+ return Status::OK();
+ }
+
Status AppendEmptyValues(int64_t length) final {
ARROW_RETURN_NOT_OK(CommitPendingData());
ARROW_RETURN_NOT_OK(Reserve(length));
@@ -84,120 +84,120 @@ class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder {
return Status::OK();
}
- void Reset() override;
- Status Resize(int64_t capacity) override;
-
- protected:
- Status AppendInternal(const uint64_t val) {
- pending_data_[pending_pos_] = val;
- pending_valid_[pending_pos_] = 1;
- ++pending_pos_;
- ++length_;
-
- if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) {
- return CommitPendingData();
- }
- return Status::OK();
- }
-
- virtual Status CommitPendingData() = 0;
-
- template <typename new_type, typename old_type>
- typename std::enable_if<sizeof(old_type) >= sizeof(new_type), Status>::type
- ExpandIntSizeInternal();
- template <typename new_type, typename old_type>
- typename std::enable_if<(sizeof(old_type) < sizeof(new_type)), Status>::type
- ExpandIntSizeInternal();
-
- std::shared_ptr<ResizableBuffer> data_;
- uint8_t* raw_data_ = NULLPTR;
-
- const uint8_t start_int_size_;
- uint8_t int_size_;
-
- static constexpr int32_t pending_size_ = 1024;
- uint8_t pending_valid_[pending_size_];
- uint64_t pending_data_[pending_size_];
- int32_t pending_pos_ = 0;
- bool pending_has_nulls_ = false;
-};
-
-} // namespace internal
-
-class ARROW_EXPORT AdaptiveUIntBuilder : public internal::AdaptiveIntBuilderBase {
- public:
- explicit AdaptiveUIntBuilder(uint8_t start_int_size,
- MemoryPool* pool = default_memory_pool());
-
- explicit AdaptiveUIntBuilder(MemoryPool* pool = default_memory_pool())
- : AdaptiveUIntBuilder(sizeof(uint8_t), pool) {}
-
- using ArrayBuilder::Advance;
- using internal::AdaptiveIntBuilderBase::Reset;
-
- /// Scalar append
- Status Append(const uint64_t val) { return AppendInternal(val); }
-
- /// \brief Append a sequence of elements in one shot
- /// \param[in] values a contiguous C array of values
- /// \param[in] length the number of values to append
- /// \param[in] valid_bytes an optional sequence of bytes where non-zero
- /// indicates a valid (non-null) value
- /// \return Status
- Status AppendValues(const uint64_t* values, int64_t length,
- const uint8_t* valid_bytes = NULLPTR);
-
- Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
-
- std::shared_ptr<DataType> type() const override;
-
- protected:
- Status CommitPendingData() override;
- Status ExpandIntSize(uint8_t new_int_size);
-
- Status AppendValuesInternal(const uint64_t* values, int64_t length,
- const uint8_t* valid_bytes);
-
- template <typename new_type>
- Status ExpandIntSizeN();
-};
-
-class ARROW_EXPORT AdaptiveIntBuilder : public internal::AdaptiveIntBuilderBase {
- public:
- explicit AdaptiveIntBuilder(uint8_t start_int_size,
- MemoryPool* pool = default_memory_pool());
-
- explicit AdaptiveIntBuilder(MemoryPool* pool = default_memory_pool())
- : AdaptiveIntBuilder(sizeof(uint8_t), pool) {}
-
- using ArrayBuilder::Advance;
- using internal::AdaptiveIntBuilderBase::Reset;
-
- /// Scalar append
- Status Append(const int64_t val) { return AppendInternal(static_cast<uint64_t>(val)); }
-
- /// \brief Append a sequence of elements in one shot
- /// \param[in] values a contiguous C array of values
- /// \param[in] length the number of values to append
- /// \param[in] valid_bytes an optional sequence of bytes where non-zero
- /// indicates a valid (non-null) value
- /// \return Status
- Status AppendValues(const int64_t* values, int64_t length,
- const uint8_t* valid_bytes = NULLPTR);
-
- Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
-
- std::shared_ptr<DataType> type() const override;
-
- protected:
- Status CommitPendingData() override;
- Status ExpandIntSize(uint8_t new_int_size);
-
- Status AppendValuesInternal(const int64_t* values, int64_t length,
- const uint8_t* valid_bytes);
-
- template <typename new_type>
- Status ExpandIntSizeN();
-};
-
-} // namespace arrow
+ void Reset() override;
+ Status Resize(int64_t capacity) override;
+
+ protected:
+ Status AppendInternal(const uint64_t val) {
+ pending_data_[pending_pos_] = val;
+ pending_valid_[pending_pos_] = 1;
+ ++pending_pos_;
+ ++length_;
+
+ if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) {
+ return CommitPendingData();
+ }
+ return Status::OK();
+ }
+
+ virtual Status CommitPendingData() = 0;
+
+ template <typename new_type, typename old_type>
+ typename std::enable_if<sizeof(old_type) >= sizeof(new_type), Status>::type
+ ExpandIntSizeInternal();
+ template <typename new_type, typename old_type>
+ typename std::enable_if<(sizeof(old_type) < sizeof(new_type)), Status>::type
+ ExpandIntSizeInternal();
+
+ std::shared_ptr<ResizableBuffer> data_;
+ uint8_t* raw_data_ = NULLPTR;
+
+ const uint8_t start_int_size_;
+ uint8_t int_size_;
+
+ static constexpr int32_t pending_size_ = 1024;
+ uint8_t pending_valid_[pending_size_];
+ uint64_t pending_data_[pending_size_];
+ int32_t pending_pos_ = 0;
+ bool pending_has_nulls_ = false;
+};
+
+} // namespace internal
+
+class ARROW_EXPORT AdaptiveUIntBuilder : public internal::AdaptiveIntBuilderBase {
+ public:
+ explicit AdaptiveUIntBuilder(uint8_t start_int_size,
+ MemoryPool* pool = default_memory_pool());
+
+ explicit AdaptiveUIntBuilder(MemoryPool* pool = default_memory_pool())
+ : AdaptiveUIntBuilder(sizeof(uint8_t), pool) {}
+
+ using ArrayBuilder::Advance;
+ using internal::AdaptiveIntBuilderBase::Reset;
+
+ /// Scalar append
+ Status Append(const uint64_t val) { return AppendInternal(val); }
+
+ /// \brief Append a sequence of elements in one shot
+ /// \param[in] values a contiguous C array of values
+ /// \param[in] length the number of values to append
+ /// \param[in] valid_bytes an optional sequence of bytes where non-zero
+ /// indicates a valid (non-null) value
+ /// \return Status
+ Status AppendValues(const uint64_t* values, int64_t length,
+ const uint8_t* valid_bytes = NULLPTR);
+
+ Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
+
+ std::shared_ptr<DataType> type() const override;
+
+ protected:
+ Status CommitPendingData() override;
+ Status ExpandIntSize(uint8_t new_int_size);
+
+ Status AppendValuesInternal(const uint64_t* values, int64_t length,
+ const uint8_t* valid_bytes);
+
+ template <typename new_type>
+ Status ExpandIntSizeN();
+};
+
+class ARROW_EXPORT AdaptiveIntBuilder : public internal::AdaptiveIntBuilderBase {
+ public:
+ explicit AdaptiveIntBuilder(uint8_t start_int_size,
+ MemoryPool* pool = default_memory_pool());
+
+ explicit AdaptiveIntBuilder(MemoryPool* pool = default_memory_pool())
+ : AdaptiveIntBuilder(sizeof(uint8_t), pool) {}
+
+ using ArrayBuilder::Advance;
+ using internal::AdaptiveIntBuilderBase::Reset;
+
+ /// Scalar append
+ Status Append(const int64_t val) { return AppendInternal(static_cast<uint64_t>(val)); }
+
+ /// \brief Append a sequence of elements in one shot
+ /// \param[in] values a contiguous C array of values
+ /// \param[in] length the number of values to append
+ /// \param[in] valid_bytes an optional sequence of bytes where non-zero
+ /// indicates a valid (non-null) value
+ /// \return Status
+ Status AppendValues(const int64_t* values, int64_t length,
+ const uint8_t* valid_bytes = NULLPTR);
+
+ Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
+
+ std::shared_ptr<DataType> type() const override;
+
+ protected:
+ Status CommitPendingData() override;
+ Status ExpandIntSize(uint8_t new_int_size);
+
+ Status AppendValuesInternal(const int64_t* values, int64_t length,
+ const uint8_t* valid_bytes);
+
+ template <typename new_type>
+ Status ExpandIntSizeN();
+};
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_base.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_base.cc
index c892e3d664b..254c1537963 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_base.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_base.cc
@@ -1,100 +1,100 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/array/builder_base.h"
-
-#include <cstdint>
-#include <vector>
-
-#include "arrow/array/array_base.h"
-#include "arrow/array/data.h"
-#include "arrow/array/util.h"
-#include "arrow/buffer.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/array/builder_base.h"
+
+#include <cstdint>
+#include <vector>
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/data.h"
+#include "arrow/array/util.h"
+#include "arrow/buffer.h"
#include "arrow/builder.h"
#include "arrow/scalar.h"
-#include "arrow/status.h"
-#include "arrow/util/logging.h"
+#include "arrow/status.h"
+#include "arrow/util/logging.h"
#include "arrow/visitor_inline.h"
-
-namespace arrow {
-
-Status ArrayBuilder::CheckArrayType(const std::shared_ptr<DataType>& expected_type,
- const Array& array, const char* message) {
- if (!expected_type->Equals(*array.type())) {
- return Status::TypeError(message);
- }
- return Status::OK();
-}
-
-Status ArrayBuilder::CheckArrayType(Type::type expected_type, const Array& array,
- const char* message) {
- if (array.type_id() != expected_type) {
- return Status::TypeError(message);
- }
- return Status::OK();
-}
-
-Status ArrayBuilder::TrimBuffer(const int64_t bytes_filled, ResizableBuffer* buffer) {
- if (buffer) {
- if (bytes_filled < buffer->size()) {
- // Trim buffer
- RETURN_NOT_OK(buffer->Resize(bytes_filled));
- }
- // zero the padding
- buffer->ZeroPadding();
- } else {
- // Null buffers are allowed in place of 0-byte buffers
- DCHECK_EQ(bytes_filled, 0);
- }
- return Status::OK();
-}
-
-Status ArrayBuilder::AppendToBitmap(bool is_valid) {
- RETURN_NOT_OK(Reserve(1));
- UnsafeAppendToBitmap(is_valid);
- return Status::OK();
-}
-
-Status ArrayBuilder::AppendToBitmap(const uint8_t* valid_bytes, int64_t length) {
- RETURN_NOT_OK(Reserve(length));
- UnsafeAppendToBitmap(valid_bytes, length);
- return Status::OK();
-}
-
-Status ArrayBuilder::AppendToBitmap(int64_t num_bits, bool value) {
- RETURN_NOT_OK(Reserve(num_bits));
- UnsafeAppendToBitmap(num_bits, value);
- return Status::OK();
-}
-
-Status ArrayBuilder::Resize(int64_t capacity) {
- RETURN_NOT_OK(CheckCapacity(capacity));
- capacity_ = capacity;
- return null_bitmap_builder_.Resize(capacity);
-}
-
-Status ArrayBuilder::Advance(int64_t elements) {
- if (length_ + elements > capacity_) {
- return Status::Invalid("Builder must be expanded");
- }
- length_ += elements;
- return null_bitmap_builder_.Advance(elements);
-}
-
+
+namespace arrow {
+
+Status ArrayBuilder::CheckArrayType(const std::shared_ptr<DataType>& expected_type,
+ const Array& array, const char* message) {
+ if (!expected_type->Equals(*array.type())) {
+ return Status::TypeError(message);
+ }
+ return Status::OK();
+}
+
+Status ArrayBuilder::CheckArrayType(Type::type expected_type, const Array& array,
+ const char* message) {
+ if (array.type_id() != expected_type) {
+ return Status::TypeError(message);
+ }
+ return Status::OK();
+}
+
+Status ArrayBuilder::TrimBuffer(const int64_t bytes_filled, ResizableBuffer* buffer) {
+ if (buffer) {
+ if (bytes_filled < buffer->size()) {
+ // Trim buffer
+ RETURN_NOT_OK(buffer->Resize(bytes_filled));
+ }
+ // zero the padding
+ buffer->ZeroPadding();
+ } else {
+ // Null buffers are allowed in place of 0-byte buffers
+ DCHECK_EQ(bytes_filled, 0);
+ }
+ return Status::OK();
+}
+
+Status ArrayBuilder::AppendToBitmap(bool is_valid) {
+ RETURN_NOT_OK(Reserve(1));
+ UnsafeAppendToBitmap(is_valid);
+ return Status::OK();
+}
+
+Status ArrayBuilder::AppendToBitmap(const uint8_t* valid_bytes, int64_t length) {
+ RETURN_NOT_OK(Reserve(length));
+ UnsafeAppendToBitmap(valid_bytes, length);
+ return Status::OK();
+}
+
+Status ArrayBuilder::AppendToBitmap(int64_t num_bits, bool value) {
+ RETURN_NOT_OK(Reserve(num_bits));
+ UnsafeAppendToBitmap(num_bits, value);
+ return Status::OK();
+}
+
+Status ArrayBuilder::Resize(int64_t capacity) {
+ RETURN_NOT_OK(CheckCapacity(capacity));
+ capacity_ = capacity;
+ return null_bitmap_builder_.Resize(capacity);
+}
+
+Status ArrayBuilder::Advance(int64_t elements) {
+ if (length_ + elements > capacity_) {
+ return Status::Invalid("Builder must be expanded");
+ }
+ length_ += elements;
+ return null_bitmap_builder_.Advance(elements);
+}
+
namespace {
struct AppendScalarImpl {
template <typename T>
@@ -251,45 +251,45 @@ Status ArrayBuilder::AppendScalars(const ScalarVector& scalars) {
.Convert();
}
-Status ArrayBuilder::Finish(std::shared_ptr<Array>* out) {
- std::shared_ptr<ArrayData> internal_data;
- RETURN_NOT_OK(FinishInternal(&internal_data));
- *out = MakeArray(internal_data);
- return Status::OK();
-}
-
-Result<std::shared_ptr<Array>> ArrayBuilder::Finish() {
- std::shared_ptr<Array> out;
- RETURN_NOT_OK(Finish(&out));
- return out;
-}
-
-void ArrayBuilder::Reset() {
- capacity_ = length_ = null_count_ = 0;
- null_bitmap_builder_.Reset();
-}
-
-Status ArrayBuilder::SetNotNull(int64_t length) {
- RETURN_NOT_OK(Reserve(length));
- UnsafeSetNotNull(length);
- return Status::OK();
-}
-
-void ArrayBuilder::UnsafeAppendToBitmap(const std::vector<bool>& is_valid) {
- for (bool element_valid : is_valid) {
- UnsafeAppendToBitmap(element_valid);
- }
-}
-
-void ArrayBuilder::UnsafeSetNotNull(int64_t length) {
- length_ += length;
- null_bitmap_builder_.UnsafeAppend(length, true);
-}
-
-void ArrayBuilder::UnsafeSetNull(int64_t length) {
- length_ += length;
- null_count_ += length;
- null_bitmap_builder_.UnsafeAppend(length, false);
-}
-
-} // namespace arrow
+Status ArrayBuilder::Finish(std::shared_ptr<Array>* out) {
+ std::shared_ptr<ArrayData> internal_data;
+ RETURN_NOT_OK(FinishInternal(&internal_data));
+ *out = MakeArray(internal_data);
+ return Status::OK();
+}
+
+Result<std::shared_ptr<Array>> ArrayBuilder::Finish() {
+ std::shared_ptr<Array> out;
+ RETURN_NOT_OK(Finish(&out));
+ return out;
+}
+
+void ArrayBuilder::Reset() {
+ capacity_ = length_ = null_count_ = 0;
+ null_bitmap_builder_.Reset();
+}
+
+Status ArrayBuilder::SetNotNull(int64_t length) {
+ RETURN_NOT_OK(Reserve(length));
+ UnsafeSetNotNull(length);
+ return Status::OK();
+}
+
+void ArrayBuilder::UnsafeAppendToBitmap(const std::vector<bool>& is_valid) {
+ for (bool element_valid : is_valid) {
+ UnsafeAppendToBitmap(element_valid);
+ }
+}
+
+void ArrayBuilder::UnsafeSetNotNull(int64_t length) {
+ length_ += length;
+ null_bitmap_builder_.UnsafeAppend(length, true);
+}
+
+void ArrayBuilder::UnsafeSetNull(int64_t length) {
+ length_ += length;
+ null_count_ += length;
+ null_bitmap_builder_.UnsafeAppend(length, false);
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_base.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_base.h
index 905b3c1b491..2d360c955d6 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_base.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_base.h
@@ -1,108 +1,108 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <algorithm> // IWYU pragma: keep
-#include <cstdint>
-#include <limits>
-#include <memory>
-#include <utility>
-#include <vector>
-
-#include "arrow/array/array_base.h"
-#include "arrow/array/array_primitive.h"
-#include "arrow/buffer.h"
-#include "arrow/buffer_builder.h"
-#include "arrow/status.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm> // IWYU pragma: keep
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/array_primitive.h"
+#include "arrow/buffer.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/status.h"
#include "arrow/type_fwd.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-constexpr int64_t kMinBuilderCapacity = 1 << 5;
-constexpr int64_t kListMaximumElements = std::numeric_limits<int32_t>::max() - 1;
-
-/// Base class for all data array builders.
-///
-/// This class provides a facilities for incrementally building the null bitmap
-/// (see Append methods) and as a side effect the current number of slots and
-/// the null count.
-///
-/// \note Users are expected to use builders as one of the concrete types below.
-/// For example, ArrayBuilder* pointing to BinaryBuilder should be downcast before use.
-class ARROW_EXPORT ArrayBuilder {
- public:
- explicit ArrayBuilder(MemoryPool* pool) : pool_(pool), null_bitmap_builder_(pool) {}
-
- virtual ~ArrayBuilder() = default;
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+constexpr int64_t kMinBuilderCapacity = 1 << 5;
+constexpr int64_t kListMaximumElements = std::numeric_limits<int32_t>::max() - 1;
+
+/// Base class for all data array builders.
+///
+/// This class provides a facilities for incrementally building the null bitmap
+/// (see Append methods) and as a side effect the current number of slots and
+/// the null count.
+///
+/// \note Users are expected to use builders as one of the concrete types below.
+/// For example, ArrayBuilder* pointing to BinaryBuilder should be downcast before use.
+class ARROW_EXPORT ArrayBuilder {
+ public:
+ explicit ArrayBuilder(MemoryPool* pool) : pool_(pool), null_bitmap_builder_(pool) {}
+
+ virtual ~ArrayBuilder() = default;
ARROW_DEFAULT_MOVE_AND_ASSIGN(ArrayBuilder);
-
- /// For nested types. Since the objects are owned by this class instance, we
- /// skip shared pointers and just return a raw pointer
- ArrayBuilder* child(int i) { return children_[i].get(); }
-
- const std::shared_ptr<ArrayBuilder>& child_builder(int i) const { return children_[i]; }
-
- int num_children() const { return static_cast<int>(children_.size()); }
-
- virtual int64_t length() const { return length_; }
- int64_t null_count() const { return null_count_; }
- int64_t capacity() const { return capacity_; }
-
- /// \brief Ensure that enough memory has been allocated to fit the indicated
- /// number of total elements in the builder, including any that have already
- /// been appended. Does not account for reallocations that may be due to
- /// variable size data, like binary values. To make space for incremental
- /// appends, use Reserve instead.
- ///
- /// \param[in] capacity the minimum number of total array values to
- /// accommodate. Must be greater than the current capacity.
- /// \return Status
- virtual Status Resize(int64_t capacity);
-
- /// \brief Ensure that there is enough space allocated to append the indicated
- /// number of elements without any further reallocation. Overallocation is
- /// used in order to minimize the impact of incremental Reserve() calls.
- /// Note that additional_capacity is relative to the current number of elements
- /// rather than to the current capacity, so calls to Reserve() which are not
- /// interspersed with addition of new elements may not increase the capacity.
- ///
- /// \param[in] additional_capacity the number of additional array values
- /// \return Status
- Status Reserve(int64_t additional_capacity) {
- auto current_capacity = capacity();
- auto min_capacity = length() + additional_capacity;
- if (min_capacity <= current_capacity) return Status::OK();
-
- // leave growth factor up to BufferBuilder
- auto new_capacity = BufferBuilder::GrowByFactor(current_capacity, min_capacity);
- return Resize(new_capacity);
- }
-
- /// Reset the builder.
- virtual void Reset();
-
+
+ /// For nested types. Since the objects are owned by this class instance, we
+ /// skip shared pointers and just return a raw pointer
+ ArrayBuilder* child(int i) { return children_[i].get(); }
+
+ const std::shared_ptr<ArrayBuilder>& child_builder(int i) const { return children_[i]; }
+
+ int num_children() const { return static_cast<int>(children_.size()); }
+
+ virtual int64_t length() const { return length_; }
+ int64_t null_count() const { return null_count_; }
+ int64_t capacity() const { return capacity_; }
+
+ /// \brief Ensure that enough memory has been allocated to fit the indicated
+ /// number of total elements in the builder, including any that have already
+ /// been appended. Does not account for reallocations that may be due to
+ /// variable size data, like binary values. To make space for incremental
+ /// appends, use Reserve instead.
+ ///
+ /// \param[in] capacity the minimum number of total array values to
+ /// accommodate. Must be greater than the current capacity.
+ /// \return Status
+ virtual Status Resize(int64_t capacity);
+
+ /// \brief Ensure that there is enough space allocated to append the indicated
+ /// number of elements without any further reallocation. Overallocation is
+ /// used in order to minimize the impact of incremental Reserve() calls.
+ /// Note that additional_capacity is relative to the current number of elements
+ /// rather than to the current capacity, so calls to Reserve() which are not
+ /// interspersed with addition of new elements may not increase the capacity.
+ ///
+ /// \param[in] additional_capacity the number of additional array values
+ /// \return Status
+ Status Reserve(int64_t additional_capacity) {
+ auto current_capacity = capacity();
+ auto min_capacity = length() + additional_capacity;
+ if (min_capacity <= current_capacity) return Status::OK();
+
+ // leave growth factor up to BufferBuilder
+ auto new_capacity = BufferBuilder::GrowByFactor(current_capacity, min_capacity);
+ return Resize(new_capacity);
+ }
+
+ /// Reset the builder.
+ virtual void Reset();
+
/// \brief Append a null value to builder
- virtual Status AppendNull() = 0;
+ virtual Status AppendNull() = 0;
/// \brief Append a number of null values to builder
- virtual Status AppendNulls(int64_t length) = 0;
-
+ virtual Status AppendNulls(int64_t length) = 0;
+
/// \brief Append a non-null value to builder
///
/// The appended value is an implementation detail, but the corresponding
@@ -122,137 +122,137 @@ class ARROW_EXPORT ArrayBuilder {
Status AppendScalar(const Scalar& scalar, int64_t n_repeats);
Status AppendScalars(const ScalarVector& scalars);
- /// For cases where raw data was memcpy'd into the internal buffers, allows us
- /// to advance the length of the builder. It is your responsibility to use
- /// this function responsibly.
- Status Advance(int64_t elements);
-
- /// \brief Return result of builder as an internal generic ArrayData
- /// object. Resets builder except for dictionary builder
- ///
- /// \param[out] out the finalized ArrayData object
- /// \return Status
- virtual Status FinishInternal(std::shared_ptr<ArrayData>* out) = 0;
-
- /// \brief Return result of builder as an Array object.
- ///
- /// The builder is reset except for DictionaryBuilder.
- ///
- /// \param[out] out the finalized Array object
- /// \return Status
- Status Finish(std::shared_ptr<Array>* out);
-
- /// \brief Return result of builder as an Array object.
- ///
- /// The builder is reset except for DictionaryBuilder.
- ///
- /// \return The finalized Array object
- Result<std::shared_ptr<Array>> Finish();
-
- /// \brief Return the type of the built Array
- virtual std::shared_ptr<DataType> type() const = 0;
-
- protected:
- /// Append to null bitmap
- Status AppendToBitmap(bool is_valid);
-
- /// Vector append. Treat each zero byte as a null. If valid_bytes is null
- /// assume all of length bits are valid.
- Status AppendToBitmap(const uint8_t* valid_bytes, int64_t length);
-
- /// Uniform append. Append N times the same validity bit.
- Status AppendToBitmap(int64_t num_bits, bool value);
-
- /// Set the next length bits to not null (i.e. valid).
- Status SetNotNull(int64_t length);
-
- // Unsafe operations (don't check capacity/don't resize)
-
- void UnsafeAppendNull() { UnsafeAppendToBitmap(false); }
-
- // Append to null bitmap, update the length
- void UnsafeAppendToBitmap(bool is_valid) {
- null_bitmap_builder_.UnsafeAppend(is_valid);
- ++length_;
- if (!is_valid) ++null_count_;
- }
-
- // Vector append. Treat each zero byte as a nullzero. If valid_bytes is null
- // assume all of length bits are valid.
- void UnsafeAppendToBitmap(const uint8_t* valid_bytes, int64_t length) {
- if (valid_bytes == NULLPTR) {
- return UnsafeSetNotNull(length);
- }
- null_bitmap_builder_.UnsafeAppend(valid_bytes, length);
- length_ += length;
- null_count_ = null_bitmap_builder_.false_count();
- }
-
- // Append the same validity value a given number of times.
- void UnsafeAppendToBitmap(const int64_t num_bits, bool value) {
- if (value) {
- UnsafeSetNotNull(num_bits);
- } else {
- UnsafeSetNull(num_bits);
- }
- }
-
- void UnsafeAppendToBitmap(const std::vector<bool>& is_valid);
-
- // Set the next validity bits to not null (i.e. valid).
- void UnsafeSetNotNull(int64_t length);
-
- // Set the next validity bits to null (i.e. invalid).
- void UnsafeSetNull(int64_t length);
-
- static Status TrimBuffer(const int64_t bytes_filled, ResizableBuffer* buffer);
-
- /// \brief Finish to an array of the specified ArrayType
- template <typename ArrayType>
- Status FinishTyped(std::shared_ptr<ArrayType>* out) {
- std::shared_ptr<Array> out_untyped;
- ARROW_RETURN_NOT_OK(Finish(&out_untyped));
- *out = std::static_pointer_cast<ArrayType>(std::move(out_untyped));
- return Status::OK();
- }
-
- // Check the requested capacity for validity
- Status CheckCapacity(int64_t new_capacity) {
- if (ARROW_PREDICT_FALSE(new_capacity < 0)) {
- return Status::Invalid(
- "Resize capacity must be positive (requested: ", new_capacity, ")");
- }
-
- if (ARROW_PREDICT_FALSE(new_capacity < length_)) {
- return Status::Invalid("Resize cannot downsize (requested: ", new_capacity,
- ", current length: ", length_, ")");
- }
-
- return Status::OK();
- }
-
- // Check for array type
- Status CheckArrayType(const std::shared_ptr<DataType>& expected_type,
- const Array& array, const char* message);
- Status CheckArrayType(Type::type expected_type, const Array& array,
- const char* message);
-
- MemoryPool* pool_;
-
- TypedBufferBuilder<bool> null_bitmap_builder_;
- int64_t null_count_ = 0;
-
- // Array length, so far. Also, the index of the next element to be added
- int64_t length_ = 0;
- int64_t capacity_ = 0;
-
- // Child value array builders. These are owned by this class
- std::vector<std::shared_ptr<ArrayBuilder>> children_;
-
- private:
- ARROW_DISALLOW_COPY_AND_ASSIGN(ArrayBuilder);
-};
-
+ /// For cases where raw data was memcpy'd into the internal buffers, allows us
+ /// to advance the length of the builder. It is your responsibility to use
+ /// this function responsibly.
+ Status Advance(int64_t elements);
+
+ /// \brief Return result of builder as an internal generic ArrayData
+ /// object. Resets builder except for dictionary builder
+ ///
+ /// \param[out] out the finalized ArrayData object
+ /// \return Status
+ virtual Status FinishInternal(std::shared_ptr<ArrayData>* out) = 0;
+
+ /// \brief Return result of builder as an Array object.
+ ///
+ /// The builder is reset except for DictionaryBuilder.
+ ///
+ /// \param[out] out the finalized Array object
+ /// \return Status
+ Status Finish(std::shared_ptr<Array>* out);
+
+ /// \brief Return result of builder as an Array object.
+ ///
+ /// The builder is reset except for DictionaryBuilder.
+ ///
+ /// \return The finalized Array object
+ Result<std::shared_ptr<Array>> Finish();
+
+ /// \brief Return the type of the built Array
+ virtual std::shared_ptr<DataType> type() const = 0;
+
+ protected:
+ /// Append to null bitmap
+ Status AppendToBitmap(bool is_valid);
+
+ /// Vector append. Treat each zero byte as a null. If valid_bytes is null
+ /// assume all of length bits are valid.
+ Status AppendToBitmap(const uint8_t* valid_bytes, int64_t length);
+
+ /// Uniform append. Append N times the same validity bit.
+ Status AppendToBitmap(int64_t num_bits, bool value);
+
+ /// Set the next length bits to not null (i.e. valid).
+ Status SetNotNull(int64_t length);
+
+ // Unsafe operations (don't check capacity/don't resize)
+
+ void UnsafeAppendNull() { UnsafeAppendToBitmap(false); }
+
+ // Append to null bitmap, update the length
+ void UnsafeAppendToBitmap(bool is_valid) {
+ null_bitmap_builder_.UnsafeAppend(is_valid);
+ ++length_;
+ if (!is_valid) ++null_count_;
+ }
+
+ // Vector append. Treat each zero byte as a nullzero. If valid_bytes is null
+ // assume all of length bits are valid.
+ void UnsafeAppendToBitmap(const uint8_t* valid_bytes, int64_t length) {
+ if (valid_bytes == NULLPTR) {
+ return UnsafeSetNotNull(length);
+ }
+ null_bitmap_builder_.UnsafeAppend(valid_bytes, length);
+ length_ += length;
+ null_count_ = null_bitmap_builder_.false_count();
+ }
+
+ // Append the same validity value a given number of times.
+ void UnsafeAppendToBitmap(const int64_t num_bits, bool value) {
+ if (value) {
+ UnsafeSetNotNull(num_bits);
+ } else {
+ UnsafeSetNull(num_bits);
+ }
+ }
+
+ void UnsafeAppendToBitmap(const std::vector<bool>& is_valid);
+
+ // Set the next validity bits to not null (i.e. valid).
+ void UnsafeSetNotNull(int64_t length);
+
+ // Set the next validity bits to null (i.e. invalid).
+ void UnsafeSetNull(int64_t length);
+
+ static Status TrimBuffer(const int64_t bytes_filled, ResizableBuffer* buffer);
+
+ /// \brief Finish to an array of the specified ArrayType
+ template <typename ArrayType>
+ Status FinishTyped(std::shared_ptr<ArrayType>* out) {
+ std::shared_ptr<Array> out_untyped;
+ ARROW_RETURN_NOT_OK(Finish(&out_untyped));
+ *out = std::static_pointer_cast<ArrayType>(std::move(out_untyped));
+ return Status::OK();
+ }
+
+ // Check the requested capacity for validity
+ Status CheckCapacity(int64_t new_capacity) {
+ if (ARROW_PREDICT_FALSE(new_capacity < 0)) {
+ return Status::Invalid(
+ "Resize capacity must be positive (requested: ", new_capacity, ")");
+ }
+
+ if (ARROW_PREDICT_FALSE(new_capacity < length_)) {
+ return Status::Invalid("Resize cannot downsize (requested: ", new_capacity,
+ ", current length: ", length_, ")");
+ }
+
+ return Status::OK();
+ }
+
+ // Check for array type
+ Status CheckArrayType(const std::shared_ptr<DataType>& expected_type,
+ const Array& array, const char* message);
+ Status CheckArrayType(Type::type expected_type, const Array& array,
+ const char* message);
+
+ MemoryPool* pool_;
+
+ TypedBufferBuilder<bool> null_bitmap_builder_;
+ int64_t null_count_ = 0;
+
+ // Array length, so far. Also, the index of the next element to be added
+ int64_t length_ = 0;
+ int64_t capacity_ = 0;
+
+ // Child value array builders. These are owned by this class
+ std::vector<std::shared_ptr<ArrayBuilder>> children_;
+
+ private:
+ ARROW_DISALLOW_COPY_AND_ASSIGN(ArrayBuilder);
+};
+
/// \brief Construct an empty ArrayBuilder corresponding to the data
/// type
/// \param[in] pool the MemoryPool to use for allocations
@@ -273,4 +273,4 @@ Status MakeDictionaryBuilder(MemoryPool* pool, const std::shared_ptr<DataType>&
const std::shared_ptr<Array>& dictionary,
std::unique_ptr<ArrayBuilder>* out);
-} // namespace arrow
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_binary.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_binary.cc
index 6822dc89903..23fd6977ce7 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_binary.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_binary.cc
@@ -1,78 +1,78 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/array/builder_binary.h"
-
-#include <algorithm>
-#include <cstddef>
-#include <cstdint>
-#include <cstring>
-#include <numeric>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "arrow/array.h"
-#include "arrow/buffer.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/decimal.h"
-#include "arrow/util/logging.h"
-
-namespace arrow {
-
-using internal::checked_cast;
-
-// ----------------------------------------------------------------------
-// Fixed width binary
-
-FixedSizeBinaryBuilder::FixedSizeBinaryBuilder(const std::shared_ptr<DataType>& type,
- MemoryPool* pool)
- : ArrayBuilder(pool),
- byte_width_(checked_cast<const FixedSizeBinaryType&>(*type).byte_width()),
- byte_builder_(pool) {}
-
-void FixedSizeBinaryBuilder::CheckValueSize(int64_t size) {
- DCHECK_EQ(size, byte_width_) << "Appending wrong size to FixedSizeBinaryBuilder";
-}
-
-Status FixedSizeBinaryBuilder::AppendValues(const uint8_t* data, int64_t length,
- const uint8_t* valid_bytes) {
- RETURN_NOT_OK(Reserve(length));
- UnsafeAppendToBitmap(valid_bytes, length);
- return byte_builder_.Append(data, length * byte_width_);
-}
-
-Status FixedSizeBinaryBuilder::AppendNull() {
- RETURN_NOT_OK(Reserve(1));
- UnsafeAppendNull();
- return Status::OK();
-}
-
-Status FixedSizeBinaryBuilder::AppendNulls(int64_t length) {
- RETURN_NOT_OK(Reserve(length));
- UnsafeAppendToBitmap(length, false);
- byte_builder_.UnsafeAppend(/*num_copies=*/length * byte_width_, 0);
- return Status::OK();
-}
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/array/builder_binary.h"
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <numeric>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/buffer.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/decimal.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+
+// ----------------------------------------------------------------------
+// Fixed width binary
+
+FixedSizeBinaryBuilder::FixedSizeBinaryBuilder(const std::shared_ptr<DataType>& type,
+ MemoryPool* pool)
+ : ArrayBuilder(pool),
+ byte_width_(checked_cast<const FixedSizeBinaryType&>(*type).byte_width()),
+ byte_builder_(pool) {}
+
+void FixedSizeBinaryBuilder::CheckValueSize(int64_t size) {
+ DCHECK_EQ(size, byte_width_) << "Appending wrong size to FixedSizeBinaryBuilder";
+}
+
+Status FixedSizeBinaryBuilder::AppendValues(const uint8_t* data, int64_t length,
+ const uint8_t* valid_bytes) {
+ RETURN_NOT_OK(Reserve(length));
+ UnsafeAppendToBitmap(valid_bytes, length);
+ return byte_builder_.Append(data, length * byte_width_);
+}
+
+Status FixedSizeBinaryBuilder::AppendNull() {
+ RETURN_NOT_OK(Reserve(1));
+ UnsafeAppendNull();
+ return Status::OK();
+}
+
+Status FixedSizeBinaryBuilder::AppendNulls(int64_t length) {
+ RETURN_NOT_OK(Reserve(length));
+ UnsafeAppendToBitmap(length, false);
+ byte_builder_.UnsafeAppend(/*num_copies=*/length * byte_width_, 0);
+ return Status::OK();
+}
+
Status FixedSizeBinaryBuilder::AppendEmptyValue() {
RETURN_NOT_OK(Reserve(1));
UnsafeAppendToBitmap(true);
@@ -87,113 +87,113 @@ Status FixedSizeBinaryBuilder::AppendEmptyValues(int64_t length) {
return Status::OK();
}
-void FixedSizeBinaryBuilder::Reset() {
- ArrayBuilder::Reset();
- byte_builder_.Reset();
-}
-
-Status FixedSizeBinaryBuilder::Resize(int64_t capacity) {
- RETURN_NOT_OK(CheckCapacity(capacity));
- RETURN_NOT_OK(byte_builder_.Resize(capacity * byte_width_));
- return ArrayBuilder::Resize(capacity);
-}
-
-Status FixedSizeBinaryBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
- std::shared_ptr<Buffer> data;
- RETURN_NOT_OK(byte_builder_.Finish(&data));
-
- std::shared_ptr<Buffer> null_bitmap;
- RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
- *out = ArrayData::Make(type(), length_, {null_bitmap, data}, null_count_);
-
- capacity_ = length_ = null_count_ = 0;
- return Status::OK();
-}
-
-const uint8_t* FixedSizeBinaryBuilder::GetValue(int64_t i) const {
- const uint8_t* data_ptr = byte_builder_.data();
- return data_ptr + i * byte_width_;
-}
-
-util::string_view FixedSizeBinaryBuilder::GetView(int64_t i) const {
- const uint8_t* data_ptr = byte_builder_.data();
- return util::string_view(reinterpret_cast<const char*>(data_ptr + i * byte_width_),
- byte_width_);
-}
-
-// ----------------------------------------------------------------------
-// ChunkedArray builders
-
-namespace internal {
-
-ChunkedBinaryBuilder::ChunkedBinaryBuilder(int32_t max_chunk_value_length,
- MemoryPool* pool)
- : max_chunk_value_length_(max_chunk_value_length), builder_(new BinaryBuilder(pool)) {
- DCHECK_LE(max_chunk_value_length, kBinaryMemoryLimit);
-}
-
-ChunkedBinaryBuilder::ChunkedBinaryBuilder(int32_t max_chunk_value_length,
- int32_t max_chunk_length, MemoryPool* pool)
- : ChunkedBinaryBuilder(max_chunk_value_length, pool) {
- max_chunk_length_ = max_chunk_length;
-}
-
-Status ChunkedBinaryBuilder::Finish(ArrayVector* out) {
- if (builder_->length() > 0 || chunks_.size() == 0) {
- std::shared_ptr<Array> chunk;
- RETURN_NOT_OK(builder_->Finish(&chunk));
- chunks_.emplace_back(std::move(chunk));
- }
- *out = std::move(chunks_);
- return Status::OK();
-}
-
-Status ChunkedBinaryBuilder::NextChunk() {
- std::shared_ptr<Array> chunk;
- RETURN_NOT_OK(builder_->Finish(&chunk));
- chunks_.emplace_back(std::move(chunk));
-
- if (auto capacity = extra_capacity_) {
- extra_capacity_ = 0;
- return Reserve(capacity);
- }
-
- return Status::OK();
-}
-
-Status ChunkedStringBuilder::Finish(ArrayVector* out) {
- RETURN_NOT_OK(ChunkedBinaryBuilder::Finish(out));
-
- // Change data type to string/utf8
- for (size_t i = 0; i < out->size(); ++i) {
- std::shared_ptr<ArrayData> data = (*out)[i]->data();
- data->type = ::arrow::utf8();
- (*out)[i] = std::make_shared<StringArray>(data);
- }
- return Status::OK();
-}
-
-Status ChunkedBinaryBuilder::Reserve(int64_t values) {
- if (ARROW_PREDICT_FALSE(extra_capacity_ != 0)) {
- extra_capacity_ += values;
- return Status::OK();
- }
-
- auto current_capacity = builder_->capacity();
- auto min_capacity = builder_->length() + values;
- if (current_capacity >= min_capacity) {
- return Status::OK();
- }
-
- auto new_capacity = BufferBuilder::GrowByFactor(current_capacity, min_capacity);
- if (ARROW_PREDICT_TRUE(new_capacity <= max_chunk_length_)) {
- return builder_->Resize(new_capacity);
- }
-
- extra_capacity_ = new_capacity - max_chunk_length_;
- return builder_->Resize(max_chunk_length_);
-}
-
-} // namespace internal
-
-} // namespace arrow
+// Reset the base builder state, then the value byte buffer.
+void FixedSizeBinaryBuilder::Reset() {
+ ArrayBuilder::Reset();
+ byte_builder_.Reset();
+}
+
+// Resize to hold `capacity` elements: after CheckCapacity() accepts the value,
+// the byte buffer is resized to capacity * byte_width_ bytes and then the base
+// builder is resized.
+Status FixedSizeBinaryBuilder::Resize(int64_t capacity) {
+ RETURN_NOT_OK(CheckCapacity(capacity));
+ RETURN_NOT_OK(byte_builder_.Resize(capacity * byte_width_));
+ return ArrayBuilder::Resize(capacity);
+}
+
+// Finish the value and validity buffers, assemble them into an ArrayData of
+// this builder's type, and zero the capacity/length/null-count counters.
+Status FixedSizeBinaryBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+ std::shared_ptr<Buffer> data;
+ RETURN_NOT_OK(byte_builder_.Finish(&data));
+
+ std::shared_ptr<Buffer> null_bitmap;
+ RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
+ // Buffers are passed in the order {validity bitmap, values}.
+ *out = ArrayData::Make(type(), length_, {null_bitmap, data}, null_count_);
+
+ capacity_ = length_ = null_count_ = 0;
+ return Status::OK();
+}
+
+// Return a pointer to the first byte of the i-th value in the byte buffer.
+const uint8_t* FixedSizeBinaryBuilder::GetValue(int64_t i) const {
+  const uint8_t* buffer_start = byte_builder_.data();
+  return buffer_start + i * byte_width_;
+}
+
+// Return a string_view covering the byte_width_ bytes of the i-th value.
+util::string_view FixedSizeBinaryBuilder::GetView(int64_t i) const {
+  const uint8_t* value_start = byte_builder_.data() + i * byte_width_;
+  const char* value_chars = reinterpret_cast<const char*>(value_start);
+  return util::string_view(value_chars, byte_width_);
+}
+
+// ----------------------------------------------------------------------
+// ChunkedArray builders
+
+namespace internal {
+
+// Create a chunked builder backed by a new BinaryBuilder allocated on `pool`.
+// `max_chunk_value_length` is checked against kBinaryMemoryLimit with DCHECK_LE.
+ChunkedBinaryBuilder::ChunkedBinaryBuilder(int32_t max_chunk_value_length,
+ MemoryPool* pool)
+ : max_chunk_value_length_(max_chunk_value_length), builder_(new BinaryBuilder(pool)) {
+ DCHECK_LE(max_chunk_value_length, kBinaryMemoryLimit);
+}
+
+// Delegate to the (max_chunk_value_length, pool) constructor, then set
+// max_chunk_length_ to the supplied `max_chunk_length`.
+ChunkedBinaryBuilder::ChunkedBinaryBuilder(int32_t max_chunk_value_length,
+ int32_t max_chunk_length, MemoryPool* pool)
+ : ChunkedBinaryBuilder(max_chunk_value_length, pool) {
+ max_chunk_length_ = max_chunk_length;
+}
+
+// Move all accumulated chunks into `out`. The in-progress builder is finished
+// into one more chunk first when it holds values, or when no chunk exists yet
+// (so `out` always receives at least one chunk).
+Status ChunkedBinaryBuilder::Finish(ArrayVector* out) {
+ if (builder_->length() > 0 || chunks_.size() == 0) {
+ std::shared_ptr<Array> chunk;
+ RETURN_NOT_OK(builder_->Finish(&chunk));
+ chunks_.emplace_back(std::move(chunk));
+ }
+ *out = std::move(chunks_);
+ return Status::OK();
+}
+
+// Finish the current builder into a new chunk. If Reserve() had deferred some
+// capacity in extra_capacity_, clear it and request it again for the new chunk.
+Status ChunkedBinaryBuilder::NextChunk() {
+ std::shared_ptr<Array> chunk;
+ RETURN_NOT_OK(builder_->Finish(&chunk));
+ chunks_.emplace_back(std::move(chunk));
+
+ if (auto capacity = extra_capacity_) {
+ // Zero the counter first: Reserve() only accumulates while it is non-zero.
+ extra_capacity_ = 0;
+ return Reserve(capacity);
+ }
+
+ return Status::OK();
+}
+
+// Finish as binary chunks, then rewrap every chunk as a StringArray by
+// switching the type of its ArrayData to utf8.
+Status ChunkedStringBuilder::Finish(ArrayVector* out) {
+  RETURN_NOT_OK(ChunkedBinaryBuilder::Finish(out));
+
+  for (auto& chunk : *out) {
+    std::shared_ptr<ArrayData> chunk_data = chunk->data();
+    chunk_data->type = ::arrow::utf8();
+    chunk = std::make_shared<StringArray>(chunk_data);
+  }
+  return Status::OK();
+}
+
+// Reserve room for `values` more elements in the current chunk. Growth is
+// capped at max_chunk_length_; whatever exceeds the cap is remembered in
+// extra_capacity_ instead of being allocated now.
+Status ChunkedBinaryBuilder::Reserve(int64_t values) {
+  // Capacity is already being deferred: just add to the pending amount.
+  if (ARROW_PREDICT_FALSE(extra_capacity_ != 0)) {
+    extra_capacity_ += values;
+    return Status::OK();
+  }
+
+  const auto available = builder_->capacity();
+  const auto required = builder_->length() + values;
+  if (required <= available) {
+    return Status::OK();
+  }
+
+  const auto grown = BufferBuilder::GrowByFactor(available, required);
+  if (ARROW_PREDICT_FALSE(grown > max_chunk_length_)) {
+    extra_capacity_ = grown - max_chunk_length_;
+    return builder_->Resize(max_chunk_length_);
+  }
+  return builder_->Resize(grown);
+}
+
+} // namespace internal
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_binary.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_binary.h
index 62edc69fb8e..cab682821b5 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_binary.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_binary.h
@@ -1,82 +1,82 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <array>
-#include <cstddef>
-#include <cstdint>
-#include <cstring>
-#include <limits>
-#include <memory>
-#include <numeric>
-#include <string>
-#include <vector>
-
-#include "arrow/array/array_base.h"
-#include "arrow/array/array_binary.h"
-#include "arrow/array/builder_base.h"
-#include "arrow/array/data.h"
-#include "arrow/buffer.h"
-#include "arrow/buffer_builder.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/string_view.h" // IWYU pragma: export
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-// ----------------------------------------------------------------------
-// Binary and String
-
-template <typename TYPE>
-class BaseBinaryBuilder : public ArrayBuilder {
- public:
- using TypeClass = TYPE;
- using offset_type = typename TypeClass::offset_type;
-
- explicit BaseBinaryBuilder(MemoryPool* pool = default_memory_pool())
- : ArrayBuilder(pool), offsets_builder_(pool), value_data_builder_(pool) {}
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <limits>
+#include <memory>
+#include <numeric>
+#include <string>
+#include <vector>
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/array_binary.h"
+#include "arrow/array/builder_base.h"
+#include "arrow/array/data.h"
+#include "arrow/buffer.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/string_view.h" // IWYU pragma: export
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+// ----------------------------------------------------------------------
+// Binary and String
+
+template <typename TYPE>
+class BaseBinaryBuilder : public ArrayBuilder {
+ public:
+ using TypeClass = TYPE;
+ using offset_type = typename TypeClass::offset_type;
+
+ /// \brief Construct an empty builder; the base builder, the offsets buffer
+ /// and the value data buffer all receive `pool`.
+ explicit BaseBinaryBuilder(MemoryPool* pool = default_memory_pool())
+ : ArrayBuilder(pool), offsets_builder_(pool), value_data_builder_(pool) {}
+
BaseBinaryBuilder(const std::shared_ptr<DataType>& /*type*/, MemoryPool* pool)
- : BaseBinaryBuilder(pool) {}
-
- Status Append(const uint8_t* value, offset_type length) {
- ARROW_RETURN_NOT_OK(Reserve(1));
- ARROW_RETURN_NOT_OK(AppendNextOffset());
- // Safety check for UBSAN.
- if (ARROW_PREDICT_TRUE(length > 0)) {
+ : BaseBinaryBuilder(pool) {}
+
+ /// \brief Append one value of `length` bytes starting at `value`.
+ ///
+ /// Reserves one slot and records the next offset; bytes are copied only when
+ /// `length` is positive and ValidateOverflow() accepts the new total size.
+ Status Append(const uint8_t* value, offset_type length) {
+ ARROW_RETURN_NOT_OK(Reserve(1));
+ ARROW_RETURN_NOT_OK(AppendNextOffset());
+ // Safety check for UBSAN.
+ if (ARROW_PREDICT_TRUE(length > 0)) {
ARROW_RETURN_NOT_OK(ValidateOverflow(length));
- ARROW_RETURN_NOT_OK(value_data_builder_.Append(value, length));
- }
-
- UnsafeAppendToBitmap(true);
- return Status::OK();
- }
-
- Status Append(const char* value, offset_type length) {
- return Append(reinterpret_cast<const uint8_t*>(value), length);
- }
-
- Status Append(util::string_view value) {
- return Append(value.data(), static_cast<offset_type>(value.size()));
- }
-
+ ARROW_RETURN_NOT_OK(value_data_builder_.Append(value, length));
+ }
+
+ UnsafeAppendToBitmap(true);
+ return Status::OK();
+ }
+
+ /// \brief Append `length` bytes from a char buffer; forwards to the
+ /// `const uint8_t*` overload.
+ Status Append(const char* value, offset_type length) {
+ return Append(reinterpret_cast<const uint8_t*>(value), length);
+ }
+
+ /// \brief Append the bytes of a string_view; its size is cast to offset_type.
+ Status Append(util::string_view value) {
+ return Append(value.data(), static_cast<offset_type>(value.size()));
+ }
+
/// Extend the last appended value by appending more data at the end
///
/// Unlike Append, this does not create a new offset.
@@ -94,23 +94,23 @@ class BaseBinaryBuilder : public ArrayBuilder {
static_cast<offset_type>(value.size()));
}
- Status AppendNulls(int64_t length) final {
- const int64_t num_bytes = value_data_builder_.length();
- ARROW_RETURN_NOT_OK(Reserve(length));
- for (int64_t i = 0; i < length; ++i) {
- offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
- }
- UnsafeAppendToBitmap(length, false);
- return Status::OK();
- }
-
- Status AppendNull() final {
- ARROW_RETURN_NOT_OK(AppendNextOffset());
- ARROW_RETURN_NOT_OK(Reserve(1));
- UnsafeAppendToBitmap(false);
- return Status::OK();
- }
-
+  /// \brief Append `length` null slots.
+  ///
+  /// Every null slot records the current end of the value data as its offset,
+  /// so no value bytes are added.
+  Status AppendNulls(int64_t length) final {
+    const auto repeated_offset =
+        static_cast<offset_type>(value_data_builder_.length());
+    ARROW_RETURN_NOT_OK(Reserve(length));
+    for (int64_t remaining = length; remaining > 0; --remaining) {
+      offsets_builder_.UnsafeAppend(repeated_offset);
+    }
+    UnsafeAppendToBitmap(length, false);
+    return Status::OK();
+  }
+
+  /// \brief Append a single null slot.
+  ///
+  /// Capacity is reserved before the offset is written, in the same order as
+  /// Append(), so a failed reservation returns without having pushed an offset
+  /// that has no matching validity bit.
+  Status AppendNull() final {
+    ARROW_RETURN_NOT_OK(Reserve(1));
+    ARROW_RETURN_NOT_OK(AppendNextOffset());
+    UnsafeAppendToBitmap(false);
+    return Status::OK();
+  }
+
Status AppendEmptyValue() final {
ARROW_RETURN_NOT_OK(AppendNextOffset());
ARROW_RETURN_NOT_OK(Reserve(1));
@@ -128,28 +128,28 @@ class BaseBinaryBuilder : public ArrayBuilder {
return Status::OK();
}
- /// \brief Append without checking capacity
- ///
- /// Offsets and data should have been presized using Reserve() and
- /// ReserveData(), respectively.
- void UnsafeAppend(const uint8_t* value, offset_type length) {
- UnsafeAppendNextOffset();
- value_data_builder_.UnsafeAppend(value, length);
- UnsafeAppendToBitmap(true);
- }
-
- void UnsafeAppend(const char* value, offset_type length) {
- UnsafeAppend(reinterpret_cast<const uint8_t*>(value), length);
- }
-
- void UnsafeAppend(const std::string& value) {
- UnsafeAppend(value.c_str(), static_cast<offset_type>(value.size()));
- }
-
- void UnsafeAppend(util::string_view value) {
- UnsafeAppend(value.data(), static_cast<offset_type>(value.size()));
- }
-
+ /// \brief Append without checking capacity
+ ///
+ /// Offsets and data should have been presized using Reserve() and
+ /// ReserveData(), respectively.
+ ///
+ /// \param[in] value pointer to the bytes to copy
+ /// \param[in] length number of bytes to copy
+ void UnsafeAppend(const uint8_t* value, offset_type length) {
+ UnsafeAppendNextOffset();
+ value_data_builder_.UnsafeAppend(value, length);
+ UnsafeAppendToBitmap(true);
+ }
+
+ /// \brief Unchecked append from a char buffer; forwards to the
+ /// `const uint8_t*` overload.
+ void UnsafeAppend(const char* value, offset_type length) {
+ UnsafeAppend(reinterpret_cast<const uint8_t*>(value), length);
+ }
+
+ /// \brief Unchecked append of a std::string's contents (size() bytes).
+ void UnsafeAppend(const std::string& value) {
+ UnsafeAppend(value.c_str(), static_cast<offset_type>(value.size()));
+ }
+
+ /// \brief Unchecked append of a string_view's contents (size() bytes).
+ void UnsafeAppend(util::string_view value) {
+ UnsafeAppend(value.data(), static_cast<offset_type>(value.size()));
+ }
+
/// Like ExtendCurrent, but do not check capacity
void UnsafeExtendCurrent(const uint8_t* value, offset_type length) {
value_data_builder_.UnsafeAppend(value, length);
@@ -160,313 +160,313 @@ class BaseBinaryBuilder : public ArrayBuilder {
static_cast<offset_type>(value.size()));
}
- void UnsafeAppendNull() {
- const int64_t num_bytes = value_data_builder_.length();
- offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
- UnsafeAppendToBitmap(false);
- }
-
+ /// \brief Append a null slot using the Unsafe* appenders: the current value
+ /// data length is recorded as the offset and the validity bit is false.
+ void UnsafeAppendNull() {
+ const int64_t num_bytes = value_data_builder_.length();
+ offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
+ UnsafeAppendToBitmap(false);
+ }
+
/// \brief Append a valid, zero-length slot using the Unsafe* appenders: the
/// current value data length is recorded as the offset and the validity bit
/// is true.
void UnsafeAppendEmptyValue() {
const int64_t num_bytes = value_data_builder_.length();
offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
UnsafeAppendToBitmap(true);
}
- /// \brief Append a sequence of strings in one shot.
- ///
- /// \param[in] values a vector of strings
- /// \param[in] valid_bytes an optional sequence of bytes where non-zero
- /// indicates a valid (non-null) value
- /// \return Status
- Status AppendValues(const std::vector<std::string>& values,
- const uint8_t* valid_bytes = NULLPTR) {
- std::size_t total_length = std::accumulate(
- values.begin(), values.end(), 0ULL,
- [](uint64_t sum, const std::string& str) { return sum + str.size(); });
- ARROW_RETURN_NOT_OK(Reserve(values.size()));
- ARROW_RETURN_NOT_OK(value_data_builder_.Reserve(total_length));
- ARROW_RETURN_NOT_OK(offsets_builder_.Reserve(values.size()));
-
- if (valid_bytes != NULLPTR) {
- for (std::size_t i = 0; i < values.size(); ++i) {
- UnsafeAppendNextOffset();
- if (valid_bytes[i]) {
- value_data_builder_.UnsafeAppend(
- reinterpret_cast<const uint8_t*>(values[i].data()), values[i].size());
- }
- }
- } else {
- for (std::size_t i = 0; i < values.size(); ++i) {
- UnsafeAppendNextOffset();
- value_data_builder_.UnsafeAppend(
- reinterpret_cast<const uint8_t*>(values[i].data()), values[i].size());
- }
- }
-
- UnsafeAppendToBitmap(valid_bytes, values.size());
- return Status::OK();
- }
-
- /// \brief Append a sequence of nul-terminated strings in one shot.
- /// If one of the values is NULL, it is processed as a null
- /// value even if the corresponding valid_bytes entry is 1.
- ///
- /// \param[in] values a contiguous C array of nul-terminated char *
- /// \param[in] length the number of values to append
- /// \param[in] valid_bytes an optional sequence of bytes where non-zero
- /// indicates a valid (non-null) value
- /// \return Status
- Status AppendValues(const char** values, int64_t length,
- const uint8_t* valid_bytes = NULLPTR) {
- std::size_t total_length = 0;
- std::vector<std::size_t> value_lengths(length);
- bool have_null_value = false;
- for (int64_t i = 0; i < length; ++i) {
- if (values[i] != NULLPTR) {
- auto value_length = strlen(values[i]);
- value_lengths[i] = value_length;
- total_length += value_length;
- } else {
- have_null_value = true;
- }
- }
- ARROW_RETURN_NOT_OK(Reserve(length));
- ARROW_RETURN_NOT_OK(ReserveData(total_length));
-
- if (valid_bytes) {
- int64_t valid_bytes_offset = 0;
- for (int64_t i = 0; i < length; ++i) {
- UnsafeAppendNextOffset();
- if (valid_bytes[i]) {
- if (values[i]) {
- value_data_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values[i]),
- value_lengths[i]);
- } else {
- UnsafeAppendToBitmap(valid_bytes + valid_bytes_offset,
- i - valid_bytes_offset);
- UnsafeAppendToBitmap(false);
- valid_bytes_offset = i + 1;
- }
- }
- }
- UnsafeAppendToBitmap(valid_bytes + valid_bytes_offset, length - valid_bytes_offset);
- } else {
- if (have_null_value) {
- std::vector<uint8_t> valid_vector(length, 0);
- for (int64_t i = 0; i < length; ++i) {
- UnsafeAppendNextOffset();
- if (values[i]) {
- value_data_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values[i]),
- value_lengths[i]);
- valid_vector[i] = 1;
- }
- }
- UnsafeAppendToBitmap(valid_vector.data(), length);
- } else {
- for (int64_t i = 0; i < length; ++i) {
- UnsafeAppendNextOffset();
- value_data_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values[i]),
- value_lengths[i]);
- }
- UnsafeAppendToBitmap(NULLPTR, length);
- }
- }
- return Status::OK();
- }
-
- void Reset() override {
- ArrayBuilder::Reset();
- offsets_builder_.Reset();
- value_data_builder_.Reset();
- }
-
- Status ValidateOverflow(int64_t new_bytes) {
- auto new_size = value_data_builder_.length() + new_bytes;
- if (ARROW_PREDICT_FALSE(new_size > memory_limit())) {
- return Status::CapacityError("array cannot contain more than ", memory_limit(),
- " bytes, have ", new_size);
- } else {
- return Status::OK();
- }
- }
-
- Status Resize(int64_t capacity) override {
- ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
- // One more than requested for offsets
- ARROW_RETURN_NOT_OK(offsets_builder_.Resize(capacity + 1));
- return ArrayBuilder::Resize(capacity);
- }
-
- /// \brief Ensures there is enough allocated capacity to append the indicated
- /// number of bytes to the value data buffer without additional allocations
- Status ReserveData(int64_t elements) {
- ARROW_RETURN_NOT_OK(ValidateOverflow(elements));
- return value_data_builder_.Reserve(elements);
- }
-
- Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
- // Write final offset (values length)
- ARROW_RETURN_NOT_OK(AppendNextOffset());
-
- // These buffers' padding zeroed by BufferBuilder
- std::shared_ptr<Buffer> offsets, value_data, null_bitmap;
- ARROW_RETURN_NOT_OK(offsets_builder_.Finish(&offsets));
- ARROW_RETURN_NOT_OK(value_data_builder_.Finish(&value_data));
- ARROW_RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
-
- *out = ArrayData::Make(type(), length_, {null_bitmap, offsets, value_data},
- null_count_, 0);
- Reset();
- return Status::OK();
- }
-
- /// \return data pointer of the value date builder
- const uint8_t* value_data() const { return value_data_builder_.data(); }
- /// \return size of values buffer so far
- int64_t value_data_length() const { return value_data_builder_.length(); }
- /// \return capacity of values buffer
- int64_t value_data_capacity() const { return value_data_builder_.capacity(); }
-
- /// \return data pointer of the value date builder
- const offset_type* offsets_data() const { return offsets_builder_.data(); }
-
- /// Temporary access to a value.
- ///
- /// This pointer becomes invalid on the next modifying operation.
- const uint8_t* GetValue(int64_t i, offset_type* out_length) const {
- const offset_type* offsets = offsets_builder_.data();
- const auto offset = offsets[i];
- if (i == (length_ - 1)) {
- *out_length = static_cast<offset_type>(value_data_builder_.length()) - offset;
- } else {
- *out_length = offsets[i + 1] - offset;
- }
- return value_data_builder_.data() + offset;
- }
-
- offset_type offset(int64_t i) const { return offsets_data()[i]; }
-
- /// Temporary access to a value.
- ///
- /// This view becomes invalid on the next modifying operation.
- util::string_view GetView(int64_t i) const {
- offset_type value_length;
- const uint8_t* value_data = GetValue(i, &value_length);
- return util::string_view(reinterpret_cast<const char*>(value_data), value_length);
- }
-
- // Cannot make this a static attribute because of linking issues
- static constexpr int64_t memory_limit() {
- return std::numeric_limits<offset_type>::max() - 1;
- }
-
- protected:
- TypedBufferBuilder<offset_type> offsets_builder_;
- TypedBufferBuilder<uint8_t> value_data_builder_;
-
- Status AppendNextOffset() {
- const int64_t num_bytes = value_data_builder_.length();
- return offsets_builder_.Append(static_cast<offset_type>(num_bytes));
- }
-
- void UnsafeAppendNextOffset() {
- const int64_t num_bytes = value_data_builder_.length();
- offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
- }
-};
-
-/// \class BinaryBuilder
-/// \brief Builder class for variable-length binary data
-class ARROW_EXPORT BinaryBuilder : public BaseBinaryBuilder<BinaryType> {
- public:
- using BaseBinaryBuilder::BaseBinaryBuilder;
-
- /// \cond FALSE
- using ArrayBuilder::Finish;
- /// \endcond
-
- Status Finish(std::shared_ptr<BinaryArray>* out) { return FinishTyped(out); }
-
- std::shared_ptr<DataType> type() const override { return binary(); }
-};
-
-/// \class StringBuilder
-/// \brief Builder class for UTF8 strings
-class ARROW_EXPORT StringBuilder : public BinaryBuilder {
- public:
- using BinaryBuilder::BinaryBuilder;
-
- /// \cond FALSE
- using ArrayBuilder::Finish;
- /// \endcond
-
- Status Finish(std::shared_ptr<StringArray>* out) { return FinishTyped(out); }
-
- std::shared_ptr<DataType> type() const override { return utf8(); }
-};
-
-/// \class LargeBinaryBuilder
-/// \brief Builder class for large variable-length binary data
-class ARROW_EXPORT LargeBinaryBuilder : public BaseBinaryBuilder<LargeBinaryType> {
- public:
- using BaseBinaryBuilder::BaseBinaryBuilder;
-
- /// \cond FALSE
- using ArrayBuilder::Finish;
- /// \endcond
-
- Status Finish(std::shared_ptr<LargeBinaryArray>* out) { return FinishTyped(out); }
-
- std::shared_ptr<DataType> type() const override { return large_binary(); }
-};
-
-/// \class LargeStringBuilder
-/// \brief Builder class for large UTF8 strings
-class ARROW_EXPORT LargeStringBuilder : public LargeBinaryBuilder {
- public:
- using LargeBinaryBuilder::LargeBinaryBuilder;
-
- /// \cond FALSE
- using ArrayBuilder::Finish;
- /// \endcond
-
- Status Finish(std::shared_ptr<LargeStringArray>* out) { return FinishTyped(out); }
-
- std::shared_ptr<DataType> type() const override { return large_utf8(); }
-};
-
-// ----------------------------------------------------------------------
-// FixedSizeBinaryBuilder
-
-class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder {
- public:
- using TypeClass = FixedSizeBinaryType;
-
- explicit FixedSizeBinaryBuilder(const std::shared_ptr<DataType>& type,
- MemoryPool* pool = default_memory_pool());
-
- Status Append(const uint8_t* value) {
- ARROW_RETURN_NOT_OK(Reserve(1));
- UnsafeAppend(value);
- return Status::OK();
- }
-
- Status Append(const char* value) {
- return Append(reinterpret_cast<const uint8_t*>(value));
- }
-
- Status Append(const util::string_view& view) {
- ARROW_RETURN_NOT_OK(Reserve(1));
- UnsafeAppend(view);
- return Status::OK();
- }
-
- Status Append(const std::string& s) {
- ARROW_RETURN_NOT_OK(Reserve(1));
- UnsafeAppend(s);
- return Status::OK();
- }
-
+  /// \brief Append a sequence of strings in one shot.
+  ///
+  /// \param[in] values a vector of strings
+  /// \param[in] valid_bytes an optional sequence of bytes where non-zero
+  /// indicates a valid (non-null) value
+  /// \return Status; an error from ReserveData() if the bytes to be copied
+  /// would push the value data past memory_limit()
+  Status AppendValues(const std::vector<std::string>& values,
+                      const uint8_t* valid_bytes = NULLPTR) {
+    // Only entries that will actually be copied count towards the reservation.
+    std::size_t total_length = 0;
+    for (std::size_t i = 0; i < values.size(); ++i) {
+      if (valid_bytes == NULLPTR || valid_bytes[i]) {
+        total_length += values[i].size();
+      }
+    }
+    ARROW_RETURN_NOT_OK(Reserve(values.size()));
+    // ReserveData() runs ValidateOverflow() first. Reserving on
+    // value_data_builder_ directly would skip that check, and the later
+    // static_cast<offset_type> of the data length could then truncate.
+    ARROW_RETURN_NOT_OK(ReserveData(total_length));
+    ARROW_RETURN_NOT_OK(offsets_builder_.Reserve(values.size()));
+
+    for (std::size_t i = 0; i < values.size(); ++i) {
+      UnsafeAppendNextOffset();
+      if (valid_bytes == NULLPTR || valid_bytes[i]) {
+        value_data_builder_.UnsafeAppend(
+            reinterpret_cast<const uint8_t*>(values[i].data()), values[i].size());
+      }
+    }
+
+    UnsafeAppendToBitmap(valid_bytes, values.size());
+    return Status::OK();
+  }
+
+ /// \brief Append a sequence of nul-terminated strings in one shot.
+ /// If one of the values is NULL, it is processed as a null
+ /// value even if the corresponding valid_bytes entry is 1.
+ ///
+ /// \param[in] values a contiguous C array of nul-terminated char *
+ /// \param[in] length the number of values to append
+ /// \param[in] valid_bytes an optional sequence of bytes where non-zero
+ /// indicates a valid (non-null) value
+ /// \return Status
+ Status AppendValues(const char** values, int64_t length,
+ const uint8_t* valid_bytes = NULLPTR) {
+ // First pass: measure every non-NULL string so that all storage can be
+ // reserved up front and the Unsafe* appenders used afterwards.
+ std::size_t total_length = 0;
+ std::vector<std::size_t> value_lengths(length);
+ bool have_null_value = false;
+ for (int64_t i = 0; i < length; ++i) {
+ if (values[i] != NULLPTR) {
+ auto value_length = strlen(values[i]);
+ value_lengths[i] = value_length;
+ total_length += value_length;
+ } else {
+ have_null_value = true;
+ }
+ }
+ ARROW_RETURN_NOT_OK(Reserve(length));
+ ARROW_RETURN_NOT_OK(ReserveData(total_length));
+
+ if (valid_bytes) {
+ // Validity bits are copied from valid_bytes in runs; a run is flushed
+ // whenever a NULL pointer is found in a slot that valid_bytes marks valid.
+ int64_t valid_bytes_offset = 0;
+ for (int64_t i = 0; i < length; ++i) {
+ UnsafeAppendNextOffset();
+ if (valid_bytes[i]) {
+ if (values[i]) {
+ value_data_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values[i]),
+ value_lengths[i]);
+ } else {
+ // Flush the bits before slot i, force slot i to null, and start the
+ // next run after it.
+ UnsafeAppendToBitmap(valid_bytes + valid_bytes_offset,
+ i - valid_bytes_offset);
+ UnsafeAppendToBitmap(false);
+ valid_bytes_offset = i + 1;
+ }
+ }
+ }
+ // Flush the remaining run.
+ UnsafeAppendToBitmap(valid_bytes + valid_bytes_offset, length - valid_bytes_offset);
+ } else {
+ if (have_null_value) {
+ // No caller-supplied validity: derive it from which pointers are non-NULL.
+ std::vector<uint8_t> valid_vector(length, 0);
+ for (int64_t i = 0; i < length; ++i) {
+ UnsafeAppendNextOffset();
+ if (values[i]) {
+ value_data_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values[i]),
+ value_lengths[i]);
+ valid_vector[i] = 1;
+ }
+ }
+ UnsafeAppendToBitmap(valid_vector.data(), length);
+ } else {
+ // Every pointer is non-NULL, so every slot is valid.
+ for (int64_t i = 0; i < length; ++i) {
+ UnsafeAppendNextOffset();
+ value_data_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values[i]),
+ value_lengths[i]);
+ }
+ UnsafeAppendToBitmap(NULLPTR, length);
+ }
+ }
+ return Status::OK();
+ }
+
+ /// \brief Reset the base builder state and both the offsets and value data
+ /// buffer builders.
+ void Reset() override {
+ ArrayBuilder::Reset();
+ offsets_builder_.Reset();
+ value_data_builder_.Reset();
+ }
+
+  /// \brief Check that adding `new_bytes` to the value data stays within
+  /// memory_limit().
+  ///
+  /// \return Status::OK() when it fits, a CapacityError otherwise
+  Status ValidateOverflow(int64_t new_bytes) {
+    const auto total_bytes = value_data_builder_.length() + new_bytes;
+    if (ARROW_PREDICT_TRUE(total_bytes <= memory_limit())) {
+      return Status::OK();
+    }
+    return Status::CapacityError("array cannot contain more than ", memory_limit(),
+                                 " bytes, have ", total_bytes);
+  }
+
+ /// \brief Resize the builder to hold `capacity` elements.
+ ///
+ /// The offsets buffer is sized for capacity + 1 entries before the base
+ /// builder is resized; the value data buffer is not touched here.
+ Status Resize(int64_t capacity) override {
+ ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
+ // One more than requested for offsets
+ ARROW_RETURN_NOT_OK(offsets_builder_.Resize(capacity + 1));
+ return ArrayBuilder::Resize(capacity);
+ }
+
+ /// \brief Ensures there is enough allocated capacity to append the indicated
+ /// number of bytes to the value data buffer without additional allocations
+ ///
+ /// \param[in] elements number of bytes to reserve; checked with
+ /// ValidateOverflow() before the reservation is made
+ Status ReserveData(int64_t elements) {
+ ARROW_RETURN_NOT_OK(ValidateOverflow(elements));
+ return value_data_builder_.Reserve(elements);
+ }
+
+ /// \brief Finish the three buffers, assemble them into an ArrayData of this
+ /// builder's type, and reset the builder.
+ Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
+ // Write final offset (values length)
+ ARROW_RETURN_NOT_OK(AppendNextOffset());
+
+ // These buffers' padding zeroed by BufferBuilder
+ std::shared_ptr<Buffer> offsets, value_data, null_bitmap;
+ ARROW_RETURN_NOT_OK(offsets_builder_.Finish(&offsets));
+ ARROW_RETURN_NOT_OK(value_data_builder_.Finish(&value_data));
+ ARROW_RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
+
+ // Buffers are passed in the order {validity bitmap, offsets, value data}.
+ *out = ArrayData::Make(type(), length_, {null_bitmap, offsets, value_data},
+ null_count_, 0);
+ Reset();
+ return Status::OK();
+ }
+
+ /// \return data pointer of the value data builder
+ const uint8_t* value_data() const { return value_data_builder_.data(); }
+ /// \return size of values buffer so far
+ int64_t value_data_length() const { return value_data_builder_.length(); }
+ /// \return capacity of values buffer
+ int64_t value_data_capacity() const { return value_data_builder_.capacity(); }
+
+ /// \return data pointer of the offsets builder
+ const offset_type* offsets_data() const { return offsets_builder_.data(); }
+
+  /// Temporary access to a value.
+  ///
+  /// This pointer becomes invalid on the next modifying operation.
+  ///
+  /// \param[in] i index of the value
+  /// \param[out] out_length receives the length of the value in bytes
+  const uint8_t* GetValue(int64_t i, offset_type* out_length) const {
+    const offset_type* offsets = offsets_builder_.data();
+    const offset_type begin = offsets[i];
+    // The last value has no following offset yet, so it ends at the current
+    // end of the value data.
+    const bool is_last = (i == (length_ - 1));
+    if (is_last) {
+      *out_length = static_cast<offset_type>(value_data_builder_.length()) - begin;
+    } else {
+      *out_length = offsets[i + 1] - begin;
+    }
+    return value_data_builder_.data() + begin;
+  }
+
+ /// \return the i-th entry of the offsets buffer
+ offset_type offset(int64_t i) const { return offsets_data()[i]; }
+
+  /// Temporary access to a value.
+  ///
+  /// This view becomes invalid on the next modifying operation.
+  util::string_view GetView(int64_t i) const {
+    offset_type num_bytes;
+    const uint8_t* bytes = GetValue(i, &num_bytes);
+    const char* chars = reinterpret_cast<const char*>(bytes);
+    return util::string_view(chars, num_bytes);
+  }
+
+ // Cannot make this a static attribute because of linking issues
+ /// \return the largest value data size accepted by ValidateOverflow(): one
+ /// less than the maximum value of offset_type
+ static constexpr int64_t memory_limit() {
+ return std::numeric_limits<offset_type>::max() - 1;
+ }
+
+ protected:
+ TypedBufferBuilder<offset_type> offsets_builder_;
+ TypedBufferBuilder<uint8_t> value_data_builder_;
+
+  /// Append the current value data length as the next offset (checked append).
+  Status AppendNextOffset() {
+    const auto next_offset = static_cast<offset_type>(value_data_builder_.length());
+    return offsets_builder_.Append(next_offset);
+  }
+
+  /// Append the current value data length as the next offset through
+  /// UnsafeAppend().
+  void UnsafeAppendNextOffset() {
+    const auto next_offset = static_cast<offset_type>(value_data_builder_.length());
+    offsets_builder_.UnsafeAppend(next_offset);
+  }
+};
+
+/// \class BinaryBuilder
+/// \brief Builder class for variable-length binary data
+class ARROW_EXPORT BinaryBuilder : public BaseBinaryBuilder<BinaryType> {
+ public:
+ // Inherit the BaseBinaryBuilder constructors.
+ using BaseBinaryBuilder::BaseBinaryBuilder;
+
+ /// \cond FALSE
+ using ArrayBuilder::Finish;
+ /// \endcond
+
+ /// \brief Finish into a BinaryArray; forwards to FinishTyped().
+ Status Finish(std::shared_ptr<BinaryArray>* out) { return FinishTyped(out); }
+
+ /// \return the binary() data type
+ std::shared_ptr<DataType> type() const override { return binary(); }
+};
+
+/// \class StringBuilder
+/// \brief Builder class for UTF8 strings
+class ARROW_EXPORT StringBuilder : public BinaryBuilder {
+ public:
+ // Inherit the BinaryBuilder constructors.
+ using BinaryBuilder::BinaryBuilder;
+
+ /// \cond FALSE
+ using ArrayBuilder::Finish;
+ /// \endcond
+
+ /// \brief Finish into a StringArray; forwards to FinishTyped().
+ Status Finish(std::shared_ptr<StringArray>* out) { return FinishTyped(out); }
+
+ /// \return the utf8() data type
+ std::shared_ptr<DataType> type() const override { return utf8(); }
+};
+
+/// \class LargeBinaryBuilder
+/// \brief Builder class for large variable-length binary data
+class ARROW_EXPORT LargeBinaryBuilder : public BaseBinaryBuilder<LargeBinaryType> {
+ public:
+ // Inherit the BaseBinaryBuilder constructors.
+ using BaseBinaryBuilder::BaseBinaryBuilder;
+
+ /// \cond FALSE
+ using ArrayBuilder::Finish;
+ /// \endcond
+
+ /// \brief Finish into a LargeBinaryArray; forwards to FinishTyped().
+ Status Finish(std::shared_ptr<LargeBinaryArray>* out) { return FinishTyped(out); }
+
+ /// \return the large_binary() data type
+ std::shared_ptr<DataType> type() const override { return large_binary(); }
+};
+
+/// \class LargeStringBuilder
+/// \brief Builder class for large UTF8 strings
+class ARROW_EXPORT LargeStringBuilder : public LargeBinaryBuilder {
+ public:
+ // Inherit the LargeBinaryBuilder constructors.
+ using LargeBinaryBuilder::LargeBinaryBuilder;
+
+ /// \cond FALSE
+ using ArrayBuilder::Finish;
+ /// \endcond
+
+ /// \brief Finish into a LargeStringArray; forwards to FinishTyped().
+ Status Finish(std::shared_ptr<LargeStringArray>* out) { return FinishTyped(out); }
+
+ /// \return the large_utf8() data type
+ std::shared_ptr<DataType> type() const override { return large_utf8(); }
+};
+
+// ----------------------------------------------------------------------
+// FixedSizeBinaryBuilder
+
+class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder {
+ public:
+ using TypeClass = FixedSizeBinaryType;
+
+ /// \brief Construct a builder for `type`, allocating from `pool`.
+ // NOTE(review): the out-of-line definition checked_casts `type` to
+ // FixedSizeBinaryType, so callers are expected to pass that type.
+ explicit FixedSizeBinaryBuilder(const std::shared_ptr<DataType>& type,
+ MemoryPool* pool = default_memory_pool());
+
+ /// \brief Append one value after reserving a slot. No length is passed;
+ /// the copy itself is delegated to UnsafeAppend(const uint8_t*).
+ Status Append(const uint8_t* value) {
+ ARROW_RETURN_NOT_OK(Reserve(1));
+ UnsafeAppend(value);
+ return Status::OK();
+ }
+
+ /// \brief Append one value from a char buffer; forwards to the
+ /// `const uint8_t*` overload.
+ Status Append(const char* value) {
+ return Append(reinterpret_cast<const uint8_t*>(value));
+ }
+
+ /// \brief Append one value from a string_view after reserving a slot.
+ Status Append(const util::string_view& view) {
+ ARROW_RETURN_NOT_OK(Reserve(1));
+ UnsafeAppend(view);
+ return Status::OK();
+ }
+
+ /// \brief Append one value from a std::string after reserving a slot.
+ Status Append(const std::string& s) {
+ ARROW_RETURN_NOT_OK(Reserve(1));
+ UnsafeAppend(s);
+ return Status::OK();
+ }
+
Status Append(const Buffer& s) {
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppend(util::string_view(s));
@@ -475,196 +475,196 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder {
Status Append(const std::shared_ptr<Buffer>& s) { return Append(*s); }
- template <size_t NBYTES>
- Status Append(const std::array<uint8_t, NBYTES>& value) {
- ARROW_RETURN_NOT_OK(Reserve(1));
- UnsafeAppend(
- util::string_view(reinterpret_cast<const char*>(value.data()), value.size()));
- return Status::OK();
- }
-
- Status AppendValues(const uint8_t* data, int64_t length,
- const uint8_t* valid_bytes = NULLPTR);
-
- Status AppendNull() final;
- Status AppendNulls(int64_t length) final;
-
+ template <size_t NBYTES>
+ Status Append(const std::array<uint8_t, NBYTES>& value) {
+ ARROW_RETURN_NOT_OK(Reserve(1));
+ UnsafeAppend(
+ util::string_view(reinterpret_cast<const char*>(value.data()), value.size()));
+ return Status::OK();
+ }
+
+ Status AppendValues(const uint8_t* data, int64_t length,
+ const uint8_t* valid_bytes = NULLPTR);
+
+ Status AppendNull() final;
+ Status AppendNulls(int64_t length) final;
+
Status AppendEmptyValue() final;
Status AppendEmptyValues(int64_t length) final;
- void UnsafeAppend(const uint8_t* value) {
- UnsafeAppendToBitmap(true);
- if (ARROW_PREDICT_TRUE(byte_width_ > 0)) {
- byte_builder_.UnsafeAppend(value, byte_width_);
- }
- }
-
- void UnsafeAppend(const char* value) {
- UnsafeAppend(reinterpret_cast<const uint8_t*>(value));
- }
-
- void UnsafeAppend(util::string_view value) {
-#ifndef NDEBUG
- CheckValueSize(static_cast<size_t>(value.size()));
-#endif
- UnsafeAppend(reinterpret_cast<const uint8_t*>(value.data()));
- }
-
+ void UnsafeAppend(const uint8_t* value) {
+ UnsafeAppendToBitmap(true);
+ if (ARROW_PREDICT_TRUE(byte_width_ > 0)) {
+ byte_builder_.UnsafeAppend(value, byte_width_);
+ }
+ }
+
+ void UnsafeAppend(const char* value) {
+ UnsafeAppend(reinterpret_cast<const uint8_t*>(value));
+ }
+
+ void UnsafeAppend(util::string_view value) {
+#ifndef NDEBUG
+ CheckValueSize(static_cast<size_t>(value.size()));
+#endif
+ UnsafeAppend(reinterpret_cast<const uint8_t*>(value.data()));
+ }
+
void UnsafeAppend(const Buffer& s) { UnsafeAppend(util::string_view(s)); }
void UnsafeAppend(const std::shared_ptr<Buffer>& s) { UnsafeAppend(*s); }
- void UnsafeAppendNull() {
- UnsafeAppendToBitmap(false);
- byte_builder_.UnsafeAppend(/*num_copies=*/byte_width_, 0);
- }
-
- Status ValidateOverflow(int64_t new_bytes) const {
- auto new_size = byte_builder_.length() + new_bytes;
- if (ARROW_PREDICT_FALSE(new_size > memory_limit())) {
- return Status::CapacityError("array cannot contain more than ", memory_limit(),
- " bytes, have ", new_size);
- } else {
- return Status::OK();
- }
- }
-
- /// \brief Ensures there is enough allocated capacity to append the indicated
- /// number of bytes to the value data buffer without additional allocations
- Status ReserveData(int64_t elements) {
- ARROW_RETURN_NOT_OK(ValidateOverflow(elements));
- return byte_builder_.Reserve(elements);
- }
-
- void Reset() override;
- Status Resize(int64_t capacity) override;
- Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
-
- /// \cond FALSE
- using ArrayBuilder::Finish;
- /// \endcond
-
- Status Finish(std::shared_ptr<FixedSizeBinaryArray>* out) { return FinishTyped(out); }
-
- /// \return size of values buffer so far
- int64_t value_data_length() const { return byte_builder_.length(); }
-
- int32_t byte_width() const { return byte_width_; }
-
- /// Temporary access to a value.
- ///
- /// This pointer becomes invalid on the next modifying operation.
- const uint8_t* GetValue(int64_t i) const;
-
- /// Temporary access to a value.
- ///
- /// This view becomes invalid on the next modifying operation.
- util::string_view GetView(int64_t i) const;
-
- static constexpr int64_t memory_limit() {
- return std::numeric_limits<int64_t>::max() - 1;
- }
-
- std::shared_ptr<DataType> type() const override {
- return fixed_size_binary(byte_width_);
- }
-
- protected:
- int32_t byte_width_;
- BufferBuilder byte_builder_;
-
- /// Temporary access to a value.
- ///
- /// This pointer becomes invalid on the next modifying operation.
- uint8_t* GetMutableValue(int64_t i) {
- uint8_t* data_ptr = byte_builder_.mutable_data();
- return data_ptr + i * byte_width_;
- }
-
- void CheckValueSize(int64_t size);
-};
-
-// ----------------------------------------------------------------------
-// Chunked builders: build a sequence of BinaryArray or StringArray that are
-// limited to a particular size (to the upper limit of 2GB)
-
-namespace internal {
-
-class ARROW_EXPORT ChunkedBinaryBuilder {
- public:
- explicit ChunkedBinaryBuilder(int32_t max_chunk_value_length,
- MemoryPool* pool = default_memory_pool());
-
- ChunkedBinaryBuilder(int32_t max_chunk_value_length, int32_t max_chunk_length,
- MemoryPool* pool = default_memory_pool());
-
- virtual ~ChunkedBinaryBuilder() = default;
-
- Status Append(const uint8_t* value, int32_t length) {
- if (ARROW_PREDICT_FALSE(length + builder_->value_data_length() >
- max_chunk_value_length_)) {
- if (builder_->value_data_length() == 0) {
- // The current item is larger than max_chunk_size_;
- // this chunk will be oversize and hold *only* this item
- ARROW_RETURN_NOT_OK(builder_->Append(value, length));
- return NextChunk();
- }
- // The current item would cause builder_->value_data_length() to exceed
- // max_chunk_size_, so finish this chunk and append the current item to the next
- // chunk
- ARROW_RETURN_NOT_OK(NextChunk());
- return Append(value, length);
- }
-
- if (ARROW_PREDICT_FALSE(builder_->length() == max_chunk_length_)) {
- // The current item would cause builder_->length() to exceed max_chunk_length_, so
- // finish this chunk and append the current item to the next chunk
- ARROW_RETURN_NOT_OK(NextChunk());
- }
-
- return builder_->Append(value, length);
- }
-
- Status Append(const util::string_view& value) {
- return Append(reinterpret_cast<const uint8_t*>(value.data()),
- static_cast<int32_t>(value.size()));
- }
-
- Status AppendNull() {
- if (ARROW_PREDICT_FALSE(builder_->length() == max_chunk_length_)) {
- ARROW_RETURN_NOT_OK(NextChunk());
- }
- return builder_->AppendNull();
- }
-
- Status Reserve(int64_t values);
-
- virtual Status Finish(ArrayVector* out);
-
- protected:
- Status NextChunk();
-
- // maximum total character data size per chunk
- int64_t max_chunk_value_length_;
-
- // maximum elements allowed per chunk
- int64_t max_chunk_length_ = kListMaximumElements;
-
- // when Reserve() would cause builder_ to exceed its max_chunk_length_,
- // add to extra_capacity_ instead and wait to reserve until the next chunk
- int64_t extra_capacity_ = 0;
-
- std::unique_ptr<BinaryBuilder> builder_;
- std::vector<std::shared_ptr<Array>> chunks_;
-};
-
-class ARROW_EXPORT ChunkedStringBuilder : public ChunkedBinaryBuilder {
- public:
- using ChunkedBinaryBuilder::ChunkedBinaryBuilder;
-
- Status Finish(ArrayVector* out) override;
-};
-
-} // namespace internal
-
-} // namespace arrow
+ void UnsafeAppendNull() {
+ UnsafeAppendToBitmap(false);
+ byte_builder_.UnsafeAppend(/*num_copies=*/byte_width_, 0);
+ }
+
+ Status ValidateOverflow(int64_t new_bytes) const {
+ auto new_size = byte_builder_.length() + new_bytes;
+ if (ARROW_PREDICT_FALSE(new_size > memory_limit())) {
+ return Status::CapacityError("array cannot contain more than ", memory_limit(),
+ " bytes, have ", new_size);
+ } else {
+ return Status::OK();
+ }
+ }
+
+ /// \brief Ensures there is enough allocated capacity to append the indicated
+ /// number of bytes to the value data buffer without additional allocations
+ Status ReserveData(int64_t elements) {
+ ARROW_RETURN_NOT_OK(ValidateOverflow(elements));
+ return byte_builder_.Reserve(elements);
+ }
+
+ void Reset() override;
+ Status Resize(int64_t capacity) override;
+ Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
+
+ /// \cond FALSE
+ using ArrayBuilder::Finish;
+ /// \endcond
+
+ Status Finish(std::shared_ptr<FixedSizeBinaryArray>* out) { return FinishTyped(out); }
+
+ /// \return size of values buffer so far
+ int64_t value_data_length() const { return byte_builder_.length(); }
+
+ int32_t byte_width() const { return byte_width_; }
+
+ /// Temporary access to a value.
+ ///
+ /// This pointer becomes invalid on the next modifying operation.
+ const uint8_t* GetValue(int64_t i) const;
+
+ /// Temporary access to a value.
+ ///
+ /// This view becomes invalid on the next modifying operation.
+ util::string_view GetView(int64_t i) const;
+
+ static constexpr int64_t memory_limit() {
+ return std::numeric_limits<int64_t>::max() - 1;
+ }
+
+ std::shared_ptr<DataType> type() const override {
+ return fixed_size_binary(byte_width_);
+ }
+
+ protected:
+ int32_t byte_width_;
+ BufferBuilder byte_builder_;
+
+ /// Temporary access to a value.
+ ///
+ /// This pointer becomes invalid on the next modifying operation.
+ uint8_t* GetMutableValue(int64_t i) {
+ uint8_t* data_ptr = byte_builder_.mutable_data();
+ return data_ptr + i * byte_width_;
+ }
+
+ void CheckValueSize(int64_t size);
+};
+
+// ----------------------------------------------------------------------
+// Chunked builders: build a sequence of BinaryArray or StringArray that are
+// limited to a particular size (to the upper limit of 2GB)
+
+namespace internal {
+
+class ARROW_EXPORT ChunkedBinaryBuilder {
+ public:
+ explicit ChunkedBinaryBuilder(int32_t max_chunk_value_length,
+ MemoryPool* pool = default_memory_pool());
+
+ ChunkedBinaryBuilder(int32_t max_chunk_value_length, int32_t max_chunk_length,
+ MemoryPool* pool = default_memory_pool());
+
+ virtual ~ChunkedBinaryBuilder() = default;
+
+ Status Append(const uint8_t* value, int32_t length) {
+ if (ARROW_PREDICT_FALSE(length + builder_->value_data_length() >
+ max_chunk_value_length_)) {
+ if (builder_->value_data_length() == 0) {
+ // The current item is larger than max_chunk_size_;
+ // this chunk will be oversize and hold *only* this item
+ ARROW_RETURN_NOT_OK(builder_->Append(value, length));
+ return NextChunk();
+ }
+ // The current item would cause builder_->value_data_length() to exceed
+ // max_chunk_size_, so finish this chunk and append the current item to the next
+ // chunk
+ ARROW_RETURN_NOT_OK(NextChunk());
+ return Append(value, length);
+ }
+
+ if (ARROW_PREDICT_FALSE(builder_->length() == max_chunk_length_)) {
+ // The current item would cause builder_->length() to exceed max_chunk_length_, so
+ // finish this chunk and append the current item to the next chunk
+ ARROW_RETURN_NOT_OK(NextChunk());
+ }
+
+ return builder_->Append(value, length);
+ }
+
+ Status Append(const util::string_view& value) {
+ return Append(reinterpret_cast<const uint8_t*>(value.data()),
+ static_cast<int32_t>(value.size()));
+ }
+
+ Status AppendNull() {
+ if (ARROW_PREDICT_FALSE(builder_->length() == max_chunk_length_)) {
+ ARROW_RETURN_NOT_OK(NextChunk());
+ }
+ return builder_->AppendNull();
+ }
+
+ Status Reserve(int64_t values);
+
+ virtual Status Finish(ArrayVector* out);
+
+ protected:
+ Status NextChunk();
+
+ // maximum total character data size per chunk
+ int64_t max_chunk_value_length_;
+
+ // maximum elements allowed per chunk
+ int64_t max_chunk_length_ = kListMaximumElements;
+
+ // when Reserve() would cause builder_ to exceed its max_chunk_length_,
+ // add to extra_capacity_ instead and wait to reserve until the next chunk
+ int64_t extra_capacity_ = 0;
+
+ std::unique_ptr<BinaryBuilder> builder_;
+ std::vector<std::shared_ptr<Array>> chunks_;
+};
+
+class ARROW_EXPORT ChunkedStringBuilder : public ChunkedBinaryBuilder {
+ public:
+ using ChunkedBinaryBuilder::ChunkedBinaryBuilder;
+
+ Status Finish(ArrayVector* out) override;
+};
+
+} // namespace internal
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_decimal.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_decimal.cc
index bd7615a7309..fb2c202af71 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_decimal.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_decimal.cc
@@ -1,72 +1,72 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/array/builder_decimal.h"
-
-#include <cstdint>
-#include <memory>
-
-#include "arrow/array/data.h"
-#include "arrow/buffer.h"
-#include "arrow/buffer_builder.h"
-#include "arrow/status.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/decimal.h"
-
-namespace arrow {
-
-class Buffer;
-class MemoryPool;
-
-// ----------------------------------------------------------------------
-// Decimal128Builder
-
-Decimal128Builder::Decimal128Builder(const std::shared_ptr<DataType>& type,
- MemoryPool* pool)
- : FixedSizeBinaryBuilder(type, pool),
- decimal_type_(internal::checked_pointer_cast<Decimal128Type>(type)) {}
-
-Status Decimal128Builder::Append(Decimal128 value) {
- RETURN_NOT_OK(FixedSizeBinaryBuilder::Reserve(1));
- UnsafeAppend(value);
- return Status::OK();
-}
-
-void Decimal128Builder::UnsafeAppend(Decimal128 value) {
- value.ToBytes(GetMutableValue(length()));
- byte_builder_.UnsafeAdvance(16);
- UnsafeAppendToBitmap(true);
-}
-
-void Decimal128Builder::UnsafeAppend(util::string_view value) {
- FixedSizeBinaryBuilder::UnsafeAppend(value);
-}
-
-Status Decimal128Builder::FinishInternal(std::shared_ptr<ArrayData>* out) {
- std::shared_ptr<Buffer> data;
- RETURN_NOT_OK(byte_builder_.Finish(&data));
- std::shared_ptr<Buffer> null_bitmap;
- RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
-
- *out = ArrayData::Make(type(), length_, {null_bitmap, data}, null_count_);
- capacity_ = length_ = null_count_ = 0;
- return Status::OK();
-}
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/array/builder_decimal.h"
+
+#include <cstdint>
+#include <memory>
+
+#include "arrow/array/data.h"
+#include "arrow/buffer.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/status.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/decimal.h"
+
+namespace arrow {
+
+class Buffer;
+class MemoryPool;
+
+// ----------------------------------------------------------------------
+// Decimal128Builder
+
+Decimal128Builder::Decimal128Builder(const std::shared_ptr<DataType>& type,
+ MemoryPool* pool)
+ : FixedSizeBinaryBuilder(type, pool),
+ decimal_type_(internal::checked_pointer_cast<Decimal128Type>(type)) {}
+
+Status Decimal128Builder::Append(Decimal128 value) {
+ RETURN_NOT_OK(FixedSizeBinaryBuilder::Reserve(1));
+ UnsafeAppend(value);
+ return Status::OK();
+}
+
+void Decimal128Builder::UnsafeAppend(Decimal128 value) {
+ value.ToBytes(GetMutableValue(length()));
+ byte_builder_.UnsafeAdvance(16);
+ UnsafeAppendToBitmap(true);
+}
+
+void Decimal128Builder::UnsafeAppend(util::string_view value) {
+ FixedSizeBinaryBuilder::UnsafeAppend(value);
+}
+
+Status Decimal128Builder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+ std::shared_ptr<Buffer> data;
+ RETURN_NOT_OK(byte_builder_.Finish(&data));
+ std::shared_ptr<Buffer> null_bitmap;
+ RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
+
+ *out = ArrayData::Make(type(), length_, {null_bitmap, data}, null_count_);
+ capacity_ = length_ = null_count_ = 0;
+ return Status::OK();
+}
+
// ----------------------------------------------------------------------
// Decimal256Builder
@@ -102,4 +102,4 @@ Status Decimal256Builder::FinishInternal(std::shared_ptr<ArrayData>* out) {
return Status::OK();
}
-} // namespace arrow
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_decimal.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_decimal.h
index f48392ed001..b89373ba829 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_decimal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_decimal.h
@@ -1,64 +1,64 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <memory>
-
-#include "arrow/array/array_decimal.h"
-#include "arrow/array/builder_base.h"
-#include "arrow/array/builder_binary.h"
-#include "arrow/array/data.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class ARROW_EXPORT Decimal128Builder : public FixedSizeBinaryBuilder {
- public:
- using TypeClass = Decimal128Type;
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+
+#include "arrow/array/array_decimal.h"
+#include "arrow/array/builder_base.h"
+#include "arrow/array/builder_binary.h"
+#include "arrow/array/data.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class ARROW_EXPORT Decimal128Builder : public FixedSizeBinaryBuilder {
+ public:
+ using TypeClass = Decimal128Type;
using ValueType = Decimal128;
-
- explicit Decimal128Builder(const std::shared_ptr<DataType>& type,
- MemoryPool* pool = default_memory_pool());
-
- using FixedSizeBinaryBuilder::Append;
- using FixedSizeBinaryBuilder::AppendValues;
- using FixedSizeBinaryBuilder::Reset;
-
- Status Append(Decimal128 val);
- void UnsafeAppend(Decimal128 val);
- void UnsafeAppend(util::string_view val);
-
- Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
-
- /// \cond FALSE
- using ArrayBuilder::Finish;
- /// \endcond
-
- Status Finish(std::shared_ptr<Decimal128Array>* out) { return FinishTyped(out); }
-
- std::shared_ptr<DataType> type() const override { return decimal_type_; }
-
- protected:
- std::shared_ptr<Decimal128Type> decimal_type_;
-};
-
+
+ explicit Decimal128Builder(const std::shared_ptr<DataType>& type,
+ MemoryPool* pool = default_memory_pool());
+
+ using FixedSizeBinaryBuilder::Append;
+ using FixedSizeBinaryBuilder::AppendValues;
+ using FixedSizeBinaryBuilder::Reset;
+
+ Status Append(Decimal128 val);
+ void UnsafeAppend(Decimal128 val);
+ void UnsafeAppend(util::string_view val);
+
+ Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
+
+ /// \cond FALSE
+ using ArrayBuilder::Finish;
+ /// \endcond
+
+ Status Finish(std::shared_ptr<Decimal128Array>* out) { return FinishTyped(out); }
+
+ std::shared_ptr<DataType> type() const override { return decimal_type_; }
+
+ protected:
+ std::shared_ptr<Decimal128Type> decimal_type_;
+};
+
class ARROW_EXPORT Decimal256Builder : public FixedSizeBinaryBuilder {
public:
using TypeClass = Decimal256Type;
@@ -89,6 +89,6 @@ class ARROW_EXPORT Decimal256Builder : public FixedSizeBinaryBuilder {
std::shared_ptr<Decimal256Type> decimal_type_;
};
-using DecimalBuilder = Decimal128Builder;
-
-} // namespace arrow
+using DecimalBuilder = Decimal128Builder;
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_dict.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_dict.cc
index b13f6a2db34..301790edb95 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_dict.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_dict.cc
@@ -1,204 +1,204 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/array/builder_dict.h"
-
-#include <cstdint>
-#include <utility>
-
-#include "arrow/array/dict_internal.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/hashing.h"
-#include "arrow/util/logging.h"
-#include "arrow/visitor_inline.h"
-
-namespace arrow {
-
-// ----------------------------------------------------------------------
-// DictionaryBuilder
-
-namespace internal {
-
-class DictionaryMemoTable::DictionaryMemoTableImpl {
- // Type-dependent visitor for memo table initialization
- struct MemoTableInitializer {
- std::shared_ptr<DataType> value_type_;
- MemoryPool* pool_;
- std::unique_ptr<MemoTable>* memo_table_;
-
- template <typename T>
- enable_if_no_memoize<T, Status> Visit(const T&) {
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/array/builder_dict.h"
+
+#include <cstdint>
+#include <utility>
+
+#include "arrow/array/dict_internal.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/hashing.h"
+#include "arrow/util/logging.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+// ----------------------------------------------------------------------
+// DictionaryBuilder
+
+namespace internal {
+
+class DictionaryMemoTable::DictionaryMemoTableImpl {
+ // Type-dependent visitor for memo table initialization
+ struct MemoTableInitializer {
+ std::shared_ptr<DataType> value_type_;
+ MemoryPool* pool_;
+ std::unique_ptr<MemoTable>* memo_table_;
+
+ template <typename T>
+ enable_if_no_memoize<T, Status> Visit(const T&) {
return Status::NotImplemented("Initialization of ", value_type_->ToString(),
- " memo table is not implemented");
- }
-
- template <typename T>
- enable_if_memoize<T, Status> Visit(const T&) {
- using MemoTable = typename DictionaryTraits<T>::MemoTableType;
- memo_table_->reset(new MemoTable(pool_, 0));
- return Status::OK();
- }
- };
-
- // Type-dependent visitor for memo table insertion
- struct ArrayValuesInserter {
- DictionaryMemoTableImpl* impl_;
- const Array& values_;
-
- template <typename T>
- Status Visit(const T& type) {
- using ArrayType = typename TypeTraits<T>::ArrayType;
- return InsertValues(type, checked_cast<const ArrayType&>(values_));
- }
-
- private:
- template <typename T, typename ArrayType>
- enable_if_no_memoize<T, Status> InsertValues(const T& type, const ArrayType&) {
- return Status::NotImplemented("Inserting array values of ", type,
- " is not implemented");
- }
-
- template <typename T, typename ArrayType>
- enable_if_memoize<T, Status> InsertValues(const T&, const ArrayType& array) {
- if (array.null_count() > 0) {
- return Status::Invalid("Cannot insert dictionary values containing nulls");
- }
- for (int64_t i = 0; i < array.length(); ++i) {
- int32_t unused_memo_index;
- RETURN_NOT_OK(impl_->GetOrInsert<T>(array.GetView(i), &unused_memo_index));
- }
- return Status::OK();
- }
- };
-
- // Type-dependent visitor for building ArrayData from memo table
- struct ArrayDataGetter {
- std::shared_ptr<DataType> value_type_;
- MemoTable* memo_table_;
- MemoryPool* pool_;
- int64_t start_offset_;
- std::shared_ptr<ArrayData>* out_;
-
- template <typename T>
- enable_if_no_memoize<T, Status> Visit(const T&) {
- return Status::NotImplemented("Getting array data of ", value_type_,
- " is not implemented");
- }
-
- template <typename T>
- enable_if_memoize<T, Status> Visit(const T&) {
- using ConcreteMemoTable = typename DictionaryTraits<T>::MemoTableType;
- auto memo_table = checked_cast<ConcreteMemoTable*>(memo_table_);
- return DictionaryTraits<T>::GetDictionaryArrayData(pool_, value_type_, *memo_table,
- start_offset_, out_);
- }
- };
-
- public:
- DictionaryMemoTableImpl(MemoryPool* pool, std::shared_ptr<DataType> type)
- : pool_(pool), type_(std::move(type)), memo_table_(nullptr) {
- MemoTableInitializer visitor{type_, pool_, &memo_table_};
- ARROW_CHECK_OK(VisitTypeInline(*type_, &visitor));
- }
-
- Status InsertValues(const Array& array) {
- if (!array.type()->Equals(*type_)) {
- return Status::Invalid("Array value type does not match memo type: ",
- array.type()->ToString());
- }
- ArrayValuesInserter visitor{this, array};
- return VisitTypeInline(*array.type(), &visitor);
- }
-
- template <typename PhysicalType,
- typename CType = typename DictionaryValue<PhysicalType>::type>
- Status GetOrInsert(CType value, int32_t* out) {
- using ConcreteMemoTable = typename DictionaryTraits<PhysicalType>::MemoTableType;
- return checked_cast<ConcreteMemoTable*>(memo_table_.get())->GetOrInsert(value, out);
- }
-
- Status GetArrayData(int64_t start_offset, std::shared_ptr<ArrayData>* out) {
- ArrayDataGetter visitor{type_, memo_table_.get(), pool_, start_offset, out};
- return VisitTypeInline(*type_, &visitor);
- }
-
- int32_t size() const { return memo_table_->size(); }
-
- private:
- MemoryPool* pool_;
- std::shared_ptr<DataType> type_;
- std::unique_ptr<MemoTable> memo_table_;
-};
-
-DictionaryMemoTable::DictionaryMemoTable(MemoryPool* pool,
- const std::shared_ptr<DataType>& type)
- : impl_(new DictionaryMemoTableImpl(pool, type)) {}
-
-DictionaryMemoTable::DictionaryMemoTable(MemoryPool* pool,
- const std::shared_ptr<Array>& dictionary)
- : impl_(new DictionaryMemoTableImpl(pool, dictionary->type())) {
- ARROW_CHECK_OK(impl_->InsertValues(*dictionary));
-}
-
-DictionaryMemoTable::~DictionaryMemoTable() = default;
-
-#define GET_OR_INSERT(C_TYPE) \
- Status DictionaryMemoTable::GetOrInsert( \
- const typename CTypeTraits<C_TYPE>::ArrowType*, C_TYPE value, int32_t* out) { \
- return impl_->GetOrInsert<typename CTypeTraits<C_TYPE>::ArrowType>(value, out); \
- }
-
-GET_OR_INSERT(bool)
-GET_OR_INSERT(int8_t)
-GET_OR_INSERT(int16_t)
-GET_OR_INSERT(int32_t)
-GET_OR_INSERT(int64_t)
-GET_OR_INSERT(uint8_t)
-GET_OR_INSERT(uint16_t)
-GET_OR_INSERT(uint32_t)
-GET_OR_INSERT(uint64_t)
-GET_OR_INSERT(float)
-GET_OR_INSERT(double)
-
-#undef GET_OR_INSERT
-
-Status DictionaryMemoTable::GetOrInsert(const BinaryType*, util::string_view value,
- int32_t* out) {
- return impl_->GetOrInsert<BinaryType>(value, out);
-}
-
-Status DictionaryMemoTable::GetOrInsert(const LargeBinaryType*, util::string_view value,
- int32_t* out) {
- return impl_->GetOrInsert<LargeBinaryType>(value, out);
-}
-
-Status DictionaryMemoTable::GetArrayData(int64_t start_offset,
- std::shared_ptr<ArrayData>* out) {
- return impl_->GetArrayData(start_offset, out);
-}
-
-Status DictionaryMemoTable::InsertValues(const Array& array) {
- return impl_->InsertValues(array);
-}
-
-int32_t DictionaryMemoTable::size() const { return impl_->size(); }
-
-} // namespace internal
-} // namespace arrow
+ " memo table is not implemented");
+ }
+
+ template <typename T>
+ enable_if_memoize<T, Status> Visit(const T&) {
+ using MemoTable = typename DictionaryTraits<T>::MemoTableType;
+ memo_table_->reset(new MemoTable(pool_, 0));
+ return Status::OK();
+ }
+ };
+
+ // Type-dependent visitor for memo table insertion
+ struct ArrayValuesInserter {
+ DictionaryMemoTableImpl* impl_;
+ const Array& values_;
+
+ template <typename T>
+ Status Visit(const T& type) {
+ using ArrayType = typename TypeTraits<T>::ArrayType;
+ return InsertValues(type, checked_cast<const ArrayType&>(values_));
+ }
+
+ private:
+ template <typename T, typename ArrayType>
+ enable_if_no_memoize<T, Status> InsertValues(const T& type, const ArrayType&) {
+ return Status::NotImplemented("Inserting array values of ", type,
+ " is not implemented");
+ }
+
+ template <typename T, typename ArrayType>
+ enable_if_memoize<T, Status> InsertValues(const T&, const ArrayType& array) {
+ if (array.null_count() > 0) {
+ return Status::Invalid("Cannot insert dictionary values containing nulls");
+ }
+ for (int64_t i = 0; i < array.length(); ++i) {
+ int32_t unused_memo_index;
+ RETURN_NOT_OK(impl_->GetOrInsert<T>(array.GetView(i), &unused_memo_index));
+ }
+ return Status::OK();
+ }
+ };
+
+ // Type-dependent visitor for building ArrayData from memo table
+ struct ArrayDataGetter {
+ std::shared_ptr<DataType> value_type_;
+ MemoTable* memo_table_;
+ MemoryPool* pool_;
+ int64_t start_offset_;
+ std::shared_ptr<ArrayData>* out_;
+
+ template <typename T>
+ enable_if_no_memoize<T, Status> Visit(const T&) {
+ return Status::NotImplemented("Getting array data of ", value_type_,
+ " is not implemented");
+ }
+
+ template <typename T>
+ enable_if_memoize<T, Status> Visit(const T&) {
+ using ConcreteMemoTable = typename DictionaryTraits<T>::MemoTableType;
+ auto memo_table = checked_cast<ConcreteMemoTable*>(memo_table_);
+ return DictionaryTraits<T>::GetDictionaryArrayData(pool_, value_type_, *memo_table,
+ start_offset_, out_);
+ }
+ };
+
+ public:
+ DictionaryMemoTableImpl(MemoryPool* pool, std::shared_ptr<DataType> type)
+ : pool_(pool), type_(std::move(type)), memo_table_(nullptr) {
+ MemoTableInitializer visitor{type_, pool_, &memo_table_};
+ ARROW_CHECK_OK(VisitTypeInline(*type_, &visitor));
+ }
+
+ Status InsertValues(const Array& array) {
+ if (!array.type()->Equals(*type_)) {
+ return Status::Invalid("Array value type does not match memo type: ",
+ array.type()->ToString());
+ }
+ ArrayValuesInserter visitor{this, array};
+ return VisitTypeInline(*array.type(), &visitor);
+ }
+
+ template <typename PhysicalType,
+ typename CType = typename DictionaryValue<PhysicalType>::type>
+ Status GetOrInsert(CType value, int32_t* out) {
+ using ConcreteMemoTable = typename DictionaryTraits<PhysicalType>::MemoTableType;
+ return checked_cast<ConcreteMemoTable*>(memo_table_.get())->GetOrInsert(value, out);
+ }
+
+ Status GetArrayData(int64_t start_offset, std::shared_ptr<ArrayData>* out) {
+ ArrayDataGetter visitor{type_, memo_table_.get(), pool_, start_offset, out};
+ return VisitTypeInline(*type_, &visitor);
+ }
+
+ int32_t size() const { return memo_table_->size(); }
+
+ private:
+ MemoryPool* pool_;
+ std::shared_ptr<DataType> type_;
+ std::unique_ptr<MemoTable> memo_table_;
+};
+
+DictionaryMemoTable::DictionaryMemoTable(MemoryPool* pool,
+ const std::shared_ptr<DataType>& type)
+ : impl_(new DictionaryMemoTableImpl(pool, type)) {}
+
+DictionaryMemoTable::DictionaryMemoTable(MemoryPool* pool,
+ const std::shared_ptr<Array>& dictionary)
+ : impl_(new DictionaryMemoTableImpl(pool, dictionary->type())) {
+ ARROW_CHECK_OK(impl_->InsertValues(*dictionary));
+}
+
+DictionaryMemoTable::~DictionaryMemoTable() = default;
+
+#define GET_OR_INSERT(C_TYPE) \
+ Status DictionaryMemoTable::GetOrInsert( \
+ const typename CTypeTraits<C_TYPE>::ArrowType*, C_TYPE value, int32_t* out) { \
+ return impl_->GetOrInsert<typename CTypeTraits<C_TYPE>::ArrowType>(value, out); \
+ }
+
+GET_OR_INSERT(bool)
+GET_OR_INSERT(int8_t)
+GET_OR_INSERT(int16_t)
+GET_OR_INSERT(int32_t)
+GET_OR_INSERT(int64_t)
+GET_OR_INSERT(uint8_t)
+GET_OR_INSERT(uint16_t)
+GET_OR_INSERT(uint32_t)
+GET_OR_INSERT(uint64_t)
+GET_OR_INSERT(float)
+GET_OR_INSERT(double)
+
+#undef GET_OR_INSERT
+
+Status DictionaryMemoTable::GetOrInsert(const BinaryType*, util::string_view value,
+ int32_t* out) {
+ return impl_->GetOrInsert<BinaryType>(value, out);
+}
+
+Status DictionaryMemoTable::GetOrInsert(const LargeBinaryType*, util::string_view value,
+ int32_t* out) {
+ return impl_->GetOrInsert<LargeBinaryType>(value, out);
+}
+
+Status DictionaryMemoTable::GetArrayData(int64_t start_offset,
+ std::shared_ptr<ArrayData>* out) {
+ return impl_->GetArrayData(start_offset, out);
+}
+
+Status DictionaryMemoTable::InsertValues(const Array& array) {
+ return impl_->InsertValues(array);
+}
+
+int32_t DictionaryMemoTable::size() const { return impl_->size(); }
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_dict.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_dict.h
index eb96482dbf7..c2955c6a185 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_dict.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_dict.h
@@ -1,252 +1,252 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <algorithm>
-#include <cstdint>
-#include <memory>
-#include <type_traits>
-
-#include "arrow/array/array_base.h"
-#include "arrow/array/array_binary.h"
-#include "arrow/array/builder_adaptive.h" // IWYU pragma: export
-#include "arrow/array/builder_base.h" // IWYU pragma: export
-#include "arrow/array/builder_primitive.h" // IWYU pragma: export
-#include "arrow/array/data.h"
-#include "arrow/array/util.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <type_traits>
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/array_binary.h"
+#include "arrow/array/builder_adaptive.h" // IWYU pragma: export
+#include "arrow/array/builder_base.h" // IWYU pragma: export
+#include "arrow/array/builder_primitive.h" // IWYU pragma: export
+#include "arrow/array/data.h"
+#include "arrow/array/util.h"
#include "arrow/scalar.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/decimal.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-// ----------------------------------------------------------------------
-// Dictionary builder
-
-namespace internal {
-
-template <typename T, typename Enable = void>
-struct DictionaryValue {
- using type = typename T::c_type;
- using PhysicalType = T;
-};
-
-template <typename T>
-struct DictionaryValue<T, enable_if_base_binary<T>> {
- using type = util::string_view;
- using PhysicalType =
- typename std::conditional<std::is_same<typename T::offset_type, int32_t>::value,
- BinaryType, LargeBinaryType>::type;
-};
-
-template <typename T>
-struct DictionaryValue<T, enable_if_fixed_size_binary<T>> {
- using type = util::string_view;
- using PhysicalType = BinaryType;
-};
-
-class ARROW_EXPORT DictionaryMemoTable {
- public:
- DictionaryMemoTable(MemoryPool* pool, const std::shared_ptr<DataType>& type);
- DictionaryMemoTable(MemoryPool* pool, const std::shared_ptr<Array>& dictionary);
- ~DictionaryMemoTable();
-
- Status GetArrayData(int64_t start_offset, std::shared_ptr<ArrayData>* out);
-
- /// \brief Insert new memo values
- Status InsertValues(const Array& values);
-
- int32_t size() const;
-
- template <typename T>
- Status GetOrInsert(typename DictionaryValue<T>::type value, int32_t* out) {
- // We want to keep the DictionaryMemoTable implementation private, also we can't
- // use extern template classes because of compiler issues (MinGW?). Instead,
- // we expose explicit function overrides for each supported physical type.
- const typename DictionaryValue<T>::PhysicalType* physical_type = NULLPTR;
- return GetOrInsert(physical_type, value, out);
- }
-
- private:
- Status GetOrInsert(const BooleanType*, bool value, int32_t* out);
- Status GetOrInsert(const Int8Type*, int8_t value, int32_t* out);
- Status GetOrInsert(const Int16Type*, int16_t value, int32_t* out);
- Status GetOrInsert(const Int32Type*, int32_t value, int32_t* out);
- Status GetOrInsert(const Int64Type*, int64_t value, int32_t* out);
- Status GetOrInsert(const UInt8Type*, uint8_t value, int32_t* out);
- Status GetOrInsert(const UInt16Type*, uint16_t value, int32_t* out);
- Status GetOrInsert(const UInt32Type*, uint32_t value, int32_t* out);
- Status GetOrInsert(const UInt64Type*, uint64_t value, int32_t* out);
- Status GetOrInsert(const FloatType*, float value, int32_t* out);
- Status GetOrInsert(const DoubleType*, double value, int32_t* out);
-
- Status GetOrInsert(const BinaryType*, util::string_view value, int32_t* out);
- Status GetOrInsert(const LargeBinaryType*, util::string_view value, int32_t* out);
-
- class DictionaryMemoTableImpl;
- std::unique_ptr<DictionaryMemoTableImpl> impl_;
-};
-
-/// \brief Array builder for created encoded DictionaryArray from
-/// dense array
-///
-/// Unlike other builders, dictionary builder does not completely
-/// reset the state on Finish calls.
-template <typename BuilderType, typename T>
-class DictionaryBuilderBase : public ArrayBuilder {
- public:
- using TypeClass = DictionaryType;
- using Value = typename DictionaryValue<T>::type;
-
- // WARNING: the type given below is the value type, not the DictionaryType.
- // The DictionaryType is instantiated on the Finish() call.
- template <typename B = BuilderType, typename T1 = T>
- DictionaryBuilderBase(uint8_t start_int_size,
- enable_if_t<std::is_base_of<AdaptiveIntBuilderBase, B>::value &&
- !is_fixed_size_binary_type<T1>::value,
- const std::shared_ptr<DataType>&>
- value_type,
- MemoryPool* pool = default_memory_pool())
- : ArrayBuilder(pool),
- memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
- delta_offset_(0),
- byte_width_(-1),
- indices_builder_(start_int_size, pool),
- value_type_(value_type) {}
-
- template <typename T1 = T>
- explicit DictionaryBuilderBase(
- enable_if_t<!is_fixed_size_binary_type<T1>::value, const std::shared_ptr<DataType>&>
- value_type,
- MemoryPool* pool = default_memory_pool())
- : ArrayBuilder(pool),
- memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
- delta_offset_(0),
- byte_width_(-1),
- indices_builder_(pool),
- value_type_(value_type) {}
-
- template <typename B = BuilderType, typename T1 = T>
- DictionaryBuilderBase(uint8_t start_int_size,
- enable_if_t<std::is_base_of<AdaptiveIntBuilderBase, B>::value &&
- is_fixed_size_binary_type<T1>::value,
- const std::shared_ptr<DataType>&>
- value_type,
- MemoryPool* pool = default_memory_pool())
- : ArrayBuilder(pool),
- memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
- delta_offset_(0),
- byte_width_(static_cast<const T1&>(*value_type).byte_width()),
- indices_builder_(start_int_size, pool),
- value_type_(value_type) {}
-
- template <typename T1 = T>
- explicit DictionaryBuilderBase(
- enable_if_fixed_size_binary<T1, const std::shared_ptr<DataType>&> value_type,
- MemoryPool* pool = default_memory_pool())
- : ArrayBuilder(pool),
- memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
- delta_offset_(0),
- byte_width_(static_cast<const T1&>(*value_type).byte_width()),
- indices_builder_(pool),
- value_type_(value_type) {}
-
- template <typename T1 = T>
- explicit DictionaryBuilderBase(
- enable_if_parameter_free<T1, MemoryPool*> pool = default_memory_pool())
- : DictionaryBuilderBase<BuilderType, T1>(TypeTraits<T1>::type_singleton(), pool) {}
-
- // This constructor doesn't check for errors. Use InsertMemoValues instead.
- explicit DictionaryBuilderBase(const std::shared_ptr<Array>& dictionary,
- MemoryPool* pool = default_memory_pool())
- : ArrayBuilder(pool),
- memo_table_(new internal::DictionaryMemoTable(pool, dictionary)),
- delta_offset_(0),
- byte_width_(-1),
- indices_builder_(pool),
- value_type_(dictionary->type()) {}
-
- ~DictionaryBuilderBase() override = default;
-
- /// \brief The current number of entries in the dictionary
- int64_t dictionary_length() const { return memo_table_->size(); }
-
- /// \brief The value byte width (for FixedSizeBinaryType)
- template <typename T1 = T>
- enable_if_fixed_size_binary<T1, int32_t> byte_width() const {
- return byte_width_;
- }
-
- /// \brief Append a scalar value
- Status Append(Value value) {
- ARROW_RETURN_NOT_OK(Reserve(1));
-
- int32_t memo_index;
- ARROW_RETURN_NOT_OK(memo_table_->GetOrInsert<T>(value, &memo_index));
- ARROW_RETURN_NOT_OK(indices_builder_.Append(memo_index));
- length_ += 1;
-
- return Status::OK();
- }
-
- /// \brief Append a fixed-width string (only for FixedSizeBinaryType)
- template <typename T1 = T>
- enable_if_fixed_size_binary<T1, Status> Append(const uint8_t* value) {
- return Append(util::string_view(reinterpret_cast<const char*>(value), byte_width_));
- }
-
- /// \brief Append a fixed-width string (only for FixedSizeBinaryType)
- template <typename T1 = T>
- enable_if_fixed_size_binary<T1, Status> Append(const char* value) {
- return Append(util::string_view(value, byte_width_));
- }
-
- /// \brief Append a string (only for binary types)
- template <typename T1 = T>
- enable_if_binary_like<T1, Status> Append(const uint8_t* value, int32_t length) {
- return Append(reinterpret_cast<const char*>(value), length);
- }
-
- /// \brief Append a string (only for binary types)
- template <typename T1 = T>
- enable_if_binary_like<T1, Status> Append(const char* value, int32_t length) {
- return Append(util::string_view(value, length));
- }
-
- /// \brief Append a string (only for string types)
- template <typename T1 = T>
- enable_if_string_like<T1, Status> Append(const char* value, int32_t length) {
- return Append(util::string_view(value, length));
- }
-
- /// \brief Append a decimal (only for Decimal128Type)
- template <typename T1 = T>
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/decimal.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+// ----------------------------------------------------------------------
+// Dictionary builder
+
+namespace internal {
+
+template <typename T, typename Enable = void>
+struct DictionaryValue {
+ using type = typename T::c_type;
+ using PhysicalType = T;
+};
+
+template <typename T>
+struct DictionaryValue<T, enable_if_base_binary<T>> {
+ using type = util::string_view;
+ using PhysicalType =
+ typename std::conditional<std::is_same<typename T::offset_type, int32_t>::value,
+ BinaryType, LargeBinaryType>::type;
+};
+
+template <typename T>
+struct DictionaryValue<T, enable_if_fixed_size_binary<T>> {
+ using type = util::string_view;
+ using PhysicalType = BinaryType;
+};
+
+class ARROW_EXPORT DictionaryMemoTable {
+ public:
+ DictionaryMemoTable(MemoryPool* pool, const std::shared_ptr<DataType>& type);
+ DictionaryMemoTable(MemoryPool* pool, const std::shared_ptr<Array>& dictionary);
+ ~DictionaryMemoTable();
+
+ Status GetArrayData(int64_t start_offset, std::shared_ptr<ArrayData>* out);
+
+ /// \brief Insert new memo values
+ Status InsertValues(const Array& values);
+
+ int32_t size() const;
+
+ template <typename T>
+ Status GetOrInsert(typename DictionaryValue<T>::type value, int32_t* out) {
+ // We want to keep the DictionaryMemoTable implementation private, also we can't
+ // use extern template classes because of compiler issues (MinGW?). Instead,
+ // we expose explicit function overrides for each supported physical type.
+ const typename DictionaryValue<T>::PhysicalType* physical_type = NULLPTR;
+ return GetOrInsert(physical_type, value, out);
+ }
+
+ private:
+ Status GetOrInsert(const BooleanType*, bool value, int32_t* out);
+ Status GetOrInsert(const Int8Type*, int8_t value, int32_t* out);
+ Status GetOrInsert(const Int16Type*, int16_t value, int32_t* out);
+ Status GetOrInsert(const Int32Type*, int32_t value, int32_t* out);
+ Status GetOrInsert(const Int64Type*, int64_t value, int32_t* out);
+ Status GetOrInsert(const UInt8Type*, uint8_t value, int32_t* out);
+ Status GetOrInsert(const UInt16Type*, uint16_t value, int32_t* out);
+ Status GetOrInsert(const UInt32Type*, uint32_t value, int32_t* out);
+ Status GetOrInsert(const UInt64Type*, uint64_t value, int32_t* out);
+ Status GetOrInsert(const FloatType*, float value, int32_t* out);
+ Status GetOrInsert(const DoubleType*, double value, int32_t* out);
+
+ Status GetOrInsert(const BinaryType*, util::string_view value, int32_t* out);
+ Status GetOrInsert(const LargeBinaryType*, util::string_view value, int32_t* out);
+
+ class DictionaryMemoTableImpl;
+ std::unique_ptr<DictionaryMemoTableImpl> impl_;
+};
+
+/// \brief Array builder for created encoded DictionaryArray from
+/// dense array
+///
+/// Unlike other builders, dictionary builder does not completely
+/// reset the state on Finish calls.
+template <typename BuilderType, typename T>
+class DictionaryBuilderBase : public ArrayBuilder {
+ public:
+ using TypeClass = DictionaryType;
+ using Value = typename DictionaryValue<T>::type;
+
+ // WARNING: the type given below is the value type, not the DictionaryType.
+ // The DictionaryType is instantiated on the Finish() call.
+ template <typename B = BuilderType, typename T1 = T>
+ DictionaryBuilderBase(uint8_t start_int_size,
+ enable_if_t<std::is_base_of<AdaptiveIntBuilderBase, B>::value &&
+ !is_fixed_size_binary_type<T1>::value,
+ const std::shared_ptr<DataType>&>
+ value_type,
+ MemoryPool* pool = default_memory_pool())
+ : ArrayBuilder(pool),
+ memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
+ delta_offset_(0),
+ byte_width_(-1),
+ indices_builder_(start_int_size, pool),
+ value_type_(value_type) {}
+
+ template <typename T1 = T>
+ explicit DictionaryBuilderBase(
+ enable_if_t<!is_fixed_size_binary_type<T1>::value, const std::shared_ptr<DataType>&>
+ value_type,
+ MemoryPool* pool = default_memory_pool())
+ : ArrayBuilder(pool),
+ memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
+ delta_offset_(0),
+ byte_width_(-1),
+ indices_builder_(pool),
+ value_type_(value_type) {}
+
+ template <typename B = BuilderType, typename T1 = T>
+ DictionaryBuilderBase(uint8_t start_int_size,
+ enable_if_t<std::is_base_of<AdaptiveIntBuilderBase, B>::value &&
+ is_fixed_size_binary_type<T1>::value,
+ const std::shared_ptr<DataType>&>
+ value_type,
+ MemoryPool* pool = default_memory_pool())
+ : ArrayBuilder(pool),
+ memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
+ delta_offset_(0),
+ byte_width_(static_cast<const T1&>(*value_type).byte_width()),
+ indices_builder_(start_int_size, pool),
+ value_type_(value_type) {}
+
+ template <typename T1 = T>
+ explicit DictionaryBuilderBase(
+ enable_if_fixed_size_binary<T1, const std::shared_ptr<DataType>&> value_type,
+ MemoryPool* pool = default_memory_pool())
+ : ArrayBuilder(pool),
+ memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
+ delta_offset_(0),
+ byte_width_(static_cast<const T1&>(*value_type).byte_width()),
+ indices_builder_(pool),
+ value_type_(value_type) {}
+
+ template <typename T1 = T>
+ explicit DictionaryBuilderBase(
+ enable_if_parameter_free<T1, MemoryPool*> pool = default_memory_pool())
+ : DictionaryBuilderBase<BuilderType, T1>(TypeTraits<T1>::type_singleton(), pool) {}
+
+ // This constructor doesn't check for errors. Use InsertMemoValues instead.
+ explicit DictionaryBuilderBase(const std::shared_ptr<Array>& dictionary,
+ MemoryPool* pool = default_memory_pool())
+ : ArrayBuilder(pool),
+ memo_table_(new internal::DictionaryMemoTable(pool, dictionary)),
+ delta_offset_(0),
+ byte_width_(-1),
+ indices_builder_(pool),
+ value_type_(dictionary->type()) {}
+
+ ~DictionaryBuilderBase() override = default;
+
+ /// \brief The current number of entries in the dictionary
+ int64_t dictionary_length() const { return memo_table_->size(); }
+
+ /// \brief The value byte width (for FixedSizeBinaryType)
+ template <typename T1 = T>
+ enable_if_fixed_size_binary<T1, int32_t> byte_width() const {
+ return byte_width_;
+ }
+
+ /// \brief Append a scalar value
+ Status Append(Value value) {
+ ARROW_RETURN_NOT_OK(Reserve(1));
+
+ int32_t memo_index;
+ ARROW_RETURN_NOT_OK(memo_table_->GetOrInsert<T>(value, &memo_index));
+ ARROW_RETURN_NOT_OK(indices_builder_.Append(memo_index));
+ length_ += 1;
+
+ return Status::OK();
+ }
+
+ /// \brief Append a fixed-width string (only for FixedSizeBinaryType)
+ template <typename T1 = T>
+ enable_if_fixed_size_binary<T1, Status> Append(const uint8_t* value) {
+ return Append(util::string_view(reinterpret_cast<const char*>(value), byte_width_));
+ }
+
+ /// \brief Append a fixed-width string (only for FixedSizeBinaryType)
+ template <typename T1 = T>
+ enable_if_fixed_size_binary<T1, Status> Append(const char* value) {
+ return Append(util::string_view(value, byte_width_));
+ }
+
+ /// \brief Append a string (only for binary types)
+ template <typename T1 = T>
+ enable_if_binary_like<T1, Status> Append(const uint8_t* value, int32_t length) {
+ return Append(reinterpret_cast<const char*>(value), length);
+ }
+
+ /// \brief Append a string (only for binary types)
+ template <typename T1 = T>
+ enable_if_binary_like<T1, Status> Append(const char* value, int32_t length) {
+ return Append(util::string_view(value, length));
+ }
+
+ /// \brief Append a string (only for string types)
+ template <typename T1 = T>
+ enable_if_string_like<T1, Status> Append(const char* value, int32_t length) {
+ return Append(util::string_view(value, length));
+ }
+
+ /// \brief Append a decimal (only for Decimal128Type)
+ template <typename T1 = T>
enable_if_decimal128<T1, Status> Append(const Decimal128& value) {
- uint8_t data[16];
- value.ToBytes(data);
- return Append(data, 16);
- }
-
+ uint8_t data[16];
+ value.ToBytes(data);
+ return Append(data, 16);
+ }
+
/// \brief Append a decimal (only for Decimal128Type)
template <typename T1 = T>
enable_if_decimal256<T1, Status> Append(const Decimal256& value) {
@@ -255,21 +255,21 @@ class DictionaryBuilderBase : public ArrayBuilder {
return Append(data, 32);
}
- /// \brief Append a scalar null value
- Status AppendNull() final {
- length_ += 1;
- null_count_ += 1;
-
- return indices_builder_.AppendNull();
- }
-
- Status AppendNulls(int64_t length) final {
- length_ += length;
- null_count_ += length;
-
- return indices_builder_.AppendNulls(length);
- }
-
+ /// \brief Append a scalar null value
+ Status AppendNull() final {
+ length_ += 1;
+ null_count_ += 1;
+
+ return indices_builder_.AppendNull();
+ }
+
+ Status AppendNulls(int64_t length) final {
+ length_ += length;
+ null_count_ += length;
+
+ return indices_builder_.AppendNulls(length);
+ }
+
Status AppendEmptyValue() final {
length_ += 1;
@@ -282,182 +282,182 @@ class DictionaryBuilderBase : public ArrayBuilder {
return indices_builder_.AppendEmptyValues(length);
}
- /// \brief Insert values into the dictionary's memo, but do not append any
- /// indices. Can be used to initialize a new builder with known dictionary
- /// values
- /// \param[in] values dictionary values to add to memo. Type must match
- /// builder type
- Status InsertMemoValues(const Array& values) {
- return memo_table_->InsertValues(values);
- }
-
- /// \brief Append a whole dense array to the builder
- template <typename T1 = T>
- enable_if_t<!is_fixed_size_binary_type<T1>::value, Status> AppendArray(
- const Array& array) {
- using ArrayType = typename TypeTraits<T>::ArrayType;
-
-#ifndef NDEBUG
- ARROW_RETURN_NOT_OK(ArrayBuilder::CheckArrayType(
- value_type_, array, "Wrong value type of array to be appended"));
-#endif
-
- const auto& concrete_array = static_cast<const ArrayType&>(array);
- for (int64_t i = 0; i < array.length(); i++) {
- if (array.IsNull(i)) {
- ARROW_RETURN_NOT_OK(AppendNull());
- } else {
- ARROW_RETURN_NOT_OK(Append(concrete_array.GetView(i)));
- }
- }
- return Status::OK();
- }
-
- template <typename T1 = T>
- enable_if_fixed_size_binary<T1, Status> AppendArray(const Array& array) {
-#ifndef NDEBUG
- ARROW_RETURN_NOT_OK(ArrayBuilder::CheckArrayType(
- value_type_, array, "Wrong value type of array to be appended"));
-#endif
-
- const auto& concrete_array = static_cast<const FixedSizeBinaryArray&>(array);
- for (int64_t i = 0; i < array.length(); i++) {
- if (array.IsNull(i)) {
- ARROW_RETURN_NOT_OK(AppendNull());
- } else {
- ARROW_RETURN_NOT_OK(Append(concrete_array.GetValue(i)));
- }
- }
- return Status::OK();
- }
-
- void Reset() override {
- // Perform a partial reset. Call ResetFull to also reset the accumulated
- // dictionary values
- ArrayBuilder::Reset();
- indices_builder_.Reset();
- }
-
- /// \brief Reset and also clear accumulated dictionary values in memo table
- void ResetFull() {
- Reset();
- memo_table_.reset(new internal::DictionaryMemoTable(pool_, value_type_));
- }
-
- Status Resize(int64_t capacity) override {
- ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
- capacity = std::max(capacity, kMinBuilderCapacity);
- ARROW_RETURN_NOT_OK(indices_builder_.Resize(capacity));
- capacity_ = indices_builder_.capacity();
- return Status::OK();
- }
-
- /// \brief Return dictionary indices and a delta dictionary since the last
- /// time that Finish or FinishDelta were called, and reset state of builder
- /// (except the memo table)
- Status FinishDelta(std::shared_ptr<Array>* out_indices,
- std::shared_ptr<Array>* out_delta) {
- std::shared_ptr<ArrayData> indices_data;
- std::shared_ptr<ArrayData> delta_data;
- ARROW_RETURN_NOT_OK(FinishWithDictOffset(delta_offset_, &indices_data, &delta_data));
- *out_indices = MakeArray(indices_data);
- *out_delta = MakeArray(delta_data);
- return Status::OK();
- }
-
- /// \cond FALSE
- using ArrayBuilder::Finish;
- /// \endcond
-
- Status Finish(std::shared_ptr<DictionaryArray>* out) { return FinishTyped(out); }
-
- std::shared_ptr<DataType> type() const override {
- return ::arrow::dictionary(indices_builder_.type(), value_type_);
- }
-
- protected:
- Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
- std::shared_ptr<ArrayData> dictionary;
- ARROW_RETURN_NOT_OK(FinishWithDictOffset(/*offset=*/0, out, &dictionary));
-
- // Set type of array data to the right dictionary type
- (*out)->type = type();
- (*out)->dictionary = dictionary;
- return Status::OK();
- }
-
- Status FinishWithDictOffset(int64_t dict_offset,
- std::shared_ptr<ArrayData>* out_indices,
- std::shared_ptr<ArrayData>* out_dictionary) {
- // Finalize indices array
- ARROW_RETURN_NOT_OK(indices_builder_.FinishInternal(out_indices));
-
- // Generate dictionary array from hash table contents
- ARROW_RETURN_NOT_OK(memo_table_->GetArrayData(dict_offset, out_dictionary));
- delta_offset_ = memo_table_->size();
-
- // Update internals for further uses of this DictionaryBuilder
- ArrayBuilder::Reset();
- return Status::OK();
- }
-
- std::unique_ptr<DictionaryMemoTable> memo_table_;
-
- // The size of the dictionary memo at last invocation of Finish, to use in
- // FinishDelta for computing dictionary deltas
- int32_t delta_offset_;
-
- // Only used for FixedSizeBinaryType
- int32_t byte_width_;
-
- BuilderType indices_builder_;
- std::shared_ptr<DataType> value_type_;
-};
-
-template <typename BuilderType>
-class DictionaryBuilderBase<BuilderType, NullType> : public ArrayBuilder {
- public:
- template <typename B = BuilderType>
- DictionaryBuilderBase(
- enable_if_t<std::is_base_of<AdaptiveIntBuilderBase, B>::value, uint8_t>
- start_int_size,
+ /// \brief Insert values into the dictionary's memo, but do not append any
+ /// indices. Can be used to initialize a new builder with known dictionary
+ /// values
+ /// \param[in] values dictionary values to add to memo. Type must match
+ /// builder type
+ Status InsertMemoValues(const Array& values) {
+ return memo_table_->InsertValues(values);
+ }
+
+ /// \brief Append a whole dense array to the builder
+ template <typename T1 = T>
+ enable_if_t<!is_fixed_size_binary_type<T1>::value, Status> AppendArray(
+ const Array& array) {
+ using ArrayType = typename TypeTraits<T>::ArrayType;
+
+#ifndef NDEBUG
+ ARROW_RETURN_NOT_OK(ArrayBuilder::CheckArrayType(
+ value_type_, array, "Wrong value type of array to be appended"));
+#endif
+
+ const auto& concrete_array = static_cast<const ArrayType&>(array);
+ for (int64_t i = 0; i < array.length(); i++) {
+ if (array.IsNull(i)) {
+ ARROW_RETURN_NOT_OK(AppendNull());
+ } else {
+ ARROW_RETURN_NOT_OK(Append(concrete_array.GetView(i)));
+ }
+ }
+ return Status::OK();
+ }
+
+ template <typename T1 = T>
+ enable_if_fixed_size_binary<T1, Status> AppendArray(const Array& array) {
+#ifndef NDEBUG
+ ARROW_RETURN_NOT_OK(ArrayBuilder::CheckArrayType(
+ value_type_, array, "Wrong value type of array to be appended"));
+#endif
+
+ const auto& concrete_array = static_cast<const FixedSizeBinaryArray&>(array);
+ for (int64_t i = 0; i < array.length(); i++) {
+ if (array.IsNull(i)) {
+ ARROW_RETURN_NOT_OK(AppendNull());
+ } else {
+ ARROW_RETURN_NOT_OK(Append(concrete_array.GetValue(i)));
+ }
+ }
+ return Status::OK();
+ }
+
+ void Reset() override {
+ // Perform a partial reset. Call ResetFull to also reset the accumulated
+ // dictionary values
+ ArrayBuilder::Reset();
+ indices_builder_.Reset();
+ }
+
+ /// \brief Reset and also clear accumulated dictionary values in memo table
+ void ResetFull() {
+ Reset();
+ memo_table_.reset(new internal::DictionaryMemoTable(pool_, value_type_));
+ }
+
+ Status Resize(int64_t capacity) override {
+ ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
+ capacity = std::max(capacity, kMinBuilderCapacity);
+ ARROW_RETURN_NOT_OK(indices_builder_.Resize(capacity));
+ capacity_ = indices_builder_.capacity();
+ return Status::OK();
+ }
+
+ /// \brief Return dictionary indices and a delta dictionary since the last
+ /// time that Finish or FinishDelta were called, and reset state of builder
+ /// (except the memo table)
+ Status FinishDelta(std::shared_ptr<Array>* out_indices,
+ std::shared_ptr<Array>* out_delta) {
+ std::shared_ptr<ArrayData> indices_data;
+ std::shared_ptr<ArrayData> delta_data;
+ ARROW_RETURN_NOT_OK(FinishWithDictOffset(delta_offset_, &indices_data, &delta_data));
+ *out_indices = MakeArray(indices_data);
+ *out_delta = MakeArray(delta_data);
+ return Status::OK();
+ }
+
+ /// \cond FALSE
+ using ArrayBuilder::Finish;
+ /// \endcond
+
+ Status Finish(std::shared_ptr<DictionaryArray>* out) { return FinishTyped(out); }
+
+ std::shared_ptr<DataType> type() const override {
+ return ::arrow::dictionary(indices_builder_.type(), value_type_);
+ }
+
+ protected:
+ Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
+ std::shared_ptr<ArrayData> dictionary;
+ ARROW_RETURN_NOT_OK(FinishWithDictOffset(/*offset=*/0, out, &dictionary));
+
+ // Set type of array data to the right dictionary type
+ (*out)->type = type();
+ (*out)->dictionary = dictionary;
+ return Status::OK();
+ }
+
+ Status FinishWithDictOffset(int64_t dict_offset,
+ std::shared_ptr<ArrayData>* out_indices,
+ std::shared_ptr<ArrayData>* out_dictionary) {
+ // Finalize indices array
+ ARROW_RETURN_NOT_OK(indices_builder_.FinishInternal(out_indices));
+
+ // Generate dictionary array from hash table contents
+ ARROW_RETURN_NOT_OK(memo_table_->GetArrayData(dict_offset, out_dictionary));
+ delta_offset_ = memo_table_->size();
+
+ // Update internals for further uses of this DictionaryBuilder
+ ArrayBuilder::Reset();
+ return Status::OK();
+ }
+
+ std::unique_ptr<DictionaryMemoTable> memo_table_;
+
+ // The size of the dictionary memo at last invocation of Finish, to use in
+ // FinishDelta for computing dictionary deltas
+ int32_t delta_offset_;
+
+ // Only used for FixedSizeBinaryType
+ int32_t byte_width_;
+
+ BuilderType indices_builder_;
+ std::shared_ptr<DataType> value_type_;
+};
+
+template <typename BuilderType>
+class DictionaryBuilderBase<BuilderType, NullType> : public ArrayBuilder {
+ public:
+ template <typename B = BuilderType>
+ DictionaryBuilderBase(
+ enable_if_t<std::is_base_of<AdaptiveIntBuilderBase, B>::value, uint8_t>
+ start_int_size,
const std::shared_ptr<DataType>& /*value_type*/,
- MemoryPool* pool = default_memory_pool())
- : ArrayBuilder(pool), indices_builder_(start_int_size, pool) {}
-
+ MemoryPool* pool = default_memory_pool())
+ : ArrayBuilder(pool), indices_builder_(start_int_size, pool) {}
+
explicit DictionaryBuilderBase(const std::shared_ptr<DataType>& /*value_type*/,
- MemoryPool* pool = default_memory_pool())
- : ArrayBuilder(pool), indices_builder_(pool) {}
-
- template <typename B = BuilderType>
- explicit DictionaryBuilderBase(
- enable_if_t<std::is_base_of<AdaptiveIntBuilderBase, B>::value, uint8_t>
- start_int_size,
- MemoryPool* pool = default_memory_pool())
- : ArrayBuilder(pool), indices_builder_(start_int_size, pool) {}
-
- explicit DictionaryBuilderBase(MemoryPool* pool = default_memory_pool())
- : ArrayBuilder(pool), indices_builder_(pool) {}
-
+ MemoryPool* pool = default_memory_pool())
+ : ArrayBuilder(pool), indices_builder_(pool) {}
+
+ template <typename B = BuilderType>
+ explicit DictionaryBuilderBase(
+ enable_if_t<std::is_base_of<AdaptiveIntBuilderBase, B>::value, uint8_t>
+ start_int_size,
+ MemoryPool* pool = default_memory_pool())
+ : ArrayBuilder(pool), indices_builder_(start_int_size, pool) {}
+
+ explicit DictionaryBuilderBase(MemoryPool* pool = default_memory_pool())
+ : ArrayBuilder(pool), indices_builder_(pool) {}
+
explicit DictionaryBuilderBase(const std::shared_ptr<Array>& /*dictionary*/,
- MemoryPool* pool = default_memory_pool())
- : ArrayBuilder(pool), indices_builder_(pool) {}
-
- /// \brief Append a scalar null value
- Status AppendNull() final {
- length_ += 1;
- null_count_ += 1;
-
- return indices_builder_.AppendNull();
- }
-
- Status AppendNulls(int64_t length) final {
- length_ += length;
- null_count_ += length;
-
- return indices_builder_.AppendNulls(length);
- }
-
+ MemoryPool* pool = default_memory_pool())
+ : ArrayBuilder(pool), indices_builder_(pool) {}
+
+ /// \brief Append a scalar null value
+ Status AppendNull() final {
+ length_ += 1;
+ null_count_ += 1;
+
+ return indices_builder_.AppendNull();
+ }
+
+ Status AppendNulls(int64_t length) final {
+ length_ += length;
+ null_count_ += length;
+
+ return indices_builder_.AppendNulls(length);
+ }
+
Status AppendEmptyValue() final {
length_ += 1;
@@ -470,103 +470,103 @@ class DictionaryBuilderBase<BuilderType, NullType> : public ArrayBuilder {
return indices_builder_.AppendEmptyValues(length);
}
- /// \brief Append a whole dense array to the builder
- Status AppendArray(const Array& array) {
-#ifndef NDEBUG
- ARROW_RETURN_NOT_OK(ArrayBuilder::CheckArrayType(
- Type::NA, array, "Wrong value type of array to be appended"));
-#endif
- for (int64_t i = 0; i < array.length(); i++) {
- ARROW_RETURN_NOT_OK(AppendNull());
- }
- return Status::OK();
- }
-
- Status Resize(int64_t capacity) override {
- ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
- capacity = std::max(capacity, kMinBuilderCapacity);
-
- ARROW_RETURN_NOT_OK(indices_builder_.Resize(capacity));
- capacity_ = indices_builder_.capacity();
- return Status::OK();
- }
-
- Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
- ARROW_RETURN_NOT_OK(indices_builder_.FinishInternal(out));
- (*out)->type = dictionary((*out)->type, null());
- (*out)->dictionary = NullArray(0).data();
- return Status::OK();
- }
-
- /// \cond FALSE
- using ArrayBuilder::Finish;
- /// \endcond
-
- Status Finish(std::shared_ptr<DictionaryArray>* out) { return FinishTyped(out); }
-
- std::shared_ptr<DataType> type() const override {
- return ::arrow::dictionary(indices_builder_.type(), null());
- }
-
- protected:
- BuilderType indices_builder_;
-};
-
-} // namespace internal
-
-/// \brief A DictionaryArray builder that uses AdaptiveIntBuilder to return the
-/// smallest index size that can accommodate the dictionary indices
-template <typename T>
-class DictionaryBuilder : public internal::DictionaryBuilderBase<AdaptiveIntBuilder, T> {
- public:
- using BASE = internal::DictionaryBuilderBase<AdaptiveIntBuilder, T>;
- using BASE::BASE;
-
- /// \brief Append dictionary indices directly without modifying memo
- ///
- /// NOTE: Experimental API
- Status AppendIndices(const int64_t* values, int64_t length,
- const uint8_t* valid_bytes = NULLPTR) {
- int64_t null_count_before = this->indices_builder_.null_count();
- ARROW_RETURN_NOT_OK(this->indices_builder_.AppendValues(values, length, valid_bytes));
- this->capacity_ = this->indices_builder_.capacity();
- this->length_ += length;
- this->null_count_ += this->indices_builder_.null_count() - null_count_before;
- return Status::OK();
- }
-};
-
-/// \brief A DictionaryArray builder that always returns int32 dictionary
-/// indices so that data cast to dictionary form will have a consistent index
-/// type, e.g. for creating a ChunkedArray
-template <typename T>
-class Dictionary32Builder : public internal::DictionaryBuilderBase<Int32Builder, T> {
- public:
- using BASE = internal::DictionaryBuilderBase<Int32Builder, T>;
- using BASE::BASE;
-
- /// \brief Append dictionary indices directly without modifying memo
- ///
- /// NOTE: Experimental API
- Status AppendIndices(const int32_t* values, int64_t length,
- const uint8_t* valid_bytes = NULLPTR) {
- int64_t null_count_before = this->indices_builder_.null_count();
- ARROW_RETURN_NOT_OK(this->indices_builder_.AppendValues(values, length, valid_bytes));
- this->capacity_ = this->indices_builder_.capacity();
- this->length_ += length;
- this->null_count_ += this->indices_builder_.null_count() - null_count_before;
- return Status::OK();
- }
-};
-
-// ----------------------------------------------------------------------
-// Binary / Unicode builders
-// (compatibility aliases; those used to be derived classes with additional
-// Append() overloads, but they have been folded into DictionaryBuilderBase)
-
-using BinaryDictionaryBuilder = DictionaryBuilder<BinaryType>;
-using StringDictionaryBuilder = DictionaryBuilder<StringType>;
-using BinaryDictionary32Builder = Dictionary32Builder<BinaryType>;
-using StringDictionary32Builder = Dictionary32Builder<StringType>;
-
-} // namespace arrow
+ /// \brief Append a whole dense array to the builder
+ Status AppendArray(const Array& array) {
+#ifndef NDEBUG
+ ARROW_RETURN_NOT_OK(ArrayBuilder::CheckArrayType(
+ Type::NA, array, "Wrong value type of array to be appended"));
+#endif
+ for (int64_t i = 0; i < array.length(); i++) {
+ ARROW_RETURN_NOT_OK(AppendNull());
+ }
+ return Status::OK();
+ }
+
+ Status Resize(int64_t capacity) override {
+ ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
+ capacity = std::max(capacity, kMinBuilderCapacity);
+
+ ARROW_RETURN_NOT_OK(indices_builder_.Resize(capacity));
+ capacity_ = indices_builder_.capacity();
+ return Status::OK();
+ }
+
+ Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
+ ARROW_RETURN_NOT_OK(indices_builder_.FinishInternal(out));
+ (*out)->type = dictionary((*out)->type, null());
+ (*out)->dictionary = NullArray(0).data();
+ return Status::OK();
+ }
+
+ /// \cond FALSE
+ using ArrayBuilder::Finish;
+ /// \endcond
+
+ Status Finish(std::shared_ptr<DictionaryArray>* out) { return FinishTyped(out); }
+
+ std::shared_ptr<DataType> type() const override {
+ return ::arrow::dictionary(indices_builder_.type(), null());
+ }
+
+ protected:
+ BuilderType indices_builder_;
+};
+
+} // namespace internal
+
+/// \brief A DictionaryArray builder that uses AdaptiveIntBuilder to return the
+/// smallest index size that can accommodate the dictionary indices
+template <typename T>
+class DictionaryBuilder : public internal::DictionaryBuilderBase<AdaptiveIntBuilder, T> {
+ public:
+ using BASE = internal::DictionaryBuilderBase<AdaptiveIntBuilder, T>;
+ using BASE::BASE;
+
+ /// \brief Append dictionary indices directly without modifying memo
+ ///
+ /// NOTE: Experimental API
+ Status AppendIndices(const int64_t* values, int64_t length,
+ const uint8_t* valid_bytes = NULLPTR) {
+ int64_t null_count_before = this->indices_builder_.null_count();
+ ARROW_RETURN_NOT_OK(this->indices_builder_.AppendValues(values, length, valid_bytes));
+ this->capacity_ = this->indices_builder_.capacity();
+ this->length_ += length;
+ this->null_count_ += this->indices_builder_.null_count() - null_count_before;
+ return Status::OK();
+ }
+};
+
+/// \brief A DictionaryArray builder that always returns int32 dictionary
+/// indices so that data cast to dictionary form will have a consistent index
+/// type, e.g. for creating a ChunkedArray
+template <typename T>
+class Dictionary32Builder : public internal::DictionaryBuilderBase<Int32Builder, T> {
+ public:
+ using BASE = internal::DictionaryBuilderBase<Int32Builder, T>;
+ using BASE::BASE;
+
+ /// \brief Append dictionary indices directly without modifying memo
+ ///
+ /// NOTE: Experimental API
+ Status AppendIndices(const int32_t* values, int64_t length,
+ const uint8_t* valid_bytes = NULLPTR) {
+ int64_t null_count_before = this->indices_builder_.null_count();
+ ARROW_RETURN_NOT_OK(this->indices_builder_.AppendValues(values, length, valid_bytes));
+ this->capacity_ = this->indices_builder_.capacity();
+ this->length_ += length;
+ this->null_count_ += this->indices_builder_.null_count() - null_count_before;
+ return Status::OK();
+ }
+};
+
+// ----------------------------------------------------------------------
+// Binary / Unicode builders
+// (compatibility aliases; those used to be derived classes with additional
+// Append() overloads, but they have been folded into DictionaryBuilderBase)
+
+using BinaryDictionaryBuilder = DictionaryBuilder<BinaryType>;
+using StringDictionaryBuilder = DictionaryBuilder<StringType>;
+using BinaryDictionary32Builder = Dictionary32Builder<BinaryType>;
+using StringDictionary32Builder = Dictionary32Builder<StringType>;
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_nested.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_nested.cc
index a3bcde0381a..81fd71124cd 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_nested.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_nested.cc
@@ -1,128 +1,128 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/array/builder_nested.h"
-
-#include <cstddef>
-#include <cstdint>
-#include <utility>
-#include <vector>
-
-#include "arrow/buffer.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/logging.h"
-
-namespace arrow {
-
-// ----------------------------------------------------------------------
-// MapBuilder
-
-MapBuilder::MapBuilder(MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& key_builder,
- std::shared_ptr<ArrayBuilder> const& item_builder,
- const std::shared_ptr<DataType>& type)
- : ArrayBuilder(pool), key_builder_(key_builder), item_builder_(item_builder) {
- auto map_type = internal::checked_cast<const MapType*>(type.get());
- keys_sorted_ = map_type->keys_sorted();
-
- std::vector<std::shared_ptr<ArrayBuilder>> child_builders{key_builder, item_builder};
- auto struct_builder =
- std::make_shared<StructBuilder>(map_type->value_type(), pool, child_builders);
-
- list_builder_ =
- std::make_shared<ListBuilder>(pool, struct_builder, struct_builder->type());
-}
-
-MapBuilder::MapBuilder(MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& key_builder,
- const std::shared_ptr<ArrayBuilder>& item_builder,
- bool keys_sorted)
- : MapBuilder(pool, key_builder, item_builder,
- map(key_builder->type(), item_builder->type(), keys_sorted)) {}
-
-MapBuilder::MapBuilder(MemoryPool* pool,
- const std::shared_ptr<ArrayBuilder>& struct_builder,
- const std::shared_ptr<DataType>& type)
- : ArrayBuilder(pool) {
- auto map_type = internal::checked_cast<const MapType*>(type.get());
- keys_sorted_ = map_type->keys_sorted();
- key_builder_ = struct_builder->child_builder(0);
- item_builder_ = struct_builder->child_builder(1);
- list_builder_ =
- std::make_shared<ListBuilder>(pool, struct_builder, struct_builder->type());
-}
-
-Status MapBuilder::Resize(int64_t capacity) {
- RETURN_NOT_OK(list_builder_->Resize(capacity));
- capacity_ = list_builder_->capacity();
- return Status::OK();
-}
-
-void MapBuilder::Reset() {
- list_builder_->Reset();
- ArrayBuilder::Reset();
-}
-
-Status MapBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
- ARROW_CHECK_EQ(item_builder_->length(), key_builder_->length())
- << "keys and items builders don't have the same size in MapBuilder";
- RETURN_NOT_OK(AdjustStructBuilderLength());
- RETURN_NOT_OK(list_builder_->FinishInternal(out));
- (*out)->type = type();
- ArrayBuilder::Reset();
- return Status::OK();
-}
-
-Status MapBuilder::AppendValues(const int32_t* offsets, int64_t length,
- const uint8_t* valid_bytes) {
- DCHECK_EQ(item_builder_->length(), key_builder_->length());
- RETURN_NOT_OK(AdjustStructBuilderLength());
- RETURN_NOT_OK(list_builder_->AppendValues(offsets, length, valid_bytes));
- length_ = list_builder_->length();
- null_count_ = list_builder_->null_count();
- return Status::OK();
-}
-
-Status MapBuilder::Append() {
- DCHECK_EQ(item_builder_->length(), key_builder_->length());
- RETURN_NOT_OK(AdjustStructBuilderLength());
- RETURN_NOT_OK(list_builder_->Append());
- length_ = list_builder_->length();
- return Status::OK();
-}
-
-Status MapBuilder::AppendNull() {
- DCHECK_EQ(item_builder_->length(), key_builder_->length());
- RETURN_NOT_OK(AdjustStructBuilderLength());
- RETURN_NOT_OK(list_builder_->AppendNull());
- length_ = list_builder_->length();
- null_count_ = list_builder_->null_count();
- return Status::OK();
-}
-
-Status MapBuilder::AppendNulls(int64_t length) {
- DCHECK_EQ(item_builder_->length(), key_builder_->length());
- RETURN_NOT_OK(AdjustStructBuilderLength());
- RETURN_NOT_OK(list_builder_->AppendNulls(length));
- length_ = list_builder_->length();
- null_count_ = list_builder_->null_count();
- return Status::OK();
-}
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/array/builder_nested.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <utility>
+#include <vector>
+
+#include "arrow/buffer.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+// ----------------------------------------------------------------------
+// MapBuilder
+
+MapBuilder::MapBuilder(MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& key_builder,
+ std::shared_ptr<ArrayBuilder> const& item_builder,
+ const std::shared_ptr<DataType>& type)
+ : ArrayBuilder(pool), key_builder_(key_builder), item_builder_(item_builder) {
+ auto map_type = internal::checked_cast<const MapType*>(type.get());
+ keys_sorted_ = map_type->keys_sorted();
+
+ std::vector<std::shared_ptr<ArrayBuilder>> child_builders{key_builder, item_builder};
+ auto struct_builder =
+ std::make_shared<StructBuilder>(map_type->value_type(), pool, child_builders);
+
+ list_builder_ =
+ std::make_shared<ListBuilder>(pool, struct_builder, struct_builder->type());
+}
+
+MapBuilder::MapBuilder(MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& key_builder,
+ const std::shared_ptr<ArrayBuilder>& item_builder,
+ bool keys_sorted)
+ : MapBuilder(pool, key_builder, item_builder,
+ map(key_builder->type(), item_builder->type(), keys_sorted)) {}
+
+MapBuilder::MapBuilder(MemoryPool* pool,
+ const std::shared_ptr<ArrayBuilder>& struct_builder,
+ const std::shared_ptr<DataType>& type)
+ : ArrayBuilder(pool) {
+ auto map_type = internal::checked_cast<const MapType*>(type.get());
+ keys_sorted_ = map_type->keys_sorted();
+ key_builder_ = struct_builder->child_builder(0);
+ item_builder_ = struct_builder->child_builder(1);
+ list_builder_ =
+ std::make_shared<ListBuilder>(pool, struct_builder, struct_builder->type());
+}
+
+Status MapBuilder::Resize(int64_t capacity) {
+ RETURN_NOT_OK(list_builder_->Resize(capacity));
+ capacity_ = list_builder_->capacity();
+ return Status::OK();
+}
+
+void MapBuilder::Reset() {
+ list_builder_->Reset();
+ ArrayBuilder::Reset();
+}
+
+Status MapBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+ ARROW_CHECK_EQ(item_builder_->length(), key_builder_->length())
+ << "keys and items builders don't have the same size in MapBuilder";
+ RETURN_NOT_OK(AdjustStructBuilderLength());
+ RETURN_NOT_OK(list_builder_->FinishInternal(out));
+ (*out)->type = type();
+ ArrayBuilder::Reset();
+ return Status::OK();
+}
+
+Status MapBuilder::AppendValues(const int32_t* offsets, int64_t length,
+ const uint8_t* valid_bytes) {
+ DCHECK_EQ(item_builder_->length(), key_builder_->length());
+ RETURN_NOT_OK(AdjustStructBuilderLength());
+ RETURN_NOT_OK(list_builder_->AppendValues(offsets, length, valid_bytes));
+ length_ = list_builder_->length();
+ null_count_ = list_builder_->null_count();
+ return Status::OK();
+}
+
+Status MapBuilder::Append() {
+ DCHECK_EQ(item_builder_->length(), key_builder_->length());
+ RETURN_NOT_OK(AdjustStructBuilderLength());
+ RETURN_NOT_OK(list_builder_->Append());
+ length_ = list_builder_->length();
+ return Status::OK();
+}
+
+Status MapBuilder::AppendNull() {
+ DCHECK_EQ(item_builder_->length(), key_builder_->length());
+ RETURN_NOT_OK(AdjustStructBuilderLength());
+ RETURN_NOT_OK(list_builder_->AppendNull());
+ length_ = list_builder_->length();
+ null_count_ = list_builder_->null_count();
+ return Status::OK();
+}
+
+Status MapBuilder::AppendNulls(int64_t length) {
+ DCHECK_EQ(item_builder_->length(), key_builder_->length());
+ RETURN_NOT_OK(AdjustStructBuilderLength());
+ RETURN_NOT_OK(list_builder_->AppendNulls(length));
+ length_ = list_builder_->length();
+ null_count_ = list_builder_->null_count();
+ return Status::OK();
+}
+
Status MapBuilder::AppendEmptyValue() {
DCHECK_EQ(item_builder_->length(), key_builder_->length());
RETURN_NOT_OK(AdjustStructBuilderLength());
@@ -141,78 +141,78 @@ Status MapBuilder::AppendEmptyValues(int64_t length) {
return Status::OK();
}
-Status MapBuilder::AdjustStructBuilderLength() {
- // If key/item builders have been appended, adjust struct builder length
- // to match. Struct and key are non-nullable, append all valid values.
- auto struct_builder =
- internal::checked_cast<StructBuilder*>(list_builder_->value_builder());
- if (struct_builder->length() < key_builder_->length()) {
- int64_t length_diff = key_builder_->length() - struct_builder->length();
- RETURN_NOT_OK(struct_builder->AppendValues(length_diff, NULLPTR));
- }
- return Status::OK();
-}
-
-// ----------------------------------------------------------------------
-// FixedSizeListBuilder
-
-FixedSizeListBuilder::FixedSizeListBuilder(
- MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& value_builder,
- const std::shared_ptr<DataType>& type)
- : ArrayBuilder(pool),
- value_field_(type->field(0)),
- list_size_(
- internal::checked_cast<const FixedSizeListType*>(type.get())->list_size()),
- value_builder_(value_builder) {}
-
-FixedSizeListBuilder::FixedSizeListBuilder(
- MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& value_builder,
- int32_t list_size)
- : FixedSizeListBuilder(pool, value_builder,
- fixed_size_list(value_builder->type(), list_size)) {}
-
-void FixedSizeListBuilder::Reset() {
- ArrayBuilder::Reset();
- value_builder_->Reset();
-}
-
-Status FixedSizeListBuilder::Append() {
- RETURN_NOT_OK(Reserve(1));
- UnsafeAppendToBitmap(true);
- return Status::OK();
-}
-
-Status FixedSizeListBuilder::AppendValues(int64_t length, const uint8_t* valid_bytes) {
- RETURN_NOT_OK(Reserve(length));
- UnsafeAppendToBitmap(valid_bytes, length);
- return Status::OK();
-}
-
-Status FixedSizeListBuilder::AppendNull() {
- RETURN_NOT_OK(Reserve(1));
- UnsafeAppendToBitmap(false);
- return value_builder_->AppendNulls(list_size_);
-}
-
-Status FixedSizeListBuilder::AppendNulls(int64_t length) {
- RETURN_NOT_OK(Reserve(length));
- UnsafeAppendToBitmap(length, false);
- return value_builder_->AppendNulls(list_size_ * length);
-}
-
-Status FixedSizeListBuilder::ValidateOverflow(int64_t new_elements) {
- auto new_length = value_builder_->length() + new_elements;
- if (new_elements != list_size_) {
- return Status::Invalid("Length of item not correct: expected ", list_size_,
- " but got array of size ", new_elements);
- }
- if (new_length > maximum_elements()) {
- return Status::CapacityError("array cannot contain more than ", maximum_elements(),
- " elements, have ", new_elements);
- }
- return Status::OK();
-}
-
+Status MapBuilder::AdjustStructBuilderLength() {
+ // If key/item builders have been appended, adjust struct builder length
+ // to match. Struct and key are non-nullable, append all valid values.
+ auto struct_builder =
+ internal::checked_cast<StructBuilder*>(list_builder_->value_builder());
+ if (struct_builder->length() < key_builder_->length()) {
+ int64_t length_diff = key_builder_->length() - struct_builder->length();
+ RETURN_NOT_OK(struct_builder->AppendValues(length_diff, NULLPTR));
+ }
+ return Status::OK();
+}
+
+// ----------------------------------------------------------------------
+// FixedSizeListBuilder
+
+FixedSizeListBuilder::FixedSizeListBuilder(
+ MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& value_builder,
+ const std::shared_ptr<DataType>& type)
+ : ArrayBuilder(pool),
+ value_field_(type->field(0)),
+ list_size_(
+ internal::checked_cast<const FixedSizeListType*>(type.get())->list_size()),
+ value_builder_(value_builder) {}
+
+FixedSizeListBuilder::FixedSizeListBuilder(
+ MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& value_builder,
+ int32_t list_size)
+ : FixedSizeListBuilder(pool, value_builder,
+ fixed_size_list(value_builder->type(), list_size)) {}
+
+void FixedSizeListBuilder::Reset() {
+ ArrayBuilder::Reset();
+ value_builder_->Reset();
+}
+
+Status FixedSizeListBuilder::Append() {
+ RETURN_NOT_OK(Reserve(1));
+ UnsafeAppendToBitmap(true);
+ return Status::OK();
+}
+
+Status FixedSizeListBuilder::AppendValues(int64_t length, const uint8_t* valid_bytes) {
+ RETURN_NOT_OK(Reserve(length));
+ UnsafeAppendToBitmap(valid_bytes, length);
+ return Status::OK();
+}
+
+Status FixedSizeListBuilder::AppendNull() {
+ RETURN_NOT_OK(Reserve(1));
+ UnsafeAppendToBitmap(false);
+ return value_builder_->AppendNulls(list_size_);
+}
+
+Status FixedSizeListBuilder::AppendNulls(int64_t length) {
+ RETURN_NOT_OK(Reserve(length));
+ UnsafeAppendToBitmap(length, false);
+ return value_builder_->AppendNulls(list_size_ * length);
+}
+
+Status FixedSizeListBuilder::ValidateOverflow(int64_t new_elements) {
+ auto new_length = value_builder_->length() + new_elements;
+ if (new_elements != list_size_) {
+ return Status::Invalid("Length of item not correct: expected ", list_size_,
+ " but got array of size ", new_elements);
+ }
+ if (new_length > maximum_elements()) {
+ return Status::CapacityError("array cannot contain more than ", maximum_elements(),
+ " elements, have ", new_elements);
+ }
+ return Status::OK();
+}
+
Status FixedSizeListBuilder::AppendEmptyValue() {
RETURN_NOT_OK(Reserve(1));
UnsafeAppendToBitmap(true);
@@ -225,70 +225,70 @@ Status FixedSizeListBuilder::AppendEmptyValues(int64_t length) {
return value_builder_->AppendEmptyValues(list_size_ * length);
}
-Status FixedSizeListBuilder::Resize(int64_t capacity) {
- RETURN_NOT_OK(CheckCapacity(capacity));
- return ArrayBuilder::Resize(capacity);
-}
-
-Status FixedSizeListBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
- std::shared_ptr<ArrayData> items;
-
- if (value_builder_->length() == 0) {
- // Try to make sure we get a non-null values buffer (ARROW-2744)
- RETURN_NOT_OK(value_builder_->Resize(0));
- }
- RETURN_NOT_OK(value_builder_->FinishInternal(&items));
-
- std::shared_ptr<Buffer> null_bitmap;
- RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
- *out = ArrayData::Make(type(), length_, {null_bitmap}, {std::move(items)}, null_count_);
- Reset();
- return Status::OK();
-}
-
-// ----------------------------------------------------------------------
-// Struct
-
-StructBuilder::StructBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool,
- std::vector<std::shared_ptr<ArrayBuilder>> field_builders)
- : ArrayBuilder(pool), type_(type) {
- children_ = std::move(field_builders);
-}
-
-void StructBuilder::Reset() {
- ArrayBuilder::Reset();
- for (const auto& field_builder : children_) {
- field_builder->Reset();
- }
-}
-
-Status StructBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
- std::shared_ptr<Buffer> null_bitmap;
- RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
-
- std::vector<std::shared_ptr<ArrayData>> child_data(children_.size());
- for (size_t i = 0; i < children_.size(); ++i) {
- if (length_ == 0) {
- // Try to make sure the child buffers are initialized
- RETURN_NOT_OK(children_[i]->Resize(0));
- }
- RETURN_NOT_OK(children_[i]->FinishInternal(&child_data[i]));
- }
-
- *out = ArrayData::Make(type(), length_, {null_bitmap}, null_count_);
- (*out)->child_data = std::move(child_data);
-
- capacity_ = length_ = null_count_ = 0;
- return Status::OK();
-}
-
-std::shared_ptr<DataType> StructBuilder::type() const {
- DCHECK_EQ(type_->fields().size(), children_.size());
- std::vector<std::shared_ptr<Field>> fields(children_.size());
- for (int i = 0; i < static_cast<int>(fields.size()); ++i) {
- fields[i] = type_->field(i)->WithType(children_[i]->type());
- }
- return struct_(std::move(fields));
-}
-
-} // namespace arrow
+Status FixedSizeListBuilder::Resize(int64_t capacity) {
+ RETURN_NOT_OK(CheckCapacity(capacity));
+ return ArrayBuilder::Resize(capacity);
+}
+
+Status FixedSizeListBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+ std::shared_ptr<ArrayData> items;
+
+ if (value_builder_->length() == 0) {
+ // Try to make sure we get a non-null values buffer (ARROW-2744)
+ RETURN_NOT_OK(value_builder_->Resize(0));
+ }
+ RETURN_NOT_OK(value_builder_->FinishInternal(&items));
+
+ std::shared_ptr<Buffer> null_bitmap;
+ RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
+ *out = ArrayData::Make(type(), length_, {null_bitmap}, {std::move(items)}, null_count_);
+ Reset();
+ return Status::OK();
+}
+
+// ----------------------------------------------------------------------
+// Struct
+
+StructBuilder::StructBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool,
+ std::vector<std::shared_ptr<ArrayBuilder>> field_builders)
+ : ArrayBuilder(pool), type_(type) {
+ children_ = std::move(field_builders);
+}
+
+void StructBuilder::Reset() {
+ ArrayBuilder::Reset();
+ for (const auto& field_builder : children_) {
+ field_builder->Reset();
+ }
+}
+
+Status StructBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+ std::shared_ptr<Buffer> null_bitmap;
+ RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
+
+ std::vector<std::shared_ptr<ArrayData>> child_data(children_.size());
+ for (size_t i = 0; i < children_.size(); ++i) {
+ if (length_ == 0) {
+ // Try to make sure the child buffers are initialized
+ RETURN_NOT_OK(children_[i]->Resize(0));
+ }
+ RETURN_NOT_OK(children_[i]->FinishInternal(&child_data[i]));
+ }
+
+ *out = ArrayData::Make(type(), length_, {null_bitmap}, null_count_);
+ (*out)->child_data = std::move(child_data);
+
+ capacity_ = length_ = null_count_ = 0;
+ return Status::OK();
+}
+
+std::shared_ptr<DataType> StructBuilder::type() const {
+ DCHECK_EQ(type_->fields().size(), children_.size());
+ std::vector<std::shared_ptr<Field>> fields(children_.size());
+ for (int i = 0; i < static_cast<int>(fields.size()); ++i) {
+ fields[i] = type_->field(i)->WithType(children_[i]->type());
+ }
+ return struct_(std::move(fields));
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_nested.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_nested.h
index 12b999b786e..13c63ee01e3 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_nested.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_nested.h
@@ -1,114 +1,114 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <limits>
-#include <memory>
-#include <utility>
-#include <vector>
-
-#include "arrow/array/array_nested.h"
-#include "arrow/array/builder_base.h"
-#include "arrow/array/data.h"
-#include "arrow/buffer.h"
-#include "arrow/buffer_builder.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-// ----------------------------------------------------------------------
-// List builder
-
-template <typename TYPE>
-class BaseListBuilder : public ArrayBuilder {
- public:
- using TypeClass = TYPE;
- using offset_type = typename TypeClass::offset_type;
-
- /// Use this constructor to incrementally build the value array along with offsets and
- /// null bitmap.
- BaseListBuilder(MemoryPool* pool, std::shared_ptr<ArrayBuilder> const& value_builder,
- const std::shared_ptr<DataType>& type)
- : ArrayBuilder(pool),
- offsets_builder_(pool),
- value_builder_(value_builder),
- value_field_(type->field(0)->WithType(NULLPTR)) {}
-
- BaseListBuilder(MemoryPool* pool, std::shared_ptr<ArrayBuilder> const& value_builder)
- : BaseListBuilder(pool, value_builder, list(value_builder->type())) {}
-
- Status Resize(int64_t capacity) override {
- if (capacity > maximum_elements()) {
- return Status::CapacityError("List array cannot reserve space for more than ",
- maximum_elements(), " got ", capacity);
- }
- ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
-
- // One more than requested for offsets
- ARROW_RETURN_NOT_OK(offsets_builder_.Resize(capacity + 1));
- return ArrayBuilder::Resize(capacity);
- }
-
- void Reset() override {
- ArrayBuilder::Reset();
- offsets_builder_.Reset();
- value_builder_->Reset();
- }
-
- /// \brief Vector append
- ///
- /// If passed, valid_bytes is of equal length to values, and any zero byte
- /// will be considered as a null for that slot
- Status AppendValues(const offset_type* offsets, int64_t length,
- const uint8_t* valid_bytes = NULLPTR) {
- ARROW_RETURN_NOT_OK(Reserve(length));
- UnsafeAppendToBitmap(valid_bytes, length);
- offsets_builder_.UnsafeAppend(offsets, length);
- return Status::OK();
- }
-
- /// \brief Start a new variable-length list slot
- ///
- /// This function should be called before beginning to append elements to the
- /// value builder
- Status Append(bool is_valid = true) {
- ARROW_RETURN_NOT_OK(Reserve(1));
- UnsafeAppendToBitmap(is_valid);
- return AppendNextOffset();
- }
-
- Status AppendNull() final { return Append(false); }
-
- Status AppendNulls(int64_t length) final {
- ARROW_RETURN_NOT_OK(Reserve(length));
- ARROW_RETURN_NOT_OK(ValidateOverflow(0));
- UnsafeAppendToBitmap(length, false);
- const int64_t num_values = value_builder_->length();
- for (int64_t i = 0; i < length; ++i) {
- offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_values));
- }
- return Status::OK();
- }
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "arrow/array/array_nested.h"
+#include "arrow/array/builder_base.h"
+#include "arrow/array/data.h"
+#include "arrow/buffer.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+// ----------------------------------------------------------------------
+// List builder
+
+template <typename TYPE>
+class BaseListBuilder : public ArrayBuilder {
+ public:
+ using TypeClass = TYPE;
+ using offset_type = typename TypeClass::offset_type;
+
+ /// Use this constructor to incrementally build the value array along with offsets and
+ /// null bitmap.
+ BaseListBuilder(MemoryPool* pool, std::shared_ptr<ArrayBuilder> const& value_builder,
+ const std::shared_ptr<DataType>& type)
+ : ArrayBuilder(pool),
+ offsets_builder_(pool),
+ value_builder_(value_builder),
+ value_field_(type->field(0)->WithType(NULLPTR)) {}
+
+ BaseListBuilder(MemoryPool* pool, std::shared_ptr<ArrayBuilder> const& value_builder)
+ : BaseListBuilder(pool, value_builder, list(value_builder->type())) {}
+
+ Status Resize(int64_t capacity) override {
+ if (capacity > maximum_elements()) {
+ return Status::CapacityError("List array cannot reserve space for more than ",
+ maximum_elements(), " got ", capacity);
+ }
+ ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
+
+ // One more than requested for offsets
+ ARROW_RETURN_NOT_OK(offsets_builder_.Resize(capacity + 1));
+ return ArrayBuilder::Resize(capacity);
+ }
+
+ void Reset() override {
+ ArrayBuilder::Reset();
+ offsets_builder_.Reset();
+ value_builder_->Reset();
+ }
+
+ /// \brief Vector append
+ ///
+ /// If passed, valid_bytes is of equal length to values, and any zero byte
+ /// will be considered as a null for that slot
+ Status AppendValues(const offset_type* offsets, int64_t length,
+ const uint8_t* valid_bytes = NULLPTR) {
+ ARROW_RETURN_NOT_OK(Reserve(length));
+ UnsafeAppendToBitmap(valid_bytes, length);
+ offsets_builder_.UnsafeAppend(offsets, length);
+ return Status::OK();
+ }
+
+ /// \brief Start a new variable-length list slot
+ ///
+ /// This function should be called before beginning to append elements to the
+ /// value builder
+ Status Append(bool is_valid = true) {
+ ARROW_RETURN_NOT_OK(Reserve(1));
+ UnsafeAppendToBitmap(is_valid);
+ return AppendNextOffset();
+ }
+
+ Status AppendNull() final { return Append(false); }
+
+ Status AppendNulls(int64_t length) final {
+ ARROW_RETURN_NOT_OK(Reserve(length));
+ ARROW_RETURN_NOT_OK(ValidateOverflow(0));
+ UnsafeAppendToBitmap(length, false);
+ const int64_t num_values = value_builder_->length();
+ for (int64_t i = 0; i < length; ++i) {
+ offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_values));
+ }
+ return Status::OK();
+ }
+
Status AppendEmptyValue() final { return Append(true); }
Status AppendEmptyValues(int64_t length) final {
@@ -122,326 +122,326 @@ class BaseListBuilder : public ArrayBuilder {
return Status::OK();
}
- Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
- ARROW_RETURN_NOT_OK(AppendNextOffset());
-
- // Offset padding zeroed by BufferBuilder
- std::shared_ptr<Buffer> offsets, null_bitmap;
- ARROW_RETURN_NOT_OK(offsets_builder_.Finish(&offsets));
- ARROW_RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
-
- if (value_builder_->length() == 0) {
- // Try to make sure we get a non-null values buffer (ARROW-2744)
- ARROW_RETURN_NOT_OK(value_builder_->Resize(0));
- }
-
- std::shared_ptr<ArrayData> items;
- ARROW_RETURN_NOT_OK(value_builder_->FinishInternal(&items));
-
- *out = ArrayData::Make(type(), length_, {null_bitmap, offsets}, {std::move(items)},
- null_count_);
- Reset();
- return Status::OK();
- }
-
- Status ValidateOverflow(int64_t new_elements) const {
- auto new_length = value_builder_->length() + new_elements;
- if (ARROW_PREDICT_FALSE(new_length > maximum_elements())) {
- return Status::CapacityError("List array cannot contain more than ",
- maximum_elements(), " elements, have ", new_elements);
- } else {
- return Status::OK();
- }
- }
-
- ArrayBuilder* value_builder() const { return value_builder_.get(); }
-
- // Cannot make this a static attribute because of linking issues
- static constexpr int64_t maximum_elements() {
- return std::numeric_limits<offset_type>::max() - 1;
- }
-
- std::shared_ptr<DataType> type() const override {
- return std::make_shared<TYPE>(value_field_->WithType(value_builder_->type()));
- }
-
- protected:
- TypedBufferBuilder<offset_type> offsets_builder_;
- std::shared_ptr<ArrayBuilder> value_builder_;
- std::shared_ptr<Field> value_field_;
-
- Status AppendNextOffset() {
- ARROW_RETURN_NOT_OK(ValidateOverflow(0));
- const int64_t num_values = value_builder_->length();
- return offsets_builder_.Append(static_cast<offset_type>(num_values));
- }
-};
-
-/// \class ListBuilder
-/// \brief Builder class for variable-length list array value types
-///
-/// To use this class, you must append values to the child array builder and use
-/// the Append function to delimit each distinct list value (once the values
-/// have been appended to the child array) or use the bulk API to append
-/// a sequence of offsets and null values.
-///
-/// A note on types. Per arrow/type.h all types in the c++ implementation are
-/// logical so even though this class always builds list array, this can
-/// represent multiple different logical types. If no logical type is provided
-/// at construction time, the class defaults to List<T> where t is taken from the
-/// value_builder/values that the object is constructed with.
-class ARROW_EXPORT ListBuilder : public BaseListBuilder<ListType> {
- public:
- using BaseListBuilder::BaseListBuilder;
-
- /// \cond FALSE
- using ArrayBuilder::Finish;
- /// \endcond
-
- Status Finish(std::shared_ptr<ListArray>* out) { return FinishTyped(out); }
-};
-
-/// \class LargeListBuilder
-/// \brief Builder class for large variable-length list array value types
-///
-/// Like ListBuilder, but to create large list arrays (with 64-bit offsets).
-class ARROW_EXPORT LargeListBuilder : public BaseListBuilder<LargeListType> {
- public:
- using BaseListBuilder::BaseListBuilder;
-
- /// \cond FALSE
- using ArrayBuilder::Finish;
- /// \endcond
-
- Status Finish(std::shared_ptr<LargeListArray>* out) { return FinishTyped(out); }
-};
-
-// ----------------------------------------------------------------------
-// Map builder
-
-/// \class MapBuilder
-/// \brief Builder class for arrays of variable-size maps
-///
-/// To use this class, you must append values to the key and item array builders
-/// and use the Append function to delimit each distinct map (once the keys and items
-/// have been appended) or use the bulk API to append a sequence of offsets and null
-/// maps.
-///
-/// Key uniqueness and ordering are not validated.
-class ARROW_EXPORT MapBuilder : public ArrayBuilder {
- public:
- /// Use this constructor to define the built array's type explicitly. If key_builder
- /// or item_builder has indeterminate type, this builder will also.
- MapBuilder(MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& key_builder,
- const std::shared_ptr<ArrayBuilder>& item_builder,
- const std::shared_ptr<DataType>& type);
-
- /// Use this constructor to infer the built array's type. If key_builder or
- /// item_builder has indeterminate type, this builder will also.
- MapBuilder(MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& key_builder,
- const std::shared_ptr<ArrayBuilder>& item_builder, bool keys_sorted = false);
-
- MapBuilder(MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& item_builder,
- const std::shared_ptr<DataType>& type);
-
- Status Resize(int64_t capacity) override;
- void Reset() override;
- Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
-
- /// \cond FALSE
- using ArrayBuilder::Finish;
- /// \endcond
-
- Status Finish(std::shared_ptr<MapArray>* out) { return FinishTyped(out); }
-
- /// \brief Vector append
- ///
- /// If passed, valid_bytes is of equal length to values, and any zero byte
- /// will be considered as a null for that slot
- Status AppendValues(const int32_t* offsets, int64_t length,
- const uint8_t* valid_bytes = NULLPTR);
-
- /// \brief Start a new variable-length map slot
- ///
- /// This function should be called before beginning to append elements to the
- /// key and item builders
- Status Append();
-
- Status AppendNull() final;
-
- Status AppendNulls(int64_t length) final;
-
+ Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
+ ARROW_RETURN_NOT_OK(AppendNextOffset());
+
+ // Offset padding zeroed by BufferBuilder
+ std::shared_ptr<Buffer> offsets, null_bitmap;
+ ARROW_RETURN_NOT_OK(offsets_builder_.Finish(&offsets));
+ ARROW_RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
+
+ if (value_builder_->length() == 0) {
+ // Try to make sure we get a non-null values buffer (ARROW-2744)
+ ARROW_RETURN_NOT_OK(value_builder_->Resize(0));
+ }
+
+ std::shared_ptr<ArrayData> items;
+ ARROW_RETURN_NOT_OK(value_builder_->FinishInternal(&items));
+
+ *out = ArrayData::Make(type(), length_, {null_bitmap, offsets}, {std::move(items)},
+ null_count_);
+ Reset();
+ return Status::OK();
+ }
+
+ Status ValidateOverflow(int64_t new_elements) const {
+ auto new_length = value_builder_->length() + new_elements;
+ if (ARROW_PREDICT_FALSE(new_length > maximum_elements())) {
+ return Status::CapacityError("List array cannot contain more than ",
+ maximum_elements(), " elements, have ", new_elements);
+ } else {
+ return Status::OK();
+ }
+ }
+
+ ArrayBuilder* value_builder() const { return value_builder_.get(); }
+
+ // Cannot make this a static attribute because of linking issues
+ static constexpr int64_t maximum_elements() {
+ return std::numeric_limits<offset_type>::max() - 1;
+ }
+
+ std::shared_ptr<DataType> type() const override {
+ return std::make_shared<TYPE>(value_field_->WithType(value_builder_->type()));
+ }
+
+ protected:
+ TypedBufferBuilder<offset_type> offsets_builder_;
+ std::shared_ptr<ArrayBuilder> value_builder_;
+ std::shared_ptr<Field> value_field_;
+
+ Status AppendNextOffset() {
+ ARROW_RETURN_NOT_OK(ValidateOverflow(0));
+ const int64_t num_values = value_builder_->length();
+ return offsets_builder_.Append(static_cast<offset_type>(num_values));
+ }
+};
+
+/// \class ListBuilder
+/// \brief Builder class for variable-length list array value types
+///
+/// To use this class, you must append values to the child array builder and use
+/// the Append function to delimit each distinct list value (once the values
+/// have been appended to the child array) or use the bulk API to append
+/// a sequence of offsets and null values.
+///
+/// A note on types. Per arrow/type.h all types in the c++ implementation are
+/// logical so even though this class always builds list array, this can
+/// represent multiple different logical types. If no logical type is provided
+/// at construction time, the class defaults to List<T> where t is taken from the
+/// value_builder/values that the object is constructed with.
+class ARROW_EXPORT ListBuilder : public BaseListBuilder<ListType> {
+ public:
+ using BaseListBuilder::BaseListBuilder;
+
+ /// \cond FALSE
+ using ArrayBuilder::Finish;
+ /// \endcond
+
+ Status Finish(std::shared_ptr<ListArray>* out) { return FinishTyped(out); }
+};
+
+/// \class LargeListBuilder
+/// \brief Builder class for large variable-length list array value types
+///
+/// Like ListBuilder, but to create large list arrays (with 64-bit offsets).
+class ARROW_EXPORT LargeListBuilder : public BaseListBuilder<LargeListType> {
+ public:
+ using BaseListBuilder::BaseListBuilder;
+
+ /// \cond FALSE
+ using ArrayBuilder::Finish;
+ /// \endcond
+
+ Status Finish(std::shared_ptr<LargeListArray>* out) { return FinishTyped(out); }
+};
+
+// ----------------------------------------------------------------------
+// Map builder
+
+/// \class MapBuilder
+/// \brief Builder class for arrays of variable-size maps
+///
+/// To use this class, you must append values to the key and item array builders
+/// and use the Append function to delimit each distinct map (once the keys and items
+/// have been appended) or use the bulk API to append a sequence of offsets and null
+/// maps.
+///
+/// Key uniqueness and ordering are not validated.
+class ARROW_EXPORT MapBuilder : public ArrayBuilder {
+ public:
+ /// Use this constructor to define the built array's type explicitly. If key_builder
+ /// or item_builder has indeterminate type, this builder will also.
+ MapBuilder(MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& key_builder,
+ const std::shared_ptr<ArrayBuilder>& item_builder,
+ const std::shared_ptr<DataType>& type);
+
+ /// Use this constructor to infer the built array's type. If key_builder or
+ /// item_builder has indeterminate type, this builder will also.
+ MapBuilder(MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& key_builder,
+ const std::shared_ptr<ArrayBuilder>& item_builder, bool keys_sorted = false);
+
+ MapBuilder(MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& item_builder,
+ const std::shared_ptr<DataType>& type);
+
+ Status Resize(int64_t capacity) override;
+ void Reset() override;
+ Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
+
+ /// \cond FALSE
+ using ArrayBuilder::Finish;
+ /// \endcond
+
+ Status Finish(std::shared_ptr<MapArray>* out) { return FinishTyped(out); }
+
+ /// \brief Vector append
+ ///
+ /// If passed, valid_bytes is of equal length to values, and any zero byte
+ /// will be considered as a null for that slot
+ Status AppendValues(const int32_t* offsets, int64_t length,
+ const uint8_t* valid_bytes = NULLPTR);
+
+ /// \brief Start a new variable-length map slot
+ ///
+ /// This function should be called before beginning to append elements to the
+ /// key and item builders
+ Status Append();
+
+ Status AppendNull() final;
+
+ Status AppendNulls(int64_t length) final;
+
Status AppendEmptyValue() final;
Status AppendEmptyValues(int64_t length) final;
- /// \brief Get builder to append keys.
- ///
- /// Append a key with this builder should be followed by appending
- /// an item or null value with item_builder().
- ArrayBuilder* key_builder() const { return key_builder_.get(); }
-
- /// \brief Get builder to append items
- ///
- /// Appending an item with this builder should have been preceded
- /// by appending a key with key_builder().
- ArrayBuilder* item_builder() const { return item_builder_.get(); }
-
- /// \brief Get builder to add Map entries as struct values.
- ///
- /// This is used instead of key_builder()/item_builder() and allows
- /// the Map to be built as a list of struct values.
- ArrayBuilder* value_builder() const { return list_builder_->value_builder(); }
-
- std::shared_ptr<DataType> type() const override {
- return map(key_builder_->type(), item_builder_->type(), keys_sorted_);
- }
-
- Status ValidateOverflow(int64_t new_elements) {
- return list_builder_->ValidateOverflow(new_elements);
- }
-
- protected:
- inline Status AdjustStructBuilderLength();
-
- protected:
- bool keys_sorted_ = false;
- std::shared_ptr<ListBuilder> list_builder_;
- std::shared_ptr<ArrayBuilder> key_builder_;
- std::shared_ptr<ArrayBuilder> item_builder_;
-};
-
-// ----------------------------------------------------------------------
-// FixedSizeList builder
-
-/// \class FixedSizeListBuilder
-/// \brief Builder class for fixed-length list array value types
-class ARROW_EXPORT FixedSizeListBuilder : public ArrayBuilder {
- public:
- /// Use this constructor to define the built array's type explicitly. If value_builder
- /// has indeterminate type, this builder will also.
- FixedSizeListBuilder(MemoryPool* pool,
- std::shared_ptr<ArrayBuilder> const& value_builder,
- int32_t list_size);
-
- /// Use this constructor to infer the built array's type. If value_builder has
- /// indeterminate type, this builder will also.
- FixedSizeListBuilder(MemoryPool* pool,
- std::shared_ptr<ArrayBuilder> const& value_builder,
- const std::shared_ptr<DataType>& type);
-
- Status Resize(int64_t capacity) override;
- void Reset() override;
- Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
-
- /// \cond FALSE
- using ArrayBuilder::Finish;
- /// \endcond
-
- Status Finish(std::shared_ptr<FixedSizeListArray>* out) { return FinishTyped(out); }
-
- /// \brief Append a valid fixed length list.
- ///
- /// This function affects only the validity bitmap; the child values must be appended
- /// using the child array builder.
- Status Append();
-
- /// \brief Vector append
- ///
- /// If passed, valid_bytes wil be read and any zero byte
- /// will cause the corresponding slot to be null
- ///
- /// This function affects only the validity bitmap; the child values must be appended
- /// using the child array builder. This includes appending nulls for null lists.
- /// XXX this restriction is confusing, should this method be omitted?
- Status AppendValues(int64_t length, const uint8_t* valid_bytes = NULLPTR);
-
- /// \brief Append a null fixed length list.
- ///
- /// The child array builder will have the appropriate number of nulls appended
- /// automatically.
- Status AppendNull() final;
-
- /// \brief Append length null fixed length lists.
- ///
- /// The child array builder will have the appropriate number of nulls appended
- /// automatically.
- Status AppendNulls(int64_t length) final;
-
- Status ValidateOverflow(int64_t new_elements);
-
+ /// \brief Get builder to append keys.
+ ///
+ /// Append a key with this builder should be followed by appending
+ /// an item or null value with item_builder().
+ ArrayBuilder* key_builder() const { return key_builder_.get(); }
+
+ /// \brief Get builder to append items
+ ///
+ /// Appending an item with this builder should have been preceded
+ /// by appending a key with key_builder().
+ ArrayBuilder* item_builder() const { return item_builder_.get(); }
+
+ /// \brief Get builder to add Map entries as struct values.
+ ///
+ /// This is used instead of key_builder()/item_builder() and allows
+ /// the Map to be built as a list of struct values.
+ ArrayBuilder* value_builder() const { return list_builder_->value_builder(); }
+
+ std::shared_ptr<DataType> type() const override {
+ return map(key_builder_->type(), item_builder_->type(), keys_sorted_);
+ }
+
+ Status ValidateOverflow(int64_t new_elements) {
+ return list_builder_->ValidateOverflow(new_elements);
+ }
+
+ protected:
+ inline Status AdjustStructBuilderLength();
+
+ protected:
+ bool keys_sorted_ = false;
+ std::shared_ptr<ListBuilder> list_builder_;
+ std::shared_ptr<ArrayBuilder> key_builder_;
+ std::shared_ptr<ArrayBuilder> item_builder_;
+};
+
+// ----------------------------------------------------------------------
+// FixedSizeList builder
+
+/// \class FixedSizeListBuilder
+/// \brief Builder class for fixed-length list array value types
+class ARROW_EXPORT FixedSizeListBuilder : public ArrayBuilder {
+ public:
+ /// Use this constructor to define the built array's type explicitly. If value_builder
+ /// has indeterminate type, this builder will also.
+ FixedSizeListBuilder(MemoryPool* pool,
+ std::shared_ptr<ArrayBuilder> const& value_builder,
+ int32_t list_size);
+
+ /// Use this constructor to infer the built array's type. If value_builder has
+ /// indeterminate type, this builder will also.
+ FixedSizeListBuilder(MemoryPool* pool,
+ std::shared_ptr<ArrayBuilder> const& value_builder,
+ const std::shared_ptr<DataType>& type);
+
+ Status Resize(int64_t capacity) override;
+ void Reset() override;
+ Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
+
+ /// \cond FALSE
+ using ArrayBuilder::Finish;
+ /// \endcond
+
+ Status Finish(std::shared_ptr<FixedSizeListArray>* out) { return FinishTyped(out); }
+
+ /// \brief Append a valid fixed length list.
+ ///
+ /// This function affects only the validity bitmap; the child values must be appended
+ /// using the child array builder.
+ Status Append();
+
+ /// \brief Vector append
+ ///
+ /// If passed, valid_bytes wil be read and any zero byte
+ /// will cause the corresponding slot to be null
+ ///
+ /// This function affects only the validity bitmap; the child values must be appended
+ /// using the child array builder. This includes appending nulls for null lists.
+ /// XXX this restriction is confusing, should this method be omitted?
+ Status AppendValues(int64_t length, const uint8_t* valid_bytes = NULLPTR);
+
+ /// \brief Append a null fixed length list.
+ ///
+ /// The child array builder will have the appropriate number of nulls appended
+ /// automatically.
+ Status AppendNull() final;
+
+ /// \brief Append length null fixed length lists.
+ ///
+ /// The child array builder will have the appropriate number of nulls appended
+ /// automatically.
+ Status AppendNulls(int64_t length) final;
+
+ Status ValidateOverflow(int64_t new_elements);
+
Status AppendEmptyValue() final;
Status AppendEmptyValues(int64_t length) final;
- ArrayBuilder* value_builder() const { return value_builder_.get(); }
-
- std::shared_ptr<DataType> type() const override {
- return fixed_size_list(value_field_->WithType(value_builder_->type()), list_size_);
- }
-
- // Cannot make this a static attribute because of linking issues
- static constexpr int64_t maximum_elements() {
- return std::numeric_limits<FixedSizeListType::offset_type>::max() - 1;
- }
-
- protected:
- std::shared_ptr<Field> value_field_;
- const int32_t list_size_;
- std::shared_ptr<ArrayBuilder> value_builder_;
-};
-
-// ----------------------------------------------------------------------
-// Struct
-
-// ---------------------------------------------------------------------------------
-// StructArray builder
-/// Append, Resize and Reserve methods are acting on StructBuilder.
-/// Please make sure all these methods of all child-builders' are consistently
-/// called to maintain data-structure consistency.
-class ARROW_EXPORT StructBuilder : public ArrayBuilder {
- public:
- /// If any of field_builders has indeterminate type, this builder will also
- StructBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool,
- std::vector<std::shared_ptr<ArrayBuilder>> field_builders);
-
- Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
-
- /// \cond FALSE
- using ArrayBuilder::Finish;
- /// \endcond
-
- Status Finish(std::shared_ptr<StructArray>* out) { return FinishTyped(out); }
-
- /// Null bitmap is of equal length to every child field, and any zero byte
- /// will be considered as a null for that field, but users must using app-
- /// end methods or advance methods of the child builders' independently to
- /// insert data.
- Status AppendValues(int64_t length, const uint8_t* valid_bytes) {
- ARROW_RETURN_NOT_OK(Reserve(length));
- UnsafeAppendToBitmap(valid_bytes, length);
- return Status::OK();
- }
-
- /// Append an element to the Struct. All child-builders' Append method must
- /// be called independently to maintain data-structure consistency.
- Status Append(bool is_valid = true) {
- ARROW_RETURN_NOT_OK(Reserve(1));
- UnsafeAppendToBitmap(is_valid);
- return Status::OK();
- }
-
+ ArrayBuilder* value_builder() const { return value_builder_.get(); }
+
+ std::shared_ptr<DataType> type() const override {
+ return fixed_size_list(value_field_->WithType(value_builder_->type()), list_size_);
+ }
+
+ // Cannot make this a static attribute because of linking issues
+ static constexpr int64_t maximum_elements() {
+ return std::numeric_limits<FixedSizeListType::offset_type>::max() - 1;
+ }
+
+ protected:
+ std::shared_ptr<Field> value_field_;
+ const int32_t list_size_;
+ std::shared_ptr<ArrayBuilder> value_builder_;
+};
+
+// ----------------------------------------------------------------------
+// Struct
+
+// ---------------------------------------------------------------------------------
+// StructArray builder
+/// Append, Resize and Reserve methods are acting on StructBuilder.
+/// Please make sure all these methods of all child-builders' are consistently
+/// called to maintain data-structure consistency.
+class ARROW_EXPORT StructBuilder : public ArrayBuilder {
+ public:
+ /// If any of field_builders has indeterminate type, this builder will also
+ StructBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool,
+ std::vector<std::shared_ptr<ArrayBuilder>> field_builders);
+
+ Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
+
+ /// \cond FALSE
+ using ArrayBuilder::Finish;
+ /// \endcond
+
+ Status Finish(std::shared_ptr<StructArray>* out) { return FinishTyped(out); }
+
+ /// Null bitmap is of equal length to every child field, and any zero byte
+ /// will be considered as a null for that field, but users must using app-
+ /// end methods or advance methods of the child builders' independently to
+ /// insert data.
+ Status AppendValues(int64_t length, const uint8_t* valid_bytes) {
+ ARROW_RETURN_NOT_OK(Reserve(length));
+ UnsafeAppendToBitmap(valid_bytes, length);
+ return Status::OK();
+ }
+
+ /// Append an element to the Struct. All child-builders' Append method must
+ /// be called independently to maintain data-structure consistency.
+ Status Append(bool is_valid = true) {
+ ARROW_RETURN_NOT_OK(Reserve(1));
+ UnsafeAppendToBitmap(is_valid);
+ return Status::OK();
+ }
+
/// \brief Append a null value. Automatically appends an empty value to each child
- /// builder.
- Status AppendNull() final {
- for (const auto& field : children_) {
+ /// builder.
+ Status AppendNull() final {
+ for (const auto& field : children_) {
ARROW_RETURN_NOT_OK(field->AppendEmptyValue());
- }
- return Append(false);
- }
-
+ }
+ return Append(false);
+ }
+
/// \brief Append multiple null values. Automatically appends empty values to each
- /// child builder.
+ /// child builder.
Status AppendNulls(int64_t length) final {
for (const auto& field : children_) {
ARROW_RETURN_NOT_OK(field->AppendEmptyValues(length));
@@ -450,7 +450,7 @@ class ARROW_EXPORT StructBuilder : public ArrayBuilder {
UnsafeAppendToBitmap(length, false);
return Status::OK();
}
-
+
Status AppendEmptyValue() final {
for (const auto& field : children_) {
ARROW_RETURN_NOT_OK(field->AppendEmptyValue());
@@ -467,16 +467,16 @@ class ARROW_EXPORT StructBuilder : public ArrayBuilder {
return Status::OK();
}
- void Reset() override;
-
- ArrayBuilder* field_builder(int i) const { return children_[i].get(); }
-
- int num_fields() const { return static_cast<int>(children_.size()); }
-
- std::shared_ptr<DataType> type() const override;
-
- private:
- std::shared_ptr<DataType> type_;
-};
-
-} // namespace arrow
+ void Reset() override;
+
+ ArrayBuilder* field_builder(int i) const { return children_[i].get(); }
+
+ int num_fields() const { return static_cast<int>(children_.size()); }
+
+ std::shared_ptr<DataType> type() const override;
+
+ private:
+ std::shared_ptr<DataType> type_;
+};
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_primitive.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_primitive.cc
index e403c42411d..7720a69b036 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_primitive.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_primitive.cc
@@ -1,137 +1,137 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/array/builder_primitive.h"
-
-#include <algorithm>
-#include <cstddef>
-#include <cstdint>
-#include <cstring>
-#include <utility>
-#include <vector>
-
-#include "arrow/array.h"
-#include "arrow/buffer.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/int_util.h"
-#include "arrow/util/logging.h"
-
-namespace arrow {
-
-// ----------------------------------------------------------------------
-// Null builder
-
-Status NullBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
- *out = ArrayData::Make(null(), length_, {nullptr}, length_);
- length_ = null_count_ = 0;
- return Status::OK();
-}
-
-BooleanBuilder::BooleanBuilder(MemoryPool* pool)
- : ArrayBuilder(pool), data_builder_(pool) {}
-
-BooleanBuilder::BooleanBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
- : BooleanBuilder(pool) {
- ARROW_CHECK_EQ(Type::BOOL, type->id());
-}
-
-void BooleanBuilder::Reset() {
- ArrayBuilder::Reset();
- data_builder_.Reset();
-}
-
-Status BooleanBuilder::Resize(int64_t capacity) {
- RETURN_NOT_OK(CheckCapacity(capacity));
- capacity = std::max(capacity, kMinBuilderCapacity);
- RETURN_NOT_OK(data_builder_.Resize(capacity));
- return ArrayBuilder::Resize(capacity);
-}
-
-Status BooleanBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/array/builder_primitive.h"
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <utility>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/buffer.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/int_util.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+// ----------------------------------------------------------------------
+// Null builder
+
+Status NullBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+ *out = ArrayData::Make(null(), length_, {nullptr}, length_);
+ length_ = null_count_ = 0;
+ return Status::OK();
+}
+
+BooleanBuilder::BooleanBuilder(MemoryPool* pool)
+ : ArrayBuilder(pool), data_builder_(pool) {}
+
+BooleanBuilder::BooleanBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
+ : BooleanBuilder(pool) {
+ ARROW_CHECK_EQ(Type::BOOL, type->id());
+}
+
+void BooleanBuilder::Reset() {
+ ArrayBuilder::Reset();
+ data_builder_.Reset();
+}
+
+Status BooleanBuilder::Resize(int64_t capacity) {
+ RETURN_NOT_OK(CheckCapacity(capacity));
+ capacity = std::max(capacity, kMinBuilderCapacity);
+ RETURN_NOT_OK(data_builder_.Resize(capacity));
+ return ArrayBuilder::Resize(capacity);
+}
+
+Status BooleanBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
ARROW_ASSIGN_OR_RAISE(auto null_bitmap, null_bitmap_builder_.FinishWithLength(length_));
ARROW_ASSIGN_OR_RAISE(auto data, data_builder_.FinishWithLength(length_));
-
- *out = ArrayData::Make(boolean(), length_, {null_bitmap, data}, null_count_);
-
- capacity_ = length_ = null_count_ = 0;
- return Status::OK();
-}
-
-Status BooleanBuilder::AppendValues(const uint8_t* values, int64_t length,
- const uint8_t* valid_bytes) {
- RETURN_NOT_OK(Reserve(length));
-
- int64_t i = 0;
- data_builder_.UnsafeAppend<false>(length,
- [values, &i]() -> bool { return values[i++] != 0; });
- ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length);
- return Status::OK();
-}
-
-Status BooleanBuilder::AppendValues(const uint8_t* values, int64_t length,
- const std::vector<bool>& is_valid) {
- RETURN_NOT_OK(Reserve(length));
- DCHECK_EQ(length, static_cast<int64_t>(is_valid.size()));
- int64_t i = 0;
- data_builder_.UnsafeAppend<false>(length,
- [values, &i]() -> bool { return values[i++]; });
- ArrayBuilder::UnsafeAppendToBitmap(is_valid);
- return Status::OK();
-}
-
-Status BooleanBuilder::AppendValues(const std::vector<uint8_t>& values,
- const std::vector<bool>& is_valid) {
- return AppendValues(values.data(), static_cast<int64_t>(values.size()), is_valid);
-}
-
-Status BooleanBuilder::AppendValues(const std::vector<uint8_t>& values) {
- return AppendValues(values.data(), static_cast<int64_t>(values.size()));
-}
-
-Status BooleanBuilder::AppendValues(const std::vector<bool>& values,
- const std::vector<bool>& is_valid) {
- const int64_t length = static_cast<int64_t>(values.size());
- RETURN_NOT_OK(Reserve(length));
- DCHECK_EQ(length, static_cast<int64_t>(is_valid.size()));
- int64_t i = 0;
- data_builder_.UnsafeAppend<false>(length,
- [&values, &i]() -> bool { return values[i++]; });
- ArrayBuilder::UnsafeAppendToBitmap(is_valid);
- return Status::OK();
-}
-
-Status BooleanBuilder::AppendValues(const std::vector<bool>& values) {
- const int64_t length = static_cast<int64_t>(values.size());
- RETURN_NOT_OK(Reserve(length));
- int64_t i = 0;
- data_builder_.UnsafeAppend<false>(length,
- [&values, &i]() -> bool { return values[i++]; });
- ArrayBuilder::UnsafeSetNotNull(length);
- return Status::OK();
-}
-
-Status BooleanBuilder::AppendValues(int64_t length, bool value) {
- RETURN_NOT_OK(Reserve(length));
- data_builder_.UnsafeAppend(length, value);
- ArrayBuilder::UnsafeSetNotNull(length);
- return Status::OK();
-}
-
-} // namespace arrow
+
+ *out = ArrayData::Make(boolean(), length_, {null_bitmap, data}, null_count_);
+
+ capacity_ = length_ = null_count_ = 0;
+ return Status::OK();
+}
+
+Status BooleanBuilder::AppendValues(const uint8_t* values, int64_t length,
+ const uint8_t* valid_bytes) {
+ RETURN_NOT_OK(Reserve(length));
+
+ int64_t i = 0;
+ data_builder_.UnsafeAppend<false>(length,
+ [values, &i]() -> bool { return values[i++] != 0; });
+ ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length);
+ return Status::OK();
+}
+
+Status BooleanBuilder::AppendValues(const uint8_t* values, int64_t length,
+ const std::vector<bool>& is_valid) {
+ RETURN_NOT_OK(Reserve(length));
+ DCHECK_EQ(length, static_cast<int64_t>(is_valid.size()));
+ int64_t i = 0;
+ data_builder_.UnsafeAppend<false>(length,
+ [values, &i]() -> bool { return values[i++]; });
+ ArrayBuilder::UnsafeAppendToBitmap(is_valid);
+ return Status::OK();
+}
+
+Status BooleanBuilder::AppendValues(const std::vector<uint8_t>& values,
+ const std::vector<bool>& is_valid) {
+ return AppendValues(values.data(), static_cast<int64_t>(values.size()), is_valid);
+}
+
+Status BooleanBuilder::AppendValues(const std::vector<uint8_t>& values) {
+ return AppendValues(values.data(), static_cast<int64_t>(values.size()));
+}
+
+Status BooleanBuilder::AppendValues(const std::vector<bool>& values,
+ const std::vector<bool>& is_valid) {
+ const int64_t length = static_cast<int64_t>(values.size());
+ RETURN_NOT_OK(Reserve(length));
+ DCHECK_EQ(length, static_cast<int64_t>(is_valid.size()));
+ int64_t i = 0;
+ data_builder_.UnsafeAppend<false>(length,
+ [&values, &i]() -> bool { return values[i++]; });
+ ArrayBuilder::UnsafeAppendToBitmap(is_valid);
+ return Status::OK();
+}
+
+Status BooleanBuilder::AppendValues(const std::vector<bool>& values) {
+ const int64_t length = static_cast<int64_t>(values.size());
+ RETURN_NOT_OK(Reserve(length));
+ int64_t i = 0;
+ data_builder_.UnsafeAppend<false>(length,
+ [&values, &i]() -> bool { return values[i++]; });
+ ArrayBuilder::UnsafeSetNotNull(length);
+ return Status::OK();
+}
+
+Status BooleanBuilder::AppendValues(int64_t length, bool value) {
+ RETURN_NOT_OK(Reserve(length));
+ data_builder_.UnsafeAppend(length, value);
+ ArrayBuilder::UnsafeSetNotNull(length);
+ return Status::OK();
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_primitive.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_primitive.h
index 80cfc4061bb..a0337047236 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_primitive.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_primitive.h
@@ -1,110 +1,110 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <algorithm>
-#include <memory>
-#include <vector>
-
-#include "arrow/array/builder_base.h"
-#include "arrow/array/data.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <memory>
+#include <vector>
+
+#include "arrow/array/builder_base.h"
+#include "arrow/array/data.h"
#include "arrow/result.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-
-namespace arrow {
-
-class ARROW_EXPORT NullBuilder : public ArrayBuilder {
- public:
- explicit NullBuilder(MemoryPool* pool = default_memory_pool()) : ArrayBuilder(pool) {}
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+
+namespace arrow {
+
+class ARROW_EXPORT NullBuilder : public ArrayBuilder {
+ public:
+ explicit NullBuilder(MemoryPool* pool = default_memory_pool()) : ArrayBuilder(pool) {}
explicit NullBuilder(const std::shared_ptr<DataType>& /*type*/,
- MemoryPool* pool = default_memory_pool())
- : NullBuilder(pool) {}
-
- /// \brief Append the specified number of null elements
- Status AppendNulls(int64_t length) final {
- if (length < 0) return Status::Invalid("length must be positive");
- null_count_ += length;
- length_ += length;
- return Status::OK();
- }
-
- /// \brief Append a single null element
- Status AppendNull() final { return AppendNulls(1); }
-
+ MemoryPool* pool = default_memory_pool())
+ : NullBuilder(pool) {}
+
+ /// \brief Append the specified number of null elements
+ Status AppendNulls(int64_t length) final {
+ if (length < 0) return Status::Invalid("length must be positive");
+ null_count_ += length;
+ length_ += length;
+ return Status::OK();
+ }
+
+ /// \brief Append a single null element
+ Status AppendNull() final { return AppendNulls(1); }
+
Status AppendEmptyValues(int64_t length) final { return AppendNulls(length); }
Status AppendEmptyValue() final { return AppendEmptyValues(1); }
- Status Append(std::nullptr_t) { return AppendNull(); }
-
- Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
-
- /// \cond FALSE
- using ArrayBuilder::Finish;
- /// \endcond
-
- std::shared_ptr<DataType> type() const override { return null(); }
-
- Status Finish(std::shared_ptr<NullArray>* out) { return FinishTyped(out); }
-};
-
-/// Base class for all Builders that emit an Array of a scalar numerical type.
-template <typename T>
-class NumericBuilder : public ArrayBuilder {
- public:
- using TypeClass = T;
- using value_type = typename T::c_type;
- using ArrayType = typename TypeTraits<T>::ArrayType;
-
- template <typename T1 = T>
- explicit NumericBuilder(
- enable_if_parameter_free<T1, MemoryPool*> pool = default_memory_pool())
- : ArrayBuilder(pool), type_(TypeTraits<T>::type_singleton()), data_builder_(pool) {}
-
- NumericBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
- : ArrayBuilder(pool), type_(type), data_builder_(pool) {}
-
- /// Append a single scalar and increase the size if necessary.
- Status Append(const value_type val) {
- ARROW_RETURN_NOT_OK(ArrayBuilder::Reserve(1));
- UnsafeAppend(val);
- return Status::OK();
- }
-
- /// Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory
- /// The memory at the corresponding data slot is set to 0 to prevent
- /// uninitialized memory access
- Status AppendNulls(int64_t length) final {
- ARROW_RETURN_NOT_OK(Reserve(length));
- data_builder_.UnsafeAppend(length, value_type{}); // zero
- UnsafeSetNull(length);
- return Status::OK();
- }
-
- /// \brief Append a single null element
- Status AppendNull() final {
- ARROW_RETURN_NOT_OK(Reserve(1));
- data_builder_.UnsafeAppend(value_type{}); // zero
- UnsafeAppendToBitmap(false);
- return Status::OK();
- }
-
+ Status Append(std::nullptr_t) { return AppendNull(); }
+
+ Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
+
+ /// \cond FALSE
+ using ArrayBuilder::Finish;
+ /// \endcond
+
+ std::shared_ptr<DataType> type() const override { return null(); }
+
+ Status Finish(std::shared_ptr<NullArray>* out) { return FinishTyped(out); }
+};
+
+/// Base class for all Builders that emit an Array of a scalar numerical type.
+template <typename T>
+class NumericBuilder : public ArrayBuilder {
+ public:
+ using TypeClass = T;
+ using value_type = typename T::c_type;
+ using ArrayType = typename TypeTraits<T>::ArrayType;
+
+ template <typename T1 = T>
+ explicit NumericBuilder(
+ enable_if_parameter_free<T1, MemoryPool*> pool = default_memory_pool())
+ : ArrayBuilder(pool), type_(TypeTraits<T>::type_singleton()), data_builder_(pool) {}
+
+ NumericBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
+ : ArrayBuilder(pool), type_(type), data_builder_(pool) {}
+
+ /// Append a single scalar and increase the size if necessary.
+ Status Append(const value_type val) {
+ ARROW_RETURN_NOT_OK(ArrayBuilder::Reserve(1));
+ UnsafeAppend(val);
+ return Status::OK();
+ }
+
+ /// Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory
+ /// The memory at the corresponding data slot is set to 0 to prevent
+ /// uninitialized memory access
+ Status AppendNulls(int64_t length) final {
+ ARROW_RETURN_NOT_OK(Reserve(length));
+ data_builder_.UnsafeAppend(length, value_type{}); // zero
+ UnsafeSetNull(length);
+ return Status::OK();
+ }
+
+ /// \brief Append a single null element
+ Status AppendNull() final {
+ ARROW_RETURN_NOT_OK(Reserve(1));
+ data_builder_.UnsafeAppend(value_type{}); // zero
+ UnsafeAppendToBitmap(false);
+ return Status::OK();
+ }
+
/// \brief Append a empty element
Status AppendEmptyValue() final {
ARROW_RETURN_NOT_OK(Reserve(1));
@@ -121,203 +121,203 @@ class NumericBuilder : public ArrayBuilder {
return Status::OK();
}
- value_type GetValue(int64_t index) const { return data_builder_.data()[index]; }
-
- void Reset() override { data_builder_.Reset(); }
-
- Status Resize(int64_t capacity) override {
- ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
- capacity = std::max(capacity, kMinBuilderCapacity);
- ARROW_RETURN_NOT_OK(data_builder_.Resize(capacity));
- return ArrayBuilder::Resize(capacity);
- }
-
- value_type operator[](int64_t index) const { return GetValue(index); }
-
- value_type& operator[](int64_t index) {
- return reinterpret_cast<value_type*>(data_builder_.mutable_data())[index];
- }
-
- /// \brief Append a sequence of elements in one shot
- /// \param[in] values a contiguous C array of values
- /// \param[in] length the number of values to append
- /// \param[in] valid_bytes an optional sequence of bytes where non-zero
- /// indicates a valid (non-null) value
- /// \return Status
- Status AppendValues(const value_type* values, int64_t length,
- const uint8_t* valid_bytes = NULLPTR) {
- ARROW_RETURN_NOT_OK(Reserve(length));
- data_builder_.UnsafeAppend(values, length);
- // length_ is update by these
- ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length);
- return Status::OK();
- }
-
- /// \brief Append a sequence of elements in one shot
- /// \param[in] values a contiguous C array of values
- /// \param[in] length the number of values to append
- /// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
- /// (0). Equal in length to values
- /// \return Status
- Status AppendValues(const value_type* values, int64_t length,
- const std::vector<bool>& is_valid) {
- ARROW_RETURN_NOT_OK(Reserve(length));
- data_builder_.UnsafeAppend(values, length);
- // length_ is update by these
- ArrayBuilder::UnsafeAppendToBitmap(is_valid);
- return Status::OK();
- }
-
- /// \brief Append a sequence of elements in one shot
- /// \param[in] values a std::vector of values
- /// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
- /// (0). Equal in length to values
- /// \return Status
- Status AppendValues(const std::vector<value_type>& values,
- const std::vector<bool>& is_valid) {
- return AppendValues(values.data(), static_cast<int64_t>(values.size()), is_valid);
- }
-
- /// \brief Append a sequence of elements in one shot
- /// \param[in] values a std::vector of values
- /// \return Status
- Status AppendValues(const std::vector<value_type>& values) {
- return AppendValues(values.data(), static_cast<int64_t>(values.size()));
- }
-
- Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
+ value_type GetValue(int64_t index) const { return data_builder_.data()[index]; }
+
+ void Reset() override { data_builder_.Reset(); }
+
+ Status Resize(int64_t capacity) override {
+ ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
+ capacity = std::max(capacity, kMinBuilderCapacity);
+ ARROW_RETURN_NOT_OK(data_builder_.Resize(capacity));
+ return ArrayBuilder::Resize(capacity);
+ }
+
+ value_type operator[](int64_t index) const { return GetValue(index); }
+
+ value_type& operator[](int64_t index) {
+ return reinterpret_cast<value_type*>(data_builder_.mutable_data())[index];
+ }
+
+ /// \brief Append a sequence of elements in one shot
+ /// \param[in] values a contiguous C array of values
+ /// \param[in] length the number of values to append
+ /// \param[in] valid_bytes an optional sequence of bytes where non-zero
+ /// indicates a valid (non-null) value
+ /// \return Status
+ Status AppendValues(const value_type* values, int64_t length,
+ const uint8_t* valid_bytes = NULLPTR) {
+ ARROW_RETURN_NOT_OK(Reserve(length));
+ data_builder_.UnsafeAppend(values, length);
+ // length_ is updated by this call
+ ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length);
+ return Status::OK();
+ }
+
+ /// \brief Append a sequence of elements in one shot
+ /// \param[in] values a contiguous C array of values
+ /// \param[in] length the number of values to append
+ /// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
+ /// (0). Equal in length to values
+ /// \return Status
+ Status AppendValues(const value_type* values, int64_t length,
+ const std::vector<bool>& is_valid) {
+ ARROW_RETURN_NOT_OK(Reserve(length));
+ data_builder_.UnsafeAppend(values, length);
+ // length_ is updated by this call
+ ArrayBuilder::UnsafeAppendToBitmap(is_valid);
+ return Status::OK();
+ }
+
+ /// \brief Append a sequence of elements in one shot
+ /// \param[in] values a std::vector of values
+ /// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
+ /// (0). Equal in length to values
+ /// \return Status
+ Status AppendValues(const std::vector<value_type>& values,
+ const std::vector<bool>& is_valid) {
+ return AppendValues(values.data(), static_cast<int64_t>(values.size()), is_valid);
+ }
+
+ /// \brief Append a sequence of elements in one shot
+ /// \param[in] values a std::vector of values
+ /// \return Status
+ Status AppendValues(const std::vector<value_type>& values) {
+ return AppendValues(values.data(), static_cast<int64_t>(values.size()));
+ }
+
+ Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
ARROW_ASSIGN_OR_RAISE(auto null_bitmap,
null_bitmap_builder_.FinishWithLength(length_));
ARROW_ASSIGN_OR_RAISE(auto data, data_builder_.FinishWithLength(length_));
- *out = ArrayData::Make(type(), length_, {null_bitmap, data}, null_count_);
- capacity_ = length_ = null_count_ = 0;
- return Status::OK();
- }
-
- /// \cond FALSE
- using ArrayBuilder::Finish;
- /// \endcond
-
- Status Finish(std::shared_ptr<ArrayType>* out) { return FinishTyped(out); }
-
- /// \brief Append a sequence of elements in one shot
- /// \param[in] values_begin InputIterator to the beginning of the values
- /// \param[in] values_end InputIterator pointing to the end of the values
- /// \return Status
- template <typename ValuesIter>
- Status AppendValues(ValuesIter values_begin, ValuesIter values_end) {
- int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
- ARROW_RETURN_NOT_OK(Reserve(length));
- data_builder_.UnsafeAppend(values_begin, values_end);
- // this updates the length_
- UnsafeSetNotNull(length);
- return Status::OK();
- }
-
- /// \brief Append a sequence of elements in one shot, with a specified nullmap
- /// \param[in] values_begin InputIterator to the beginning of the values
- /// \param[in] values_end InputIterator pointing to the end of the values
- /// \param[in] valid_begin InputIterator with elements indication valid(1)
- /// or null(0) values.
- /// \return Status
- template <typename ValuesIter, typename ValidIter>
- enable_if_t<!std::is_pointer<ValidIter>::value, Status> AppendValues(
- ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
- static_assert(!internal::is_null_pointer<ValidIter>::value,
- "Don't pass a NULLPTR directly as valid_begin, use the 2-argument "
- "version instead");
- int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
- ARROW_RETURN_NOT_OK(Reserve(length));
- data_builder_.UnsafeAppend(values_begin, values_end);
- null_bitmap_builder_.UnsafeAppend<true>(
- length, [&valid_begin]() -> bool { return *valid_begin++; });
- length_ = null_bitmap_builder_.length();
- null_count_ = null_bitmap_builder_.false_count();
- return Status::OK();
- }
-
- // Same as above, with a pointer type ValidIter
- template <typename ValuesIter, typename ValidIter>
- enable_if_t<std::is_pointer<ValidIter>::value, Status> AppendValues(
- ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
- int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
- ARROW_RETURN_NOT_OK(Reserve(length));
- data_builder_.UnsafeAppend(values_begin, values_end);
- // this updates the length_
- if (valid_begin == NULLPTR) {
- UnsafeSetNotNull(length);
- } else {
- null_bitmap_builder_.UnsafeAppend<true>(
- length, [&valid_begin]() -> bool { return *valid_begin++; });
- length_ = null_bitmap_builder_.length();
- null_count_ = null_bitmap_builder_.false_count();
- }
-
- return Status::OK();
- }
-
- /// Append a single scalar under the assumption that the underlying Buffer is
- /// large enough.
- ///
- /// This method does not capacity-check; make sure to call Reserve
- /// beforehand.
- void UnsafeAppend(const value_type val) {
- ArrayBuilder::UnsafeAppendToBitmap(true);
- data_builder_.UnsafeAppend(val);
- }
-
- void UnsafeAppendNull() {
- ArrayBuilder::UnsafeAppendToBitmap(false);
- data_builder_.UnsafeAppend(value_type{}); // zero
- }
-
- std::shared_ptr<DataType> type() const override { return type_; }
-
- protected:
- std::shared_ptr<DataType> type_;
- TypedBufferBuilder<value_type> data_builder_;
-};
-
-// Builders
-
-using UInt8Builder = NumericBuilder<UInt8Type>;
-using UInt16Builder = NumericBuilder<UInt16Type>;
-using UInt32Builder = NumericBuilder<UInt32Type>;
-using UInt64Builder = NumericBuilder<UInt64Type>;
-
-using Int8Builder = NumericBuilder<Int8Type>;
-using Int16Builder = NumericBuilder<Int16Type>;
-using Int32Builder = NumericBuilder<Int32Type>;
-using Int64Builder = NumericBuilder<Int64Type>;
-
-using HalfFloatBuilder = NumericBuilder<HalfFloatType>;
-using FloatBuilder = NumericBuilder<FloatType>;
-using DoubleBuilder = NumericBuilder<DoubleType>;
-
-class ARROW_EXPORT BooleanBuilder : public ArrayBuilder {
- public:
- using TypeClass = BooleanType;
- using value_type = bool;
-
- explicit BooleanBuilder(MemoryPool* pool = default_memory_pool());
-
- BooleanBuilder(const std::shared_ptr<DataType>& type,
- MemoryPool* pool = default_memory_pool());
-
- /// Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory
- Status AppendNulls(int64_t length) final {
- ARROW_RETURN_NOT_OK(Reserve(length));
- data_builder_.UnsafeAppend(length, false);
- UnsafeSetNull(length);
- return Status::OK();
- }
-
- Status AppendNull() final {
- ARROW_RETURN_NOT_OK(Reserve(1));
- UnsafeAppendNull();
- return Status::OK();
- }
-
+ *out = ArrayData::Make(type(), length_, {null_bitmap, data}, null_count_);
+ capacity_ = length_ = null_count_ = 0;
+ return Status::OK();
+ }
+
+ /// \cond FALSE
+ using ArrayBuilder::Finish;
+ /// \endcond
+
+ Status Finish(std::shared_ptr<ArrayType>* out) { return FinishTyped(out); }
+
+ /// \brief Append a sequence of elements in one shot
+ /// \param[in] values_begin InputIterator to the beginning of the values
+ /// \param[in] values_end InputIterator pointing to the end of the values
+ /// \return Status
+ template <typename ValuesIter>
+ Status AppendValues(ValuesIter values_begin, ValuesIter values_end) {
+ int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
+ ARROW_RETURN_NOT_OK(Reserve(length));
+ data_builder_.UnsafeAppend(values_begin, values_end);
+ // this updates the length_
+ UnsafeSetNotNull(length);
+ return Status::OK();
+ }
+
+ /// \brief Append a sequence of elements in one shot, with a specified nullmap
+ /// \param[in] values_begin InputIterator to the beginning of the values
+ /// \param[in] values_end InputIterator pointing to the end of the values
+ /// \param[in] valid_begin InputIterator with elements indicating valid(1)
+ /// or null(0) values.
+ /// \return Status
+ template <typename ValuesIter, typename ValidIter>
+ enable_if_t<!std::is_pointer<ValidIter>::value, Status> AppendValues(
+ ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
+ static_assert(!internal::is_null_pointer<ValidIter>::value,
+ "Don't pass a NULLPTR directly as valid_begin, use the 2-argument "
+ "version instead");
+ int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
+ ARROW_RETURN_NOT_OK(Reserve(length));
+ data_builder_.UnsafeAppend(values_begin, values_end);
+ null_bitmap_builder_.UnsafeAppend<true>(
+ length, [&valid_begin]() -> bool { return *valid_begin++; });
+ length_ = null_bitmap_builder_.length();
+ null_count_ = null_bitmap_builder_.false_count();
+ return Status::OK();
+ }
+
+ // Same as above, with a pointer type ValidIter
+ template <typename ValuesIter, typename ValidIter>
+ enable_if_t<std::is_pointer<ValidIter>::value, Status> AppendValues(
+ ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
+ int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
+ ARROW_RETURN_NOT_OK(Reserve(length));
+ data_builder_.UnsafeAppend(values_begin, values_end);
+ // this updates the length_
+ if (valid_begin == NULLPTR) {
+ UnsafeSetNotNull(length);
+ } else {
+ null_bitmap_builder_.UnsafeAppend<true>(
+ length, [&valid_begin]() -> bool { return *valid_begin++; });
+ length_ = null_bitmap_builder_.length();
+ null_count_ = null_bitmap_builder_.false_count();
+ }
+
+ return Status::OK();
+ }
+
+ /// Append a single scalar under the assumption that the underlying Buffer is
+ /// large enough.
+ ///
+ /// This method does not capacity-check; make sure to call Reserve
+ /// beforehand.
+ void UnsafeAppend(const value_type val) {
+ ArrayBuilder::UnsafeAppendToBitmap(true);
+ data_builder_.UnsafeAppend(val);
+ }
+
+ void UnsafeAppendNull() {
+ ArrayBuilder::UnsafeAppendToBitmap(false);
+ data_builder_.UnsafeAppend(value_type{}); // zero
+ }
+
+ std::shared_ptr<DataType> type() const override { return type_; }
+
+ protected:
+ std::shared_ptr<DataType> type_;
+ TypedBufferBuilder<value_type> data_builder_;
+};
+
+// Builders
+
+using UInt8Builder = NumericBuilder<UInt8Type>;
+using UInt16Builder = NumericBuilder<UInt16Type>;
+using UInt32Builder = NumericBuilder<UInt32Type>;
+using UInt64Builder = NumericBuilder<UInt64Type>;
+
+using Int8Builder = NumericBuilder<Int8Type>;
+using Int16Builder = NumericBuilder<Int16Type>;
+using Int32Builder = NumericBuilder<Int32Type>;
+using Int64Builder = NumericBuilder<Int64Type>;
+
+using HalfFloatBuilder = NumericBuilder<HalfFloatType>;
+using FloatBuilder = NumericBuilder<FloatType>;
+using DoubleBuilder = NumericBuilder<DoubleType>;
+
+class ARROW_EXPORT BooleanBuilder : public ArrayBuilder {
+ public:
+ using TypeClass = BooleanType;
+ using value_type = bool;
+
+ explicit BooleanBuilder(MemoryPool* pool = default_memory_pool());
+
+ BooleanBuilder(const std::shared_ptr<DataType>& type,
+ MemoryPool* pool = default_memory_pool());
+
+ /// Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory
+ Status AppendNulls(int64_t length) final {
+ ARROW_RETURN_NOT_OK(Reserve(length));
+ data_builder_.UnsafeAppend(length, false);
+ UnsafeSetNull(length);
+ return Status::OK();
+ }
+
+ Status AppendNull() final {
+ ARROW_RETURN_NOT_OK(Reserve(1));
+ UnsafeAppendNull();
+ return Status::OK();
+ }
+
Status AppendEmptyValue() final {
ARROW_RETURN_NOT_OK(Reserve(1));
data_builder_.UnsafeAppend(false);
@@ -332,148 +332,148 @@ class ARROW_EXPORT BooleanBuilder : public ArrayBuilder {
return Status::OK();
}
- /// Scalar append
- Status Append(const bool val) {
- ARROW_RETURN_NOT_OK(Reserve(1));
- UnsafeAppend(val);
- return Status::OK();
- }
-
- Status Append(const uint8_t val) { return Append(val != 0); }
-
- /// Scalar append, without checking for capacity
- void UnsafeAppend(const bool val) {
- data_builder_.UnsafeAppend(val);
- UnsafeAppendToBitmap(true);
- }
-
- void UnsafeAppendNull() {
- data_builder_.UnsafeAppend(false);
- UnsafeAppendToBitmap(false);
- }
-
- void UnsafeAppend(const uint8_t val) { UnsafeAppend(val != 0); }
-
- /// \brief Append a sequence of elements in one shot
- /// \param[in] values a contiguous array of bytes (non-zero is 1)
- /// \param[in] length the number of values to append
- /// \param[in] valid_bytes an optional sequence of bytes where non-zero
- /// indicates a valid (non-null) value
- /// \return Status
- Status AppendValues(const uint8_t* values, int64_t length,
- const uint8_t* valid_bytes = NULLPTR);
-
- /// \brief Append a sequence of elements in one shot
- /// \param[in] values a contiguous C array of values
- /// \param[in] length the number of values to append
- /// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
- /// (0). Equal in length to values
- /// \return Status
- Status AppendValues(const uint8_t* values, int64_t length,
- const std::vector<bool>& is_valid);
-
- /// \brief Append a sequence of elements in one shot
- /// \param[in] values a std::vector of bytes
- /// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
- /// (0). Equal in length to values
- /// \return Status
- Status AppendValues(const std::vector<uint8_t>& values,
- const std::vector<bool>& is_valid);
-
- /// \brief Append a sequence of elements in one shot
- /// \param[in] values a std::vector of bytes
- /// \return Status
- Status AppendValues(const std::vector<uint8_t>& values);
-
- /// \brief Append a sequence of elements in one shot
- /// \param[in] values an std::vector<bool> indicating true (1) or false
- /// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
- /// (0). Equal in length to values
- /// \return Status
- Status AppendValues(const std::vector<bool>& values, const std::vector<bool>& is_valid);
-
- /// \brief Append a sequence of elements in one shot
- /// \param[in] values an std::vector<bool> indicating true (1) or false
- /// \return Status
- Status AppendValues(const std::vector<bool>& values);
-
- /// \brief Append a sequence of elements in one shot
- /// \param[in] values_begin InputIterator to the beginning of the values
- /// \param[in] values_end InputIterator pointing to the end of the values
- /// or null(0) values
- /// \return Status
- template <typename ValuesIter>
- Status AppendValues(ValuesIter values_begin, ValuesIter values_end) {
- int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
- ARROW_RETURN_NOT_OK(Reserve(length));
- data_builder_.UnsafeAppend<false>(
- length, [&values_begin]() -> bool { return *values_begin++; });
- // this updates length_
- UnsafeSetNotNull(length);
- return Status::OK();
- }
-
- /// \brief Append a sequence of elements in one shot, with a specified nullmap
- /// \param[in] values_begin InputIterator to the beginning of the values
- /// \param[in] values_end InputIterator pointing to the end of the values
- /// \param[in] valid_begin InputIterator with elements indication valid(1)
- /// or null(0) values
- /// \return Status
- template <typename ValuesIter, typename ValidIter>
- enable_if_t<!std::is_pointer<ValidIter>::value, Status> AppendValues(
- ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
- static_assert(!internal::is_null_pointer<ValidIter>::value,
- "Don't pass a NULLPTR directly as valid_begin, use the 2-argument "
- "version instead");
- int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
- ARROW_RETURN_NOT_OK(Reserve(length));
-
- data_builder_.UnsafeAppend<false>(
- length, [&values_begin]() -> bool { return *values_begin++; });
- null_bitmap_builder_.UnsafeAppend<true>(
- length, [&valid_begin]() -> bool { return *valid_begin++; });
- length_ = null_bitmap_builder_.length();
- null_count_ = null_bitmap_builder_.false_count();
- return Status::OK();
- }
-
- // Same as above, for a pointer type ValidIter
- template <typename ValuesIter, typename ValidIter>
- enable_if_t<std::is_pointer<ValidIter>::value, Status> AppendValues(
- ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
- int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
- ARROW_RETURN_NOT_OK(Reserve(length));
- data_builder_.UnsafeAppend<false>(
- length, [&values_begin]() -> bool { return *values_begin++; });
-
- if (valid_begin == NULLPTR) {
- UnsafeSetNotNull(length);
- } else {
- null_bitmap_builder_.UnsafeAppend<true>(
- length, [&valid_begin]() -> bool { return *valid_begin++; });
- }
- length_ = null_bitmap_builder_.length();
- null_count_ = null_bitmap_builder_.false_count();
- return Status::OK();
- }
-
- Status AppendValues(int64_t length, bool value);
-
- Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
-
- /// \cond FALSE
- using ArrayBuilder::Finish;
- /// \endcond
-
- Status Finish(std::shared_ptr<BooleanArray>* out) { return FinishTyped(out); }
-
- void Reset() override;
- Status Resize(int64_t capacity) override;
-
- std::shared_ptr<DataType> type() const override { return boolean(); }
-
- protected:
- TypedBufferBuilder<bool> data_builder_;
-};
-
-} // namespace arrow
+ /// Scalar append
+ Status Append(const bool val) {
+ ARROW_RETURN_NOT_OK(Reserve(1));
+ UnsafeAppend(val);
+ return Status::OK();
+ }
+
+ Status Append(const uint8_t val) { return Append(val != 0); }
+
+ /// Scalar append, without checking for capacity
+ void UnsafeAppend(const bool val) {
+ data_builder_.UnsafeAppend(val);
+ UnsafeAppendToBitmap(true);
+ }
+
+ void UnsafeAppendNull() {
+ data_builder_.UnsafeAppend(false);
+ UnsafeAppendToBitmap(false);
+ }
+
+ void UnsafeAppend(const uint8_t val) { UnsafeAppend(val != 0); }
+
+ /// \brief Append a sequence of elements in one shot
+ /// \param[in] values a contiguous array of bytes (non-zero is 1)
+ /// \param[in] length the number of values to append
+ /// \param[in] valid_bytes an optional sequence of bytes where non-zero
+ /// indicates a valid (non-null) value
+ /// \return Status
+ Status AppendValues(const uint8_t* values, int64_t length,
+ const uint8_t* valid_bytes = NULLPTR);
+
+ /// \brief Append a sequence of elements in one shot
+ /// \param[in] values a contiguous C array of values
+ /// \param[in] length the number of values to append
+ /// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
+ /// (0). Equal in length to values
+ /// \return Status
+ Status AppendValues(const uint8_t* values, int64_t length,
+ const std::vector<bool>& is_valid);
+
+ /// \brief Append a sequence of elements in one shot
+ /// \param[in] values a std::vector of bytes
+ /// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
+ /// (0). Equal in length to values
+ /// \return Status
+ Status AppendValues(const std::vector<uint8_t>& values,
+ const std::vector<bool>& is_valid);
+
+ /// \brief Append a sequence of elements in one shot
+ /// \param[in] values a std::vector of bytes
+ /// \return Status
+ Status AppendValues(const std::vector<uint8_t>& values);
+
+ /// \brief Append a sequence of elements in one shot
+ /// \param[in] values an std::vector<bool> indicating true (1) or false
+ /// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
+ /// (0). Equal in length to values
+ /// \return Status
+ Status AppendValues(const std::vector<bool>& values, const std::vector<bool>& is_valid);
+
+ /// \brief Append a sequence of elements in one shot
+ /// \param[in] values an std::vector<bool> indicating true (1) or false
+ /// \return Status
+ Status AppendValues(const std::vector<bool>& values);
+
+ /// \brief Append a sequence of elements in one shot
+ /// \param[in] values_begin InputIterator to the beginning of the values
+ /// \param[in] values_end InputIterator pointing to the end of the values
+ /// (this overload takes no validity input)
+ /// \return Status
+ template <typename ValuesIter>
+ Status AppendValues(ValuesIter values_begin, ValuesIter values_end) {
+ int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
+ ARROW_RETURN_NOT_OK(Reserve(length));
+ data_builder_.UnsafeAppend<false>(
+ length, [&values_begin]() -> bool { return *values_begin++; });
+ // this updates length_
+ UnsafeSetNotNull(length);
+ return Status::OK();
+ }
+
+ /// \brief Append a sequence of elements in one shot, with a specified nullmap
+ /// \param[in] values_begin InputIterator to the beginning of the values
+ /// \param[in] values_end InputIterator pointing to the end of the values
+ /// \param[in] valid_begin InputIterator with elements indicating valid(1)
+ /// or null(0) values
+ /// \return Status
+ template <typename ValuesIter, typename ValidIter>
+ enable_if_t<!std::is_pointer<ValidIter>::value, Status> AppendValues(
+ ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
+ static_assert(!internal::is_null_pointer<ValidIter>::value,
+ "Don't pass a NULLPTR directly as valid_begin, use the 2-argument "
+ "version instead");
+ int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
+ ARROW_RETURN_NOT_OK(Reserve(length));
+
+ data_builder_.UnsafeAppend<false>(
+ length, [&values_begin]() -> bool { return *values_begin++; });
+ null_bitmap_builder_.UnsafeAppend<true>(
+ length, [&valid_begin]() -> bool { return *valid_begin++; });
+ length_ = null_bitmap_builder_.length();
+ null_count_ = null_bitmap_builder_.false_count();
+ return Status::OK();
+ }
+
+ // Same as above, for a pointer type ValidIter
+ template <typename ValuesIter, typename ValidIter>
+ enable_if_t<std::is_pointer<ValidIter>::value, Status> AppendValues(
+ ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
+ int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
+ ARROW_RETURN_NOT_OK(Reserve(length));
+ data_builder_.UnsafeAppend<false>(
+ length, [&values_begin]() -> bool { return *values_begin++; });
+
+ if (valid_begin == NULLPTR) {
+ UnsafeSetNotNull(length);
+ } else {
+ null_bitmap_builder_.UnsafeAppend<true>(
+ length, [&valid_begin]() -> bool { return *valid_begin++; });
+ }
+ length_ = null_bitmap_builder_.length();
+ null_count_ = null_bitmap_builder_.false_count();
+ return Status::OK();
+ }
+
+ Status AppendValues(int64_t length, bool value);
+
+ Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
+
+ /// \cond FALSE
+ using ArrayBuilder::Finish;
+ /// \endcond
+
+ Status Finish(std::shared_ptr<BooleanArray>* out) { return FinishTyped(out); }
+
+ void Reset() override;
+ Status Resize(int64_t capacity) override;
+
+ std::shared_ptr<DataType> type() const override { return boolean(); }
+
+ protected:
+ TypedBufferBuilder<bool> data_builder_;
+};
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_time.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_time.h
index ccd11c22345..f24d6b8b2e7 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_time.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_time.h
@@ -1,43 +1,43 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Contains declarations of time related Arrow builder types.
-
-#pragma once
-
-#include <memory>
-
-#include "arrow/array/builder_base.h"
-#include "arrow/array/builder_primitive.h"
-
-namespace arrow {
-
-// TODO(ARROW-7938): this class is untested
-
-class ARROW_EXPORT DayTimeIntervalBuilder : public NumericBuilder<DayTimeIntervalType> {
- public:
- using DayMilliseconds = DayTimeIntervalType::DayMilliseconds;
-
- explicit DayTimeIntervalBuilder(MemoryPool* pool = default_memory_pool())
- : DayTimeIntervalBuilder(day_time_interval(), pool) {}
-
- explicit DayTimeIntervalBuilder(std::shared_ptr<DataType> type,
- MemoryPool* pool = default_memory_pool())
- : NumericBuilder<DayTimeIntervalType>(type, pool) {}
-};
-
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Contains declarations of time related Arrow builder types.
+
+#pragma once
+
+#include <memory>
+
+#include "arrow/array/builder_base.h"
+#include "arrow/array/builder_primitive.h"
+
+namespace arrow {
+
+// TODO(ARROW-7938): this class is untested
+
+class ARROW_EXPORT DayTimeIntervalBuilder : public NumericBuilder<DayTimeIntervalType> {
+ public:
+ using DayMilliseconds = DayTimeIntervalType::DayMilliseconds;
+
+ explicit DayTimeIntervalBuilder(MemoryPool* pool = default_memory_pool())
+ : DayTimeIntervalBuilder(day_time_interval(), pool) {}
+
+ explicit DayTimeIntervalBuilder(std::shared_ptr<DataType> type,
+ MemoryPool* pool = default_memory_pool())
+ : NumericBuilder<DayTimeIntervalType>(type, pool) {}
+};
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_union.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_union.cc
index 8617cb73fce..5ddf4920a1f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_union.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_union.cc
@@ -1,121 +1,121 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/array/builder_union.h"
-
-#include <cstddef>
-#include <utility>
-
-#include "arrow/buffer.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/logging.h"
-
-namespace arrow {
-
-using internal::checked_cast;
-using internal::checked_pointer_cast;
-
-Status BasicUnionBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
- int64_t length = types_builder_.length();
-
- std::shared_ptr<Buffer> types;
- RETURN_NOT_OK(types_builder_.Finish(&types));
-
- std::vector<std::shared_ptr<ArrayData>> child_data(children_.size());
- for (size_t i = 0; i < children_.size(); ++i) {
- RETURN_NOT_OK(children_[i]->FinishInternal(&child_data[i]));
- }
-
- *out = ArrayData::Make(type(), length, {nullptr, types}, /*null_count=*/0);
- (*out)->child_data = std::move(child_data);
- return Status::OK();
-}
-
-Status DenseUnionBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
- ARROW_RETURN_NOT_OK(BasicUnionBuilder::FinishInternal(out));
- (*out)->buffers.resize(3);
- ARROW_RETURN_NOT_OK(offsets_builder_.Finish(&(*out)->buffers[2]));
- return Status::OK();
-}
-
-BasicUnionBuilder::BasicUnionBuilder(
- MemoryPool* pool, const std::vector<std::shared_ptr<ArrayBuilder>>& children,
- const std::shared_ptr<DataType>& type)
- : ArrayBuilder(pool), child_fields_(children.size()), types_builder_(pool) {
- const auto& union_type = checked_cast<const UnionType&>(*type);
- mode_ = union_type.mode();
-
- DCHECK_EQ(children.size(), union_type.type_codes().size());
-
- type_codes_ = union_type.type_codes();
- children_ = children;
-
- type_id_to_children_.resize(union_type.max_type_code() + 1, nullptr);
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/array/builder_union.h"
+
+#include <cstddef>
+#include <utility>
+
+#include "arrow/buffer.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+using internal::checked_pointer_cast;
+
+Status BasicUnionBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+ int64_t length = types_builder_.length();
+
+ std::shared_ptr<Buffer> types;
+ RETURN_NOT_OK(types_builder_.Finish(&types));
+
+ std::vector<std::shared_ptr<ArrayData>> child_data(children_.size());
+ for (size_t i = 0; i < children_.size(); ++i) {
+ RETURN_NOT_OK(children_[i]->FinishInternal(&child_data[i]));
+ }
+
+ *out = ArrayData::Make(type(), length, {nullptr, types}, /*null_count=*/0);
+ (*out)->child_data = std::move(child_data);
+ return Status::OK();
+}
+
+Status DenseUnionBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+ ARROW_RETURN_NOT_OK(BasicUnionBuilder::FinishInternal(out));
+ (*out)->buffers.resize(3);
+ ARROW_RETURN_NOT_OK(offsets_builder_.Finish(&(*out)->buffers[2]));
+ return Status::OK();
+}
+
+BasicUnionBuilder::BasicUnionBuilder(
+ MemoryPool* pool, const std::vector<std::shared_ptr<ArrayBuilder>>& children,
+ const std::shared_ptr<DataType>& type)
+ : ArrayBuilder(pool), child_fields_(children.size()), types_builder_(pool) {
+ const auto& union_type = checked_cast<const UnionType&>(*type);
+ mode_ = union_type.mode();
+
+ DCHECK_EQ(children.size(), union_type.type_codes().size());
+
+ type_codes_ = union_type.type_codes();
+ children_ = children;
+
+ type_id_to_children_.resize(union_type.max_type_code() + 1, nullptr);
DCHECK_LE(
type_id_to_children_.size() - 1,
- static_cast<decltype(type_id_to_children_)::size_type>(UnionType::kMaxTypeCode));
-
- for (size_t i = 0; i < children.size(); ++i) {
- child_fields_[i] = union_type.field(static_cast<int>(i));
-
- auto type_id = union_type.type_codes()[i];
- type_id_to_children_[type_id] = children[i].get();
- }
-}
-
-int8_t BasicUnionBuilder::AppendChild(const std::shared_ptr<ArrayBuilder>& new_child,
- const std::string& field_name) {
- children_.push_back(new_child);
- auto new_type_id = NextTypeId();
-
- type_id_to_children_[new_type_id] = new_child.get();
- child_fields_.push_back(field(field_name, nullptr));
- type_codes_.push_back(static_cast<int8_t>(new_type_id));
-
- return new_type_id;
-}
-
-std::shared_ptr<DataType> BasicUnionBuilder::type() const {
- std::vector<std::shared_ptr<Field>> child_fields(child_fields_.size());
- for (size_t i = 0; i < child_fields.size(); ++i) {
- child_fields[i] = child_fields_[i]->WithType(children_[i]->type());
- }
- return mode_ == UnionMode::SPARSE ? sparse_union(std::move(child_fields), type_codes_)
- : dense_union(std::move(child_fields), type_codes_);
-}
-
-int8_t BasicUnionBuilder::NextTypeId() {
- // Find type_id such that type_id_to_children_[type_id] == nullptr
- // and use that for the new child. Start searching at dense_type_id_
- // since type_id_to_children_ is densely packed up at least up to dense_type_id_
- for (; static_cast<size_t>(dense_type_id_) < type_id_to_children_.size();
- ++dense_type_id_) {
- if (type_id_to_children_[dense_type_id_] == nullptr) {
- return dense_type_id_++;
- }
- }
-
- DCHECK_LT(
- type_id_to_children_.size(),
- static_cast<decltype(type_id_to_children_)::size_type>(UnionType::kMaxTypeCode));
-
- // type_id_to_children_ is already densely packed, so just append the new child
- type_id_to_children_.resize(type_id_to_children_.size() + 1);
- return dense_type_id_++;
-}
-
-} // namespace arrow
+ static_cast<decltype(type_id_to_children_)::size_type>(UnionType::kMaxTypeCode));
+
+ for (size_t i = 0; i < children.size(); ++i) {
+ child_fields_[i] = union_type.field(static_cast<int>(i));
+
+ auto type_id = union_type.type_codes()[i];
+ type_id_to_children_[type_id] = children[i].get();
+ }
+}
+
+int8_t BasicUnionBuilder::AppendChild(const std::shared_ptr<ArrayBuilder>& new_child,
+ const std::string& field_name) {
+ children_.push_back(new_child);
+ auto new_type_id = NextTypeId();
+
+ type_id_to_children_[new_type_id] = new_child.get();
+ child_fields_.push_back(field(field_name, nullptr));
+ type_codes_.push_back(static_cast<int8_t>(new_type_id));
+
+ return new_type_id;
+}
+
+std::shared_ptr<DataType> BasicUnionBuilder::type() const {
+ std::vector<std::shared_ptr<Field>> child_fields(child_fields_.size());
+ for (size_t i = 0; i < child_fields.size(); ++i) {
+ child_fields[i] = child_fields_[i]->WithType(children_[i]->type());
+ }
+ return mode_ == UnionMode::SPARSE ? sparse_union(std::move(child_fields), type_codes_)
+ : dense_union(std::move(child_fields), type_codes_);
+}
+
+int8_t BasicUnionBuilder::NextTypeId() {
+ // Find type_id such that type_id_to_children_[type_id] == nullptr
+ // and use that for the new child. Start searching at dense_type_id_
+ // since type_id_to_children_ is densely packed up at least up to dense_type_id_
+ for (; static_cast<size_t>(dense_type_id_) < type_id_to_children_.size();
+ ++dense_type_id_) {
+ if (type_id_to_children_[dense_type_id_] == nullptr) {
+ return dense_type_id_++;
+ }
+ }
+
+ DCHECK_LT(
+ type_id_to_children_.size(),
+ static_cast<decltype(type_id_to_children_)::size_type>(UnionType::kMaxTypeCode));
+
+ // type_id_to_children_ is already densely packed, so just append the new child
+ type_id_to_children_.resize(type_id_to_children_.size() + 1);
+ return dense_type_id_++;
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_union.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_union.h
index 060be474fb8..86b60f2b26e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_union.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_union.h
@@ -1,122 +1,122 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/array/array_nested.h"
-#include "arrow/array/builder_base.h"
-#include "arrow/array/data.h"
-#include "arrow/buffer_builder.h"
-#include "arrow/memory_pool.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-/// \brief Base class for union array builds.
-///
-/// Note that while we subclass ArrayBuilder, as union types do not have a
-/// validity bitmap, the bitmap builder member of ArrayBuilder is not used.
-class ARROW_EXPORT BasicUnionBuilder : public ArrayBuilder {
- public:
- Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
-
- /// \cond FALSE
- using ArrayBuilder::Finish;
- /// \endcond
-
- Status Finish(std::shared_ptr<UnionArray>* out) { return FinishTyped(out); }
-
- /// \brief Make a new child builder available to the UnionArray
- ///
- /// \param[in] new_child the child builder
- /// \param[in] field_name the name of the field in the union array type
- /// if type inference is used
- /// \return child index, which is the "type" argument that needs
- /// to be passed to the "Append" method to add a new element to
- /// the union array.
- int8_t AppendChild(const std::shared_ptr<ArrayBuilder>& new_child,
- const std::string& field_name = "");
-
- std::shared_ptr<DataType> type() const override;
-
- int64_t length() const override { return types_builder_.length(); }
-
- protected:
- BasicUnionBuilder(MemoryPool* pool,
- const std::vector<std::shared_ptr<ArrayBuilder>>& children,
- const std::shared_ptr<DataType>& type);
-
- int8_t NextTypeId();
-
- std::vector<std::shared_ptr<Field>> child_fields_;
- std::vector<int8_t> type_codes_;
- UnionMode::type mode_;
-
- std::vector<ArrayBuilder*> type_id_to_children_;
- // for all type_id < dense_type_id_, type_id_to_children_[type_id] != nullptr
- int8_t dense_type_id_ = 0;
- TypedBufferBuilder<int8_t> types_builder_;
-};
-
-/// \class DenseUnionBuilder
-///
-/// This API is EXPERIMENTAL.
-class ARROW_EXPORT DenseUnionBuilder : public BasicUnionBuilder {
- public:
- /// Use this constructor to initialize the UnionBuilder with no child builders,
- /// allowing type to be inferred. You will need to call AppendChild for each of the
- /// children builders you want to use.
- explicit DenseUnionBuilder(MemoryPool* pool)
- : BasicUnionBuilder(pool, {}, dense_union(FieldVector{})), offsets_builder_(pool) {}
-
- /// Use this constructor to specify the type explicitly.
- /// You can still add child builders to the union after using this constructor
- DenseUnionBuilder(MemoryPool* pool,
- const std::vector<std::shared_ptr<ArrayBuilder>>& children,
- const std::shared_ptr<DataType>& type)
- : BasicUnionBuilder(pool, children, type), offsets_builder_(pool) {}
-
- Status AppendNull() final {
- const int8_t first_child_code = type_codes_[0];
- ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
- ARROW_RETURN_NOT_OK(types_builder_.Append(first_child_code));
- ARROW_RETURN_NOT_OK(
- offsets_builder_.Append(static_cast<int32_t>(child_builder->length())));
- // Append a null arbitrarily to the first child
- return child_builder->AppendNull();
- }
-
- Status AppendNulls(int64_t length) final {
- const int8_t first_child_code = type_codes_[0];
- ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
- ARROW_RETURN_NOT_OK(types_builder_.Append(length, first_child_code));
- ARROW_RETURN_NOT_OK(
- offsets_builder_.Append(length, static_cast<int32_t>(child_builder->length())));
- // Append just a single null to the first child
- return child_builder->AppendNull();
- }
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/array/array_nested.h"
+#include "arrow/array/builder_base.h"
+#include "arrow/array/data.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/memory_pool.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+/// \brief Base class for union array builds.
+///
+/// Note that while we subclass ArrayBuilder, as union types do not have a
+/// validity bitmap, the bitmap builder member of ArrayBuilder is not used.
+class ARROW_EXPORT BasicUnionBuilder : public ArrayBuilder {
+ public:
+ Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
+
+ /// \cond FALSE
+ using ArrayBuilder::Finish;
+ /// \endcond
+
+ Status Finish(std::shared_ptr<UnionArray>* out) { return FinishTyped(out); }
+
+ /// \brief Make a new child builder available to the UnionArray
+ ///
+ /// \param[in] new_child the child builder
+ /// \param[in] field_name the name of the field in the union array type
+ /// if type inference is used
+ /// \return child index, which is the "type" argument that needs
+ /// to be passed to the "Append" method to add a new element to
+ /// the union array.
+ int8_t AppendChild(const std::shared_ptr<ArrayBuilder>& new_child,
+ const std::string& field_name = "");
+
+ std::shared_ptr<DataType> type() const override;
+
+ int64_t length() const override { return types_builder_.length(); }
+
+ protected:
+ BasicUnionBuilder(MemoryPool* pool,
+ const std::vector<std::shared_ptr<ArrayBuilder>>& children,
+ const std::shared_ptr<DataType>& type);
+
+ int8_t NextTypeId();
+
+ std::vector<std::shared_ptr<Field>> child_fields_;
+ std::vector<int8_t> type_codes_;
+ UnionMode::type mode_;
+
+ std::vector<ArrayBuilder*> type_id_to_children_;
+ // for all type_id < dense_type_id_, type_id_to_children_[type_id] != nullptr
+ int8_t dense_type_id_ = 0;
+ TypedBufferBuilder<int8_t> types_builder_;
+};
+
+/// \class DenseUnionBuilder
+///
+/// This API is EXPERIMENTAL.
+class ARROW_EXPORT DenseUnionBuilder : public BasicUnionBuilder {
+ public:
+ /// Use this constructor to initialize the UnionBuilder with no child builders,
+ /// allowing type to be inferred. You will need to call AppendChild for each of the
+ /// children builders you want to use.
+ explicit DenseUnionBuilder(MemoryPool* pool)
+ : BasicUnionBuilder(pool, {}, dense_union(FieldVector{})), offsets_builder_(pool) {}
+
+ /// Use this constructor to specify the type explicitly.
+ /// You can still add child builders to the union after using this constructor
+ DenseUnionBuilder(MemoryPool* pool,
+ const std::vector<std::shared_ptr<ArrayBuilder>>& children,
+ const std::shared_ptr<DataType>& type)
+ : BasicUnionBuilder(pool, children, type), offsets_builder_(pool) {}
+
+ Status AppendNull() final {
+ const int8_t first_child_code = type_codes_[0];
+ ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
+ ARROW_RETURN_NOT_OK(types_builder_.Append(first_child_code));
+ ARROW_RETURN_NOT_OK(
+ offsets_builder_.Append(static_cast<int32_t>(child_builder->length())));
+ // Append a null arbitrarily to the first child
+ return child_builder->AppendNull();
+ }
+
+ Status AppendNulls(int64_t length) final {
+ const int8_t first_child_code = type_codes_[0];
+ ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
+ ARROW_RETURN_NOT_OK(types_builder_.Append(length, first_child_code));
+ ARROW_RETURN_NOT_OK(
+ offsets_builder_.Append(length, static_cast<int32_t>(child_builder->length())));
+ // Append just a single null to the first child
+ return child_builder->AppendNull();
+ }
+
Status AppendEmptyValue() final {
const int8_t first_child_code = type_codes_[0];
ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
@@ -137,52 +137,52 @@ class ARROW_EXPORT DenseUnionBuilder : public BasicUnionBuilder {
return child_builder->AppendEmptyValue();
}
- /// \brief Append an element to the UnionArray. This must be followed
- /// by an append to the appropriate child builder.
- ///
- /// \param[in] next_type type_id of the child to which the next value will be appended.
- ///
- /// The corresponding child builder must be appended to independently after this method
- /// is called.
- Status Append(int8_t next_type) {
- ARROW_RETURN_NOT_OK(types_builder_.Append(next_type));
- if (type_id_to_children_[next_type]->length() == kListMaximumElements) {
- return Status::CapacityError(
- "a dense UnionArray cannot contain more than 2^31 - 1 elements from a single "
- "child");
- }
- auto offset = static_cast<int32_t>(type_id_to_children_[next_type]->length());
- return offsets_builder_.Append(offset);
- }
-
- Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
-
- private:
- TypedBufferBuilder<int32_t> offsets_builder_;
-};
-
-/// \class SparseUnionBuilder
-///
-/// This API is EXPERIMENTAL.
-class ARROW_EXPORT SparseUnionBuilder : public BasicUnionBuilder {
- public:
- /// Use this constructor to initialize the UnionBuilder with no child builders,
- /// allowing type to be inferred. You will need to call AppendChild for each of the
- /// children builders you want to use.
- explicit SparseUnionBuilder(MemoryPool* pool)
- : BasicUnionBuilder(pool, {}, sparse_union(FieldVector{})) {}
-
- /// Use this constructor to specify the type explicitly.
- /// You can still add child builders to the union after using this constructor
- SparseUnionBuilder(MemoryPool* pool,
- const std::vector<std::shared_ptr<ArrayBuilder>>& children,
- const std::shared_ptr<DataType>& type)
- : BasicUnionBuilder(pool, children, type) {}
-
+ /// \brief Append an element to the UnionArray. This must be followed
+ /// by an append to the appropriate child builder.
+ ///
+ /// \param[in] next_type type_id of the child to which the next value will be appended.
+ ///
+ /// The corresponding child builder must be appended to independently after this method
+ /// is called.
+ Status Append(int8_t next_type) {
+ ARROW_RETURN_NOT_OK(types_builder_.Append(next_type));
+ if (type_id_to_children_[next_type]->length() == kListMaximumElements) {
+ return Status::CapacityError(
+ "a dense UnionArray cannot contain more than 2^31 - 1 elements from a single "
+ "child");
+ }
+ auto offset = static_cast<int32_t>(type_id_to_children_[next_type]->length());
+ return offsets_builder_.Append(offset);
+ }
+
+ Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
+
+ private:
+ TypedBufferBuilder<int32_t> offsets_builder_;
+};
+
+/// \class SparseUnionBuilder
+///
+/// This API is EXPERIMENTAL.
+class ARROW_EXPORT SparseUnionBuilder : public BasicUnionBuilder {
+ public:
+ /// Use this constructor to initialize the UnionBuilder with no child builders,
+ /// allowing type to be inferred. You will need to call AppendChild for each of the
+ /// children builders you want to use.
+ explicit SparseUnionBuilder(MemoryPool* pool)
+ : BasicUnionBuilder(pool, {}, sparse_union(FieldVector{})) {}
+
+ /// Use this constructor to specify the type explicitly.
+ /// You can still add child builders to the union after using this constructor
+ SparseUnionBuilder(MemoryPool* pool,
+ const std::vector<std::shared_ptr<ArrayBuilder>>& children,
+ const std::shared_ptr<DataType>& type)
+ : BasicUnionBuilder(pool, children, type) {}
+
/// \brief Append a null value.
///
/// A null is appended to the first child, empty values to the other children.
- Status AppendNull() final {
+ Status AppendNull() final {
const auto first_child_code = type_codes_[0];
ARROW_RETURN_NOT_OK(types_builder_.Append(first_child_code));
ARROW_RETURN_NOT_OK(type_id_to_children_[first_child_code]->AppendNull());
@@ -207,29 +207,29 @@ class ARROW_EXPORT SparseUnionBuilder : public BasicUnionBuilder {
}
Status AppendEmptyValue() final {
- ARROW_RETURN_NOT_OK(types_builder_.Append(type_codes_[0]));
- for (int8_t code : type_codes_) {
+ ARROW_RETURN_NOT_OK(types_builder_.Append(type_codes_[0]));
+ for (int8_t code : type_codes_) {
ARROW_RETURN_NOT_OK(type_id_to_children_[code]->AppendEmptyValue());
- }
- return Status::OK();
- }
-
+ }
+ return Status::OK();
+ }
+
Status AppendEmptyValues(int64_t length) final {
- ARROW_RETURN_NOT_OK(types_builder_.Append(length, type_codes_[0]));
- for (int8_t code : type_codes_) {
+ ARROW_RETURN_NOT_OK(types_builder_.Append(length, type_codes_[0]));
+ for (int8_t code : type_codes_) {
ARROW_RETURN_NOT_OK(type_id_to_children_[code]->AppendEmptyValues(length));
- }
- return Status::OK();
- }
-
- /// \brief Append an element to the UnionArray. This must be followed
- /// by an append to the appropriate child builder.
- ///
- /// \param[in] next_type type_id of the child to which the next value will be appended.
- ///
- /// The corresponding child builder must be appended to independently after this method
+ }
+ return Status::OK();
+ }
+
+ /// \brief Append an element to the UnionArray. This must be followed
+ /// by an append to the appropriate child builder.
+ ///
+ /// \param[in] next_type type_id of the child to which the next value will be appended.
+ ///
+ /// The corresponding child builder must be appended to independently after this method
/// is called, and all other child builders must have null or empty value appended.
- Status Append(int8_t next_type) { return types_builder_.Append(next_type); }
-};
-
-} // namespace arrow
+ Status Append(int8_t next_type) { return types_builder_.Append(next_type); }
+};
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/concatenate.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/concatenate.cc
index 32478783394..5744ed922f5 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/concatenate.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/concatenate.cc
@@ -1,261 +1,261 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/array/concatenate.h"
-
-#include <algorithm>
-#include <cstddef>
-#include <cstdint>
-#include <limits>
-#include <memory>
-#include <utility>
-#include <vector>
-
-#include "arrow/array/array_base.h"
-#include "arrow/array/data.h"
-#include "arrow/array/util.h"
-#include "arrow/buffer.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/bitmap_ops.h"
-#include "arrow/util/checked_cast.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/array/concatenate.h"
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/data.h"
+#include "arrow/array/util.h"
+#include "arrow/buffer.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/checked_cast.h"
#include "arrow/util/int_util.h"
-#include "arrow/util/int_util_internal.h"
-#include "arrow/util/logging.h"
-#include "arrow/visitor_inline.h"
-
-namespace arrow {
-
-using internal::SafeSignedAdd;
-
+#include "arrow/util/int_util_internal.h"
+#include "arrow/util/logging.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+using internal::SafeSignedAdd;
+
namespace {
-/// offset, length pair for representing a Range of a buffer or array
-struct Range {
- int64_t offset = -1, length = 0;
-
- Range() = default;
- Range(int64_t o, int64_t l) : offset(o), length(l) {}
-};
-
-/// non-owning view into a range of bits
-struct Bitmap {
- Bitmap() = default;
- Bitmap(const uint8_t* d, Range r) : data(d), range(r) {}
- explicit Bitmap(const std::shared_ptr<Buffer>& buffer, Range r)
- : Bitmap(buffer ? buffer->data() : nullptr, r) {}
-
- const uint8_t* data = nullptr;
- Range range;
-
- bool AllSet() const { return data == nullptr; }
-};
-
-// Allocate a buffer and concatenate bitmaps into it.
+/// offset, length pair for representing a Range of a buffer or array
+struct Range {
+ int64_t offset = -1, length = 0;
+
+ Range() = default;
+ Range(int64_t o, int64_t l) : offset(o), length(l) {}
+};
+
+/// non-owning view into a range of bits
+struct Bitmap {
+ Bitmap() = default;
+ Bitmap(const uint8_t* d, Range r) : data(d), range(r) {}
+ explicit Bitmap(const std::shared_ptr<Buffer>& buffer, Range r)
+ : Bitmap(buffer ? buffer->data() : nullptr, r) {}
+
+ const uint8_t* data = nullptr;
+ Range range;
+
+ bool AllSet() const { return data == nullptr; }
+};
+
+// Allocate a buffer and concatenate bitmaps into it.
Status ConcatenateBitmaps(const std::vector<Bitmap>& bitmaps, MemoryPool* pool,
std::shared_ptr<Buffer>* out) {
- int64_t out_length = 0;
- for (const auto& bitmap : bitmaps) {
- if (internal::AddWithOverflow(out_length, bitmap.range.length, &out_length)) {
- return Status::Invalid("Length overflow when concatenating arrays");
- }
- }
- ARROW_ASSIGN_OR_RAISE(*out, AllocateBitmap(out_length, pool));
- uint8_t* dst = (*out)->mutable_data();
-
- int64_t bitmap_offset = 0;
- for (auto bitmap : bitmaps) {
- if (bitmap.AllSet()) {
- BitUtil::SetBitsTo(dst, bitmap_offset, bitmap.range.length, true);
- } else {
- internal::CopyBitmap(bitmap.data, bitmap.range.offset, bitmap.range.length, dst,
- bitmap_offset);
- }
- bitmap_offset += bitmap.range.length;
- }
-
- return Status::OK();
-}
-
-// Write offsets in src into dst, adjusting them such that first_offset
-// will be the first offset written.
-template <typename Offset>
+ int64_t out_length = 0;
+ for (const auto& bitmap : bitmaps) {
+ if (internal::AddWithOverflow(out_length, bitmap.range.length, &out_length)) {
+ return Status::Invalid("Length overflow when concatenating arrays");
+ }
+ }
+ ARROW_ASSIGN_OR_RAISE(*out, AllocateBitmap(out_length, pool));
+ uint8_t* dst = (*out)->mutable_data();
+
+ int64_t bitmap_offset = 0;
+ for (auto bitmap : bitmaps) {
+ if (bitmap.AllSet()) {
+ BitUtil::SetBitsTo(dst, bitmap_offset, bitmap.range.length, true);
+ } else {
+ internal::CopyBitmap(bitmap.data, bitmap.range.offset, bitmap.range.length, dst,
+ bitmap_offset);
+ }
+ bitmap_offset += bitmap.range.length;
+ }
+
+ return Status::OK();
+}
+
+// Write offsets in src into dst, adjusting them such that first_offset
+// will be the first offset written.
+template <typename Offset>
Status PutOffsets(const std::shared_ptr<Buffer>& src, Offset first_offset, Offset* dst,
Range* values_range);
-
-// Concatenate buffers holding offsets into a single buffer of offsets,
-// also computing the ranges of values spanned by each buffer of offsets.
-template <typename Offset>
+
+// Concatenate buffers holding offsets into a single buffer of offsets,
+// also computing the ranges of values spanned by each buffer of offsets.
+template <typename Offset>
Status ConcatenateOffsets(const BufferVector& buffers, MemoryPool* pool,
std::shared_ptr<Buffer>* out,
std::vector<Range>* values_ranges) {
- values_ranges->resize(buffers.size());
-
- // allocate output buffer
- int64_t out_length = 0;
- for (const auto& buffer : buffers) {
- out_length += buffer->size() / sizeof(Offset);
- }
- ARROW_ASSIGN_OR_RAISE(*out, AllocateBuffer((out_length + 1) * sizeof(Offset), pool));
- auto dst = reinterpret_cast<Offset*>((*out)->mutable_data());
-
- int64_t elements_length = 0;
- Offset values_length = 0;
- for (size_t i = 0; i < buffers.size(); ++i) {
- // the first offset from buffers[i] will be adjusted to values_length
- // (the cumulative length of values spanned by offsets in previous buffers)
- RETURN_NOT_OK(PutOffsets<Offset>(buffers[i], values_length, &dst[elements_length],
- &values_ranges->at(i)));
- elements_length += buffers[i]->size() / sizeof(Offset);
- values_length += static_cast<Offset>(values_ranges->at(i).length);
- }
-
- // the final element in dst is the length of all values spanned by the offsets
- dst[out_length] = values_length;
- return Status::OK();
-}
-
-template <typename Offset>
+ values_ranges->resize(buffers.size());
+
+ // allocate output buffer
+ int64_t out_length = 0;
+ for (const auto& buffer : buffers) {
+ out_length += buffer->size() / sizeof(Offset);
+ }
+ ARROW_ASSIGN_OR_RAISE(*out, AllocateBuffer((out_length + 1) * sizeof(Offset), pool));
+ auto dst = reinterpret_cast<Offset*>((*out)->mutable_data());
+
+ int64_t elements_length = 0;
+ Offset values_length = 0;
+ for (size_t i = 0; i < buffers.size(); ++i) {
+ // the first offset from buffers[i] will be adjusted to values_length
+ // (the cumulative length of values spanned by offsets in previous buffers)
+ RETURN_NOT_OK(PutOffsets<Offset>(buffers[i], values_length, &dst[elements_length],
+ &values_ranges->at(i)));
+ elements_length += buffers[i]->size() / sizeof(Offset);
+ values_length += static_cast<Offset>(values_ranges->at(i).length);
+ }
+
+ // the final element in dst is the length of all values spanned by the offsets
+ dst[out_length] = values_length;
+ return Status::OK();
+}
+
+template <typename Offset>
Status PutOffsets(const std::shared_ptr<Buffer>& src, Offset first_offset, Offset* dst,
Range* values_range) {
- if (src->size() == 0) {
- // It's allowed to have an empty offsets buffer for a 0-length array
- // (see Array::Validate)
- values_range->offset = 0;
- values_range->length = 0;
- return Status::OK();
- }
-
- // Get the range of offsets to transfer from src
- auto src_begin = reinterpret_cast<const Offset*>(src->data());
- auto src_end = reinterpret_cast<const Offset*>(src->data() + src->size());
-
- // Compute the range of values which is spanned by this range of offsets
- values_range->offset = src_begin[0];
- values_range->length = *src_end - values_range->offset;
- if (first_offset > std::numeric_limits<Offset>::max() - values_range->length) {
- return Status::Invalid("offset overflow while concatenating arrays");
- }
-
- // Write offsets into dst, ensuring that the first offset written is
- // first_offset
- auto adjustment = first_offset - src_begin[0];
- // NOTE: Concatenate can be called during IPC reads to append delta dictionaries.
- // Avoid UB on non-validated input by doing the addition in the unsigned domain.
- // (the result can later be validated using Array::ValidateFull)
- std::transform(src_begin, src_end, dst, [adjustment](Offset offset) {
- return SafeSignedAdd(offset, adjustment);
- });
- return Status::OK();
-}
-
-class ConcatenateImpl {
- public:
+ if (src->size() == 0) {
+ // It's allowed to have an empty offsets buffer for a 0-length array
+ // (see Array::Validate)
+ values_range->offset = 0;
+ values_range->length = 0;
+ return Status::OK();
+ }
+
+ // Get the range of offsets to transfer from src
+ auto src_begin = reinterpret_cast<const Offset*>(src->data());
+ auto src_end = reinterpret_cast<const Offset*>(src->data() + src->size());
+
+ // Compute the range of values which is spanned by this range of offsets
+ values_range->offset = src_begin[0];
+ values_range->length = *src_end - values_range->offset;
+ if (first_offset > std::numeric_limits<Offset>::max() - values_range->length) {
+ return Status::Invalid("offset overflow while concatenating arrays");
+ }
+
+ // Write offsets into dst, ensuring that the first offset written is
+ // first_offset
+ auto adjustment = first_offset - src_begin[0];
+ // NOTE: Concatenate can be called during IPC reads to append delta dictionaries.
+ // Avoid UB on non-validated input by doing the addition in the unsigned domain.
+ // (the result can later be validated using Array::ValidateFull)
+ std::transform(src_begin, src_end, dst, [adjustment](Offset offset) {
+ return SafeSignedAdd(offset, adjustment);
+ });
+ return Status::OK();
+}
+
+class ConcatenateImpl {
+ public:
ConcatenateImpl(const ArrayDataVector& in, MemoryPool* pool)
- : in_(std::move(in)), pool_(pool), out_(std::make_shared<ArrayData>()) {
- out_->type = in[0]->type;
- for (size_t i = 0; i < in_.size(); ++i) {
- out_->length = SafeSignedAdd(out_->length, in[i]->length);
- if (out_->null_count == kUnknownNullCount ||
- in[i]->null_count == kUnknownNullCount) {
- out_->null_count = kUnknownNullCount;
- continue;
- }
- out_->null_count = SafeSignedAdd(out_->null_count.load(), in[i]->null_count.load());
- }
- out_->buffers.resize(in[0]->buffers.size());
- out_->child_data.resize(in[0]->child_data.size());
- for (auto& data : out_->child_data) {
- data = std::make_shared<ArrayData>();
- }
- }
-
- Status Concatenate(std::shared_ptr<ArrayData>* out) && {
- if (out_->null_count != 0 && internal::HasValidityBitmap(out_->type->id())) {
- RETURN_NOT_OK(ConcatenateBitmaps(Bitmaps(0), pool_, &out_->buffers[0]));
- }
- RETURN_NOT_OK(VisitTypeInline(*out_->type, this));
- *out = std::move(out_);
- return Status::OK();
- }
-
- Status Visit(const NullType&) { return Status::OK(); }
-
- Status Visit(const BooleanType&) {
- return ConcatenateBitmaps(Bitmaps(1), pool_, &out_->buffers[1]);
- }
-
- Status Visit(const FixedWidthType& fixed) {
+ : in_(std::move(in)), pool_(pool), out_(std::make_shared<ArrayData>()) {
+ out_->type = in[0]->type;
+ for (size_t i = 0; i < in_.size(); ++i) {
+ out_->length = SafeSignedAdd(out_->length, in[i]->length);
+ if (out_->null_count == kUnknownNullCount ||
+ in[i]->null_count == kUnknownNullCount) {
+ out_->null_count = kUnknownNullCount;
+ continue;
+ }
+ out_->null_count = SafeSignedAdd(out_->null_count.load(), in[i]->null_count.load());
+ }
+ out_->buffers.resize(in[0]->buffers.size());
+ out_->child_data.resize(in[0]->child_data.size());
+ for (auto& data : out_->child_data) {
+ data = std::make_shared<ArrayData>();
+ }
+ }
+
+ Status Concatenate(std::shared_ptr<ArrayData>* out) && {
+ if (out_->null_count != 0 && internal::HasValidityBitmap(out_->type->id())) {
+ RETURN_NOT_OK(ConcatenateBitmaps(Bitmaps(0), pool_, &out_->buffers[0]));
+ }
+ RETURN_NOT_OK(VisitTypeInline(*out_->type, this));
+ *out = std::move(out_);
+ return Status::OK();
+ }
+
+ Status Visit(const NullType&) { return Status::OK(); }
+
+ Status Visit(const BooleanType&) {
+ return ConcatenateBitmaps(Bitmaps(1), pool_, &out_->buffers[1]);
+ }
+
+ Status Visit(const FixedWidthType& fixed) {
// Handles numbers, decimal128, decimal256, fixed_size_binary
- ARROW_ASSIGN_OR_RAISE(auto buffers, Buffers(1, fixed));
- return ConcatenateBuffers(buffers, pool_).Value(&out_->buffers[1]);
- }
-
- Status Visit(const BinaryType&) {
- std::vector<Range> value_ranges;
- ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, sizeof(int32_t)));
- RETURN_NOT_OK(ConcatenateOffsets<int32_t>(index_buffers, pool_, &out_->buffers[1],
- &value_ranges));
- ARROW_ASSIGN_OR_RAISE(auto value_buffers, Buffers(2, value_ranges));
- return ConcatenateBuffers(value_buffers, pool_).Value(&out_->buffers[2]);
- }
-
- Status Visit(const LargeBinaryType&) {
- std::vector<Range> value_ranges;
- ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, sizeof(int64_t)));
- RETURN_NOT_OK(ConcatenateOffsets<int64_t>(index_buffers, pool_, &out_->buffers[1],
- &value_ranges));
- ARROW_ASSIGN_OR_RAISE(auto value_buffers, Buffers(2, value_ranges));
- return ConcatenateBuffers(value_buffers, pool_).Value(&out_->buffers[2]);
- }
-
- Status Visit(const ListType&) {
- std::vector<Range> value_ranges;
- ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, sizeof(int32_t)));
- RETURN_NOT_OK(ConcatenateOffsets<int32_t>(index_buffers, pool_, &out_->buffers[1],
- &value_ranges));
- ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(0, value_ranges));
- return ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[0]);
- }
-
- Status Visit(const LargeListType&) {
- std::vector<Range> value_ranges;
- ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, sizeof(int64_t)));
- RETURN_NOT_OK(ConcatenateOffsets<int64_t>(index_buffers, pool_, &out_->buffers[1],
- &value_ranges));
- ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(0, value_ranges));
- return ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[0]);
- }
-
+ ARROW_ASSIGN_OR_RAISE(auto buffers, Buffers(1, fixed));
+ return ConcatenateBuffers(buffers, pool_).Value(&out_->buffers[1]);
+ }
+
+ Status Visit(const BinaryType&) {
+ std::vector<Range> value_ranges;
+ ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, sizeof(int32_t)));
+ RETURN_NOT_OK(ConcatenateOffsets<int32_t>(index_buffers, pool_, &out_->buffers[1],
+ &value_ranges));
+ ARROW_ASSIGN_OR_RAISE(auto value_buffers, Buffers(2, value_ranges));
+ return ConcatenateBuffers(value_buffers, pool_).Value(&out_->buffers[2]);
+ }
+
+ Status Visit(const LargeBinaryType&) {
+ std::vector<Range> value_ranges;
+ ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, sizeof(int64_t)));
+ RETURN_NOT_OK(ConcatenateOffsets<int64_t>(index_buffers, pool_, &out_->buffers[1],
+ &value_ranges));
+ ARROW_ASSIGN_OR_RAISE(auto value_buffers, Buffers(2, value_ranges));
+ return ConcatenateBuffers(value_buffers, pool_).Value(&out_->buffers[2]);
+ }
+
+ Status Visit(const ListType&) {
+ std::vector<Range> value_ranges;
+ ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, sizeof(int32_t)));
+ RETURN_NOT_OK(ConcatenateOffsets<int32_t>(index_buffers, pool_, &out_->buffers[1],
+ &value_ranges));
+ ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(0, value_ranges));
+ return ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[0]);
+ }
+
+ Status Visit(const LargeListType&) {
+ std::vector<Range> value_ranges;
+ ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, sizeof(int64_t)));
+ RETURN_NOT_OK(ConcatenateOffsets<int64_t>(index_buffers, pool_, &out_->buffers[1],
+ &value_ranges));
+ ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(0, value_ranges));
+ return ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[0]);
+ }
+
Status Visit(const FixedSizeListType& fixed_size_list) {
ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(0, fixed_size_list.list_size()));
- return ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[0]);
- }
-
- Status Visit(const StructType& s) {
- for (int i = 0; i < s.num_fields(); ++i) {
- ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(i));
- RETURN_NOT_OK(ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[i]));
- }
- return Status::OK();
- }
-
+ return ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[0]);
+ }
+
+ Status Visit(const StructType& s) {
+ for (int i = 0; i < s.num_fields(); ++i) {
+ ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(i));
+ RETURN_NOT_OK(ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[i]));
+ }
+ return Status::OK();
+ }
+
Result<BufferVector> UnifyDictionaries(const DictionaryType& d) {
BufferVector new_index_lookup;
ARROW_ASSIGN_OR_RAISE(auto unifier, DictionaryUnifier::Make(d.value_type()));
@@ -297,140 +297,140 @@ class ConcatenateImpl {
return std::move(out);
}
- Status Visit(const DictionaryType& d) {
- auto fixed = internal::checked_cast<const FixedWidthType*>(d.index_type().get());
-
- // Two cases: all the dictionaries are the same, or unification is
- // required
- bool dictionaries_same = true;
- std::shared_ptr<Array> dictionary0 = MakeArray(in_[0]->dictionary);
- for (size_t i = 1; i < in_.size(); ++i) {
- if (!MakeArray(in_[i]->dictionary)->Equals(dictionary0)) {
- dictionaries_same = false;
- break;
- }
- }
-
+ Status Visit(const DictionaryType& d) {
+ auto fixed = internal::checked_cast<const FixedWidthType*>(d.index_type().get());
+
+ // Two cases: all the dictionaries are the same, or unification is
+ // required
+ bool dictionaries_same = true;
+ std::shared_ptr<Array> dictionary0 = MakeArray(in_[0]->dictionary);
+ for (size_t i = 1; i < in_.size(); ++i) {
+ if (!MakeArray(in_[i]->dictionary)->Equals(dictionary0)) {
+ dictionaries_same = false;
+ break;
+ }
+ }
+
ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, *fixed));
- if (dictionaries_same) {
- out_->dictionary = in_[0]->dictionary;
- return ConcatenateBuffers(index_buffers, pool_).Value(&out_->buffers[1]);
- } else {
+ if (dictionaries_same) {
+ out_->dictionary = in_[0]->dictionary;
+ return ConcatenateBuffers(index_buffers, pool_).Value(&out_->buffers[1]);
+ } else {
ARROW_ASSIGN_OR_RAISE(auto index_lookup, UnifyDictionaries(d));
ARROW_ASSIGN_OR_RAISE(out_->buffers[1],
ConcatenateDictionaryIndices(*fixed, index_lookup));
return Status::OK();
- }
- }
-
- Status Visit(const UnionType& u) {
- return Status::NotImplemented("concatenation of ", u);
- }
-
- Status Visit(const ExtensionType& e) {
- // XXX can we just concatenate their storage?
- return Status::NotImplemented("concatenation of ", e);
- }
-
- private:
- // NOTE: Concatenate() can be called during IPC reads to append delta dictionaries
- // on non-validated input. Therefore, the input-checking SliceBufferSafe and
- // ArrayData::SliceSafe are used below.
-
- // Gather the index-th buffer of each input into a vector.
- // Bytes are sliced with that input's offset and length.
- // Note that BufferVector will not contain the buffer of in_[i] if it's
- // nullptr.
- Result<BufferVector> Buffers(size_t index) {
- BufferVector buffers;
- buffers.reserve(in_.size());
+ }
+ }
+
+ Status Visit(const UnionType& u) {
+ return Status::NotImplemented("concatenation of ", u);
+ }
+
+ Status Visit(const ExtensionType& e) {
+ // XXX can we just concatenate their storage?
+ return Status::NotImplemented("concatenation of ", e);
+ }
+
+ private:
+ // NOTE: Concatenate() can be called during IPC reads to append delta dictionaries
+ // on non-validated input. Therefore, the input-checking SliceBufferSafe and
+ // ArrayData::SliceSafe are used below.
+
+ // Gather the index-th buffer of each input into a vector.
+ // Bytes are sliced with that input's offset and length.
+ // Note that BufferVector will not contain the buffer of in_[i] if it's
+ // nullptr.
+ Result<BufferVector> Buffers(size_t index) {
+ BufferVector buffers;
+ buffers.reserve(in_.size());
for (const auto& array_data : in_) {
- const auto& buffer = array_data->buffers[index];
- if (buffer != nullptr) {
- ARROW_ASSIGN_OR_RAISE(
- auto sliced_buffer,
- SliceBufferSafe(buffer, array_data->offset, array_data->length));
- buffers.push_back(std::move(sliced_buffer));
- }
- }
- return buffers;
- }
-
- // Gather the index-th buffer of each input into a vector.
- // Bytes are sliced with the explicitly passed ranges.
- // Note that BufferVector will not contain the buffer of in_[i] if it's
- // nullptr.
- Result<BufferVector> Buffers(size_t index, const std::vector<Range>& ranges) {
- DCHECK_EQ(in_.size(), ranges.size());
- BufferVector buffers;
- buffers.reserve(in_.size());
- for (size_t i = 0; i < in_.size(); ++i) {
- const auto& buffer = in_[i]->buffers[index];
- if (buffer != nullptr) {
- ARROW_ASSIGN_OR_RAISE(
- auto sliced_buffer,
- SliceBufferSafe(buffer, ranges[i].offset, ranges[i].length));
- buffers.push_back(std::move(sliced_buffer));
- } else {
- DCHECK_EQ(ranges[i].length, 0);
- }
- }
- return buffers;
- }
-
- // Gather the index-th buffer of each input into a vector.
- // Buffers are assumed to contain elements of the given byte_width,
- // those elements are sliced with that input's offset and length.
- // Note that BufferVector will not contain the buffer of in_[i] if it's
- // nullptr.
- Result<BufferVector> Buffers(size_t index, int byte_width) {
- BufferVector buffers;
- buffers.reserve(in_.size());
+ const auto& buffer = array_data->buffers[index];
+ if (buffer != nullptr) {
+ ARROW_ASSIGN_OR_RAISE(
+ auto sliced_buffer,
+ SliceBufferSafe(buffer, array_data->offset, array_data->length));
+ buffers.push_back(std::move(sliced_buffer));
+ }
+ }
+ return buffers;
+ }
+
+ // Gather the index-th buffer of each input into a vector.
+ // Bytes are sliced with the explicitly passed ranges.
+ // Note that BufferVector will not contain the buffer of in_[i] if it's
+ // nullptr.
+ Result<BufferVector> Buffers(size_t index, const std::vector<Range>& ranges) {
+ DCHECK_EQ(in_.size(), ranges.size());
+ BufferVector buffers;
+ buffers.reserve(in_.size());
+ for (size_t i = 0; i < in_.size(); ++i) {
+ const auto& buffer = in_[i]->buffers[index];
+ if (buffer != nullptr) {
+ ARROW_ASSIGN_OR_RAISE(
+ auto sliced_buffer,
+ SliceBufferSafe(buffer, ranges[i].offset, ranges[i].length));
+ buffers.push_back(std::move(sliced_buffer));
+ } else {
+ DCHECK_EQ(ranges[i].length, 0);
+ }
+ }
+ return buffers;
+ }
+
+ // Gather the index-th buffer of each input into a vector.
+ // Buffers are assumed to contain elements of the given byte_width,
+ // those elements are sliced with that input's offset and length.
+ // Note that BufferVector will not contain the buffer of in_[i] if it's
+ // nullptr.
+ Result<BufferVector> Buffers(size_t index, int byte_width) {
+ BufferVector buffers;
+ buffers.reserve(in_.size());
for (const auto& array_data : in_) {
- const auto& buffer = array_data->buffers[index];
- if (buffer != nullptr) {
- ARROW_ASSIGN_OR_RAISE(auto sliced_buffer,
- SliceBufferSafe(buffer, array_data->offset * byte_width,
- array_data->length * byte_width));
- buffers.push_back(std::move(sliced_buffer));
- }
- }
- return buffers;
- }
-
- // Gather the index-th buffer of each input into a vector.
- // Buffers are assumed to contain elements of fixed.bit_width(),
- // those elements are sliced with that input's offset and length.
- // Note that BufferVector will not contain the buffer of in_[i] if it's
- // nullptr.
- Result<BufferVector> Buffers(size_t index, const FixedWidthType& fixed) {
- DCHECK_EQ(fixed.bit_width() % 8, 0);
- return Buffers(index, fixed.bit_width() / 8);
- }
-
- // Gather the index-th buffer of each input as a Bitmap
- // into a vector of Bitmaps.
- std::vector<Bitmap> Bitmaps(size_t index) {
- std::vector<Bitmap> bitmaps(in_.size());
- for (size_t i = 0; i < in_.size(); ++i) {
- Range range(in_[i]->offset, in_[i]->length);
- bitmaps[i] = Bitmap(in_[i]->buffers[index], range);
- }
- return bitmaps;
- }
-
- // Gather the index-th child_data of each input into a vector.
- // Elements are sliced with that input's offset and length.
+ const auto& buffer = array_data->buffers[index];
+ if (buffer != nullptr) {
+ ARROW_ASSIGN_OR_RAISE(auto sliced_buffer,
+ SliceBufferSafe(buffer, array_data->offset * byte_width,
+ array_data->length * byte_width));
+ buffers.push_back(std::move(sliced_buffer));
+ }
+ }
+ return buffers;
+ }
+
+ // Gather the index-th buffer of each input into a vector.
+ // Buffers are assumed to contain elements of fixed.bit_width(),
+ // those elements are sliced with that input's offset and length.
+ // Note that BufferVector will not contain the buffer of in_[i] if it's
+ // nullptr.
+ Result<BufferVector> Buffers(size_t index, const FixedWidthType& fixed) {
+ DCHECK_EQ(fixed.bit_width() % 8, 0);
+ return Buffers(index, fixed.bit_width() / 8);
+ }
+
+ // Gather the index-th buffer of each input as a Bitmap
+ // into a vector of Bitmaps.
+ std::vector<Bitmap> Bitmaps(size_t index) {
+ std::vector<Bitmap> bitmaps(in_.size());
+ for (size_t i = 0; i < in_.size(); ++i) {
+ Range range(in_[i]->offset, in_[i]->length);
+ bitmaps[i] = Bitmap(in_[i]->buffers[index], range);
+ }
+ return bitmaps;
+ }
+
+ // Gather the index-th child_data of each input into a vector.
+ // Elements are sliced with that input's offset and length.
Result<ArrayDataVector> ChildData(size_t index) {
ArrayDataVector child_data(in_.size());
- for (size_t i = 0; i < in_.size(); ++i) {
- ARROW_ASSIGN_OR_RAISE(child_data[i], in_[i]->child_data[index]->SliceSafe(
- in_[i]->offset, in_[i]->length));
- }
- return child_data;
- }
-
- // Gather the index-th child_data of each input into a vector.
+ for (size_t i = 0; i < in_.size(); ++i) {
+ ARROW_ASSIGN_OR_RAISE(child_data[i], in_[i]->child_data[index]->SliceSafe(
+ in_[i]->offset, in_[i]->length));
+ }
+ return child_data;
+ }
+
+ // Gather the index-th child_data of each input into a vector.
// Elements are sliced with that input's offset and length multiplied by multiplier.
Result<ArrayDataVector> ChildData(size_t index, size_t multiplier) {
ArrayDataVector child_data(in_.size());
@@ -443,48 +443,48 @@ class ConcatenateImpl {
}
// Gather the index-th child_data of each input into a vector.
- // Elements are sliced with the explicitly passed ranges.
+ // Elements are sliced with the explicitly passed ranges.
Result<ArrayDataVector> ChildData(size_t index, const std::vector<Range>& ranges) {
- DCHECK_EQ(in_.size(), ranges.size());
+ DCHECK_EQ(in_.size(), ranges.size());
ArrayDataVector child_data(in_.size());
- for (size_t i = 0; i < in_.size(); ++i) {
- ARROW_ASSIGN_OR_RAISE(child_data[i], in_[i]->child_data[index]->SliceSafe(
- ranges[i].offset, ranges[i].length));
- }
- return child_data;
- }
-
+ for (size_t i = 0; i < in_.size(); ++i) {
+ ARROW_ASSIGN_OR_RAISE(child_data[i], in_[i]->child_data[index]->SliceSafe(
+ ranges[i].offset, ranges[i].length));
+ }
+ return child_data;
+ }
+
const ArrayDataVector& in_;
- MemoryPool* pool_;
- std::shared_ptr<ArrayData> out_;
-};
-
+ MemoryPool* pool_;
+ std::shared_ptr<ArrayData> out_;
+};
+
} // namespace
-Result<std::shared_ptr<Array>> Concatenate(const ArrayVector& arrays, MemoryPool* pool) {
- if (arrays.size() == 0) {
- return Status::Invalid("Must pass at least one array");
- }
-
- // gather ArrayData of input arrays
+Result<std::shared_ptr<Array>> Concatenate(const ArrayVector& arrays, MemoryPool* pool) {
+ if (arrays.size() == 0) {
+ return Status::Invalid("Must pass at least one array");
+ }
+
+ // gather ArrayData of input arrays
ArrayDataVector data(arrays.size());
- for (size_t i = 0; i < arrays.size(); ++i) {
- if (!arrays[i]->type()->Equals(*arrays[0]->type())) {
- return Status::Invalid("arrays to be concatenated must be identically typed, but ",
- *arrays[0]->type(), " and ", *arrays[i]->type(),
- " were encountered.");
- }
- data[i] = arrays[i]->data();
- }
-
- std::shared_ptr<ArrayData> out_data;
- RETURN_NOT_OK(ConcatenateImpl(data, pool).Concatenate(&out_data));
- return MakeArray(std::move(out_data));
-}
-
-Status Concatenate(const ArrayVector& arrays, MemoryPool* pool,
- std::shared_ptr<Array>* out) {
- return Concatenate(arrays, pool).Value(out);
-}
-
-} // namespace arrow
+ for (size_t i = 0; i < arrays.size(); ++i) {
+ if (!arrays[i]->type()->Equals(*arrays[0]->type())) {
+ return Status::Invalid("arrays to be concatenated must be identically typed, but ",
+ *arrays[0]->type(), " and ", *arrays[i]->type(),
+ " were encountered.");
+ }
+ data[i] = arrays[i]->data();
+ }
+
+ std::shared_ptr<ArrayData> out_data;
+ RETURN_NOT_OK(ConcatenateImpl(data, pool).Concatenate(&out_data));
+ return MakeArray(std::move(out_data));
+}
+
+Status Concatenate(const ArrayVector& arrays, MemoryPool* pool,
+ std::shared_ptr<Array>* out) {
+ return Concatenate(arrays, pool).Value(out);
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/concatenate.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/concatenate.h
index a6c1c3cf3c1..746d6408be7 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/concatenate.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/concatenate.h
@@ -1,42 +1,42 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <memory>
-
-#include "arrow/type_fwd.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-/// \brief Concatenate arrays
-///
-/// \param[in] arrays a vector of arrays to be concatenated
-/// \param[in] pool memory to store the result will be allocated from this memory pool
-/// \return the concatenated array
-ARROW_EXPORT
-Result<std::shared_ptr<Array>> Concatenate(const ArrayVector& arrays,
- MemoryPool* pool = default_memory_pool());
-
-ARROW_DEPRECATED("Use Result-returning version")
-ARROW_EXPORT
-Status Concatenate(const ArrayVector& arrays, MemoryPool* pool,
- std::shared_ptr<Array>* out);
-
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+
+#include "arrow/type_fwd.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+/// \brief Concatenate arrays
+///
+/// \param[in] arrays a vector of arrays to be concatenated
+/// \param[in] pool memory to store the result will be allocated from this memory pool
+/// \return the concatenated array
+ARROW_EXPORT
+Result<std::shared_ptr<Array>> Concatenate(const ArrayVector& arrays,
+ MemoryPool* pool = default_memory_pool());
+
+ARROW_DEPRECATED("Use Result-returning version")
+ARROW_EXPORT
+Status Concatenate(const ArrayVector& arrays, MemoryPool* pool,
+ std::shared_ptr<Array>* out);
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/data.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/data.cc
index 5a214473972..541d86cdb4e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/data.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/data.cc
@@ -1,331 +1,331 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/array/data.h"
-
-#include <algorithm>
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "arrow/buffer.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/util/bitmap_ops.h"
-#include "arrow/util/int_util_internal.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/macros.h"
-
-namespace arrow {
-
-using internal::CountSetBits;
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/array/data.h"
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/buffer.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/int_util_internal.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/macros.h"
+
+namespace arrow {
+
+using internal::CountSetBits;
+
static inline void AdjustNonNullable(Type::type type_id, int64_t length,
- std::vector<std::shared_ptr<Buffer>>* buffers,
- int64_t* null_count) {
+ std::vector<std::shared_ptr<Buffer>>* buffers,
+ int64_t* null_count) {
if (type_id == Type::NA) {
*null_count = length;
(*buffers)[0] = nullptr;
} else if (internal::HasValidityBitmap(type_id)) {
- if (*null_count == 0) {
- // In case there are no nulls, don't keep an allocated null bitmap around
- (*buffers)[0] = nullptr;
- } else if (*null_count == kUnknownNullCount && buffers->at(0) == nullptr) {
- // Conversely, if no null bitmap is provided, set the null count to 0
- *null_count = 0;
- }
- } else {
- *null_count = 0;
- }
-}
-
+ if (*null_count == 0) {
+ // In case there are no nulls, don't keep an allocated null bitmap around
+ (*buffers)[0] = nullptr;
+ } else if (*null_count == kUnknownNullCount && buffers->at(0) == nullptr) {
+ // Conversely, if no null bitmap is provided, set the null count to 0
+ *null_count = 0;
+ }
+ } else {
+ *null_count = 0;
+ }
+}
+
std::shared_ptr<ArrayData> ArrayData::Make(std::shared_ptr<DataType> type, int64_t length,
- std::vector<std::shared_ptr<Buffer>> buffers,
- int64_t null_count, int64_t offset) {
+ std::vector<std::shared_ptr<Buffer>> buffers,
+ int64_t null_count, int64_t offset) {
AdjustNonNullable(type->id(), length, &buffers, &null_count);
return std::make_shared<ArrayData>(std::move(type), length, std::move(buffers),
null_count, offset);
-}
-
-std::shared_ptr<ArrayData> ArrayData::Make(
+}
+
+std::shared_ptr<ArrayData> ArrayData::Make(
std::shared_ptr<DataType> type, int64_t length,
- std::vector<std::shared_ptr<Buffer>> buffers,
- std::vector<std::shared_ptr<ArrayData>> child_data, int64_t null_count,
- int64_t offset) {
+ std::vector<std::shared_ptr<Buffer>> buffers,
+ std::vector<std::shared_ptr<ArrayData>> child_data, int64_t null_count,
+ int64_t offset) {
AdjustNonNullable(type->id(), length, &buffers, &null_count);
return std::make_shared<ArrayData>(std::move(type), length, std::move(buffers),
- std::move(child_data), null_count, offset);
-}
-
-std::shared_ptr<ArrayData> ArrayData::Make(
+ std::move(child_data), null_count, offset);
+}
+
+std::shared_ptr<ArrayData> ArrayData::Make(
std::shared_ptr<DataType> type, int64_t length,
- std::vector<std::shared_ptr<Buffer>> buffers,
- std::vector<std::shared_ptr<ArrayData>> child_data,
- std::shared_ptr<ArrayData> dictionary, int64_t null_count, int64_t offset) {
+ std::vector<std::shared_ptr<Buffer>> buffers,
+ std::vector<std::shared_ptr<ArrayData>> child_data,
+ std::shared_ptr<ArrayData> dictionary, int64_t null_count, int64_t offset) {
AdjustNonNullable(type->id(), length, &buffers, &null_count);
auto data = std::make_shared<ArrayData>(std::move(type), length, std::move(buffers),
- std::move(child_data), null_count, offset);
- data->dictionary = std::move(dictionary);
- return data;
-}
-
+ std::move(child_data), null_count, offset);
+ data->dictionary = std::move(dictionary);
+ return data;
+}
+
std::shared_ptr<ArrayData> ArrayData::Make(std::shared_ptr<DataType> type, int64_t length,
int64_t null_count, int64_t offset) {
return std::make_shared<ArrayData>(std::move(type), length, null_count, offset);
-}
-
-std::shared_ptr<ArrayData> ArrayData::Slice(int64_t off, int64_t len) const {
- ARROW_CHECK_LE(off, length) << "Slice offset greater than array length";
- len = std::min(length - off, len);
- off += offset;
-
- auto copy = this->Copy();
- copy->length = len;
- copy->offset = off;
- if (null_count == length) {
- copy->null_count = len;
- } else if (off == offset && len == length) { // A copy of current.
- copy->null_count = null_count.load();
- } else {
- copy->null_count = null_count != 0 ? kUnknownNullCount : 0;
- }
- return copy;
-}
-
-Result<std::shared_ptr<ArrayData>> ArrayData::SliceSafe(int64_t off, int64_t len) const {
- RETURN_NOT_OK(internal::CheckSliceParams(length, off, len, "array"));
- return Slice(off, len);
-}
-
-int64_t ArrayData::GetNullCount() const {
- int64_t precomputed = this->null_count.load();
- if (ARROW_PREDICT_FALSE(precomputed == kUnknownNullCount)) {
- if (this->buffers[0]) {
- precomputed = this->length -
- CountSetBits(this->buffers[0]->data(), this->offset, this->length);
- } else {
- precomputed = 0;
- }
- this->null_count.store(precomputed);
- }
- return precomputed;
-}
-
-// ----------------------------------------------------------------------
-// Implement ArrayData::View
-
-namespace {
-
-void AccumulateLayouts(const std::shared_ptr<DataType>& type,
- std::vector<DataTypeLayout>* layouts) {
- layouts->push_back(type->layout());
- for (const auto& child : type->fields()) {
- AccumulateLayouts(child->type(), layouts);
- }
-}
-
-void AccumulateArrayData(const std::shared_ptr<ArrayData>& data,
- std::vector<std::shared_ptr<ArrayData>>* out) {
- out->push_back(data);
- for (const auto& child : data->child_data) {
- AccumulateArrayData(child, out);
- }
-}
-
-struct ViewDataImpl {
- std::shared_ptr<DataType> root_in_type;
- std::shared_ptr<DataType> root_out_type;
- std::vector<DataTypeLayout> in_layouts;
- std::vector<std::shared_ptr<ArrayData>> in_data;
- int64_t in_data_length;
- size_t in_layout_idx = 0;
- size_t in_buffer_idx = 0;
- bool input_exhausted = false;
-
- Status InvalidView(const std::string& msg) {
- return Status::Invalid("Can't view array of type ", root_in_type->ToString(), " as ",
- root_out_type->ToString(), ": ", msg);
- }
-
- void AdjustInputPointer() {
- if (input_exhausted) {
- return;
- }
- while (true) {
- // Skip exhausted layout (might be empty layout)
- while (in_buffer_idx >= in_layouts[in_layout_idx].buffers.size()) {
- in_buffer_idx = 0;
- ++in_layout_idx;
- if (in_layout_idx >= in_layouts.size()) {
- input_exhausted = true;
- return;
- }
- }
- const auto& in_spec = in_layouts[in_layout_idx].buffers[in_buffer_idx];
- if (in_spec.kind != DataTypeLayout::ALWAYS_NULL) {
- return;
- }
- // Skip always-null input buffers
- // (e.g. buffer 0 of a null type or buffer 2 of a sparse union)
- ++in_buffer_idx;
- }
- }
-
- Status CheckInputAvailable() {
- if (input_exhausted) {
- return InvalidView("not enough buffers for view type");
- }
- return Status::OK();
- }
-
- Status CheckInputExhausted() {
- if (!input_exhausted) {
- return InvalidView("too many buffers for view type");
- }
- return Status::OK();
- }
-
- Result<std::shared_ptr<ArrayData>> GetDictionaryView(const DataType& out_type) {
- if (in_data[in_layout_idx]->type->id() != Type::DICTIONARY) {
- return InvalidView("Cannot get view as dictionary type");
- }
- const auto& dict_out_type = static_cast<const DictionaryType&>(out_type);
- return internal::GetArrayView(in_data[in_layout_idx]->dictionary,
- dict_out_type.value_type());
- }
-
- Status MakeDataView(const std::shared_ptr<Field>& out_field,
- std::shared_ptr<ArrayData>* out) {
+}
+
+std::shared_ptr<ArrayData> ArrayData::Slice(int64_t off, int64_t len) const {
+ ARROW_CHECK_LE(off, length) << "Slice offset greater than array length";
+ len = std::min(length - off, len);
+ off += offset;
+
+ auto copy = this->Copy();
+ copy->length = len;
+ copy->offset = off;
+ if (null_count == length) {
+ copy->null_count = len;
+ } else if (off == offset && len == length) { // A copy of current.
+ copy->null_count = null_count.load();
+ } else {
+ copy->null_count = null_count != 0 ? kUnknownNullCount : 0;
+ }
+ return copy;
+}
+
+Result<std::shared_ptr<ArrayData>> ArrayData::SliceSafe(int64_t off, int64_t len) const {
+ RETURN_NOT_OK(internal::CheckSliceParams(length, off, len, "array"));
+ return Slice(off, len);
+}
+
+int64_t ArrayData::GetNullCount() const {
+ int64_t precomputed = this->null_count.load();
+ if (ARROW_PREDICT_FALSE(precomputed == kUnknownNullCount)) {
+ if (this->buffers[0]) {
+ precomputed = this->length -
+ CountSetBits(this->buffers[0]->data(), this->offset, this->length);
+ } else {
+ precomputed = 0;
+ }
+ this->null_count.store(precomputed);
+ }
+ return precomputed;
+}
+
+// ----------------------------------------------------------------------
+// Implement ArrayData::View
+
+namespace {
+
+void AccumulateLayouts(const std::shared_ptr<DataType>& type,
+ std::vector<DataTypeLayout>* layouts) {
+ layouts->push_back(type->layout());
+ for (const auto& child : type->fields()) {
+ AccumulateLayouts(child->type(), layouts);
+ }
+}
+
+void AccumulateArrayData(const std::shared_ptr<ArrayData>& data,
+ std::vector<std::shared_ptr<ArrayData>>* out) {
+ out->push_back(data);
+ for (const auto& child : data->child_data) {
+ AccumulateArrayData(child, out);
+ }
+}
+
+struct ViewDataImpl {
+ std::shared_ptr<DataType> root_in_type;
+ std::shared_ptr<DataType> root_out_type;
+ std::vector<DataTypeLayout> in_layouts;
+ std::vector<std::shared_ptr<ArrayData>> in_data;
+ int64_t in_data_length;
+ size_t in_layout_idx = 0;
+ size_t in_buffer_idx = 0;
+ bool input_exhausted = false;
+
+ Status InvalidView(const std::string& msg) {
+ return Status::Invalid("Can't view array of type ", root_in_type->ToString(), " as ",
+ root_out_type->ToString(), ": ", msg);
+ }
+
+ void AdjustInputPointer() {
+ if (input_exhausted) {
+ return;
+ }
+ while (true) {
+ // Skip exhausted layout (might be empty layout)
+ while (in_buffer_idx >= in_layouts[in_layout_idx].buffers.size()) {
+ in_buffer_idx = 0;
+ ++in_layout_idx;
+ if (in_layout_idx >= in_layouts.size()) {
+ input_exhausted = true;
+ return;
+ }
+ }
+ const auto& in_spec = in_layouts[in_layout_idx].buffers[in_buffer_idx];
+ if (in_spec.kind != DataTypeLayout::ALWAYS_NULL) {
+ return;
+ }
+ // Skip always-null input buffers
+ // (e.g. buffer 0 of a null type or buffer 2 of a sparse union)
+ ++in_buffer_idx;
+ }
+ }
+
+ Status CheckInputAvailable() {
+ if (input_exhausted) {
+ return InvalidView("not enough buffers for view type");
+ }
+ return Status::OK();
+ }
+
+ Status CheckInputExhausted() {
+ if (!input_exhausted) {
+ return InvalidView("too many buffers for view type");
+ }
+ return Status::OK();
+ }
+
+ Result<std::shared_ptr<ArrayData>> GetDictionaryView(const DataType& out_type) {
+ if (in_data[in_layout_idx]->type->id() != Type::DICTIONARY) {
+ return InvalidView("Cannot get view as dictionary type");
+ }
+ const auto& dict_out_type = static_cast<const DictionaryType&>(out_type);
+ return internal::GetArrayView(in_data[in_layout_idx]->dictionary,
+ dict_out_type.value_type());
+ }
+
+ Status MakeDataView(const std::shared_ptr<Field>& out_field,
+ std::shared_ptr<ArrayData>* out) {
const auto& out_type = out_field->type();
- const auto out_layout = out_type->layout();
-
- AdjustInputPointer();
- int64_t out_length = in_data_length;
- int64_t out_offset = 0;
- int64_t out_null_count;
-
- std::shared_ptr<ArrayData> dictionary;
- if (out_type->id() == Type::DICTIONARY) {
- ARROW_ASSIGN_OR_RAISE(dictionary, GetDictionaryView(*out_type));
- }
-
- // No type has a purely empty layout
- DCHECK_GT(out_layout.buffers.size(), 0);
-
- std::vector<std::shared_ptr<Buffer>> out_buffers;
-
- // Process null bitmap
- if (in_buffer_idx == 0 && out_layout.buffers[0].kind == DataTypeLayout::BITMAP) {
- // Copy input null bitmap
- RETURN_NOT_OK(CheckInputAvailable());
- const auto& in_data_item = in_data[in_layout_idx];
- if (!out_field->nullable() && in_data_item->GetNullCount() != 0) {
- return InvalidView("nulls in input cannot be viewed as non-nullable");
- }
- DCHECK_GT(in_data_item->buffers.size(), in_buffer_idx);
- out_buffers.push_back(in_data_item->buffers[in_buffer_idx]);
- out_length = in_data_item->length;
- out_offset = in_data_item->offset;
- out_null_count = in_data_item->null_count;
- ++in_buffer_idx;
- AdjustInputPointer();
- } else {
- // No null bitmap in input, append no-nulls bitmap
- out_buffers.push_back(nullptr);
+ const auto out_layout = out_type->layout();
+
+ AdjustInputPointer();
+ int64_t out_length = in_data_length;
+ int64_t out_offset = 0;
+ int64_t out_null_count;
+
+ std::shared_ptr<ArrayData> dictionary;
+ if (out_type->id() == Type::DICTIONARY) {
+ ARROW_ASSIGN_OR_RAISE(dictionary, GetDictionaryView(*out_type));
+ }
+
+ // No type has a purely empty layout
+ DCHECK_GT(out_layout.buffers.size(), 0);
+
+ std::vector<std::shared_ptr<Buffer>> out_buffers;
+
+ // Process null bitmap
+ if (in_buffer_idx == 0 && out_layout.buffers[0].kind == DataTypeLayout::BITMAP) {
+ // Copy input null bitmap
+ RETURN_NOT_OK(CheckInputAvailable());
+ const auto& in_data_item = in_data[in_layout_idx];
+ if (!out_field->nullable() && in_data_item->GetNullCount() != 0) {
+ return InvalidView("nulls in input cannot be viewed as non-nullable");
+ }
+ DCHECK_GT(in_data_item->buffers.size(), in_buffer_idx);
+ out_buffers.push_back(in_data_item->buffers[in_buffer_idx]);
+ out_length = in_data_item->length;
+ out_offset = in_data_item->offset;
+ out_null_count = in_data_item->null_count;
+ ++in_buffer_idx;
+ AdjustInputPointer();
+ } else {
+ // No null bitmap in input, append no-nulls bitmap
+ out_buffers.push_back(nullptr);
if (out_type->id() == Type::NA) {
out_null_count = out_length;
} else {
out_null_count = 0;
}
- }
-
- // Process other buffers in output layout
- for (size_t out_buffer_idx = 1; out_buffer_idx < out_layout.buffers.size();
- ++out_buffer_idx) {
- const auto& out_spec = out_layout.buffers[out_buffer_idx];
- // If always-null buffer is expected, just construct it
- if (out_spec.kind == DataTypeLayout::ALWAYS_NULL) {
- out_buffers.push_back(nullptr);
- continue;
- }
-
- // If input buffer is null bitmap, try to ignore it
- while (in_buffer_idx == 0) {
- RETURN_NOT_OK(CheckInputAvailable());
- if (in_data[in_layout_idx]->GetNullCount() != 0) {
- return InvalidView("cannot represent nested nulls");
- }
- ++in_buffer_idx;
- AdjustInputPointer();
- }
-
- RETURN_NOT_OK(CheckInputAvailable());
- const auto& in_spec = in_layouts[in_layout_idx].buffers[in_buffer_idx];
- if (out_spec != in_spec) {
- return InvalidView("incompatible layouts");
- }
- // Copy input buffer
- const auto& in_data_item = in_data[in_layout_idx];
- out_length = in_data_item->length;
- out_offset = in_data_item->offset;
- DCHECK_GT(in_data_item->buffers.size(), in_buffer_idx);
- out_buffers.push_back(in_data_item->buffers[in_buffer_idx]);
- ++in_buffer_idx;
- AdjustInputPointer();
- }
-
- std::shared_ptr<ArrayData> out_data = ArrayData::Make(
- out_type, out_length, std::move(out_buffers), out_null_count, out_offset);
- out_data->dictionary = dictionary;
-
- // Process children recursively, depth-first
- for (const auto& child_field : out_type->fields()) {
- std::shared_ptr<ArrayData> child_data;
- RETURN_NOT_OK(MakeDataView(child_field, &child_data));
- out_data->child_data.push_back(std::move(child_data));
- }
- *out = std::move(out_data);
- return Status::OK();
- }
-};
-
-} // namespace
-
-namespace internal {
-
-Result<std::shared_ptr<ArrayData>> GetArrayView(
- const std::shared_ptr<ArrayData>& data, const std::shared_ptr<DataType>& out_type) {
- ViewDataImpl impl;
- impl.root_in_type = data->type;
- impl.root_out_type = out_type;
- AccumulateLayouts(impl.root_in_type, &impl.in_layouts);
- AccumulateArrayData(data, &impl.in_data);
- impl.in_data_length = data->length;
-
- std::shared_ptr<ArrayData> out_data;
- // Dummy field for output type
- auto out_field = field("", out_type);
- RETURN_NOT_OK(impl.MakeDataView(out_field, &out_data));
- RETURN_NOT_OK(impl.CheckInputExhausted());
- return out_data;
-}
-
-} // namespace internal
-} // namespace arrow
+ }
+
+ // Process other buffers in output layout
+ for (size_t out_buffer_idx = 1; out_buffer_idx < out_layout.buffers.size();
+ ++out_buffer_idx) {
+ const auto& out_spec = out_layout.buffers[out_buffer_idx];
+ // If always-null buffer is expected, just construct it
+ if (out_spec.kind == DataTypeLayout::ALWAYS_NULL) {
+ out_buffers.push_back(nullptr);
+ continue;
+ }
+
+ // If input buffer is null bitmap, try to ignore it
+ while (in_buffer_idx == 0) {
+ RETURN_NOT_OK(CheckInputAvailable());
+ if (in_data[in_layout_idx]->GetNullCount() != 0) {
+ return InvalidView("cannot represent nested nulls");
+ }
+ ++in_buffer_idx;
+ AdjustInputPointer();
+ }
+
+ RETURN_NOT_OK(CheckInputAvailable());
+ const auto& in_spec = in_layouts[in_layout_idx].buffers[in_buffer_idx];
+ if (out_spec != in_spec) {
+ return InvalidView("incompatible layouts");
+ }
+ // Copy input buffer
+ const auto& in_data_item = in_data[in_layout_idx];
+ out_length = in_data_item->length;
+ out_offset = in_data_item->offset;
+ DCHECK_GT(in_data_item->buffers.size(), in_buffer_idx);
+ out_buffers.push_back(in_data_item->buffers[in_buffer_idx]);
+ ++in_buffer_idx;
+ AdjustInputPointer();
+ }
+
+ std::shared_ptr<ArrayData> out_data = ArrayData::Make(
+ out_type, out_length, std::move(out_buffers), out_null_count, out_offset);
+ out_data->dictionary = dictionary;
+
+ // Process children recursively, depth-first
+ for (const auto& child_field : out_type->fields()) {
+ std::shared_ptr<ArrayData> child_data;
+ RETURN_NOT_OK(MakeDataView(child_field, &child_data));
+ out_data->child_data.push_back(std::move(child_data));
+ }
+ *out = std::move(out_data);
+ return Status::OK();
+ }
+};
+
+} // namespace
+
+namespace internal {
+
+Result<std::shared_ptr<ArrayData>> GetArrayView(
+ const std::shared_ptr<ArrayData>& data, const std::shared_ptr<DataType>& out_type) {
+ ViewDataImpl impl;
+ impl.root_in_type = data->type;
+ impl.root_out_type = out_type;
+ AccumulateLayouts(impl.root_in_type, &impl.in_layouts);
+ AccumulateArrayData(data, &impl.in_data);
+ impl.in_data_length = data->length;
+
+ std::shared_ptr<ArrayData> out_data;
+ // Dummy field for output type
+ auto out_field = field("", out_type);
+ RETURN_NOT_OK(impl.MakeDataView(out_field, &out_data));
+ RETURN_NOT_OK(impl.CheckInputExhausted());
+ return out_data;
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/data.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/data.h
index 418d09def6b..faf57e9724b 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/data.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/data.h
@@ -1,258 +1,258 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <atomic> // IWYU pragma: export
-#include <cstdint>
-#include <memory>
-#include <utility>
-#include <vector>
-
-#include "arrow/buffer.h"
-#include "arrow/result.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-// When slicing, we do not know the null count of the sliced range without
-// doing some computation. To avoid doing this eagerly, we set the null count
-// to -1 (any negative number will do). When Array::null_count is called the
-// first time, the null count will be computed. See ARROW-33
-constexpr int64_t kUnknownNullCount = -1;
-
-// ----------------------------------------------------------------------
-// Generic array data container
-
-/// \class ArrayData
-/// \brief Mutable container for generic Arrow array data
-///
-/// This data structure is a self-contained representation of the memory and
-/// metadata inside an Arrow array data structure (called vectors in Java). The
-/// classes arrow::Array and its subclasses provide strongly-typed accessors
-/// with support for the visitor pattern and other affordances.
-///
-/// This class is designed for easy internal data manipulation, analytical data
-/// processing, and data transport to and from IPC messages. For example, we
-/// could cast from int64 to float64 like so:
-///
-/// Int64Array arr = GetMyData();
-/// auto new_data = arr.data()->Copy();
-/// new_data->type = arrow::float64();
-/// DoubleArray double_arr(new_data);
-///
-/// This object is also useful in an analytics setting where memory may be
-/// reused. For example, if we had a group of operations all returning doubles,
-/// say:
-///
-/// Log(Sqrt(Expr(arr)))
-///
-/// Then the low-level implementations of each of these functions could have
-/// the signatures
-///
-/// void Log(const ArrayData& values, ArrayData* out);
-///
-/// As another example a function may consume one or more memory buffers in an
-/// input array and replace them with newly-allocated data, changing the output
-/// data type as well.
-struct ARROW_EXPORT ArrayData {
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <atomic> // IWYU pragma: export
+#include <cstdint>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "arrow/buffer.h"
+#include "arrow/result.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+// When slicing, we do not know the null count of the sliced range without
+// doing some computation. To avoid doing this eagerly, we set the null count
+// to -1 (any negative number will do). When Array::null_count is called the
+// first time, the null count will be computed. See ARROW-33
+constexpr int64_t kUnknownNullCount = -1;
+
+// ----------------------------------------------------------------------
+// Generic array data container
+
+/// \class ArrayData
+/// \brief Mutable container for generic Arrow array data
+///
+/// This data structure is a self-contained representation of the memory and
+/// metadata inside an Arrow array data structure (called vectors in Java). The
+/// classes arrow::Array and its subclasses provide strongly-typed accessors
+/// with support for the visitor pattern and other affordances.
+///
+/// This class is designed for easy internal data manipulation, analytical data
+/// processing, and data transport to and from IPC messages. For example, we
+/// could cast from int64 to float64 like so:
+///
+/// Int64Array arr = GetMyData();
+/// auto new_data = arr.data()->Copy();
+/// new_data->type = arrow::float64();
+/// DoubleArray double_arr(new_data);
+///
+/// This object is also useful in an analytics setting where memory may be
+/// reused. For example, if we had a group of operations all returning doubles,
+/// say:
+///
+/// Log(Sqrt(Expr(arr)))
+///
+/// Then the low-level implementations of each of these functions could have
+/// the signatures
+///
+/// void Log(const ArrayData& values, ArrayData* out);
+///
+/// As another example a function may consume one or more memory buffers in an
+/// input array and replace them with newly-allocated data, changing the output
+/// data type as well.
+struct ARROW_EXPORT ArrayData {
ArrayData() = default;
-
+
ArrayData(std::shared_ptr<DataType> type, int64_t length,
- int64_t null_count = kUnknownNullCount, int64_t offset = 0)
+ int64_t null_count = kUnknownNullCount, int64_t offset = 0)
: type(std::move(type)), length(length), null_count(null_count), offset(offset) {}
-
+
ArrayData(std::shared_ptr<DataType> type, int64_t length,
- std::vector<std::shared_ptr<Buffer>> buffers,
- int64_t null_count = kUnknownNullCount, int64_t offset = 0)
+ std::vector<std::shared_ptr<Buffer>> buffers,
+ int64_t null_count = kUnknownNullCount, int64_t offset = 0)
: ArrayData(std::move(type), length, null_count, offset) {
- this->buffers = std::move(buffers);
- }
-
+ this->buffers = std::move(buffers);
+ }
+
ArrayData(std::shared_ptr<DataType> type, int64_t length,
- std::vector<std::shared_ptr<Buffer>> buffers,
- std::vector<std::shared_ptr<ArrayData>> child_data,
- int64_t null_count = kUnknownNullCount, int64_t offset = 0)
+ std::vector<std::shared_ptr<Buffer>> buffers,
+ std::vector<std::shared_ptr<ArrayData>> child_data,
+ int64_t null_count = kUnknownNullCount, int64_t offset = 0)
: ArrayData(std::move(type), length, null_count, offset) {
- this->buffers = std::move(buffers);
- this->child_data = std::move(child_data);
- }
-
+ this->buffers = std::move(buffers);
+ this->child_data = std::move(child_data);
+ }
+
static std::shared_ptr<ArrayData> Make(std::shared_ptr<DataType> type, int64_t length,
- std::vector<std::shared_ptr<Buffer>> buffers,
- int64_t null_count = kUnknownNullCount,
- int64_t offset = 0);
-
- static std::shared_ptr<ArrayData> Make(
+ std::vector<std::shared_ptr<Buffer>> buffers,
+ int64_t null_count = kUnknownNullCount,
+ int64_t offset = 0);
+
+ static std::shared_ptr<ArrayData> Make(
std::shared_ptr<DataType> type, int64_t length,
- std::vector<std::shared_ptr<Buffer>> buffers,
- std::vector<std::shared_ptr<ArrayData>> child_data,
- int64_t null_count = kUnknownNullCount, int64_t offset = 0);
-
- static std::shared_ptr<ArrayData> Make(
+ std::vector<std::shared_ptr<Buffer>> buffers,
+ std::vector<std::shared_ptr<ArrayData>> child_data,
+ int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
+ static std::shared_ptr<ArrayData> Make(
std::shared_ptr<DataType> type, int64_t length,
- std::vector<std::shared_ptr<Buffer>> buffers,
- std::vector<std::shared_ptr<ArrayData>> child_data,
- std::shared_ptr<ArrayData> dictionary, int64_t null_count = kUnknownNullCount,
- int64_t offset = 0);
-
+ std::vector<std::shared_ptr<Buffer>> buffers,
+ std::vector<std::shared_ptr<ArrayData>> child_data,
+ std::shared_ptr<ArrayData> dictionary, int64_t null_count = kUnknownNullCount,
+ int64_t offset = 0);
+
static std::shared_ptr<ArrayData> Make(std::shared_ptr<DataType> type, int64_t length,
- int64_t null_count = kUnknownNullCount,
- int64_t offset = 0);
-
- // Move constructor
- ArrayData(ArrayData&& other) noexcept
- : type(std::move(other.type)),
- length(other.length),
- offset(other.offset),
- buffers(std::move(other.buffers)),
- child_data(std::move(other.child_data)),
- dictionary(std::move(other.dictionary)) {
- SetNullCount(other.null_count);
- }
-
- // Copy constructor
- ArrayData(const ArrayData& other) noexcept
- : type(other.type),
- length(other.length),
- offset(other.offset),
- buffers(other.buffers),
- child_data(other.child_data),
- dictionary(other.dictionary) {
- SetNullCount(other.null_count);
- }
-
- // Move assignment
- ArrayData& operator=(ArrayData&& other) {
- type = std::move(other.type);
- length = other.length;
- SetNullCount(other.null_count);
- offset = other.offset;
- buffers = std::move(other.buffers);
- child_data = std::move(other.child_data);
- dictionary = std::move(other.dictionary);
- return *this;
- }
-
- // Copy assignment
- ArrayData& operator=(const ArrayData& other) {
- type = other.type;
- length = other.length;
- SetNullCount(other.null_count);
- offset = other.offset;
- buffers = other.buffers;
- child_data = other.child_data;
- dictionary = other.dictionary;
- return *this;
- }
-
- std::shared_ptr<ArrayData> Copy() const { return std::make_shared<ArrayData>(*this); }
-
- // Access a buffer's data as a typed C pointer
- template <typename T>
- inline const T* GetValues(int i, int64_t absolute_offset) const {
- if (buffers[i]) {
- return reinterpret_cast<const T*>(buffers[i]->data()) + absolute_offset;
- } else {
- return NULLPTR;
- }
- }
-
- template <typename T>
- inline const T* GetValues(int i) const {
- return GetValues<T>(i, offset);
- }
-
- // Like GetValues, but returns NULLPTR instead of aborting if the underlying
- // buffer is not a CPU buffer.
- template <typename T>
- inline const T* GetValuesSafe(int i, int64_t absolute_offset) const {
- if (buffers[i] && buffers[i]->is_cpu()) {
- return reinterpret_cast<const T*>(buffers[i]->data()) + absolute_offset;
- } else {
- return NULLPTR;
- }
- }
-
- template <typename T>
- inline const T* GetValuesSafe(int i) const {
- return GetValuesSafe<T>(i, offset);
- }
-
- // Access a buffer's data as a typed C pointer
- template <typename T>
- inline T* GetMutableValues(int i, int64_t absolute_offset) {
- if (buffers[i]) {
- return reinterpret_cast<T*>(buffers[i]->mutable_data()) + absolute_offset;
- } else {
- return NULLPTR;
- }
- }
-
- template <typename T>
- inline T* GetMutableValues(int i) {
- return GetMutableValues<T>(i, offset);
- }
-
- /// \brief Construct a zero-copy slice of the data with the given offset and length
- std::shared_ptr<ArrayData> Slice(int64_t offset, int64_t length) const;
-
- /// \brief Input-checking variant of Slice
- ///
- /// An Invalid Status is returned if the requested slice falls out of bounds.
- /// Note that unlike Slice, `length` isn't clamped to the available buffer size.
- Result<std::shared_ptr<ArrayData>> SliceSafe(int64_t offset, int64_t length) const;
-
- void SetNullCount(int64_t v) { null_count.store(v); }
-
- /// \brief Return null count, or compute and set it if it's not known
- int64_t GetNullCount() const;
-
- bool MayHaveNulls() const {
- // If an ArrayData is slightly malformed it may have kUnknownNullCount set
- // but no buffer
- return null_count.load() != 0 && buffers[0] != NULLPTR;
- }
-
- std::shared_ptr<DataType> type;
+ int64_t null_count = kUnknownNullCount,
+ int64_t offset = 0);
+
+ // Move constructor
+ ArrayData(ArrayData&& other) noexcept
+ : type(std::move(other.type)),
+ length(other.length),
+ offset(other.offset),
+ buffers(std::move(other.buffers)),
+ child_data(std::move(other.child_data)),
+ dictionary(std::move(other.dictionary)) {
+ SetNullCount(other.null_count);
+ }
+
+ // Copy constructor
+ ArrayData(const ArrayData& other) noexcept
+ : type(other.type),
+ length(other.length),
+ offset(other.offset),
+ buffers(other.buffers),
+ child_data(other.child_data),
+ dictionary(other.dictionary) {
+ SetNullCount(other.null_count);
+ }
+
+ // Move assignment
+ ArrayData& operator=(ArrayData&& other) {
+ type = std::move(other.type);
+ length = other.length;
+ SetNullCount(other.null_count);
+ offset = other.offset;
+ buffers = std::move(other.buffers);
+ child_data = std::move(other.child_data);
+ dictionary = std::move(other.dictionary);
+ return *this;
+ }
+
+ // Copy assignment
+ ArrayData& operator=(const ArrayData& other) {
+ type = other.type;
+ length = other.length;
+ SetNullCount(other.null_count);
+ offset = other.offset;
+ buffers = other.buffers;
+ child_data = other.child_data;
+ dictionary = other.dictionary;
+ return *this;
+ }
+
+ std::shared_ptr<ArrayData> Copy() const { return std::make_shared<ArrayData>(*this); }
+
+ // Access a buffer's data as a typed C pointer
+ template <typename T>
+ inline const T* GetValues(int i, int64_t absolute_offset) const {
+ if (buffers[i]) {
+ return reinterpret_cast<const T*>(buffers[i]->data()) + absolute_offset;
+ } else {
+ return NULLPTR;
+ }
+ }
+
+ template <typename T>
+ inline const T* GetValues(int i) const {
+ return GetValues<T>(i, offset);
+ }
+
+ // Like GetValues, but returns NULLPTR instead of aborting if the underlying
+ // buffer is not a CPU buffer.
+ template <typename T>
+ inline const T* GetValuesSafe(int i, int64_t absolute_offset) const {
+ if (buffers[i] && buffers[i]->is_cpu()) {
+ return reinterpret_cast<const T*>(buffers[i]->data()) + absolute_offset;
+ } else {
+ return NULLPTR;
+ }
+ }
+
+ template <typename T>
+ inline const T* GetValuesSafe(int i) const {
+ return GetValuesSafe<T>(i, offset);
+ }
+
+ // Access a buffer's data as a typed C pointer
+ template <typename T>
+ inline T* GetMutableValues(int i, int64_t absolute_offset) {
+ if (buffers[i]) {
+ return reinterpret_cast<T*>(buffers[i]->mutable_data()) + absolute_offset;
+ } else {
+ return NULLPTR;
+ }
+ }
+
+ template <typename T>
+ inline T* GetMutableValues(int i) {
+ return GetMutableValues<T>(i, offset);
+ }
+
+ /// \brief Construct a zero-copy slice of the data with the given offset and length
+ std::shared_ptr<ArrayData> Slice(int64_t offset, int64_t length) const;
+
+ /// \brief Input-checking variant of Slice
+ ///
+ /// An Invalid Status is returned if the requested slice falls out of bounds.
+ /// Note that unlike Slice, `length` isn't clamped to the available buffer size.
+ Result<std::shared_ptr<ArrayData>> SliceSafe(int64_t offset, int64_t length) const;
+
+ void SetNullCount(int64_t v) { null_count.store(v); }
+
+ /// \brief Return null count, or compute and set it if it's not known
+ int64_t GetNullCount() const;
+
+ bool MayHaveNulls() const {
+ // If an ArrayData is slightly malformed it may have kUnknownNullCount set
+ // but no buffer
+ return null_count.load() != 0 && buffers[0] != NULLPTR;
+ }
+
+ std::shared_ptr<DataType> type;
int64_t length = 0;
mutable std::atomic<int64_t> null_count{0};
- // The logical start point into the physical buffers (in values, not bytes).
- // Note that, for child data, this must be *added* to the child data's own offset.
+ // The logical start point into the physical buffers (in values, not bytes).
+ // Note that, for child data, this must be *added* to the child data's own offset.
int64_t offset = 0;
- std::vector<std::shared_ptr<Buffer>> buffers;
- std::vector<std::shared_ptr<ArrayData>> child_data;
-
- // The dictionary for this Array, if any. Only used for dictionary type
- std::shared_ptr<ArrayData> dictionary;
-};
-
-namespace internal {
-
-/// Construct a zero-copy view of this ArrayData with the given type.
-///
-/// This method checks if the types are layout-compatible.
-/// Nested types are traversed in depth-first order. Data buffers must have
-/// the same item sizes, even though the logical types may be different.
-/// An error is returned if the types are not layout-compatible.
-ARROW_EXPORT
-Result<std::shared_ptr<ArrayData>> GetArrayView(const std::shared_ptr<ArrayData>& data,
- const std::shared_ptr<DataType>& type);
-
-} // namespace internal
-} // namespace arrow
+ std::vector<std::shared_ptr<Buffer>> buffers;
+ std::vector<std::shared_ptr<ArrayData>> child_data;
+
+ // The dictionary for this Array, if any. Only used for dictionary type
+ std::shared_ptr<ArrayData> dictionary;
+};
+
+namespace internal {
+
+/// Construct a zero-copy view of this ArrayData with the given type.
+///
+/// This method checks if the types are layout-compatible.
+/// Nested types are traversed in depth-first order. Data buffers must have
+/// the same item sizes, even though the logical types may be different.
+/// An error is returned if the types are not layout-compatible.
+ARROW_EXPORT
+Result<std::shared_ptr<ArrayData>> GetArrayView(const std::shared_ptr<ArrayData>& data,
+ const std::shared_ptr<DataType>& type);
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/dict_internal.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/dict_internal.h
index aa027ac22de..9367346d5a9 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/dict_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/dict_internal.h
@@ -1,193 +1,193 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "arrow/array/builder_dict.h"
-
-#include <cstdint>
-#include <limits>
-#include <memory>
-#include <type_traits>
-#include <utility>
-#include <vector>
-
-#include "arrow/array.h"
-#include "arrow/buffer.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/hashing.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/string_view.h"
-#include "arrow/visitor_inline.h"
-
-namespace arrow {
-namespace internal {
-
-template <typename T, typename Enable = void>
-struct DictionaryTraits {
- using MemoTableType = void;
-};
-
-} // namespace internal
-
-template <typename T, typename Out = void>
-using enable_if_memoize = enable_if_t<
- !std::is_same<typename internal::DictionaryTraits<T>::MemoTableType, void>::value,
- Out>;
-
-template <typename T, typename Out = void>
-using enable_if_no_memoize = enable_if_t<
- std::is_same<typename internal::DictionaryTraits<T>::MemoTableType, void>::value,
- Out>;
-
-namespace internal {
-
-template <>
-struct DictionaryTraits<BooleanType> {
- using T = BooleanType;
- using MemoTableType = typename HashTraits<T>::MemoTableType;
-
- static Status GetDictionaryArrayData(MemoryPool* pool,
- const std::shared_ptr<DataType>& type,
- const MemoTableType& memo_table,
- int64_t start_offset,
- std::shared_ptr<ArrayData>* out) {
- if (start_offset < 0) {
- return Status::Invalid("invalid start_offset ", start_offset);
- }
-
- BooleanBuilder builder(pool);
- const auto& bool_values = memo_table.values();
- const auto null_index = memo_table.GetNull();
-
- // Will iterate up to 3 times.
- for (int64_t i = start_offset; i < memo_table.size(); i++) {
- RETURN_NOT_OK(i == null_index ? builder.AppendNull()
- : builder.Append(bool_values[i]));
- }
-
- return builder.FinishInternal(out);
- }
-}; // namespace internal
-
-template <typename T>
-struct DictionaryTraits<T, enable_if_has_c_type<T>> {
- using c_type = typename T::c_type;
- using MemoTableType = typename HashTraits<T>::MemoTableType;
-
- static Status GetDictionaryArrayData(MemoryPool* pool,
- const std::shared_ptr<DataType>& type,
- const MemoTableType& memo_table,
- int64_t start_offset,
- std::shared_ptr<ArrayData>* out) {
- auto dict_length = static_cast<int64_t>(memo_table.size()) - start_offset;
- // This makes a copy, but we assume a dictionary array is usually small
- // compared to the size of the dictionary-using array.
- // (also, copying the dictionary values is cheap compared to the cost
- // of building the memo table)
- ARROW_ASSIGN_OR_RAISE(
- std::shared_ptr<Buffer> dict_buffer,
- AllocateBuffer(TypeTraits<T>::bytes_required(dict_length), pool));
- memo_table.CopyValues(static_cast<int32_t>(start_offset),
- reinterpret_cast<c_type*>(dict_buffer->mutable_data()));
-
- int64_t null_count = 0;
- std::shared_ptr<Buffer> null_bitmap = nullptr;
- RETURN_NOT_OK(
- ComputeNullBitmap(pool, memo_table, start_offset, &null_count, &null_bitmap));
-
- *out = ArrayData::Make(type, dict_length, {null_bitmap, dict_buffer}, null_count);
- return Status::OK();
- }
-};
-
-template <typename T>
-struct DictionaryTraits<T, enable_if_base_binary<T>> {
- using MemoTableType = typename HashTraits<T>::MemoTableType;
-
- static Status GetDictionaryArrayData(MemoryPool* pool,
- const std::shared_ptr<DataType>& type,
- const MemoTableType& memo_table,
- int64_t start_offset,
- std::shared_ptr<ArrayData>* out) {
- using offset_type = typename T::offset_type;
-
- // Create the offsets buffer
- auto dict_length = static_cast<int64_t>(memo_table.size() - start_offset);
- ARROW_ASSIGN_OR_RAISE(auto dict_offsets,
- AllocateBuffer(sizeof(offset_type) * (dict_length + 1), pool));
- auto raw_offsets = reinterpret_cast<offset_type*>(dict_offsets->mutable_data());
- memo_table.CopyOffsets(static_cast<int32_t>(start_offset), raw_offsets);
-
- // Create the data buffer
- auto values_size = memo_table.values_size();
- ARROW_ASSIGN_OR_RAISE(auto dict_data, AllocateBuffer(values_size, pool));
- if (values_size > 0) {
- memo_table.CopyValues(static_cast<int32_t>(start_offset), dict_data->size(),
- dict_data->mutable_data());
- }
-
- int64_t null_count = 0;
- std::shared_ptr<Buffer> null_bitmap = nullptr;
- RETURN_NOT_OK(
- ComputeNullBitmap(pool, memo_table, start_offset, &null_count, &null_bitmap));
-
- *out = ArrayData::Make(type, dict_length,
- {null_bitmap, std::move(dict_offsets), std::move(dict_data)},
- null_count);
-
- return Status::OK();
- }
-};
-
-template <typename T>
-struct DictionaryTraits<T, enable_if_fixed_size_binary<T>> {
- using MemoTableType = typename HashTraits<T>::MemoTableType;
-
- static Status GetDictionaryArrayData(MemoryPool* pool,
- const std::shared_ptr<DataType>& type,
- const MemoTableType& memo_table,
- int64_t start_offset,
- std::shared_ptr<ArrayData>* out) {
- const T& concrete_type = internal::checked_cast<const T&>(*type);
-
- // Create the data buffer
- auto dict_length = static_cast<int64_t>(memo_table.size() - start_offset);
- auto width_length = concrete_type.byte_width();
- auto data_length = dict_length * width_length;
- ARROW_ASSIGN_OR_RAISE(auto dict_data, AllocateBuffer(data_length, pool));
- auto data = dict_data->mutable_data();
-
- memo_table.CopyFixedWidthValues(static_cast<int32_t>(start_offset), width_length,
- data_length, data);
-
- int64_t null_count = 0;
- std::shared_ptr<Buffer> null_bitmap = nullptr;
- RETURN_NOT_OK(
- ComputeNullBitmap(pool, memo_table, start_offset, &null_count, &null_bitmap));
-
- *out = ArrayData::Make(type, dict_length, {null_bitmap, std::move(dict_data)},
- null_count);
- return Status::OK();
- }
-};
-
-} // namespace internal
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/array/builder_dict.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/buffer.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/hashing.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/string_view.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+namespace internal {
+
+template <typename T, typename Enable = void>
+struct DictionaryTraits {
+ using MemoTableType = void;
+};
+
+} // namespace internal
+
+template <typename T, typename Out = void>
+using enable_if_memoize = enable_if_t<
+ !std::is_same<typename internal::DictionaryTraits<T>::MemoTableType, void>::value,
+ Out>;
+
+template <typename T, typename Out = void>
+using enable_if_no_memoize = enable_if_t<
+ std::is_same<typename internal::DictionaryTraits<T>::MemoTableType, void>::value,
+ Out>;
+
+namespace internal {
+
+template <>
+struct DictionaryTraits<BooleanType> {
+ using T = BooleanType;
+ using MemoTableType = typename HashTraits<T>::MemoTableType;
+
+ static Status GetDictionaryArrayData(MemoryPool* pool,
+ const std::shared_ptr<DataType>& type,
+ const MemoTableType& memo_table,
+ int64_t start_offset,
+ std::shared_ptr<ArrayData>* out) {
+ if (start_offset < 0) {
+ return Status::Invalid("invalid start_offset ", start_offset);
+ }
+
+ BooleanBuilder builder(pool);
+ const auto& bool_values = memo_table.values();
+ const auto null_index = memo_table.GetNull();
+
+ // Will iterate up to 3 times.
+ for (int64_t i = start_offset; i < memo_table.size(); i++) {
+ RETURN_NOT_OK(i == null_index ? builder.AppendNull()
+ : builder.Append(bool_values[i]));
+ }
+
+ return builder.FinishInternal(out);
+ }
+}; // namespace internal
+
+template <typename T>
+struct DictionaryTraits<T, enable_if_has_c_type<T>> {
+ using c_type = typename T::c_type;
+ using MemoTableType = typename HashTraits<T>::MemoTableType;
+
+ static Status GetDictionaryArrayData(MemoryPool* pool,
+ const std::shared_ptr<DataType>& type,
+ const MemoTableType& memo_table,
+ int64_t start_offset,
+ std::shared_ptr<ArrayData>* out) {
+ auto dict_length = static_cast<int64_t>(memo_table.size()) - start_offset;
+ // This makes a copy, but we assume a dictionary array is usually small
+ // compared to the size of the dictionary-using array.
+ // (also, copying the dictionary values is cheap compared to the cost
+ // of building the memo table)
+ ARROW_ASSIGN_OR_RAISE(
+ std::shared_ptr<Buffer> dict_buffer,
+ AllocateBuffer(TypeTraits<T>::bytes_required(dict_length), pool));
+ memo_table.CopyValues(static_cast<int32_t>(start_offset),
+ reinterpret_cast<c_type*>(dict_buffer->mutable_data()));
+
+ int64_t null_count = 0;
+ std::shared_ptr<Buffer> null_bitmap = nullptr;
+ RETURN_NOT_OK(
+ ComputeNullBitmap(pool, memo_table, start_offset, &null_count, &null_bitmap));
+
+ *out = ArrayData::Make(type, dict_length, {null_bitmap, dict_buffer}, null_count);
+ return Status::OK();
+ }
+};
+
+template <typename T>
+struct DictionaryTraits<T, enable_if_base_binary<T>> {
+ using MemoTableType = typename HashTraits<T>::MemoTableType;
+
+ static Status GetDictionaryArrayData(MemoryPool* pool,
+ const std::shared_ptr<DataType>& type,
+ const MemoTableType& memo_table,
+ int64_t start_offset,
+ std::shared_ptr<ArrayData>* out) {
+ using offset_type = typename T::offset_type;
+
+ // Create the offsets buffer
+ auto dict_length = static_cast<int64_t>(memo_table.size() - start_offset);
+ ARROW_ASSIGN_OR_RAISE(auto dict_offsets,
+ AllocateBuffer(sizeof(offset_type) * (dict_length + 1), pool));
+ auto raw_offsets = reinterpret_cast<offset_type*>(dict_offsets->mutable_data());
+ memo_table.CopyOffsets(static_cast<int32_t>(start_offset), raw_offsets);
+
+ // Create the data buffer
+ auto values_size = memo_table.values_size();
+ ARROW_ASSIGN_OR_RAISE(auto dict_data, AllocateBuffer(values_size, pool));
+ if (values_size > 0) {
+ memo_table.CopyValues(static_cast<int32_t>(start_offset), dict_data->size(),
+ dict_data->mutable_data());
+ }
+
+ int64_t null_count = 0;
+ std::shared_ptr<Buffer> null_bitmap = nullptr;
+ RETURN_NOT_OK(
+ ComputeNullBitmap(pool, memo_table, start_offset, &null_count, &null_bitmap));
+
+ *out = ArrayData::Make(type, dict_length,
+ {null_bitmap, std::move(dict_offsets), std::move(dict_data)},
+ null_count);
+
+ return Status::OK();
+ }
+};
+
+template <typename T>
+struct DictionaryTraits<T, enable_if_fixed_size_binary<T>> {
+ using MemoTableType = typename HashTraits<T>::MemoTableType;
+
+ static Status GetDictionaryArrayData(MemoryPool* pool,
+ const std::shared_ptr<DataType>& type,
+ const MemoTableType& memo_table,
+ int64_t start_offset,
+ std::shared_ptr<ArrayData>* out) {
+ const T& concrete_type = internal::checked_cast<const T&>(*type);
+
+ // Create the data buffer
+ auto dict_length = static_cast<int64_t>(memo_table.size() - start_offset);
+ auto width_length = concrete_type.byte_width();
+ auto data_length = dict_length * width_length;
+ ARROW_ASSIGN_OR_RAISE(auto dict_data, AllocateBuffer(data_length, pool));
+ auto data = dict_data->mutable_data();
+
+ memo_table.CopyFixedWidthValues(static_cast<int32_t>(start_offset), width_length,
+ data_length, data);
+
+ int64_t null_count = 0;
+ std::shared_ptr<Buffer> null_bitmap = nullptr;
+ RETURN_NOT_OK(
+ ComputeNullBitmap(pool, memo_table, start_offset, &null_count, &null_bitmap));
+
+ *out = ArrayData::Make(type, dict_length, {null_bitmap, std::move(dict_data)},
+ null_count);
+ return Status::OK();
+ }
+};
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/diff.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/diff.cc
index a94ca178a40..0145e847eb6 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/diff.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/diff.cc
@@ -1,784 +1,784 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/array/diff.h"
-
-#include <algorithm>
-#include <chrono>
-#include <functional>
-#include <memory>
-#include <sstream>
-#include <string>
-#include <type_traits>
-#include <utility>
-#include <vector>
-
-#include "arrow/array/array_base.h"
-#include "arrow/array/array_decimal.h"
-#include "arrow/array/array_nested.h"
-#include "arrow/array/array_primitive.h"
-#include "arrow/buffer.h"
-#include "arrow/buffer_builder.h"
-#include "arrow/extension_type.h"
-#include "arrow/memory_pool.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/range.h"
-#include "arrow/util/string.h"
-#include "arrow/util/string_view.h"
-#include "arrow/vendored/datetime.h"
-#include "arrow/visitor_inline.h"
-
-namespace arrow {
-
-using internal::checked_cast;
-using internal::checked_pointer_cast;
-using internal::MakeLazyRange;
-
-template <typename ArrayType>
-auto GetView(const ArrayType& array, int64_t index) -> decltype(array.GetView(index)) {
- return array.GetView(index);
-}
-
-struct Slice {
- const Array* array_;
- int64_t offset_, length_;
-
- bool operator==(const Slice& other) const {
- return length_ == other.length_ &&
- array_->RangeEquals(offset_, offset_ + length_, other.offset_, *other.array_);
- }
- bool operator!=(const Slice& other) const { return !(*this == other); }
-};
-
-template <typename ArrayType, typename T = typename ArrayType::TypeClass,
- typename = enable_if_list_like<T>>
-static Slice GetView(const ArrayType& array, int64_t index) {
- return Slice{array.values().get(), array.value_offset(index),
- array.value_length(index)};
-}
-
-struct UnitSlice {
- const Array* array_;
- int64_t offset_;
-
- bool operator==(const UnitSlice& other) const {
- return array_->RangeEquals(offset_, offset_ + 1, other.offset_, *other.array_);
- }
- bool operator!=(const UnitSlice& other) const { return !(*this == other); }
-};
-
-// FIXME(bkietz) this is inefficient;
-// StructArray's fields can be diffed independently then merged
-static UnitSlice GetView(const StructArray& array, int64_t index) {
- return UnitSlice{&array, index};
-}
-
-static UnitSlice GetView(const UnionArray& array, int64_t index) {
- return UnitSlice{&array, index};
-}
-
-using ValueComparator = std::function<bool(const Array&, int64_t, const Array&, int64_t)>;
-
-struct ValueComparatorVisitor {
- template <typename T>
- Status Visit(const T&) {
- using ArrayType = typename TypeTraits<T>::ArrayType;
- out = [](const Array& base, int64_t base_index, const Array& target,
- int64_t target_index) {
- return (GetView(checked_cast<const ArrayType&>(base), base_index) ==
- GetView(checked_cast<const ArrayType&>(target), target_index));
- };
- return Status::OK();
- }
-
- Status Visit(const NullType&) { return Status::NotImplemented("null type"); }
-
- Status Visit(const ExtensionType&) { return Status::NotImplemented("extension type"); }
-
- Status Visit(const DictionaryType&) {
- return Status::NotImplemented("dictionary type");
- }
-
- ValueComparator Create(const DataType& type) {
- DCHECK_OK(VisitTypeInline(type, this));
- return out;
- }
-
- ValueComparator out;
-};
-
-ValueComparator GetValueComparator(const DataType& type) {
- ValueComparatorVisitor type_visitor;
- return type_visitor.Create(type);
-}
-
-// represents an intermediate state in the comparison of two arrays
-struct EditPoint {
- int64_t base, target;
- bool operator==(EditPoint other) const {
- return base == other.base && target == other.target;
- }
-};
-
-/// A generic sequence difference algorithm, based on
-///
-/// E. W. Myers, "An O(ND) difference algorithm and its variations,"
-/// Algorithmica, vol. 1, no. 1-4, pp. 251–266, 1986.
-///
-/// To summarize, an edit script is computed by maintaining the furthest set of EditPoints
-/// which are reachable in a given number of edits D. This is used to compute the furthest
-/// set reachable with D+1 edits, and the process continues inductively until a complete
-/// edit script is discovered.
-///
-/// From each edit point a single deletion and insertion is made then as many shared
-/// elements as possible are skipped, recording only the endpoint of the run. This
-/// representation is minimal in the common case where the sequences differ only slightly,
-/// since most of the elements are shared between base and target and are represented
-/// implicitly.
-class QuadraticSpaceMyersDiff {
- public:
- QuadraticSpaceMyersDiff(const Array& base, const Array& target, MemoryPool* pool)
- : base_(base),
- target_(target),
- pool_(pool),
- value_comparator_(GetValueComparator(*base.type())),
- base_begin_(0),
- base_end_(base.length()),
- target_begin_(0),
- target_end_(target.length()),
- endpoint_base_({ExtendFrom({base_begin_, target_begin_}).base}),
- insert_({true}) {
- if ((base_end_ - base_begin_ == target_end_ - target_begin_) &&
- endpoint_base_[0] == base_end_) {
- // trivial case: base == target
- finish_index_ = 0;
- }
- }
-
- bool ValuesEqual(int64_t base_index, int64_t target_index) const {
- bool base_null = base_.IsNull(base_index);
- bool target_null = target_.IsNull(target_index);
- if (base_null || target_null) {
- // If only one is null, then this is false, otherwise true
- return base_null && target_null;
- }
- return value_comparator_(base_, base_index, target_, target_index);
- }
-
- // increment the position within base (the element pointed to was deleted)
- // then extend maximally
- EditPoint DeleteOne(EditPoint p) const {
- if (p.base != base_end_) {
- ++p.base;
- }
- return ExtendFrom(p);
- }
-
- // increment the position within target (the element pointed to was inserted)
- // then extend maximally
- EditPoint InsertOne(EditPoint p) const {
- if (p.target != target_end_) {
- ++p.target;
- }
- return ExtendFrom(p);
- }
-
- // increment the position within base and target (the elements skipped in this way were
- // present in both sequences)
- EditPoint ExtendFrom(EditPoint p) const {
- for (; p.base != base_end_ && p.target != target_end_; ++p.base, ++p.target) {
- if (!ValuesEqual(p.base, p.target)) {
- break;
- }
- }
- return p;
- }
-
- // beginning of a range for storing per-edit state in endpoint_base_ and insert_
- int64_t StorageOffset(int64_t edit_count) const {
- return edit_count * (edit_count + 1) / 2;
- }
-
- // given edit_count and index, augment endpoint_base_[index] with the corresponding
- // position in target (which is only implicitly represented in edit_count, index)
- EditPoint GetEditPoint(int64_t edit_count, int64_t index) const {
- DCHECK_GE(index, StorageOffset(edit_count));
- DCHECK_LT(index, StorageOffset(edit_count + 1));
- auto insertions_minus_deletions =
- 2 * (index - StorageOffset(edit_count)) - edit_count;
- auto maximal_base = endpoint_base_[index];
- auto maximal_target = std::min(
- target_begin_ + ((maximal_base - base_begin_) + insertions_minus_deletions),
- target_end_);
- return {maximal_base, maximal_target};
- }
-
- void Next() {
- ++edit_count_;
- // base_begin_ is used as a dummy value here since Iterator may not be default
- // constructible. The newly allocated range is completely overwritten below.
- endpoint_base_.resize(StorageOffset(edit_count_ + 1), base_begin_);
- insert_.resize(StorageOffset(edit_count_ + 1), false);
-
- auto previous_offset = StorageOffset(edit_count_ - 1);
- auto current_offset = StorageOffset(edit_count_);
-
- // try deleting from base first
- for (int64_t i = 0, i_out = 0; i < edit_count_; ++i, ++i_out) {
- auto previous_endpoint = GetEditPoint(edit_count_ - 1, i + previous_offset);
- endpoint_base_[i_out + current_offset] = DeleteOne(previous_endpoint).base;
- }
-
- // check if inserting from target could do better
- for (int64_t i = 0, i_out = 1; i < edit_count_; ++i, ++i_out) {
- // retrieve the previously computed best endpoint for (edit_count_, i_out)
- // for comparison with the best endpoint achievable with an insertion
- auto endpoint_after_deletion = GetEditPoint(edit_count_, i_out + current_offset);
-
- auto previous_endpoint = GetEditPoint(edit_count_ - 1, i + previous_offset);
- auto endpoint_after_insertion = InsertOne(previous_endpoint);
-
- if (endpoint_after_insertion.base - endpoint_after_deletion.base >= 0) {
- // insertion was more efficient; keep it and mark the insertion in insert_
- insert_[i_out + current_offset] = true;
- endpoint_base_[i_out + current_offset] = endpoint_after_insertion.base;
- }
- }
-
- // check for completion
- EditPoint finish = {base_end_, target_end_};
- for (int64_t i_out = 0; i_out < edit_count_ + 1; ++i_out) {
- if (GetEditPoint(edit_count_, i_out + current_offset) == finish) {
- finish_index_ = i_out + current_offset;
- return;
- }
- }
- }
-
- bool Done() { return finish_index_ != -1; }
-
- Result<std::shared_ptr<StructArray>> GetEdits(MemoryPool* pool) {
- DCHECK(Done());
-
- int64_t length = edit_count_ + 1;
- ARROW_ASSIGN_OR_RAISE(auto insert_buf, AllocateEmptyBitmap(length, pool));
- ARROW_ASSIGN_OR_RAISE(auto run_length_buf,
- AllocateBuffer(length * sizeof(int64_t), pool));
- auto run_length = reinterpret_cast<int64_t*>(run_length_buf->mutable_data());
-
- auto index = finish_index_;
- auto endpoint = GetEditPoint(edit_count_, finish_index_);
-
- for (int64_t i = edit_count_; i > 0; --i) {
- bool insert = insert_[index];
- BitUtil::SetBitTo(insert_buf->mutable_data(), i, insert);
-
- auto insertions_minus_deletions =
- (endpoint.base - base_begin_) - (endpoint.target - target_begin_);
- if (insert) {
- ++insertions_minus_deletions;
- } else {
- --insertions_minus_deletions;
- }
- index = (i - 1 - insertions_minus_deletions) / 2 + StorageOffset(i - 1);
-
- // endpoint of previous edit
- auto previous = GetEditPoint(i - 1, index);
- run_length[i] = endpoint.base - previous.base - !insert;
- DCHECK_GE(run_length[i], 0);
-
- endpoint = previous;
- }
- BitUtil::SetBitTo(insert_buf->mutable_data(), 0, false);
- run_length[0] = endpoint.base - base_begin_;
-
- return StructArray::Make(
- {std::make_shared<BooleanArray>(length, std::move(insert_buf)),
- std::make_shared<Int64Array>(length, std::move(run_length_buf))},
- {field("insert", boolean()), field("run_length", int64())});
- }
-
- Result<std::shared_ptr<StructArray>> Diff() {
- while (!Done()) {
- Next();
- }
- return GetEdits(pool_);
- }
-
- private:
- const Array& base_;
- const Array& target_;
- MemoryPool* pool_;
- ValueComparator value_comparator_;
- int64_t finish_index_ = -1;
- int64_t edit_count_ = 0;
- int64_t base_begin_, base_end_;
- int64_t target_begin_, target_end_;
- // each element of endpoint_base_ is the furthest position in base reachable given an
- // edit_count and (# insertions) - (# deletions). Each bit of insert_ records whether
- // the corresponding furthest position was reached via an insertion or a deletion
- // (followed by a run of shared elements). See StorageOffset for the
- // layout of these vectors
- std::vector<int64_t> endpoint_base_;
- std::vector<bool> insert_;
-};
-
-Result<std::shared_ptr<StructArray>> NullDiff(const Array& base, const Array& target,
- MemoryPool* pool) {
- bool insert = base.length() < target.length();
- auto run_length = std::min(base.length(), target.length());
- auto edit_count = std::max(base.length(), target.length()) - run_length;
-
- TypedBufferBuilder<bool> insert_builder(pool);
- RETURN_NOT_OK(insert_builder.Resize(edit_count + 1));
- insert_builder.UnsafeAppend(false);
- TypedBufferBuilder<int64_t> run_length_builder(pool);
- RETURN_NOT_OK(run_length_builder.Resize(edit_count + 1));
- run_length_builder.UnsafeAppend(run_length);
- if (edit_count > 0) {
- insert_builder.UnsafeAppend(edit_count, insert);
- run_length_builder.UnsafeAppend(edit_count, 0);
- }
-
- std::shared_ptr<Buffer> insert_buf, run_length_buf;
- RETURN_NOT_OK(insert_builder.Finish(&insert_buf));
- RETURN_NOT_OK(run_length_builder.Finish(&run_length_buf));
-
- return StructArray::Make({std::make_shared<BooleanArray>(edit_count + 1, insert_buf),
- std::make_shared<Int64Array>(edit_count + 1, run_length_buf)},
- {field("insert", boolean()), field("run_length", int64())});
-}
-
-Result<std::shared_ptr<StructArray>> Diff(const Array& base, const Array& target,
- MemoryPool* pool) {
- if (!base.type()->Equals(target.type())) {
- return Status::TypeError("only taking the diff of like-typed arrays is supported.");
- }
-
- if (base.type()->id() == Type::NA) {
- return NullDiff(base, target, pool);
- } else if (base.type()->id() == Type::EXTENSION) {
- auto base_storage = checked_cast<const ExtensionArray&>(base).storage();
- auto target_storage = checked_cast<const ExtensionArray&>(target).storage();
- return Diff(*base_storage, *target_storage, pool);
- } else if (base.type()->id() == Type::DICTIONARY) {
- return Status::NotImplemented("diffing arrays of type ", *base.type());
- } else {
- return QuadraticSpaceMyersDiff(base, target, pool).Diff();
- }
-}
-
-using Formatter = std::function<void(const Array&, int64_t index, std::ostream*)>;
-
-static Result<Formatter> MakeFormatter(const DataType& type);
-
-class MakeFormatterImpl {
- public:
- Result<Formatter> Make(const DataType& type) && {
- RETURN_NOT_OK(VisitTypeInline(type, this));
- return std::move(impl_);
- }
-
- private:
- template <typename VISITOR>
- friend Status VisitTypeInline(const DataType&, VISITOR*);
-
- // factory implementation
- Status Visit(const BooleanType&) {
- impl_ = [](const Array& array, int64_t index, std::ostream* os) {
- *os << (checked_cast<const BooleanArray&>(array).Value(index) ? "true" : "false");
- };
- return Status::OK();
- }
-
- // format Numerics with std::ostream defaults
- template <typename T>
- enable_if_number<T, Status> Visit(const T&) {
- impl_ = [](const Array& array, int64_t index, std::ostream* os) {
- const auto& numeric = checked_cast<const NumericArray<T>&>(array);
- if (sizeof(decltype(numeric.Value(index))) == sizeof(char)) {
- // override std::ostream defaults for /(u|)int8_t/ since they are
- // formatted as potentially unprintable/tty borking characters
- *os << static_cast<int16_t>(numeric.Value(index));
- } else {
- *os << numeric.Value(index);
- }
- };
- return Status::OK();
- }
-
- template <typename T>
- enable_if_date<T, Status> Visit(const T&) {
- using unit = typename std::conditional<std::is_same<T, Date32Type>::value,
- arrow_vendored::date::days,
- std::chrono::milliseconds>::type;
-
- static arrow_vendored::date::sys_days epoch{arrow_vendored::date::jan / 1 / 1970};
-
- impl_ = [](const Array& array, int64_t index, std::ostream* os) {
- unit value(checked_cast<const NumericArray<T>&>(array).Value(index));
- *os << arrow_vendored::date::format("%F", value + epoch);
- };
- return Status::OK();
- }
-
- template <typename T>
- enable_if_time<T, Status> Visit(const T&) {
- impl_ = MakeTimeFormatter<T, false>("%T");
- return Status::OK();
- }
-
- Status Visit(const TimestampType&) {
- impl_ = MakeTimeFormatter<TimestampType, true>("%F %T");
- return Status::OK();
- }
-
- Status Visit(const DayTimeIntervalType&) {
- impl_ = [](const Array& array, int64_t index, std::ostream* os) {
- auto day_millis = checked_cast<const DayTimeIntervalArray&>(array).Value(index);
- *os << day_millis.days << "d" << day_millis.milliseconds << "ms";
- };
- return Status::OK();
- }
-
- // format Binary, LargeBinary and FixedSizeBinary in hexadecimal
- template <typename T>
- enable_if_binary_like<T, Status> Visit(const T&) {
- using ArrayType = typename TypeTraits<T>::ArrayType;
- impl_ = [](const Array& array, int64_t index, std::ostream* os) {
- *os << HexEncode(checked_cast<const ArrayType&>(array).GetView(index));
- };
- return Status::OK();
- }
-
- // format Strings with \"\n\r\t\\ escaped
- template <typename T>
- enable_if_string_like<T, Status> Visit(const T&) {
- using ArrayType = typename TypeTraits<T>::ArrayType;
- impl_ = [](const Array& array, int64_t index, std::ostream* os) {
- *os << "\"" << Escape(checked_cast<const ArrayType&>(array).GetView(index)) << "\"";
- };
- return Status::OK();
- }
-
- // format Decimals with Decimal128Array::FormatValue
- Status Visit(const Decimal128Type&) {
- impl_ = [](const Array& array, int64_t index, std::ostream* os) {
- *os << checked_cast<const Decimal128Array&>(array).FormatValue(index);
- };
- return Status::OK();
- }
-
- template <typename T>
- enable_if_list_like<T, Status> Visit(const T& t) {
- struct ListImpl {
- explicit ListImpl(Formatter f) : values_formatter_(std::move(f)) {}
-
- void operator()(const Array& array, int64_t index, std::ostream* os) {
- const auto& list_array =
- checked_cast<const typename TypeTraits<T>::ArrayType&>(array);
- *os << "[";
- for (int32_t i = 0; i < list_array.value_length(index); ++i) {
- if (i != 0) {
- *os << ", ";
- }
- values_formatter_(*list_array.values(), i + list_array.value_offset(index), os);
- }
- *os << "]";
- }
-
- Formatter values_formatter_;
- };
-
- ARROW_ASSIGN_OR_RAISE(auto values_formatter, MakeFormatter(*t.value_type()));
- impl_ = ListImpl(std::move(values_formatter));
- return Status::OK();
- }
-
- // TODO(bkietz) format maps better
-
- Status Visit(const StructType& t) {
- struct StructImpl {
- explicit StructImpl(std::vector<Formatter> f) : field_formatters_(std::move(f)) {}
-
- void operator()(const Array& array, int64_t index, std::ostream* os) {
- const auto& struct_array = checked_cast<const StructArray&>(array);
- *os << "{";
- for (int i = 0, printed = 0; i < struct_array.num_fields(); ++i) {
- if (printed != 0) {
- *os << ", ";
- }
- if (struct_array.field(i)->IsNull(index)) {
- continue;
- }
- ++printed;
- *os << struct_array.struct_type()->field(i)->name() << ": ";
- field_formatters_[i](*struct_array.field(i), index, os);
- }
- *os << "}";
- }
-
- std::vector<Formatter> field_formatters_;
- };
-
- std::vector<Formatter> field_formatters(t.num_fields());
- for (int i = 0; i < t.num_fields(); ++i) {
- ARROW_ASSIGN_OR_RAISE(field_formatters[i], MakeFormatter(*t.field(i)->type()));
- }
-
- impl_ = StructImpl(std::move(field_formatters));
- return Status::OK();
- }
-
- Status Visit(const UnionType& t) {
- struct UnionImpl {
- explicit UnionImpl(std::vector<Formatter> f) : field_formatters_(std::move(f)) {}
-
- void DoFormat(const UnionArray& array, int64_t index, int64_t child_index,
- std::ostream* os) {
- auto type_code = array.raw_type_codes()[index];
- auto child = array.field(array.child_id(index));
-
- *os << "{" << static_cast<int16_t>(type_code) << ": ";
- if (child->IsNull(child_index)) {
- *os << "null";
- } else {
- field_formatters_[type_code](*child, child_index, os);
- }
- *os << "}";
- }
-
- std::vector<Formatter> field_formatters_;
- };
-
- struct SparseImpl : UnionImpl {
- using UnionImpl::UnionImpl;
-
- void operator()(const Array& array, int64_t index, std::ostream* os) {
- const auto& union_array = checked_cast<const SparseUnionArray&>(array);
- DoFormat(union_array, index, index, os);
- }
- };
-
- struct DenseImpl : UnionImpl {
- using UnionImpl::UnionImpl;
-
- void operator()(const Array& array, int64_t index, std::ostream* os) {
- const auto& union_array = checked_cast<const DenseUnionArray&>(array);
- DoFormat(union_array, index, union_array.raw_value_offsets()[index], os);
- }
- };
-
- std::vector<Formatter> field_formatters(t.max_type_code() + 1);
- for (int i = 0; i < t.num_fields(); ++i) {
- auto type_id = t.type_codes()[i];
- ARROW_ASSIGN_OR_RAISE(field_formatters[type_id],
- MakeFormatter(*t.field(i)->type()));
- }
-
- if (t.mode() == UnionMode::SPARSE) {
- impl_ = SparseImpl(std::move(field_formatters));
- } else {
- impl_ = DenseImpl(std::move(field_formatters));
- }
- return Status::OK();
- }
-
- Status Visit(const NullType& t) {
- return Status::NotImplemented("formatting diffs between arrays of type ", t);
- }
-
- Status Visit(const DictionaryType& t) {
- return Status::NotImplemented("formatting diffs between arrays of type ", t);
- }
-
- Status Visit(const ExtensionType& t) {
- return Status::NotImplemented("formatting diffs between arrays of type ", t);
- }
-
- Status Visit(const DurationType& t) {
- return Status::NotImplemented("formatting diffs between arrays of type ", t);
- }
-
- Status Visit(const MonthIntervalType& t) {
- return Status::NotImplemented("formatting diffs between arrays of type ", t);
- }
-
- template <typename T, bool AddEpoch>
- Formatter MakeTimeFormatter(const std::string& fmt_str) {
- return [fmt_str](const Array& array, int64_t index, std::ostream* os) {
- auto fmt = fmt_str.c_str();
- auto unit = checked_cast<const T&>(*array.type()).unit();
- auto value = checked_cast<const NumericArray<T>&>(array).Value(index);
- using arrow_vendored::date::format;
- using std::chrono::nanoseconds;
- using std::chrono::microseconds;
- using std::chrono::milliseconds;
- using std::chrono::seconds;
- if (AddEpoch) {
- static arrow_vendored::date::sys_days epoch{arrow_vendored::date::jan / 1 / 1970};
-
- switch (unit) {
- case TimeUnit::NANO:
- *os << format(fmt, static_cast<nanoseconds>(value) + epoch);
- break;
- case TimeUnit::MICRO:
- *os << format(fmt, static_cast<microseconds>(value) + epoch);
- break;
- case TimeUnit::MILLI:
- *os << format(fmt, static_cast<milliseconds>(value) + epoch);
- break;
- case TimeUnit::SECOND:
- *os << format(fmt, static_cast<seconds>(value) + epoch);
- break;
- }
- return;
- }
- switch (unit) {
- case TimeUnit::NANO:
- *os << format(fmt, static_cast<nanoseconds>(value));
- break;
- case TimeUnit::MICRO:
- *os << format(fmt, static_cast<microseconds>(value));
- break;
- case TimeUnit::MILLI:
- *os << format(fmt, static_cast<milliseconds>(value));
- break;
- case TimeUnit::SECOND:
- *os << format(fmt, static_cast<seconds>(value));
- break;
- }
- };
- }
-
- Formatter impl_;
-};
-
-static Result<Formatter> MakeFormatter(const DataType& type) {
- return MakeFormatterImpl{}.Make(type);
-}
-
-Status VisitEditScript(
- const Array& edits,
- const std::function<Status(int64_t delete_begin, int64_t delete_end,
- int64_t insert_begin, int64_t insert_end)>& visitor) {
- static const auto edits_type =
- struct_({field("insert", boolean()), field("run_length", int64())});
- DCHECK(edits.type()->Equals(*edits_type));
- DCHECK_GE(edits.length(), 1);
-
- auto insert = checked_pointer_cast<BooleanArray>(
- checked_cast<const StructArray&>(edits).field(0));
- auto run_lengths =
- checked_pointer_cast<Int64Array>(checked_cast<const StructArray&>(edits).field(1));
-
- DCHECK(!insert->Value(0));
-
- auto length = run_lengths->Value(0);
- int64_t base_begin, base_end, target_begin, target_end;
- base_begin = base_end = target_begin = target_end = length;
- for (int64_t i = 1; i < edits.length(); ++i) {
- if (insert->Value(i)) {
- ++target_end;
- } else {
- ++base_end;
- }
- length = run_lengths->Value(i);
- if (length != 0) {
- RETURN_NOT_OK(visitor(base_begin, base_end, target_begin, target_end));
- base_begin = base_end = base_end + length;
- target_begin = target_end = target_end + length;
- }
- }
- if (length == 0) {
- return visitor(base_begin, base_end, target_begin, target_end);
- }
- return Status::OK();
-}
-
-class UnifiedDiffFormatter {
- public:
- UnifiedDiffFormatter(std::ostream* os, Formatter formatter)
- : os_(os), formatter_(std::move(formatter)) {}
-
- Status operator()(int64_t delete_begin, int64_t delete_end, int64_t insert_begin,
- int64_t insert_end) {
- *os_ << "@@ -" << delete_begin << ", +" << insert_begin << " @@" << std::endl;
-
- for (int64_t i = delete_begin; i < delete_end; ++i) {
- *os_ << "-";
- if (base_->IsValid(i)) {
- formatter_(*base_, i, &*os_);
- } else {
- *os_ << "null";
- }
- *os_ << std::endl;
- }
-
- for (int64_t i = insert_begin; i < insert_end; ++i) {
- *os_ << "+";
- if (target_->IsValid(i)) {
- formatter_(*target_, i, &*os_);
- } else {
- *os_ << "null";
- }
- *os_ << std::endl;
- }
-
- return Status::OK();
- }
-
- Status operator()(const Array& edits, const Array& base, const Array& target) {
- if (edits.length() == 1) {
- return Status::OK();
- }
- base_ = &base;
- target_ = &target;
- *os_ << std::endl;
- return VisitEditScript(edits, *this);
- }
-
- private:
- std::ostream* os_ = nullptr;
- const Array* base_ = nullptr;
- const Array* target_ = nullptr;
- Formatter formatter_;
-};
-
-Result<std::function<Status(const Array& edits, const Array& base, const Array& target)>>
-MakeUnifiedDiffFormatter(const DataType& type, std::ostream* os) {
- if (type.id() == Type::NA) {
- return [os](const Array& edits, const Array& base, const Array& target) {
- if (base.length() != target.length()) {
- *os << "# Null arrays differed" << std::endl
- << "-" << base.length() << " nulls" << std::endl
- << "+" << target.length() << " nulls" << std::endl;
- }
- return Status::OK();
- };
- }
-
- ARROW_ASSIGN_OR_RAISE(auto formatter, MakeFormatter(type));
- return UnifiedDiffFormatter(os, std::move(formatter));
-}
-
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/array/diff.h"
+
+#include <algorithm>
+#include <chrono>
+#include <functional>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/array_decimal.h"
+#include "arrow/array/array_nested.h"
+#include "arrow/array/array_primitive.h"
+#include "arrow/buffer.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/extension_type.h"
+#include "arrow/memory_pool.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/range.h"
+#include "arrow/util/string.h"
+#include "arrow/util/string_view.h"
+#include "arrow/vendored/datetime.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+using internal::checked_pointer_cast;
+using internal::MakeLazyRange;
+
+// Generic element accessor: forwards to the array's own GetView(index). The trailing
+// return type restricts this overload to array types for which that call is well formed.
+template <typename ArrayType>
+auto GetView(const ArrayType& array, int64_t index)
+    -> decltype(array.GetView(index)) {
+  return array.GetView(index);
+}
+
+// Non-owning view of `length_` consecutive elements of `array_` starting at `offset_`.
+struct Slice {
+  const Array* array_;
+  int64_t offset_, length_;
+
+  // Slices compare equal when their lengths match and Array::RangeEquals reports the
+  // two element ranges as equal.
+  bool operator==(const Slice& other) const {
+    if (length_ != other.length_) {
+      return false;
+    }
+    return array_->RangeEquals(offset_, offset_ + length_, other.offset_, *other.array_);
+  }
+  bool operator!=(const Slice& other) const { return !operator==(other); }
+};
+
+// Element accessor for list-like arrays: the element at `index` is exposed as a Slice
+// over the list's values array, delimited by value_offset(index) and value_length(index).
+template <typename ArrayType, typename T = typename ArrayType::TypeClass,
+          typename = enable_if_list_like<T>>
+static Slice GetView(const ArrayType& array, int64_t index) {
+  const Array* values = array.values().get();
+  return Slice{values, array.value_offset(index), array.value_length(index)};
+}
+
+// Non-owning view of the single element of `array_` located at `offset_`.
+struct UnitSlice {
+  const Array* array_;
+  int64_t offset_;
+
+  // Equality is delegated to Array::RangeEquals over the one-element range
+  // [offset_, offset_ + 1).
+  bool operator==(const UnitSlice& other) const {
+    const int64_t end = offset_ + 1;
+    return array_->RangeEquals(offset_, end, other.offset_, *other.array_);
+  }
+  bool operator!=(const UnitSlice& other) const { return !operator==(other); }
+};
+
+// Element accessor for struct arrays: the element is viewed as a one-element range of
+// the struct array itself.
+//
+// FIXME(bkietz) this is inefficient;
+// StructArray's fields can be diffed independently then merged
+static UnitSlice GetView(const StructArray& array, int64_t index) {
+  UnitSlice view{&array, index};
+  return view;
+}
+
+// Element accessor for union arrays: the element is viewed as a one-element range of
+// the union array itself.
+static UnitSlice GetView(const UnionArray& array, int64_t index) {
+  UnitSlice view{&array, index};
+  return view;
+}
+
+using ValueComparator = std::function<bool(const Array&, int64_t, const Array&, int64_t)>;
+
+// Type visitor which builds a ValueComparator for a given DataType. The resulting
+// callable downcasts both arrays to the concrete array type and compares the views
+// returned by GetView for the two indices.
+struct ValueComparatorVisitor {
+  // Generic case: store a comparator specialized for T's array type in `out`.
+  template <typename T>
+  Status Visit(const T&) {
+    using ArrayType = typename TypeTraits<T>::ArrayType;
+    out = [](const Array& base, int64_t base_index, const Array& target,
+             int64_t target_index) {
+      const auto& typed_base = checked_cast<const ArrayType&>(base);
+      const auto& typed_target = checked_cast<const ArrayType&>(target);
+      return GetView(typed_base, base_index) == GetView(typed_target, target_index);
+    };
+    return Status::OK();
+  }
+
+  // The following types get no comparator from this visitor.
+  Status Visit(const NullType&) { return Status::NotImplemented("null type"); }
+
+  Status Visit(const ExtensionType&) { return Status::NotImplemented("extension type"); }
+
+  Status Visit(const DictionaryType&) {
+    return Status::NotImplemented("dictionary type");
+  }
+
+  // Visit `type` and return a copy of the comparator stored in `out`. The visit status
+  // is only checked through DCHECK_OK.
+  ValueComparator Create(const DataType& type) {
+    DCHECK_OK(VisitTypeInline(type, this));
+    return out;
+  }
+
+  ValueComparator out;
+};
+
+// Build the element comparator for arrays of `type` using a fresh ValueComparatorVisitor.
+ValueComparator GetValueComparator(const DataType& type) {
+  return ValueComparatorVisitor{}.Create(type);
+}
+
+// An intermediate state in the comparison of two arrays: a position in base paired
+// with a position in target.
+struct EditPoint {
+  int64_t base, target;
+
+  // Edit points are equal when both positions coincide.
+  bool operator==(EditPoint other) const {
+    return !(base != other.base || target != other.target);
+  }
+};
+
+/// A generic sequence difference algorithm, based on
+///
+/// E. W. Myers, "An O(ND) difference algorithm and its variations,"
+/// Algorithmica, vol. 1, no. 1-4, pp. 251–266, 1986.
+///
+/// In short: for a number of edits D, the furthest set of EditPoints reachable with D
+/// edits is maintained. From it the furthest set reachable with D+1 edits is computed,
+/// and this continues inductively until a complete edit script is discovered.
+///
+/// From each edit point a single deletion and a single insertion are tried, after which
+/// as many shared elements as possible are skipped; only the endpoint of that run is
+/// recorded. When the sequences differ only slightly this representation is minimal,
+/// because the shared elements (the majority) are represented implicitly.
+class QuadraticSpaceMyersDiff {
+ public:
+  // Sets up the state for zero edits: the single stored endpoint is how far base and
+  // target agree from their beginnings. If that already consumes both arrays the diff
+  // is finished immediately.
+  QuadraticSpaceMyersDiff(const Array& base, const Array& target, MemoryPool* pool)
+      : base_(base),
+        target_(target),
+        pool_(pool),
+        value_comparator_(GetValueComparator(*base.type())),
+        base_begin_(0),
+        base_end_(base.length()),
+        target_begin_(0),
+        target_end_(target.length()),
+        endpoint_base_({ExtendFrom({base_begin_, target_begin_}).base}),
+        insert_({true}) {
+    const int64_t base_length = base_end_ - base_begin_;
+    const int64_t target_length = target_end_ - target_begin_;
+    if (base_length == target_length && endpoint_base_[0] == base_end_) {
+      // trivial case: base == target
+      finish_index_ = 0;
+    }
+  }
+
+  // Compare one element of base with one element of target. Two nulls are equal, a
+  // null and a non-null are not; otherwise the type-specific comparator decides.
+  bool ValuesEqual(int64_t base_index, int64_t target_index) const {
+    const bool base_is_null = base_.IsNull(base_index);
+    const bool target_is_null = target_.IsNull(target_index);
+    if (!base_is_null && !target_is_null) {
+      return value_comparator_(base_, base_index, target_, target_index);
+    }
+    return base_is_null && target_is_null;
+  }
+
+  // Step over one element of base (that element is deleted) unless base is already
+  // exhausted, then extend maximally.
+  EditPoint DeleteOne(EditPoint p) const {
+    p.base += (p.base != base_end_) ? 1 : 0;
+    return ExtendFrom(p);
+  }
+
+  // Step over one element of target (that element is inserted) unless target is
+  // already exhausted, then extend maximally.
+  EditPoint InsertOne(EditPoint p) const {
+    p.target += (p.target != target_end_) ? 1 : 0;
+    return ExtendFrom(p);
+  }
+
+  // Advance through base and target in lockstep for as long as neither is exhausted
+  // and the pointed-to elements are equal (such elements are present in both sequences).
+  EditPoint ExtendFrom(EditPoint p) const {
+    while (p.base != base_end_ && p.target != target_end_ &&
+           ValuesEqual(p.base, p.target)) {
+      ++p.base;
+      ++p.target;
+    }
+    return p;
+  }
+
+  // Start of the range in endpoint_base_ and insert_ which holds the state for
+  // `edit_count` edits. Consecutive offsets differ by edit_count + 1, so that range has
+  // edit_count + 1 slots.
+  int64_t StorageOffset(int64_t edit_count) const {
+    return edit_count * (edit_count + 1) / 2;
+  }
+
+  // Combine endpoint_base_[index] with the matching position in target. The target
+  // position is not stored: it is derived from edit_count and from where index lies
+  // within that edit count's storage range, then clamped to target_end_.
+  EditPoint GetEditPoint(int64_t edit_count, int64_t index) const {
+    DCHECK_GE(index, StorageOffset(edit_count));
+    DCHECK_LT(index, StorageOffset(edit_count + 1));
+    const int64_t insertions_minus_deletions =
+        2 * (index - StorageOffset(edit_count)) - edit_count;
+    const int64_t base_position = endpoint_base_[index];
+    const int64_t target_position = std::min(
+        target_begin_ + ((base_position - base_begin_) + insertions_minus_deletions),
+        target_end_);
+    return {base_position, target_position};
+  }
+
+  // Compute the endpoints reachable with one more edit and record the finishing slot if
+  // one of them is the end of both arrays.
+  void Next() {
+    ++edit_count_;
+    const int64_t previous_offset = StorageOffset(edit_count_ - 1);
+    const int64_t current_offset = StorageOffset(edit_count_);
+    const int64_t new_size = StorageOffset(edit_count_ + 1);
+
+    // New slots are filled with base_begin_. The last slot of the new range is not
+    // written by the deletion pass, so the insertion pass compares against this fill
+    // value there.
+    endpoint_base_.resize(new_size, base_begin_);
+    insert_.resize(new_size, false);
+
+    // try deleting from base first
+    for (int64_t k = 0; k < edit_count_; ++k) {
+      const EditPoint origin = GetEditPoint(edit_count_ - 1, previous_offset + k);
+      endpoint_base_[current_offset + k] = DeleteOne(origin).base;
+    }
+
+    // check if inserting from target could do better; an insertion from previous slot k
+    // lands in current slot k + 1
+    for (int64_t k = 0; k < edit_count_; ++k) {
+      const int64_t slot = current_offset + k + 1;
+      // what is currently stored for this slot
+      const EditPoint after_deletion = GetEditPoint(edit_count_, slot);
+
+      const EditPoint origin = GetEditPoint(edit_count_ - 1, previous_offset + k);
+      const EditPoint after_insertion = InsertOne(origin);
+
+      if (after_insertion.base >= after_deletion.base) {
+        // insertion reached at least as far; keep it and mark it in insert_
+        insert_[slot] = true;
+        endpoint_base_[slot] = after_insertion.base;
+      }
+    }
+
+    // check for completion
+    const EditPoint finish = {base_end_, target_end_};
+    for (int64_t slot = current_offset; slot <= current_offset + edit_count_; ++slot) {
+      if (GetEditPoint(edit_count_, slot) == finish) {
+        finish_index_ = slot;
+        return;
+      }
+    }
+  }
+
+  // True once a slot reaching the end of both arrays has been recorded.
+  bool Done() { return !(finish_index_ == -1); }
+
+  // Walk back from the finishing slot through the stored endpoints and emit the edit
+  // script as a struct array with fields "insert" (boolean) and "run_length" (int64),
+  // of length edit_count_ + 1. Entry 0 has insert == false and holds the length of the
+  // initial shared run.
+  Result<std::shared_ptr<StructArray>> GetEdits(MemoryPool* pool) {
+    DCHECK(Done());
+
+    const int64_t length = edit_count_ + 1;
+    ARROW_ASSIGN_OR_RAISE(auto insert_buf, AllocateEmptyBitmap(length, pool));
+    ARROW_ASSIGN_OR_RAISE(auto run_length_buf,
+                          AllocateBuffer(length * sizeof(int64_t), pool));
+    auto insert_bits = insert_buf->mutable_data();
+    auto run_length = reinterpret_cast<int64_t*>(run_length_buf->mutable_data());
+
+    int64_t index = finish_index_;
+    EditPoint endpoint = GetEditPoint(edit_count_, finish_index_);
+
+    for (int64_t i = edit_count_; i > 0; --i) {
+      const bool was_insertion = insert_[index];
+      BitUtil::SetBitTo(insert_bits, i, was_insertion);
+
+      // locate the slot of the endpoint from which edit i was made
+      int64_t base_minus_target =
+          (endpoint.base - base_begin_) - (endpoint.target - target_begin_);
+      base_minus_target += was_insertion ? 1 : -1;
+      index = (i - 1 - base_minus_target) / 2 + StorageOffset(i - 1);
+
+      // endpoint of previous edit
+      const EditPoint previous = GetEditPoint(i - 1, index);
+      // a deletion itself advances base by one; the rest of the advance is the run
+      run_length[i] = endpoint.base - previous.base - (was_insertion ? 0 : 1);
+      DCHECK_GE(run_length[i], 0);
+
+      endpoint = previous;
+    }
+    BitUtil::SetBitTo(insert_bits, 0, false);
+    run_length[0] = endpoint.base - base_begin_;
+
+    return StructArray::Make(
+        {std::make_shared<BooleanArray>(length, std::move(insert_buf)),
+         std::make_shared<Int64Array>(length, std::move(run_length_buf))},
+        {field("insert", boolean()), field("run_length", int64())});
+  }
+
+  // Run Next() until done, then materialize the edit script using pool_.
+  Result<std::shared_ptr<StructArray>> Diff() {
+    while (finish_index_ == -1) {
+      Next();
+    }
+    return GetEdits(pool_);
+  }
+
+ private:
+  const Array& base_;
+  const Array& target_;
+  MemoryPool* pool_;
+  ValueComparator value_comparator_;
+  // slot (in endpoint_base_/insert_) of the endpoint which completed the diff, or -1
+  int64_t finish_index_ = -1;
+  int64_t edit_count_ = 0;
+  int64_t base_begin_, base_end_;
+  int64_t target_begin_, target_end_;
+  // each element of endpoint_base_ is the furthest position in base reachable given an
+  // edit_count and (# insertions) - (# deletions). Each bit of insert_ records whether
+  // the corresponding furthest position was reached via an insertion or a deletion
+  // (followed by a run of shared elements). See StorageOffset for the
+  // layout of these vectors
+  std::vector<int64_t> endpoint_base_;
+  std::vector<bool> insert_;
+};
+
+// Diff of two null-typed arrays, which can only differ in length. The result has
+// edit_count + 1 entries, where edit_count is the difference between the two lengths:
+// entry 0 is {insert: false, run_length: min(lengths)}, and each remaining entry is
+// {insert: base is shorter than target, run_length: 0}.
+Result<std::shared_ptr<StructArray>> NullDiff(const Array& base, const Array& target,
+                                              MemoryPool* pool) {
+  const int64_t base_length = base.length();
+  const int64_t target_length = target.length();
+  const bool is_insertion = base_length < target_length;
+  const int64_t shared_length = std::min(base_length, target_length);
+  const int64_t edit_count = std::max(base_length, target_length) - shared_length;
+  const int64_t out_length = edit_count + 1;
+
+  // leading entry: the shared run
+  TypedBufferBuilder<bool> insert_builder(pool);
+  RETURN_NOT_OK(insert_builder.Resize(out_length));
+  insert_builder.UnsafeAppend(false);
+  TypedBufferBuilder<int64_t> run_length_builder(pool);
+  RETURN_NOT_OK(run_length_builder.Resize(out_length));
+  run_length_builder.UnsafeAppend(shared_length);
+
+  // one entry per surplus element, none of them followed by a shared run
+  if (edit_count > 0) {
+    insert_builder.UnsafeAppend(edit_count, is_insertion);
+    run_length_builder.UnsafeAppend(edit_count, 0);
+  }
+
+  std::shared_ptr<Buffer> insert_buf, run_length_buf;
+  RETURN_NOT_OK(insert_builder.Finish(&insert_buf));
+  RETURN_NOT_OK(run_length_builder.Finish(&run_length_buf));
+
+  return StructArray::Make({std::make_shared<BooleanArray>(out_length, insert_buf),
+                            std::make_shared<Int64Array>(out_length, run_length_buf)},
+                           {field("insert", boolean()), field("run_length", int64())});
+}
+
+// Compute the edit script which transforms `base` into `target`.
+//
+// Fails with TypeError when the two arrays have different types. Null-typed arrays go
+// to NullDiff, extension arrays are diffed through their storage arrays, dictionary
+// arrays yield NotImplemented, and every other type is handled by
+// QuadraticSpaceMyersDiff.
+Result<std::shared_ptr<StructArray>> Diff(const Array& base, const Array& target,
+                                          MemoryPool* pool) {
+  if (!base.type()->Equals(target.type())) {
+    return Status::TypeError("only taking the diff of like-typed arrays is supported.");
+  }
+
+  switch (base.type()->id()) {
+    case Type::NA:
+      return NullDiff(base, target, pool);
+    case Type::EXTENSION: {
+      auto base_storage = checked_cast<const ExtensionArray&>(base).storage();
+      auto target_storage = checked_cast<const ExtensionArray&>(target).storage();
+      return Diff(*base_storage, *target_storage, pool);
+    }
+    case Type::DICTIONARY:
+      return Status::NotImplemented("diffing arrays of type ", *base.type());
+    default:
+      return QuadraticSpaceMyersDiff(base, target, pool).Diff();
+  }
+}
+
+using Formatter = std::function<void(const Array&, int64_t index, std::ostream*)>;
+
+static Result<Formatter> MakeFormatter(const DataType& type);
+
+/// \brief Type visitor which assembles a Formatter for a given DataType.
+///
+/// A Formatter writes the element at `index` of an array to an ostream. The
+/// formatters produced here do not test the validity of the element they are
+/// asked to format; nested formatters (list, struct, union) do test the validity
+/// of the child elements they delegate to.
+class MakeFormatterImpl {
+ public:
+  /// Dispatch on `type` and return the formatter stored by the matching Visit
+  /// overload, or the error Status that overload returned.
+  Result<Formatter> Make(const DataType& type) && {
+    RETURN_NOT_OK(VisitTypeInline(type, this));
+    return std::move(impl_);
+  }
+
+ private:
+  template <typename VISITOR>
+  friend Status VisitTypeInline(const DataType&, VISITOR*);
+
+  // format Booleans as "true" / "false"
+  Status Visit(const BooleanType&) {
+    impl_ = [](const Array& array, int64_t index, std::ostream* os) {
+      *os << (checked_cast<const BooleanArray&>(array).Value(index) ? "true" : "false");
+    };
+    return Status::OK();
+  }
+
+  // format Numerics with std::ostream defaults
+  template <typename T>
+  enable_if_number<T, Status> Visit(const T&) {
+    impl_ = [](const Array& array, int64_t index, std::ostream* os) {
+      const auto& numeric = checked_cast<const NumericArray<T>&>(array);
+      if (sizeof(decltype(numeric.Value(index))) == sizeof(char)) {
+        // override std::ostream defaults for /(u|)int8_t/ since they are
+        // formatted as potentially unprintable/tty borking characters
+        *os << static_cast<int16_t>(numeric.Value(index));
+      } else {
+        *os << numeric.Value(index);
+      }
+    };
+    return Status::OK();
+  }
+
+  // format Dates as "%F": Date32 values are interpreted as days since the epoch,
+  // any other date type as milliseconds since the epoch
+  template <typename T>
+  enable_if_date<T, Status> Visit(const T&) {
+    using unit = typename std::conditional<std::is_same<T, Date32Type>::value,
+                                           arrow_vendored::date::days,
+                                           std::chrono::milliseconds>::type;
+
+    static arrow_vendored::date::sys_days epoch{arrow_vendored::date::jan / 1 / 1970};
+
+    impl_ = [](const Array& array, int64_t index, std::ostream* os) {
+      unit value(checked_cast<const NumericArray<T>&>(array).Value(index));
+      *os << arrow_vendored::date::format("%F", value + epoch);
+    };
+    return Status::OK();
+  }
+
+  // format Times as "%T" (a duration, no epoch added)
+  template <typename T>
+  enable_if_time<T, Status> Visit(const T&) {
+    impl_ = MakeTimeFormatter<T, false>("%T");
+    return Status::OK();
+  }
+
+  // format Timestamps as "%F %T" relative to 1970-01-01
+  Status Visit(const TimestampType&) {
+    impl_ = MakeTimeFormatter<TimestampType, true>("%F %T");
+    return Status::OK();
+  }
+
+  // format DayTimeIntervals as "<days>d<milliseconds>ms"
+  Status Visit(const DayTimeIntervalType&) {
+    impl_ = [](const Array& array, int64_t index, std::ostream* os) {
+      auto day_millis = checked_cast<const DayTimeIntervalArray&>(array).Value(index);
+      *os << day_millis.days << "d" << day_millis.milliseconds << "ms";
+    };
+    return Status::OK();
+  }
+
+  // format Binary, LargeBinary and FixedSizeBinary in hexadecimal
+  template <typename T>
+  enable_if_binary_like<T, Status> Visit(const T&) {
+    using ArrayType = typename TypeTraits<T>::ArrayType;
+    impl_ = [](const Array& array, int64_t index, std::ostream* os) {
+      *os << HexEncode(checked_cast<const ArrayType&>(array).GetView(index));
+    };
+    return Status::OK();
+  }
+
+  // format Strings with \"\n\r\t\\ escaped
+  template <typename T>
+  enable_if_string_like<T, Status> Visit(const T&) {
+    using ArrayType = typename TypeTraits<T>::ArrayType;
+    impl_ = [](const Array& array, int64_t index, std::ostream* os) {
+      *os << "\"" << Escape(checked_cast<const ArrayType&>(array).GetView(index)) << "\"";
+    };
+    return Status::OK();
+  }
+
+  // format Decimals with Decimal128Array::FormatValue
+  Status Visit(const Decimal128Type&) {
+    impl_ = [](const Array& array, int64_t index, std::ostream* os) {
+      *os << checked_cast<const Decimal128Array&>(array).FormatValue(index);
+    };
+    return Status::OK();
+  }
+
+  // format Lists as "[v0, v1, ...]", delegating each value to the formatter of
+  // the value type; null values are written as "null"
+  template <typename T>
+  enable_if_list_like<T, Status> Visit(const T& t) {
+    struct ListImpl {
+      explicit ListImpl(Formatter f) : values_formatter_(std::move(f)) {}
+
+      void operator()(const Array& array, int64_t index, std::ostream* os) {
+        const auto& list_array =
+            checked_cast<const typename TypeTraits<T>::ArrayType&>(array);
+        const auto values = list_array.values();
+        // 64 bit arithmetic: LargeList offsets and lengths do not fit in int32_t
+        const int64_t begin = list_array.value_offset(index);
+        const int64_t length = list_array.value_length(index);
+        *os << "[";
+        for (int64_t i = 0; i < length; ++i) {
+          if (i != 0) {
+            *os << ", ";
+          }
+          // the value formatter does not check validity itself
+          if (values->IsNull(begin + i)) {
+            *os << "null";
+          } else {
+            values_formatter_(*values, begin + i, os);
+          }
+        }
+        *os << "]";
+      }
+
+      Formatter values_formatter_;
+    };
+
+    ARROW_ASSIGN_OR_RAISE(auto values_formatter, MakeFormatter(*t.value_type()));
+    impl_ = ListImpl(std::move(values_formatter));
+    return Status::OK();
+  }
+
+  // TODO(bkietz) format maps better
+
+  // format Structs as "{name: value, ...}", omitting fields which are null
+  Status Visit(const StructType& t) {
+    struct StructImpl {
+      explicit StructImpl(std::vector<Formatter> f) : field_formatters_(std::move(f)) {}
+
+      void operator()(const Array& array, int64_t index, std::ostream* os) {
+        const auto& struct_array = checked_cast<const StructArray&>(array);
+        *os << "{";
+        for (int i = 0, printed = 0; i < struct_array.num_fields(); ++i) {
+          // Skip null fields *before* writing the separator; otherwise each
+          // skipped field would leave a dangling ", " in the output.
+          if (struct_array.field(i)->IsNull(index)) {
+            continue;
+          }
+          if (printed != 0) {
+            *os << ", ";
+          }
+          ++printed;
+          *os << struct_array.struct_type()->field(i)->name() << ": ";
+          field_formatters_[i](*struct_array.field(i), index, os);
+        }
+        *os << "}";
+      }
+
+      std::vector<Formatter> field_formatters_;
+    };
+
+    std::vector<Formatter> field_formatters(t.num_fields());
+    for (int i = 0; i < t.num_fields(); ++i) {
+      ARROW_ASSIGN_OR_RAISE(field_formatters[i], MakeFormatter(*t.field(i)->type()));
+    }
+
+    impl_ = StructImpl(std::move(field_formatters));
+    return Status::OK();
+  }
+
+  // format Unions as "{type_code: value}" or "{type_code: null}"
+  Status Visit(const UnionType& t) {
+    struct UnionImpl {
+      explicit UnionImpl(std::vector<Formatter> f) : field_formatters_(std::move(f)) {}
+
+      // `index` addresses the union array, `child_index` the selected child
+      void DoFormat(const UnionArray& array, int64_t index, int64_t child_index,
+                    std::ostream* os) {
+        auto type_code = array.raw_type_codes()[index];
+        auto child = array.field(array.child_id(index));
+
+        *os << "{" << static_cast<int16_t>(type_code) << ": ";
+        if (child->IsNull(child_index)) {
+          *os << "null";
+        } else {
+          // formatters are stored by type code, not by child position
+          field_formatters_[type_code](*child, child_index, os);
+        }
+        *os << "}";
+      }
+
+      std::vector<Formatter> field_formatters_;
+    };
+
+    // sparse unions: the child is addressed with the union's own index
+    struct SparseImpl : UnionImpl {
+      using UnionImpl::UnionImpl;
+
+      void operator()(const Array& array, int64_t index, std::ostream* os) {
+        const auto& union_array = checked_cast<const SparseUnionArray&>(array);
+        DoFormat(union_array, index, index, os);
+      }
+    };
+
+    // dense unions: the child is addressed through the value offsets buffer
+    struct DenseImpl : UnionImpl {
+      using UnionImpl::UnionImpl;
+
+      void operator()(const Array& array, int64_t index, std::ostream* os) {
+        const auto& union_array = checked_cast<const DenseUnionArray&>(array);
+        DoFormat(union_array, index, union_array.raw_value_offsets()[index], os);
+      }
+    };
+
+    // indexed by type code; slots of unused type codes stay empty
+    std::vector<Formatter> field_formatters(t.max_type_code() + 1);
+    for (int i = 0; i < t.num_fields(); ++i) {
+      auto type_id = t.type_codes()[i];
+      ARROW_ASSIGN_OR_RAISE(field_formatters[type_id],
+                            MakeFormatter(*t.field(i)->type()));
+    }
+
+    if (t.mode() == UnionMode::SPARSE) {
+      impl_ = SparseImpl(std::move(field_formatters));
+    } else {
+      impl_ = DenseImpl(std::move(field_formatters));
+    }
+    return Status::OK();
+  }
+
+  // the types below have no formatter; visiting them fails with NotImplemented
+
+  Status Visit(const NullType& t) {
+    return Status::NotImplemented("formatting diffs between arrays of type ", t);
+  }
+
+  Status Visit(const DictionaryType& t) {
+    return Status::NotImplemented("formatting diffs between arrays of type ", t);
+  }
+
+  Status Visit(const ExtensionType& t) {
+    return Status::NotImplemented("formatting diffs between arrays of type ", t);
+  }
+
+  Status Visit(const DurationType& t) {
+    return Status::NotImplemented("formatting diffs between arrays of type ", t);
+  }
+
+  Status Visit(const MonthIntervalType& t) {
+    return Status::NotImplemented("formatting diffs between arrays of type ", t);
+  }
+
+  // Build a formatter for a type with a TimeUnit. The stored value is converted
+  // to the std::chrono duration matching the unit of the array's type and written
+  // with `fmt_str`; when AddEpoch is true it is first offset from 1970-01-01.
+  template <typename T, bool AddEpoch>
+  Formatter MakeTimeFormatter(const std::string& fmt_str) {
+    return [fmt_str](const Array& array, int64_t index, std::ostream* os) {
+      auto fmt = fmt_str.c_str();
+      auto unit = checked_cast<const T&>(*array.type()).unit();
+      auto value = checked_cast<const NumericArray<T>&>(array).Value(index);
+      using arrow_vendored::date::format;
+      using std::chrono::nanoseconds;
+      using std::chrono::microseconds;
+      using std::chrono::milliseconds;
+      using std::chrono::seconds;
+      if (AddEpoch) {
+        static arrow_vendored::date::sys_days epoch{arrow_vendored::date::jan / 1 / 1970};
+
+        switch (unit) {
+          case TimeUnit::NANO:
+            *os << format(fmt, static_cast<nanoseconds>(value) + epoch);
+            break;
+          case TimeUnit::MICRO:
+            *os << format(fmt, static_cast<microseconds>(value) + epoch);
+            break;
+          case TimeUnit::MILLI:
+            *os << format(fmt, static_cast<milliseconds>(value) + epoch);
+            break;
+          case TimeUnit::SECOND:
+            *os << format(fmt, static_cast<seconds>(value) + epoch);
+            break;
+        }
+        return;
+      }
+      switch (unit) {
+        case TimeUnit::NANO:
+          *os << format(fmt, static_cast<nanoseconds>(value));
+          break;
+        case TimeUnit::MICRO:
+          *os << format(fmt, static_cast<microseconds>(value));
+          break;
+        case TimeUnit::MILLI:
+          *os << format(fmt, static_cast<milliseconds>(value));
+          break;
+        case TimeUnit::SECOND:
+          *os << format(fmt, static_cast<seconds>(value));
+          break;
+      }
+    };
+  }
+
+  // the formatter assembled by the most recent successful Visit
+  Formatter impl_;
+};
+
+// Build a Formatter for arrays of `type` by running a fresh MakeFormatterImpl
+// over it; any error raised while visiting the type is propagated.
+static Result<Formatter> MakeFormatter(const DataType& type) {
+  MakeFormatterImpl factory;
+  return std::move(factory).Make(type);
+}
+
+// Walk an edit script and invoke `visitor` once per hunk, i.e. per maximal group
+// of insertions/deletions which is not interrupted by a run of shared elements.
+// The half-open ranges passed to the visitor index into base (delete_*) and
+// target (insert_*). Traversal stops at the first non-OK Status.
+Status VisitEditScript(
+    const Array& edits,
+    const std::function<Status(int64_t delete_begin, int64_t delete_end,
+                               int64_t insert_begin, int64_t insert_end)>& visitor) {
+  static const auto edits_type =
+      struct_({field("insert", boolean()), field("run_length", int64())});
+  DCHECK(edits.type()->Equals(*edits_type));
+  DCHECK_GE(edits.length(), 1);
+
+  const auto& edits_struct = checked_cast<const StructArray&>(edits);
+  auto insert = checked_pointer_cast<BooleanArray>(edits_struct.field(0));
+  auto run_lengths = checked_pointer_cast<Int64Array>(edits_struct.field(1));
+
+  DCHECK(!insert->Value(0));
+
+  // the first element only carries the length of the leading shared run
+  int64_t run = run_lengths->Value(0);
+  int64_t base_begin = run, base_end = run;
+  int64_t target_begin = run, target_end = run;
+
+  for (int64_t i = 1; i < edits.length(); ++i) {
+    // each edit grows the pending hunk by one element on exactly one side
+    int64_t& grown_end = insert->Value(i) ? target_end : base_end;
+    ++grown_end;
+
+    run = run_lengths->Value(i);
+    if (run == 0) {
+      // no shared elements follow: the hunk continues with the next edit
+      continue;
+    }
+
+    RETURN_NOT_OK(visitor(base_begin, base_end, target_begin, target_end));
+    // skip over the shared run on both sides and start an empty hunk
+    base_end += run;
+    base_begin = base_end;
+    target_end += run;
+    target_begin = target_end;
+  }
+
+  if (run != 0) {
+    return Status::OK();
+  }
+  // the script ended on an empty run, so the last hunk is still pending
+  return visitor(base_begin, base_end, target_begin, target_end);
+}
+
+// Callable which writes an edit script to an ostream in a unified-diff-like
+// layout. It is invoked with (edits, base, target) to format a whole script and
+// re-enters itself with four indices, once per hunk, through VisitEditScript.
+class UnifiedDiffFormatter {
+ public:
+  UnifiedDiffFormatter(std::ostream* os, Formatter formatter)
+      : os_(os), formatter_(std::move(formatter)) {}
+
+  // Format one hunk: a header, then deleted elements of base prefixed with "-",
+  // then inserted elements of target prefixed with "+".
+  Status operator()(int64_t delete_begin, int64_t delete_end, int64_t insert_begin,
+                    int64_t insert_end) {
+    *os_ << "@@ -" << delete_begin << ", +" << insert_begin << " @@" << std::endl;
+    WriteLines("-", *base_, delete_begin, delete_end);
+    WriteLines("+", *target_, insert_begin, insert_end);
+    return Status::OK();
+  }
+
+  // Format a whole edit script. A script of length 1 holds no edits, in which
+  // case nothing is written.
+  Status operator()(const Array& edits, const Array& base, const Array& target) {
+    if (edits.length() != 1) {
+      base_ = &base;
+      target_ = &target;
+      *os_ << std::endl;
+      return VisitEditScript(edits, *this);
+    }
+    return Status::OK();
+  }
+
+ private:
+  // Write elements [begin, end) of `array`, one per line, each preceded by
+  // `prefix`; null elements are written as "null".
+  void WriteLines(const char* prefix, const Array& array, int64_t begin, int64_t end) {
+    for (int64_t row = begin; row < end; ++row) {
+      *os_ << prefix;
+      if (array.IsValid(row)) {
+        formatter_(array, row, os_);
+      } else {
+        *os_ << "null";
+      }
+      *os_ << std::endl;
+    }
+  }
+
+  std::ostream* os_ = nullptr;
+  const Array* base_ = nullptr;
+  const Array* target_ = nullptr;
+  Formatter formatter_;
+};
+
+// Return a callable which formats an edit script for arrays of `type` to `os`.
+// Null-typed arrays get a dedicated formatter which only reports a difference in
+// length; every other type is delegated to UnifiedDiffFormatter.
+Result<std::function<Status(const Array& edits, const Array& base, const Array& target)>>
+MakeUnifiedDiffFormatter(const DataType& type, std::ostream* os) {
+  if (type.id() != Type::NA) {
+    ARROW_ASSIGN_OR_RAISE(auto formatter, MakeFormatter(type));
+    return UnifiedDiffFormatter(os, std::move(formatter));
+  }
+
+  // the edit script itself is not consulted for null arrays
+  return [os](const Array& edits, const Array& base, const Array& target) {
+    if (base.length() == target.length()) {
+      return Status::OK();
+    }
+    *os << "# Null arrays differed" << std::endl
+        << "-" << base.length() << " nulls" << std::endl
+        << "+" << target.length() << " nulls" << std::endl;
+    return Status::OK();
+  };
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/diff.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/diff.h
index a405164b333..874f7a632b2 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/diff.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/diff.h
@@ -1,76 +1,76 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <functional>
-#include <iosfwd>
-#include <memory>
-
-#include "arrow/array/array_base.h"
-#include "arrow/array/array_nested.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-/// \brief Compare two arrays, returning an edit script which expresses the difference
-/// between them
-///
-/// An edit script is an array of struct(insert: bool, run_length: int64_t).
-/// Each element of "insert" determines whether an element was inserted into (true)
-/// or deleted from (false) base. Each insertion or deletion is followed by a run of
-/// elements which are unchanged from base to target; the length of this run is stored
-/// in "run_length". (Note that the edit script begins and ends with a run of shared
-/// elements but both fields of the struct must have the same length. To accommodate this
-/// the first element of "insert" should be ignored.)
-///
-/// For example for base "hlloo" and target "hello", the edit script would be
-/// [
-/// {"insert": false, "run_length": 1}, // leading run of length 1 ("h")
-/// {"insert": true, "run_length": 3}, // insert("e") then a run of length 3 ("llo")
-/// {"insert": false, "run_length": 0} // delete("o") then an empty run
-/// ]
-///
-/// Diffing arrays containing nulls is not currently supported.
-///
-/// \param[in] base baseline for comparison
-/// \param[in] target an array of identical type to base whose elements differ from base's
-/// \param[in] pool memory to store the result will be allocated from this memory pool
-/// \return an edit script array which can be applied to base to produce target
-ARROW_EXPORT
-Result<std::shared_ptr<StructArray>> Diff(const Array& base, const Array& target,
- MemoryPool* pool = default_memory_pool());
-
-/// \brief visitor interface for easy traversal of an edit script
-///
-/// visitor will be called for each hunk of insertions and deletions.
-ARROW_EXPORT Status VisitEditScript(
- const Array& edits,
- const std::function<Status(int64_t delete_begin, int64_t delete_end,
- int64_t insert_begin, int64_t insert_end)>& visitor);
-
-/// \brief return a function which will format an edit script in unified
-/// diff format to os, given base and target arrays of type
-ARROW_EXPORT Result<
- std::function<Status(const Array& edits, const Array& base, const Array& target)>>
-MakeUnifiedDiffFormatter(const DataType& type, std::ostream* os);
-
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <functional>
+#include <iosfwd>
+#include <memory>
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/array_nested.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+/// \brief Compare two arrays, returning an edit script which expresses the difference
+/// between them
+///
+/// An edit script is an array of struct(insert: bool, run_length: int64_t).
+/// Each element of "insert" determines whether an element was inserted into (true)
+/// or deleted from (false) base. Each insertion or deletion is followed by a run of
+/// elements which are unchanged from base to target; the length of this run is stored
+/// in "run_length". (Note that the edit script begins and ends with a run of shared
+/// elements but both fields of the struct must have the same length. To accommodate this
+/// the first element of "insert" should be ignored.)
+///
+/// For example for base "hlloo" and target "hello", the edit script would be
+/// [
+/// {"insert": false, "run_length": 1}, // leading run of length 1 ("h")
+/// {"insert": true, "run_length": 3}, // insert("e") then a run of length 3 ("llo")
+/// {"insert": false, "run_length": 0} // delete("o") then an empty run
+/// ]
+///
+/// Diffing arrays containing nulls is not currently supported.
+///
+/// \param[in] base baseline for comparison
+/// \param[in] target an array of identical type to base whose elements differ from base's
+/// \param[in] pool memory to store the result will be allocated from this memory pool
+/// \return an edit script array which can be applied to base to produce target
+ARROW_EXPORT
+Result<std::shared_ptr<StructArray>> Diff(const Array& base, const Array& target,
+ MemoryPool* pool = default_memory_pool());
+
+/// \brief visitor interface for easy traversal of an edit script
+///
+/// visitor will be called for each hunk of insertions and deletions.
+ARROW_EXPORT Status VisitEditScript(
+ const Array& edits,
+ const std::function<Status(int64_t delete_begin, int64_t delete_end,
+ int64_t insert_begin, int64_t insert_end)>& visitor);
+
+/// \brief return a function which will format an edit script in unified
+/// diff format to os, given base and target arrays of the specified type
+ARROW_EXPORT Result<
+ std::function<Status(const Array& edits, const Array& base, const Array& target)>>
+MakeUnifiedDiffFormatter(const DataType& type, std::ostream* os);
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/util.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/util.cc
index ed26ecff4e0..fc64e7414bf 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/util.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/util.cc
@@ -1,80 +1,80 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/array/util.h"
-
-#include <algorithm>
-#include <array>
-#include <cstdint>
-#include <cstring>
-#include <limits>
-#include <memory>
-#include <type_traits>
-#include <utility>
-#include <vector>
-
-#include "arrow/array/array_base.h"
-#include "arrow/array/array_dict.h"
-#include "arrow/array/array_primitive.h"
-#include "arrow/array/concatenate.h"
-#include "arrow/buffer.h"
-#include "arrow/buffer_builder.h"
-#include "arrow/extension_type.h"
-#include "arrow/scalar.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/decimal.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/array/util.h"
+
+#include <algorithm>
+#include <array>
+#include <cstdint>
+#include <cstring>
+#include <limits>
+#include <memory>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/array_dict.h"
+#include "arrow/array/array_primitive.h"
+#include "arrow/array/concatenate.h"
+#include "arrow/buffer.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/extension_type.h"
+#include "arrow/scalar.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/decimal.h"
#include "arrow/util/endian.h"
-#include "arrow/util/logging.h"
-#include "arrow/visitor_inline.h"
-
-namespace arrow {
-
-using internal::checked_cast;
-
-// ----------------------------------------------------------------------
-// Loading from ArrayData
-
+#include "arrow/util/logging.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+
+// ----------------------------------------------------------------------
+// Loading from ArrayData
+
namespace {
-
-class ArrayDataWrapper {
- public:
- ArrayDataWrapper(const std::shared_ptr<ArrayData>& data, std::shared_ptr<Array>* out)
- : data_(data), out_(out) {}
-
- template <typename T>
- Status Visit(const T&) {
- using ArrayType = typename TypeTraits<T>::ArrayType;
- *out_ = std::make_shared<ArrayType>(data_);
- return Status::OK();
- }
-
- Status Visit(const ExtensionType& type) {
- *out_ = type.MakeArray(data_);
- return Status::OK();
- }
-
- const std::shared_ptr<ArrayData>& data_;
- std::shared_ptr<Array>* out_;
-};
-
+
+// Type visitor which wraps an ArrayData in an Array instance and stores the
+// result in `*out`.
+class ArrayDataWrapper {
+ public:
+  ArrayDataWrapper(const std::shared_ptr<ArrayData>& data, std::shared_ptr<Array>* out)
+      : data_(data), out_(out) {}
+
+  // Generic case: instantiate the array class which TypeTraits associates with T.
+  template <typename T>
+  Status Visit(const T&) {
+    *out_ = std::make_shared<typename TypeTraits<T>::ArrayType>(data_);
+    return Status::OK();
+  }
+
+  // Extension types build their own array through ExtensionType::MakeArray.
+  Status Visit(const ExtensionType& type) {
+    *out_ = type.MakeArray(data_);
+    return Status::OK();
+  }
+
+  // held by reference: `data` must stay alive for as long as the wrapper is used
+  const std::shared_ptr<ArrayData>& data_;
+  std::shared_ptr<Array>* out_;
+};
+
class ArrayDataEndianSwapper {
public:
ArrayDataEndianSwapper(const std::shared_ptr<ArrayData>& data, int64_t length)
@@ -273,249 +273,249 @@ Result<std::shared_ptr<ArrayData>> SwapEndianArrayData(
return std::move(swapper.out_);
}
-} // namespace internal
-
-std::shared_ptr<Array> MakeArray(const std::shared_ptr<ArrayData>& data) {
- std::shared_ptr<Array> out;
+} // namespace internal
+
+std::shared_ptr<Array> MakeArray(const std::shared_ptr<ArrayData>& data) {
+ std::shared_ptr<Array> out;
ArrayDataWrapper wrapper_visitor(data, &out);
- DCHECK_OK(VisitTypeInline(*data->type, &wrapper_visitor));
- DCHECK(out);
- return out;
-}
-
-// ----------------------------------------------------------------------
-// Misc APIs
-
+ DCHECK_OK(VisitTypeInline(*data->type, &wrapper_visitor));
+ DCHECK(out);
+ return out;
+}
+
+// ----------------------------------------------------------------------
+// Misc APIs
+
namespace {
-
-// get the maximum buffer length required, then allocate a single zeroed buffer
-// to use anywhere a buffer is required
-class NullArrayFactory {
- public:
- struct GetBufferLength {
- GetBufferLength(const std::shared_ptr<DataType>& type, int64_t length)
- : type_(*type), length_(length), buffer_length_(BitUtil::BytesForBits(length)) {}
-
- Result<int64_t> Finish() && {
- RETURN_NOT_OK(VisitTypeInline(type_, this));
- return buffer_length_;
- }
-
- template <typename T, typename = decltype(TypeTraits<T>::bytes_required(0))>
- Status Visit(const T&) {
- return MaxOf(TypeTraits<T>::bytes_required(length_));
- }
-
- template <typename T>
- enable_if_var_size_list<T, Status> Visit(const T&) {
- // values array may be empty, but there must be at least one offset of 0
- return MaxOf(sizeof(typename T::offset_type) * (length_ + 1));
- }
-
- template <typename T>
- enable_if_base_binary<T, Status> Visit(const T&) {
- // values buffer may be empty, but there must be at least one offset of 0
- return MaxOf(sizeof(typename T::offset_type) * (length_ + 1));
- }
-
- Status Visit(const FixedSizeListType& type) {
- return MaxOf(GetBufferLength(type.value_type(), type.list_size() * length_));
- }
-
- Status Visit(const FixedSizeBinaryType& type) {
- return MaxOf(type.byte_width() * length_);
- }
-
- Status Visit(const StructType& type) {
- for (const auto& child : type.fields()) {
- RETURN_NOT_OK(MaxOf(GetBufferLength(child->type(), length_)));
- }
- return Status::OK();
- }
-
- Status Visit(const UnionType& type) {
- // type codes
- RETURN_NOT_OK(MaxOf(length_));
- if (type.mode() == UnionMode::DENSE) {
- // offsets
- RETURN_NOT_OK(MaxOf(sizeof(int32_t) * length_));
- }
- for (const auto& child : type.fields()) {
- RETURN_NOT_OK(MaxOf(GetBufferLength(child->type(), length_)));
- }
- return Status::OK();
- }
-
- Status Visit(const DictionaryType& type) {
- RETURN_NOT_OK(MaxOf(GetBufferLength(type.value_type(), length_)));
- return MaxOf(GetBufferLength(type.index_type(), length_));
- }
-
- Status Visit(const ExtensionType& type) {
- // XXX is an extension array's length always == storage length
- return MaxOf(GetBufferLength(type.storage_type(), length_));
- }
-
- Status Visit(const DataType& type) {
- return Status::NotImplemented("construction of all-null ", type);
- }
-
- private:
- Status MaxOf(GetBufferLength&& other) {
- ARROW_ASSIGN_OR_RAISE(int64_t buffer_length, std::move(other).Finish());
- return MaxOf(buffer_length);
- }
-
- Status MaxOf(int64_t buffer_length) {
- if (buffer_length > buffer_length_) {
- buffer_length_ = buffer_length;
- }
- return Status::OK();
- }
-
- const DataType& type_;
- int64_t length_, buffer_length_;
- };
-
- NullArrayFactory(MemoryPool* pool, const std::shared_ptr<DataType>& type,
- int64_t length)
- : pool_(pool), type_(type), length_(length) {}
-
- Status CreateBuffer() {
- ARROW_ASSIGN_OR_RAISE(int64_t buffer_length,
- GetBufferLength(type_, length_).Finish());
- ARROW_ASSIGN_OR_RAISE(buffer_, AllocateBuffer(buffer_length, pool_));
- std::memset(buffer_->mutable_data(), 0, buffer_->size());
- return Status::OK();
- }
-
- Result<std::shared_ptr<ArrayData>> Create() {
- if (buffer_ == nullptr) {
- RETURN_NOT_OK(CreateBuffer());
- }
- std::vector<std::shared_ptr<ArrayData>> child_data(type_->num_fields());
- out_ = ArrayData::Make(type_, length_, {buffer_}, child_data, length_, 0);
- RETURN_NOT_OK(VisitTypeInline(*type_, this));
- return out_;
- }
-
- Status Visit(const NullType&) {
- out_->buffers.resize(1, nullptr);
- return Status::OK();
- }
-
- Status Visit(const FixedWidthType&) {
- out_->buffers.resize(2, buffer_);
- return Status::OK();
- }
-
- template <typename T>
- enable_if_base_binary<T, Status> Visit(const T&) {
- out_->buffers.resize(3, buffer_);
- return Status::OK();
- }
-
- template <typename T>
- enable_if_var_size_list<T, Status> Visit(const T& type) {
- out_->buffers.resize(2, buffer_);
- ARROW_ASSIGN_OR_RAISE(out_->child_data[0], CreateChild(0, /*length=*/0));
- return Status::OK();
- }
-
- Status Visit(const FixedSizeListType& type) {
- ARROW_ASSIGN_OR_RAISE(out_->child_data[0],
- CreateChild(0, length_ * type.list_size()));
- return Status::OK();
- }
-
- Status Visit(const StructType& type) {
- for (int i = 0; i < type_->num_fields(); ++i) {
- ARROW_ASSIGN_OR_RAISE(out_->child_data[i], CreateChild(i, length_));
- }
- return Status::OK();
- }
-
- Status Visit(const UnionType& type) {
- out_->buffers.resize(2);
-
- // First buffer is always null
- out_->buffers[0] = nullptr;
-
- // Type codes are all zero, so we can use buffer_ which has had it's memory
- // zeroed
- out_->buffers[1] = buffer_;
-
- // For sparse unions, we now create children with the same length as the
- // parent
- int64_t child_length = length_;
- if (type.mode() == UnionMode::DENSE) {
- // For dense unions, we set the offsets to all zero and create children
- // with length 1
- out_->buffers.resize(3);
- out_->buffers[2] = buffer_;
-
- child_length = 1;
- }
- for (int i = 0; i < type_->num_fields(); ++i) {
- ARROW_ASSIGN_OR_RAISE(out_->child_data[i], CreateChild(i, child_length));
- }
- return Status::OK();
- }
-
- Status Visit(const DictionaryType& type) {
- out_->buffers.resize(2, buffer_);
- ARROW_ASSIGN_OR_RAISE(auto typed_null_dict, MakeArrayOfNull(type.value_type(), 0));
- out_->dictionary = typed_null_dict->data();
- return Status::OK();
- }
-
- Status Visit(const DataType& type) {
- return Status::NotImplemented("construction of all-null ", type);
- }
-
- Result<std::shared_ptr<ArrayData>> CreateChild(int i, int64_t length) {
- NullArrayFactory child_factory(pool_, type_->field(i)->type(), length);
- child_factory.buffer_ = buffer_;
- return child_factory.Create();
- }
-
- MemoryPool* pool_;
- std::shared_ptr<DataType> type_;
- int64_t length_;
- std::shared_ptr<ArrayData> out_;
- std::shared_ptr<Buffer> buffer_;
-};
-
-class RepeatedArrayFactory {
- public:
- RepeatedArrayFactory(MemoryPool* pool, const Scalar& scalar, int64_t length)
- : pool_(pool), scalar_(scalar), length_(length) {}
-
- Result<std::shared_ptr<Array>> Create() {
- RETURN_NOT_OK(VisitTypeInline(*scalar_.type, this));
- return out_;
- }
-
+
+// get the maximum buffer length required, then allocate a single zeroed buffer
+// to use anywhere a buffer is required
+// Builds the ArrayData of an array of `length` nulls of a given type.
+//
+// A single zeroed buffer, large enough for the biggest buffer required anywhere
+// in the (possibly nested) array, is allocated once and shared by every buffer
+// slot which needs zeroed memory, including those of child arrays.
+class NullArrayFactory {
+ public:
+  // Type visitor computing the size in bytes of the largest buffer needed to
+  // represent `length` nulls of `type`, children included.
+  struct GetBufferLength {
+    GetBufferLength(const std::shared_ptr<DataType>& type, int64_t length)
+        : type_(*type), length_(length), buffer_length_(BitUtil::BytesForBits(length)) {}
+
+    Result<int64_t> Finish() && {
+      RETURN_NOT_OK(VisitTypeInline(type_, this));
+      return buffer_length_;
+    }
+
+    // selected for types whose TypeTraits provide bytes_required()
+    template <typename T, typename = decltype(TypeTraits<T>::bytes_required(0))>
+    Status Visit(const T&) {
+      return MaxOf(TypeTraits<T>::bytes_required(length_));
+    }
+
+    template <typename T>
+    enable_if_var_size_list<T, Status> Visit(const T&) {
+      // values array may be empty, but there must be at least one offset of 0
+      return MaxOf(sizeof(typename T::offset_type) * (length_ + 1));
+    }
+
+    template <typename T>
+    enable_if_base_binary<T, Status> Visit(const T&) {
+      // values buffer may be empty, but there must be at least one offset of 0
+      return MaxOf(sizeof(typename T::offset_type) * (length_ + 1));
+    }
+
+    Status Visit(const FixedSizeListType& type) {
+      return MaxOf(GetBufferLength(type.value_type(), type.list_size() * length_));
+    }
+
+    Status Visit(const FixedSizeBinaryType& type) {
+      return MaxOf(type.byte_width() * length_);
+    }
+
+    Status Visit(const StructType& type) {
+      for (const auto& child : type.fields()) {
+        RETURN_NOT_OK(MaxOf(GetBufferLength(child->type(), length_)));
+      }
+      return Status::OK();
+    }
+
+    Status Visit(const UnionType& type) {
+      // type codes
+      RETURN_NOT_OK(MaxOf(length_));
+      if (type.mode() == UnionMode::DENSE) {
+        // offsets
+        RETURN_NOT_OK(MaxOf(sizeof(int32_t) * length_));
+      }
+      for (const auto& child : type.fields()) {
+        RETURN_NOT_OK(MaxOf(GetBufferLength(child->type(), length_)));
+      }
+      return Status::OK();
+    }
+
+    Status Visit(const DictionaryType& type) {
+      RETURN_NOT_OK(MaxOf(GetBufferLength(type.value_type(), length_)));
+      return MaxOf(GetBufferLength(type.index_type(), length_));
+    }
+
+    Status Visit(const ExtensionType& type) {
+      // XXX is an extension array's length always == storage length
+      return MaxOf(GetBufferLength(type.storage_type(), length_));
+    }
+
+    Status Visit(const DataType& type) {
+      return Status::NotImplemented("construction of all-null ", type);
+    }
+
+   private:
+    // fold in the requirement of a nested type
+    Status MaxOf(GetBufferLength&& other) {
+      ARROW_ASSIGN_OR_RAISE(int64_t buffer_length, std::move(other).Finish());
+      return MaxOf(buffer_length);
+    }
+
+    // keep the running maximum
+    Status MaxOf(int64_t buffer_length) {
+      if (buffer_length > buffer_length_) {
+        buffer_length_ = buffer_length;
+      }
+      return Status::OK();
+    }
+
+    const DataType& type_;
+    int64_t length_, buffer_length_;
+  };
+
+  NullArrayFactory(MemoryPool* pool, const std::shared_ptr<DataType>& type,
+                   int64_t length)
+      : pool_(pool), type_(type), length_(length) {}
+
+  // Allocate and zero the shared buffer.
+  Status CreateBuffer() {
+    ARROW_ASSIGN_OR_RAISE(int64_t buffer_length,
+                          GetBufferLength(type_, length_).Finish());
+    ARROW_ASSIGN_OR_RAISE(buffer_, AllocateBuffer(buffer_length, pool_));
+    std::memset(buffer_->mutable_data(), 0, buffer_->size());
+    return Status::OK();
+  }
+
+  // Assemble the ArrayData. The shared buffer is allocated on demand unless one
+  // was already injected (as CreateChild does for child factories).
+  Result<std::shared_ptr<ArrayData>> Create() {
+    if (buffer_ == nullptr) {
+      RETURN_NOT_OK(CreateBuffer());
+    }
+    std::vector<std::shared_ptr<ArrayData>> child_data(type_->num_fields());
+    // start with the zeroed buffer as validity bitmap and null_count == length
+    out_ = ArrayData::Make(type_, length_, {buffer_}, child_data, length_, 0);
+    RETURN_NOT_OK(VisitTypeInline(*type_, this));
+    return out_;
+  }
+
+  Status Visit(const NullType&) {
+    out_->buffers.resize(1, nullptr);
+    return Status::OK();
+  }
+
+  Status Visit(const FixedWidthType&) {
+    out_->buffers.resize(2, buffer_);
+    return Status::OK();
+  }
+
+  template <typename T>
+  enable_if_base_binary<T, Status> Visit(const T&) {
+    out_->buffers.resize(3, buffer_);
+    return Status::OK();
+  }
+
+  template <typename T>
+  enable_if_var_size_list<T, Status> Visit(const T& type) {
+    out_->buffers.resize(2, buffer_);
+    ARROW_ASSIGN_OR_RAISE(out_->child_data[0], CreateChild(0, /*length=*/0));
+    return Status::OK();
+  }
+
+  Status Visit(const FixedSizeListType& type) {
+    ARROW_ASSIGN_OR_RAISE(out_->child_data[0],
+                          CreateChild(0, length_ * type.list_size()));
+    return Status::OK();
+  }
+
+  Status Visit(const StructType& type) {
+    for (int i = 0; i < type_->num_fields(); ++i) {
+      ARROW_ASSIGN_OR_RAISE(out_->child_data[i], CreateChild(i, length_));
+    }
+    return Status::OK();
+  }
+
+  Status Visit(const UnionType& type) {
+    out_->buffers.resize(2);
+
+    // First buffer is always null
+    out_->buffers[0] = nullptr;
+
+    // Every slot refers to the first child. buffer_ has had its memory zeroed,
+    // so it can be used for the type codes as long as that child's type code
+    // is 0. Otherwise 0 may not be a valid type code for this union and a
+    // dedicated buffer filled with the first type code is required.
+    out_->buffers[1] = buffer_;
+    const auto& type_codes = type.type_codes();
+    if (!type_codes.empty() && type_codes[0] != 0) {
+      ARROW_ASSIGN_OR_RAISE(out_->buffers[1], AllocateBuffer(length_, pool_));
+      std::memset(out_->buffers[1]->mutable_data(), type_codes[0], length_);
+    }
+
+    // For sparse unions, we now create children with the same length as the
+    // parent
+    int64_t child_length = length_;
+    if (type.mode() == UnionMode::DENSE) {
+      // For dense unions, we set the offsets to all zero and create children
+      // with length 1
+      out_->buffers.resize(3);
+      out_->buffers[2] = buffer_;
+
+      child_length = 1;
+    }
+    for (int i = 0; i < type_->num_fields(); ++i) {
+      ARROW_ASSIGN_OR_RAISE(out_->child_data[i], CreateChild(i, child_length));
+    }
+    return Status::OK();
+  }
+
+  Status Visit(const DictionaryType& type) {
+    out_->buffers.resize(2, buffer_);
+    // the dictionary itself is an empty array of the value type
+    ARROW_ASSIGN_OR_RAISE(auto typed_null_dict, MakeArrayOfNull(type.value_type(), 0));
+    out_->dictionary = typed_null_dict->data();
+    return Status::OK();
+  }
+
+  Status Visit(const DataType& type) {
+    return Status::NotImplemented("construction of all-null ", type);
+  }
+
+  // Build the all-null data of field `i` with the given length, reusing this
+  // factory's zeroed buffer instead of allocating a new one.
+  Result<std::shared_ptr<ArrayData>> CreateChild(int i, int64_t length) {
+    NullArrayFactory child_factory(pool_, type_->field(i)->type(), length);
+    child_factory.buffer_ = buffer_;
+    return child_factory.Create();
+  }
+
+  MemoryPool* pool_;
+  std::shared_ptr<DataType> type_;
+  int64_t length_;
+  std::shared_ptr<ArrayData> out_;
+  std::shared_ptr<Buffer> buffer_;
+};
+
+class RepeatedArrayFactory {
+ public:
+ RepeatedArrayFactory(MemoryPool* pool, const Scalar& scalar, int64_t length)
+ : pool_(pool), scalar_(scalar), length_(length) {}
+
+ Result<std::shared_ptr<Array>> Create() {
+ RETURN_NOT_OK(VisitTypeInline(*scalar_.type, this));
+ return out_;
+ }
+
Status Visit(const NullType& type) {
DCHECK(false); // already forwarded to MakeArrayOfNull
return Status::OK();
- }
-
- Status Visit(const BooleanType&) {
- ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateBitmap(length_, pool_));
- BitUtil::SetBitsTo(buffer->mutable_data(), 0, length_,
- checked_cast<const BooleanScalar&>(scalar_).value);
- out_ = std::make_shared<BooleanArray>(length_, buffer);
- return Status::OK();
- }
-
- template <typename T>
+ }
+
+ Status Visit(const BooleanType&) {
+ ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateBitmap(length_, pool_));
+ BitUtil::SetBitsTo(buffer->mutable_data(), 0, length_,
+ checked_cast<const BooleanScalar&>(scalar_).value);
+ out_ = std::make_shared<BooleanArray>(length_, buffer);
+ return Status::OK();
+ }
+
+ template <typename T>
enable_if_t<is_number_type<T>::value || is_temporal_type<T>::value, Status> Visit(
const T&) {
- auto value = checked_cast<const typename TypeTraits<T>::ScalarType&>(scalar_).value;
- return FinishFixedWidth(&value, sizeof(value));
- }
-
+ auto value = checked_cast<const typename TypeTraits<T>::ScalarType&>(scalar_).value;
+ return FinishFixedWidth(&value, sizeof(value));
+ }
+
Status Visit(const FixedSizeBinaryType& type) {
auto value = checked_cast<const FixedSizeBinaryScalar&>(scalar_).value;
return FinishFixedWidth(value->data(), type.byte_width());
@@ -525,94 +525,94 @@ class RepeatedArrayFactory {
enable_if_decimal<T, Status> Visit(const T&) {
using ScalarType = typename TypeTraits<T>::ScalarType;
auto value = checked_cast<const ScalarType&>(scalar_).value.ToBytes();
- return FinishFixedWidth(value.data(), value.size());
- }
-
+ return FinishFixedWidth(value.data(), value.size());
+ }
+
Status Visit(const Decimal256Type&) {
auto value = checked_cast<const Decimal256Scalar&>(scalar_).value.ToBytes();
return FinishFixedWidth(value.data(), value.size());
}
- template <typename T>
- enable_if_base_binary<T, Status> Visit(const T&) {
- std::shared_ptr<Buffer> value =
- checked_cast<const typename TypeTraits<T>::ScalarType&>(scalar_).value;
- std::shared_ptr<Buffer> values_buffer, offsets_buffer;
- RETURN_NOT_OK(CreateBufferOf(value->data(), value->size(), &values_buffer));
- auto size = static_cast<typename T::offset_type>(value->size());
- RETURN_NOT_OK(CreateOffsetsBuffer(size, &offsets_buffer));
- out_ = std::make_shared<typename TypeTraits<T>::ArrayType>(length_, offsets_buffer,
- values_buffer);
- return Status::OK();
- }
-
- template <typename T>
- enable_if_var_size_list<T, Status> Visit(const T& type) {
- using ScalarType = typename TypeTraits<T>::ScalarType;
- using ArrayType = typename TypeTraits<T>::ArrayType;
-
- auto value = checked_cast<const ScalarType&>(scalar_).value;
-
- ArrayVector values(length_, value);
- ARROW_ASSIGN_OR_RAISE(auto value_array, Concatenate(values, pool_));
-
- std::shared_ptr<Buffer> offsets_buffer;
- auto size = static_cast<typename T::offset_type>(value->length());
- RETURN_NOT_OK(CreateOffsetsBuffer(size, &offsets_buffer));
-
- out_ =
- std::make_shared<ArrayType>(scalar_.type, length_, offsets_buffer, value_array);
- return Status::OK();
- }
-
- Status Visit(const FixedSizeListType& type) {
- auto value = checked_cast<const FixedSizeListScalar&>(scalar_).value;
-
- ArrayVector values(length_, value);
- ARROW_ASSIGN_OR_RAISE(auto value_array, Concatenate(values, pool_));
-
- out_ = std::make_shared<FixedSizeListArray>(scalar_.type, length_, value_array);
- return Status::OK();
- }
-
- Status Visit(const MapType& type) {
- auto map_scalar = checked_cast<const MapScalar&>(scalar_);
- auto struct_array = checked_cast<const StructArray*>(map_scalar.value.get());
-
- ArrayVector keys(length_, struct_array->field(0));
- ArrayVector values(length_, struct_array->field(1));
-
- ARROW_ASSIGN_OR_RAISE(auto key_array, Concatenate(keys, pool_));
- ARROW_ASSIGN_OR_RAISE(auto value_array, Concatenate(values, pool_));
-
- std::shared_ptr<Buffer> offsets_buffer;
- auto size = static_cast<typename MapType::offset_type>(struct_array->length());
- RETURN_NOT_OK(CreateOffsetsBuffer(size, &offsets_buffer));
-
- out_ = std::make_shared<MapArray>(scalar_.type, length_, std::move(offsets_buffer),
- std::move(key_array), std::move(value_array));
- return Status::OK();
- }
-
- Status Visit(const DictionaryType& type) {
- const auto& value = checked_cast<const DictionaryScalar&>(scalar_).value;
- ARROW_ASSIGN_OR_RAISE(auto indices,
- MakeArrayFromScalar(*value.index, length_, pool_));
- out_ = std::make_shared<DictionaryArray>(scalar_.type, std::move(indices),
- value.dictionary);
- return Status::OK();
- }
-
- Status Visit(const StructType& type) {
- ArrayVector fields;
- for (const auto& value : checked_cast<const StructScalar&>(scalar_).value) {
- fields.emplace_back();
- ARROW_ASSIGN_OR_RAISE(fields.back(), MakeArrayFromScalar(*value, length_, pool_));
- }
- out_ = std::make_shared<StructArray>(scalar_.type, length_, std::move(fields));
- return Status::OK();
- }
-
+ template <typename T>
+ enable_if_base_binary<T, Status> Visit(const T&) {
+ std::shared_ptr<Buffer> value =
+ checked_cast<const typename TypeTraits<T>::ScalarType&>(scalar_).value;
+ std::shared_ptr<Buffer> values_buffer, offsets_buffer;
+ RETURN_NOT_OK(CreateBufferOf(value->data(), value->size(), &values_buffer));
+ auto size = static_cast<typename T::offset_type>(value->size());
+ RETURN_NOT_OK(CreateOffsetsBuffer(size, &offsets_buffer));
+ out_ = std::make_shared<typename TypeTraits<T>::ArrayType>(length_, offsets_buffer,
+ values_buffer);
+ return Status::OK();
+ }
+
+ template <typename T>
+ enable_if_var_size_list<T, Status> Visit(const T& type) {
+ using ScalarType = typename TypeTraits<T>::ScalarType;
+ using ArrayType = typename TypeTraits<T>::ArrayType;
+
+ auto value = checked_cast<const ScalarType&>(scalar_).value;
+
+ ArrayVector values(length_, value);
+ ARROW_ASSIGN_OR_RAISE(auto value_array, Concatenate(values, pool_));
+
+ std::shared_ptr<Buffer> offsets_buffer;
+ auto size = static_cast<typename T::offset_type>(value->length());
+ RETURN_NOT_OK(CreateOffsetsBuffer(size, &offsets_buffer));
+
+ out_ =
+ std::make_shared<ArrayType>(scalar_.type, length_, offsets_buffer, value_array);
+ return Status::OK();
+ }
+
+ Status Visit(const FixedSizeListType& type) {
+ auto value = checked_cast<const FixedSizeListScalar&>(scalar_).value;
+
+ ArrayVector values(length_, value);
+ ARROW_ASSIGN_OR_RAISE(auto value_array, Concatenate(values, pool_));
+
+ out_ = std::make_shared<FixedSizeListArray>(scalar_.type, length_, value_array);
+ return Status::OK();
+ }
+
+ Status Visit(const MapType& type) {
+ auto map_scalar = checked_cast<const MapScalar&>(scalar_);
+ auto struct_array = checked_cast<const StructArray*>(map_scalar.value.get());
+
+ ArrayVector keys(length_, struct_array->field(0));
+ ArrayVector values(length_, struct_array->field(1));
+
+ ARROW_ASSIGN_OR_RAISE(auto key_array, Concatenate(keys, pool_));
+ ARROW_ASSIGN_OR_RAISE(auto value_array, Concatenate(values, pool_));
+
+ std::shared_ptr<Buffer> offsets_buffer;
+ auto size = static_cast<typename MapType::offset_type>(struct_array->length());
+ RETURN_NOT_OK(CreateOffsetsBuffer(size, &offsets_buffer));
+
+ out_ = std::make_shared<MapArray>(scalar_.type, length_, std::move(offsets_buffer),
+ std::move(key_array), std::move(value_array));
+ return Status::OK();
+ }
+
+ Status Visit(const DictionaryType& type) {
+ const auto& value = checked_cast<const DictionaryScalar&>(scalar_).value;
+ ARROW_ASSIGN_OR_RAISE(auto indices,
+ MakeArrayFromScalar(*value.index, length_, pool_));
+ out_ = std::make_shared<DictionaryArray>(scalar_.type, std::move(indices),
+ value.dictionary);
+ return Status::OK();
+ }
+
+ Status Visit(const StructType& type) {
+ ArrayVector fields;
+ for (const auto& value : checked_cast<const StructScalar&>(scalar_).value) {
+ fields.emplace_back();
+ ARROW_ASSIGN_OR_RAISE(fields.back(), MakeArrayFromScalar(*value, length_, pool_));
+ }
+ out_ = std::make_shared<StructArray>(scalar_.type, length_, std::move(fields));
+ return Status::OK();
+ }
+
Status Visit(const ExtensionType& type) {
return Status::NotImplemented("construction from scalar of type ", *scalar_.type);
}
@@ -625,130 +625,130 @@ class RepeatedArrayFactory {
return Status::NotImplemented("construction from scalar of type ", *scalar_.type);
}
- template <typename OffsetType>
- Status CreateOffsetsBuffer(OffsetType value_length, std::shared_ptr<Buffer>* out) {
- TypedBufferBuilder<OffsetType> builder(pool_);
- RETURN_NOT_OK(builder.Resize(length_ + 1));
- OffsetType offset = 0;
- for (int64_t i = 0; i < length_ + 1; ++i, offset += value_length) {
- builder.UnsafeAppend(offset);
- }
- return builder.Finish(out);
- }
-
- Status CreateBufferOf(const void* data, size_t data_length,
- std::shared_ptr<Buffer>* out) {
- BufferBuilder builder(pool_);
- RETURN_NOT_OK(builder.Resize(length_ * data_length));
- for (int64_t i = 0; i < length_; ++i) {
- builder.UnsafeAppend(data, data_length);
- }
- return builder.Finish(out);
- }
-
- Status FinishFixedWidth(const void* data, size_t data_length) {
- std::shared_ptr<Buffer> buffer;
- RETURN_NOT_OK(CreateBufferOf(data, data_length, &buffer));
- out_ = MakeArray(
- ArrayData::Make(scalar_.type, length_, {nullptr, std::move(buffer)}, 0));
- return Status::OK();
- }
-
- MemoryPool* pool_;
- const Scalar& scalar_;
- int64_t length_;
- std::shared_ptr<Array> out_;
-};
-
+ template <typename OffsetType>
+ Status CreateOffsetsBuffer(OffsetType value_length, std::shared_ptr<Buffer>* out) {
+ TypedBufferBuilder<OffsetType> builder(pool_);
+ RETURN_NOT_OK(builder.Resize(length_ + 1));
+ OffsetType offset = 0;
+ for (int64_t i = 0; i < length_ + 1; ++i, offset += value_length) {
+ builder.UnsafeAppend(offset);
+ }
+ return builder.Finish(out);
+ }
+
+ Status CreateBufferOf(const void* data, size_t data_length,
+ std::shared_ptr<Buffer>* out) {
+ BufferBuilder builder(pool_);
+ RETURN_NOT_OK(builder.Resize(length_ * data_length));
+ for (int64_t i = 0; i < length_; ++i) {
+ builder.UnsafeAppend(data, data_length);
+ }
+ return builder.Finish(out);
+ }
+
+ Status FinishFixedWidth(const void* data, size_t data_length) {
+ std::shared_ptr<Buffer> buffer;
+ RETURN_NOT_OK(CreateBufferOf(data, data_length, &buffer));
+ out_ = MakeArray(
+ ArrayData::Make(scalar_.type, length_, {nullptr, std::move(buffer)}, 0));
+ return Status::OK();
+ }
+
+ MemoryPool* pool_;
+ const Scalar& scalar_;
+ int64_t length_;
+ std::shared_ptr<Array> out_;
+};
+
} // namespace
-
-Result<std::shared_ptr<Array>> MakeArrayOfNull(const std::shared_ptr<DataType>& type,
- int64_t length, MemoryPool* pool) {
+
+Result<std::shared_ptr<Array>> MakeArrayOfNull(const std::shared_ptr<DataType>& type,
+ int64_t length, MemoryPool* pool) {
ARROW_ASSIGN_OR_RAISE(auto data, NullArrayFactory(pool, type, length).Create());
- return MakeArray(data);
-}
-
-Result<std::shared_ptr<Array>> MakeArrayFromScalar(const Scalar& scalar, int64_t length,
- MemoryPool* pool) {
- if (!scalar.is_valid) {
- return MakeArrayOfNull(scalar.type, length, pool);
- }
+ return MakeArray(data);
+}
+
+Result<std::shared_ptr<Array>> MakeArrayFromScalar(const Scalar& scalar, int64_t length,
+ MemoryPool* pool) {
+ if (!scalar.is_valid) {
+ return MakeArrayOfNull(scalar.type, length, pool);
+ }
return RepeatedArrayFactory(pool, scalar, length).Create();
-}
-
-namespace internal {
-
-std::vector<ArrayVector> RechunkArraysConsistently(
- const std::vector<ArrayVector>& groups) {
- if (groups.size() <= 1) {
- return groups;
- }
- int64_t total_length = 0;
- for (const auto& array : groups.front()) {
- total_length += array->length();
- }
-#ifndef NDEBUG
- for (const auto& group : groups) {
- int64_t group_length = 0;
- for (const auto& array : group) {
- group_length += array->length();
- }
- DCHECK_EQ(group_length, total_length)
- << "Array groups should have the same total number of elements";
- }
-#endif
- if (total_length == 0) {
- return groups;
- }
-
- // Set up result vectors
- std::vector<ArrayVector> rechunked_groups(groups.size());
-
- // Set up progress counters
- std::vector<ArrayVector::const_iterator> current_arrays;
- std::vector<int64_t> array_offsets;
- for (const auto& group : groups) {
- current_arrays.emplace_back(group.cbegin());
- array_offsets.emplace_back(0);
- }
-
- // Scan all array vectors at once, rechunking along the way
- int64_t start = 0;
- while (start < total_length) {
- // First compute max possible length for next chunk
- int64_t chunk_length = std::numeric_limits<int64_t>::max();
- for (size_t i = 0; i < groups.size(); i++) {
- auto& arr_it = current_arrays[i];
- auto& offset = array_offsets[i];
- // Skip any done arrays (including 0-length arrays)
- while (offset == (*arr_it)->length()) {
- ++arr_it;
- offset = 0;
- }
- const auto& array = *arr_it;
- DCHECK_GT(array->length(), offset);
- chunk_length = std::min(chunk_length, array->length() - offset);
- }
- DCHECK_GT(chunk_length, 0);
-
- // Then slice all arrays along this chunk size
- for (size_t i = 0; i < groups.size(); i++) {
- const auto& array = *current_arrays[i];
- auto& offset = array_offsets[i];
- if (offset == 0 && array->length() == chunk_length) {
- // Slice spans entire array
- rechunked_groups[i].emplace_back(array);
- } else {
- DCHECK_LT(chunk_length - offset, array->length());
- rechunked_groups[i].emplace_back(array->Slice(offset, chunk_length));
- }
- offset += chunk_length;
- }
- start += chunk_length;
- }
-
- return rechunked_groups;
-}
-
-} // namespace internal
-} // namespace arrow
+}
+
+namespace internal {
+
+std::vector<ArrayVector> RechunkArraysConsistently(
+ const std::vector<ArrayVector>& groups) {
+ if (groups.size() <= 1) {
+ return groups;
+ }
+ int64_t total_length = 0;
+ for (const auto& array : groups.front()) {
+ total_length += array->length();
+ }
+#ifndef NDEBUG
+ for (const auto& group : groups) {
+ int64_t group_length = 0;
+ for (const auto& array : group) {
+ group_length += array->length();
+ }
+ DCHECK_EQ(group_length, total_length)
+ << "Array groups should have the same total number of elements";
+ }
+#endif
+ if (total_length == 0) {
+ return groups;
+ }
+
+ // Set up result vectors
+ std::vector<ArrayVector> rechunked_groups(groups.size());
+
+ // Set up progress counters
+ std::vector<ArrayVector::const_iterator> current_arrays;
+ std::vector<int64_t> array_offsets;
+ for (const auto& group : groups) {
+ current_arrays.emplace_back(group.cbegin());
+ array_offsets.emplace_back(0);
+ }
+
+ // Scan all array vectors at once, rechunking along the way
+ int64_t start = 0;
+ while (start < total_length) {
+ // First compute max possible length for next chunk
+ int64_t chunk_length = std::numeric_limits<int64_t>::max();
+ for (size_t i = 0; i < groups.size(); i++) {
+ auto& arr_it = current_arrays[i];
+ auto& offset = array_offsets[i];
+ // Skip any done arrays (including 0-length arrays)
+ while (offset == (*arr_it)->length()) {
+ ++arr_it;
+ offset = 0;
+ }
+ const auto& array = *arr_it;
+ DCHECK_GT(array->length(), offset);
+ chunk_length = std::min(chunk_length, array->length() - offset);
+ }
+ DCHECK_GT(chunk_length, 0);
+
+ // Then slice all arrays along this chunk size
+ for (size_t i = 0; i < groups.size(); i++) {
+ const auto& array = *current_arrays[i];
+ auto& offset = array_offsets[i];
+ if (offset == 0 && array->length() == chunk_length) {
+ // Slice spans entire array
+ rechunked_groups[i].emplace_back(array);
+ } else {
+ DCHECK_LT(chunk_length - offset, array->length());
+ rechunked_groups[i].emplace_back(array->Slice(offset, chunk_length));
+ }
+ offset += chunk_length;
+ }
+ start += chunk_length;
+ }
+
+ return rechunked_groups;
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/util.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/util.h
index 3ef4e08828f..40c522df870 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/util.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/util.h
@@ -1,61 +1,61 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <vector>
-
-#include "arrow/array/data.h"
-#include "arrow/compare.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-/// \brief Create a strongly-typed Array instance from generic ArrayData
-/// \param[in] data the array contents
-/// \return the resulting Array instance
-ARROW_EXPORT
-std::shared_ptr<Array> MakeArray(const std::shared_ptr<ArrayData>& data);
-
-/// \brief Create a strongly-typed Array instance with all elements null
-/// \param[in] type the array type
-/// \param[in] length the array length
-/// \param[in] pool the memory pool to allocate memory from
-ARROW_EXPORT
-Result<std::shared_ptr<Array>> MakeArrayOfNull(const std::shared_ptr<DataType>& type,
- int64_t length,
- MemoryPool* pool = default_memory_pool());
-
-/// \brief Create an Array instance whose slots are the given scalar
-/// \param[in] scalar the value with which to fill the array
-/// \param[in] length the array length
-/// \param[in] pool the memory pool to allocate memory from
-ARROW_EXPORT
-Result<std::shared_ptr<Array>> MakeArrayFromScalar(
- const Scalar& scalar, int64_t length, MemoryPool* pool = default_memory_pool());
-
-namespace internal {
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "arrow/array/data.h"
+#include "arrow/compare.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+/// \brief Create a strongly-typed Array instance from generic ArrayData
+/// \param[in] data the array contents
+/// \return the resulting Array instance
+ARROW_EXPORT
+std::shared_ptr<Array> MakeArray(const std::shared_ptr<ArrayData>& data);
+
+/// \brief Create a strongly-typed Array instance with all elements null
+/// \param[in] type the array type
+/// \param[in] length the array length
+/// \param[in] pool the memory pool to allocate memory from
+ARROW_EXPORT
+Result<std::shared_ptr<Array>> MakeArrayOfNull(const std::shared_ptr<DataType>& type,
+ int64_t length,
+ MemoryPool* pool = default_memory_pool());
+
+/// \brief Create an Array instance whose slots are the given scalar
+/// \param[in] scalar the value with which to fill the array
+/// \param[in] length the array length
+/// \param[in] pool the memory pool to allocate memory from
+ARROW_EXPORT
+Result<std::shared_ptr<Array>> MakeArrayFromScalar(
+ const Scalar& scalar, int64_t length, MemoryPool* pool = default_memory_pool());
+
+namespace internal {
+
/// \brief Swap endian of each element in a generic ArrayData
///
/// As dictionaries are often shared between different arrays, dictionaries
@@ -67,12 +67,12 @@ ARROW_EXPORT
Result<std::shared_ptr<ArrayData>> SwapEndianArrayData(
const std::shared_ptr<ArrayData>& data);
-/// Given a number of ArrayVectors, treat each ArrayVector as the
-/// chunks of a chunked array. Then rechunk each ArrayVector such that
-/// all ArrayVectors are chunked identically. It is mandatory that
-/// all ArrayVectors contain the same total number of elements.
-ARROW_EXPORT
-std::vector<ArrayVector> RechunkArraysConsistently(const std::vector<ArrayVector>&);
-
-} // namespace internal
-} // namespace arrow
+/// Given a number of ArrayVectors, treat each ArrayVector as the
+/// chunks of a chunked array. Then rechunk each ArrayVector such that
+/// all ArrayVectors are chunked identically. It is mandatory that
+/// all ArrayVectors contain the same total number of elements.
+ARROW_EXPORT
+std::vector<ArrayVector> RechunkArraysConsistently(const std::vector<ArrayVector>&);
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/validate.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/validate.cc
index 5cc3bacf282..f762e502c16 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/validate.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/validate.cc
@@ -1,47 +1,47 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/array/validate.h"
-
-#include <vector>
-
-#include "arrow/array.h" // IWYU pragma: keep
-#include "arrow/extension_type.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/array/validate.h"
+
+#include <vector>
+
+#include "arrow/array.h" // IWYU pragma: keep
+#include "arrow/extension_type.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
#include "arrow/util/bit_block_counter.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/int_util_internal.h"
-#include "arrow/util/logging.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/int_util_internal.h"
+#include "arrow/util/logging.h"
#include "arrow/util/utf8.h"
-#include "arrow/visitor_inline.h"
-
-namespace arrow {
-namespace internal {
-
-///////////////////////////////////////////////////////////////////////////
-// ValidateArray: cheap validation checks
-
-namespace {
-
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+namespace internal {
+
+///////////////////////////////////////////////////////////////////////////
+// ValidateArray: cheap validation checks
+
+namespace {
+
struct ValidateArrayImpl {
const ArrayData& data;
-
+
Status Validate() { return ValidateWithType(*data.type); }
Status ValidateWithType(const DataType& type) { return VisitTypeInline(type, this); }
@@ -49,31 +49,31 @@ struct ValidateArrayImpl {
Status Visit(const NullType&) {
if (data.null_count != data.length) {
return Status::Invalid("Null array null_count unequal to its length");
- }
- return Status::OK();
- }
-
+ }
+ return Status::OK();
+ }
+
Status Visit(const FixedWidthType&) {
if (data.length > 0) {
if (!IsBufferValid(1)) {
return Status::Invalid("Missing values buffer in non-empty array");
}
- }
- return Status::OK();
- }
-
+ }
+ return Status::OK();
+ }
+
Status Visit(const StringType& type) { return ValidateBinaryLike(type); }
-
+
Status Visit(const BinaryType& type) { return ValidateBinaryLike(type); }
-
+
Status Visit(const LargeStringType& type) { return ValidateBinaryLike(type); }
-
+
Status Visit(const LargeBinaryType& type) { return ValidateBinaryLike(type); }
-
+
Status Visit(const ListType& type) { return ValidateListLike(type); }
-
+
Status Visit(const LargeListType& type) { return ValidateListLike(type); }
-
+
Status Visit(const MapType& type) { return ValidateListLike(type); }
Status Visit(const FixedSizeListType& type) {
@@ -81,52 +81,52 @@ struct ValidateArrayImpl {
const int64_t list_size = type.list_size();
if (list_size < 0) {
return Status::Invalid("Fixed size list has negative list size");
- }
-
- int64_t expected_values_length = -1;
+ }
+
+ int64_t expected_values_length = -1;
if (MultiplyWithOverflow(data.length, list_size, &expected_values_length) ||
values.length != expected_values_length) {
return Status::Invalid("Values length (", values.length,
") is not equal to the length (", data.length,
") multiplied by the value size (", list_size, ")");
- }
-
+ }
+
const Status child_valid = ValidateArray(values);
if (!child_valid.ok()) {
return Status::Invalid("Fixed size list child array invalid: ",
child_valid.ToString());
}
- return Status::OK();
- }
-
+ return Status::OK();
+ }
+
Status Visit(const StructType& type) {
for (int i = 0; i < type.num_fields(); ++i) {
const auto& field_data = *data.child_data[i];
-
+
// Validate child first, to catch nonsensical length / offset etc.
const Status field_valid = ValidateArray(field_data);
if (!field_valid.ok()) {
- return Status::Invalid("Struct child array #", i,
+ return Status::Invalid("Struct child array #", i,
" invalid: ", field_valid.ToString());
- }
-
+ }
+
if (field_data.length < data.length + data.offset) {
- return Status::Invalid("Struct child array #", i,
+ return Status::Invalid("Struct child array #", i,
" has length smaller than expected for struct array (",
field_data.length, " < ", data.length + data.offset, ")");
- }
-
+ }
+
const auto& field_type = type.field(i)->type();
if (!field_data.type->Equals(*field_type)) {
return Status::Invalid("Struct child array #", i, " does not match type field: ",
field_data.type->ToString(), " vs ",
field_type->ToString());
- }
- }
- return Status::OK();
- }
-
+ }
+ }
+ return Status::OK();
+ }
+
Status Visit(const UnionType& type) {
for (int i = 0; i < type.num_fields(); ++i) {
const auto& field_data = *data.child_data[i];
@@ -136,61 +136,61 @@ struct ValidateArrayImpl {
if (!field_valid.ok()) {
return Status::Invalid("Union child array #", i,
" invalid: ", field_valid.ToString());
- }
-
+ }
+
if (type.mode() == UnionMode::SPARSE &&
field_data.length < data.length + data.offset) {
- return Status::Invalid("Sparse union child array #", i,
+ return Status::Invalid("Sparse union child array #", i,
" has length smaller than expected for union array (",
field_data.length, " < ", data.length + data.offset, ")");
- }
-
+ }
+
const auto& field_type = type.field(i)->type();
if (!field_data.type->Equals(*field_type)) {
return Status::Invalid("Union child array #", i, " does not match type field: ",
field_data.type->ToString(), " vs ",
field_type->ToString());
- }
- }
- return Status::OK();
- }
-
+ }
+ }
+ return Status::OK();
+ }
+
Status Visit(const DictionaryType& type) {
Type::type index_type_id = type.index_type()->id();
- if (!is_integer(index_type_id)) {
- return Status::Invalid("Dictionary indices must be integer type");
- }
+ if (!is_integer(index_type_id)) {
+ return Status::Invalid("Dictionary indices must be integer type");
+ }
if (!data.dictionary) {
- return Status::Invalid("Dictionary values must be non-null");
- }
+ return Status::Invalid("Dictionary values must be non-null");
+ }
const Status dict_valid = ValidateArray(*data.dictionary);
- if (!dict_valid.ok()) {
- return Status::Invalid("Dictionary array invalid: ", dict_valid.ToString());
- }
+ if (!dict_valid.ok()) {
+ return Status::Invalid("Dictionary array invalid: ", dict_valid.ToString());
+ }
// Visit indices
return ValidateWithType(*type.index_type());
- }
-
+ }
+
Status Visit(const ExtensionType& type) {
// Visit storage
return ValidateWithType(*type.storage_type());
}
-
+
private:
bool IsBufferValid(int index) { return IsBufferValid(data, index); }
static bool IsBufferValid(const ArrayData& data, int index) {
return data.buffers[index] != nullptr && data.buffers[index]->address() != 0;
- }
-
+ }
+
template <typename BinaryType>
Status ValidateBinaryLike(const BinaryType& type) {
if (!IsBufferValid(2)) {
return Status::Invalid("Value data buffer is null");
- }
+ }
// First validate offsets, to make sure the accesses below are valid
RETURN_NOT_OK(ValidateOffsets(type));
-
+
if (data.length > 0 && data.buffers[1]->is_cpu()) {
using offset_type = typename BinaryType::offset_type;
@@ -199,36 +199,36 @@ struct ValidateArrayImpl {
const auto first_offset = offsets[0];
const auto last_offset = offsets[data.length];
- // This early test avoids undefined behaviour when computing `data_extent`
- if (first_offset < 0 || last_offset < 0) {
- return Status::Invalid("Negative offsets in binary array");
- }
- const auto data_extent = last_offset - first_offset;
+ // This early test avoids undefined behaviour when computing `data_extent`
+ if (first_offset < 0 || last_offset < 0) {
+ return Status::Invalid("Negative offsets in binary array");
+ }
+ const auto data_extent = last_offset - first_offset;
const auto values_length = values.size();
- if (values_length < data_extent) {
- return Status::Invalid("Length spanned by binary offsets (", data_extent,
- ") larger than values array (size ", values_length, ")");
- }
- // These tests ensure that array concatenation is safe if Validate() succeeds
- // (for delta dictionaries)
- if (first_offset > values_length || last_offset > values_length) {
- return Status::Invalid("First or last binary offset out of bounds");
- }
- if (first_offset > last_offset) {
- return Status::Invalid("First offset larger than last offset in binary array");
- }
- }
- return Status::OK();
- }
-
+ if (values_length < data_extent) {
+ return Status::Invalid("Length spanned by binary offsets (", data_extent,
+ ") larger than values array (size ", values_length, ")");
+ }
+ // These tests ensure that array concatenation is safe if Validate() succeeds
+ // (for delta dictionaries)
+ if (first_offset > values_length || last_offset > values_length) {
+ return Status::Invalid("First or last binary offset out of bounds");
+ }
+ if (first_offset > last_offset) {
+ return Status::Invalid("First offset larger than last offset in binary array");
+ }
+ }
+ return Status::OK();
+ }
+
template <typename ListType>
Status ValidateListLike(const ListType& type) {
- // First validate offsets, to make sure the accesses below are valid
+ // First validate offsets, to make sure the accesses below are valid
RETURN_NOT_OK(ValidateOffsets(type));
-
+
const ArrayData& values = *data.child_data[0];
- // An empty list array can have 0 offsets
+ // An empty list array can have 0 offsets
if (data.length > 0 && data.buffers[1]->is_cpu()) {
using offset_type = typename ListType::offset_type;
@@ -236,166 +236,166 @@ struct ValidateArrayImpl {
const auto first_offset = offsets[0];
const auto last_offset = offsets[data.length];
- // This early test avoids undefined behaviour when computing `data_extent`
- if (first_offset < 0 || last_offset < 0) {
- return Status::Invalid("Negative offsets in list array");
- }
- const auto data_extent = last_offset - first_offset;
+ // This early test avoids undefined behaviour when computing `data_extent`
+ if (first_offset < 0 || last_offset < 0) {
+ return Status::Invalid("Negative offsets in list array");
+ }
+ const auto data_extent = last_offset - first_offset;
const auto values_length = values.length;
- if (values_length < data_extent) {
- return Status::Invalid("Length spanned by list offsets (", data_extent,
- ") larger than values array (length ", values_length, ")");
- }
- // These tests ensure that array concatenation is safe if Validate() succeeds
- // (for delta dictionaries)
- if (first_offset > values_length || last_offset > values_length) {
- return Status::Invalid("First or last list offset out of bounds");
- }
- if (first_offset > last_offset) {
- return Status::Invalid("First offset larger than last offset in list array");
- }
- }
-
+ if (values_length < data_extent) {
+ return Status::Invalid("Length spanned by list offsets (", data_extent,
+ ") larger than values array (length ", values_length, ")");
+ }
+ // These tests ensure that array concatenation is safe if Validate() succeeds
+ // (for delta dictionaries)
+ if (first_offset > values_length || last_offset > values_length) {
+ return Status::Invalid("First or last list offset out of bounds");
+ }
+ if (first_offset > last_offset) {
+ return Status::Invalid("First offset larger than last offset in list array");
+ }
+ }
+
const Status child_valid = ValidateArray(values);
- if (!child_valid.ok()) {
- return Status::Invalid("List child array invalid: ", child_valid.ToString());
- }
- return Status::OK();
- }
-
+ if (!child_valid.ok()) {
+ return Status::Invalid("List child array invalid: ", child_valid.ToString());
+ }
+ return Status::OK();
+ }
+
template <typename TypeClass>
Status ValidateOffsets(const TypeClass& type) {
using offset_type = typename TypeClass::offset_type;
-
+
const Buffer* offsets = data.buffers[1].get();
if (offsets == nullptr) {
// For length 0, an empty offsets buffer seems accepted as a special case
// (ARROW-544)
if (data.length > 0) {
return Status::Invalid("Non-empty array but offsets are null");
- }
- return Status::OK();
- }
-
- // An empty list array can have 0 offsets
+ }
+ return Status::OK();
+ }
+
+ // An empty list array can have 0 offsets
auto required_offsets = (data.length > 0) ? data.length + data.offset + 1 : 0;
if (offsets->size() / static_cast<int32_t>(sizeof(offset_type)) < required_offsets) {
return Status::Invalid("Offsets buffer size (bytes): ", offsets->size(),
" isn't large enough for length: ", data.length);
- }
-
- return Status::OK();
- }
-};
-
-} // namespace
-
-ARROW_EXPORT
+ }
+
+ return Status::OK();
+ }
+};
+
+} // namespace
+
+ARROW_EXPORT
Status ValidateArray(const ArrayData& data) {
// First check the data layout conforms to the spec
const DataType& type = *data.type;
- const auto layout = type.layout();
-
+ const auto layout = type.layout();
+
if (data.length < 0) {
- return Status::Invalid("Array length is negative");
- }
-
- if (data.buffers.size() != layout.buffers.size()) {
- return Status::Invalid("Expected ", layout.buffers.size(),
- " buffers in array "
- "of type ",
- type.ToString(), ", got ", data.buffers.size());
- }
-
- // This check is required to avoid addition overflow below
- int64_t length_plus_offset = -1;
+ return Status::Invalid("Array length is negative");
+ }
+
+ if (data.buffers.size() != layout.buffers.size()) {
+ return Status::Invalid("Expected ", layout.buffers.size(),
+ " buffers in array "
+ "of type ",
+ type.ToString(), ", got ", data.buffers.size());
+ }
+
+ // This check is required to avoid addition overflow below
+ int64_t length_plus_offset = -1;
if (AddWithOverflow(data.length, data.offset, &length_plus_offset)) {
- return Status::Invalid("Array of type ", type.ToString(),
- " has impossibly large length and offset");
- }
-
- for (int i = 0; i < static_cast<int>(data.buffers.size()); ++i) {
- const auto& buffer = data.buffers[i];
- const auto& spec = layout.buffers[i];
-
- if (buffer == nullptr) {
- continue;
- }
- int64_t min_buffer_size = -1;
- switch (spec.kind) {
- case DataTypeLayout::BITMAP:
- min_buffer_size = BitUtil::BytesForBits(length_plus_offset);
- break;
- case DataTypeLayout::FIXED_WIDTH:
- if (MultiplyWithOverflow(length_plus_offset, spec.byte_width, &min_buffer_size)) {
- return Status::Invalid("Array of type ", type.ToString(),
- " has impossibly large length and offset");
- }
- break;
- case DataTypeLayout::ALWAYS_NULL:
- // XXX Should we raise on non-null buffer?
- continue;
- default:
- continue;
- }
- if (buffer->size() < min_buffer_size) {
- return Status::Invalid("Buffer #", i, " too small in array of type ",
+ return Status::Invalid("Array of type ", type.ToString(),
+ " has impossibly large length and offset");
+ }
+
+ for (int i = 0; i < static_cast<int>(data.buffers.size()); ++i) {
+ const auto& buffer = data.buffers[i];
+ const auto& spec = layout.buffers[i];
+
+ if (buffer == nullptr) {
+ continue;
+ }
+ int64_t min_buffer_size = -1;
+ switch (spec.kind) {
+ case DataTypeLayout::BITMAP:
+ min_buffer_size = BitUtil::BytesForBits(length_plus_offset);
+ break;
+ case DataTypeLayout::FIXED_WIDTH:
+ if (MultiplyWithOverflow(length_plus_offset, spec.byte_width, &min_buffer_size)) {
+ return Status::Invalid("Array of type ", type.ToString(),
+ " has impossibly large length and offset");
+ }
+ break;
+ case DataTypeLayout::ALWAYS_NULL:
+ // XXX Should we raise on non-null buffer?
+ continue;
+ default:
+ continue;
+ }
+ if (buffer->size() < min_buffer_size) {
+ return Status::Invalid("Buffer #", i, " too small in array of type ",
type.ToString(), " and length ", data.length,
- ": expected at least ", min_buffer_size, " byte(s), got ",
- buffer->size());
- }
- }
- if (type.id() != Type::NA && data.null_count > 0 && data.buffers[0] == nullptr) {
- return Status::Invalid("Array of type ", type.ToString(), " has ", data.null_count,
- " nulls but no null bitmap");
- }
-
- // Check null_count() *after* validating the buffer sizes, to avoid
- // reading out of bounds.
+ ": expected at least ", min_buffer_size, " byte(s), got ",
+ buffer->size());
+ }
+ }
+ if (type.id() != Type::NA && data.null_count > 0 && data.buffers[0] == nullptr) {
+ return Status::Invalid("Array of type ", type.ToString(), " has ", data.null_count,
+ " nulls but no null bitmap");
+ }
+
+ // Check null_count() *after* validating the buffer sizes, to avoid
+ // reading out of bounds.
if (data.null_count > data.length) {
- return Status::Invalid("Null count exceeds array length");
- }
+ return Status::Invalid("Null count exceeds array length");
+ }
if (data.null_count < 0 && data.null_count != kUnknownNullCount) {
return Status::Invalid("Negative null count");
}
-
- if (type.id() != Type::EXTENSION) {
- if (data.child_data.size() != static_cast<size_t>(type.num_fields())) {
- return Status::Invalid("Expected ", type.num_fields(),
- " child arrays in array "
- "of type ",
- type.ToString(), ", got ", data.child_data.size());
- }
- }
- if (layout.has_dictionary && !data.dictionary) {
- return Status::Invalid("Array of type ", type.ToString(),
- " must have dictionary values");
- }
- if (!layout.has_dictionary && data.dictionary) {
- return Status::Invalid("Unexpected dictionary values in array of type ",
- type.ToString());
- }
-
+
+ if (type.id() != Type::EXTENSION) {
+ if (data.child_data.size() != static_cast<size_t>(type.num_fields())) {
+ return Status::Invalid("Expected ", type.num_fields(),
+ " child arrays in array "
+ "of type ",
+ type.ToString(), ", got ", data.child_data.size());
+ }
+ }
+ if (layout.has_dictionary && !data.dictionary) {
+ return Status::Invalid("Array of type ", type.ToString(),
+ " must have dictionary values");
+ }
+ if (!layout.has_dictionary && data.dictionary) {
+ return Status::Invalid("Unexpected dictionary values in array of type ",
+ type.ToString());
+ }
+
ValidateArrayImpl validator{data};
return validator.Validate();
-}
-
+}
+
ARROW_EXPORT
Status ValidateArray(const Array& array) { return ValidateArray(*array.data()); }
-///////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////
// ValidateArrayFull: expensive validation checks
-
-namespace {
-
+
+namespace {
+
struct UTF8DataValidator {
const ArrayData& data;
-
+
Status Visit(const DataType&) {
- // Default, should be unreachable
- return Status::NotImplemented("");
- }
-
+ // Default, should be unreachable
+ return Status::NotImplemented("");
+ }
+
template <typename StringType>
enable_if_string<StringType, Status> Visit(const StringType&) {
util::InitializeUTF8();
@@ -414,19 +414,19 @@ struct UTF8DataValidator {
++i;
return Status::OK();
});
- }
-};
-
+ }
+};
+
struct BoundsChecker {
const ArrayData& data;
int64_t min_value;
int64_t max_value;
-
+
Status Visit(const DataType&) {
// Default, should be unreachable
return Status::NotImplemented("");
- }
-
+ }
+
template <typename IntegerType>
enable_if_integer<IntegerType, Status> Visit(const IntegerType&) {
using c_type = typename IntegerType::c_type;
@@ -447,30 +447,30 @@ struct BoundsChecker {
++i;
return Status::OK();
});
- }
+ }
};
-
+
struct ValidateArrayFullImpl {
const ArrayData& data;
-
+
Status Validate() { return ValidateWithType(*data.type); }
-
+
Status ValidateWithType(const DataType& type) { return VisitTypeInline(type, this); }
-
+
Status Visit(const NullType& type) { return Status::OK(); }
-
+
Status Visit(const FixedWidthType& type) { return Status::OK(); }
Status Visit(const StringType& type) {
RETURN_NOT_OK(ValidateBinaryLike(type));
return ValidateUTF8(data);
- }
-
+ }
+
Status Visit(const LargeStringType& type) {
RETURN_NOT_OK(ValidateBinaryLike(type));
return ValidateUTF8(data);
}
-
+
Status Visit(const BinaryType& type) { return ValidateBinaryLike(type); }
Status Visit(const LargeBinaryType& type) { return ValidateBinaryLike(type); }
@@ -499,7 +499,7 @@ struct ValidateArrayFullImpl {
if (!field_valid.ok()) {
return Status::Invalid("Struct child array #", i,
" invalid: ", field_valid.ToString());
- }
+ }
}
return Status::OK();
}
@@ -512,43 +512,43 @@ struct ValidateArrayFullImpl {
for (int64_t i = 0; i < data.length; ++i) {
// Note that union arrays never have top-level nulls
- const int32_t code = type_codes[i];
- if (code < 0 || child_ids[code] == UnionType::kInvalidChildId) {
- return Status::Invalid("Union value at position ", i, " has invalid type id ",
- code);
- }
- }
-
+ const int32_t code = type_codes[i];
+ if (code < 0 || child_ids[code] == UnionType::kInvalidChildId) {
+ return Status::Invalid("Union value at position ", i, " has invalid type id ",
+ code);
+ }
+ }
+
if (type.mode() == UnionMode::DENSE) {
- // Map logical type id to child length
- std::vector<int64_t> child_lengths(256);
+ // Map logical type id to child length
+ std::vector<int64_t> child_lengths(256);
for (int child_id = 0; child_id < type.num_fields(); ++child_id) {
child_lengths[type_codes_map[child_id]] = data.child_data[child_id]->length;
- }
-
+ }
+
// Check offsets are in bounds
std::vector<int64_t> last_child_offsets(256, 0);
const int32_t* offsets = data.GetValues<int32_t>(2);
for (int64_t i = 0; i < data.length; ++i) {
- const int32_t code = type_codes[i];
- const int32_t offset = offsets[i];
- if (offset < 0) {
- return Status::Invalid("Union value at position ", i, " has negative offset ",
- offset);
- }
- if (offset >= child_lengths[code]) {
- return Status::Invalid("Union value at position ", i,
- " has offset larger "
- "than child length (",
- offset, " >= ", child_lengths[code], ")");
- }
+ const int32_t code = type_codes[i];
+ const int32_t offset = offsets[i];
+ if (offset < 0) {
+ return Status::Invalid("Union value at position ", i, " has negative offset ",
+ offset);
+ }
+ if (offset >= child_lengths[code]) {
+ return Status::Invalid("Union value at position ", i,
+ " has offset larger "
+ "than child length (",
+ offset, " >= ", child_lengths[code], ")");
+ }
if (offset < last_child_offsets[code]) {
return Status::Invalid("Union value at position ", i,
" has non-monotonic offset ", offset);
}
last_child_offsets[code] = offset;
- }
- }
+ }
+ }
// Validate children
for (int64_t i = 0; i < type.num_fields(); ++i) {
@@ -559,87 +559,87 @@ struct ValidateArrayFullImpl {
" invalid: ", field_valid.ToString());
}
}
- return Status::OK();
- }
-
+ return Status::OK();
+ }
+
Status Visit(const DictionaryType& type) {
- const Status indices_status =
+ const Status indices_status =
CheckBounds(*type.index_type(), 0, data.dictionary->length - 1);
- if (!indices_status.ok()) {
- return Status::Invalid("Dictionary indices invalid: ", indices_status.ToString());
- }
+ if (!indices_status.ok()) {
+ return Status::Invalid("Dictionary indices invalid: ", indices_status.ToString());
+ }
return ValidateArrayFull(*data.dictionary);
- }
-
+ }
+
Status Visit(const ExtensionType& type) {
return ValidateWithType(*type.storage_type());
- }
-
- protected:
+ }
+
+ protected:
template <typename BinaryType>
Status ValidateBinaryLike(const BinaryType& type) {
const auto& data_buffer = data.buffers[2];
if (data_buffer == nullptr) {
return Status::Invalid("Binary data buffer is null");
- }
+ }
return ValidateOffsets(type, data_buffer->size());
- }
-
+ }
+
template <typename ListType>
Status ValidateListLike(const ListType& type) {
const ArrayData& child = *data.child_data[0];
const Status child_valid = ValidateArrayFull(child);
- if (!child_valid.ok()) {
- return Status::Invalid("List child array invalid: ", child_valid.ToString());
- }
+ if (!child_valid.ok()) {
+ return Status::Invalid("List child array invalid: ", child_valid.ToString());
+ }
return ValidateOffsets(type, child.offset + child.length);
- }
-
+ }
+
template <typename TypeClass>
Status ValidateOffsets(const TypeClass& type, int64_t offset_limit) {
using offset_type = typename TypeClass::offset_type;
if (data.length == 0) {
- return Status::OK();
- }
+ return Status::OK();
+ }
const offset_type* offsets = data.GetValues<offset_type>(1);
if (offsets == nullptr) {
return Status::Invalid("Non-empty array but offsets are null");
- }
-
+ }
+
auto prev_offset = offsets[0];
- if (prev_offset < 0) {
+ if (prev_offset < 0) {
return Status::Invalid("Offset invariant failure: array starts at negative offset ",
prev_offset);
- }
+ }
for (int64_t i = 1; i <= data.length; ++i) {
const auto current_offset = offsets[i];
- if (current_offset < prev_offset) {
- return Status::Invalid("Offset invariant failure: non-monotonic offset at slot ",
- i, ": ", current_offset, " < ", prev_offset);
- }
- if (current_offset > offset_limit) {
- return Status::Invalid("Offset invariant failure: offset for slot ", i,
- " out of bounds: ", current_offset, " > ", offset_limit);
- }
- prev_offset = current_offset;
- }
- return Status::OK();
- }
-
+ if (current_offset < prev_offset) {
+ return Status::Invalid("Offset invariant failure: non-monotonic offset at slot ",
+ i, ": ", current_offset, " < ", prev_offset);
+ }
+ if (current_offset > offset_limit) {
+ return Status::Invalid("Offset invariant failure: offset for slot ", i,
+ " out of bounds: ", current_offset, " > ", offset_limit);
+ }
+ prev_offset = current_offset;
+ }
+ return Status::OK();
+ }
+
Status CheckBounds(const DataType& type, int64_t min_value, int64_t max_value) {
BoundsChecker checker{data, min_value, max_value};
return VisitTypeInline(type, &checker);
- }
-};
-
-} // namespace
-
-ARROW_EXPORT
+ }
+};
+
+} // namespace
+
+ARROW_EXPORT
Status ValidateArrayFull(const ArrayData& data) {
return ValidateArrayFullImpl{data}.Validate();
-}
-
+}
+
ARROW_EXPORT
Status ValidateArrayFull(const Array& array) { return ValidateArrayFull(*array.data()); }
@@ -653,5 +653,5 @@ Status ValidateUTF8(const ArrayData& data) {
ARROW_EXPORT
Status ValidateUTF8(const Array& array) { return ValidateUTF8(*array.data()); }
-} // namespace internal
-} // namespace arrow
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/validate.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/validate.h
index cae3e16b3c5..a14fe37fe7e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/validate.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/validate.h
@@ -1,39 +1,39 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "arrow/status.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/status.h"
#include "arrow/type_fwd.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace internal {
-
-// Internal functions implementing Array::Validate() and friends.
-
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace internal {
+
+// Internal functions implementing Array::Validate() and friends.
+
// O(1) array metadata validation
-ARROW_EXPORT
-Status ValidateArray(const Array& array);
-
-ARROW_EXPORT
+ARROW_EXPORT
+Status ValidateArray(const Array& array);
+
+ARROW_EXPORT
Status ValidateArray(const ArrayData& data);
-
+
// O(N) array data validation.
// Note the "full" routines don't validate metadata. It should be done
// beforehand using ValidateArray(), otherwise invalid memory accesses
@@ -51,5 +51,5 @@ Status ValidateUTF8(const Array& array);
ARROW_EXPORT
Status ValidateUTF8(const ArrayData& data);
-} // namespace internal
-} // namespace arrow
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/buffer.cc b/contrib/libs/apache/arrow/cpp/src/arrow/buffer.cc
index b1b2945d0f5..a51263b16ba 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/buffer.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/buffer.cc
@@ -1,207 +1,207 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/buffer.h"
-
-#include <algorithm>
-#include <cstdint>
-#include <utility>
-
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/int_util_internal.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/string.h"
-
-namespace arrow {
-
-Result<std::shared_ptr<Buffer>> Buffer::CopySlice(const int64_t start,
- const int64_t nbytes,
- MemoryPool* pool) const {
- // Sanity checks
- ARROW_CHECK_LE(start, size_);
- ARROW_CHECK_LE(nbytes, size_ - start);
- DCHECK_GE(nbytes, 0);
-
- ARROW_ASSIGN_OR_RAISE(auto new_buffer, AllocateResizableBuffer(nbytes, pool));
- std::memcpy(new_buffer->mutable_data(), data() + start, static_cast<size_t>(nbytes));
- return std::move(new_buffer);
-}
-
-namespace {
-
-Status CheckBufferSlice(const Buffer& buffer, int64_t offset, int64_t length) {
- return internal::CheckSliceParams(buffer.size(), offset, length, "buffer");
-}
-
-Status CheckBufferSlice(const Buffer& buffer, int64_t offset) {
- if (ARROW_PREDICT_FALSE(offset < 0)) {
- // Avoid UBSAN in subtraction below
- return Status::Invalid("Negative buffer slice offset");
- }
- return CheckBufferSlice(buffer, offset, buffer.size() - offset);
-}
-
-} // namespace
-
-Result<std::shared_ptr<Buffer>> SliceBufferSafe(const std::shared_ptr<Buffer>& buffer,
- int64_t offset) {
- RETURN_NOT_OK(CheckBufferSlice(*buffer, offset));
- return SliceBuffer(buffer, offset);
-}
-
-Result<std::shared_ptr<Buffer>> SliceBufferSafe(const std::shared_ptr<Buffer>& buffer,
- int64_t offset, int64_t length) {
- RETURN_NOT_OK(CheckBufferSlice(*buffer, offset, length));
- return SliceBuffer(buffer, offset, length);
-}
-
-Result<std::shared_ptr<Buffer>> SliceMutableBufferSafe(
- const std::shared_ptr<Buffer>& buffer, int64_t offset) {
- RETURN_NOT_OK(CheckBufferSlice(*buffer, offset));
- return SliceMutableBuffer(buffer, offset);
-}
-
-Result<std::shared_ptr<Buffer>> SliceMutableBufferSafe(
- const std::shared_ptr<Buffer>& buffer, int64_t offset, int64_t length) {
- RETURN_NOT_OK(CheckBufferSlice(*buffer, offset, length));
- return SliceMutableBuffer(buffer, offset, length);
-}
-
-std::string Buffer::ToHexString() {
- return HexEncode(data(), static_cast<size_t>(size()));
-}
-
-bool Buffer::Equals(const Buffer& other, const int64_t nbytes) const {
- return this == &other || (size_ >= nbytes && other.size_ >= nbytes &&
- (data_ == other.data_ ||
- !memcmp(data_, other.data_, static_cast<size_t>(nbytes))));
-}
-
-bool Buffer::Equals(const Buffer& other) const {
- return this == &other || (size_ == other.size_ &&
- (data_ == other.data_ ||
- !memcmp(data_, other.data_, static_cast<size_t>(size_))));
-}
-
-std::string Buffer::ToString() const {
- return std::string(reinterpret_cast<const char*>(data_), static_cast<size_t>(size_));
-}
-
-void Buffer::CheckMutable() const { DCHECK(is_mutable()) << "buffer not mutable"; }
-
-void Buffer::CheckCPU() const {
- DCHECK(is_cpu()) << "not a CPU buffer (device: " << device()->ToString() << ")";
-}
-
-Result<std::shared_ptr<io::RandomAccessFile>> Buffer::GetReader(
- std::shared_ptr<Buffer> buf) {
- return buf->memory_manager_->GetBufferReader(buf);
-}
-
-Result<std::shared_ptr<io::OutputStream>> Buffer::GetWriter(std::shared_ptr<Buffer> buf) {
- if (!buf->is_mutable()) {
- return Status::Invalid("Expected mutable buffer");
- }
- return buf->memory_manager_->GetBufferWriter(buf);
-}
-
-Result<std::shared_ptr<Buffer>> Buffer::Copy(std::shared_ptr<Buffer> source,
- const std::shared_ptr<MemoryManager>& to) {
- return MemoryManager::CopyBuffer(source, to);
-}
-
-Result<std::shared_ptr<Buffer>> Buffer::View(std::shared_ptr<Buffer> source,
- const std::shared_ptr<MemoryManager>& to) {
- return MemoryManager::ViewBuffer(source, to);
-}
-
-Result<std::shared_ptr<Buffer>> Buffer::ViewOrCopy(
- std::shared_ptr<Buffer> source, const std::shared_ptr<MemoryManager>& to) {
- auto maybe_buffer = MemoryManager::ViewBuffer(source, to);
- if (maybe_buffer.ok()) {
- return maybe_buffer;
- }
- return MemoryManager::CopyBuffer(source, to);
-}
-
-class StlStringBuffer : public Buffer {
- public:
- explicit StlStringBuffer(std::string data)
- : Buffer(nullptr, 0), input_(std::move(data)) {
- data_ = reinterpret_cast<const uint8_t*>(input_.c_str());
- size_ = static_cast<int64_t>(input_.size());
- capacity_ = size_;
- }
-
- private:
- std::string input_;
-};
-
-std::shared_ptr<Buffer> Buffer::FromString(std::string data) {
- return std::make_shared<StlStringBuffer>(std::move(data));
-}
-
-std::shared_ptr<Buffer> SliceMutableBuffer(const std::shared_ptr<Buffer>& buffer,
- const int64_t offset, const int64_t length) {
- return std::make_shared<MutableBuffer>(buffer, offset, length);
-}
-
-MutableBuffer::MutableBuffer(const std::shared_ptr<Buffer>& parent, const int64_t offset,
- const int64_t size)
- : MutableBuffer(reinterpret_cast<uint8_t*>(parent->mutable_address()) + offset,
- size) {
- DCHECK(parent->is_mutable()) << "Must pass mutable buffer";
- parent_ = parent;
-}
-
-Result<std::shared_ptr<Buffer>> AllocateBitmap(int64_t length, MemoryPool* pool) {
- ARROW_ASSIGN_OR_RAISE(auto buf, AllocateBuffer(BitUtil::BytesForBits(length), pool));
- // Zero out any trailing bits
- if (buf->size() > 0) {
- buf->mutable_data()[buf->size() - 1] = 0;
- }
- return std::move(buf);
-}
-
-Result<std::shared_ptr<Buffer>> AllocateEmptyBitmap(int64_t length, MemoryPool* pool) {
- ARROW_ASSIGN_OR_RAISE(auto buf, AllocateBuffer(BitUtil::BytesForBits(length), pool));
- memset(buf->mutable_data(), 0, static_cast<size_t>(buf->size()));
- return std::move(buf);
-}
-
-Status AllocateEmptyBitmap(int64_t length, std::shared_ptr<Buffer>* out) {
- return AllocateEmptyBitmap(length).Value(out);
-}
-
-Result<std::shared_ptr<Buffer>> ConcatenateBuffers(
- const std::vector<std::shared_ptr<Buffer>>& buffers, MemoryPool* pool) {
- int64_t out_length = 0;
- for (const auto& buffer : buffers) {
- out_length += buffer->size();
- }
- ARROW_ASSIGN_OR_RAISE(auto out, AllocateBuffer(out_length, pool));
- auto out_data = out->mutable_data();
- for (const auto& buffer : buffers) {
- std::memcpy(out_data, buffer->data(), buffer->size());
- out_data += buffer->size();
- }
- return std::move(out);
-}
-
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/buffer.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <utility>
+
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/int_util_internal.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/string.h"
+
+namespace arrow {
+
+Result<std::shared_ptr<Buffer>> Buffer::CopySlice(const int64_t start,
+ const int64_t nbytes,
+ MemoryPool* pool) const {
+ // Sanity checks
+ ARROW_CHECK_LE(start, size_);
+ ARROW_CHECK_LE(nbytes, size_ - start);
+ DCHECK_GE(nbytes, 0);
+
+ ARROW_ASSIGN_OR_RAISE(auto new_buffer, AllocateResizableBuffer(nbytes, pool));
+ std::memcpy(new_buffer->mutable_data(), data() + start, static_cast<size_t>(nbytes));
+ return std::move(new_buffer);
+}
+
+namespace {
+
+Status CheckBufferSlice(const Buffer& buffer, int64_t offset, int64_t length) {
+ return internal::CheckSliceParams(buffer.size(), offset, length, "buffer");
+}
+
+Status CheckBufferSlice(const Buffer& buffer, int64_t offset) {
+ if (ARROW_PREDICT_FALSE(offset < 0)) {
+ // Avoid UBSAN in subtraction below
+ return Status::Invalid("Negative buffer slice offset");
+ }
+ return CheckBufferSlice(buffer, offset, buffer.size() - offset);
+}
+
+} // namespace
+
+Result<std::shared_ptr<Buffer>> SliceBufferSafe(const std::shared_ptr<Buffer>& buffer,
+ int64_t offset) {
+ RETURN_NOT_OK(CheckBufferSlice(*buffer, offset));
+ return SliceBuffer(buffer, offset);
+}
+
+Result<std::shared_ptr<Buffer>> SliceBufferSafe(const std::shared_ptr<Buffer>& buffer,
+ int64_t offset, int64_t length) {
+ RETURN_NOT_OK(CheckBufferSlice(*buffer, offset, length));
+ return SliceBuffer(buffer, offset, length);
+}
+
+Result<std::shared_ptr<Buffer>> SliceMutableBufferSafe(
+ const std::shared_ptr<Buffer>& buffer, int64_t offset) {
+ RETURN_NOT_OK(CheckBufferSlice(*buffer, offset));
+ return SliceMutableBuffer(buffer, offset);
+}
+
+Result<std::shared_ptr<Buffer>> SliceMutableBufferSafe(
+ const std::shared_ptr<Buffer>& buffer, int64_t offset, int64_t length) {
+ RETURN_NOT_OK(CheckBufferSlice(*buffer, offset, length));
+ return SliceMutableBuffer(buffer, offset, length);
+}
+
+std::string Buffer::ToHexString() {
+ return HexEncode(data(), static_cast<size_t>(size()));
+}
+
+bool Buffer::Equals(const Buffer& other, const int64_t nbytes) const {
+ return this == &other || (size_ >= nbytes && other.size_ >= nbytes &&
+ (data_ == other.data_ ||
+ !memcmp(data_, other.data_, static_cast<size_t>(nbytes))));
+}
+
+bool Buffer::Equals(const Buffer& other) const {
+ return this == &other || (size_ == other.size_ &&
+ (data_ == other.data_ ||
+ !memcmp(data_, other.data_, static_cast<size_t>(size_))));
+}
+
+std::string Buffer::ToString() const {
+ return std::string(reinterpret_cast<const char*>(data_), static_cast<size_t>(size_));
+}
+
+void Buffer::CheckMutable() const { DCHECK(is_mutable()) << "buffer not mutable"; }
+
+void Buffer::CheckCPU() const {
+ DCHECK(is_cpu()) << "not a CPU buffer (device: " << device()->ToString() << ")";
+}
+
+Result<std::shared_ptr<io::RandomAccessFile>> Buffer::GetReader(
+ std::shared_ptr<Buffer> buf) {
+ return buf->memory_manager_->GetBufferReader(buf);
+}
+
+Result<std::shared_ptr<io::OutputStream>> Buffer::GetWriter(std::shared_ptr<Buffer> buf) {
+ if (!buf->is_mutable()) {
+ return Status::Invalid("Expected mutable buffer");
+ }
+ return buf->memory_manager_->GetBufferWriter(buf);
+}
+
+Result<std::shared_ptr<Buffer>> Buffer::Copy(std::shared_ptr<Buffer> source,
+ const std::shared_ptr<MemoryManager>& to) {
+ return MemoryManager::CopyBuffer(source, to);
+}
+
+Result<std::shared_ptr<Buffer>> Buffer::View(std::shared_ptr<Buffer> source,
+ const std::shared_ptr<MemoryManager>& to) {
+ return MemoryManager::ViewBuffer(source, to);
+}
+
+Result<std::shared_ptr<Buffer>> Buffer::ViewOrCopy(
+ std::shared_ptr<Buffer> source, const std::shared_ptr<MemoryManager>& to) {
+ auto maybe_buffer = MemoryManager::ViewBuffer(source, to);
+ if (maybe_buffer.ok()) {
+ return maybe_buffer;
+ }
+ return MemoryManager::CopyBuffer(source, to);
+}
+
+class StlStringBuffer : public Buffer {
+ public:
+ explicit StlStringBuffer(std::string data)
+ : Buffer(nullptr, 0), input_(std::move(data)) {
+ data_ = reinterpret_cast<const uint8_t*>(input_.c_str());
+ size_ = static_cast<int64_t>(input_.size());
+ capacity_ = size_;
+ }
+
+ private:
+ std::string input_;
+};
+
+std::shared_ptr<Buffer> Buffer::FromString(std::string data) {
+ return std::make_shared<StlStringBuffer>(std::move(data));
+}
+
+std::shared_ptr<Buffer> SliceMutableBuffer(const std::shared_ptr<Buffer>& buffer,
+ const int64_t offset, const int64_t length) {
+ return std::make_shared<MutableBuffer>(buffer, offset, length);
+}
+
+MutableBuffer::MutableBuffer(const std::shared_ptr<Buffer>& parent, const int64_t offset,
+ const int64_t size)
+ : MutableBuffer(reinterpret_cast<uint8_t*>(parent->mutable_address()) + offset,
+ size) {
+ DCHECK(parent->is_mutable()) << "Must pass mutable buffer";
+ parent_ = parent;
+}
+
+Result<std::shared_ptr<Buffer>> AllocateBitmap(int64_t length, MemoryPool* pool) {
+ ARROW_ASSIGN_OR_RAISE(auto buf, AllocateBuffer(BitUtil::BytesForBits(length), pool));
+ // Zero out any trailing bits
+ if (buf->size() > 0) {
+ buf->mutable_data()[buf->size() - 1] = 0;
+ }
+ return std::move(buf);
+}
+
+Result<std::shared_ptr<Buffer>> AllocateEmptyBitmap(int64_t length, MemoryPool* pool) {
+ ARROW_ASSIGN_OR_RAISE(auto buf, AllocateBuffer(BitUtil::BytesForBits(length), pool));
+ memset(buf->mutable_data(), 0, static_cast<size_t>(buf->size()));
+ return std::move(buf);
+}
+
+Status AllocateEmptyBitmap(int64_t length, std::shared_ptr<Buffer>* out) {
+ return AllocateEmptyBitmap(length).Value(out);
+}
+
+Result<std::shared_ptr<Buffer>> ConcatenateBuffers(
+ const std::vector<std::shared_ptr<Buffer>>& buffers, MemoryPool* pool) {
+ int64_t out_length = 0;
+ for (const auto& buffer : buffers) {
+ out_length += buffer->size();
+ }
+ ARROW_ASSIGN_OR_RAISE(auto out, AllocateBuffer(out_length, pool));
+ auto out_data = out->mutable_data();
+ for (const auto& buffer : buffers) {
+ std::memcpy(out_data, buffer->data(), buffer->size());
+ out_data += buffer->size();
+ }
+ return std::move(out);
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/buffer.h b/contrib/libs/apache/arrow/cpp/src/arrow/buffer.h
index 6c47a464b1d..64e7061aa94 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/buffer.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/buffer.h
@@ -1,496 +1,496 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <cstring>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "arrow/device.h"
-#include "arrow/status.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/string_view.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-// ----------------------------------------------------------------------
-// Buffer classes
-
-/// \class Buffer
-/// \brief Object containing a pointer to a piece of contiguous memory with a
-/// particular size.
-///
-/// Buffers have two related notions of length: size and capacity. Size is
-/// the number of bytes that might have valid data. Capacity is the number
-/// of bytes that were allocated for the buffer in total.
-///
-/// The Buffer base class does not own its memory, but subclasses often do.
-///
-/// The following invariant is always true: Size <= Capacity
-class ARROW_EXPORT Buffer {
- public:
- /// \brief Construct from buffer and size without copying memory
- ///
- /// \param[in] data a memory buffer
- /// \param[in] size buffer size
- ///
- /// \note The passed memory must be kept alive through some other means
- Buffer(const uint8_t* data, int64_t size)
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/device.h"
+#include "arrow/status.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+// ----------------------------------------------------------------------
+// Buffer classes
+
+/// \class Buffer
+/// \brief Object containing a pointer to a piece of contiguous memory with a
+/// particular size.
+///
+/// Buffers have two related notions of length: size and capacity. Size is
+/// the number of bytes that might have valid data. Capacity is the number
+/// of bytes that were allocated for the buffer in total.
+///
+/// The Buffer base class does not own its memory, but subclasses often do.
+///
+/// The following invariant is always true: Size <= Capacity
+class ARROW_EXPORT Buffer {
+ public:
+ /// \brief Construct from buffer and size without copying memory
+ ///
+ /// \param[in] data a memory buffer
+ /// \param[in] size buffer size
+ ///
+ /// \note The passed memory must be kept alive through some other means
+ Buffer(const uint8_t* data, int64_t size)
: is_mutable_(false), is_cpu_(true), data_(data), size_(size), capacity_(size) {
- SetMemoryManager(default_cpu_memory_manager());
- }
-
- Buffer(const uint8_t* data, int64_t size, std::shared_ptr<MemoryManager> mm,
- std::shared_ptr<Buffer> parent = NULLPTR)
+ SetMemoryManager(default_cpu_memory_manager());
+ }
+
+ Buffer(const uint8_t* data, int64_t size, std::shared_ptr<MemoryManager> mm,
+ std::shared_ptr<Buffer> parent = NULLPTR)
: is_mutable_(false), data_(data), size_(size), capacity_(size), parent_(parent) {
- SetMemoryManager(std::move(mm));
- }
-
- Buffer(uintptr_t address, int64_t size, std::shared_ptr<MemoryManager> mm,
- std::shared_ptr<Buffer> parent = NULLPTR)
- : Buffer(reinterpret_cast<const uint8_t*>(address), size, std::move(mm),
- std::move(parent)) {}
-
- /// \brief Construct from string_view without copying memory
- ///
- /// \param[in] data a string_view object
- ///
- /// \note The memory viewed by data must not be deallocated in the lifetime of the
- /// Buffer; temporary rvalue strings must be stored in an lvalue somewhere
- explicit Buffer(util::string_view data)
- : Buffer(reinterpret_cast<const uint8_t*>(data.data()),
- static_cast<int64_t>(data.size())) {}
-
- virtual ~Buffer() = default;
-
- /// An offset into data that is owned by another buffer, but we want to be
- /// able to retain a valid pointer to it even after other shared_ptr's to the
- /// parent buffer have been destroyed
- ///
- /// This method makes no assertions about alignment or padding of the buffer but
- /// in general we expected buffers to be aligned and padded to 64 bytes. In the future
- /// we might add utility methods to help determine if a buffer satisfies this contract.
- Buffer(const std::shared_ptr<Buffer>& parent, const int64_t offset, const int64_t size)
- : Buffer(parent->data_ + offset, size) {
- parent_ = parent;
- SetMemoryManager(parent->memory_manager_);
- }
-
- uint8_t operator[](std::size_t i) const { return data_[i]; }
-
- /// \brief Construct a new std::string with a hexadecimal representation of the buffer.
- /// \return std::string
- std::string ToHexString();
-
- /// Return true if both buffers are the same size and contain the same bytes
- /// up to the number of compared bytes
- bool Equals(const Buffer& other, int64_t nbytes) const;
-
- /// Return true if both buffers are the same size and contain the same bytes
- bool Equals(const Buffer& other) const;
-
- /// Copy a section of the buffer into a new Buffer.
- Result<std::shared_ptr<Buffer>> CopySlice(
- const int64_t start, const int64_t nbytes,
- MemoryPool* pool = default_memory_pool()) const;
-
- /// Zero bytes in padding, i.e. bytes between size_ and capacity_.
- void ZeroPadding() {
-#ifndef NDEBUG
- CheckMutable();
-#endif
- // A zero-capacity buffer can have a null data pointer
- if (capacity_ != 0) {
+ SetMemoryManager(std::move(mm));
+ }
+
+ Buffer(uintptr_t address, int64_t size, std::shared_ptr<MemoryManager> mm,
+ std::shared_ptr<Buffer> parent = NULLPTR)
+ : Buffer(reinterpret_cast<const uint8_t*>(address), size, std::move(mm),
+ std::move(parent)) {}
+
+ /// \brief Construct from string_view without copying memory
+ ///
+ /// \param[in] data a string_view object
+ ///
+ /// \note The memory viewed by data must not be deallocated in the lifetime of the
+ /// Buffer; temporary rvalue strings must be stored in an lvalue somewhere
+ explicit Buffer(util::string_view data)
+ : Buffer(reinterpret_cast<const uint8_t*>(data.data()),
+ static_cast<int64_t>(data.size())) {}
+
+ virtual ~Buffer() = default;
+
+ /// An offset into data that is owned by another buffer, but we want to be
+ /// able to retain a valid pointer to it even after other shared_ptr's to the
+ /// parent buffer have been destroyed
+ ///
+ /// This method makes no assertions about alignment or padding of the buffer but
+ /// in general we expected buffers to be aligned and padded to 64 bytes. In the future
+ /// we might add utility methods to help determine if a buffer satisfies this contract.
+ Buffer(const std::shared_ptr<Buffer>& parent, const int64_t offset, const int64_t size)
+ : Buffer(parent->data_ + offset, size) {
+ parent_ = parent;
+ SetMemoryManager(parent->memory_manager_);
+ }
+
+ uint8_t operator[](std::size_t i) const { return data_[i]; }
+
+ /// \brief Construct a new std::string with a hexadecimal representation of the buffer.
+ /// \return std::string
+ std::string ToHexString();
+
+ /// Return true if both buffers are the same size and contain the same bytes
+ /// up to the number of compared bytes
+ bool Equals(const Buffer& other, int64_t nbytes) const;
+
+ /// Return true if both buffers are the same size and contain the same bytes
+ bool Equals(const Buffer& other) const;
+
+ /// Copy a section of the buffer into a new Buffer.
+ Result<std::shared_ptr<Buffer>> CopySlice(
+ const int64_t start, const int64_t nbytes,
+ MemoryPool* pool = default_memory_pool()) const;
+
+ /// Zero bytes in padding, i.e. bytes between size_ and capacity_.
+ void ZeroPadding() {
+#ifndef NDEBUG
+ CheckMutable();
+#endif
+ // A zero-capacity buffer can have a null data pointer
+ if (capacity_ != 0) {
memset(mutable_data() + size_, 0, static_cast<size_t>(capacity_ - size_));
- }
- }
-
- /// \brief Construct an immutable buffer that takes ownership of the contents
- /// of an std::string (without copying it).
- ///
- /// \param[in] data a string to own
- /// \return a new Buffer instance
- static std::shared_ptr<Buffer> FromString(std::string data);
-
- /// \brief Create buffer referencing typed memory with some length without
- /// copying
- /// \param[in] data the typed memory as C array
- /// \param[in] length the number of values in the array
- /// \return a new shared_ptr<Buffer>
- template <typename T, typename SizeType = int64_t>
- static std::shared_ptr<Buffer> Wrap(const T* data, SizeType length) {
- return std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(data),
- static_cast<int64_t>(sizeof(T) * length));
- }
-
- /// \brief Create buffer referencing std::vector with some length without
- /// copying
- /// \param[in] data the vector to be referenced. If this vector is changed,
- /// the buffer may become invalid
- /// \return a new shared_ptr<Buffer>
- template <typename T>
- static std::shared_ptr<Buffer> Wrap(const std::vector<T>& data) {
- return std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(data.data()),
- static_cast<int64_t>(sizeof(T) * data.size()));
- }
-
- /// \brief Copy buffer contents into a new std::string
- /// \return std::string
- /// \note Can throw std::bad_alloc if buffer is large
- std::string ToString() const;
-
- /// \brief View buffer contents as a util::string_view
- /// \return util::string_view
- explicit operator util::string_view() const {
- return util::string_view(reinterpret_cast<const char*>(data_), size_);
- }
-
- /// \brief View buffer contents as a util::bytes_view
- /// \return util::bytes_view
- explicit operator util::bytes_view() const { return util::bytes_view(data_, size_); }
-
- /// \brief Return a pointer to the buffer's data
- ///
- /// The buffer has to be a CPU buffer (`is_cpu()` is true).
- /// Otherwise, an assertion may be thrown or a null pointer may be returned.
- ///
- /// To get the buffer's data address regardless of its device, call `address()`.
- const uint8_t* data() const {
-#ifndef NDEBUG
- CheckCPU();
-#endif
- return ARROW_PREDICT_TRUE(is_cpu_) ? data_ : NULLPTR;
- }
-
- /// \brief Return a writable pointer to the buffer's data
- ///
- /// The buffer has to be a mutable CPU buffer (`is_cpu()` and `is_mutable()`
- /// are true). Otherwise, an assertion may be thrown or a null pointer may
- /// be returned.
- ///
- /// To get the buffer's mutable data address regardless of its device, call
- /// `mutable_address()`.
- uint8_t* mutable_data() {
-#ifndef NDEBUG
- CheckCPU();
- CheckMutable();
-#endif
+ }
+ }
+
+ /// \brief Construct an immutable buffer that takes ownership of the contents
+ /// of an std::string (without copying it).
+ ///
+ /// \param[in] data a string to own
+ /// \return a new Buffer instance
+ static std::shared_ptr<Buffer> FromString(std::string data);
+
+ /// \brief Create buffer referencing typed memory with some length without
+ /// copying
+ /// \param[in] data the typed memory as C array
+ /// \param[in] length the number of values in the array
+ /// \return a new shared_ptr<Buffer>
+ template <typename T, typename SizeType = int64_t>
+ static std::shared_ptr<Buffer> Wrap(const T* data, SizeType length) {
+ return std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(data),
+ static_cast<int64_t>(sizeof(T) * length));
+ }
+
+ /// \brief Create buffer referencing std::vector with some length without
+ /// copying
+ /// \param[in] data the vector to be referenced. If this vector is changed,
+ /// the buffer may become invalid
+ /// \return a new shared_ptr<Buffer>
+ template <typename T>
+ static std::shared_ptr<Buffer> Wrap(const std::vector<T>& data) {
+ return std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(data.data()),
+ static_cast<int64_t>(sizeof(T) * data.size()));
+ }
+
+ /// \brief Copy buffer contents into a new std::string
+ /// \return std::string
+ /// \note Can throw std::bad_alloc if buffer is large
+ std::string ToString() const;
+
+ /// \brief View buffer contents as a util::string_view
+ /// \return util::string_view
+ explicit operator util::string_view() const {
+ return util::string_view(reinterpret_cast<const char*>(data_), size_);
+ }
+
+ /// \brief View buffer contents as a util::bytes_view
+ /// \return util::bytes_view
+ explicit operator util::bytes_view() const { return util::bytes_view(data_, size_); }
+
+ /// \brief Return a pointer to the buffer's data
+ ///
+ /// The buffer has to be a CPU buffer (`is_cpu()` is true).
+ /// Otherwise, an assertion may be thrown or a null pointer may be returned.
+ ///
+ /// To get the buffer's data address regardless of its device, call `address()`.
+ const uint8_t* data() const {
+#ifndef NDEBUG
+ CheckCPU();
+#endif
+ return ARROW_PREDICT_TRUE(is_cpu_) ? data_ : NULLPTR;
+ }
+
+ /// \brief Return a writable pointer to the buffer's data
+ ///
+ /// The buffer has to be a mutable CPU buffer (`is_cpu()` and `is_mutable()`
+ /// are true). Otherwise, an assertion may be thrown or a null pointer may
+ /// be returned.
+ ///
+ /// To get the buffer's mutable data address regardless of its device, call
+ /// `mutable_address()`.
+ uint8_t* mutable_data() {
+#ifndef NDEBUG
+ CheckCPU();
+ CheckMutable();
+#endif
return ARROW_PREDICT_TRUE(is_cpu_ && is_mutable_) ? const_cast<uint8_t*>(data_)
: NULLPTR;
- }
-
- /// \brief Return the device address of the buffer's data
- uintptr_t address() const { return reinterpret_cast<uintptr_t>(data_); }
-
- /// \brief Return a writable device address to the buffer's data
- ///
- /// The buffer has to be a mutable buffer (`is_mutable()` is true).
- /// Otherwise, an assertion may be thrown or 0 may be returned.
- uintptr_t mutable_address() const {
-#ifndef NDEBUG
- CheckMutable();
-#endif
+ }
+
+ /// \brief Return the device address of the buffer's data
+ uintptr_t address() const { return reinterpret_cast<uintptr_t>(data_); }
+
+ /// \brief Return a writable device address to the buffer's data
+ ///
+ /// The buffer has to be a mutable buffer (`is_mutable()` is true).
+ /// Otherwise, an assertion may be thrown or 0 may be returned.
+ uintptr_t mutable_address() const {
+#ifndef NDEBUG
+ CheckMutable();
+#endif
return ARROW_PREDICT_TRUE(is_mutable_) ? reinterpret_cast<uintptr_t>(data_) : 0;
- }
-
- /// \brief Return the buffer's size in bytes
- int64_t size() const { return size_; }
-
- /// \brief Return the buffer's capacity (number of allocated bytes)
- int64_t capacity() const { return capacity_; }
-
- /// \brief Whether the buffer is directly CPU-accessible
- ///
- /// If this function returns true, you can read directly from the buffer's
- /// `data()` pointer. Otherwise, you'll have to `View()` or `Copy()` it.
- bool is_cpu() const { return is_cpu_; }
-
- /// \brief Whether the buffer is mutable
- ///
- /// If this function returns true, you are allowed to modify buffer contents
- /// using the pointer returned by `mutable_data()` or `mutable_address()`.
- bool is_mutable() const { return is_mutable_; }
-
- const std::shared_ptr<Device>& device() const { return memory_manager_->device(); }
-
- const std::shared_ptr<MemoryManager>& memory_manager() const { return memory_manager_; }
-
- std::shared_ptr<Buffer> parent() const { return parent_; }
-
- /// \brief Get a RandomAccessFile for reading a buffer
- ///
- /// The returned file object reads from this buffer's underlying memory.
- static Result<std::shared_ptr<io::RandomAccessFile>> GetReader(std::shared_ptr<Buffer>);
-
- /// \brief Get a OutputStream for writing to a buffer
- ///
- /// The buffer must be mutable. The returned stream object writes into the buffer's
- /// underlying memory (but it won't resize it).
- static Result<std::shared_ptr<io::OutputStream>> GetWriter(std::shared_ptr<Buffer>);
-
- /// \brief Copy buffer
- ///
- /// The buffer contents will be copied into a new buffer allocated by the
- /// given MemoryManager. This function supports cross-device copies.
- static Result<std::shared_ptr<Buffer>> Copy(std::shared_ptr<Buffer> source,
- const std::shared_ptr<MemoryManager>& to);
-
- /// \brief View buffer
- ///
- /// Return a Buffer that reflects this buffer, seen potentially from another
- /// device, without making an explicit copy of the contents. The underlying
- /// mechanism is typically implemented by the kernel or device driver, and may
- /// involve lazy caching of parts of the buffer contents on the destination
- /// device's memory.
- ///
- /// If a non-copy view is unsupported for the buffer on the given device,
- /// nullptr is returned. An error can be returned if some low-level
- /// operation fails (such as an out-of-memory condition).
- static Result<std::shared_ptr<Buffer>> View(std::shared_ptr<Buffer> source,
- const std::shared_ptr<MemoryManager>& to);
-
- /// \brief View or copy buffer
- ///
- /// Try to view buffer contents on the given MemoryManager's device, but
- /// fall back to copying if a no-copy view isn't supported.
- static Result<std::shared_ptr<Buffer>> ViewOrCopy(
- std::shared_ptr<Buffer> source, const std::shared_ptr<MemoryManager>& to);
-
- protected:
- bool is_mutable_;
- bool is_cpu_;
- const uint8_t* data_;
- int64_t size_;
- int64_t capacity_;
-
- // null by default, but may be set
- std::shared_ptr<Buffer> parent_;
-
- private:
- // private so that subclasses are forced to call SetMemoryManager()
- std::shared_ptr<MemoryManager> memory_manager_;
-
- protected:
- void CheckMutable() const;
- void CheckCPU() const;
-
- void SetMemoryManager(std::shared_ptr<MemoryManager> mm) {
- memory_manager_ = std::move(mm);
- is_cpu_ = memory_manager_->is_cpu();
- }
-
- private:
- Buffer() = delete;
- ARROW_DISALLOW_COPY_AND_ASSIGN(Buffer);
-};
-
-/// \defgroup buffer-slicing-functions Functions for slicing buffers
-///
-/// @{
-
-/// \brief Construct a view on a buffer at the given offset and length.
-///
-/// This function cannot fail and does not check for errors (except in debug builds)
-static inline std::shared_ptr<Buffer> SliceBuffer(const std::shared_ptr<Buffer>& buffer,
- const int64_t offset,
- const int64_t length) {
- return std::make_shared<Buffer>(buffer, offset, length);
-}
-
-/// \brief Construct a view on a buffer at the given offset, up to the buffer's end.
-///
-/// This function cannot fail and does not check for errors (except in debug builds)
-static inline std::shared_ptr<Buffer> SliceBuffer(const std::shared_ptr<Buffer>& buffer,
- const int64_t offset) {
- int64_t length = buffer->size() - offset;
- return SliceBuffer(buffer, offset, length);
-}
-
-/// \brief Input-checking version of SliceBuffer
-///
-/// An Invalid Status is returned if the requested slice falls out of bounds.
-ARROW_EXPORT
-Result<std::shared_ptr<Buffer>> SliceBufferSafe(const std::shared_ptr<Buffer>& buffer,
- int64_t offset);
-/// \brief Input-checking version of SliceBuffer
-///
-/// An Invalid Status is returned if the requested slice falls out of bounds.
-/// Note that unlike SliceBuffer, `length` isn't clamped to the available buffer size.
-ARROW_EXPORT
-Result<std::shared_ptr<Buffer>> SliceBufferSafe(const std::shared_ptr<Buffer>& buffer,
- int64_t offset, int64_t length);
-
-/// \brief Like SliceBuffer, but construct a mutable buffer slice.
-///
-/// If the parent buffer is not mutable, behavior is undefined (it may abort
-/// in debug builds).
-ARROW_EXPORT
-std::shared_ptr<Buffer> SliceMutableBuffer(const std::shared_ptr<Buffer>& buffer,
- const int64_t offset, const int64_t length);
-
-/// \brief Like SliceBuffer, but construct a mutable buffer slice.
-///
-/// If the parent buffer is not mutable, behavior is undefined (it may abort
-/// in debug builds).
-static inline std::shared_ptr<Buffer> SliceMutableBuffer(
- const std::shared_ptr<Buffer>& buffer, const int64_t offset) {
- int64_t length = buffer->size() - offset;
- return SliceMutableBuffer(buffer, offset, length);
-}
-
-/// \brief Input-checking version of SliceMutableBuffer
-///
-/// An Invalid Status is returned if the requested slice falls out of bounds.
-ARROW_EXPORT
-Result<std::shared_ptr<Buffer>> SliceMutableBufferSafe(
- const std::shared_ptr<Buffer>& buffer, int64_t offset);
-/// \brief Input-checking version of SliceMutableBuffer
-///
-/// An Invalid Status is returned if the requested slice falls out of bounds.
-/// Note that unlike SliceBuffer, `length` isn't clamped to the available buffer size.
-ARROW_EXPORT
-Result<std::shared_ptr<Buffer>> SliceMutableBufferSafe(
- const std::shared_ptr<Buffer>& buffer, int64_t offset, int64_t length);
-
-/// @}
-
-/// \class MutableBuffer
-/// \brief A Buffer whose contents can be mutated. May or may not own its data.
-class ARROW_EXPORT MutableBuffer : public Buffer {
- public:
- MutableBuffer(uint8_t* data, const int64_t size) : Buffer(data, size) {
- is_mutable_ = true;
- }
-
- MutableBuffer(uint8_t* data, const int64_t size, std::shared_ptr<MemoryManager> mm)
- : Buffer(data, size, std::move(mm)) {
- is_mutable_ = true;
- }
-
- MutableBuffer(const std::shared_ptr<Buffer>& parent, const int64_t offset,
- const int64_t size);
-
- /// \brief Create buffer referencing typed memory with some length
- /// \param[in] data the typed memory as C array
- /// \param[in] length the number of values in the array
- /// \return a new shared_ptr<Buffer>
- template <typename T, typename SizeType = int64_t>
- static std::shared_ptr<Buffer> Wrap(T* data, SizeType length) {
- return std::make_shared<MutableBuffer>(reinterpret_cast<uint8_t*>(data),
- static_cast<int64_t>(sizeof(T) * length));
- }
-
- protected:
- MutableBuffer() : Buffer(NULLPTR, 0) {}
-};
-
-/// \class ResizableBuffer
-/// \brief A mutable buffer that can be resized
-class ARROW_EXPORT ResizableBuffer : public MutableBuffer {
- public:
- /// Change buffer reported size to indicated size, allocating memory if
- /// necessary. This will ensure that the capacity of the buffer is a multiple
- /// of 64 bytes as defined in Layout.md.
- /// Consider using ZeroPadding afterwards, to conform to the Arrow layout
- /// specification.
- ///
- /// @param new_size The new size for the buffer.
- /// @param shrink_to_fit Whether to shrink the capacity if new size < current size
- virtual Status Resize(const int64_t new_size, bool shrink_to_fit = true) = 0;
-
- /// Ensure that buffer has enough memory allocated to fit the indicated
- /// capacity (and meets the 64 byte padding requirement in Layout.md).
- /// It does not change buffer's reported size and doesn't zero the padding.
- virtual Status Reserve(const int64_t new_capacity) = 0;
-
- template <class T>
- Status TypedResize(const int64_t new_nb_elements, bool shrink_to_fit = true) {
- return Resize(sizeof(T) * new_nb_elements, shrink_to_fit);
- }
-
- template <class T>
- Status TypedReserve(const int64_t new_nb_elements) {
- return Reserve(sizeof(T) * new_nb_elements);
- }
-
- protected:
- ResizableBuffer(uint8_t* data, int64_t size) : MutableBuffer(data, size) {}
- ResizableBuffer(uint8_t* data, int64_t size, std::shared_ptr<MemoryManager> mm)
- : MutableBuffer(data, size, std::move(mm)) {}
-};
-
-/// \defgroup buffer-allocation-functions Functions for allocating buffers
-///
-/// @{
-
-/// \brief Allocate a fixed size mutable buffer from a memory pool, zero its padding.
-///
-/// \param[in] size size of buffer to allocate
-/// \param[in] pool a memory pool
-ARROW_EXPORT
-Result<std::unique_ptr<Buffer>> AllocateBuffer(const int64_t size,
- MemoryPool* pool = NULLPTR);
-
-/// \brief Allocate a resizeable buffer from a memory pool, zero its padding.
-///
-/// \param[in] size size of buffer to allocate
-/// \param[in] pool a memory pool
-ARROW_EXPORT
-Result<std::unique_ptr<ResizableBuffer>> AllocateResizableBuffer(
- const int64_t size, MemoryPool* pool = NULLPTR);
-
-/// \brief Allocate a bitmap buffer from a memory pool
-/// no guarantee on values is provided.
-///
-/// \param[in] length size in bits of bitmap to allocate
-/// \param[in] pool memory pool to allocate memory from
-ARROW_EXPORT
-Result<std::shared_ptr<Buffer>> AllocateBitmap(int64_t length,
- MemoryPool* pool = NULLPTR);
-
-ARROW_EXPORT
-Status AllocateBitmap(MemoryPool* pool, int64_t length, std::shared_ptr<Buffer>* out);
-
-/// \brief Allocate a zero-initialized bitmap buffer from a memory pool
-///
-/// \param[in] length size in bits of bitmap to allocate
-/// \param[in] pool memory pool to allocate memory from
-ARROW_EXPORT
-Result<std::shared_ptr<Buffer>> AllocateEmptyBitmap(int64_t length,
- MemoryPool* pool = NULLPTR);
-
-/// \brief Concatenate multiple buffers into a single buffer
-///
-/// \param[in] buffers to be concatenated
-/// \param[in] pool memory pool to allocate the new buffer from
-ARROW_EXPORT
-Result<std::shared_ptr<Buffer>> ConcatenateBuffers(const BufferVector& buffers,
- MemoryPool* pool = NULLPTR);
-
-ARROW_EXPORT
-Status ConcatenateBuffers(const BufferVector& buffers, MemoryPool* pool,
- std::shared_ptr<Buffer>* out);
-
-/// @}
-
-} // namespace arrow
+ }
+
+ /// \brief Return the buffer's size in bytes
+ int64_t size() const { return size_; }
+
+ /// \brief Return the buffer's capacity (number of allocated bytes)
+ int64_t capacity() const { return capacity_; }
+
+ /// \brief Whether the buffer is directly CPU-accessible
+ ///
+ /// If this function returns true, you can read directly from the buffer's
+ /// `data()` pointer. Otherwise, you'll have to `View()` or `Copy()` it.
+ bool is_cpu() const { return is_cpu_; }
+
+ /// \brief Whether the buffer is mutable
+ ///
+ /// If this function returns true, you are allowed to modify buffer contents
+ /// using the pointer returned by `mutable_data()` or `mutable_address()`.
+ bool is_mutable() const { return is_mutable_; }
+
+ const std::shared_ptr<Device>& device() const { return memory_manager_->device(); }
+
+ const std::shared_ptr<MemoryManager>& memory_manager() const { return memory_manager_; }
+
+ std::shared_ptr<Buffer> parent() const { return parent_; }
+
+ /// \brief Get a RandomAccessFile for reading a buffer
+ ///
+ /// The returned file object reads from this buffer's underlying memory.
+ static Result<std::shared_ptr<io::RandomAccessFile>> GetReader(std::shared_ptr<Buffer>);
+
+ /// \brief Get a OutputStream for writing to a buffer
+ ///
+ /// The buffer must be mutable. The returned stream object writes into the buffer's
+ /// underlying memory (but it won't resize it).
+ static Result<std::shared_ptr<io::OutputStream>> GetWriter(std::shared_ptr<Buffer>);
+
+ /// \brief Copy buffer
+ ///
+ /// The buffer contents will be copied into a new buffer allocated by the
+ /// given MemoryManager. This function supports cross-device copies.
+ static Result<std::shared_ptr<Buffer>> Copy(std::shared_ptr<Buffer> source,
+ const std::shared_ptr<MemoryManager>& to);
+
+ /// \brief View buffer
+ ///
+ /// Return a Buffer that reflects this buffer, seen potentially from another
+ /// device, without making an explicit copy of the contents. The underlying
+ /// mechanism is typically implemented by the kernel or device driver, and may
+ /// involve lazy caching of parts of the buffer contents on the destination
+ /// device's memory.
+ ///
+ /// If a non-copy view is unsupported for the buffer on the given device,
+ /// nullptr is returned. An error can be returned if some low-level
+ /// operation fails (such as an out-of-memory condition).
+ static Result<std::shared_ptr<Buffer>> View(std::shared_ptr<Buffer> source,
+ const std::shared_ptr<MemoryManager>& to);
+
+ /// \brief View or copy buffer
+ ///
+ /// Try to view buffer contents on the given MemoryManager's device, but
+ /// fall back to copying if a no-copy view isn't supported.
+ static Result<std::shared_ptr<Buffer>> ViewOrCopy(
+ std::shared_ptr<Buffer> source, const std::shared_ptr<MemoryManager>& to);
+
+ protected:
+ bool is_mutable_;
+ bool is_cpu_;
+ const uint8_t* data_;
+ int64_t size_;
+ int64_t capacity_;
+
+ // null by default, but may be set
+ std::shared_ptr<Buffer> parent_;
+
+ private:
+ // private so that subclasses are forced to call SetMemoryManager()
+ std::shared_ptr<MemoryManager> memory_manager_;
+
+ protected:
+ void CheckMutable() const;
+ void CheckCPU() const;
+
+ void SetMemoryManager(std::shared_ptr<MemoryManager> mm) {
+ memory_manager_ = std::move(mm);
+ is_cpu_ = memory_manager_->is_cpu();
+ }
+
+ private:
+ Buffer() = delete;
+ ARROW_DISALLOW_COPY_AND_ASSIGN(Buffer);
+};
+
+/// \defgroup buffer-slicing-functions Functions for slicing buffers
+///
+/// @{
+
+/// \brief Construct a view on a buffer at the given offset and length.
+///
+/// This function cannot fail and does not check for errors (except in debug builds)
+static inline std::shared_ptr<Buffer> SliceBuffer(const std::shared_ptr<Buffer>& buffer,
+ const int64_t offset,
+ const int64_t length) {
+ return std::make_shared<Buffer>(buffer, offset, length);
+}
+
+/// \brief Construct a view on a buffer at the given offset, up to the buffer's end.
+///
+/// This function cannot fail and does not check for errors (except in debug builds)
+static inline std::shared_ptr<Buffer> SliceBuffer(const std::shared_ptr<Buffer>& buffer,
+ const int64_t offset) {
+ int64_t length = buffer->size() - offset;
+ return SliceBuffer(buffer, offset, length);
+}
+
+/// \brief Input-checking version of SliceBuffer
+///
+/// An Invalid Status is returned if the requested slice falls out of bounds.
+ARROW_EXPORT
+Result<std::shared_ptr<Buffer>> SliceBufferSafe(const std::shared_ptr<Buffer>& buffer,
+ int64_t offset);
+/// \brief Input-checking version of SliceBuffer
+///
+/// An Invalid Status is returned if the requested slice falls out of bounds.
+/// Note that unlike SliceBuffer, `length` isn't clamped to the available buffer size.
+ARROW_EXPORT
+Result<std::shared_ptr<Buffer>> SliceBufferSafe(const std::shared_ptr<Buffer>& buffer,
+ int64_t offset, int64_t length);
+
+/// \brief Like SliceBuffer, but construct a mutable buffer slice.
+///
+/// If the parent buffer is not mutable, behavior is undefined (it may abort
+/// in debug builds).
+ARROW_EXPORT
+std::shared_ptr<Buffer> SliceMutableBuffer(const std::shared_ptr<Buffer>& buffer,
+ const int64_t offset, const int64_t length);
+
+/// \brief Like SliceBuffer, but construct a mutable buffer slice.
+///
+/// If the parent buffer is not mutable, behavior is undefined (it may abort
+/// in debug builds).
+static inline std::shared_ptr<Buffer> SliceMutableBuffer(
+ const std::shared_ptr<Buffer>& buffer, const int64_t offset) {
+ int64_t length = buffer->size() - offset;
+ return SliceMutableBuffer(buffer, offset, length);
+}
+
+/// \brief Input-checking version of SliceMutableBuffer
+///
+/// An Invalid Status is returned if the requested slice falls out of bounds.
+ARROW_EXPORT
+Result<std::shared_ptr<Buffer>> SliceMutableBufferSafe(
+ const std::shared_ptr<Buffer>& buffer, int64_t offset);
+/// \brief Input-checking version of SliceMutableBuffer
+///
+/// An Invalid Status is returned if the requested slice falls out of bounds.
+/// Note that unlike SliceBuffer, `length` isn't clamped to the available buffer size.
+ARROW_EXPORT
+Result<std::shared_ptr<Buffer>> SliceMutableBufferSafe(
+ const std::shared_ptr<Buffer>& buffer, int64_t offset, int64_t length);
+
+/// @}
+
+/// \class MutableBuffer
+/// \brief A Buffer whose contents can be mutated. May or may not own its data.
+class ARROW_EXPORT MutableBuffer : public Buffer {
+ public:
+ MutableBuffer(uint8_t* data, const int64_t size) : Buffer(data, size) {
+ is_mutable_ = true;
+ }
+
+ MutableBuffer(uint8_t* data, const int64_t size, std::shared_ptr<MemoryManager> mm)
+ : Buffer(data, size, std::move(mm)) {
+ is_mutable_ = true;
+ }
+
+ MutableBuffer(const std::shared_ptr<Buffer>& parent, const int64_t offset,
+ const int64_t size);
+
+ /// \brief Create buffer referencing typed memory with some length
+ /// \param[in] data the typed memory as C array
+ /// \param[in] length the number of values in the array
+ /// \return a new shared_ptr<Buffer>
+ template <typename T, typename SizeType = int64_t>
+ static std::shared_ptr<Buffer> Wrap(T* data, SizeType length) {
+ return std::make_shared<MutableBuffer>(reinterpret_cast<uint8_t*>(data),
+ static_cast<int64_t>(sizeof(T) * length));
+ }
+
+ protected:
+ MutableBuffer() : Buffer(NULLPTR, 0) {}
+};
+
+/// \class ResizableBuffer
+/// \brief A mutable buffer that can be resized
+class ARROW_EXPORT ResizableBuffer : public MutableBuffer {
+ public:
+ /// Change buffer reported size to indicated size, allocating memory if
+ /// necessary. This will ensure that the capacity of the buffer is a multiple
+ /// of 64 bytes as defined in Layout.md.
+ /// Consider using ZeroPadding afterwards, to conform to the Arrow layout
+ /// specification.
+ ///
+ /// @param new_size The new size for the buffer.
+ /// @param shrink_to_fit Whether to shrink the capacity if new size < current size
+ virtual Status Resize(const int64_t new_size, bool shrink_to_fit = true) = 0;
+
+ /// Ensure that buffer has enough memory allocated to fit the indicated
+ /// capacity (and meets the 64 byte padding requirement in Layout.md).
+ /// It does not change buffer's reported size and doesn't zero the padding.
+ virtual Status Reserve(const int64_t new_capacity) = 0;
+
+ template <class T>
+ Status TypedResize(const int64_t new_nb_elements, bool shrink_to_fit = true) {
+ return Resize(sizeof(T) * new_nb_elements, shrink_to_fit);
+ }
+
+ template <class T>
+ Status TypedReserve(const int64_t new_nb_elements) {
+ return Reserve(sizeof(T) * new_nb_elements);
+ }
+
+ protected:
+ ResizableBuffer(uint8_t* data, int64_t size) : MutableBuffer(data, size) {}
+ ResizableBuffer(uint8_t* data, int64_t size, std::shared_ptr<MemoryManager> mm)
+ : MutableBuffer(data, size, std::move(mm)) {}
+};
+
+/// \defgroup buffer-allocation-functions Functions for allocating buffers
+///
+/// @{
+
+/// \brief Allocate a fixed size mutable buffer from a memory pool, zero its padding.
+///
+/// \param[in] size size of buffer to allocate
+/// \param[in] pool a memory pool
+ARROW_EXPORT
+Result<std::unique_ptr<Buffer>> AllocateBuffer(const int64_t size,
+ MemoryPool* pool = NULLPTR);
+
+/// \brief Allocate a resizeable buffer from a memory pool, zero its padding.
+///
+/// \param[in] size size of buffer to allocate
+/// \param[in] pool a memory pool
+ARROW_EXPORT
+Result<std::unique_ptr<ResizableBuffer>> AllocateResizableBuffer(
+ const int64_t size, MemoryPool* pool = NULLPTR);
+
+/// \brief Allocate a bitmap buffer from a memory pool
+/// no guarantee on values is provided.
+///
+/// \param[in] length size in bits of bitmap to allocate
+/// \param[in] pool memory pool to allocate memory from
+ARROW_EXPORT
+Result<std::shared_ptr<Buffer>> AllocateBitmap(int64_t length,
+ MemoryPool* pool = NULLPTR);
+
+ARROW_EXPORT
+Status AllocateBitmap(MemoryPool* pool, int64_t length, std::shared_ptr<Buffer>* out);
+
+/// \brief Allocate a zero-initialized bitmap buffer from a memory pool
+///
+/// \param[in] length size in bits of bitmap to allocate
+/// \param[in] pool memory pool to allocate memory from
+ARROW_EXPORT
+Result<std::shared_ptr<Buffer>> AllocateEmptyBitmap(int64_t length,
+ MemoryPool* pool = NULLPTR);
+
+/// \brief Concatenate multiple buffers into a single buffer
+///
+/// \param[in] buffers to be concatenated
+/// \param[in] pool memory pool to allocate the new buffer from
+ARROW_EXPORT
+Result<std::shared_ptr<Buffer>> ConcatenateBuffers(const BufferVector& buffers,
+ MemoryPool* pool = NULLPTR);
+
+ARROW_EXPORT
+Status ConcatenateBuffers(const BufferVector& buffers, MemoryPool* pool,
+ std::shared_ptr<Buffer>* out);
+
+/// @}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/buffer_builder.h b/contrib/libs/apache/arrow/cpp/src/arrow/buffer_builder.h
index c6250ae2b76..6d2f8030d4b 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/buffer_builder.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/buffer_builder.h
@@ -1,164 +1,164 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <algorithm>
-#include <cstdint>
-#include <cstring>
-#include <memory>
-#include <string>
-#include <utility>
-
-#include "arrow/buffer.h"
-#include "arrow/status.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/bitmap_generate.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/ubsan.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-// ----------------------------------------------------------------------
-// Buffer builder classes
-
-/// \class BufferBuilder
-/// \brief A class for incrementally building a contiguous chunk of in-memory
-/// data
-class ARROW_EXPORT BufferBuilder {
- public:
- explicit BufferBuilder(MemoryPool* pool = default_memory_pool())
- : pool_(pool),
- data_(/*ensure never null to make ubsan happy and avoid check penalties below*/
- &util::internal::non_null_filler),
-
- capacity_(0),
- size_(0) {}
-
- /// \brief Constructs new Builder that will start using
- /// the provided buffer until Finish/Reset are called.
- /// The buffer is not resized.
- explicit BufferBuilder(std::shared_ptr<ResizableBuffer> buffer,
- MemoryPool* pool = default_memory_pool())
- : buffer_(std::move(buffer)),
- pool_(pool),
- data_(buffer_->mutable_data()),
- capacity_(buffer_->capacity()),
- size_(buffer_->size()) {}
-
- /// \brief Resize the buffer to the nearest multiple of 64 bytes
- ///
- /// \param new_capacity the new capacity of the of the builder. Will be
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "arrow/buffer.h"
+#include "arrow/status.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_generate.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/ubsan.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+// ----------------------------------------------------------------------
+// Buffer builder classes
+
+/// \class BufferBuilder
+/// \brief A class for incrementally building a contiguous chunk of in-memory
+/// data
+class ARROW_EXPORT BufferBuilder {
+ public:
+ explicit BufferBuilder(MemoryPool* pool = default_memory_pool())
+ : pool_(pool),
+ data_(/*ensure never null to make ubsan happy and avoid check penalties below*/
+ &util::internal::non_null_filler),
+
+ capacity_(0),
+ size_(0) {}
+
+ /// \brief Constructs new Builder that will start using
+ /// the provided buffer until Finish/Reset are called.
+ /// The buffer is not resized.
+ explicit BufferBuilder(std::shared_ptr<ResizableBuffer> buffer,
+ MemoryPool* pool = default_memory_pool())
+ : buffer_(std::move(buffer)),
+ pool_(pool),
+ data_(buffer_->mutable_data()),
+ capacity_(buffer_->capacity()),
+ size_(buffer_->size()) {}
+
+ /// \brief Resize the buffer to the nearest multiple of 64 bytes
+ ///
+ /// \param new_capacity the new capacity of the of the builder. Will be
/// rounded up to a multiple of 64 bytes for padding
/// \param shrink_to_fit if new capacity is smaller than the existing,
/// reallocate internal buffer. Set to false to avoid reallocations when
/// shrinking the builder.
- /// \return Status
- Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) {
- if (buffer_ == NULLPTR) {
- ARROW_ASSIGN_OR_RAISE(buffer_, AllocateResizableBuffer(new_capacity, pool_));
- } else {
- ARROW_RETURN_NOT_OK(buffer_->Resize(new_capacity, shrink_to_fit));
- }
- capacity_ = buffer_->capacity();
- data_ = buffer_->mutable_data();
- return Status::OK();
- }
-
- /// \brief Ensure that builder can accommodate the additional number of bytes
- /// without the need to perform allocations
- ///
- /// \param[in] additional_bytes number of additional bytes to make space for
- /// \return Status
- Status Reserve(const int64_t additional_bytes) {
- auto min_capacity = size_ + additional_bytes;
- if (min_capacity <= capacity_) {
- return Status::OK();
- }
- return Resize(GrowByFactor(capacity_, min_capacity), false);
- }
-
- /// \brief Return a capacity expanded by the desired growth factor
- static int64_t GrowByFactor(int64_t current_capacity, int64_t new_capacity) {
- // Doubling capacity except for large Reserve requests. 2x growth strategy
- // (versus 1.5x) seems to have slightly better performance when using
- // jemalloc, but significantly better performance when using the system
- // allocator. See ARROW-6450 for further discussion
- return std::max(new_capacity, current_capacity * 2);
- }
-
- /// \brief Append the given data to the buffer
- ///
- /// The buffer is automatically expanded if necessary.
- Status Append(const void* data, const int64_t length) {
- if (ARROW_PREDICT_FALSE(size_ + length > capacity_)) {
- ARROW_RETURN_NOT_OK(Resize(GrowByFactor(capacity_, size_ + length), false));
- }
- UnsafeAppend(data, length);
- return Status::OK();
- }
-
- /// \brief Append copies of a value to the buffer
- ///
- /// The buffer is automatically expanded if necessary.
- Status Append(const int64_t num_copies, uint8_t value) {
- ARROW_RETURN_NOT_OK(Reserve(num_copies));
- UnsafeAppend(num_copies, value);
- return Status::OK();
- }
-
- // Advance pointer and zero out memory
- Status Advance(const int64_t length) { return Append(length, 0); }
-
- // Advance pointer, but don't allocate or zero memory
- void UnsafeAdvance(const int64_t length) { size_ += length; }
-
- // Unsafe methods don't check existing size
- void UnsafeAppend(const void* data, const int64_t length) {
- memcpy(data_ + size_, data, static_cast<size_t>(length));
- size_ += length;
- }
-
- void UnsafeAppend(const int64_t num_copies, uint8_t value) {
- memset(data_ + size_, value, static_cast<size_t>(num_copies));
- size_ += num_copies;
- }
-
- /// \brief Return result of builder as a Buffer object.
- ///
- /// The builder is reset and can be reused afterwards.
- ///
- /// \param[out] out the finalized Buffer object
- /// \param shrink_to_fit if the buffer size is smaller than its capacity,
- /// reallocate to fit more tightly in memory. Set to false to avoid
- /// a reallocation, at the expense of potentially more memory consumption.
- /// \return Status
- Status Finish(std::shared_ptr<Buffer>* out, bool shrink_to_fit = true) {
- ARROW_RETURN_NOT_OK(Resize(size_, shrink_to_fit));
- if (size_ != 0) buffer_->ZeroPadding();
- *out = buffer_;
- if (*out == NULLPTR) {
- ARROW_ASSIGN_OR_RAISE(*out, AllocateBuffer(0, pool_));
- }
- Reset();
- return Status::OK();
- }
-
+ /// \return Status
+ Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) {
+ if (buffer_ == NULLPTR) {
+ ARROW_ASSIGN_OR_RAISE(buffer_, AllocateResizableBuffer(new_capacity, pool_));
+ } else {
+ ARROW_RETURN_NOT_OK(buffer_->Resize(new_capacity, shrink_to_fit));
+ }
+ capacity_ = buffer_->capacity();
+ data_ = buffer_->mutable_data();
+ return Status::OK();
+ }
+
+ /// \brief Ensure that builder can accommodate the additional number of bytes
+ /// without the need to perform allocations
+ ///
+ /// \param[in] additional_bytes number of additional bytes to make space for
+ /// \return Status
+ Status Reserve(const int64_t additional_bytes) {
+ auto min_capacity = size_ + additional_bytes;
+ if (min_capacity <= capacity_) {
+ return Status::OK();
+ }
+ return Resize(GrowByFactor(capacity_, min_capacity), false);
+ }
+
+ /// \brief Return a capacity expanded by the desired growth factor
+ static int64_t GrowByFactor(int64_t current_capacity, int64_t new_capacity) {
+ // Doubling capacity except for large Reserve requests. 2x growth strategy
+ // (versus 1.5x) seems to have slightly better performance when using
+ // jemalloc, but significantly better performance when using the system
+ // allocator. See ARROW-6450 for further discussion
+ return std::max(new_capacity, current_capacity * 2);
+ }
+
+ /// \brief Append the given data to the buffer
+ ///
+ /// The buffer is automatically expanded if necessary.
+ Status Append(const void* data, const int64_t length) {
+ if (ARROW_PREDICT_FALSE(size_ + length > capacity_)) {
+ ARROW_RETURN_NOT_OK(Resize(GrowByFactor(capacity_, size_ + length), false));
+ }
+ UnsafeAppend(data, length);
+ return Status::OK();
+ }
+
+ /// \brief Append copies of a value to the buffer
+ ///
+ /// The buffer is automatically expanded if necessary.
+ Status Append(const int64_t num_copies, uint8_t value) {
+ ARROW_RETURN_NOT_OK(Reserve(num_copies));
+ UnsafeAppend(num_copies, value);
+ return Status::OK();
+ }
+
+ // Advance pointer and zero out memory
+ Status Advance(const int64_t length) { return Append(length, 0); }
+
+ // Advance pointer, but don't allocate or zero memory
+ void UnsafeAdvance(const int64_t length) { size_ += length; }
+
+ // Unsafe methods don't check existing size
+ void UnsafeAppend(const void* data, const int64_t length) {
+ memcpy(data_ + size_, data, static_cast<size_t>(length));
+ size_ += length;
+ }
+
+ void UnsafeAppend(const int64_t num_copies, uint8_t value) {
+ memset(data_ + size_, value, static_cast<size_t>(num_copies));
+ size_ += num_copies;
+ }
+
+ /// \brief Return result of builder as a Buffer object.
+ ///
+ /// The builder is reset and can be reused afterwards.
+ ///
+ /// \param[out] out the finalized Buffer object
+ /// \param shrink_to_fit if the buffer size is smaller than its capacity,
+ /// reallocate to fit more tightly in memory. Set to false to avoid
+ /// a reallocation, at the expense of potentially more memory consumption.
+ /// \return Status
+ Status Finish(std::shared_ptr<Buffer>* out, bool shrink_to_fit = true) {
+ ARROW_RETURN_NOT_OK(Resize(size_, shrink_to_fit));
+ if (size_ != 0) buffer_->ZeroPadding();
+ *out = buffer_;
+ if (*out == NULLPTR) {
+ ARROW_ASSIGN_OR_RAISE(*out, AllocateBuffer(0, pool_));
+ }
+ Reset();
+ return Status::OK();
+ }
+
Result<std::shared_ptr<Buffer>> Finish(bool shrink_to_fit = true) {
std::shared_ptr<Buffer> out;
ARROW_RETURN_NOT_OK(Finish(&out, shrink_to_fit));
@@ -176,105 +176,105 @@ class ARROW_EXPORT BufferBuilder {
return Finish(shrink_to_fit);
}
- void Reset() {
- buffer_ = NULLPTR;
- capacity_ = size_ = 0;
- }
-
- /// \brief Set size to a smaller value without modifying builder
- /// contents. For reusable BufferBuilder classes
- /// \param[in] position must be non-negative and less than or equal
- /// to the current length()
- void Rewind(int64_t position) { size_ = position; }
-
- int64_t capacity() const { return capacity_; }
- int64_t length() const { return size_; }
- const uint8_t* data() const { return data_; }
- uint8_t* mutable_data() { return data_; }
-
- private:
- std::shared_ptr<ResizableBuffer> buffer_;
- MemoryPool* pool_;
- uint8_t* data_;
- int64_t capacity_;
- int64_t size_;
-};
-
-template <typename T, typename Enable = void>
-class TypedBufferBuilder;
-
-/// \brief A BufferBuilder for building a buffer of arithmetic elements
-template <typename T>
-class TypedBufferBuilder<
- T, typename std::enable_if<std::is_arithmetic<T>::value ||
- std::is_standard_layout<T>::value>::type> {
- public:
- explicit TypedBufferBuilder(MemoryPool* pool = default_memory_pool())
- : bytes_builder_(pool) {}
-
- explicit TypedBufferBuilder(std::shared_ptr<ResizableBuffer> buffer,
- MemoryPool* pool = default_memory_pool())
- : bytes_builder_(std::move(buffer), pool) {}
-
+ void Reset() {
+ buffer_ = NULLPTR;
+ capacity_ = size_ = 0;
+ }
+
+ /// \brief Set size to a smaller value without modifying builder
+ /// contents. For reusable BufferBuilder classes
+ /// \param[in] position must be non-negative and less than or equal
+ /// to the current length()
+ void Rewind(int64_t position) { size_ = position; }
+
+ int64_t capacity() const { return capacity_; }
+ int64_t length() const { return size_; }
+ const uint8_t* data() const { return data_; }
+ uint8_t* mutable_data() { return data_; }
+
+ private:
+ std::shared_ptr<ResizableBuffer> buffer_;
+ MemoryPool* pool_;
+ uint8_t* data_;
+ int64_t capacity_;
+ int64_t size_;
+};
+
+template <typename T, typename Enable = void>
+class TypedBufferBuilder;
+
+/// \brief A BufferBuilder for building a buffer of arithmetic elements
+template <typename T>
+class TypedBufferBuilder<
+ T, typename std::enable_if<std::is_arithmetic<T>::value ||
+ std::is_standard_layout<T>::value>::type> {
+ public:
+ explicit TypedBufferBuilder(MemoryPool* pool = default_memory_pool())
+ : bytes_builder_(pool) {}
+
+ explicit TypedBufferBuilder(std::shared_ptr<ResizableBuffer> buffer,
+ MemoryPool* pool = default_memory_pool())
+ : bytes_builder_(std::move(buffer), pool) {}
+
explicit TypedBufferBuilder(BufferBuilder builder)
: bytes_builder_(std::move(builder)) {}
BufferBuilder* bytes_builder() { return &bytes_builder_; }
- Status Append(T value) {
- return bytes_builder_.Append(reinterpret_cast<uint8_t*>(&value), sizeof(T));
- }
-
- Status Append(const T* values, int64_t num_elements) {
- return bytes_builder_.Append(reinterpret_cast<const uint8_t*>(values),
- num_elements * sizeof(T));
- }
-
- Status Append(const int64_t num_copies, T value) {
- ARROW_RETURN_NOT_OK(Reserve(num_copies + length()));
- UnsafeAppend(num_copies, value);
- return Status::OK();
- }
-
- void UnsafeAppend(T value) {
- bytes_builder_.UnsafeAppend(reinterpret_cast<uint8_t*>(&value), sizeof(T));
- }
-
- void UnsafeAppend(const T* values, int64_t num_elements) {
- bytes_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values),
- num_elements * sizeof(T));
- }
-
- template <typename Iter>
- void UnsafeAppend(Iter values_begin, Iter values_end) {
- int64_t num_elements = static_cast<int64_t>(std::distance(values_begin, values_end));
- auto data = mutable_data() + length();
- bytes_builder_.UnsafeAdvance(num_elements * sizeof(T));
- std::copy(values_begin, values_end, data);
- }
-
- void UnsafeAppend(const int64_t num_copies, T value) {
- auto data = mutable_data() + length();
- bytes_builder_.UnsafeAdvance(num_copies * sizeof(T));
- std::fill(data, data + num_copies, value);
- }
-
- Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) {
- return bytes_builder_.Resize(new_capacity * sizeof(T), shrink_to_fit);
- }
-
- Status Reserve(const int64_t additional_elements) {
- return bytes_builder_.Reserve(additional_elements * sizeof(T));
- }
-
- Status Advance(const int64_t length) {
- return bytes_builder_.Advance(length * sizeof(T));
- }
-
- Status Finish(std::shared_ptr<Buffer>* out, bool shrink_to_fit = true) {
- return bytes_builder_.Finish(out, shrink_to_fit);
- }
-
+ Status Append(T value) {
+ return bytes_builder_.Append(reinterpret_cast<uint8_t*>(&value), sizeof(T));
+ }
+
+ Status Append(const T* values, int64_t num_elements) {
+ return bytes_builder_.Append(reinterpret_cast<const uint8_t*>(values),
+ num_elements * sizeof(T));
+ }
+
+ Status Append(const int64_t num_copies, T value) {
+ ARROW_RETURN_NOT_OK(Reserve(num_copies + length()));
+ UnsafeAppend(num_copies, value);
+ return Status::OK();
+ }
+
+ void UnsafeAppend(T value) {
+ bytes_builder_.UnsafeAppend(reinterpret_cast<uint8_t*>(&value), sizeof(T));
+ }
+
+ void UnsafeAppend(const T* values, int64_t num_elements) {
+ bytes_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values),
+ num_elements * sizeof(T));
+ }
+
+ template <typename Iter>
+ void UnsafeAppend(Iter values_begin, Iter values_end) {
+ int64_t num_elements = static_cast<int64_t>(std::distance(values_begin, values_end));
+ auto data = mutable_data() + length();
+ bytes_builder_.UnsafeAdvance(num_elements * sizeof(T));
+ std::copy(values_begin, values_end, data);
+ }
+
+ void UnsafeAppend(const int64_t num_copies, T value) {
+ auto data = mutable_data() + length();
+ bytes_builder_.UnsafeAdvance(num_copies * sizeof(T));
+ std::fill(data, data + num_copies, value);
+ }
+
+ Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) {
+ return bytes_builder_.Resize(new_capacity * sizeof(T), shrink_to_fit);
+ }
+
+ Status Reserve(const int64_t additional_elements) {
+ return bytes_builder_.Reserve(additional_elements * sizeof(T));
+ }
+
+ Status Advance(const int64_t length) {
+ return bytes_builder_.Advance(length * sizeof(T));
+ }
+
+ Status Finish(std::shared_ptr<Buffer>* out, bool shrink_to_fit = true) {
+ return bytes_builder_.Finish(out, shrink_to_fit);
+ }
+
Result<std::shared_ptr<Buffer>> Finish(bool shrink_to_fit = true) {
std::shared_ptr<Buffer> out;
ARROW_RETURN_NOT_OK(Finish(&out, shrink_to_fit));
@@ -291,126 +291,126 @@ class TypedBufferBuilder<
return bytes_builder_.FinishWithLength(final_length * sizeof(T), shrink_to_fit);
}
- void Reset() { bytes_builder_.Reset(); }
-
- int64_t length() const { return bytes_builder_.length() / sizeof(T); }
- int64_t capacity() const { return bytes_builder_.capacity() / sizeof(T); }
- const T* data() const { return reinterpret_cast<const T*>(bytes_builder_.data()); }
- T* mutable_data() { return reinterpret_cast<T*>(bytes_builder_.mutable_data()); }
-
- private:
- BufferBuilder bytes_builder_;
-};
-
-/// \brief A BufferBuilder for building a buffer containing a bitmap
-template <>
-class TypedBufferBuilder<bool> {
- public:
- explicit TypedBufferBuilder(MemoryPool* pool = default_memory_pool())
- : bytes_builder_(pool) {}
-
+ void Reset() { bytes_builder_.Reset(); }
+
+ int64_t length() const { return bytes_builder_.length() / sizeof(T); }
+ int64_t capacity() const { return bytes_builder_.capacity() / sizeof(T); }
+ const T* data() const { return reinterpret_cast<const T*>(bytes_builder_.data()); }
+ T* mutable_data() { return reinterpret_cast<T*>(bytes_builder_.mutable_data()); }
+
+ private:
+ BufferBuilder bytes_builder_;
+};
+
+/// \brief A BufferBuilder for building a buffer containing a bitmap
+template <>
+class TypedBufferBuilder<bool> {
+ public:
+ explicit TypedBufferBuilder(MemoryPool* pool = default_memory_pool())
+ : bytes_builder_(pool) {}
+
explicit TypedBufferBuilder(BufferBuilder builder)
: bytes_builder_(std::move(builder)) {}
BufferBuilder* bytes_builder() { return &bytes_builder_; }
- Status Append(bool value) {
- ARROW_RETURN_NOT_OK(Reserve(1));
- UnsafeAppend(value);
- return Status::OK();
- }
-
- Status Append(const uint8_t* valid_bytes, int64_t num_elements) {
- ARROW_RETURN_NOT_OK(Reserve(num_elements));
- UnsafeAppend(valid_bytes, num_elements);
- return Status::OK();
- }
-
- Status Append(const int64_t num_copies, bool value) {
- ARROW_RETURN_NOT_OK(Reserve(num_copies));
- UnsafeAppend(num_copies, value);
- return Status::OK();
- }
-
- void UnsafeAppend(bool value) {
- BitUtil::SetBitTo(mutable_data(), bit_length_, value);
- if (!value) {
- ++false_count_;
- }
- ++bit_length_;
- }
-
- void UnsafeAppend(const uint8_t* bytes, int64_t num_elements) {
- if (num_elements == 0) return;
- int64_t i = 0;
- internal::GenerateBitsUnrolled(mutable_data(), bit_length_, num_elements, [&] {
- bool value = bytes[i++];
- false_count_ += !value;
- return value;
- });
- bit_length_ += num_elements;
- }
-
- void UnsafeAppend(const int64_t num_copies, bool value) {
- BitUtil::SetBitsTo(mutable_data(), bit_length_, num_copies, value);
- false_count_ += num_copies * !value;
- bit_length_ += num_copies;
- }
-
- template <bool count_falses, typename Generator>
- void UnsafeAppend(const int64_t num_elements, Generator&& gen) {
- if (num_elements == 0) return;
-
- if (count_falses) {
- internal::GenerateBitsUnrolled(mutable_data(), bit_length_, num_elements, [&] {
- bool value = gen();
- false_count_ += !value;
- return value;
- });
- } else {
- internal::GenerateBitsUnrolled(mutable_data(), bit_length_, num_elements,
- std::forward<Generator>(gen));
- }
- bit_length_ += num_elements;
- }
-
- Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) {
- const int64_t old_byte_capacity = bytes_builder_.capacity();
- ARROW_RETURN_NOT_OK(
- bytes_builder_.Resize(BitUtil::BytesForBits(new_capacity), shrink_to_fit));
- // Resize() may have chosen a larger capacity (e.g. for padding),
- // so ask it again before calling memset().
- const int64_t new_byte_capacity = bytes_builder_.capacity();
- if (new_byte_capacity > old_byte_capacity) {
- // The additional buffer space is 0-initialized for convenience,
- // so that other methods can simply bump the length.
- memset(mutable_data() + old_byte_capacity, 0,
- static_cast<size_t>(new_byte_capacity - old_byte_capacity));
- }
- return Status::OK();
- }
-
- Status Reserve(const int64_t additional_elements) {
- return Resize(
- BufferBuilder::GrowByFactor(bit_length_, bit_length_ + additional_elements),
- false);
- }
-
- Status Advance(const int64_t length) {
- ARROW_RETURN_NOT_OK(Reserve(length));
- bit_length_ += length;
- false_count_ += length;
- return Status::OK();
- }
-
- Status Finish(std::shared_ptr<Buffer>* out, bool shrink_to_fit = true) {
- // set bytes_builder_.size_ == byte size of data
- bytes_builder_.UnsafeAdvance(BitUtil::BytesForBits(bit_length_) -
- bytes_builder_.length());
- bit_length_ = false_count_ = 0;
- return bytes_builder_.Finish(out, shrink_to_fit);
- }
-
+ Status Append(bool value) {
+ ARROW_RETURN_NOT_OK(Reserve(1));
+ UnsafeAppend(value);
+ return Status::OK();
+ }
+
+ Status Append(const uint8_t* valid_bytes, int64_t num_elements) {
+ ARROW_RETURN_NOT_OK(Reserve(num_elements));
+ UnsafeAppend(valid_bytes, num_elements);
+ return Status::OK();
+ }
+
+ Status Append(const int64_t num_copies, bool value) {
+ ARROW_RETURN_NOT_OK(Reserve(num_copies));
+ UnsafeAppend(num_copies, value);
+ return Status::OK();
+ }
+
+ void UnsafeAppend(bool value) {
+ BitUtil::SetBitTo(mutable_data(), bit_length_, value);
+ if (!value) {
+ ++false_count_;
+ }
+ ++bit_length_;
+ }
+
+ void UnsafeAppend(const uint8_t* bytes, int64_t num_elements) {
+ if (num_elements == 0) return;
+ int64_t i = 0;
+ internal::GenerateBitsUnrolled(mutable_data(), bit_length_, num_elements, [&] {
+ bool value = bytes[i++];
+ false_count_ += !value;
+ return value;
+ });
+ bit_length_ += num_elements;
+ }
+
+ void UnsafeAppend(const int64_t num_copies, bool value) {
+ BitUtil::SetBitsTo(mutable_data(), bit_length_, num_copies, value);
+ false_count_ += num_copies * !value;
+ bit_length_ += num_copies;
+ }
+
+ template <bool count_falses, typename Generator>
+ void UnsafeAppend(const int64_t num_elements, Generator&& gen) {
+ if (num_elements == 0) return;
+
+ if (count_falses) {
+ internal::GenerateBitsUnrolled(mutable_data(), bit_length_, num_elements, [&] {
+ bool value = gen();
+ false_count_ += !value;
+ return value;
+ });
+ } else {
+ internal::GenerateBitsUnrolled(mutable_data(), bit_length_, num_elements,
+ std::forward<Generator>(gen));
+ }
+ bit_length_ += num_elements;
+ }
+
+ Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) {
+ const int64_t old_byte_capacity = bytes_builder_.capacity();
+ ARROW_RETURN_NOT_OK(
+ bytes_builder_.Resize(BitUtil::BytesForBits(new_capacity), shrink_to_fit));
+ // Resize() may have chosen a larger capacity (e.g. for padding),
+ // so ask it again before calling memset().
+ const int64_t new_byte_capacity = bytes_builder_.capacity();
+ if (new_byte_capacity > old_byte_capacity) {
+ // The additional buffer space is 0-initialized for convenience,
+ // so that other methods can simply bump the length.
+ memset(mutable_data() + old_byte_capacity, 0,
+ static_cast<size_t>(new_byte_capacity - old_byte_capacity));
+ }
+ return Status::OK();
+ }
+
+ Status Reserve(const int64_t additional_elements) {
+ return Resize(
+ BufferBuilder::GrowByFactor(bit_length_, bit_length_ + additional_elements),
+ false);
+ }
+
+ Status Advance(const int64_t length) {
+ ARROW_RETURN_NOT_OK(Reserve(length));
+ bit_length_ += length;
+ false_count_ += length;
+ return Status::OK();
+ }
+
+ Status Finish(std::shared_ptr<Buffer>* out, bool shrink_to_fit = true) {
+ // set bytes_builder_.size_ == byte size of data
+ bytes_builder_.UnsafeAdvance(BitUtil::BytesForBits(bit_length_) -
+ bytes_builder_.length());
+ bit_length_ = false_count_ = 0;
+ return bytes_builder_.Finish(out, shrink_to_fit);
+ }
+
Result<std::shared_ptr<Buffer>> Finish(bool shrink_to_fit = true) {
std::shared_ptr<Buffer> out;
ARROW_RETURN_NOT_OK(Finish(&out, shrink_to_fit));
@@ -430,21 +430,21 @@ class TypedBufferBuilder<bool> {
return bytes_builder_.FinishWithLength(final_byte_length, shrink_to_fit);
}
- void Reset() {
- bytes_builder_.Reset();
- bit_length_ = false_count_ = 0;
- }
-
- int64_t length() const { return bit_length_; }
- int64_t capacity() const { return bytes_builder_.capacity() * 8; }
- const uint8_t* data() const { return bytes_builder_.data(); }
- uint8_t* mutable_data() { return bytes_builder_.mutable_data(); }
- int64_t false_count() const { return false_count_; }
-
- private:
- BufferBuilder bytes_builder_;
- int64_t bit_length_ = 0;
- int64_t false_count_ = 0;
-};
-
-} // namespace arrow
+ void Reset() {
+ bytes_builder_.Reset();
+ bit_length_ = false_count_ = 0;
+ }
+
+ int64_t length() const { return bit_length_; }
+ int64_t capacity() const { return bytes_builder_.capacity() * 8; }
+ const uint8_t* data() const { return bytes_builder_.data(); }
+ uint8_t* mutable_data() { return bytes_builder_.mutable_data(); }
+ int64_t false_count() const { return false_count_; }
+
+ private:
+ BufferBuilder bytes_builder_;
+ int64_t bit_length_ = 0;
+ int64_t false_count_ = 0;
+};
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/builder.cc b/contrib/libs/apache/arrow/cpp/src/arrow/builder.cc
index f22228a4588..0b5fd273571 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/builder.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/builder.cc
@@ -1,222 +1,222 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/builder.h"
-
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/hashing.h"
-#include "arrow/visitor_inline.h"
-
-namespace arrow {
-
-class MemoryPool;
-
-// ----------------------------------------------------------------------
-// Helper functions
-
-struct DictionaryBuilderCase {
- template <typename ValueType, typename Enable = typename ValueType::c_type>
- Status Visit(const ValueType&) {
- return CreateFor<ValueType>();
- }
-
- Status Visit(const NullType&) { return CreateFor<NullType>(); }
- Status Visit(const BinaryType&) { return Create<BinaryDictionaryBuilder>(); }
- Status Visit(const StringType&) { return Create<StringDictionaryBuilder>(); }
- Status Visit(const LargeBinaryType&) {
- return Create<DictionaryBuilder<LargeBinaryType>>();
- }
- Status Visit(const LargeStringType&) {
- return Create<DictionaryBuilder<LargeStringType>>();
- }
- Status Visit(const FixedSizeBinaryType&) { return CreateFor<FixedSizeBinaryType>(); }
- Status Visit(const Decimal128Type&) { return CreateFor<Decimal128Type>(); }
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/builder.h"
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/hashing.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+class MemoryPool;
+
+// ----------------------------------------------------------------------
+// Helper functions
+
+struct DictionaryBuilderCase {
+ template <typename ValueType, typename Enable = typename ValueType::c_type>
+ Status Visit(const ValueType&) {
+ return CreateFor<ValueType>();
+ }
+
+ Status Visit(const NullType&) { return CreateFor<NullType>(); }
+ Status Visit(const BinaryType&) { return Create<BinaryDictionaryBuilder>(); }
+ Status Visit(const StringType&) { return Create<StringDictionaryBuilder>(); }
+ Status Visit(const LargeBinaryType&) {
+ return Create<DictionaryBuilder<LargeBinaryType>>();
+ }
+ Status Visit(const LargeStringType&) {
+ return Create<DictionaryBuilder<LargeStringType>>();
+ }
+ Status Visit(const FixedSizeBinaryType&) { return CreateFor<FixedSizeBinaryType>(); }
+ Status Visit(const Decimal128Type&) { return CreateFor<Decimal128Type>(); }
Status Visit(const Decimal256Type&) { return CreateFor<Decimal256Type>(); }
-
- Status Visit(const DataType& value_type) { return NotImplemented(value_type); }
- Status Visit(const HalfFloatType& value_type) { return NotImplemented(value_type); }
- Status NotImplemented(const DataType& value_type) {
- return Status::NotImplemented(
- "MakeBuilder: cannot construct builder for dictionaries with value type ",
- value_type);
- }
-
- template <typename ValueType>
- Status CreateFor() {
- return Create<DictionaryBuilder<ValueType>>();
- }
-
- template <typename BuilderType>
- Status Create() {
- BuilderType* builder;
- if (dictionary != nullptr) {
- builder = new BuilderType(dictionary, pool);
- } else {
- auto start_int_size = internal::GetByteWidth(*index_type);
- builder = new BuilderType(start_int_size, value_type, pool);
- }
- out->reset(builder);
- return Status::OK();
- }
-
- Status Make() { return VisitTypeInline(*value_type, this); }
-
- MemoryPool* pool;
- const std::shared_ptr<DataType>& index_type;
- const std::shared_ptr<DataType>& value_type;
- const std::shared_ptr<Array>& dictionary;
- std::unique_ptr<ArrayBuilder>* out;
-};
-
-#define BUILDER_CASE(TYPE_CLASS) \
- case TYPE_CLASS##Type::type_id: \
- out->reset(new TYPE_CLASS##Builder(type, pool)); \
- return Status::OK();
-
-Result<std::vector<std::shared_ptr<ArrayBuilder>>> FieldBuilders(const DataType& type,
- MemoryPool* pool) {
- std::vector<std::shared_ptr<ArrayBuilder>> field_builders;
-
- for (const auto& field : type.fields()) {
- std::unique_ptr<ArrayBuilder> builder;
- RETURN_NOT_OK(MakeBuilder(pool, field->type(), &builder));
- field_builders.emplace_back(std::move(builder));
- }
-
- return field_builders;
-}
-
-Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
- std::unique_ptr<ArrayBuilder>* out) {
- switch (type->id()) {
- case Type::NA: {
- out->reset(new NullBuilder(pool));
- return Status::OK();
- }
- BUILDER_CASE(UInt8);
- BUILDER_CASE(Int8);
- BUILDER_CASE(UInt16);
- BUILDER_CASE(Int16);
- BUILDER_CASE(UInt32);
- BUILDER_CASE(Int32);
- BUILDER_CASE(UInt64);
- BUILDER_CASE(Int64);
- BUILDER_CASE(Date32);
- BUILDER_CASE(Date64);
- BUILDER_CASE(Duration);
- BUILDER_CASE(Time32);
- BUILDER_CASE(Time64);
- BUILDER_CASE(Timestamp);
- BUILDER_CASE(MonthInterval);
- BUILDER_CASE(DayTimeInterval);
- BUILDER_CASE(Boolean);
- BUILDER_CASE(HalfFloat);
- BUILDER_CASE(Float);
- BUILDER_CASE(Double);
- BUILDER_CASE(String);
- BUILDER_CASE(Binary);
- BUILDER_CASE(LargeString);
- BUILDER_CASE(LargeBinary);
- BUILDER_CASE(FixedSizeBinary);
- BUILDER_CASE(Decimal128);
+
+ Status Visit(const DataType& value_type) { return NotImplemented(value_type); }
+ Status Visit(const HalfFloatType& value_type) { return NotImplemented(value_type); }
+ Status NotImplemented(const DataType& value_type) {
+ return Status::NotImplemented(
+ "MakeBuilder: cannot construct builder for dictionaries with value type ",
+ value_type);
+ }
+
+ template <typename ValueType>
+ Status CreateFor() {
+ return Create<DictionaryBuilder<ValueType>>();
+ }
+
+ template <typename BuilderType>
+ Status Create() {
+ BuilderType* builder;
+ if (dictionary != nullptr) {
+ builder = new BuilderType(dictionary, pool);
+ } else {
+ auto start_int_size = internal::GetByteWidth(*index_type);
+ builder = new BuilderType(start_int_size, value_type, pool);
+ }
+ out->reset(builder);
+ return Status::OK();
+ }
+
+ Status Make() { return VisitTypeInline(*value_type, this); }
+
+ MemoryPool* pool;
+ const std::shared_ptr<DataType>& index_type;
+ const std::shared_ptr<DataType>& value_type;
+ const std::shared_ptr<Array>& dictionary;
+ std::unique_ptr<ArrayBuilder>* out;
+};
+
+#define BUILDER_CASE(TYPE_CLASS) \
+ case TYPE_CLASS##Type::type_id: \
+ out->reset(new TYPE_CLASS##Builder(type, pool)); \
+ return Status::OK();
+
+Result<std::vector<std::shared_ptr<ArrayBuilder>>> FieldBuilders(const DataType& type,
+ MemoryPool* pool) {
+ std::vector<std::shared_ptr<ArrayBuilder>> field_builders;
+
+ for (const auto& field : type.fields()) {
+ std::unique_ptr<ArrayBuilder> builder;
+ RETURN_NOT_OK(MakeBuilder(pool, field->type(), &builder));
+ field_builders.emplace_back(std::move(builder));
+ }
+
+ return field_builders;
+}
+
+Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
+ std::unique_ptr<ArrayBuilder>* out) {
+ switch (type->id()) {
+ case Type::NA: {
+ out->reset(new NullBuilder(pool));
+ return Status::OK();
+ }
+ BUILDER_CASE(UInt8);
+ BUILDER_CASE(Int8);
+ BUILDER_CASE(UInt16);
+ BUILDER_CASE(Int16);
+ BUILDER_CASE(UInt32);
+ BUILDER_CASE(Int32);
+ BUILDER_CASE(UInt64);
+ BUILDER_CASE(Int64);
+ BUILDER_CASE(Date32);
+ BUILDER_CASE(Date64);
+ BUILDER_CASE(Duration);
+ BUILDER_CASE(Time32);
+ BUILDER_CASE(Time64);
+ BUILDER_CASE(Timestamp);
+ BUILDER_CASE(MonthInterval);
+ BUILDER_CASE(DayTimeInterval);
+ BUILDER_CASE(Boolean);
+ BUILDER_CASE(HalfFloat);
+ BUILDER_CASE(Float);
+ BUILDER_CASE(Double);
+ BUILDER_CASE(String);
+ BUILDER_CASE(Binary);
+ BUILDER_CASE(LargeString);
+ BUILDER_CASE(LargeBinary);
+ BUILDER_CASE(FixedSizeBinary);
+ BUILDER_CASE(Decimal128);
BUILDER_CASE(Decimal256);
-
- case Type::DICTIONARY: {
- const auto& dict_type = static_cast<const DictionaryType&>(*type);
- DictionaryBuilderCase visitor = {pool, dict_type.index_type(),
- dict_type.value_type(), nullptr, out};
- return visitor.Make();
- }
-
- case Type::LIST: {
- std::unique_ptr<ArrayBuilder> value_builder;
- std::shared_ptr<DataType> value_type =
- internal::checked_cast<const ListType&>(*type).value_type();
- RETURN_NOT_OK(MakeBuilder(pool, value_type, &value_builder));
- out->reset(new ListBuilder(pool, std::move(value_builder), type));
- return Status::OK();
- }
-
- case Type::LARGE_LIST: {
- std::unique_ptr<ArrayBuilder> value_builder;
- std::shared_ptr<DataType> value_type =
- internal::checked_cast<const LargeListType&>(*type).value_type();
- RETURN_NOT_OK(MakeBuilder(pool, value_type, &value_builder));
- out->reset(new LargeListBuilder(pool, std::move(value_builder), type));
- return Status::OK();
- }
-
- case Type::MAP: {
- const auto& map_type = internal::checked_cast<const MapType&>(*type);
- std::unique_ptr<ArrayBuilder> key_builder, item_builder;
- RETURN_NOT_OK(MakeBuilder(pool, map_type.key_type(), &key_builder));
- RETURN_NOT_OK(MakeBuilder(pool, map_type.item_type(), &item_builder));
- out->reset(
- new MapBuilder(pool, std::move(key_builder), std::move(item_builder), type));
- return Status::OK();
- }
-
- case Type::FIXED_SIZE_LIST: {
- const auto& list_type = internal::checked_cast<const FixedSizeListType&>(*type);
- std::unique_ptr<ArrayBuilder> value_builder;
- auto value_type = list_type.value_type();
- RETURN_NOT_OK(MakeBuilder(pool, value_type, &value_builder));
- out->reset(new FixedSizeListBuilder(pool, std::move(value_builder), type));
- return Status::OK();
- }
-
- case Type::STRUCT: {
- ARROW_ASSIGN_OR_RAISE(auto field_builders, FieldBuilders(*type, pool));
- out->reset(new StructBuilder(type, pool, std::move(field_builders)));
- return Status::OK();
- }
-
- case Type::SPARSE_UNION: {
- ARROW_ASSIGN_OR_RAISE(auto field_builders, FieldBuilders(*type, pool));
- out->reset(new SparseUnionBuilder(pool, std::move(field_builders), type));
- return Status::OK();
- }
-
- case Type::DENSE_UNION: {
- ARROW_ASSIGN_OR_RAISE(auto field_builders, FieldBuilders(*type, pool));
- out->reset(new DenseUnionBuilder(pool, std::move(field_builders), type));
- return Status::OK();
- }
-
- default:
- break;
- }
- return Status::NotImplemented("MakeBuilder: cannot construct builder for type ",
- type->ToString());
-}
-
-Status MakeDictionaryBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
- const std::shared_ptr<Array>& dictionary,
- std::unique_ptr<ArrayBuilder>* out) {
- const auto& dict_type = static_cast<const DictionaryType&>(*type);
- DictionaryBuilderCase visitor = {pool, dict_type.index_type(), dict_type.value_type(),
- dictionary, out};
- return visitor.Make();
-}
-
-} // namespace arrow
+
+ case Type::DICTIONARY: {
+ const auto& dict_type = static_cast<const DictionaryType&>(*type);
+ DictionaryBuilderCase visitor = {pool, dict_type.index_type(),
+ dict_type.value_type(), nullptr, out};
+ return visitor.Make();
+ }
+
+ case Type::LIST: {
+ std::unique_ptr<ArrayBuilder> value_builder;
+ std::shared_ptr<DataType> value_type =
+ internal::checked_cast<const ListType&>(*type).value_type();
+ RETURN_NOT_OK(MakeBuilder(pool, value_type, &value_builder));
+ out->reset(new ListBuilder(pool, std::move(value_builder), type));
+ return Status::OK();
+ }
+
+ case Type::LARGE_LIST: {
+ std::unique_ptr<ArrayBuilder> value_builder;
+ std::shared_ptr<DataType> value_type =
+ internal::checked_cast<const LargeListType&>(*type).value_type();
+ RETURN_NOT_OK(MakeBuilder(pool, value_type, &value_builder));
+ out->reset(new LargeListBuilder(pool, std::move(value_builder), type));
+ return Status::OK();
+ }
+
+ case Type::MAP: {
+ const auto& map_type = internal::checked_cast<const MapType&>(*type);
+ std::unique_ptr<ArrayBuilder> key_builder, item_builder;
+ RETURN_NOT_OK(MakeBuilder(pool, map_type.key_type(), &key_builder));
+ RETURN_NOT_OK(MakeBuilder(pool, map_type.item_type(), &item_builder));
+ out->reset(
+ new MapBuilder(pool, std::move(key_builder), std::move(item_builder), type));
+ return Status::OK();
+ }
+
+ case Type::FIXED_SIZE_LIST: {
+ const auto& list_type = internal::checked_cast<const FixedSizeListType&>(*type);
+ std::unique_ptr<ArrayBuilder> value_builder;
+ auto value_type = list_type.value_type();
+ RETURN_NOT_OK(MakeBuilder(pool, value_type, &value_builder));
+ out->reset(new FixedSizeListBuilder(pool, std::move(value_builder), type));
+ return Status::OK();
+ }
+
+ case Type::STRUCT: {
+ ARROW_ASSIGN_OR_RAISE(auto field_builders, FieldBuilders(*type, pool));
+ out->reset(new StructBuilder(type, pool, std::move(field_builders)));
+ return Status::OK();
+ }
+
+ case Type::SPARSE_UNION: {
+ ARROW_ASSIGN_OR_RAISE(auto field_builders, FieldBuilders(*type, pool));
+ out->reset(new SparseUnionBuilder(pool, std::move(field_builders), type));
+ return Status::OK();
+ }
+
+ case Type::DENSE_UNION: {
+ ARROW_ASSIGN_OR_RAISE(auto field_builders, FieldBuilders(*type, pool));
+ out->reset(new DenseUnionBuilder(pool, std::move(field_builders), type));
+ return Status::OK();
+ }
+
+ default:
+ break;
+ }
+ return Status::NotImplemented("MakeBuilder: cannot construct builder for type ",
+ type->ToString());
+}
+
+Status MakeDictionaryBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
+ const std::shared_ptr<Array>& dictionary,
+ std::unique_ptr<ArrayBuilder>* out) {
+ const auto& dict_type = static_cast<const DictionaryType&>(*type);
+ DictionaryBuilderCase visitor = {pool, dict_type.index_type(), dict_type.value_type(),
+ dictionary, out};
+ return visitor.Make();
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/builder.h b/contrib/libs/apache/arrow/cpp/src/arrow/builder.h
index 4b80e558004..c1d1a049d4e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/builder.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/builder.h
@@ -1,32 +1,32 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <memory>
-
-#include "arrow/array/builder_adaptive.h" // IWYU pragma: keep
-#include "arrow/array/builder_base.h" // IWYU pragma: keep
-#include "arrow/array/builder_binary.h" // IWYU pragma: keep
-#include "arrow/array/builder_decimal.h" // IWYU pragma: keep
-#include "arrow/array/builder_dict.h" // IWYU pragma: keep
-#include "arrow/array/builder_nested.h" // IWYU pragma: keep
-#include "arrow/array/builder_primitive.h" // IWYU pragma: keep
-#include "arrow/array/builder_time.h" // IWYU pragma: keep
-#include "arrow/array/builder_union.h" // IWYU pragma: keep
-#include "arrow/status.h"
-#include "arrow/util/visibility.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+
+#include "arrow/array/builder_adaptive.h" // IWYU pragma: keep
+#include "arrow/array/builder_base.h" // IWYU pragma: keep
+#include "arrow/array/builder_binary.h" // IWYU pragma: keep
+#include "arrow/array/builder_decimal.h" // IWYU pragma: keep
+#include "arrow/array/builder_dict.h" // IWYU pragma: keep
+#include "arrow/array/builder_nested.h" // IWYU pragma: keep
+#include "arrow/array/builder_primitive.h" // IWYU pragma: keep
+#include "arrow/array/builder_time.h" // IWYU pragma: keep
+#include "arrow/array/builder_union.h" // IWYU pragma: keep
+#include "arrow/status.h"
+#include "arrow/util/visibility.h"
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/c/abi.h b/contrib/libs/apache/arrow/cpp/src/arrow/c/abi.h
index a78170dbdbc..cd4d2bb45e6 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/c/abi.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/c/abi.h
@@ -1,103 +1,103 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <stdint.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define ARROW_FLAG_DICTIONARY_ORDERED 1
-#define ARROW_FLAG_NULLABLE 2
-#define ARROW_FLAG_MAP_KEYS_SORTED 4
-
-struct ArrowSchema {
- // Array type description
- const char* format;
- const char* name;
- const char* metadata;
- int64_t flags;
- int64_t n_children;
- struct ArrowSchema** children;
- struct ArrowSchema* dictionary;
-
- // Release callback
- void (*release)(struct ArrowSchema*);
- // Opaque producer-specific data
- void* private_data;
-};
-
-struct ArrowArray {
- // Array data description
- int64_t length;
- int64_t null_count;
- int64_t offset;
- int64_t n_buffers;
- int64_t n_children;
- const void** buffers;
- struct ArrowArray** children;
- struct ArrowArray* dictionary;
-
- // Release callback
- void (*release)(struct ArrowArray*);
- // Opaque producer-specific data
- void* private_data;
-};
-
-// EXPERIMENTAL: C stream interface
-
-struct ArrowArrayStream {
- // Callback to get the stream type
- // (will be the same for all arrays in the stream).
- //
- // Return value: 0 if successful, an `errno`-compatible error code otherwise.
- //
- // If successful, the ArrowSchema must be released independently from the stream.
- int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out);
-
- // Callback to get the next array
- // (if no error and the array is released, the stream has ended)
- //
- // Return value: 0 if successful, an `errno`-compatible error code otherwise.
- //
- // If successful, the ArrowArray must be released independently from the stream.
- int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out);
-
- // Callback to get optional detailed error information.
- // This must only be called if the last stream operation failed
- // with a non-0 return code.
- //
- // Return value: pointer to a null-terminated character array describing
- // the last error, or NULL if no description is available.
- //
- // The returned pointer is only valid until the next operation on this stream
- // (including release).
- const char* (*get_last_error)(struct ArrowArrayStream*);
-
- // Release callback: release the stream's own resources.
- // Note that arrays returned by `get_next` must be individually released.
- void (*release)(struct ArrowArrayStream*);
-
- // Opaque producer-specific data
- void* private_data;
-};
-
-#ifdef __cplusplus
-}
-#endif
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define ARROW_FLAG_DICTIONARY_ORDERED 1
+#define ARROW_FLAG_NULLABLE 2
+#define ARROW_FLAG_MAP_KEYS_SORTED 4
+
+struct ArrowSchema {
+ // Array type description
+ const char* format;
+ const char* name;
+ const char* metadata;
+ int64_t flags;
+ int64_t n_children;
+ struct ArrowSchema** children;
+ struct ArrowSchema* dictionary;
+
+ // Release callback
+ void (*release)(struct ArrowSchema*);
+ // Opaque producer-specific data
+ void* private_data;
+};
+
+struct ArrowArray {
+ // Array data description
+ int64_t length;
+ int64_t null_count;
+ int64_t offset;
+ int64_t n_buffers;
+ int64_t n_children;
+ const void** buffers;
+ struct ArrowArray** children;
+ struct ArrowArray* dictionary;
+
+ // Release callback
+ void (*release)(struct ArrowArray*);
+ // Opaque producer-specific data
+ void* private_data;
+};
+
+// EXPERIMENTAL: C stream interface
+
+struct ArrowArrayStream {
+ // Callback to get the stream type
+ // (will be the same for all arrays in the stream).
+ //
+ // Return value: 0 if successful, an `errno`-compatible error code otherwise.
+ //
+ // If successful, the ArrowSchema must be released independently from the stream.
+ int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out);
+
+ // Callback to get the next array
+ // (if no error and the array is released, the stream has ended)
+ //
+ // Return value: 0 if successful, an `errno`-compatible error code otherwise.
+ //
+ // If successful, the ArrowArray must be released independently from the stream.
+ int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out);
+
+ // Callback to get optional detailed error information.
+ // This must only be called if the last stream operation failed
+ // with a non-0 return code.
+ //
+ // Return value: pointer to a null-terminated character array describing
+ // the last error, or NULL if no description is available.
+ //
+ // The returned pointer is only valid until the next operation on this stream
+ // (including release).
+ const char* (*get_last_error)(struct ArrowArrayStream*);
+
+ // Release callback: release the stream's own resources.
+ // Note that arrays returned by `get_next` must be individually released.
+ void (*release)(struct ArrowArrayStream*);
+
+ // Opaque producer-specific data
+ void* private_data;
+};
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/c/bridge.cc b/contrib/libs/apache/arrow/cpp/src/arrow/c/bridge.cc
index a43bf8104f2..5a38f245863 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/c/bridge.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/c/bridge.cc
@@ -1,309 +1,309 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/c/bridge.h"
-
-#include <algorithm>
-#include <cerrno>
-#include <cstring>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "arrow/array.h"
-#include "arrow/buffer.h"
-#include "arrow/c/helpers.h"
-#include "arrow/c/util_internal.h"
-#include "arrow/memory_pool.h"
-#include "arrow/record_batch.h"
-#include "arrow/result.h"
-#include "arrow/stl_allocator.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/key_value_metadata.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/string_view.h"
-#include "arrow/util/value_parsing.h"
-#include "arrow/visitor_inline.h"
-
-namespace arrow {
-
-using internal::checked_cast;
-using internal::checked_pointer_cast;
-
-using internal::ArrayExportGuard;
-using internal::ArrayExportTraits;
-using internal::SchemaExportGuard;
-using internal::SchemaExportTraits;
-
-// TODO export / import Extension types and arrays
-
-namespace {
-
-Status ExportingNotImplemented(const DataType& type) {
- return Status::NotImplemented("Exporting ", type.ToString(), " array not supported");
-}
-
-// Allocate exported private data using MemoryPool,
-// to allow accounting memory and checking for memory leaks.
-
-// XXX use Gandiva's SimpleArena?
-
-template <typename T>
-using PoolVector = std::vector<T, ::arrow::stl::allocator<T>>;
-
-template <typename Derived>
-struct PoolAllocationMixin {
- static void* operator new(size_t size) {
- DCHECK_EQ(size, sizeof(Derived));
- uint8_t* data;
- ARROW_CHECK_OK(default_memory_pool()->Allocate(static_cast<int64_t>(size), &data));
- return data;
- }
-
- static void operator delete(void* ptr) {
- default_memory_pool()->Free(reinterpret_cast<uint8_t*>(ptr), sizeof(Derived));
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-// C schema export
-
-struct ExportedSchemaPrivateData : PoolAllocationMixin<ExportedSchemaPrivateData> {
- std::string format_;
- std::string name_;
- std::string metadata_;
- struct ArrowSchema dictionary_;
- PoolVector<struct ArrowSchema> children_;
- PoolVector<struct ArrowSchema*> child_pointers_;
-
- ExportedSchemaPrivateData() = default;
- ARROW_DEFAULT_MOVE_AND_ASSIGN(ExportedSchemaPrivateData);
- ARROW_DISALLOW_COPY_AND_ASSIGN(ExportedSchemaPrivateData);
-};
-
-void ReleaseExportedSchema(struct ArrowSchema* schema) {
- if (ArrowSchemaIsReleased(schema)) {
- return;
- }
- for (int64_t i = 0; i < schema->n_children; ++i) {
- struct ArrowSchema* child = schema->children[i];
- ArrowSchemaRelease(child);
- DCHECK(ArrowSchemaIsReleased(child))
- << "Child release callback should have marked it released";
- }
- struct ArrowSchema* dict = schema->dictionary;
- if (dict != nullptr) {
- ArrowSchemaRelease(dict);
- DCHECK(ArrowSchemaIsReleased(dict))
- << "Dictionary release callback should have marked it released";
- }
- DCHECK_NE(schema->private_data, nullptr);
- delete reinterpret_cast<ExportedSchemaPrivateData*>(schema->private_data);
-
- ArrowSchemaMarkReleased(schema);
-}
-
-template <typename SizeType>
-Result<int32_t> DowncastMetadataSize(SizeType size) {
- auto res = static_cast<int32_t>(size);
- if (res < 0 || static_cast<SizeType>(res) != size) {
- return Status::Invalid("Metadata too large (more than 2**31 items or bytes)");
- }
- return res;
-}
-
-Result<std::string> EncodeMetadata(const KeyValueMetadata& metadata) {
- ARROW_ASSIGN_OR_RAISE(auto npairs, DowncastMetadataSize(metadata.size()));
- std::string exported;
-
- // Pre-compute total string size
- size_t total_size = 4;
- for (int32_t i = 0; i < npairs; ++i) {
- total_size += 8 + metadata.key(i).length() + metadata.value(i).length();
- }
- exported.resize(total_size);
-
- char* data_start = &exported[0];
- char* data = data_start;
- auto write_int32 = [&](int32_t v) -> void {
- memcpy(data, &v, 4);
- data += 4;
- };
- auto write_string = [&](const std::string& s) -> Status {
- ARROW_ASSIGN_OR_RAISE(auto len, DowncastMetadataSize(s.length()));
- write_int32(len);
- if (len > 0) {
- memcpy(data, s.data(), len);
- data += len;
- }
- return Status::OK();
- };
-
- write_int32(npairs);
- for (int32_t i = 0; i < npairs; ++i) {
- RETURN_NOT_OK(write_string(metadata.key(i)));
- RETURN_NOT_OK(write_string(metadata.value(i)));
- }
- DCHECK_EQ(static_cast<size_t>(data - data_start), total_size);
- return exported;
-}
-
-struct SchemaExporter {
- Status ExportField(const Field& field) {
- export_.name_ = field.name();
- flags_ = field.nullable() ? ARROW_FLAG_NULLABLE : 0;
-
- const DataType& type = *field.type();
- RETURN_NOT_OK(ExportFormat(type));
- RETURN_NOT_OK(ExportChildren(type.fields()));
- RETURN_NOT_OK(ExportMetadata(field.metadata().get()));
- return Status::OK();
- }
-
- Status ExportType(const DataType& type) {
- flags_ = ARROW_FLAG_NULLABLE;
-
- RETURN_NOT_OK(ExportFormat(type));
- RETURN_NOT_OK(ExportChildren(type.fields()));
- return Status::OK();
- }
-
- Status ExportSchema(const Schema& schema) {
- static StructType dummy_struct_type({});
- flags_ = 0;
-
- RETURN_NOT_OK(ExportFormat(dummy_struct_type));
- RETURN_NOT_OK(ExportChildren(schema.fields()));
- RETURN_NOT_OK(ExportMetadata(schema.metadata().get()));
- return Status::OK();
- }
-
- // Finalize exporting by setting C struct fields and allocating
- // autonomous private data for each schema node.
- //
- // This function can't fail, as properly reclaiming memory in case of error
- // would be too fragile. After this function returns, memory is reclaimed
- // by calling the release() pointer in the top level ArrowSchema struct.
- void Finish(struct ArrowSchema* c_struct) {
- // First, create permanent ExportedSchemaPrivateData
- auto pdata = new ExportedSchemaPrivateData(std::move(export_));
-
- // Second, finish dictionary and children.
- if (dict_exporter_) {
- dict_exporter_->Finish(&pdata->dictionary_);
- }
- pdata->child_pointers_.resize(child_exporters_.size(), nullptr);
- for (size_t i = 0; i < child_exporters_.size(); ++i) {
- auto ptr = pdata->child_pointers_[i] = &pdata->children_[i];
- child_exporters_[i].Finish(ptr);
- }
-
- // Third, fill C struct.
- DCHECK_NE(c_struct, nullptr);
- memset(c_struct, 0, sizeof(*c_struct));
-
- c_struct->format = pdata->format_.c_str();
- c_struct->name = pdata->name_.c_str();
- c_struct->metadata = pdata->metadata_.empty() ? nullptr : pdata->metadata_.c_str();
- c_struct->flags = flags_;
-
- c_struct->n_children = static_cast<int64_t>(child_exporters_.size());
- c_struct->children = pdata->child_pointers_.data();
- c_struct->dictionary = dict_exporter_ ? &pdata->dictionary_ : nullptr;
- c_struct->private_data = pdata;
- c_struct->release = ReleaseExportedSchema;
- }
-
- Status ExportFormat(const DataType& type) {
- if (type.id() == Type::DICTIONARY) {
- const auto& dict_type = checked_cast<const DictionaryType&>(type);
- if (dict_type.ordered()) {
- flags_ |= ARROW_FLAG_DICTIONARY_ORDERED;
- }
- // Dictionary type: parent struct describes index type,
- // child dictionary struct describes value type.
- RETURN_NOT_OK(VisitTypeInline(*dict_type.index_type(), this));
- dict_exporter_.reset(new SchemaExporter());
- RETURN_NOT_OK(dict_exporter_->ExportType(*dict_type.value_type()));
- } else {
- RETURN_NOT_OK(VisitTypeInline(type, this));
- }
- DCHECK(!export_.format_.empty());
- return Status::OK();
- }
-
- Status ExportChildren(const std::vector<std::shared_ptr<Field>>& fields) {
- export_.children_.resize(fields.size());
- child_exporters_.resize(fields.size());
- for (size_t i = 0; i < fields.size(); ++i) {
- RETURN_NOT_OK(child_exporters_[i].ExportField(*fields[i]));
- }
- return Status::OK();
- }
-
- Status ExportMetadata(const KeyValueMetadata* metadata) {
- if (metadata != nullptr && metadata->size() >= 0) {
- ARROW_ASSIGN_OR_RAISE(export_.metadata_, EncodeMetadata(*metadata));
- }
- return Status::OK();
- }
-
- Status SetFormat(std::string s) {
- export_.format_ = std::move(s);
- return Status::OK();
- }
-
- // Type-specific visitors
-
- Status Visit(const DataType& type) { return ExportingNotImplemented(type); }
-
- Status Visit(const NullType& type) { return SetFormat("n"); }
-
- Status Visit(const BooleanType& type) { return SetFormat("b"); }
-
- Status Visit(const Int8Type& type) { return SetFormat("c"); }
-
- Status Visit(const UInt8Type& type) { return SetFormat("C"); }
-
- Status Visit(const Int16Type& type) { return SetFormat("s"); }
-
- Status Visit(const UInt16Type& type) { return SetFormat("S"); }
-
- Status Visit(const Int32Type& type) { return SetFormat("i"); }
-
- Status Visit(const UInt32Type& type) { return SetFormat("I"); }
-
- Status Visit(const Int64Type& type) { return SetFormat("l"); }
-
- Status Visit(const UInt64Type& type) { return SetFormat("L"); }
-
- Status Visit(const HalfFloatType& type) { return SetFormat("e"); }
-
- Status Visit(const FloatType& type) { return SetFormat("f"); }
-
- Status Visit(const DoubleType& type) { return SetFormat("g"); }
-
- Status Visit(const FixedSizeBinaryType& type) {
- return SetFormat("w:" + std::to_string(type.byte_width()));
- }
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/c/bridge.h"
+
+#include <algorithm>
+#include <cerrno>
+#include <cstring>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/buffer.h"
+#include "arrow/c/helpers.h"
+#include "arrow/c/util_internal.h"
+#include "arrow/memory_pool.h"
+#include "arrow/record_batch.h"
+#include "arrow/result.h"
+#include "arrow/stl_allocator.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/value_parsing.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+using internal::checked_pointer_cast;
+
+using internal::ArrayExportGuard;
+using internal::ArrayExportTraits;
+using internal::SchemaExportGuard;
+using internal::SchemaExportTraits;
+
+// TODO export / import Extension types and arrays
+
+namespace {
+
+// Build the NotImplemented status reported for a data type that the
+// schema exporter has no dedicated Visit() overload for.
+Status ExportingNotImplemented(const DataType& type) {
+  const std::string type_repr = type.ToString();
+  return Status::NotImplemented("Exporting ", type_repr, " array not supported");
+}
+
+// Allocate exported private data using MemoryPool,
+// to allow accounting memory and checking for memory leaks.
+
+// XXX use Gandiva's SimpleArena?
+
+template <typename T>
+using PoolVector = std::vector<T, ::arrow::stl::allocator<T>>;
+
+// Mixin that routes `new` / `delete` of a Derived object through the
+// default MemoryPool instead of the global heap.  Derived is expected to
+// inherit from PoolAllocationMixin<Derived>.
+template <typename Derived>
+struct PoolAllocationMixin {
+  // Allocate storage for exactly one Derived from the default memory pool.
+  // The allocation status is checked with ARROW_CHECK_OK rather than being
+  // returned to the caller.
+  static void* operator new(size_t size) {
+    DCHECK_EQ(size, sizeof(Derived));
+    auto* pool = default_memory_pool();
+    const auto nbytes = static_cast<int64_t>(size);
+    uint8_t* storage;
+    ARROW_CHECK_OK(pool->Allocate(nbytes, &storage));
+    return storage;
+  }
+
+  // Hand the storage of one Derived back to the default memory pool.
+  static void operator delete(void* ptr) {
+    auto* storage = static_cast<uint8_t*>(ptr);
+    default_memory_pool()->Free(storage, sizeof(Derived));
+  }
+};
+
+//////////////////////////////////////////////////////////////////////////
+// C schema export
+
+// Pool-allocated state backing one exported ArrowSchema.
+//
+// SchemaExporter::Finish() points the members of the C struct at the data
+// held here, and ReleaseExportedSchema() deletes the object.  Movable but
+// not copyable.
+struct ExportedSchemaPrivateData : PoolAllocationMixin<ExportedSchemaPrivateData> {
+  // Format string, field name and encoded key/value metadata.
+  std::string format_;
+  std::string name_;
+  std::string metadata_;
+
+  // Storage for the dictionary schema.
+  ArrowSchema dictionary_;
+
+  // Storage for the child schemas, and the pointer array referring to them.
+  PoolVector<ArrowSchema> children_;
+  PoolVector<ArrowSchema*> child_pointers_;
+
+  ExportedSchemaPrivateData() = default;
+  ARROW_DEFAULT_MOVE_AND_ASSIGN(ExportedSchemaPrivateData);
+  ARROW_DISALLOW_COPY_AND_ASSIGN(ExportedSchemaPrivateData);
+};
+
+// Release callback for ArrowSchema structs produced by SchemaExporter.
+//
+// Releases every child and the dictionary (when present) through their own
+// release callbacks, deletes the ExportedSchemaPrivateData stored in
+// `private_data`, and finally marks `schema` itself as released.  Calling
+// it on an already released struct does nothing.
+void ReleaseExportedSchema(struct ArrowSchema* schema) {
+  if (ArrowSchemaIsReleased(schema)) {
+    return;
+  }
+
+  // Children go first; each one is released through its own callback.
+  for (int64_t idx = 0; idx < schema->n_children; ++idx) {
+    struct ArrowSchema* child_schema = schema->children[idx];
+    ArrowSchemaRelease(child_schema);
+    DCHECK(ArrowSchemaIsReleased(child_schema))
+        << "Child release callback should have marked it released";
+  }
+
+  if (auto* dict_schema = schema->dictionary) {
+    ArrowSchemaRelease(dict_schema);
+    DCHECK(ArrowSchemaIsReleased(dict_schema))
+        << "Dictionary release callback should have marked it released";
+  }
+
+  // Drop the private data the C struct members were pointing into.
+  DCHECK_NE(schema->private_data, nullptr);
+  auto* pdata = static_cast<ExportedSchemaPrivateData*>(schema->private_data);
+  delete pdata;
+
+  ArrowSchemaMarkReleased(schema);
+}
+
+// Narrow a metadata pair count or byte length to int32_t.
+//
+// Returns Status::Invalid when the value becomes negative or does not
+// survive the round trip through int32_t.
+template <typename SizeType>
+Result<int32_t> DowncastMetadataSize(SizeType size) {
+  const auto narrowed = static_cast<int32_t>(size);
+  const bool fits = narrowed >= 0 && static_cast<SizeType>(narrowed) == size;
+  if (!fits) {
+    return Status::Invalid("Metadata too large (more than 2**31 items or bytes)");
+  }
+  return narrowed;
+}
+
+// Serialize `metadata` into a single binary string.
+//
+// Layout: an int32 pair count followed, for every pair, by the int32 key
+// length, the key bytes, the int32 value length and the value bytes.
+// Integers are copied with memcpy, i.e. in host byte order.
+//
+// Fails with Status::Invalid if the pair count or any string length does
+// not fit in an int32.
+Result<std::string> EncodeMetadata(const KeyValueMetadata& metadata) {
+  ARROW_ASSIGN_OR_RAISE(auto npairs, DowncastMetadataSize(metadata.size()));
+
+  // Size the output up front: the pair count, then two length prefixes plus
+  // the key and value bytes for every pair.
+  size_t total_size = sizeof(int32_t);
+  for (int32_t i = 0; i < npairs; ++i) {
+    total_size += 2 * sizeof(int32_t);
+    total_size += metadata.key(i).length();
+    total_size += metadata.value(i).length();
+  }
+  std::string exported;
+  exported.resize(total_size);
+
+  char* const begin = &exported[0];
+  char* cursor = begin;
+
+  // Append one raw int32 at the cursor.
+  const auto put_int32 = [&cursor](int32_t value) {
+    memcpy(cursor, &value, sizeof(value));
+    cursor += sizeof(value);
+  };
+  // Append one length-prefixed string at the cursor.
+  const auto put_string = [&](const std::string& str) -> Status {
+    ARROW_ASSIGN_OR_RAISE(auto nbytes, DowncastMetadataSize(str.length()));
+    put_int32(nbytes);
+    if (nbytes > 0) {
+      memcpy(cursor, str.data(), nbytes);
+      cursor += nbytes;
+    }
+    return Status::OK();
+  };
+
+  put_int32(npairs);
+  for (int32_t i = 0; i < npairs; ++i) {
+    RETURN_NOT_OK(put_string(metadata.key(i)));
+    RETURN_NOT_OK(put_string(metadata.value(i)));
+  }
+  // Everything that was reserved must have been written, and nothing more.
+  DCHECK_EQ(static_cast<size_t>(cursor - begin), total_size);
+  return exported;
+}
+
+struct SchemaExporter {
+  // Collect the export state for `field`: its name, nullability flag,
+  // format string, children and key/value metadata.
+  Status ExportField(const Field& field) {
+    if (field.nullable()) {
+      flags_ = ARROW_FLAG_NULLABLE;
+    } else {
+      flags_ = 0;
+    }
+    export_.name_ = field.name();
+
+    const DataType& field_type = *field.type();
+    RETURN_NOT_OK(ExportFormat(field_type));
+    RETURN_NOT_OK(ExportChildren(field_type.fields()));
+    RETURN_NOT_OK(ExportMetadata(field.metadata().get()));
+    return Status::OK();
+  }
+
+  // Collect the export state for a bare data type.  No name or metadata is
+  // recorded, and the type is flagged as nullable.
+  Status ExportType(const DataType& type) {
+    flags_ = ARROW_FLAG_NULLABLE;
+
+    const auto& child_fields = type.fields();
+    RETURN_NOT_OK(ExportFormat(type));
+    RETURN_NOT_OK(ExportChildren(child_fields));
+    return Status::OK();
+  }
+
+  // Collect the export state for a whole schema.
+  //
+  // The format string is taken from an empty struct type, the children are
+  // the schema's fields, and the schema-level metadata is attached.  No
+  // flags are set.
+  Status ExportSchema(const Schema& schema) {
+    // Only used to obtain the struct format string.
+    static StructType dummy_struct_type({});
+    flags_ = 0;
+
+    RETURN_NOT_OK(ExportFormat(dummy_struct_type));
+    RETURN_NOT_OK(ExportChildren(schema.fields()));
+    const KeyValueMetadata* schema_metadata = schema.metadata().get();
+    RETURN_NOT_OK(ExportMetadata(schema_metadata));
+    return Status::OK();
+  }
+
+  // Materialize the collected export state into `c_struct`.
+  //
+  // The state is moved into a newly allocated ExportedSchemaPrivateData,
+  // the dictionary and child exporters are finished into storage owned by
+  // that object, and every member of `c_struct` is then filled in.
+  //
+  // This function has no failure path by design: reclaiming partially
+  // built state on error would be too fragile.  Once it returns, memory is
+  // reclaimed by calling the release() pointer of the top-level ArrowSchema.
+  void Finish(struct ArrowSchema* c_struct) {
+    // The permanent private data comes first, because the dictionary and
+    // the children are finished directly into its members.
+    auto* pdata = new ExportedSchemaPrivateData(std::move(export_));
+
+    if (dict_exporter_) {
+      dict_exporter_->Finish(&pdata->dictionary_);
+    }
+
+    const size_t n_children = child_exporters_.size();
+    pdata->child_pointers_.resize(n_children, nullptr);
+    for (size_t i = 0; i < n_children; ++i) {
+      struct ArrowSchema* child = &pdata->children_[i];
+      pdata->child_pointers_[i] = child;
+      child_exporters_[i].Finish(child);
+    }
+
+    // Finally populate the C struct itself.
+    DCHECK_NE(c_struct, nullptr);
+    memset(c_struct, 0, sizeof(*c_struct));
+
+    c_struct->format = pdata->format_.c_str();
+    c_struct->name = pdata->name_.c_str();
+    if (pdata->metadata_.empty()) {
+      c_struct->metadata = nullptr;
+    } else {
+      c_struct->metadata = pdata->metadata_.c_str();
+    }
+    c_struct->flags = flags_;
+
+    c_struct->n_children = static_cast<int64_t>(n_children);
+    c_struct->children = pdata->child_pointers_.data();
+    if (dict_exporter_) {
+      c_struct->dictionary = &pdata->dictionary_;
+    } else {
+      c_struct->dictionary = nullptr;
+    }
+    c_struct->private_data = pdata;
+    c_struct->release = ReleaseExportedSchema;
+  }
+
+  // Compute the format string for `type` by dispatching to the Visit()
+  // overloads.
+  //
+  // For a dictionary type the parent struct describes the index type and a
+  // nested exporter (dict_exporter_) describes the value type; the
+  // "dictionary ordered" flag is set when applicable.
+  Status ExportFormat(const DataType& type) {
+    if (type.id() != Type::DICTIONARY) {
+      RETURN_NOT_OK(VisitTypeInline(type, this));
+    } else {
+      const auto& dict_type = checked_cast<const DictionaryType&>(type);
+      if (dict_type.ordered()) {
+        flags_ |= ARROW_FLAG_DICTIONARY_ORDERED;
+      }
+      // This node takes the format of the index type ...
+      const DataType& index_type = *dict_type.index_type();
+      RETURN_NOT_OK(VisitTypeInline(index_type, this));
+      // ... while the value type is exported as the dictionary child.
+      dict_exporter_.reset(new SchemaExporter());
+      const DataType& value_type = *dict_type.value_type();
+      RETURN_NOT_OK(dict_exporter_->ExportType(value_type));
+    }
+    DCHECK(!export_.format_.empty());
+    return Status::OK();
+  }
+
+  // Create one child exporter per entry of `fields` and export each field
+  // into it.  The storage for the child C structs is sized here as well.
+  // Stops at, and returns, the first failure.
+  Status ExportChildren(const std::vector<std::shared_ptr<Field>>& fields) {
+    const size_t n_fields = fields.size();
+    export_.children_.resize(n_fields);
+    child_exporters_.resize(n_fields);
+
+    size_t pos = 0;
+    for (const auto& child_field : fields) {
+      RETURN_NOT_OK(child_exporters_[pos++].ExportField(*child_field));
+    }
+    return Status::OK();
+  }
+
+  // Encode the key/value metadata of a field or schema, if there is any.
+  //
+  // A null pointer or an empty KeyValueMetadata leaves export_.metadata_
+  // empty, which Finish() exports as a NULL ArrowSchema::metadata pointer.
+  // Returns the error from EncodeMetadata() when the metadata cannot be
+  // encoded (pair count or string length not representable as int32).
+  Status ExportMetadata(const KeyValueMetadata* metadata) {
+    // Only non-empty metadata is worth encoding: an empty container would
+    // otherwise be exported as a blob holding nothing but a zero pair count.
+    if (metadata != nullptr && metadata->size() > 0) {
+      ARROW_ASSIGN_OR_RAISE(export_.metadata_, EncodeMetadata(*metadata));
+    }
+    return Status::OK();
+  }
+
+  // Store `s` as the format string of the schema being exported.
+  // Always succeeds; the Status return lets Visit() overloads return the
+  // call directly.
+  Status SetFormat(std::string s) {
+    export_.format_.assign(std::move(s));
+    return Status::OK();
+  }
+
+ // Type-specific visitors
+
+  // Fallback for types without a dedicated overload.
+  Status Visit(const DataType& type) {
+    return ExportingNotImplemented(type);
+  }
+
+  // Null and boolean.
+  Status Visit(const NullType&) {
+    return SetFormat("n");
+  }
+
+  Status Visit(const BooleanType&) {
+    return SetFormat("b");
+  }
+
+  // Integers: lowercase letter for signed, uppercase for unsigned.
+  Status Visit(const Int8Type&) {
+    return SetFormat("c");
+  }
+
+  Status Visit(const UInt8Type&) {
+    return SetFormat("C");
+  }
+
+  Status Visit(const Int16Type&) {
+    return SetFormat("s");
+  }
+
+  Status Visit(const UInt16Type&) {
+    return SetFormat("S");
+  }
+
+  Status Visit(const Int32Type&) {
+    return SetFormat("i");
+  }
+
+  Status Visit(const UInt32Type&) {
+    return SetFormat("I");
+  }
+
+  Status Visit(const Int64Type&) {
+    return SetFormat("l");
+  }
+
+  Status Visit(const UInt64Type&) {
+    return SetFormat("L");
+  }
+
+  // Floating point.
+  Status Visit(const HalfFloatType&) {
+    return SetFormat("e");
+  }
+
+  Status Visit(const FloatType&) {
+    return SetFormat("f");
+  }
+
+  Status Visit(const DoubleType&) {
+    return SetFormat("g");
+  }
+
+  // Fixed-size binary: "w:" followed by the byte width.
+  Status Visit(const FixedSizeBinaryType& type) {
+    std::string format = "w:";
+    format += std::to_string(type.byte_width());
+    return SetFormat(format);
+  }
+
Status Visit(const DecimalType& type) {
if (type.bit_width() == 128) {
// 128 is the default bit-width
@@ -314,679 +314,679 @@ struct SchemaExporter {
std::to_string(type.scale()) + "," +
std::to_string(type.bit_width()));
}
- }
-
- Status Visit(const BinaryType& type) { return SetFormat("z"); }
-
- Status Visit(const LargeBinaryType& type) { return SetFormat("Z"); }
-
- Status Visit(const StringType& type) { return SetFormat("u"); }
-
- Status Visit(const LargeStringType& type) { return SetFormat("U"); }
-
- Status Visit(const Date32Type& type) { return SetFormat("tdD"); }
-
- Status Visit(const Date64Type& type) { return SetFormat("tdm"); }
-
- Status Visit(const Time32Type& type) {
- switch (type.unit()) {
- case TimeUnit::SECOND:
- export_.format_ = "tts";
- break;
- case TimeUnit::MILLI:
- export_.format_ = "ttm";
- break;
- default:
- return Status::Invalid("Invalid time unit for Time32: ", type.unit());
- }
- return Status::OK();
- }
-
- Status Visit(const Time64Type& type) {
- switch (type.unit()) {
- case TimeUnit::MICRO:
- export_.format_ = "ttu";
- break;
- case TimeUnit::NANO:
- export_.format_ = "ttn";
- break;
- default:
- return Status::Invalid("Invalid time unit for Time64: ", type.unit());
- }
- return Status::OK();
- }
-
- Status Visit(const TimestampType& type) {
- switch (type.unit()) {
- case TimeUnit::SECOND:
- export_.format_ = "tss:";
- break;
- case TimeUnit::MILLI:
- export_.format_ = "tsm:";
- break;
- case TimeUnit::MICRO:
- export_.format_ = "tsu:";
- break;
- case TimeUnit::NANO:
- export_.format_ = "tsn:";
- break;
- default:
- return Status::Invalid("Invalid time unit for Timestamp: ", type.unit());
- }
- export_.format_ += type.timezone();
- return Status::OK();
- }
-
- Status Visit(const DurationType& type) {
- switch (type.unit()) {
- case TimeUnit::SECOND:
- export_.format_ = "tDs";
- break;
- case TimeUnit::MILLI:
- export_.format_ = "tDm";
- break;
- case TimeUnit::MICRO:
- export_.format_ = "tDu";
- break;
- case TimeUnit::NANO:
- export_.format_ = "tDn";
- break;
- default:
- return Status::Invalid("Invalid time unit for Duration: ", type.unit());
- }
- return Status::OK();
- }
-
- Status Visit(const MonthIntervalType& type) { return SetFormat("tiM"); }
-
- Status Visit(const DayTimeIntervalType& type) { return SetFormat("tiD"); }
-
- Status Visit(const ListType& type) { return SetFormat("+l"); }
-
- Status Visit(const LargeListType& type) { return SetFormat("+L"); }
-
- Status Visit(const FixedSizeListType& type) {
- return SetFormat("+w:" + std::to_string(type.list_size()));
- }
-
- Status Visit(const StructType& type) { return SetFormat("+s"); }
-
- Status Visit(const MapType& type) {
- export_.format_ = "+m";
- if (type.keys_sorted()) {
- flags_ |= ARROW_FLAG_MAP_KEYS_SORTED;
- }
- return Status::OK();
- }
-
- Status Visit(const UnionType& type) {
- std::string& s = export_.format_;
- s = "+u";
- if (type.mode() == UnionMode::DENSE) {
- s += "d:";
- } else {
- DCHECK_EQ(type.mode(), UnionMode::SPARSE);
- s += "s:";
- }
- bool first = true;
- for (const auto code : type.type_codes()) {
- if (!first) {
- s += ",";
- }
- s += std::to_string(code);
- first = false;
- }
- return Status::OK();
- }
-
- ExportedSchemaPrivateData export_;
- int64_t flags_ = 0;
- std::unique_ptr<SchemaExporter> dict_exporter_;
- std::vector<SchemaExporter> child_exporters_;
-};
-
-} // namespace
-
-Status ExportType(const DataType& type, struct ArrowSchema* out) {
- SchemaExporter exporter;
- RETURN_NOT_OK(exporter.ExportType(type));
- exporter.Finish(out);
- return Status::OK();
-}
-
-Status ExportField(const Field& field, struct ArrowSchema* out) {
- SchemaExporter exporter;
- RETURN_NOT_OK(exporter.ExportField(field));
- exporter.Finish(out);
- return Status::OK();
-}
-
-Status ExportSchema(const Schema& schema, struct ArrowSchema* out) {
- SchemaExporter exporter;
- RETURN_NOT_OK(exporter.ExportSchema(schema));
- exporter.Finish(out);
- return Status::OK();
-}
-
-//////////////////////////////////////////////////////////////////////////
-// C data export
-
-namespace {
-
-struct ExportedArrayPrivateData : PoolAllocationMixin<ExportedArrayPrivateData> {
- // The buffers are owned by the ArrayData member
- PoolVector<const void*> buffers_;
- struct ArrowArray dictionary_;
- PoolVector<struct ArrowArray> children_;
- PoolVector<struct ArrowArray*> child_pointers_;
-
- std::shared_ptr<ArrayData> data_;
-
- ExportedArrayPrivateData() = default;
- ARROW_DEFAULT_MOVE_AND_ASSIGN(ExportedArrayPrivateData);
- ARROW_DISALLOW_COPY_AND_ASSIGN(ExportedArrayPrivateData);
-};
-
-void ReleaseExportedArray(struct ArrowArray* array) {
- if (ArrowArrayIsReleased(array)) {
- return;
- }
- for (int64_t i = 0; i < array->n_children; ++i) {
- struct ArrowArray* child = array->children[i];
- ArrowArrayRelease(child);
- DCHECK(ArrowArrayIsReleased(child))
- << "Child release callback should have marked it released";
- }
- struct ArrowArray* dict = array->dictionary;
- if (dict != nullptr) {
- ArrowArrayRelease(dict);
- DCHECK(ArrowArrayIsReleased(dict))
- << "Dictionary release callback should have marked it released";
- }
- DCHECK_NE(array->private_data, nullptr);
- delete reinterpret_cast<ExportedArrayPrivateData*>(array->private_data);
-
- ArrowArrayMarkReleased(array);
-}
-
-struct ArrayExporter {
- Status Export(const std::shared_ptr<ArrayData>& data) {
- // Force computing null count.
- // This is because ARROW-9037 is in version 0.17 and 0.17.1, and they are
- // not able to import arrays without a null bitmap and null_count == -1.
- data->GetNullCount();
- // Store buffer pointers
- export_.buffers_.resize(data->buffers.size());
- std::transform(data->buffers.begin(), data->buffers.end(), export_.buffers_.begin(),
- [](const std::shared_ptr<Buffer>& buffer) -> const void* {
- return buffer ? buffer->data() : nullptr;
- });
-
- // Export dictionary
- if (data->dictionary != nullptr) {
- dict_exporter_.reset(new ArrayExporter());
- RETURN_NOT_OK(dict_exporter_->Export(data->dictionary));
- }
-
- // Export children
- export_.children_.resize(data->child_data.size());
- child_exporters_.resize(data->child_data.size());
- for (size_t i = 0; i < data->child_data.size(); ++i) {
- RETURN_NOT_OK(child_exporters_[i].Export(data->child_data[i]));
- }
-
- // Store owning pointer to ArrayData
- export_.data_ = data;
-
- return Status::OK();
- }
-
- // Finalize exporting by setting C struct fields and allocating
- // autonomous private data for each array node.
- //
- // This function can't fail, as properly reclaiming memory in case of error
- // would be too fragile. After this function returns, memory is reclaimed
- // by calling the release() pointer in the top level ArrowArray struct.
- void Finish(struct ArrowArray* c_struct_) {
- // First, create permanent ExportedArrayPrivateData, to make sure that
- // child ArrayData pointers don't get invalidated.
- auto pdata = new ExportedArrayPrivateData(std::move(export_));
- const ArrayData& data = *pdata->data_;
-
- // Second, finish dictionary and children.
- if (dict_exporter_) {
- dict_exporter_->Finish(&pdata->dictionary_);
- }
- pdata->child_pointers_.resize(data.child_data.size(), nullptr);
- for (size_t i = 0; i < data.child_data.size(); ++i) {
- auto ptr = &pdata->children_[i];
- pdata->child_pointers_[i] = ptr;
- child_exporters_[i].Finish(ptr);
- }
-
- // Third, fill C struct.
- DCHECK_NE(c_struct_, nullptr);
- memset(c_struct_, 0, sizeof(*c_struct_));
-
- c_struct_->length = data.length;
- c_struct_->null_count = data.null_count;
- c_struct_->offset = data.offset;
- c_struct_->n_buffers = static_cast<int64_t>(pdata->buffers_.size());
- c_struct_->n_children = static_cast<int64_t>(pdata->child_pointers_.size());
- c_struct_->buffers = pdata->buffers_.data();
- c_struct_->children = pdata->child_pointers_.data();
- c_struct_->dictionary = dict_exporter_ ? &pdata->dictionary_ : nullptr;
- c_struct_->private_data = pdata;
- c_struct_->release = ReleaseExportedArray;
- }
-
- ExportedArrayPrivateData export_;
- std::unique_ptr<ArrayExporter> dict_exporter_;
- std::vector<ArrayExporter> child_exporters_;
-};
-
-} // namespace
-
-Status ExportArray(const Array& array, struct ArrowArray* out,
- struct ArrowSchema* out_schema) {
- SchemaExportGuard guard(out_schema);
- if (out_schema != nullptr) {
- RETURN_NOT_OK(ExportType(*array.type(), out_schema));
- }
- ArrayExporter exporter;
- RETURN_NOT_OK(exporter.Export(array.data()));
- exporter.Finish(out);
- guard.Detach();
- return Status::OK();
-}
-
-Status ExportRecordBatch(const RecordBatch& batch, struct ArrowArray* out,
- struct ArrowSchema* out_schema) {
- // XXX perhaps bypass ToStructArray() for speed?
- ARROW_ASSIGN_OR_RAISE(auto array, batch.ToStructArray());
-
- SchemaExportGuard guard(out_schema);
- if (out_schema != nullptr) {
- // Export the schema, not the struct type, so as not to lose top-level metadata
- RETURN_NOT_OK(ExportSchema(*batch.schema(), out_schema));
- }
- ArrayExporter exporter;
- RETURN_NOT_OK(exporter.Export(array->data()));
- exporter.Finish(out);
- guard.Detach();
- return Status::OK();
-}
-
-//////////////////////////////////////////////////////////////////////////
-// C schema import
-
-namespace {
-
-static constexpr int64_t kMaxImportRecursionLevel = 64;
-
-Status InvalidFormatString(util::string_view v) {
- return Status::Invalid("Invalid or unsupported format string: '", v, "'");
-}
-
-class FormatStringParser {
- public:
- FormatStringParser() {}
-
- explicit FormatStringParser(util::string_view v) : view_(v), index_(0) {}
-
- bool AtEnd() const { return index_ >= view_.length(); }
-
- char Next() { return view_[index_++]; }
-
- util::string_view Rest() { return view_.substr(index_); }
-
- Status CheckNext(char c) {
- if (AtEnd() || Next() != c) {
- return Invalid();
- }
- return Status::OK();
- }
-
- Status CheckHasNext() {
- if (AtEnd()) {
- return Invalid();
- }
- return Status::OK();
- }
-
- Status CheckAtEnd() {
- if (!AtEnd()) {
- return Invalid();
- }
- return Status::OK();
- }
-
- template <typename IntType = int32_t>
- Result<IntType> ParseInt(util::string_view v) {
- using ArrowIntType = typename CTypeTraits<IntType>::ArrowType;
- IntType value;
- if (!internal::ParseValue<ArrowIntType>(v.data(), v.size(), &value)) {
- return Invalid();
- }
- return value;
- }
-
- Result<TimeUnit::type> ParseTimeUnit() {
- RETURN_NOT_OK(CheckHasNext());
- switch (Next()) {
- case 's':
- return TimeUnit::SECOND;
- case 'm':
- return TimeUnit::MILLI;
- case 'u':
- return TimeUnit::MICRO;
- case 'n':
- return TimeUnit::NANO;
- default:
- return Invalid();
- }
- }
-
- std::vector<util::string_view> Split(util::string_view v, char delim = ',') {
- std::vector<util::string_view> parts;
- size_t start = 0, end;
- while (true) {
- end = v.find_first_of(delim, start);
- parts.push_back(v.substr(start, end - start));
- if (end == util::string_view::npos) {
- break;
- }
- start = end + 1;
- }
- return parts;
- }
-
- template <typename IntType = int32_t>
- Result<std::vector<IntType>> ParseInts(util::string_view v) {
- auto parts = Split(v);
- std::vector<IntType> result;
- result.reserve(parts.size());
- for (const auto& p : parts) {
- ARROW_ASSIGN_OR_RAISE(auto i, ParseInt<IntType>(p));
- result.push_back(i);
- }
- return result;
- }
-
- Status Invalid() { return InvalidFormatString(view_); }
-
- protected:
- util::string_view view_;
- size_t index_;
-};
-
-Result<std::shared_ptr<KeyValueMetadata>> DecodeMetadata(const char* metadata) {
- auto read_int32 = [&](int32_t* out) -> Status {
- int32_t v;
- memcpy(&v, metadata, 4);
- metadata += 4;
- *out = v;
- if (*out < 0) {
- return Status::Invalid("Invalid encoded metadata string");
- }
- return Status::OK();
- };
-
- auto read_string = [&](std::string* out) -> Status {
- int32_t len;
- RETURN_NOT_OK(read_int32(&len));
- out->resize(len);
- if (len > 0) {
- memcpy(&(*out)[0], metadata, len);
- metadata += len;
- }
- return Status::OK();
- };
-
- if (metadata == nullptr) {
- return nullptr;
- }
- int32_t npairs;
- RETURN_NOT_OK(read_int32(&npairs));
- if (npairs == 0) {
- return nullptr;
- }
- std::vector<std::string> keys(npairs);
- std::vector<std::string> values(npairs);
- for (int32_t i = 0; i < npairs; ++i) {
- RETURN_NOT_OK(read_string(&keys[i]));
- RETURN_NOT_OK(read_string(&values[i]));
- }
- return key_value_metadata(std::move(keys), std::move(values));
-}
-
-struct SchemaImporter {
- SchemaImporter() : c_struct_(nullptr), guard_(nullptr) {}
-
- Status Import(struct ArrowSchema* src) {
- if (ArrowSchemaIsReleased(src)) {
- return Status::Invalid("Cannot import released ArrowSchema");
- }
- guard_.Reset(src);
- recursion_level_ = 0;
- c_struct_ = src;
- return DoImport();
- }
-
- Result<std::shared_ptr<Field>> MakeField() const {
- ARROW_ASSIGN_OR_RAISE(auto metadata, DecodeMetadata(c_struct_->metadata));
- const char* name = c_struct_->name ? c_struct_->name : "";
- bool nullable = (c_struct_->flags & ARROW_FLAG_NULLABLE) != 0;
- return field(name, type_, nullable, std::move(metadata));
- }
-
- Result<std::shared_ptr<Schema>> MakeSchema() const {
- if (type_->id() != Type::STRUCT) {
- return Status::Invalid(
- "Cannot import schema: ArrowSchema describes non-struct type ",
- type_->ToString());
- }
- ARROW_ASSIGN_OR_RAISE(auto metadata, DecodeMetadata(c_struct_->metadata));
- return schema(type_->fields(), std::move(metadata));
- }
-
- Result<std::shared_ptr<DataType>> MakeType() const { return type_; }
-
- protected:
- Status ImportChild(const SchemaImporter* parent, struct ArrowSchema* src) {
- if (ArrowSchemaIsReleased(src)) {
- return Status::Invalid("Cannot import released ArrowSchema");
- }
- recursion_level_ = parent->recursion_level_ + 1;
- if (recursion_level_ >= kMaxImportRecursionLevel) {
- return Status::Invalid("Recursion level in ArrowSchema struct exceeded");
- }
- // The ArrowSchema is owned by its parent, so don't release it ourselves
- c_struct_ = src;
- return DoImport();
- }
-
- Status ImportDict(const SchemaImporter* parent, struct ArrowSchema* src) {
- return ImportChild(parent, src);
- }
-
- Status DoImport() {
- // First import children (required for reconstituting parent type)
- child_importers_.resize(c_struct_->n_children);
- for (int64_t i = 0; i < c_struct_->n_children; ++i) {
- DCHECK_NE(c_struct_->children[i], nullptr);
- RETURN_NOT_OK(child_importers_[i].ImportChild(this, c_struct_->children[i]));
- }
-
- // Import main type
- RETURN_NOT_OK(ProcessFormat());
- DCHECK_NE(type_, nullptr);
-
- // Import dictionary type
- if (c_struct_->dictionary != nullptr) {
- // Check this index type
- if (!is_integer(type_->id())) {
- return Status::Invalid(
- "ArrowSchema struct has a dictionary but is not an integer type: ",
- type_->ToString());
- }
- SchemaImporter dict_importer;
- RETURN_NOT_OK(dict_importer.ImportDict(this, c_struct_->dictionary));
- bool ordered = (c_struct_->flags & ARROW_FLAG_DICTIONARY_ORDERED) != 0;
- type_ = dictionary(type_, dict_importer.type_, ordered);
- }
- return Status::OK();
- }
-
- Status ProcessFormat() {
- f_parser_ = FormatStringParser(c_struct_->format);
- RETURN_NOT_OK(f_parser_.CheckHasNext());
- switch (f_parser_.Next()) {
- case 'n':
- return ProcessPrimitive(null());
- case 'b':
- return ProcessPrimitive(boolean());
- case 'c':
- return ProcessPrimitive(int8());
- case 'C':
- return ProcessPrimitive(uint8());
- case 's':
- return ProcessPrimitive(int16());
- case 'S':
- return ProcessPrimitive(uint16());
- case 'i':
- return ProcessPrimitive(int32());
- case 'I':
- return ProcessPrimitive(uint32());
- case 'l':
- return ProcessPrimitive(int64());
- case 'L':
- return ProcessPrimitive(uint64());
- case 'e':
- return ProcessPrimitive(float16());
- case 'f':
- return ProcessPrimitive(float32());
- case 'g':
- return ProcessPrimitive(float64());
- case 'u':
- return ProcessPrimitive(utf8());
- case 'U':
- return ProcessPrimitive(large_utf8());
- case 'z':
- return ProcessPrimitive(binary());
- case 'Z':
- return ProcessPrimitive(large_binary());
- case 'w':
- return ProcessFixedSizeBinary();
- case 'd':
- return ProcessDecimal();
- case 't':
- return ProcessTemporal();
- case '+':
- return ProcessNested();
- }
- return f_parser_.Invalid();
- }
-
- Status ProcessTemporal() {
- RETURN_NOT_OK(f_parser_.CheckHasNext());
- switch (f_parser_.Next()) {
- case 'd':
- return ProcessDate();
- case 't':
- return ProcessTime();
- case 'D':
- return ProcessDuration();
- case 'i':
- return ProcessInterval();
- case 's':
- return ProcessTimestamp();
- }
- return f_parser_.Invalid();
- }
-
- Status ProcessNested() {
- RETURN_NOT_OK(f_parser_.CheckHasNext());
- switch (f_parser_.Next()) {
- case 'l':
- return ProcessListLike<ListType>();
- case 'L':
- return ProcessListLike<LargeListType>();
- case 'w':
- return ProcessFixedSizeList();
- case 's':
- return ProcessStruct();
- case 'm':
- return ProcessMap();
- case 'u':
- return ProcessUnion();
- }
- return f_parser_.Invalid();
- }
-
- Status ProcessDate() {
- RETURN_NOT_OK(f_parser_.CheckHasNext());
- switch (f_parser_.Next()) {
- case 'D':
- return ProcessPrimitive(date32());
- case 'm':
- return ProcessPrimitive(date64());
- }
- return f_parser_.Invalid();
- }
-
- Status ProcessInterval() {
- RETURN_NOT_OK(f_parser_.CheckHasNext());
- switch (f_parser_.Next()) {
- case 'D':
- return ProcessPrimitive(day_time_interval());
- case 'M':
- return ProcessPrimitive(month_interval());
- }
- return f_parser_.Invalid();
- }
-
- Status ProcessTime() {
- ARROW_ASSIGN_OR_RAISE(auto unit, f_parser_.ParseTimeUnit());
- if (unit == TimeUnit::SECOND || unit == TimeUnit::MILLI) {
- return ProcessPrimitive(time32(unit));
- } else {
- return ProcessPrimitive(time64(unit));
- }
- }
-
- Status ProcessDuration() {
- ARROW_ASSIGN_OR_RAISE(auto unit, f_parser_.ParseTimeUnit());
- return ProcessPrimitive(duration(unit));
- }
-
- Status ProcessTimestamp() {
- ARROW_ASSIGN_OR_RAISE(auto unit, f_parser_.ParseTimeUnit());
- RETURN_NOT_OK(f_parser_.CheckNext(':'));
- type_ = timestamp(unit, std::string(f_parser_.Rest()));
- return Status::OK();
- }
-
- Status ProcessFixedSizeBinary() {
- RETURN_NOT_OK(f_parser_.CheckNext(':'));
- ARROW_ASSIGN_OR_RAISE(auto byte_width, f_parser_.ParseInt(f_parser_.Rest()));
- if (byte_width < 0) {
- return f_parser_.Invalid();
- }
- type_ = fixed_size_binary(byte_width);
- return Status::OK();
- }
-
- Status ProcessDecimal() {
- RETURN_NOT_OK(f_parser_.CheckNext(':'));
- ARROW_ASSIGN_OR_RAISE(auto prec_scale, f_parser_.ParseInts(f_parser_.Rest()));
+ }
+
+  // Variable-size binary and string types; the uppercase letter denotes
+  // the Large variant.
+  Status Visit(const BinaryType&) {
+    return SetFormat("z");
+  }
+
+  Status Visit(const LargeBinaryType&) {
+    return SetFormat("Z");
+  }
+
+  Status Visit(const StringType&) {
+    return SetFormat("u");
+  }
+
+  Status Visit(const LargeStringType&) {
+    return SetFormat("U");
+  }
+
+  // Date types.
+  Status Visit(const Date32Type&) {
+    return SetFormat("tdD");
+  }
+
+  Status Visit(const Date64Type&) {
+    return SetFormat("tdm");
+  }
+
+  // Time32: "tts" for seconds, "ttm" for milliseconds.  Any other unit is
+  // rejected and leaves the format string untouched.
+  Status Visit(const Time32Type& type) {
+    switch (type.unit()) {
+      case TimeUnit::SECOND:
+        return SetFormat("tts");
+      case TimeUnit::MILLI:
+        return SetFormat("ttm");
+      default:
+        break;
+    }
+    return Status::Invalid("Invalid time unit for Time32: ", type.unit());
+  }
+
+ Status Visit(const Time64Type& type) {
+ switch (type.unit()) {
+ case TimeUnit::MICRO:
+ export_.format_ = "ttu";
+ break;
+ case TimeUnit::NANO:
+ export_.format_ = "ttn";
+ break;
+ default:
+ return Status::Invalid("Invalid time unit for Time64: ", type.unit());
+ }
+ return Status::OK();
+ }
+
+ Status Visit(const TimestampType& type) {
+ switch (type.unit()) {
+ case TimeUnit::SECOND:
+ export_.format_ = "tss:";
+ break;
+ case TimeUnit::MILLI:
+ export_.format_ = "tsm:";
+ break;
+ case TimeUnit::MICRO:
+ export_.format_ = "tsu:";
+ break;
+ case TimeUnit::NANO:
+ export_.format_ = "tsn:";
+ break;
+ default:
+ return Status::Invalid("Invalid time unit for Timestamp: ", type.unit());
+ }
+ export_.format_ += type.timezone();
+ return Status::OK();
+ }
+
+ Status Visit(const DurationType& type) {
+ switch (type.unit()) {
+ case TimeUnit::SECOND:
+ export_.format_ = "tDs";
+ break;
+ case TimeUnit::MILLI:
+ export_.format_ = "tDm";
+ break;
+ case TimeUnit::MICRO:
+ export_.format_ = "tDu";
+ break;
+ case TimeUnit::NANO:
+ export_.format_ = "tDn";
+ break;
+ default:
+ return Status::Invalid("Invalid time unit for Duration: ", type.unit());
+ }
+ return Status::OK();
+ }
+
+ Status Visit(const MonthIntervalType& type) { return SetFormat("tiM"); }
+
+ Status Visit(const DayTimeIntervalType& type) { return SetFormat("tiD"); }
+
+ Status Visit(const ListType& type) { return SetFormat("+l"); }
+
+ Status Visit(const LargeListType& type) { return SetFormat("+L"); }
+
+ Status Visit(const FixedSizeListType& type) {
+ return SetFormat("+w:" + std::to_string(type.list_size()));
+ }
+
+ Status Visit(const StructType& type) { return SetFormat("+s"); }
+
+ Status Visit(const MapType& type) {
+ export_.format_ = "+m";
+ if (type.keys_sorted()) {
+ flags_ |= ARROW_FLAG_MAP_KEYS_SORTED;
+ }
+ return Status::OK();
+ }
+
+ Status Visit(const UnionType& type) {
+ std::string& s = export_.format_;
+ s = "+u";
+ if (type.mode() == UnionMode::DENSE) {
+ s += "d:";
+ } else {
+ DCHECK_EQ(type.mode(), UnionMode::SPARSE);
+ s += "s:";
+ }
+ bool first = true;
+ for (const auto code : type.type_codes()) {
+ if (!first) {
+ s += ",";
+ }
+ s += std::to_string(code);
+ first = false;
+ }
+ return Status::OK();
+ }
+
+ ExportedSchemaPrivateData export_;
+ int64_t flags_ = 0;
+ std::unique_ptr<SchemaExporter> dict_exporter_;
+ std::vector<SchemaExporter> child_exporters_;
+};
+
+} // namespace
+
+Status ExportType(const DataType& type, struct ArrowSchema* out) {
+ SchemaExporter exporter;
+ RETURN_NOT_OK(exporter.ExportType(type));
+ exporter.Finish(out);
+ return Status::OK();
+}
+
+Status ExportField(const Field& field, struct ArrowSchema* out) {
+ SchemaExporter exporter;
+ RETURN_NOT_OK(exporter.ExportField(field));
+ exporter.Finish(out);
+ return Status::OK();
+}
+
+Status ExportSchema(const Schema& schema, struct ArrowSchema* out) {
+ SchemaExporter exporter;
+ RETURN_NOT_OK(exporter.ExportSchema(schema));
+ exporter.Finish(out);
+ return Status::OK();
+}
+
+//////////////////////////////////////////////////////////////////////////
+// C data export
+
+namespace {
+
+struct ExportedArrayPrivateData : PoolAllocationMixin<ExportedArrayPrivateData> {
+ // The buffers are owned by the ArrayData member
+ PoolVector<const void*> buffers_;
+ struct ArrowArray dictionary_;
+ PoolVector<struct ArrowArray> children_;
+ PoolVector<struct ArrowArray*> child_pointers_;
+
+ std::shared_ptr<ArrayData> data_;
+
+ ExportedArrayPrivateData() = default;
+ ARROW_DEFAULT_MOVE_AND_ASSIGN(ExportedArrayPrivateData);
+ ARROW_DISALLOW_COPY_AND_ASSIGN(ExportedArrayPrivateData);
+};
+
+void ReleaseExportedArray(struct ArrowArray* array) {
+ if (ArrowArrayIsReleased(array)) {
+ return;
+ }
+ for (int64_t i = 0; i < array->n_children; ++i) {
+ struct ArrowArray* child = array->children[i];
+ ArrowArrayRelease(child);
+ DCHECK(ArrowArrayIsReleased(child))
+ << "Child release callback should have marked it released";
+ }
+ struct ArrowArray* dict = array->dictionary;
+ if (dict != nullptr) {
+ ArrowArrayRelease(dict);
+ DCHECK(ArrowArrayIsReleased(dict))
+ << "Dictionary release callback should have marked it released";
+ }
+ DCHECK_NE(array->private_data, nullptr);
+ delete reinterpret_cast<ExportedArrayPrivateData*>(array->private_data);
+
+ ArrowArrayMarkReleased(array);
+}
+
+struct ArrayExporter {
+ Status Export(const std::shared_ptr<ArrayData>& data) {
+ // Force computing null count.
+ // This is because ARROW-9037 is in version 0.17 and 0.17.1, and they are
+ // not able to import arrays without a null bitmap and null_count == -1.
+ data->GetNullCount();
+ // Store buffer pointers
+ export_.buffers_.resize(data->buffers.size());
+ std::transform(data->buffers.begin(), data->buffers.end(), export_.buffers_.begin(),
+ [](const std::shared_ptr<Buffer>& buffer) -> const void* {
+ return buffer ? buffer->data() : nullptr;
+ });
+
+ // Export dictionary
+ if (data->dictionary != nullptr) {
+ dict_exporter_.reset(new ArrayExporter());
+ RETURN_NOT_OK(dict_exporter_->Export(data->dictionary));
+ }
+
+ // Export children
+ export_.children_.resize(data->child_data.size());
+ child_exporters_.resize(data->child_data.size());
+ for (size_t i = 0; i < data->child_data.size(); ++i) {
+ RETURN_NOT_OK(child_exporters_[i].Export(data->child_data[i]));
+ }
+
+ // Store owning pointer to ArrayData
+ export_.data_ = data;
+
+ return Status::OK();
+ }
+
+ // Finalize exporting by setting C struct fields and allocating
+ // autonomous private data for each array node.
+ //
+ // This function can't fail, as properly reclaiming memory in case of error
+ // would be too fragile. After this function returns, memory is reclaimed
+ // by calling the release() pointer in the top level ArrowArray struct.
+ void Finish(struct ArrowArray* c_struct_) {
+ // First, create permanent ExportedArrayPrivateData, to make sure that
+ // child ArrayData pointers don't get invalidated.
+ auto pdata = new ExportedArrayPrivateData(std::move(export_));
+ const ArrayData& data = *pdata->data_;
+
+ // Second, finish dictionary and children.
+ if (dict_exporter_) {
+ dict_exporter_->Finish(&pdata->dictionary_);
+ }
+ pdata->child_pointers_.resize(data.child_data.size(), nullptr);
+ for (size_t i = 0; i < data.child_data.size(); ++i) {
+ auto ptr = &pdata->children_[i];
+ pdata->child_pointers_[i] = ptr;
+ child_exporters_[i].Finish(ptr);
+ }
+
+ // Third, fill C struct.
+ DCHECK_NE(c_struct_, nullptr);
+ memset(c_struct_, 0, sizeof(*c_struct_));
+
+ c_struct_->length = data.length;
+ c_struct_->null_count = data.null_count;
+ c_struct_->offset = data.offset;
+ c_struct_->n_buffers = static_cast<int64_t>(pdata->buffers_.size());
+ c_struct_->n_children = static_cast<int64_t>(pdata->child_pointers_.size());
+ c_struct_->buffers = pdata->buffers_.data();
+ c_struct_->children = pdata->child_pointers_.data();
+ c_struct_->dictionary = dict_exporter_ ? &pdata->dictionary_ : nullptr;
+ c_struct_->private_data = pdata;
+ c_struct_->release = ReleaseExportedArray;
+ }
+
+ ExportedArrayPrivateData export_;
+ std::unique_ptr<ArrayExporter> dict_exporter_;
+ std::vector<ArrayExporter> child_exporters_;
+};
+
+} // namespace
+
+Status ExportArray(const Array& array, struct ArrowArray* out,
+ struct ArrowSchema* out_schema) {
+ SchemaExportGuard guard(out_schema);
+ if (out_schema != nullptr) {
+ RETURN_NOT_OK(ExportType(*array.type(), out_schema));
+ }
+ ArrayExporter exporter;
+ RETURN_NOT_OK(exporter.Export(array.data()));
+ exporter.Finish(out);
+ guard.Detach();
+ return Status::OK();
+}
+
+Status ExportRecordBatch(const RecordBatch& batch, struct ArrowArray* out,
+ struct ArrowSchema* out_schema) {
+ // XXX perhaps bypass ToStructArray() for speed?
+ ARROW_ASSIGN_OR_RAISE(auto array, batch.ToStructArray());
+
+ SchemaExportGuard guard(out_schema);
+ if (out_schema != nullptr) {
+ // Export the schema, not the struct type, so as not to lose top-level metadata
+ RETURN_NOT_OK(ExportSchema(*batch.schema(), out_schema));
+ }
+ ArrayExporter exporter;
+ RETURN_NOT_OK(exporter.Export(array->data()));
+ exporter.Finish(out);
+ guard.Detach();
+ return Status::OK();
+}
+
+//////////////////////////////////////////////////////////////////////////
+// C schema import
+
+namespace {
+
+static constexpr int64_t kMaxImportRecursionLevel = 64;
+
+Status InvalidFormatString(util::string_view v) {
+ return Status::Invalid("Invalid or unsupported format string: '", v, "'");
+}
+
+class FormatStringParser {
+ public:
+ FormatStringParser() {}
+
+ explicit FormatStringParser(util::string_view v) : view_(v), index_(0) {}
+
+ bool AtEnd() const { return index_ >= view_.length(); }
+
+ char Next() { return view_[index_++]; }
+
+ util::string_view Rest() { return view_.substr(index_); }
+
+ Status CheckNext(char c) {
+ if (AtEnd() || Next() != c) {
+ return Invalid();
+ }
+ return Status::OK();
+ }
+
+ Status CheckHasNext() {
+ if (AtEnd()) {
+ return Invalid();
+ }
+ return Status::OK();
+ }
+
+ Status CheckAtEnd() {
+ if (!AtEnd()) {
+ return Invalid();
+ }
+ return Status::OK();
+ }
+
+ template <typename IntType = int32_t>
+ Result<IntType> ParseInt(util::string_view v) {
+ using ArrowIntType = typename CTypeTraits<IntType>::ArrowType;
+ IntType value;
+ if (!internal::ParseValue<ArrowIntType>(v.data(), v.size(), &value)) {
+ return Invalid();
+ }
+ return value;
+ }
+
+ Result<TimeUnit::type> ParseTimeUnit() {
+ RETURN_NOT_OK(CheckHasNext());
+ switch (Next()) {
+ case 's':
+ return TimeUnit::SECOND;
+ case 'm':
+ return TimeUnit::MILLI;
+ case 'u':
+ return TimeUnit::MICRO;
+ case 'n':
+ return TimeUnit::NANO;
+ default:
+ return Invalid();
+ }
+ }
+
+ std::vector<util::string_view> Split(util::string_view v, char delim = ',') {
+ std::vector<util::string_view> parts;
+ size_t start = 0, end;
+ while (true) {
+ end = v.find_first_of(delim, start);
+ parts.push_back(v.substr(start, end - start));
+ if (end == util::string_view::npos) {
+ break;
+ }
+ start = end + 1;
+ }
+ return parts;
+ }
+
+ template <typename IntType = int32_t>
+ Result<std::vector<IntType>> ParseInts(util::string_view v) {
+ auto parts = Split(v);
+ std::vector<IntType> result;
+ result.reserve(parts.size());
+ for (const auto& p : parts) {
+ ARROW_ASSIGN_OR_RAISE(auto i, ParseInt<IntType>(p));
+ result.push_back(i);
+ }
+ return result;
+ }
+
+ Status Invalid() { return InvalidFormatString(view_); }
+
+ protected:
+ util::string_view view_;
+ size_t index_;
+};
+
+Result<std::shared_ptr<KeyValueMetadata>> DecodeMetadata(const char* metadata) {
+ auto read_int32 = [&](int32_t* out) -> Status {
+ int32_t v;
+ memcpy(&v, metadata, 4);
+ metadata += 4;
+ *out = v;
+ if (*out < 0) {
+ return Status::Invalid("Invalid encoded metadata string");
+ }
+ return Status::OK();
+ };
+
+ auto read_string = [&](std::string* out) -> Status {
+ int32_t len;
+ RETURN_NOT_OK(read_int32(&len));
+ out->resize(len);
+ if (len > 0) {
+ memcpy(&(*out)[0], metadata, len);
+ metadata += len;
+ }
+ return Status::OK();
+ };
+
+ if (metadata == nullptr) {
+ return nullptr;
+ }
+ int32_t npairs;
+ RETURN_NOT_OK(read_int32(&npairs));
+ if (npairs == 0) {
+ return nullptr;
+ }
+ std::vector<std::string> keys(npairs);
+ std::vector<std::string> values(npairs);
+ for (int32_t i = 0; i < npairs; ++i) {
+ RETURN_NOT_OK(read_string(&keys[i]));
+ RETURN_NOT_OK(read_string(&values[i]));
+ }
+ return key_value_metadata(std::move(keys), std::move(values));
+}
+
+struct SchemaImporter {
+ SchemaImporter() : c_struct_(nullptr), guard_(nullptr) {}
+
+ Status Import(struct ArrowSchema* src) {
+ if (ArrowSchemaIsReleased(src)) {
+ return Status::Invalid("Cannot import released ArrowSchema");
+ }
+ guard_.Reset(src);
+ recursion_level_ = 0;
+ c_struct_ = src;
+ return DoImport();
+ }
+
+ Result<std::shared_ptr<Field>> MakeField() const {
+ ARROW_ASSIGN_OR_RAISE(auto metadata, DecodeMetadata(c_struct_->metadata));
+ const char* name = c_struct_->name ? c_struct_->name : "";
+ bool nullable = (c_struct_->flags & ARROW_FLAG_NULLABLE) != 0;
+ return field(name, type_, nullable, std::move(metadata));
+ }
+
+ Result<std::shared_ptr<Schema>> MakeSchema() const {
+ if (type_->id() != Type::STRUCT) {
+ return Status::Invalid(
+ "Cannot import schema: ArrowSchema describes non-struct type ",
+ type_->ToString());
+ }
+ ARROW_ASSIGN_OR_RAISE(auto metadata, DecodeMetadata(c_struct_->metadata));
+ return schema(type_->fields(), std::move(metadata));
+ }
+
+ Result<std::shared_ptr<DataType>> MakeType() const { return type_; }
+
+ protected:
+ Status ImportChild(const SchemaImporter* parent, struct ArrowSchema* src) {
+ if (ArrowSchemaIsReleased(src)) {
+ return Status::Invalid("Cannot import released ArrowSchema");
+ }
+ recursion_level_ = parent->recursion_level_ + 1;
+ if (recursion_level_ >= kMaxImportRecursionLevel) {
+ return Status::Invalid("Recursion level in ArrowSchema struct exceeded");
+ }
+ // The ArrowSchema is owned by its parent, so don't release it ourselves
+ c_struct_ = src;
+ return DoImport();
+ }
+
+ Status ImportDict(const SchemaImporter* parent, struct ArrowSchema* src) {
+ return ImportChild(parent, src);
+ }
+
+ Status DoImport() {
+ // First import children (required for reconstituting parent type)
+ child_importers_.resize(c_struct_->n_children);
+ for (int64_t i = 0; i < c_struct_->n_children; ++i) {
+ DCHECK_NE(c_struct_->children[i], nullptr);
+ RETURN_NOT_OK(child_importers_[i].ImportChild(this, c_struct_->children[i]));
+ }
+
+ // Import main type
+ RETURN_NOT_OK(ProcessFormat());
+ DCHECK_NE(type_, nullptr);
+
+ // Import dictionary type
+ if (c_struct_->dictionary != nullptr) {
+ // Check this index type
+ if (!is_integer(type_->id())) {
+ return Status::Invalid(
+ "ArrowSchema struct has a dictionary but is not an integer type: ",
+ type_->ToString());
+ }
+ SchemaImporter dict_importer;
+ RETURN_NOT_OK(dict_importer.ImportDict(this, c_struct_->dictionary));
+ bool ordered = (c_struct_->flags & ARROW_FLAG_DICTIONARY_ORDERED) != 0;
+ type_ = dictionary(type_, dict_importer.type_, ordered);
+ }
+ return Status::OK();
+ }
+
+ Status ProcessFormat() {
+ f_parser_ = FormatStringParser(c_struct_->format);
+ RETURN_NOT_OK(f_parser_.CheckHasNext());
+ switch (f_parser_.Next()) {
+ case 'n':
+ return ProcessPrimitive(null());
+ case 'b':
+ return ProcessPrimitive(boolean());
+ case 'c':
+ return ProcessPrimitive(int8());
+ case 'C':
+ return ProcessPrimitive(uint8());
+ case 's':
+ return ProcessPrimitive(int16());
+ case 'S':
+ return ProcessPrimitive(uint16());
+ case 'i':
+ return ProcessPrimitive(int32());
+ case 'I':
+ return ProcessPrimitive(uint32());
+ case 'l':
+ return ProcessPrimitive(int64());
+ case 'L':
+ return ProcessPrimitive(uint64());
+ case 'e':
+ return ProcessPrimitive(float16());
+ case 'f':
+ return ProcessPrimitive(float32());
+ case 'g':
+ return ProcessPrimitive(float64());
+ case 'u':
+ return ProcessPrimitive(utf8());
+ case 'U':
+ return ProcessPrimitive(large_utf8());
+ case 'z':
+ return ProcessPrimitive(binary());
+ case 'Z':
+ return ProcessPrimitive(large_binary());
+ case 'w':
+ return ProcessFixedSizeBinary();
+ case 'd':
+ return ProcessDecimal();
+ case 't':
+ return ProcessTemporal();
+ case '+':
+ return ProcessNested();
+ }
+ return f_parser_.Invalid();
+ }
+
+ Status ProcessTemporal() {
+ RETURN_NOT_OK(f_parser_.CheckHasNext());
+ switch (f_parser_.Next()) {
+ case 'd':
+ return ProcessDate();
+ case 't':
+ return ProcessTime();
+ case 'D':
+ return ProcessDuration();
+ case 'i':
+ return ProcessInterval();
+ case 's':
+ return ProcessTimestamp();
+ }
+ return f_parser_.Invalid();
+ }
+
+ Status ProcessNested() {
+ RETURN_NOT_OK(f_parser_.CheckHasNext());
+ switch (f_parser_.Next()) {
+ case 'l':
+ return ProcessListLike<ListType>();
+ case 'L':
+ return ProcessListLike<LargeListType>();
+ case 'w':
+ return ProcessFixedSizeList();
+ case 's':
+ return ProcessStruct();
+ case 'm':
+ return ProcessMap();
+ case 'u':
+ return ProcessUnion();
+ }
+ return f_parser_.Invalid();
+ }
+
+ Status ProcessDate() {
+ RETURN_NOT_OK(f_parser_.CheckHasNext());
+ switch (f_parser_.Next()) {
+ case 'D':
+ return ProcessPrimitive(date32());
+ case 'm':
+ return ProcessPrimitive(date64());
+ }
+ return f_parser_.Invalid();
+ }
+
+ Status ProcessInterval() {
+ RETURN_NOT_OK(f_parser_.CheckHasNext());
+ switch (f_parser_.Next()) {
+ case 'D':
+ return ProcessPrimitive(day_time_interval());
+ case 'M':
+ return ProcessPrimitive(month_interval());
+ }
+ return f_parser_.Invalid();
+ }
+
+ Status ProcessTime() {
+ ARROW_ASSIGN_OR_RAISE(auto unit, f_parser_.ParseTimeUnit());
+ if (unit == TimeUnit::SECOND || unit == TimeUnit::MILLI) {
+ return ProcessPrimitive(time32(unit));
+ } else {
+ return ProcessPrimitive(time64(unit));
+ }
+ }
+
+ Status ProcessDuration() {
+ ARROW_ASSIGN_OR_RAISE(auto unit, f_parser_.ParseTimeUnit());
+ return ProcessPrimitive(duration(unit));
+ }
+
+ Status ProcessTimestamp() {
+ ARROW_ASSIGN_OR_RAISE(auto unit, f_parser_.ParseTimeUnit());
+ RETURN_NOT_OK(f_parser_.CheckNext(':'));
+ type_ = timestamp(unit, std::string(f_parser_.Rest()));
+ return Status::OK();
+ }
+
+ Status ProcessFixedSizeBinary() {
+ RETURN_NOT_OK(f_parser_.CheckNext(':'));
+ ARROW_ASSIGN_OR_RAISE(auto byte_width, f_parser_.ParseInt(f_parser_.Rest()));
+ if (byte_width < 0) {
+ return f_parser_.Invalid();
+ }
+ type_ = fixed_size_binary(byte_width);
+ return Status::OK();
+ }
+
+ Status ProcessDecimal() {
+ RETURN_NOT_OK(f_parser_.CheckNext(':'));
+ ARROW_ASSIGN_OR_RAISE(auto prec_scale, f_parser_.ParseInts(f_parser_.Rest()));
// 3 elements indicates bit width was communicated as well.
if (prec_scale.size() != 2 && prec_scale.size() != 3) {
- return f_parser_.Invalid();
- }
+ return f_parser_.Invalid();
+ }
if (prec_scale[0] <= 0) {
- return f_parser_.Invalid();
- }
+ return f_parser_.Invalid();
+ }
if (prec_scale.size() == 2 || prec_scale[2] == 128) {
type_ = decimal128(prec_scale[0], prec_scale[1]);
} else if (prec_scale[2] == 256) {
@@ -994,719 +994,719 @@ struct SchemaImporter {
} else {
return f_parser_.Invalid();
}
- return Status::OK();
- }
-
- Status ProcessPrimitive(const std::shared_ptr<DataType>& type) {
- RETURN_NOT_OK(f_parser_.CheckAtEnd());
- type_ = type;
- return CheckNoChildren(type);
- }
-
- template <typename ListType>
- Status ProcessListLike() {
- RETURN_NOT_OK(f_parser_.CheckAtEnd());
- RETURN_NOT_OK(CheckNumChildren(1));
- ARROW_ASSIGN_OR_RAISE(auto field, MakeChildField(0));
- type_ = std::make_shared<ListType>(field);
- return Status::OK();
- }
-
- Status ProcessMap() {
- RETURN_NOT_OK(f_parser_.CheckAtEnd());
- RETURN_NOT_OK(CheckNumChildren(1));
- ARROW_ASSIGN_OR_RAISE(auto field, MakeChildField(0));
- const auto& value_type = field->type();
- if (value_type->id() != Type::STRUCT) {
- return Status::Invalid("Imported map array has unexpected child field type: ",
- field->ToString());
- }
- if (value_type->num_fields() != 2) {
- return Status::Invalid("Imported map array has unexpected child field type: ",
- field->ToString());
- }
-
- bool keys_sorted = (c_struct_->flags & ARROW_FLAG_MAP_KEYS_SORTED);
- type_ = map(value_type->field(0)->type(), value_type->field(1)->type(), keys_sorted);
- return Status::OK();
- }
-
- Status ProcessFixedSizeList() {
- RETURN_NOT_OK(f_parser_.CheckNext(':'));
- ARROW_ASSIGN_OR_RAISE(auto list_size, f_parser_.ParseInt(f_parser_.Rest()));
- if (list_size < 0) {
- return f_parser_.Invalid();
- }
- RETURN_NOT_OK(CheckNumChildren(1));
- ARROW_ASSIGN_OR_RAISE(auto field, MakeChildField(0));
- type_ = fixed_size_list(field, list_size);
- return Status::OK();
- }
-
- Status ProcessStruct() {
- RETURN_NOT_OK(f_parser_.CheckAtEnd());
- ARROW_ASSIGN_OR_RAISE(auto fields, MakeChildFields());
- type_ = struct_(std::move(fields));
- return Status::OK();
- }
-
- Status ProcessUnion() {
- RETURN_NOT_OK(f_parser_.CheckHasNext());
- UnionMode::type mode;
- switch (f_parser_.Next()) {
- case 'd':
- mode = UnionMode::DENSE;
- break;
- case 's':
- mode = UnionMode::SPARSE;
- break;
- default:
- return f_parser_.Invalid();
- }
- RETURN_NOT_OK(f_parser_.CheckNext(':'));
- ARROW_ASSIGN_OR_RAISE(auto type_codes, f_parser_.ParseInts<int8_t>(f_parser_.Rest()));
- ARROW_ASSIGN_OR_RAISE(auto fields, MakeChildFields());
- if (fields.size() != type_codes.size()) {
- return Status::Invalid(
- "ArrowArray struct number of children incompatible with format string "
- "(mismatching number of union type codes) ",
- "'", c_struct_->format, "'");
- }
- for (const auto code : type_codes) {
- if (code < 0) {
- return Status::Invalid("Negative type code in union: format string '",
- c_struct_->format, "'");
- }
- }
- if (mode == UnionMode::SPARSE) {
- type_ = sparse_union(std::move(fields), std::move(type_codes));
- } else {
- type_ = dense_union(std::move(fields), std::move(type_codes));
- }
- return Status::OK();
- }
-
- Result<std::shared_ptr<Field>> MakeChildField(int64_t child_id) {
- const auto& child = child_importers_[child_id];
- if (child.c_struct_->name == nullptr) {
- return Status::Invalid("Expected non-null name in imported array child");
- }
- return child.MakeField();
- }
-
- Result<std::vector<std::shared_ptr<Field>>> MakeChildFields() {
- std::vector<std::shared_ptr<Field>> fields(child_importers_.size());
- for (int64_t i = 0; i < static_cast<int64_t>(child_importers_.size()); ++i) {
- ARROW_ASSIGN_OR_RAISE(fields[i], MakeChildField(i));
- }
- return fields;
- }
-
- Status CheckNoChildren(const std::shared_ptr<DataType>& type) {
- return CheckNumChildren(type, 0);
- }
-
- Status CheckNumChildren(const std::shared_ptr<DataType>& type, int64_t n_children) {
- if (c_struct_->n_children != n_children) {
- return Status::Invalid("Expected ", n_children, " children for imported type ",
- *type, ", ArrowArray struct has ", c_struct_->n_children);
- }
- return Status::OK();
- }
-
- Status CheckNumChildren(int64_t n_children) {
- if (c_struct_->n_children != n_children) {
- return Status::Invalid("Expected ", n_children, " children for imported format '",
- c_struct_->format, "', ArrowArray struct has ",
- c_struct_->n_children);
- }
- return Status::OK();
- }
-
- struct ArrowSchema* c_struct_;
- SchemaExportGuard guard_;
- FormatStringParser f_parser_;
- int64_t recursion_level_;
- std::vector<SchemaImporter> child_importers_;
- std::shared_ptr<DataType> type_;
-};
-
-} // namespace
-
-Result<std::shared_ptr<DataType>> ImportType(struct ArrowSchema* schema) {
- SchemaImporter importer;
- RETURN_NOT_OK(importer.Import(schema));
- return importer.MakeType();
-}
-
-Result<std::shared_ptr<Field>> ImportField(struct ArrowSchema* schema) {
- SchemaImporter importer;
- RETURN_NOT_OK(importer.Import(schema));
- return importer.MakeField();
-}
-
-Result<std::shared_ptr<Schema>> ImportSchema(struct ArrowSchema* schema) {
- SchemaImporter importer;
- RETURN_NOT_OK(importer.Import(schema));
- return importer.MakeSchema();
-}
-
-//////////////////////////////////////////////////////////////////////////
-// C data import
-
-namespace {
-
-// A wrapper struct for an imported C ArrowArray.
-// The ArrowArray is released on destruction.
-struct ImportedArrayData {
- struct ArrowArray array_;
-
- ImportedArrayData() {
- ArrowArrayMarkReleased(&array_); // Initially released
- }
-
- void Release() {
- if (!ArrowArrayIsReleased(&array_)) {
- ArrowArrayRelease(&array_);
- DCHECK(ArrowArrayIsReleased(&array_));
- }
- }
-
- ~ImportedArrayData() { Release(); }
-
- private:
- ARROW_DISALLOW_COPY_AND_ASSIGN(ImportedArrayData);
-};
-
-// A buffer wrapping an imported piece of data.
-class ImportedBuffer : public Buffer {
- public:
- ImportedBuffer(const uint8_t* data, int64_t size,
- std::shared_ptr<ImportedArrayData> import)
- : Buffer(data, size), import_(std::move(import)) {}
-
- ~ImportedBuffer() override {}
-
- protected:
- std::shared_ptr<ImportedArrayData> import_;
-};
-
-struct ArrayImporter {
- explicit ArrayImporter(const std::shared_ptr<DataType>& type) : type_(type) {}
-
- Status Import(struct ArrowArray* src) {
- if (ArrowArrayIsReleased(src)) {
- return Status::Invalid("Cannot import released ArrowArray");
- }
- recursion_level_ = 0;
- import_ = std::make_shared<ImportedArrayData>();
- c_struct_ = &import_->array_;
- ArrowArrayMove(src, c_struct_);
- return DoImport();
- }
-
- Result<std::shared_ptr<Array>> MakeArray() {
- DCHECK_NE(data_, nullptr);
- return ::arrow::MakeArray(data_);
- }
-
- std::shared_ptr<ArrayData> GetArrayData() {
- DCHECK_NE(data_, nullptr);
- return data_;
- }
-
- Result<std::shared_ptr<RecordBatch>> MakeRecordBatch(std::shared_ptr<Schema> schema) {
- DCHECK_NE(data_, nullptr);
- if (data_->GetNullCount() != 0) {
- return Status::Invalid(
- "ArrowArray struct has non-zero null count, "
- "cannot be imported as RecordBatch");
- }
- if (data_->offset != 0) {
- return Status::Invalid(
- "ArrowArray struct has non-zero offset, "
- "cannot be imported as RecordBatch");
- }
- return RecordBatch::Make(std::move(schema), data_->length,
- std::move(data_->child_data));
- }
-
- Status ImportChild(const ArrayImporter* parent, struct ArrowArray* src) {
- if (ArrowArrayIsReleased(src)) {
- return Status::Invalid("Cannot import released ArrowArray");
- }
- recursion_level_ = parent->recursion_level_ + 1;
- if (recursion_level_ >= kMaxImportRecursionLevel) {
- return Status::Invalid("Recursion level in ArrowArray struct exceeded");
- }
- // Child buffers will keep the entire parent import alive.
- // Perhaps we can move the child structs to an owned area
- // when the parent ImportedArrayData::Release() gets called,
- // but that is another level of complication.
- import_ = parent->import_;
- // The ArrowArray shouldn't be moved, it's owned by its parent
- c_struct_ = src;
- return DoImport();
- }
-
- Status ImportDict(const ArrayImporter* parent, struct ArrowArray* src) {
- return ImportChild(parent, src);
- }
-
- Status DoImport() {
- // First import children (required for reconstituting parent array data)
- const auto& fields = type_->fields();
- if (c_struct_->n_children != static_cast<int64_t>(fields.size())) {
- return Status::Invalid("ArrowArray struct has ", c_struct_->n_children,
- " children, expected ", fields.size(), " for type ",
- type_->ToString());
- }
- child_importers_.reserve(fields.size());
- for (int64_t i = 0; i < c_struct_->n_children; ++i) {
- DCHECK_NE(c_struct_->children[i], nullptr);
- child_importers_.emplace_back(fields[i]->type());
- RETURN_NOT_OK(child_importers_.back().ImportChild(this, c_struct_->children[i]));
- }
-
- // Import main data
- RETURN_NOT_OK(ImportMainData());
-
- bool is_dict_type = (type_->id() == Type::DICTIONARY);
- if (c_struct_->dictionary != nullptr) {
- if (!is_dict_type) {
- return Status::Invalid("Import type is ", type_->ToString(),
- " but dictionary field in ArrowArray struct is not null");
- }
- const auto& dict_type = checked_cast<const DictionaryType&>(*type_);
- // Import dictionary values
- ArrayImporter dict_importer(dict_type.value_type());
- RETURN_NOT_OK(dict_importer.ImportDict(this, c_struct_->dictionary));
- data_->dictionary = dict_importer.GetArrayData();
- } else {
- if (is_dict_type) {
- return Status::Invalid("Import type is ", type_->ToString(),
- " but dictionary field in ArrowArray struct is null");
- }
- }
- return Status::OK();
- }
-
- Status ImportMainData() { return VisitTypeInline(*type_, this); }
-
- Status Visit(const DataType& type) {
- return Status::NotImplemented("Cannot import array of type ", type_->ToString());
- }
-
- Status Visit(const FixedWidthType& type) { return ImportFixedSizePrimitive(); }
-
- Status Visit(const NullType& type) {
- RETURN_NOT_OK(CheckNoChildren());
- // XXX should we be lenient on the number of buffers?
- RETURN_NOT_OK(CheckNumBuffers(1));
- RETURN_NOT_OK(AllocateArrayData());
- RETURN_NOT_OK(ImportBitsBuffer(0));
- return Status::OK();
- }
-
- Status Visit(const StringType& type) { return ImportStringLike(type); }
-
- Status Visit(const BinaryType& type) { return ImportStringLike(type); }
-
- Status Visit(const LargeStringType& type) { return ImportStringLike(type); }
-
- Status Visit(const LargeBinaryType& type) { return ImportStringLike(type); }
-
- Status Visit(const ListType& type) { return ImportListLike(type); }
-
- Status Visit(const LargeListType& type) { return ImportListLike(type); }
-
- Status Visit(const FixedSizeListType& type) {
- RETURN_NOT_OK(CheckNumChildren(1));
- RETURN_NOT_OK(CheckNumBuffers(1));
- RETURN_NOT_OK(AllocateArrayData());
- RETURN_NOT_OK(ImportNullBitmap());
- return Status::OK();
- }
-
- Status Visit(const StructType& type) {
- RETURN_NOT_OK(CheckNumBuffers(1));
- RETURN_NOT_OK(AllocateArrayData());
- RETURN_NOT_OK(ImportNullBitmap());
- return Status::OK();
- }
-
- Status Visit(const UnionType& type) {
- auto mode = type.mode();
- if (mode == UnionMode::SPARSE) {
- RETURN_NOT_OK(CheckNumBuffers(2));
- } else {
- RETURN_NOT_OK(CheckNumBuffers(3));
- }
- RETURN_NOT_OK(AllocateArrayData());
- RETURN_NOT_OK(ImportNullBitmap());
- RETURN_NOT_OK(ImportFixedSizeBuffer(1, sizeof(int8_t)));
- if (mode == UnionMode::DENSE) {
- RETURN_NOT_OK(ImportFixedSizeBuffer(2, sizeof(int32_t)));
- }
- return Status::OK();
- }
-
- Status ImportFixedSizePrimitive() {
- const auto& fw_type = checked_cast<const FixedWidthType&>(*type_);
- RETURN_NOT_OK(CheckNoChildren());
- RETURN_NOT_OK(CheckNumBuffers(2));
- RETURN_NOT_OK(AllocateArrayData());
- RETURN_NOT_OK(ImportNullBitmap());
- if (BitUtil::IsMultipleOf8(fw_type.bit_width())) {
- RETURN_NOT_OK(ImportFixedSizeBuffer(1, fw_type.bit_width() / 8));
- } else {
- DCHECK_EQ(fw_type.bit_width(), 1);
- RETURN_NOT_OK(ImportBitsBuffer(1));
- }
- return Status::OK();
- }
-
- template <typename StringType>
- Status ImportStringLike(const StringType& type) {
- RETURN_NOT_OK(CheckNoChildren());
- RETURN_NOT_OK(CheckNumBuffers(3));
- RETURN_NOT_OK(AllocateArrayData());
- RETURN_NOT_OK(ImportNullBitmap());
- RETURN_NOT_OK(ImportOffsetsBuffer<typename StringType::offset_type>(1));
- RETURN_NOT_OK(ImportStringValuesBuffer<typename StringType::offset_type>(1, 2));
- return Status::OK();
- }
-
- template <typename ListType>
- Status ImportListLike(const ListType& type) {
- RETURN_NOT_OK(CheckNumChildren(1));
- RETURN_NOT_OK(CheckNumBuffers(2));
- RETURN_NOT_OK(AllocateArrayData());
- RETURN_NOT_OK(ImportNullBitmap());
- RETURN_NOT_OK(ImportOffsetsBuffer<typename ListType::offset_type>(1));
- return Status::OK();
- }
-
- Status CheckNoChildren() { return CheckNumChildren(0); }
-
- Status CheckNumChildren(int64_t n_children) {
- if (c_struct_->n_children != n_children) {
- return Status::Invalid("Expected ", n_children, " children for imported type ",
- type_->ToString(), ", ArrowArray struct has ",
- c_struct_->n_children);
- }
- return Status::OK();
- }
-
- Status CheckNumBuffers(int64_t n_buffers) {
- if (n_buffers != c_struct_->n_buffers) {
- return Status::Invalid("Expected ", n_buffers, " buffers for imported type ",
- type_->ToString(), ", ArrowArray struct has ",
- c_struct_->n_buffers);
- }
- return Status::OK();
- }
-
- Status AllocateArrayData() {
- DCHECK_EQ(data_, nullptr);
- data_ = std::make_shared<ArrayData>(type_, c_struct_->length, c_struct_->null_count,
- c_struct_->offset);
- data_->buffers.resize(static_cast<size_t>(c_struct_->n_buffers));
- data_->child_data.resize(static_cast<size_t>(c_struct_->n_children));
- DCHECK_EQ(child_importers_.size(), data_->child_data.size());
- std::transform(child_importers_.begin(), child_importers_.end(),
- data_->child_data.begin(),
- [](const ArrayImporter& child) { return child.data_; });
- return Status::OK();
- }
-
- Status ImportNullBitmap(int32_t buffer_id = 0) {
- RETURN_NOT_OK(ImportBitsBuffer(buffer_id));
- if (data_->null_count > 0 && data_->buffers[buffer_id] == nullptr) {
- return Status::Invalid(
- "ArrowArray struct has null bitmap buffer but non-zero null_count ",
- data_->null_count);
- }
- return Status::OK();
- }
-
- Status ImportBitsBuffer(int32_t buffer_id) {
- // Compute visible size of buffer
- int64_t buffer_size = BitUtil::BytesForBits(c_struct_->length + c_struct_->offset);
- return ImportBuffer(buffer_id, buffer_size);
- }
-
- Status ImportFixedSizeBuffer(int32_t buffer_id, int64_t byte_width) {
- // Compute visible size of buffer
- int64_t buffer_size = byte_width * (c_struct_->length + c_struct_->offset);
- return ImportBuffer(buffer_id, buffer_size);
- }
-
- template <typename OffsetType>
- Status ImportOffsetsBuffer(int32_t buffer_id) {
- // Compute visible size of buffer
- int64_t buffer_size =
- sizeof(OffsetType) * (c_struct_->length + c_struct_->offset + 1);
- return ImportBuffer(buffer_id, buffer_size);
- }
-
- template <typename OffsetType>
- Status ImportStringValuesBuffer(int32_t offsets_buffer_id, int32_t buffer_id,
- int64_t byte_width = 1) {
- auto offsets = data_->GetValues<OffsetType>(offsets_buffer_id);
- // Compute visible size of buffer
- int64_t buffer_size = byte_width * offsets[c_struct_->length];
- return ImportBuffer(buffer_id, buffer_size);
- }
-
- Status ImportBuffer(int32_t buffer_id, int64_t buffer_size) {
- std::shared_ptr<Buffer>* out = &data_->buffers[buffer_id];
- auto data = reinterpret_cast<const uint8_t*>(c_struct_->buffers[buffer_id]);
- if (data != nullptr) {
- *out = std::make_shared<ImportedBuffer>(data, buffer_size, import_);
- } else {
- out->reset();
- }
- return Status::OK();
- }
-
- struct ArrowArray* c_struct_;
- int64_t recursion_level_;
- const std::shared_ptr<DataType>& type_;
-
- std::shared_ptr<ImportedArrayData> import_;
- std::shared_ptr<ArrayData> data_;
- std::vector<ArrayImporter> child_importers_;
-};
-
-} // namespace
-
-Result<std::shared_ptr<Array>> ImportArray(struct ArrowArray* array,
- std::shared_ptr<DataType> type) {
- ArrayImporter importer(type);
- RETURN_NOT_OK(importer.Import(array));
- return importer.MakeArray();
-}
-
-Result<std::shared_ptr<Array>> ImportArray(struct ArrowArray* array,
- struct ArrowSchema* type) {
- auto maybe_type = ImportType(type);
- if (!maybe_type.ok()) {
- ArrowArrayRelease(array);
- return maybe_type.status();
- }
- return ImportArray(array, *maybe_type);
-}
-
-Result<std::shared_ptr<RecordBatch>> ImportRecordBatch(struct ArrowArray* array,
- std::shared_ptr<Schema> schema) {
- auto type = struct_(schema->fields());
- ArrayImporter importer(type);
- RETURN_NOT_OK(importer.Import(array));
- return importer.MakeRecordBatch(std::move(schema));
-}
-
-Result<std::shared_ptr<RecordBatch>> ImportRecordBatch(struct ArrowArray* array,
- struct ArrowSchema* schema) {
- auto maybe_schema = ImportSchema(schema);
- if (!maybe_schema.ok()) {
- ArrowArrayRelease(array);
- return maybe_schema.status();
- }
- return ImportRecordBatch(array, *maybe_schema);
-}
-
-//////////////////////////////////////////////////////////////////////////
-// C stream export
-
-namespace {
-
-class ExportedArrayStream {
- public:
- struct PrivateData {
- explicit PrivateData(std::shared_ptr<RecordBatchReader> reader)
- : reader_(std::move(reader)) {}
-
- std::shared_ptr<RecordBatchReader> reader_;
- std::string last_error_;
-
- PrivateData() = default;
- ARROW_DISALLOW_COPY_AND_ASSIGN(PrivateData);
- };
-
- explicit ExportedArrayStream(struct ArrowArrayStream* stream) : stream_(stream) {}
-
- Status GetSchema(struct ArrowSchema* out_schema) {
- return ExportSchema(*reader()->schema(), out_schema);
- }
-
- Status GetNext(struct ArrowArray* out_array) {
- std::shared_ptr<RecordBatch> batch;
- RETURN_NOT_OK(reader()->ReadNext(&batch));
- if (batch == nullptr) {
- // End of stream
- ArrowArrayMarkReleased(out_array);
- return Status::OK();
- } else {
- return ExportRecordBatch(*batch, out_array);
- }
- }
-
- const char* GetLastError() {
- const auto& last_error = private_data()->last_error_;
- return last_error.empty() ? nullptr : last_error.c_str();
- }
-
- void Release() {
- if (ArrowArrayStreamIsReleased(stream_)) {
- return;
- }
- DCHECK_NE(private_data(), nullptr);
- delete private_data();
-
- ArrowArrayStreamMarkReleased(stream_);
- }
-
- // C-compatible callbacks
-
- static int StaticGetSchema(struct ArrowArrayStream* stream,
- struct ArrowSchema* out_schema) {
- ExportedArrayStream self{stream};
- return self.ToCError(self.GetSchema(out_schema));
- }
-
- static int StaticGetNext(struct ArrowArrayStream* stream,
- struct ArrowArray* out_array) {
- ExportedArrayStream self{stream};
- return self.ToCError(self.GetNext(out_array));
- }
-
- static void StaticRelease(struct ArrowArrayStream* stream) {
- ExportedArrayStream{stream}.Release();
- }
-
- static const char* StaticGetLastError(struct ArrowArrayStream* stream) {
- return ExportedArrayStream{stream}.GetLastError();
- }
-
- private:
- int ToCError(const Status& status) {
- if (ARROW_PREDICT_TRUE(status.ok())) {
- private_data()->last_error_.clear();
- return 0;
- }
- private_data()->last_error_ = status.ToString();
- switch (status.code()) {
- case StatusCode::IOError:
- return EIO;
- case StatusCode::NotImplemented:
- return ENOSYS;
- case StatusCode::OutOfMemory:
- return ENOMEM;
- default:
- return EINVAL; // Fallback for Invalid, TypeError, etc.
- }
- }
-
- PrivateData* private_data() {
- return reinterpret_cast<PrivateData*>(stream_->private_data);
- }
-
- const std::shared_ptr<RecordBatchReader>& reader() { return private_data()->reader_; }
-
- struct ArrowArrayStream* stream_;
-};
-
-} // namespace
-
-Status ExportRecordBatchReader(std::shared_ptr<RecordBatchReader> reader,
- struct ArrowArrayStream* out) {
- out->get_schema = ExportedArrayStream::StaticGetSchema;
- out->get_next = ExportedArrayStream::StaticGetNext;
- out->get_last_error = ExportedArrayStream::StaticGetLastError;
- out->release = ExportedArrayStream::StaticRelease;
- out->private_data = new ExportedArrayStream::PrivateData{std::move(reader)};
- return Status::OK();
-}
-
-//////////////////////////////////////////////////////////////////////////
-// C stream import
-
-namespace {
-
-class ArrayStreamBatchReader : public RecordBatchReader {
- public:
- explicit ArrayStreamBatchReader(struct ArrowArrayStream* stream) {
- ArrowArrayStreamMove(stream, &stream_);
- DCHECK(!ArrowArrayStreamIsReleased(&stream_));
- }
-
- ~ArrayStreamBatchReader() {
- ArrowArrayStreamRelease(&stream_);
- DCHECK(ArrowArrayStreamIsReleased(&stream_));
- }
-
- std::shared_ptr<Schema> schema() const override { return CacheSchema(); }
-
- Status ReadNext(std::shared_ptr<RecordBatch>* batch) override {
- struct ArrowArray c_array;
- RETURN_NOT_OK(StatusFromCError(stream_.get_next(&stream_, &c_array)));
- if (ArrowArrayIsReleased(&c_array)) {
- // End of stream
- batch->reset();
- return Status::OK();
- } else {
- return ImportRecordBatch(&c_array, CacheSchema()).Value(batch);
- }
- }
-
- private:
- std::shared_ptr<Schema> CacheSchema() const {
- if (!schema_) {
- struct ArrowSchema c_schema;
- ARROW_CHECK_OK(StatusFromCError(stream_.get_schema(&stream_, &c_schema)));
- schema_ = ImportSchema(&c_schema).ValueOrDie();
- }
- return schema_;
- }
-
- Status StatusFromCError(int errno_like) const {
- if (ARROW_PREDICT_TRUE(errno_like == 0)) {
- return Status::OK();
- }
- StatusCode code;
- switch (errno_like) {
- case EDOM:
- case EINVAL:
- case ERANGE:
- code = StatusCode::Invalid;
- break;
- case ENOMEM:
- code = StatusCode::OutOfMemory;
- break;
- case ENOSYS:
- code = StatusCode::NotImplemented;
- default:
- code = StatusCode::IOError;
- break;
- }
- const char* last_error = stream_.get_last_error(&stream_);
- return Status(code, last_error ? std::string(last_error) : "");
- }
-
- mutable struct ArrowArrayStream stream_;
- mutable std::shared_ptr<Schema> schema_;
-};
-
-} // namespace
-
-Result<std::shared_ptr<RecordBatchReader>> ImportRecordBatchReader(
- struct ArrowArrayStream* stream) {
- if (ArrowArrayStreamIsReleased(stream)) {
- return Status::Invalid("Cannot import released ArrowArrayStream");
- }
- // XXX should we call get_schema() here to avoid crashing on error?
- return std::make_shared<ArrayStreamBatchReader>(stream);
-}
-
-} // namespace arrow
+ return Status::OK();
+ }
+
+ // Finish importing a primitive type: the format string must be fully
+ // consumed and the ArrowSchema struct must declare zero children.
+ Status ProcessPrimitive(const std::shared_ptr<DataType>& type) {
+ RETURN_NOT_OK(f_parser_.CheckAtEnd());
+ type_ = type;
+ // Same check as CheckNoChildren(type), spelled out.
+ return CheckNumChildren(type, 0);
+ }
+
+ // Import a list-like type; ListType is the concrete type to instantiate
+ // from the single child field of the schema.
+ template <typename ListType>
+ Status ProcessListLike() {
+ RETURN_NOT_OK(f_parser_.CheckAtEnd());
+ RETURN_NOT_OK(CheckNumChildren(1));
+ ARROW_ASSIGN_OR_RAISE(auto value_field, MakeChildField(0));
+ type_ = std::make_shared<ListType>(value_field);
+ return Status::OK();
+ }
+
+ // Import a map type. The single child must be a struct with exactly two
+ // fields, whose types become the two type arguments of map(); key
+ // sortedness is read from the ARROW_FLAG_MAP_KEYS_SORTED bit of the flags.
+ Status ProcessMap() {
+ RETURN_NOT_OK(f_parser_.CheckAtEnd());
+ RETURN_NOT_OK(CheckNumChildren(1));
+ ARROW_ASSIGN_OR_RAISE(auto entries_field, MakeChildField(0));
+ const auto& entries_type = entries_field->type();
+ // Both failure modes report the same message, so test them together.
+ if (entries_type->id() != Type::STRUCT || entries_type->num_fields() != 2) {
+ return Status::Invalid("Imported map array has unexpected child field type: ",
+ entries_field->ToString());
+ }
+
+ const bool keys_sorted = (c_struct_->flags & ARROW_FLAG_MAP_KEYS_SORTED) != 0;
+ type_ = map(entries_type->field(0)->type(), entries_type->field(1)->type(),
+ keys_sorted);
+ return Status::OK();
+ }
+
+ // Import a fixed-size list type. The list size follows a ':' in the format
+ // string and must not be negative; the single child describes the values.
+ Status ProcessFixedSizeList() {
+ RETURN_NOT_OK(f_parser_.CheckNext(':'));
+ ARROW_ASSIGN_OR_RAISE(auto num_values, f_parser_.ParseInt(f_parser_.Rest()));
+ if (num_values < 0) {
+ return f_parser_.Invalid();
+ }
+ RETURN_NOT_OK(CheckNumChildren(1));
+ ARROW_ASSIGN_OR_RAISE(auto value_field, MakeChildField(0));
+ type_ = fixed_size_list(value_field, num_values);
+ return Status::OK();
+ }
+
+ // Import a struct type whose fields are built from the schema's children.
+ Status ProcessStruct() {
+ RETURN_NOT_OK(f_parser_.CheckAtEnd());
+ ARROW_ASSIGN_OR_RAISE(auto child_fields, MakeChildFields());
+ type_ = struct_(std::move(child_fields));
+ return Status::OK();
+ }
+
+ // Import a union type. The format continues with a mode letter ('d' for
+ // dense, 's' for sparse), a ':' and the list of type codes. There must be
+ // as many type codes as children and none of them may be negative.
+ Status ProcessUnion() {
+ RETURN_NOT_OK(f_parser_.CheckHasNext());
+ const auto mode_char = f_parser_.Next();
+ if (mode_char != 'd' && mode_char != 's') {
+ return f_parser_.Invalid();
+ }
+ const UnionMode::type mode =
+ (mode_char == 'd') ? UnionMode::DENSE : UnionMode::SPARSE;
+
+ RETURN_NOT_OK(f_parser_.CheckNext(':'));
+ ARROW_ASSIGN_OR_RAISE(auto type_codes, f_parser_.ParseInts<int8_t>(f_parser_.Rest()));
+ ARROW_ASSIGN_OR_RAISE(auto child_fields, MakeChildFields());
+ if (child_fields.size() != type_codes.size()) {
+ return Status::Invalid(
+ "ArrowArray struct number of children incompatible with format string "
+ "(mismatching number of union type codes) ",
+ "'", c_struct_->format, "'");
+ }
+ for (const auto type_code : type_codes) {
+ if (type_code < 0) {
+ return Status::Invalid("Negative type code in union: format string '",
+ c_struct_->format, "'");
+ }
+ }
+
+ if (mode == UnionMode::DENSE) {
+ type_ = dense_union(std::move(child_fields), std::move(type_codes));
+ } else {
+ type_ = sparse_union(std::move(child_fields), std::move(type_codes));
+ }
+ return Status::OK();
+ }
+
+ // Build the Field of child number `child_id`. Fails if the child's
+ // ArrowSchema struct has a null name.
+ Result<std::shared_ptr<Field>> MakeChildField(int64_t child_id) {
+ const SchemaImporter& child_importer = child_importers_[child_id];
+ if (child_importer.c_struct_->name != nullptr) {
+ return child_importer.MakeField();
+ }
+ return Status::Invalid("Expected non-null name in imported array child");
+ }
+
+ // Build the Fields of all children, in child order. The first failing
+ // child aborts the whole operation.
+ Result<std::vector<std::shared_ptr<Field>>> MakeChildFields() {
+ const auto num_children = static_cast<int64_t>(child_importers_.size());
+ std::vector<std::shared_ptr<Field>> fields;
+ fields.reserve(child_importers_.size());
+ for (int64_t child_id = 0; child_id < num_children; ++child_id) {
+ ARROW_ASSIGN_OR_RAISE(auto child_field, MakeChildField(child_id));
+ fields.push_back(std::move(child_field));
+ }
+ return fields;
+ }
+
+ // Require that the schema has no children. `type` is forwarded to
+ // CheckNumChildren() where it is only used in the error message.
+ Status CheckNoChildren(const std::shared_ptr<DataType>& type) {
+ constexpr int64_t kNoChildren = 0;
+ return CheckNumChildren(type, kNoChildren);
+ }
+
+ // Require exactly `n_children` children; the error message names `type`.
+ Status CheckNumChildren(const std::shared_ptr<DataType>& type, int64_t n_children) {
+ if (c_struct_->n_children == n_children) {
+ return Status::OK();
+ }
+ return Status::Invalid("Expected ", n_children, " children for imported type ",
+ *type, ", ArrowArray struct has ", c_struct_->n_children);
+ }
+
+ // Require exactly `n_children` children; the error message quotes the
+ // format string since no DataType is available to name.
+ Status CheckNumChildren(int64_t n_children) {
+ if (c_struct_->n_children == n_children) {
+ return Status::OK();
+ }
+ return Status::Invalid("Expected ", n_children, " children for imported format '",
+ c_struct_->format, "', ArrowArray struct has ",
+ c_struct_->n_children);
+ }
+
+ struct ArrowSchema* c_struct_;
+ SchemaExportGuard guard_;
+ FormatStringParser f_parser_;
+ int64_t recursion_level_;
+ std::vector<SchemaImporter> child_importers_;
+ std::shared_ptr<DataType> type_;
+};
+
+} // namespace
+
+// Import a C ArrowSchema struct and return it as a DataType.
+Result<std::shared_ptr<DataType>> ImportType(struct ArrowSchema* schema) {
+ SchemaImporter schema_importer;
+ RETURN_NOT_OK(schema_importer.Import(schema));
+ return schema_importer.MakeType();
+}
+
+// Import a C ArrowSchema struct and return it as a Field.
+Result<std::shared_ptr<Field>> ImportField(struct ArrowSchema* schema) {
+ SchemaImporter schema_importer;
+ RETURN_NOT_OK(schema_importer.Import(schema));
+ return schema_importer.MakeField();
+}
+
+// Import a C ArrowSchema struct and return it as a Schema.
+Result<std::shared_ptr<Schema>> ImportSchema(struct ArrowSchema* schema) {
+ SchemaImporter schema_importer;
+ RETURN_NOT_OK(schema_importer.Import(schema));
+ return schema_importer.MakeSchema();
+}
+
+//////////////////////////////////////////////////////////////////////////
+// C data import
+
+namespace {
+
+// Holder for a C ArrowArray struct that has been imported.
+// The struct starts out marked as released; whatever it holds when this
+// object is destroyed gets released then.
+struct ImportedArrayData {
+ struct ArrowArray array_;
+
+ ImportedArrayData() {
+ // Nothing has been imported yet: make the struct look released.
+ ArrowArrayMarkReleased(&array_);
+ }
+
+ ~ImportedArrayData() { Release(); }
+
+ // Release the wrapped array; does nothing if it is already released.
+ void Release() {
+ if (ArrowArrayIsReleased(&array_)) {
+ return;
+ }
+ ArrowArrayRelease(&array_);
+ DCHECK(ArrowArrayIsReleased(&array_));
+ }
+
+ private:
+ ARROW_DISALLOW_COPY_AND_ASSIGN(ImportedArrayData);
+};
+
+// A Buffer over memory that belongs to an imported ArrowArray.
+// It holds a shared reference to the ImportedArrayData for as long as the
+// buffer itself exists.
+class ImportedBuffer : public Buffer {
+ public:
+ ImportedBuffer(const uint8_t* data, int64_t size,
+ std::shared_ptr<ImportedArrayData> import)
+ : Buffer(data, size), import_(std::move(import)) {}
+
+ ~ImportedBuffer() override = default;
+
+ protected:
+ std::shared_ptr<ImportedArrayData> import_;
+};
+
+// Imports a C ArrowArray struct, together with its children and optional
+// dictionary, as ArrayData of the type given at construction.
+struct ArrayImporter {
+ explicit ArrayImporter(const std::shared_ptr<DataType>& type) : type_(type) {}
+
+ // Import a top-level array. The contents of `src` are moved into a newly
+ // allocated ImportedArrayData before the actual import runs.
+ Status Import(struct ArrowArray* src) {
+ if (ArrowArrayIsReleased(src)) {
+ return Status::Invalid("Cannot import released ArrowArray");
+ }
+ recursion_level_ = 0;
+ import_ = std::make_shared<ImportedArrayData>();
+ c_struct_ = &import_->array_;
+ ArrowArrayMove(src, c_struct_);
+ return DoImport();
+ }
+
+ // Wrap the imported ArrayData in an Array. Import() must have succeeded.
+ Result<std::shared_ptr<Array>> MakeArray() {
+ DCHECK_NE(data_, nullptr);
+ return ::arrow::MakeArray(data_);
+ }
+
+ // Return the imported ArrayData. Import() must have succeeded.
+ std::shared_ptr<ArrayData> GetArrayData() {
+ DCHECK_NE(data_, nullptr);
+ return data_;
+ }
+
+ // Turn the imported data into a RecordBatch whose columns are the imported
+ // children. Fails if the imported data has nulls or a non-zero offset.
+ // The child data is moved out of this importer.
+ Result<std::shared_ptr<RecordBatch>> MakeRecordBatch(std::shared_ptr<Schema> schema) {
+ DCHECK_NE(data_, nullptr);
+ if (data_->GetNullCount() != 0) {
+ return Status::Invalid(
+ "ArrowArray struct has non-zero null count, "
+ "cannot be imported as RecordBatch");
+ }
+ if (data_->offset != 0) {
+ return Status::Invalid(
+ "ArrowArray struct has non-zero offset, "
+ "cannot be imported as RecordBatch");
+ }
+ return RecordBatch::Make(std::move(schema), data_->length,
+ std::move(data_->child_data));
+ }
+
+ // Import `src` as a child of `parent`, one recursion level deeper.
+ Status ImportChild(const ArrayImporter* parent, struct ArrowArray* src) {
+ if (ArrowArrayIsReleased(src)) {
+ return Status::Invalid("Cannot import released ArrowArray");
+ }
+ recursion_level_ = parent->recursion_level_ + 1;
+ if (recursion_level_ >= kMaxImportRecursionLevel) {
+ return Status::Invalid("Recursion level in ArrowArray struct exceeded");
+ }
+ // Child buffers will keep the entire parent import alive.
+ // Perhaps we can move the child structs to an owned area
+ // when the parent ImportedArrayData::Release() gets called,
+ // but that is another level of complication.
+ import_ = parent->import_;
+ // The ArrowArray shouldn't be moved, it's owned by its parent
+ c_struct_ = src;
+ return DoImport();
+ }
+
+ // Import the dictionary of `parent`; handled exactly like a child.
+ Status ImportDict(const ArrayImporter* parent, struct ArrowArray* src) {
+ return ImportChild(parent, src);
+ }
+
+ // Import children, then the array's own buffers, then the dictionary.
+ Status DoImport() {
+ // First import children (required for reconstituting parent array data)
+ const auto& fields = type_->fields();
+ if (c_struct_->n_children != static_cast<int64_t>(fields.size())) {
+ return Status::Invalid("ArrowArray struct has ", c_struct_->n_children,
+ " children, expected ", fields.size(), " for type ",
+ type_->ToString());
+ }
+ child_importers_.reserve(fields.size());
+ for (int64_t i = 0; i < c_struct_->n_children; ++i) {
+ DCHECK_NE(c_struct_->children[i], nullptr);
+ child_importers_.emplace_back(fields[i]->type());
+ RETURN_NOT_OK(child_importers_.back().ImportChild(this, c_struct_->children[i]));
+ }
+
+ // Import main data
+ RETURN_NOT_OK(ImportMainData());
+
+ // The dictionary pointer must be set if and only if the type is a
+ // dictionary type.
+ bool is_dict_type = (type_->id() == Type::DICTIONARY);
+ if (c_struct_->dictionary != nullptr) {
+ if (!is_dict_type) {
+ return Status::Invalid("Import type is ", type_->ToString(),
+ " but dictionary field in ArrowArray struct is not null");
+ }
+ const auto& dict_type = checked_cast<const DictionaryType&>(*type_);
+ // Import dictionary values
+ ArrayImporter dict_importer(dict_type.value_type());
+ RETURN_NOT_OK(dict_importer.ImportDict(this, c_struct_->dictionary));
+ data_->dictionary = dict_importer.GetArrayData();
+ } else {
+ if (is_dict_type) {
+ return Status::Invalid("Import type is ", type_->ToString(),
+ " but dictionary field in ArrowArray struct is null");
+ }
+ }
+ return Status::OK();
+ }
+
+ // Dispatch to the Visit() overload matching the concrete type.
+ Status ImportMainData() { return VisitTypeInline(*type_, this); }
+
+ // Fallback for types without a dedicated overload.
+ Status Visit(const DataType& type) {
+ return Status::NotImplemented("Cannot import array of type ", type_->ToString());
+ }
+
+ Status Visit(const FixedWidthType& type) { return ImportFixedSizePrimitive(); }
+
+ Status Visit(const NullType& type) {
+ RETURN_NOT_OK(CheckNoChildren());
+ // XXX should we be lenient on the number of buffers?
+ RETURN_NOT_OK(CheckNumBuffers(1));
+ RETURN_NOT_OK(AllocateArrayData());
+ RETURN_NOT_OK(ImportBitsBuffer(0));
+ return Status::OK();
+ }
+
+ Status Visit(const StringType& type) { return ImportStringLike(type); }
+
+ Status Visit(const BinaryType& type) { return ImportStringLike(type); }
+
+ Status Visit(const LargeStringType& type) { return ImportStringLike(type); }
+
+ Status Visit(const LargeBinaryType& type) { return ImportStringLike(type); }
+
+ Status Visit(const ListType& type) { return ImportListLike(type); }
+
+ Status Visit(const LargeListType& type) { return ImportListLike(type); }
+
+ Status Visit(const FixedSizeListType& type) {
+ RETURN_NOT_OK(CheckNumChildren(1));
+ RETURN_NOT_OK(CheckNumBuffers(1));
+ RETURN_NOT_OK(AllocateArrayData());
+ RETURN_NOT_OK(ImportNullBitmap());
+ return Status::OK();
+ }
+
+ Status Visit(const StructType& type) {
+ RETURN_NOT_OK(CheckNumBuffers(1));
+ RETURN_NOT_OK(AllocateArrayData());
+ RETURN_NOT_OK(ImportNullBitmap());
+ return Status::OK();
+ }
+
+ // Sparse unions carry two buffers, dense unions three: buffer 1 is imported
+ // with one int8_t per slot, buffer 2 (dense only) with one int32_t per slot.
+ Status Visit(const UnionType& type) {
+ auto mode = type.mode();
+ if (mode == UnionMode::SPARSE) {
+ RETURN_NOT_OK(CheckNumBuffers(2));
+ } else {
+ RETURN_NOT_OK(CheckNumBuffers(3));
+ }
+ RETURN_NOT_OK(AllocateArrayData());
+ RETURN_NOT_OK(ImportNullBitmap());
+ RETURN_NOT_OK(ImportFixedSizeBuffer(1, sizeof(int8_t)));
+ if (mode == UnionMode::DENSE) {
+ RETURN_NOT_OK(ImportFixedSizeBuffer(2, sizeof(int32_t)));
+ }
+ return Status::OK();
+ }
+
+ // Import a null bitmap plus one data buffer. Widths that are a multiple
+ // of 8 bits are imported bytewise, otherwise the data is a bitmap.
+ Status ImportFixedSizePrimitive() {
+ const auto& fw_type = checked_cast<const FixedWidthType&>(*type_);
+ RETURN_NOT_OK(CheckNoChildren());
+ RETURN_NOT_OK(CheckNumBuffers(2));
+ RETURN_NOT_OK(AllocateArrayData());
+ RETURN_NOT_OK(ImportNullBitmap());
+ if (BitUtil::IsMultipleOf8(fw_type.bit_width())) {
+ RETURN_NOT_OK(ImportFixedSizeBuffer(1, fw_type.bit_width() / 8));
+ } else {
+ DCHECK_EQ(fw_type.bit_width(), 1);
+ RETURN_NOT_OK(ImportBitsBuffer(1));
+ }
+ return Status::OK();
+ }
+
+ // Import a null bitmap, an offsets buffer and a values buffer.
+ template <typename StringType>
+ Status ImportStringLike(const StringType& type) {
+ RETURN_NOT_OK(CheckNoChildren());
+ RETURN_NOT_OK(CheckNumBuffers(3));
+ RETURN_NOT_OK(AllocateArrayData());
+ RETURN_NOT_OK(ImportNullBitmap());
+ RETURN_NOT_OK(ImportOffsetsBuffer<typename StringType::offset_type>(1));
+ RETURN_NOT_OK(ImportStringValuesBuffer<typename StringType::offset_type>(1, 2));
+ return Status::OK();
+ }
+
+ // Import a null bitmap and an offsets buffer; values live in the child.
+ template <typename ListType>
+ Status ImportListLike(const ListType& type) {
+ RETURN_NOT_OK(CheckNumChildren(1));
+ RETURN_NOT_OK(CheckNumBuffers(2));
+ RETURN_NOT_OK(AllocateArrayData());
+ RETURN_NOT_OK(ImportNullBitmap());
+ RETURN_NOT_OK(ImportOffsetsBuffer<typename ListType::offset_type>(1));
+ return Status::OK();
+ }
+
+ Status CheckNoChildren() { return CheckNumChildren(0); }
+
+ Status CheckNumChildren(int64_t n_children) {
+ if (c_struct_->n_children != n_children) {
+ return Status::Invalid("Expected ", n_children, " children for imported type ",
+ type_->ToString(), ", ArrowArray struct has ",
+ c_struct_->n_children);
+ }
+ return Status::OK();
+ }
+
+ Status CheckNumBuffers(int64_t n_buffers) {
+ if (n_buffers != c_struct_->n_buffers) {
+ return Status::Invalid("Expected ", n_buffers, " buffers for imported type ",
+ type_->ToString(), ", ArrowArray struct has ",
+ c_struct_->n_buffers);
+ }
+ return Status::OK();
+ }
+
+ // Create data_ with length / null count / offset taken from the C struct,
+ // empty buffer slots, and the child data of the already imported children.
+ Status AllocateArrayData() {
+ DCHECK_EQ(data_, nullptr);
+ data_ = std::make_shared<ArrayData>(type_, c_struct_->length, c_struct_->null_count,
+ c_struct_->offset);
+ data_->buffers.resize(static_cast<size_t>(c_struct_->n_buffers));
+ data_->child_data.resize(static_cast<size_t>(c_struct_->n_children));
+ DCHECK_EQ(child_importers_.size(), data_->child_data.size());
+ std::transform(child_importers_.begin(), child_importers_.end(),
+ data_->child_data.begin(),
+ [](const ArrayImporter& child) { return child.data_; });
+ return Status::OK();
+ }
+
+ // Import the validity bitmap; it may only be absent if null_count is not
+ // positive.
+ Status ImportNullBitmap(int32_t buffer_id = 0) {
+ RETURN_NOT_OK(ImportBitsBuffer(buffer_id));
+ if (data_->null_count > 0 && data_->buffers[buffer_id] == nullptr) {
+ return Status::Invalid(
+ "ArrowArray struct has null bitmap buffer but non-zero null_count ",
+ data_->null_count);
+ }
+ return Status::OK();
+ }
+
+ Status ImportBitsBuffer(int32_t buffer_id) {
+ // Compute visible size of buffer
+ int64_t buffer_size = BitUtil::BytesForBits(c_struct_->length + c_struct_->offset);
+ return ImportBuffer(buffer_id, buffer_size);
+ }
+
+ Status ImportFixedSizeBuffer(int32_t buffer_id, int64_t byte_width) {
+ // Compute visible size of buffer
+ int64_t buffer_size = byte_width * (c_struct_->length + c_struct_->offset);
+ return ImportBuffer(buffer_id, buffer_size);
+ }
+
+ template <typename OffsetType>
+ Status ImportOffsetsBuffer(int32_t buffer_id) {
+ // Compute visible size of buffer
+ int64_t buffer_size =
+ sizeof(OffsetType) * (c_struct_->length + c_struct_->offset + 1);
+ return ImportBuffer(buffer_id, buffer_size);
+ }
+
+ // Import the values buffer of a string-like array. Its size is given by
+ // the last visible offset, so the offsets buffer must be imported first.
+ template <typename OffsetType>
+ Status ImportStringValuesBuffer(int32_t offsets_buffer_id, int32_t buffer_id,
+ int64_t byte_width = 1) {
+ int64_t buffer_size = 0;
+ // An empty array has no visible values: do not read the offsets at all,
+ // the producer may not have supplied any.
+ if (c_struct_->length > 0) {
+ // ImportBuffer() leaves the slot empty when the C pointer is null;
+ // indexing into a missing offsets buffer would dereference null.
+ if (data_->buffers[offsets_buffer_id] == nullptr) {
+ return Status::Invalid("ArrowArray struct has null offsets buffer for imported type ",
+ type_->ToString());
+ }
+ auto offsets = data_->GetValues<OffsetType>(offsets_buffer_id);
+ // Compute visible size of buffer
+ buffer_size = byte_width * offsets[c_struct_->length];
+ }
+ return ImportBuffer(buffer_id, buffer_size);
+ }
+
+ // Wrap C buffer number `buffer_id` in an ImportedBuffer of `buffer_size`
+ // bytes, or clear the slot if the C pointer is null.
+ Status ImportBuffer(int32_t buffer_id, int64_t buffer_size) {
+ std::shared_ptr<Buffer>* out = &data_->buffers[buffer_id];
+ auto data = reinterpret_cast<const uint8_t*>(c_struct_->buffers[buffer_id]);
+ if (data != nullptr) {
+ *out = std::make_shared<ImportedBuffer>(data, buffer_size, import_);
+ } else {
+ out->reset();
+ }
+ return Status::OK();
+ }
+
+ struct ArrowArray* c_struct_ = nullptr;
+ int64_t recursion_level_ = 0;
+ // Held by value: a reference member would dangle whenever the importer is
+ // constructed from a temporary shared_ptr.
+ std::shared_ptr<DataType> type_;
+
+ std::shared_ptr<ImportedArrayData> import_;
+ std::shared_ptr<ArrayData> data_;
+ std::vector<ArrayImporter> child_importers_;
+};
+
+} // namespace
+
+// Import a C ArrowArray struct as an Array of the given type.
+Result<std::shared_ptr<Array>> ImportArray(struct ArrowArray* array,
+ std::shared_ptr<DataType> type) {
+ ArrayImporter array_importer(type);
+ RETURN_NOT_OK(array_importer.Import(array));
+ return array_importer.MakeArray();
+}
+
+// Import a C ArrowArray struct whose type is described by a C ArrowSchema.
+// If the type cannot be imported, the array is released before the error
+// is returned.
+Result<std::shared_ptr<Array>> ImportArray(struct ArrowArray* array,
+ struct ArrowSchema* type) {
+ auto maybe_type = ImportType(type);
+ if (maybe_type.ok()) {
+ return ImportArray(array, *maybe_type);
+ }
+ ArrowArrayRelease(array);
+ return maybe_type.status();
+}
+
+// Import a C ArrowArray struct as a RecordBatch with the given schema.
+// The array is imported as a struct whose fields are the schema's fields.
+Result<std::shared_ptr<RecordBatch>> ImportRecordBatch(struct ArrowArray* array,
+ std::shared_ptr<Schema> schema) {
+ // Keep the struct type in a named local: it is handed to the importer
+ // by reference.
+ auto struct_type = struct_(schema->fields());
+ ArrayImporter batch_importer(struct_type);
+ RETURN_NOT_OK(batch_importer.Import(array));
+ return batch_importer.MakeRecordBatch(std::move(schema));
+}
+
+// Import a C ArrowArray struct as a RecordBatch whose schema is described
+// by a C ArrowSchema. If the schema cannot be imported, the array is
+// released before the error is returned.
+Result<std::shared_ptr<RecordBatch>> ImportRecordBatch(struct ArrowArray* array,
+ struct ArrowSchema* schema) {
+ auto maybe_schema = ImportSchema(schema);
+ if (maybe_schema.ok()) {
+ return ImportRecordBatch(array, *maybe_schema);
+ }
+ ArrowArrayRelease(array);
+ return maybe_schema.status();
+}
+
+//////////////////////////////////////////////////////////////////////////
+// C stream export
+
+namespace {
+
+// Thin view over an exported ArrowArrayStream. The state proper lives in
+// the PrivateData object stored in the stream's private_data pointer.
+class ExportedArrayStream {
+ public:
+ // State attached to an exported stream: the reader being exported and
+ // the message of the last error reported through the C callbacks.
+ struct PrivateData {
+ explicit PrivateData(std::shared_ptr<RecordBatchReader> reader)
+ : reader_(std::move(reader)) {}
+
+ std::shared_ptr<RecordBatchReader> reader_;
+ std::string last_error_;
+
+ PrivateData() = default;
+ ARROW_DISALLOW_COPY_AND_ASSIGN(PrivateData);
+ };
+
+ explicit ExportedArrayStream(struct ArrowArrayStream* stream) : stream_(stream) {}
+
+ // Export the reader's schema into `out_schema`.
+ Status GetSchema(struct ArrowSchema* out_schema) {
+ return ExportSchema(*reader()->schema(), out_schema);
+ }
+
+ // Export the next batch into `out_array`; at end of stream `out_array`
+ // is marked released instead.
+ Status GetNext(struct ArrowArray* out_array) {
+ std::shared_ptr<RecordBatch> next_batch;
+ RETURN_NOT_OK(reader()->ReadNext(&next_batch));
+ if (next_batch != nullptr) {
+ return ExportRecordBatch(*next_batch, out_array);
+ }
+ // End of stream
+ ArrowArrayMarkReleased(out_array);
+ return Status::OK();
+ }
+
+ // Message of the last recorded error, or null if none is recorded.
+ const char* GetLastError() {
+ const std::string& message = private_data()->last_error_;
+ if (message.empty()) {
+ return nullptr;
+ }
+ return message.c_str();
+ }
+
+ // Destroy the private data and mark the stream released; does nothing
+ // if the stream is already released.
+ void Release() {
+ if (!ArrowArrayStreamIsReleased(stream_)) {
+ DCHECK_NE(private_data(), nullptr);
+ delete private_data();
+
+ ArrowArrayStreamMarkReleased(stream_);
+ }
+ }
+
+ // C-compatible callbacks
+
+ static int StaticGetSchema(struct ArrowArrayStream* stream,
+ struct ArrowSchema* out_schema) {
+ ExportedArrayStream exported{stream};
+ const Status status = exported.GetSchema(out_schema);
+ return exported.ToCError(status);
+ }
+
+ static int StaticGetNext(struct ArrowArrayStream* stream,
+ struct ArrowArray* out_array) {
+ ExportedArrayStream exported{stream};
+ const Status status = exported.GetNext(out_array);
+ return exported.ToCError(status);
+ }
+
+ static void StaticRelease(struct ArrowArrayStream* stream) {
+ ExportedArrayStream exported{stream};
+ exported.Release();
+ }
+
+ static const char* StaticGetLastError(struct ArrowArrayStream* stream) {
+ ExportedArrayStream exported{stream};
+ return exported.GetLastError();
+ }
+
+ private:
+ // Record `status` as the last error (or clear it on success) and map it
+ // to an errno-like return code.
+ int ToCError(const Status& status) {
+ std::string& last_error = private_data()->last_error_;
+ if (ARROW_PREDICT_TRUE(status.ok())) {
+ last_error.clear();
+ return 0;
+ }
+ last_error = status.ToString();
+ const StatusCode code = status.code();
+ if (code == StatusCode::IOError) {
+ return EIO;
+ }
+ if (code == StatusCode::NotImplemented) {
+ return ENOSYS;
+ }
+ if (code == StatusCode::OutOfMemory) {
+ return ENOMEM;
+ }
+ return EINVAL; // Fallback for Invalid, TypeError, etc.
+ }
+
+ PrivateData* private_data() {
+ return reinterpret_cast<PrivateData*>(stream_->private_data);
+ }
+
+ const std::shared_ptr<RecordBatchReader>& reader() { return private_data()->reader_; }
+
+ struct ArrowArrayStream* stream_;
+};
+
+} // namespace
+
+// Export `reader` as a C ArrowArrayStream by filling in the callbacks of
+// `out` and attaching a heap-allocated PrivateData that holds the reader
+// (deleted again by ExportedArrayStream::Release()).
+Status ExportRecordBatchReader(std::shared_ptr<RecordBatchReader> reader,
+ struct ArrowArrayStream* out) {
+ out->release = ExportedArrayStream::StaticRelease;
+ out->get_last_error = ExportedArrayStream::StaticGetLastError;
+ out->get_next = ExportedArrayStream::StaticGetNext;
+ out->get_schema = ExportedArrayStream::StaticGetSchema;
+ out->private_data = new ExportedArrayStream::PrivateData{std::move(reader)};
+ return Status::OK();
+}
+
+//////////////////////////////////////////////////////////////////////////
+// C stream import
+
+namespace {
+
+// A RecordBatchReader that pulls its batches from a C ArrowArrayStream.
+// The stream is moved into the reader at construction and released when
+// the reader is destroyed.
+class ArrayStreamBatchReader : public RecordBatchReader {
+ public:
+ explicit ArrayStreamBatchReader(struct ArrowArrayStream* stream) {
+ ArrowArrayStreamMove(stream, &stream_);
+ DCHECK(!ArrowArrayStreamIsReleased(&stream_));
+ }
+
+ ~ArrayStreamBatchReader() {
+ ArrowArrayStreamRelease(&stream_);
+ DCHECK(ArrowArrayStreamIsReleased(&stream_));
+ }
+
+ std::shared_ptr<Schema> schema() const override { return CacheSchema(); }
+
+ // Fetch the next batch from the stream. At end of stream (the producer
+ // hands back a released array) `*batch` is reset to null.
+ Status ReadNext(std::shared_ptr<RecordBatch>* batch) override {
+ struct ArrowArray c_array;
+ RETURN_NOT_OK(StatusFromCError(stream_.get_next(&stream_, &c_array)));
+ if (ArrowArrayIsReleased(&c_array)) {
+ // End of stream
+ batch->reset();
+ return Status::OK();
+ } else {
+ return ImportRecordBatch(&c_array, CacheSchema()).Value(batch);
+ }
+ }
+
+ private:
+ // Import the stream's schema on first use and cache it afterwards.
+ // NOTE(review): errors are not propagated here; ARROW_CHECK_OK and
+ // ValueOrDie presumably abort the process on failure -- confirm.
+ std::shared_ptr<Schema> CacheSchema() const {
+ if (!schema_) {
+ struct ArrowSchema c_schema;
+ ARROW_CHECK_OK(StatusFromCError(stream_.get_schema(&stream_, &c_schema)));
+ schema_ = ImportSchema(&c_schema).ValueOrDie();
+ }
+ return schema_;
+ }
+
+ // Convert an errno-like code returned by a stream callback into a Status
+ // carrying the stream's last error message (empty if there is none).
+ Status StatusFromCError(int errno_like) const {
+ if (ARROW_PREDICT_TRUE(errno_like == 0)) {
+ return Status::OK();
+ }
+ StatusCode code;
+ switch (errno_like) {
+ case EDOM:
+ case EINVAL:
+ case ERANGE:
+ code = StatusCode::Invalid;
+ break;
+ case ENOMEM:
+ code = StatusCode::OutOfMemory;
+ break;
+ case ENOSYS:
+ code = StatusCode::NotImplemented;
+ // Without this break the code fell through and became IOError.
+ break;
+ default:
+ code = StatusCode::IOError;
+ break;
+ }
+ const char* last_error = stream_.get_last_error(&stream_);
+ return Status(code, last_error ? std::string(last_error) : "");
+ }
+
+ // Mutable because the C callbacks take a non-const stream pointer and the
+ // schema is cached lazily from const methods.
+ mutable struct ArrowArrayStream stream_;
+ mutable std::shared_ptr<Schema> schema_;
+};
+
+} // namespace
+
+// Import a C ArrowArrayStream as a RecordBatchReader. A stream that is
+// already released is rejected.
+Result<std::shared_ptr<RecordBatchReader>> ImportRecordBatchReader(
+ struct ArrowArrayStream* stream) {
+ if (!ArrowArrayStreamIsReleased(stream)) {
+ // XXX should we call get_schema() here to avoid crashing on error?
+ return std::make_shared<ArrayStreamBatchReader>(stream);
+ }
+ return Status::Invalid("Cannot import released ArrowArrayStream");
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/c/bridge.h b/contrib/libs/apache/arrow/cpp/src/arrow/c/bridge.h
index 294f53e49fb..a60a8031b1e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/c/bridge.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/c/bridge.h
@@ -1,197 +1,197 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <memory>
-#include <string>
-
-#include "arrow/c/abi.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-/// \defgroup c-data-interface Functions for working with the C data interface.
-///
-/// @{
-
-/// \brief Export C++ DataType using the C data interface format.
-///
-/// The root type is considered to have empty name and metadata.
-/// If you want the root type to have a name and/or metadata, pass
-/// a Field instead.
-///
-/// \param[in] type DataType object to export
-/// \param[out] out C struct where to export the datatype
-ARROW_EXPORT
-Status ExportType(const DataType& type, struct ArrowSchema* out);
-
-/// \brief Export C++ Field using the C data interface format.
-///
-/// \param[in] field Field object to export
-/// \param[out] out C struct where to export the field
-ARROW_EXPORT
-Status ExportField(const Field& field, struct ArrowSchema* out);
-
-/// \brief Export C++ Schema using the C data interface format.
-///
-/// \param[in] schema Schema object to export
-/// \param[out] out C struct where to export the field
-ARROW_EXPORT
-Status ExportSchema(const Schema& schema, struct ArrowSchema* out);
-
-/// \brief Export C++ Array using the C data interface format.
-///
-/// The resulting ArrowArray struct keeps the array data and buffers alive
-/// until its release callback is called by the consumer.
-///
-/// \param[in] array Array object to export
-/// \param[out] out C struct where to export the array
-/// \param[out] out_schema optional C struct where to export the array type
-ARROW_EXPORT
-Status ExportArray(const Array& array, struct ArrowArray* out,
- struct ArrowSchema* out_schema = NULLPTR);
-
-/// \brief Export C++ RecordBatch using the C data interface format.
-///
-/// The record batch is exported as if it were a struct array.
-/// The resulting ArrowArray struct keeps the record batch data and buffers alive
-/// until its release callback is called by the consumer.
-///
-/// \param[in] batch Record batch to export
-/// \param[out] out C struct where to export the record batch
-/// \param[out] out_schema optional C struct where to export the record batch schema
-ARROW_EXPORT
-Status ExportRecordBatch(const RecordBatch& batch, struct ArrowArray* out,
- struct ArrowSchema* out_schema = NULLPTR);
-
-/// \brief Import C++ DataType from the C data interface.
-///
-/// The given ArrowSchema struct is released (as per the C data interface
-/// specification), even if this function fails.
-///
-/// \param[in,out] schema C data interface struct representing the data type
-/// \return Imported type object
-ARROW_EXPORT
-Result<std::shared_ptr<DataType>> ImportType(struct ArrowSchema* schema);
-
-/// \brief Import C++ Field from the C data interface.
-///
-/// The given ArrowSchema struct is released (as per the C data interface
-/// specification), even if this function fails.
-///
-/// \param[in,out] schema C data interface struct representing the field
-/// \return Imported field object
-ARROW_EXPORT
-Result<std::shared_ptr<Field>> ImportField(struct ArrowSchema* schema);
-
-/// \brief Import C++ Schema from the C data interface.
-///
-/// The given ArrowSchema struct is released (as per the C data interface
-/// specification), even if this function fails.
-///
-/// \param[in,out] schema C data interface struct representing the field
-/// \return Imported field object
-ARROW_EXPORT
-Result<std::shared_ptr<Schema>> ImportSchema(struct ArrowSchema* schema);
-
-/// \brief Import C++ array from the C data interface.
-///
-/// The ArrowArray struct has its contents moved (as per the C data interface
-/// specification) to a private object held alive by the resulting array.
-///
-/// \param[in,out] array C data interface struct holding the array data
-/// \param[in] type type of the imported array
-/// \return Imported array object
-ARROW_EXPORT
-Result<std::shared_ptr<Array>> ImportArray(struct ArrowArray* array,
- std::shared_ptr<DataType> type);
-
-/// \brief Import C++ array and its type from the C data interface.
-///
-/// The ArrowArray struct has its contents moved (as per the C data interface
-/// specification) to a private object held alive by the resulting array.
-/// The ArrowSchema struct is released, even if this function fails.
-///
-/// \param[in,out] array C data interface struct holding the array data
-/// \param[in,out] type C data interface struct holding the array type
-/// \return Imported array object
-ARROW_EXPORT
-Result<std::shared_ptr<Array>> ImportArray(struct ArrowArray* array,
- struct ArrowSchema* type);
-
-/// \brief Import C++ record batch from the C data interface.
-///
-/// The ArrowArray struct has its contents moved (as per the C data interface
-/// specification) to a private object held alive by the resulting record batch.
-///
-/// \param[in,out] array C data interface struct holding the record batch data
-/// \param[in] schema schema of the imported record batch
-/// \return Imported record batch object
-ARROW_EXPORT
-Result<std::shared_ptr<RecordBatch>> ImportRecordBatch(struct ArrowArray* array,
- std::shared_ptr<Schema> schema);
-
-/// \brief Import C++ record batch and its schema from the C data interface.
-///
-/// The type represented by the ArrowSchema struct must be a struct type array.
-/// The ArrowArray struct has its contents moved (as per the C data interface
-/// specification) to a private object held alive by the resulting record batch.
-/// The ArrowSchema struct is released, even if this function fails.
-///
-/// \param[in,out] array C data interface struct holding the record batch data
-/// \param[in,out] schema C data interface struct holding the record batch schema
-/// \return Imported record batch object
-ARROW_EXPORT
-Result<std::shared_ptr<RecordBatch>> ImportRecordBatch(struct ArrowArray* array,
- struct ArrowSchema* schema);
-
-/// @}
-
-/// \defgroup c-stream-interface Functions for working with the C data interface.
-///
-/// @{
-
-/// \brief EXPERIMENTAL: Export C++ RecordBatchReader using the C stream interface.
-///
-/// The resulting ArrowArrayStream struct keeps the record batch reader alive
-/// until its release callback is called by the consumer.
-///
-/// \param[in] reader RecordBatchReader object to export
-/// \param[out] out C struct where to export the stream
-ARROW_EXPORT
-Status ExportRecordBatchReader(std::shared_ptr<RecordBatchReader> reader,
- struct ArrowArrayStream* out);
-
-/// \brief EXPERIMENTAL: Import C++ RecordBatchReader from the C stream interface.
-///
-/// The ArrowArrayStream struct has its contents moved to a private object
-/// held alive by the resulting record batch reader.
-///
-/// \param[in,out] stream C stream interface struct
-/// \return Imported RecordBatchReader object
-ARROW_EXPORT
-Result<std::shared_ptr<RecordBatchReader>> ImportRecordBatchReader(
- struct ArrowArrayStream* stream);
-
-/// @}
-
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "arrow/c/abi.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+/// \defgroup c-data-interface Functions for working with the C data interface.
+///
+/// @{
+
+/// \brief Export C++ DataType using the C data interface format.
+///
+/// The root type is considered to have empty name and metadata.
+/// If you want the root type to have a name and/or metadata, pass
+/// a Field instead.
+///
+/// \param[in] type DataType object to export
+/// \param[out] out C struct where to export the datatype
+ARROW_EXPORT
+Status ExportType(const DataType& type, struct ArrowSchema* out);
+
+/// \brief Export C++ Field using the C data interface format.
+///
+/// \param[in] field Field object to export
+/// \param[out] out C struct where to export the field
+ARROW_EXPORT
+Status ExportField(const Field& field, struct ArrowSchema* out);
+
+/// \brief Export C++ Schema using the C data interface format.
+///
+/// \param[in] schema Schema object to export
+/// \param[out] out C struct where to export the schema
+ARROW_EXPORT
+Status ExportSchema(const Schema& schema, struct ArrowSchema* out);
+
+/// \brief Export C++ Array using the C data interface format.
+///
+/// The resulting ArrowArray struct keeps the array data and buffers alive
+/// until its release callback is called by the consumer.
+///
+/// \param[in] array Array object to export
+/// \param[out] out C struct where to export the array
+/// \param[out] out_schema optional C struct where to export the array type
+ARROW_EXPORT
+Status ExportArray(const Array& array, struct ArrowArray* out,
+ struct ArrowSchema* out_schema = NULLPTR);
+
+/// \brief Export C++ RecordBatch using the C data interface format.
+///
+/// The record batch is exported as if it were a struct array.
+/// The resulting ArrowArray struct keeps the record batch data and buffers alive
+/// until its release callback is called by the consumer.
+///
+/// \param[in] batch Record batch to export
+/// \param[out] out C struct where to export the record batch
+/// \param[out] out_schema optional C struct where to export the record batch schema
+ARROW_EXPORT
+Status ExportRecordBatch(const RecordBatch& batch, struct ArrowArray* out,
+ struct ArrowSchema* out_schema = NULLPTR);
+
+/// \brief Import C++ DataType from the C data interface.
+///
+/// The given ArrowSchema struct is released (as per the C data interface
+/// specification), even if this function fails.
+///
+/// \param[in,out] schema C data interface struct representing the data type
+/// \return Imported type object
+ARROW_EXPORT
+Result<std::shared_ptr<DataType>> ImportType(struct ArrowSchema* schema);
+
+/// \brief Import C++ Field from the C data interface.
+///
+/// The given ArrowSchema struct is released (as per the C data interface
+/// specification), even if this function fails.
+///
+/// \param[in,out] schema C data interface struct representing the field
+/// \return Imported field object
+ARROW_EXPORT
+Result<std::shared_ptr<Field>> ImportField(struct ArrowSchema* schema);
+
+/// \brief Import C++ Schema from the C data interface.
+///
+/// The given ArrowSchema struct is released (as per the C data interface
+/// specification), even if this function fails.
+///
+/// \param[in,out] schema C data interface struct representing the schema
+/// \return Imported schema object
+ARROW_EXPORT
+Result<std::shared_ptr<Schema>> ImportSchema(struct ArrowSchema* schema);
+
+/// \brief Import C++ array from the C data interface.
+///
+/// The ArrowArray struct has its contents moved (as per the C data interface
+/// specification) to a private object held alive by the resulting array.
+///
+/// \param[in,out] array C data interface struct holding the array data
+/// \param[in] type type of the imported array
+/// \return Imported array object
+ARROW_EXPORT
+Result<std::shared_ptr<Array>> ImportArray(struct ArrowArray* array,
+ std::shared_ptr<DataType> type);
+
+/// \brief Import C++ array and its type from the C data interface.
+///
+/// The ArrowArray struct has its contents moved (as per the C data interface
+/// specification) to a private object held alive by the resulting array.
+/// The ArrowSchema struct is released, even if this function fails.
+///
+/// \param[in,out] array C data interface struct holding the array data
+/// \param[in,out] type C data interface struct holding the array type
+/// \return Imported array object
+ARROW_EXPORT
+Result<std::shared_ptr<Array>> ImportArray(struct ArrowArray* array,
+ struct ArrowSchema* type);
+
+/// \brief Import C++ record batch from the C data interface.
+///
+/// The ArrowArray struct has its contents moved (as per the C data interface
+/// specification) to a private object held alive by the resulting record batch.
+///
+/// \param[in,out] array C data interface struct holding the record batch data
+/// \param[in] schema schema of the imported record batch
+/// \return Imported record batch object
+ARROW_EXPORT
+Result<std::shared_ptr<RecordBatch>> ImportRecordBatch(struct ArrowArray* array,
+ std::shared_ptr<Schema> schema);
+
+/// \brief Import C++ record batch and its schema from the C data interface.
+///
+/// The type represented by the ArrowSchema struct must be a struct type array.
+/// The ArrowArray struct has its contents moved (as per the C data interface
+/// specification) to a private object held alive by the resulting record batch.
+/// The ArrowSchema struct is released, even if this function fails.
+///
+/// \param[in,out] array C data interface struct holding the record batch data
+/// \param[in,out] schema C data interface struct holding the record batch schema
+/// \return Imported record batch object
+ARROW_EXPORT
+Result<std::shared_ptr<RecordBatch>> ImportRecordBatch(struct ArrowArray* array,
+ struct ArrowSchema* schema);
+
+/// @}
+
+/// \defgroup c-stream-interface Functions for working with the C stream interface.
+///
+/// @{
+
+/// \brief EXPERIMENTAL: Export C++ RecordBatchReader using the C stream interface.
+///
+/// The resulting ArrowArrayStream struct keeps the record batch reader alive
+/// until its release callback is called by the consumer.
+///
+/// \param[in] reader RecordBatchReader object to export
+/// \param[out] out C struct where to export the stream
+ARROW_EXPORT
+Status ExportRecordBatchReader(std::shared_ptr<RecordBatchReader> reader,
+ struct ArrowArrayStream* out);
+
+/// \brief EXPERIMENTAL: Import C++ RecordBatchReader from the C stream interface.
+///
+/// The ArrowArrayStream struct has its contents moved to a private object
+/// held alive by the resulting record batch reader.
+///
+/// \param[in,out] stream C stream interface struct
+/// \return Imported RecordBatchReader object
+ARROW_EXPORT
+Result<std::shared_ptr<RecordBatchReader>> ImportRecordBatchReader(
+ struct ArrowArrayStream* stream);
+
+/// @}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/c/helpers.h b/contrib/libs/apache/arrow/cpp/src/arrow/c/helpers.h
index a5c1f6fe4ba..112f38f12a1 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/c/helpers.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/c/helpers.h
@@ -1,117 +1,117 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <assert.h>
-#include <string.h>
-
-#include "arrow/c/abi.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// Query whether the C schema is released
-inline int ArrowSchemaIsReleased(const struct ArrowSchema* schema) {
- return schema->release == NULL;
-}
-
-/// Mark the C schema released (for use in release callbacks)
-inline void ArrowSchemaMarkReleased(struct ArrowSchema* schema) {
- schema->release = NULL;
-}
-
-/// Move the C schema from `src` to `dest`
-///
-/// Note `dest` must *not* point to a valid schema already, otherwise there
-/// will be a memory leak.
-inline void ArrowSchemaMove(struct ArrowSchema* src, struct ArrowSchema* dest) {
- assert(dest != src);
- assert(!ArrowSchemaIsReleased(src));
- memcpy(dest, src, sizeof(struct ArrowSchema));
- ArrowSchemaMarkReleased(src);
-}
-
-/// Release the C schema, if necessary, by calling its release callback
-inline void ArrowSchemaRelease(struct ArrowSchema* schema) {
- if (!ArrowSchemaIsReleased(schema)) {
- schema->release(schema);
- assert(ArrowSchemaIsReleased(schema));
- }
-}
-
-/// Query whether the C array is released
-inline int ArrowArrayIsReleased(const struct ArrowArray* array) {
- return array->release == NULL;
-}
-
-/// Mark the C array released (for use in release callbacks)
-inline void ArrowArrayMarkReleased(struct ArrowArray* array) { array->release = NULL; }
-
-/// Move the C array from `src` to `dest`
-///
-/// Note `dest` must *not* point to a valid array already, otherwise there
-/// will be a memory leak.
-inline void ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dest) {
- assert(dest != src);
- assert(!ArrowArrayIsReleased(src));
- memcpy(dest, src, sizeof(struct ArrowArray));
- ArrowArrayMarkReleased(src);
-}
-
-/// Release the C array, if necessary, by calling its release callback
-inline void ArrowArrayRelease(struct ArrowArray* array) {
- if (!ArrowArrayIsReleased(array)) {
- array->release(array);
- assert(ArrowArrayIsReleased(array));
- }
-}
-
-/// Query whether the C array stream is released
-inline int ArrowArrayStreamIsReleased(const struct ArrowArrayStream* stream) {
- return stream->release == NULL;
-}
-
-/// Mark the C array stream released (for use in release callbacks)
-inline void ArrowArrayStreamMarkReleased(struct ArrowArrayStream* stream) {
- stream->release = NULL;
-}
-
-/// Move the C array stream from `src` to `dest`
-///
-/// Note `dest` must *not* point to a valid stream already, otherwise there
-/// will be a memory leak.
-inline void ArrowArrayStreamMove(struct ArrowArrayStream* src,
- struct ArrowArrayStream* dest) {
- assert(dest != src);
- assert(!ArrowArrayStreamIsReleased(src));
- memcpy(dest, src, sizeof(struct ArrowArrayStream));
- ArrowArrayStreamMarkReleased(src);
-}
-
-/// Release the C array stream, if necessary, by calling its release callback
-inline void ArrowArrayStreamRelease(struct ArrowArrayStream* stream) {
- if (!ArrowArrayStreamIsReleased(stream)) {
- stream->release(stream);
- assert(ArrowArrayStreamIsReleased(stream));
- }
-}
-
-#ifdef __cplusplus
-}
-#endif
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <assert.h>
+#include <string.h>
+
+#include "arrow/c/abi.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// Query whether the C schema is released
+///
+/// Returns non-zero when `schema->release` is NULL and zero otherwise.
+/// `schema` is dereferenced unconditionally, so it must not be NULL.
+inline int ArrowSchemaIsReleased(const struct ArrowSchema* schema) {
+ return schema->release == NULL;
+}
+
+/// Mark the C schema released (for use in release callbacks)
+///
+/// Only clears the `release` callback pointer; no other member of the struct
+/// is modified or freed here.
+inline void ArrowSchemaMarkReleased(struct ArrowSchema* schema) {
+ schema->release = NULL;
+}
+
+/// Move the C schema from `src` to `dest`
+///
+/// The struct is copied bytewise into `dest`, after which `src` is marked
+/// released. `src` must be distinct from `dest` and not yet released; both
+/// conditions are checked with assert().
+///
+/// Note `dest` must *not* point to a valid schema already, otherwise there
+/// will be a memory leak.
+inline void ArrowSchemaMove(struct ArrowSchema* src, struct ArrowSchema* dest) {
+  assert(dest != src);
+  assert(!ArrowSchemaIsReleased(src));
+  memcpy(dest, src, sizeof(*src));
+  src->release = NULL;
+}
+
+/// Release the C schema, if necessary, by calling its release callback
+///
+/// Does nothing when the schema is already released. After the callback
+/// returns, the schema is expected to be marked released (checked with
+/// assert()).
+inline void ArrowSchemaRelease(struct ArrowSchema* schema) {
+  if (ArrowSchemaIsReleased(schema)) {
+    return;
+  }
+  schema->release(schema);
+  assert(ArrowSchemaIsReleased(schema));
+}
+
+/// Query whether the C array is released
+///
+/// Returns non-zero when `array->release` is NULL and zero otherwise.
+/// `array` is dereferenced unconditionally, so it must not be NULL.
+inline int ArrowArrayIsReleased(const struct ArrowArray* array) {
+ return array->release == NULL;
+}
+
+/// Mark the C array released (for use in release callbacks)
+///
+/// Only clears the `release` callback pointer; no other member of the struct
+/// is modified or freed here.
+inline void ArrowArrayMarkReleased(struct ArrowArray* array) {
+  array->release = NULL;
+}
+
+/// Move the C array from `src` to `dest`
+///
+/// The struct is copied bytewise into `dest`, after which `src` is marked
+/// released. `src` must be distinct from `dest` and not yet released; both
+/// conditions are checked with assert().
+///
+/// Note `dest` must *not* point to a valid array already, otherwise there
+/// will be a memory leak.
+inline void ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dest) {
+  assert(dest != src);
+  assert(!ArrowArrayIsReleased(src));
+  memcpy(dest, src, sizeof(*src));
+  src->release = NULL;
+}
+
+/// Release the C array, if necessary, by calling its release callback
+///
+/// Does nothing when the array is already released. After the callback
+/// returns, the array is expected to be marked released (checked with
+/// assert()).
+inline void ArrowArrayRelease(struct ArrowArray* array) {
+  if (ArrowArrayIsReleased(array)) {
+    return;
+  }
+  array->release(array);
+  assert(ArrowArrayIsReleased(array));
+}
+
+/// Query whether the C array stream is released
+///
+/// Returns non-zero when `stream->release` is NULL and zero otherwise.
+/// `stream` is dereferenced unconditionally, so it must not be NULL.
+inline int ArrowArrayStreamIsReleased(const struct ArrowArrayStream* stream) {
+ return stream->release == NULL;
+}
+
+/// Mark the C array stream released (for use in release callbacks)
+///
+/// Only clears the `release` callback pointer; no other member of the struct
+/// is modified or freed here.
+inline void ArrowArrayStreamMarkReleased(struct ArrowArrayStream* stream) {
+ stream->release = NULL;
+}
+
+/// Move the C array stream from `src` to `dest`
+///
+/// The struct is copied bytewise into `dest`, after which `src` is marked
+/// released. `src` must be distinct from `dest` and not yet released; both
+/// conditions are checked with assert().
+///
+/// Note `dest` must *not* point to a valid stream already, otherwise there
+/// will be a memory leak.
+inline void ArrowArrayStreamMove(struct ArrowArrayStream* src,
+                                 struct ArrowArrayStream* dest) {
+  assert(dest != src);
+  assert(!ArrowArrayStreamIsReleased(src));
+  memcpy(dest, src, sizeof(*src));
+  src->release = NULL;
+}
+
+/// Release the C array stream, if necessary, by calling its release callback
+///
+/// Does nothing when the stream is already released. After the callback
+/// returns, the stream is expected to be marked released (checked with
+/// assert()).
+inline void ArrowArrayStreamRelease(struct ArrowArrayStream* stream) {
+  if (ArrowArrayStreamIsReleased(stream)) {
+    return;
+  }
+  stream->release(stream);
+  assert(ArrowArrayStreamIsReleased(stream));
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/c/util_internal.h b/contrib/libs/apache/arrow/cpp/src/arrow/c/util_internal.h
index 6a33be9b0da..58e035372ce 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/c/util_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/c/util_internal.h
@@ -1,85 +1,85 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "arrow/c/helpers.h"
-
-namespace arrow {
-namespace internal {
-
-struct SchemaExportTraits {
- typedef struct ArrowSchema CType;
- static constexpr auto IsReleasedFunc = &ArrowSchemaIsReleased;
- static constexpr auto ReleaseFunc = &ArrowSchemaRelease;
-};
-
-struct ArrayExportTraits {
- typedef struct ArrowArray CType;
- static constexpr auto IsReleasedFunc = &ArrowArrayIsReleased;
- static constexpr auto ReleaseFunc = &ArrowArrayRelease;
-};
-
-struct ArrayStreamExportTraits {
- typedef struct ArrowArrayStream CType;
- static constexpr auto IsReleasedFunc = &ArrowArrayStreamIsReleased;
- static constexpr auto ReleaseFunc = &ArrowArrayStreamRelease;
-};
-
-// A RAII-style object to release a C Array / Schema struct at block scope exit.
-template <typename Traits>
-class ExportGuard {
- public:
- using CType = typename Traits::CType;
-
- explicit ExportGuard(CType* c_export) : c_export_(c_export) {}
-
- ExportGuard(ExportGuard&& other) : c_export_(other.c_export_) {
- other.c_export_ = nullptr;
- }
-
- ExportGuard& operator=(ExportGuard&& other) {
- Release();
- c_export_ = other.c_export_;
- other.c_export_ = nullptr;
- }
-
- ~ExportGuard() { Release(); }
-
- void Detach() { c_export_ = nullptr; }
-
- void Reset(CType* c_export) { c_export_ = c_export; }
-
- void Release() {
- if (c_export_) {
- Traits::ReleaseFunc(c_export_);
- c_export_ = nullptr;
- }
- }
-
- private:
- ARROW_DISALLOW_COPY_AND_ASSIGN(ExportGuard);
-
- CType* c_export_;
-};
-
-using SchemaExportGuard = ExportGuard<SchemaExportTraits>;
-using ArrayExportGuard = ExportGuard<ArrayExportTraits>;
-using ArrayStreamExportGuard = ExportGuard<ArrayStreamExportTraits>;
-
-} // namespace internal
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/c/helpers.h"
+
+namespace arrow {
+namespace internal {
+
+// ExportGuard traits for struct ArrowSchema: the guarded C type plus pointers
+// to its "is released" and "release" helper functions.
+struct SchemaExportTraits {
+  using CType = struct ArrowSchema;
+  static constexpr auto IsReleasedFunc = &ArrowSchemaIsReleased;
+  static constexpr auto ReleaseFunc = &ArrowSchemaRelease;
+};
+
+// ExportGuard traits for struct ArrowArray: the guarded C type plus pointers
+// to its "is released" and "release" helper functions.
+struct ArrayExportTraits {
+  using CType = struct ArrowArray;
+  static constexpr auto IsReleasedFunc = &ArrowArrayIsReleased;
+  static constexpr auto ReleaseFunc = &ArrowArrayRelease;
+};
+
+// ExportGuard traits for struct ArrowArrayStream: the guarded C type plus
+// pointers to its "is released" and "release" helper functions.
+struct ArrayStreamExportTraits {
+  using CType = struct ArrowArrayStream;
+  static constexpr auto IsReleasedFunc = &ArrowArrayStreamIsReleased;
+  static constexpr auto ReleaseFunc = &ArrowArrayStreamRelease;
+};
+
+// A RAII-style object to release a C Array / Schema / ArrayStream struct at
+// block scope exit.
+//
+// The guard only stores a pointer to the struct. Unless it has been detached,
+// it calls Traits::ReleaseFunc on that pointer when destroyed.
+template <typename Traits>
+class ExportGuard {
+ public:
+  using CType = typename Traits::CType;
+
+  explicit ExportGuard(CType* c_export) : c_export_(c_export) {}
+
+  // Move construction takes over the guarded pointer; `other` is left detached.
+  ExportGuard(ExportGuard&& other) : c_export_(other.c_export_) {
+    other.c_export_ = nullptr;
+  }
+
+  // Move assignment releases the currently guarded struct (if any), then takes
+  // over the pointer guarded by `other`, leaving `other` detached.
+  ExportGuard& operator=(ExportGuard&& other) {
+    Release();
+    c_export_ = other.c_export_;
+    other.c_export_ = nullptr;
+    // Required: flowing off the end of a non-void function is undefined
+    // behavior.
+    return *this;
+  }
+
+  ~ExportGuard() { Release(); }
+
+  // Stop guarding the struct without releasing it.
+  void Detach() { c_export_ = nullptr; }
+
+  // Guard `c_export` from now on. The previously guarded struct, if any, is
+  // NOT released by this call.
+  void Reset(CType* c_export) { c_export_ = c_export; }
+
+  // Release the guarded struct now; no-op when nothing is guarded.
+  void Release() {
+    if (c_export_) {
+      Traits::ReleaseFunc(c_export_);
+      c_export_ = nullptr;
+    }
+  }
+
+ private:
+  ARROW_DISALLOW_COPY_AND_ASSIGN(ExportGuard);
+
+  CType* c_export_;
+};
+
+using SchemaExportGuard = ExportGuard<SchemaExportTraits>;
+using ArrayExportGuard = ExportGuard<ArrayExportTraits>;
+using ArrayStreamExportGuard = ExportGuard<ArrayStreamExportTraits>;
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/chunked_array.cc b/contrib/libs/apache/arrow/cpp/src/arrow/chunked_array.cc
index 142bd0d8c89..7d0d5cab1b2 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/chunked_array.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/chunked_array.cc
@@ -1,123 +1,123 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/chunked_array.h"
-
-#include <algorithm>
-#include <cstdlib>
-#include <memory>
-#include <sstream>
-#include <utility>
-
-#include "arrow/array/array_base.h"
-#include "arrow/array/array_nested.h"
-#include "arrow/array/validate.h"
-#include "arrow/pretty_print.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/logging.h"
-
-namespace arrow {
-
-using internal::checked_cast;
-
-class MemoryPool;
-
-// ----------------------------------------------------------------------
-// ChunkedArray methods
-
-ChunkedArray::ChunkedArray(ArrayVector chunks) : chunks_(std::move(chunks)) {
- length_ = 0;
- null_count_ = 0;
-
- ARROW_CHECK_GT(chunks_.size(), 0)
- << "cannot construct ChunkedArray from empty vector and omitted type";
- type_ = chunks_[0]->type();
- for (const std::shared_ptr<Array>& chunk : chunks_) {
- length_ += chunk->length();
- null_count_ += chunk->null_count();
- }
-}
-
-ChunkedArray::ChunkedArray(ArrayVector chunks, std::shared_ptr<DataType> type)
- : chunks_(std::move(chunks)), type_(std::move(type)) {
- length_ = 0;
- null_count_ = 0;
- for (const std::shared_ptr<Array>& chunk : chunks_) {
- length_ += chunk->length();
- null_count_ += chunk->null_count();
- }
-}
-
-Result<std::shared_ptr<ChunkedArray>> ChunkedArray::Make(ArrayVector chunks,
- std::shared_ptr<DataType> type) {
- if (type == nullptr) {
- if (chunks.size() == 0) {
- return Status::Invalid(
- "cannot construct ChunkedArray from empty vector "
- "and omitted type");
- }
- type = chunks[0]->type();
- }
- for (size_t i = 0; i < chunks.size(); ++i) {
- if (!chunks[i]->type()->Equals(*type)) {
- return Status::Invalid("Array chunks must all be same type");
- }
- }
- return std::make_shared<ChunkedArray>(std::move(chunks), std::move(type));
-}
-
-bool ChunkedArray::Equals(const ChunkedArray& other) const {
- if (length_ != other.length()) {
- return false;
- }
- if (null_count_ != other.null_count()) {
- return false;
- }
- // We cannot toggle check_metadata here yet, so we don't check it
- if (!type_->Equals(*other.type_, /*check_metadata=*/false)) {
- return false;
- }
-
- // Check contents of the underlying arrays. This checks for equality of
- // the underlying data independently of the chunk size.
- return internal::ApplyBinaryChunked(
- *this, other,
- [](const Array& left_piece, const Array& right_piece,
- int64_t ARROW_ARG_UNUSED(position)) {
- if (!left_piece.Equals(right_piece)) {
- return Status::Invalid("Unequal piece");
- }
- return Status::OK();
- })
- .ok();
-}
-
-bool ChunkedArray::Equals(const std::shared_ptr<ChunkedArray>& other) const {
- if (this == other.get()) {
- return true;
- }
- if (!other) {
- return false;
- }
- return Equals(*other.get());
-}
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/chunked_array.h"
+
+#include <algorithm>
+#include <cstdlib>
+#include <memory>
+#include <sstream>
+#include <utility>
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/array_nested.h"
+#include "arrow/array/validate.h"
+#include "arrow/pretty_print.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+
+class MemoryPool;
+
+// ----------------------------------------------------------------------
+// ChunkedArray methods
+
+// Build a ChunkedArray whose data type is taken from the first chunk.
+// The ARROW_CHECK_GT below rejects an empty `chunks` vector, since no type
+// could be inferred from it.
+ChunkedArray::ChunkedArray(ArrayVector chunks)
+    : chunks_(std::move(chunks)), length_(0), null_count_(0) {
+  ARROW_CHECK_GT(chunks_.size(), 0)
+      << "cannot construct ChunkedArray from empty vector and omitted type";
+  type_ = chunks_.front()->type();
+
+  // Accumulate the total length and null count over all chunks.
+  for (const auto& piece : chunks_) {
+    length_ += piece->length();
+    null_count_ += piece->null_count();
+  }
+}
+
+// Build a ChunkedArray with an explicitly supplied data type. No check is
+// made here that the chunks actually have that type; `chunks` may be empty.
+ChunkedArray::ChunkedArray(ArrayVector chunks, std::shared_ptr<DataType> type)
+    : chunks_(std::move(chunks)),
+      length_(0),
+      null_count_(0),
+      type_(std::move(type)) {
+  // Accumulate the total length and null count over all chunks.
+  for (const auto& piece : chunks_) {
+    length_ += piece->length();
+    null_count_ += piece->null_count();
+  }
+}
+
+Result<std::shared_ptr<ChunkedArray>> ChunkedArray::Make(ArrayVector chunks,
+ std::shared_ptr<DataType> type) {
+ if (type == nullptr) {
+ if (chunks.size() == 0) {
+ return Status::Invalid(
+ "cannot construct ChunkedArray from empty vector "
+ "and omitted type");
+ }
+ type = chunks[0]->type();
+ }
+ for (size_t i = 0; i < chunks.size(); ++i) {
+ if (!chunks[i]->type()->Equals(*type)) {
+ return Status::Invalid("Array chunks must all be same type");
+ }
+ }
+ return std::make_shared<ChunkedArray>(std::move(chunks), std::move(type));
+}
+
+// Compare two chunked arrays for equality, independently of how each one is
+// split into chunks.
+bool ChunkedArray::Equals(const ChunkedArray& other) const {
+  // Cheap rejections first: total length and total null count.
+  const bool same_counts =
+      length_ == other.length() && null_count_ == other.null_count();
+  if (!same_counts) {
+    return false;
+  }
+  // We cannot toggle check_metadata here yet, so we don't check it
+  if (!type_->Equals(*other.type_, /*check_metadata=*/false)) {
+    return false;
+  }
+
+  // Walk both arrays in lock-step over aligned pieces; the first unequal
+  // pair produces a non-OK Status, which stops the traversal.
+  auto compare_pieces = [](const Array& left_piece, const Array& right_piece,
+                           int64_t ARROW_ARG_UNUSED(position)) -> Status {
+    return left_piece.Equals(right_piece) ? Status::OK()
+                                          : Status::Invalid("Unequal piece");
+  };
+  const Status outcome = internal::ApplyBinaryChunked(*this, other, compare_pieces);
+  return outcome.ok();
+}
+
+// Pointer-taking overload: a null `other` is never equal, the same object is
+// always equal, anything else defers to the reference overload.
+bool ChunkedArray::Equals(const std::shared_ptr<ChunkedArray>& other) const {
+  const ChunkedArray* rhs = other.get();
+  if (rhs == nullptr) {
+    return false;
+  }
+  return (rhs == this) || Equals(*rhs);
+}
+
bool ChunkedArray::ApproxEquals(const ChunkedArray& other,
const EqualOptions& equal_options) const {
if (length_ != other.length()) {
@@ -145,150 +145,150 @@ bool ChunkedArray::ApproxEquals(const ChunkedArray& other,
.ok();
}
-std::shared_ptr<ChunkedArray> ChunkedArray::Slice(int64_t offset, int64_t length) const {
- ARROW_CHECK_LE(offset, length_) << "Slice offset greater than array length";
- bool offset_equals_length = offset == length_;
- int curr_chunk = 0;
- while (curr_chunk < num_chunks() && offset >= chunk(curr_chunk)->length()) {
- offset -= chunk(curr_chunk)->length();
- curr_chunk++;
- }
-
- ArrayVector new_chunks;
- if (num_chunks() > 0 && (offset_equals_length || length == 0)) {
- // Special case the zero-length slice to make sure there is at least 1 Array
- // in the result. When there are zero chunks we return zero chunks
- new_chunks.push_back(chunk(std::min(curr_chunk, num_chunks() - 1))->Slice(0, 0));
- } else {
- while (curr_chunk < num_chunks() && length > 0) {
- new_chunks.push_back(chunk(curr_chunk)->Slice(offset, length));
- length -= chunk(curr_chunk)->length() - offset;
- offset = 0;
- curr_chunk++;
- }
- }
-
- return std::make_shared<ChunkedArray>(new_chunks, type_);
-}
-
-std::shared_ptr<ChunkedArray> ChunkedArray::Slice(int64_t offset) const {
- return Slice(offset, length_);
-}
-
-Result<std::vector<std::shared_ptr<ChunkedArray>>> ChunkedArray::Flatten(
- MemoryPool* pool) const {
- if (type()->id() != Type::STRUCT) {
- // Emulate nonexistent copy constructor
- return std::vector<std::shared_ptr<ChunkedArray>>{
- std::make_shared<ChunkedArray>(chunks_, type_)};
- }
-
- std::vector<ArrayVector> flattened_chunks(type()->num_fields());
- for (const auto& chunk : chunks_) {
- ARROW_ASSIGN_OR_RAISE(auto arrays,
- checked_cast<const StructArray&>(*chunk).Flatten(pool));
- DCHECK_EQ(arrays.size(), flattened_chunks.size());
- for (size_t i = 0; i < arrays.size(); ++i) {
- flattened_chunks[i].push_back(arrays[i]);
- }
- }
-
- std::vector<std::shared_ptr<ChunkedArray>> flattened(type()->num_fields());
- for (size_t i = 0; i < flattened.size(); ++i) {
- auto child_type = type()->field(static_cast<int>(i))->type();
- flattened[i] =
- std::make_shared<ChunkedArray>(std::move(flattened_chunks[i]), child_type);
- }
- return flattened;
-}
-
-Result<std::shared_ptr<ChunkedArray>> ChunkedArray::View(
- const std::shared_ptr<DataType>& type) const {
- ArrayVector out_chunks(this->num_chunks());
- for (int i = 0; i < this->num_chunks(); ++i) {
- ARROW_ASSIGN_OR_RAISE(out_chunks[i], chunks_[i]->View(type));
- }
- return std::make_shared<ChunkedArray>(out_chunks, type);
-}
-
-std::string ChunkedArray::ToString() const {
- std::stringstream ss;
- ARROW_CHECK_OK(PrettyPrint(*this, 0, &ss));
- return ss.str();
-}
-
-Status ChunkedArray::Validate() const {
- if (chunks_.size() == 0) {
- return Status::OK();
- }
-
- const auto& type = *chunks_[0]->type();
- // Make sure chunks all have the same type
- for (size_t i = 1; i < chunks_.size(); ++i) {
- const Array& chunk = *chunks_[i];
- if (!chunk.type()->Equals(type)) {
- return Status::Invalid("In chunk ", i, " expected type ", type.ToString(),
- " but saw ", chunk.type()->ToString());
- }
- }
- // Validate the chunks themselves
- for (size_t i = 0; i < chunks_.size(); ++i) {
- const Array& chunk = *chunks_[i];
- const Status st = internal::ValidateArray(chunk);
- if (!st.ok()) {
- return Status::Invalid("In chunk ", i, ": ", st.ToString());
- }
- }
- return Status::OK();
-}
-
-Status ChunkedArray::ValidateFull() const {
- RETURN_NOT_OK(Validate());
- for (size_t i = 0; i < chunks_.size(); ++i) {
- const Array& chunk = *chunks_[i];
+// Zero-copy slice starting at logical position `offset` and spanning at most
+// `length` elements. `offset` must not exceed the total length (checked with
+// ARROW_CHECK_LE).
+std::shared_ptr<ChunkedArray> ChunkedArray::Slice(int64_t offset, int64_t length) const {
+  ARROW_CHECK_LE(offset, length_) << "Slice offset greater than array length";
+  const bool starts_at_end = (offset == length_);
+  const int n_chunks = num_chunks();
+
+  // Skip whole chunks that lie entirely before `offset`; afterwards `offset`
+  // is relative to the chunk at `idx`.
+  int idx = 0;
+  for (; idx < n_chunks && offset >= chunks_[idx]->length(); ++idx) {
+    offset -= chunks_[idx]->length();
+  }
+
+  ArrayVector pieces;
+  const bool want_empty = starts_at_end || length == 0;
+  if (n_chunks > 0 && want_empty) {
+    // Special case the zero-length slice to make sure there is at least 1 Array
+    // in the result. When there are zero chunks we return zero chunks
+    const int last = n_chunks - 1;
+    pieces.push_back(chunks_[std::min(idx, last)]->Slice(0, 0));
+  } else {
+    // Take from successive chunks until `length` elements have been covered
+    // or the chunks run out. Only the first chunk uses a non-zero offset.
+    for (; idx < n_chunks && length > 0; ++idx) {
+      const std::shared_ptr<Array>& current = chunks_[idx];
+      pieces.push_back(current->Slice(offset, length));
+      length -= current->length() - offset;
+      offset = 0;
+    }
+  }
+
+  return std::make_shared<ChunkedArray>(pieces, type_);
+}
+
+// Slice from `offset` to the end: the total length is passed as the requested
+// length, which the two-argument overload treats as an upper bound.
+std::shared_ptr<ChunkedArray> ChunkedArray::Slice(int64_t offset) const {
+  const int64_t max_length = length_;
+  return Slice(offset, max_length);
+}
+
+// Split a struct-typed chunked array into one chunked array per struct field.
+// For any other type the result is a single-element vector holding a new
+// ChunkedArray built from the same chunks and type.
+Result<std::vector<std::shared_ptr<ChunkedArray>>> ChunkedArray::Flatten(
+    MemoryPool* pool) const {
+  if (type_->id() != Type::STRUCT) {
+    // Emulate nonexistent copy constructor
+    return std::vector<std::shared_ptr<ChunkedArray>>{
+        std::make_shared<ChunkedArray>(chunks_, type_)};
+  }
+
+  // per_field[f] collects, chunk by chunk, the flattened arrays of field f.
+  std::vector<ArrayVector> per_field(type_->num_fields());
+  for (const std::shared_ptr<Array>& struct_chunk : chunks_) {
+    ARROW_ASSIGN_OR_RAISE(auto columns,
+                          checked_cast<const StructArray&>(*struct_chunk).Flatten(pool));
+    DCHECK_EQ(columns.size(), per_field.size());
+    size_t field_index = 0;
+    for (const auto& column : columns) {
+      per_field[field_index].push_back(column);
+      ++field_index;
+    }
+  }
+
+  // Wrap each per-field chunk list in a ChunkedArray of the field's type.
+  std::vector<std::shared_ptr<ChunkedArray>> result(type_->num_fields());
+  for (size_t field_index = 0; field_index < result.size(); ++field_index) {
+    auto field_type = type_->field(static_cast<int>(field_index))->type();
+    result[field_index] = std::make_shared<ChunkedArray>(
+        std::move(per_field[field_index]), field_type);
+  }
+  return result;
+}
+
+// Build a new chunked array by calling Array::View(type) on every chunk.
+// The first failing chunk aborts the operation and its error is returned.
+Result<std::shared_ptr<ChunkedArray>> ChunkedArray::View(
+    const std::shared_ptr<DataType>& type) const {
+  const int n_chunks = this->num_chunks();
+  ArrayVector viewed(n_chunks);
+  int idx = 0;
+  while (idx < n_chunks) {
+    ARROW_ASSIGN_OR_RAISE(viewed[idx], chunks_[idx]->View(type));
+    ++idx;
+  }
+  return std::make_shared<ChunkedArray>(viewed, type);
+}
+
+// Render this chunked array through PrettyPrint into a string. A PrettyPrint
+// failure is handled by ARROW_CHECK_OK rather than reported to the caller.
+std::string ChunkedArray::ToString() const {
+  std::stringstream buffer;
+  ARROW_CHECK_OK(PrettyPrint(*this, 0, &buffer));
+  std::string rendered = buffer.str();
+  return rendered;
+}
+
+// Cheap consistency check: every chunk must have the same type as the first
+// chunk, and every chunk must pass internal::ValidateArray. An array without
+// chunks is trivially valid.
+Status ChunkedArray::Validate() const {
+  if (chunks_.empty()) {
+    return Status::OK();
+  }
+
+  // Pass 1: all chunks share the type of chunk 0.
+  const auto& expected_type = *chunks_[0]->type();
+  for (size_t idx = 1; idx < chunks_.size(); ++idx) {
+    const Array& current = *chunks_[idx];
+    if (current.type()->Equals(expected_type)) {
+      continue;
+    }
+    return Status::Invalid("In chunk ", idx, " expected type ", expected_type.ToString(),
+                           " but saw ", current.type()->ToString());
+  }
+
+  // Pass 2: each chunk is valid on its own; the first failure is reported
+  // together with the index of the offending chunk.
+  for (size_t idx = 0; idx < chunks_.size(); ++idx) {
+    const Status chunk_status = internal::ValidateArray(*chunks_[idx]);
+    if (!chunk_status.ok()) {
+      return Status::Invalid("In chunk ", idx, ": ", chunk_status.ToString());
+    }
+  }
+  return Status::OK();
+}
+
+Status ChunkedArray::ValidateFull() const {
+ RETURN_NOT_OK(Validate());
+ for (size_t i = 0; i < chunks_.size(); ++i) {
+ const Array& chunk = *chunks_[i];
const Status st = internal::ValidateArrayFull(chunk);
- if (!st.ok()) {
- return Status::Invalid("In chunk ", i, ": ", st.ToString());
- }
- }
- return Status::OK();
-}
-
-namespace internal {
-
-bool MultipleChunkIterator::Next(std::shared_ptr<Array>* next_left,
- std::shared_ptr<Array>* next_right) {
- if (pos_ == length_) return false;
-
- // Find non-empty chunk
- std::shared_ptr<Array> chunk_left, chunk_right;
- while (true) {
- chunk_left = left_.chunk(chunk_idx_left_);
- chunk_right = right_.chunk(chunk_idx_right_);
- if (chunk_pos_left_ == chunk_left->length()) {
- chunk_pos_left_ = 0;
- ++chunk_idx_left_;
- continue;
- }
- if (chunk_pos_right_ == chunk_right->length()) {
- chunk_pos_right_ = 0;
- ++chunk_idx_right_;
- continue;
- }
- break;
- }
- // Determine how big of a section to return
- int64_t iteration_size = std::min(chunk_left->length() - chunk_pos_left_,
- chunk_right->length() - chunk_pos_right_);
-
- *next_left = chunk_left->Slice(chunk_pos_left_, iteration_size);
- *next_right = chunk_right->Slice(chunk_pos_right_, iteration_size);
-
- pos_ += iteration_size;
- chunk_pos_left_ += iteration_size;
- chunk_pos_right_ += iteration_size;
- return true;
-}
-
-} // namespace internal
-} // namespace arrow
+ if (!st.ok()) {
+ return Status::Invalid("In chunk ", i, ": ", st.ToString());
+ }
+ }
+ return Status::OK();
+}
+
+namespace internal {
+
+// Produce the next pair of equally sized, aligned slices from the left and
+// right chunked arrays.
+//
+// Returns false once `pos_` has reached `length_`; otherwise stores the two
+// slices in `*next_left` / `*next_right`, advances the iterator past them and
+// returns true.
+bool MultipleChunkIterator::Next(std::shared_ptr<Array>* next_left,
+                                 std::shared_ptr<Array>* next_right) {
+  if (pos_ == length_) return false;
+
+  // Find non-empty chunk: step over chunks that are fully consumed (or
+  // empty) on either side, one side per iteration, left side first.
+  std::shared_ptr<Array> chunk_left, chunk_right;
+  for (;;) {
+    chunk_left = left_.chunk(chunk_idx_left_);
+    chunk_right = right_.chunk(chunk_idx_right_);
+    if (chunk_pos_left_ == chunk_left->length()) {
+      ++chunk_idx_left_;
+      chunk_pos_left_ = 0;
+    } else if (chunk_pos_right_ == chunk_right->length()) {
+      ++chunk_idx_right_;
+      chunk_pos_right_ = 0;
+    } else {
+      break;
+    }
+  }
+
+  // The piece handed out is as long as the shorter of the two remainders.
+  const int64_t left_remaining = chunk_left->length() - chunk_pos_left_;
+  const int64_t right_remaining = chunk_right->length() - chunk_pos_right_;
+  const int64_t iteration_size = std::min(left_remaining, right_remaining);
+
+  *next_left = chunk_left->Slice(chunk_pos_left_, iteration_size);
+  *next_right = chunk_right->Slice(chunk_pos_right_, iteration_size);
+
+  // Advance the global position and both per-chunk cursors.
+  chunk_pos_left_ += iteration_size;
+  chunk_pos_right_ += iteration_size;
+  pos_ += iteration_size;
+  return true;
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/chunked_array.h b/contrib/libs/apache/arrow/cpp/src/arrow/chunked_array.h
index 2ace045c2bf..8766bbedf5d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/chunked_array.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/chunked_array.h
@@ -1,252 +1,252 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
#include "arrow/compare.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class Array;
-class DataType;
-class MemoryPool;
-
-/// \class ChunkedArray
-/// \brief A data structure managing a list of primitive Arrow arrays logically
-/// as one large array
-///
-/// Data chunking is treated throughout this project largely as an
-/// implementation detail for performance and memory use optimization.
-/// ChunkedArray allows Array objects to be collected and interpreted
-/// as a single logical array without requiring an expensive concatenation
-/// step.
-///
-/// In some cases, data produced by a function may exceed the capacity of an
-/// Array (like BinaryArray or StringArray) and so returning multiple Arrays is
-/// the only possibility. In these cases, we recommend returning a ChunkedArray
-/// instead of vector of Arrays or some alternative.
-///
-/// When data is processed in parallel, it may not be practical or possible to
-/// create large contiguous memory allocations and write output into them. With
-/// some data types, like binary and string types, it is not possible at all to
-/// produce non-chunked array outputs without requiring a concatenation step at
-/// the end of processing.
-///
-/// Application developers may tune chunk sizes based on analysis of
-/// performance profiles but many developer-users will not need to be
-/// especially concerned with the chunking details.
-///
-/// Preserving the chunk layout/sizes in processing steps is generally not
-/// considered to be a contract in APIs. A function may decide to alter the
-/// chunking of its result. Similarly, APIs accepting multiple ChunkedArray
-/// inputs should not expect the chunk layout to be the same in each input.
-class ARROW_EXPORT ChunkedArray {
- public:
- /// \brief Construct a chunked array from a vector of arrays
- ///
- /// The vector must be non-empty and all its elements must have the same
- /// data type.
- explicit ChunkedArray(ArrayVector chunks);
-
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Array;
+class DataType;
+class MemoryPool;
+
+/// \class ChunkedArray
+/// \brief A data structure managing a list of primitive Arrow arrays logically
+/// as one large array
+///
+/// Data chunking is treated throughout this project largely as an
+/// implementation detail for performance and memory use optimization.
+/// ChunkedArray allows Array objects to be collected and interpreted
+/// as a single logical array without requiring an expensive concatenation
+/// step.
+///
+/// In some cases, data produced by a function may exceed the capacity of an
+/// Array (like BinaryArray or StringArray) and so returning multiple Arrays is
+/// the only possibility. In these cases, we recommend returning a ChunkedArray
+/// instead of vector of Arrays or some alternative.
+///
+/// When data is processed in parallel, it may not be practical or possible to
+/// create large contiguous memory allocations and write output into them. With
+/// some data types, like binary and string types, it is not possible at all to
+/// produce non-chunked array outputs without requiring a concatenation step at
+/// the end of processing.
+///
+/// Application developers may tune chunk sizes based on analysis of
+/// performance profiles but many developer-users will not need to be
+/// especially concerned with the chunking details.
+///
+/// Preserving the chunk layout/sizes in processing steps is generally not
+/// considered to be a contract in APIs. A function may decide to alter the
+/// chunking of its result. Similarly, APIs accepting multiple ChunkedArray
+/// inputs should not expect the chunk layout to be the same in each input.
+class ARROW_EXPORT ChunkedArray {
+ public:
+ /// \brief Construct a chunked array from a vector of arrays
+ ///
+ /// The vector must be non-empty and all its elements must have the same
+ /// data type.
+ explicit ChunkedArray(ArrayVector chunks);
+
ChunkedArray(ChunkedArray&&) = default;
ChunkedArray& operator=(ChunkedArray&&) = default;
- /// \brief Construct a chunked array from a single Array
- explicit ChunkedArray(std::shared_ptr<Array> chunk)
- : ChunkedArray(ArrayVector{std::move(chunk)}) {}
-
- /// \brief Construct a chunked array from a vector of arrays and a data type
- ///
- /// As the data type is passed explicitly, the vector may be empty.
- ChunkedArray(ArrayVector chunks, std::shared_ptr<DataType> type);
-
- // \brief Constructor with basic input validation.
- static Result<std::shared_ptr<ChunkedArray>> Make(
- ArrayVector chunks, std::shared_ptr<DataType> type = NULLPTR);
-
- /// \return the total length of the chunked array; computed on construction
- int64_t length() const { return length_; }
-
- /// \return the total number of nulls among all chunks
- int64_t null_count() const { return null_count_; }
-
- int num_chunks() const { return static_cast<int>(chunks_.size()); }
-
- /// \return chunk a particular chunk from the chunked array
- std::shared_ptr<Array> chunk(int i) const { return chunks_[i]; }
-
- const ArrayVector& chunks() const { return chunks_; }
-
- /// \brief Construct a zero-copy slice of the chunked array with the
- /// indicated offset and length
- ///
- /// \param[in] offset the position of the first element in the constructed
- /// slice
- /// \param[in] length the length of the slice. If there are not enough
- /// elements in the chunked array, the length will be adjusted accordingly
- ///
- /// \return a new object wrapped in std::shared_ptr<ChunkedArray>
- std::shared_ptr<ChunkedArray> Slice(int64_t offset, int64_t length) const;
-
- /// \brief Slice from offset until end of the chunked array
- std::shared_ptr<ChunkedArray> Slice(int64_t offset) const;
-
- /// \brief Flatten this chunked array as a vector of chunked arrays, one
- /// for each struct field
- ///
- /// \param[in] pool The pool for buffer allocations, if any
- Result<std::vector<std::shared_ptr<ChunkedArray>>> Flatten(
- MemoryPool* pool = default_memory_pool()) const;
-
- /// Construct a zero-copy view of this chunked array with the given
- /// type. Calls Array::View on each constituent chunk. Always succeeds if
- /// there are zero chunks
- Result<std::shared_ptr<ChunkedArray>> View(const std::shared_ptr<DataType>& type) const;
-
- std::shared_ptr<DataType> type() const { return type_; }
-
- /// \brief Determine if two chunked arrays are equal.
- ///
- /// Two chunked arrays can be equal only if they have equal datatypes.
- /// However, they may be equal even if they have different chunkings.
- bool Equals(const ChunkedArray& other) const;
- /// \brief Determine if two chunked arrays are equal.
- bool Equals(const std::shared_ptr<ChunkedArray>& other) const;
+ /// \brief Construct a chunked array from a single Array
+ explicit ChunkedArray(std::shared_ptr<Array> chunk)
+ : ChunkedArray(ArrayVector{std::move(chunk)}) {}
+
+ /// \brief Construct a chunked array from a vector of arrays and a data type
+ ///
+ /// As the data type is passed explicitly, the vector may be empty.
+ ChunkedArray(ArrayVector chunks, std::shared_ptr<DataType> type);
+
+ // \brief Constructor with basic input validation.
+ static Result<std::shared_ptr<ChunkedArray>> Make(
+ ArrayVector chunks, std::shared_ptr<DataType> type = NULLPTR);
+
+ /// \return the total length of the chunked array; computed on construction
+ int64_t length() const { return length_; }
+
+ /// \return the total number of nulls among all chunks
+ int64_t null_count() const { return null_count_; }
+
+ int num_chunks() const { return static_cast<int>(chunks_.size()); }
+
+ /// \return a particular chunk from the chunked array
+ std::shared_ptr<Array> chunk(int i) const { return chunks_[i]; }
+
+ const ArrayVector& chunks() const { return chunks_; }
+
+ /// \brief Construct a zero-copy slice of the chunked array with the
+ /// indicated offset and length
+ ///
+ /// \param[in] offset the position of the first element in the constructed
+ /// slice
+ /// \param[in] length the length of the slice. If there are not enough
+ /// elements in the chunked array, the length will be adjusted accordingly
+ ///
+ /// \return a new object wrapped in std::shared_ptr<ChunkedArray>
+ std::shared_ptr<ChunkedArray> Slice(int64_t offset, int64_t length) const;
+
+ /// \brief Slice from offset until end of the chunked array
+ std::shared_ptr<ChunkedArray> Slice(int64_t offset) const;
+
+ /// \brief Flatten this chunked array as a vector of chunked arrays, one
+ /// for each struct field
+ ///
+ /// \param[in] pool The pool for buffer allocations, if any
+ Result<std::vector<std::shared_ptr<ChunkedArray>>> Flatten(
+ MemoryPool* pool = default_memory_pool()) const;
+
+ /// Construct a zero-copy view of this chunked array with the given
+ /// type. Calls Array::View on each constituent chunk. Always succeeds if
+ /// there are zero chunks
+ Result<std::shared_ptr<ChunkedArray>> View(const std::shared_ptr<DataType>& type) const;
+
+ std::shared_ptr<DataType> type() const { return type_; }
+
+ /// \brief Determine if two chunked arrays are equal.
+ ///
+ /// Two chunked arrays can be equal only if they have equal datatypes.
+ /// However, they may be equal even if they have different chunkings.
+ bool Equals(const ChunkedArray& other) const;
+ /// \brief Determine if two chunked arrays are equal.
+ bool Equals(const std::shared_ptr<ChunkedArray>& other) const;
/// \brief Determine if two chunked arrays approximately equal
bool ApproxEquals(const ChunkedArray& other,
const EqualOptions& = EqualOptions::Defaults()) const;
-
- /// \return PrettyPrint representation suitable for debugging
- std::string ToString() const;
-
- /// \brief Perform cheap validation checks to determine obvious inconsistencies
- /// within the chunk array's internal data.
- ///
- /// This is O(k*m) where k is the number of array descendents,
- /// and m is the number of chunks.
- ///
- /// \return Status
- Status Validate() const;
-
- /// \brief Perform extensive validation checks to determine inconsistencies
- /// within the chunk array's internal data.
- ///
- /// This is O(k*n) where k is the number of array descendents,
- /// and n is the length in elements.
- ///
- /// \return Status
- Status ValidateFull() const;
-
- protected:
- ArrayVector chunks_;
- int64_t length_;
- int64_t null_count_;
- std::shared_ptr<DataType> type_;
-
- private:
- ARROW_DISALLOW_COPY_AND_ASSIGN(ChunkedArray);
-};
-
-namespace internal {
-
-/// \brief EXPERIMENTAL: Utility for incremental iteration over contiguous
-/// pieces of potentially differently-chunked ChunkedArray objects
-class ARROW_EXPORT MultipleChunkIterator {
- public:
- MultipleChunkIterator(const ChunkedArray& left, const ChunkedArray& right)
- : left_(left),
- right_(right),
- pos_(0),
- length_(left.length()),
- chunk_idx_left_(0),
- chunk_idx_right_(0),
- chunk_pos_left_(0),
- chunk_pos_right_(0) {}
-
- bool Next(std::shared_ptr<Array>* next_left, std::shared_ptr<Array>* next_right);
-
- int64_t position() const { return pos_; }
-
- private:
- const ChunkedArray& left_;
- const ChunkedArray& right_;
-
- // The amount of the entire ChunkedArray consumed
- int64_t pos_;
-
- // Length of the chunked array(s)
- int64_t length_;
-
- // Current left chunk
- int chunk_idx_left_;
-
- // Current right chunk
- int chunk_idx_right_;
-
- // Offset into the current left chunk
- int64_t chunk_pos_left_;
-
- // Offset into the current right chunk
- int64_t chunk_pos_right_;
-};
-
-/// \brief Evaluate binary function on two ChunkedArray objects having possibly
-/// different chunk layouts. The passed binary function / functor should have
-/// the following signature.
-///
-/// Status(const Array&, const Array&, int64_t)
-///
-/// The third argument is the absolute position relative to the start of each
-/// ChunkedArray. The function is executed against each contiguous pair of
-/// array segments, slicing if necessary.
-///
-/// For example, if two arrays have chunk sizes
-///
-/// left: [10, 10, 20]
-/// right: [15, 10, 15]
-///
-/// Then the following invocations take place (pseudocode)
-///
-/// func(left.chunk[0][0:10], right.chunk[0][0:10], 0)
-/// func(left.chunk[1][0:5], right.chunk[0][10:15], 10)
-/// func(left.chunk[1][5:10], right.chunk[1][0:5], 15)
-/// func(left.chunk[2][0:5], right.chunk[1][5:10], 20)
-/// func(left.chunk[2][5:20], right.chunk[2][:], 25)
-template <typename Action>
-Status ApplyBinaryChunked(const ChunkedArray& left, const ChunkedArray& right,
- Action&& action) {
- MultipleChunkIterator iterator(left, right);
- std::shared_ptr<Array> left_piece, right_piece;
- while (iterator.Next(&left_piece, &right_piece)) {
- ARROW_RETURN_NOT_OK(action(*left_piece, *right_piece, iterator.position()));
- }
- return Status::OK();
-}
-
-} // namespace internal
-} // namespace arrow
+
+ /// \return PrettyPrint representation suitable for debugging
+ std::string ToString() const;
+
+ /// \brief Perform cheap validation checks to determine obvious inconsistencies
+ /// within the chunk array's internal data.
+ ///
+ /// This is O(k*m) where k is the number of array descendants,
+ /// and m is the number of chunks.
+ ///
+ /// \return Status
+ Status Validate() const;
+
+ /// \brief Perform extensive validation checks to determine inconsistencies
+ /// within the chunk array's internal data.
+ ///
+ /// This is O(k*n) where k is the number of array descendants,
+ /// and n is the length in elements.
+ ///
+ /// \return Status
+ Status ValidateFull() const;
+
+ protected:
+ ArrayVector chunks_;
+ int64_t length_;
+ int64_t null_count_;
+ std::shared_ptr<DataType> type_;
+
+ private:
+ ARROW_DISALLOW_COPY_AND_ASSIGN(ChunkedArray);
+};
+
+namespace internal {
+
+/// \brief EXPERIMENTAL: Utility for incremental iteration over contiguous
+/// pieces of potentially differently-chunked ChunkedArray objects
+///
+/// The iterator keeps references to both chunked arrays, so they must outlive
+/// it. The number of elements to traverse is taken from `left`.
+class ARROW_EXPORT MultipleChunkIterator {
+ public:
+  MultipleChunkIterator(const ChunkedArray& left, const ChunkedArray& right)
+      : left_(left), right_(right), length_(left.length()) {}
+
+  /// \brief Fetch the next pair of aligned pieces
+  /// \return false when the iteration is finished
+  bool Next(std::shared_ptr<Array>* next_left, std::shared_ptr<Array>* next_right);
+
+  /// \return the number of elements consumed so far
+  int64_t position() const { return pos_; }
+
+ private:
+  const ChunkedArray& left_;
+  const ChunkedArray& right_;
+
+  // The amount of the entire ChunkedArray consumed
+  int64_t pos_ = 0;
+
+  // Length of the chunked array(s)
+  int64_t length_;
+
+  // Current left chunk
+  int chunk_idx_left_ = 0;
+
+  // Current right chunk
+  int chunk_idx_right_ = 0;
+
+  // Offset into the current left chunk
+  int64_t chunk_pos_left_ = 0;
+
+  // Offset into the current right chunk
+  int64_t chunk_pos_right_ = 0;
+};
+
+/// \brief Evaluate binary function on two ChunkedArray objects having possibly
+/// different chunk layouts. The passed binary function / functor should have
+/// the following signature.
+///
+///    Status(const Array&, const Array&, int64_t)
+///
+/// The function is executed against each contiguous pair of array segments,
+/// slicing if necessary. The third argument is the iterator position read
+/// *after* the pair has been consumed, i.e. the absolute offset just past the
+/// end of the passed segments, relative to the start of each ChunkedArray.
+///
+/// For example, if two arrays have chunk sizes
+///
+///   left: [10, 10, 20]
+///   right: [15, 10, 15]
+///
+/// Then the following invocations take place (pseudocode)
+///
+/// func(left.chunk[0][0:10], right.chunk[0][0:10], 10)
+/// func(left.chunk[1][0:5], right.chunk[0][10:15], 15)
+/// func(left.chunk[1][5:10], right.chunk[1][0:5], 20)
+/// func(left.chunk[2][0:5], right.chunk[1][5:10], 25)
+/// func(left.chunk[2][5:20], right.chunk[2][:], 40)
+///
+/// Iteration stops at the first non-OK Status returned by the function, and
+/// that Status is returned to the caller.
+template <typename Action>
+Status ApplyBinaryChunked(const ChunkedArray& left, const ChunkedArray& right,
+                          Action&& action) {
+  MultipleChunkIterator pieces(left, right);
+  std::shared_ptr<Array> left_piece, right_piece;
+  while (pieces.Next(&left_piece, &right_piece)) {
+    // Next() has already advanced the iterator, so position() is the end
+    // offset of the pieces being handed to `action`.
+    const int64_t consumed = pieces.position();
+    ARROW_RETURN_NOT_OK(action(*left_piece, *right_piece, consumed));
+  }
+  return Status::OK();
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compare.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compare.cc
index 4c6f97faf95..a085abcb5b7 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compare.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compare.cc
@@ -1,75 +1,75 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Functions for comparing Arrow data structures
-
-#include "arrow/compare.h"
-
-#include <climits>
-#include <cmath>
-#include <cstdint>
-#include <cstring>
-#include <memory>
-#include <string>
-#include <type_traits>
-#include <utility>
-#include <vector>
-
-#include "arrow/array.h"
-#include "arrow/array/diff.h"
-#include "arrow/buffer.h"
-#include "arrow/scalar.h"
-#include "arrow/sparse_tensor.h"
-#include "arrow/status.h"
-#include "arrow/tensor.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Functions for comparing Arrow data structures
+
+#include "arrow/compare.h"
+
+#include <climits>
+#include <cmath>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/array/diff.h"
+#include "arrow/buffer.h"
+#include "arrow/scalar.h"
+#include "arrow/sparse_tensor.h"
+#include "arrow/status.h"
+#include "arrow/tensor.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
#include "arrow/util/bit_run_reader.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_ops.h"
#include "arrow/util/bitmap_reader.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/memory.h"
-#include "arrow/visitor_inline.h"
-
-namespace arrow {
-
-using internal::BitmapEquals;
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/memory.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+using internal::BitmapEquals;
using internal::BitmapReader;
using internal::BitmapUInt64Reader;
-using internal::checked_cast;
+using internal::checked_cast;
using internal::OptionalBitmapEquals;
-
-// ----------------------------------------------------------------------
-// Public method implementations
-
-namespace {
-
+
+// ----------------------------------------------------------------------
+// Public method implementations
+
+namespace {
+
// TODO also handle HALF_FLOAT NaNs
-
+
+// Bit flags used as the `Flags` template argument of FloatingEquality.
+// They are combined with bitwise OR (e.g. NansEqual | Approximate).
enum FloatingEqualityFlags : int8_t { Approximate = 1, NansEqual = 2 };
-
+
+// Primary template of the floating-point equality functor: plain operator==.
+// Specializations keyed on FloatingEqualityFlags supply the other policies.
template <typename T, int8_t Flags>
struct FloatingEquality {
+// Returns true iff x == y.
bool operator()(T x, T y) { return x == y; }
};
-
+
template <typename T>
struct FloatingEquality<T, NansEqual> {
bool operator()(T x, T y) { return (x == y) || (std::isnan(x) && std::isnan(y)); }
@@ -105,42 +105,42 @@ void VisitFloatingEquality(const EqualOptions& options, bool floating_approximat
visit(FloatingEquality<T, NansEqual | Approximate>{options});
} else {
visit(FloatingEquality<T, NansEqual>{});
- }
- } else {
+ }
+ } else {
if (floating_approximate) {
visit(FloatingEquality<T, Approximate>{options});
} else {
visit(FloatingEquality<T, 0>{});
- }
- }
-}
-
+ }
+ }
+}
+
+// Returns false if `type` itself is FLOAT or DOUBLE, or if any of its child
+// field types (checked recursively) is; returns true otherwise.
+// NOTE(review): presumably this exists because a NaN does not compare equal
+// to itself, so object identity cannot stand in for equality - confirm.
inline bool IdentityImpliesEqualityNansNotEqual(const DataType& type) {
if (type.id() == Type::FLOAT || type.id() == Type::DOUBLE) {
return false;
- }
+ }
+// Recurse into nested types: one floating-point child is enough to fail.
for (const auto& child : type.fields()) {
if (!IdentityImpliesEqualityNansNotEqual(*child->type())) {
return false;
}
}
return true;
-}
-
+}
+
+// Decides whether comparing an object with itself may be short-circuited to
+// "equal". Always true when options.nans_equal() is set; otherwise the answer
+// comes from IdentityImpliesEqualityNansNotEqual(type).
inline bool IdentityImpliesEquality(const DataType& type, const EqualOptions& options) {
if (options.nans_equal()) {
return true;
- }
+ }
return IdentityImpliesEqualityNansNotEqual(type);
-}
-
+}
+
bool CompareArrayRanges(const ArrayData& left, const ArrayData& right,
int64_t left_start_idx, int64_t left_end_idx,
int64_t right_start_idx, const EqualOptions& options,
bool floating_approximate);
-
+
class RangeDataEqualsImpl {
- public:
+ public:
// PRE-CONDITIONS:
// - the types are equal
// - the ranges are in bounds
@@ -152,11 +152,11 @@ class RangeDataEqualsImpl {
floating_approximate_(floating_approximate),
left_(left),
right_(right),
- left_start_idx_(left_start_idx),
- right_start_idx_(right_start_idx),
+ left_start_idx_(left_start_idx),
+ right_start_idx_(right_start_idx),
range_length_(range_length),
- result_(false) {}
-
+ result_(false) {}
+
bool Compare() {
// Compare null bitmaps
if (left_start_idx_ == 0 && right_start_idx_ == 0 && range_length_ == left_.length &&
@@ -164,8 +164,8 @@ class RangeDataEqualsImpl {
// If we're comparing entire arrays, we can first compare the cached null counts
if (left_.GetNullCount() != right_.GetNullCount()) {
return false;
- }
- }
+ }
+ }
if (!OptionalBitmapEquals(left_.buffers[0], left_.offset + left_start_idx_,
right_.buffers[0], right_.offset + right_start_idx_,
range_length_)) {
@@ -173,28 +173,28 @@ class RangeDataEqualsImpl {
}
// Compare values
return CompareWithType(*left_.type);
- }
-
+ }
+
bool CompareWithType(const DataType& type) {
result_ = true;
if (range_length_ != 0) {
ARROW_CHECK_OK(VisitTypeInline(type, this));
- }
+ }
return result_;
- }
-
+ }
+
Status Visit(const NullType&) { return Status::OK(); }
-
+
template <typename TypeClass>
enable_if_primitive_ctype<TypeClass, Status> Visit(const TypeClass& type) {
return ComparePrimitive(type);
- }
-
+ }
+
template <typename TypeClass>
enable_if_t<is_temporal_type<TypeClass>::value, Status> Visit(const TypeClass& type) {
return ComparePrimitive(type);
}
-
+
Status Visit(const BooleanType&) {
const uint8_t* left_bits = left_.GetValues<uint8_t>(1, 0);
const uint8_t* right_bits = right_.GetValues<uint8_t>(1, 0);
@@ -207,7 +207,7 @@ class RangeDataEqualsImpl {
return false;
}
}
- return true;
+ return true;
} else if (length <= 1024) {
BitmapUInt64Reader left_reader(left_bits, left_start_idx_ + left_.offset + i,
length);
@@ -223,23 +223,23 @@ class RangeDataEqualsImpl {
// BitmapEquals is the fastest method on large runs
return BitmapEquals(left_bits, left_start_idx_ + left_.offset + i, right_bits,
right_start_idx_ + right_.offset + i, length);
- }
+ }
return true;
- };
+ };
VisitValidRuns(compare_runs);
return Status::OK();
- }
-
+ }
+
Status Visit(const FloatType& type) { return CompareFloating(type); }
-
+
Status Visit(const DoubleType& type) { return CompareFloating(type); }
-
+
// Also matches StringType
Status Visit(const BinaryType& type) { return CompareBinary(type); }
-
+
// Also matches LargeStringType
Status Visit(const LargeBinaryType& type) { return CompareBinary(type); }
-
+
Status Visit(const FixedSizeBinaryType& type) {
const auto byte_width = type.byte_width();
const uint8_t* left_data = left_.GetValues<uint8_t>(1, 0);
@@ -255,20 +255,20 @@ class RangeDataEqualsImpl {
} else {
auto compare_runs = [&](int64_t i, int64_t length) -> bool { return true; };
VisitValidRuns(compare_runs);
- }
+ }
return Status::OK();
}
-
+
// Also matches MapType
Status Visit(const ListType& type) { return CompareList(type); }
-
+
Status Visit(const LargeListType& type) { return CompareList(type); }
-
+
Status Visit(const FixedSizeListType& type) {
const auto list_size = type.list_size();
const ArrayData& left_data = *left_.child_data[0];
const ArrayData& right_data = *right_.child_data[0];
-
+
auto compare_runs = [&](int64_t i, int64_t length) -> bool {
RangeDataEqualsImpl impl(options_, floating_approximate_, left_data, right_data,
(left_start_idx_ + left_.offset + i) * list_size,
@@ -279,10 +279,10 @@ class RangeDataEqualsImpl {
VisitValidRuns(compare_runs);
return Status::OK();
}
-
+
Status Visit(const StructType& type) {
const int32_t num_fields = type.num_fields();
-
+
auto compare_runs = [&](int64_t i, int64_t length) -> bool {
for (int32_t f = 0; f < num_fields; ++f) {
RangeDataEqualsImpl impl(options_, floating_approximate_, *left_.child_data[f],
@@ -290,27 +290,27 @@ class RangeDataEqualsImpl {
left_start_idx_ + left_.offset + i,
right_start_idx_ + right_.offset + i, length);
if (!impl.Compare()) {
- return false;
- }
- }
+ return false;
+ }
+ }
return true;
};
VisitValidRuns(compare_runs);
- return Status::OK();
- }
-
+ return Status::OK();
+ }
+
Status Visit(const SparseUnionType& type) {
const auto& child_ids = type.child_ids();
const int8_t* left_codes = left_.GetValues<int8_t>(1);
const int8_t* right_codes = right_.GetValues<int8_t>(1);
-
+
// Unions don't have a null bitmap
for (int64_t i = 0; i < range_length_; ++i) {
const auto type_id = left_codes[left_start_idx_ + i];
if (type_id != right_codes[right_start_idx_ + i]) {
- result_ = false;
+ result_ = false;
break;
- }
+ }
const auto child_num = child_ids[type_id];
// XXX can we instead detect runs of same-child union values?
RangeDataEqualsImpl impl(
@@ -318,20 +318,20 @@ class RangeDataEqualsImpl {
*right_.child_data[child_num], left_start_idx_ + left_.offset + i,
right_start_idx_ + right_.offset + i, 1);
if (!impl.Compare()) {
- result_ = false;
+ result_ = false;
break;
- }
- }
- return Status::OK();
- }
-
+ }
+ }
+ return Status::OK();
+ }
+
Status Visit(const DenseUnionType& type) {
const auto& child_ids = type.child_ids();
const int8_t* left_codes = left_.GetValues<int8_t>(1);
const int8_t* right_codes = right_.GetValues<int8_t>(1);
const int32_t* left_offsets = left_.GetValues<int32_t>(2);
const int32_t* right_offsets = right_.GetValues<int32_t>(2);
-
+
for (int64_t i = 0; i < range_length_; ++i) {
const auto type_id = left_codes[left_start_idx_ + i];
if (type_id != right_codes[right_start_idx_ + i]) {
@@ -348,9 +348,9 @@ class RangeDataEqualsImpl {
break;
}
}
- return Status::OK();
- }
-
+ return Status::OK();
+ }
+
Status Visit(const DictionaryType& type) {
// Compare dictionaries
result_ &= CompareArrayRanges(
@@ -361,24 +361,24 @@ class RangeDataEqualsImpl {
if (result_) {
// Compare indices
result_ &= CompareWithType(*type.index_type());
- }
- return Status::OK();
- }
-
+ }
+ return Status::OK();
+ }
+
Status Visit(const ExtensionType& type) {
// Compare storages
result_ &= CompareWithType(*type.storage_type());
- return Status::OK();
- }
-
- protected:
+ return Status::OK();
+ }
+
+ protected:
// For CompareFloating (templated local classes or lambdas not supported in C++11)
template <typename CType>
struct ComparatorVisitor {
RangeDataEqualsImpl* impl;
const CType* left_values;
const CType* right_values;
-
+
template <typename CompareFunction>
void operator()(CompareFunction&& compare) {
impl->VisitValues([&](int64_t i) {
@@ -388,10 +388,10 @@ class RangeDataEqualsImpl {
});
}
};
-
+
template <typename CType>
friend struct ComparatorVisitor;
-
+
template <typename TypeClass, typename CType = typename TypeClass::c_type>
Status ComparePrimitive(const TypeClass&) {
const CType* left_values = left_.GetValues<CType>(1);
@@ -401,79 +401,79 @@ class RangeDataEqualsImpl {
right_values + right_start_idx_ + i, length * sizeof(CType)) == 0;
});
return Status::OK();
- }
-
+ }
+
template <typename TypeClass>
Status CompareFloating(const TypeClass&) {
using CType = typename TypeClass::c_type;
const CType* left_values = left_.GetValues<CType>(1);
const CType* right_values = right_.GetValues<CType>(1);
-
+
ComparatorVisitor<CType> visitor{this, left_values, right_values};
VisitFloatingEquality<CType>(options_, floating_approximate_, visitor);
- return Status::OK();
- }
-
+ return Status::OK();
+ }
+
template <typename TypeClass>
Status CompareBinary(const TypeClass&) {
const uint8_t* left_data = left_.GetValues<uint8_t>(2, 0);
const uint8_t* right_data = right_.GetValues<uint8_t>(2, 0);
-
+
if (left_data != nullptr && right_data != nullptr) {
const auto compare_ranges = [&](int64_t left_offset, int64_t right_offset,
int64_t length) -> bool {
return memcmp(left_data + left_offset, right_data + right_offset, length) == 0;
};
CompareWithOffsets<typename TypeClass::offset_type>(1, compare_ranges);
- } else {
+ } else {
// One of the arrays is an array of empty strings and nulls.
// We just need to compare the offsets.
// (note we must not call memcmp() with null data pointers)
CompareWithOffsets<typename TypeClass::offset_type>(1, [](...) { return true; });
- }
- return Status::OK();
- }
-
+ }
+ return Status::OK();
+ }
+
template <typename TypeClass>
Status CompareList(const TypeClass&) {
const ArrayData& left_data = *left_.child_data[0];
const ArrayData& right_data = *right_.child_data[0];
-
+
const auto compare_ranges = [&](int64_t left_offset, int64_t right_offset,
int64_t length) -> bool {
RangeDataEqualsImpl impl(options_, floating_approximate_, left_data, right_data,
left_offset, right_offset, length);
return impl.Compare();
};
-
+
CompareWithOffsets<typename TypeClass::offset_type>(1, compare_ranges);
- return Status::OK();
- }
-
+ return Status::OK();
+ }
+
template <typename offset_type, typename CompareRanges>
void CompareWithOffsets(int offsets_buffer_index, CompareRanges&& compare_ranges) {
const offset_type* left_offsets =
left_.GetValues<offset_type>(offsets_buffer_index) + left_start_idx_;
const offset_type* right_offsets =
right_.GetValues<offset_type>(offsets_buffer_index) + right_start_idx_;
-
+
const auto compare_runs = [&](int64_t i, int64_t length) {
for (int64_t j = i; j < i + length; ++j) {
if (left_offsets[j + 1] - left_offsets[j] !=
right_offsets[j + 1] - right_offsets[j]) {
- return false;
- }
- }
+ return false;
+ }
+ }
if (!compare_ranges(left_offsets[i], right_offsets[i],
left_offsets[i + length] - left_offsets[i])) {
return false;
}
- return true;
+ return true;
};
VisitValidRuns(compare_runs);
- }
-
+ }
+
template <typename CompareValues>
void VisitValues(CompareValues&& compare_values) {
internal::VisitSetBitRunsVoid(left_.buffers[0], left_.offset + left_start_idx_,
@@ -483,7 +483,7 @@ class RangeDataEqualsImpl {
}
});
}
-
+
// Visit and compare runs of non-null values
template <typename CompareRuns>
void VisitValidRuns(CompareRuns&& compare_runs) {
@@ -491,21 +491,21 @@ class RangeDataEqualsImpl {
if (left_null_bitmap == nullptr) {
result_ = compare_runs(0, range_length_);
return;
- }
+ }
internal::SetBitRunReader reader(left_null_bitmap, left_.offset + left_start_idx_,
range_length_);
while (true) {
const auto run = reader.NextRun();
if (run.length == 0) {
return;
- }
+ }
if (!compare_runs(run.position, run.length)) {
result_ = false;
return;
- }
- }
- }
-
+ }
+ }
+ }
+
const EqualOptions& options_;
const bool floating_approximate_;
const ArrayData& left_;
@@ -513,10 +513,10 @@ class RangeDataEqualsImpl {
const int64_t left_start_idx_;
const int64_t right_start_idx_;
const int64_t range_length_;
-
+
bool result_;
-};
-
+};
+
bool CompareArrayRanges(const ArrayData& left, const ArrayData& right,
int64_t left_start_idx, int64_t left_end_idx,
int64_t right_start_idx, const EqualOptions& options,
@@ -524,160 +524,160 @@ bool CompareArrayRanges(const ArrayData& left, const ArrayData& right,
if (left.type->id() != right.type->id() ||
!TypeEquals(*left.type, *right.type, false /* check_metadata */)) {
return false;
- }
-
+ }
+
const int64_t range_length = left_end_idx - left_start_idx;
DCHECK_GE(range_length, 0);
if (left_start_idx + range_length > left.length) {
// Left range too small
- return false;
- }
+ return false;
+ }
if (right_start_idx + range_length > right.length) {
// Right range too small
- return false;
- }
+ return false;
+ }
if (&left == &right && left_start_idx == right_start_idx &&
IdentityImpliesEquality(*left.type, options)) {
return true;
- }
+ }
// Compare values
RangeDataEqualsImpl impl(options, floating_approximate, left, right, left_start_idx,
right_start_idx, range_length);
return impl.Compare();
-}
-
-class TypeEqualsVisitor {
- public:
- explicit TypeEqualsVisitor(const DataType& right, bool check_metadata)
- : right_(right), check_metadata_(check_metadata), result_(false) {}
-
- Status VisitChildren(const DataType& left) {
- if (left.num_fields() != right_.num_fields()) {
- result_ = false;
- return Status::OK();
- }
-
- for (int i = 0; i < left.num_fields(); ++i) {
- if (!left.field(i)->Equals(right_.field(i), check_metadata_)) {
- result_ = false;
- return Status::OK();
- }
- }
- result_ = true;
- return Status::OK();
- }
-
- template <typename T>
- enable_if_t<is_null_type<T>::value || is_primitive_ctype<T>::value ||
- is_base_binary_type<T>::value,
- Status>
- Visit(const T&) {
- result_ = true;
- return Status::OK();
- }
-
- template <typename T>
- enable_if_interval<T, Status> Visit(const T& left) {
- const auto& right = checked_cast<const IntervalType&>(right_);
- result_ = right.interval_type() == left.interval_type();
- return Status::OK();
- }
-
- template <typename T>
- enable_if_t<is_time_type<T>::value || is_date_type<T>::value ||
- is_duration_type<T>::value,
- Status>
- Visit(const T& left) {
- const auto& right = checked_cast<const T&>(right_);
- result_ = left.unit() == right.unit();
- return Status::OK();
- }
-
- Status Visit(const TimestampType& left) {
- const auto& right = checked_cast<const TimestampType&>(right_);
- result_ = left.unit() == right.unit() && left.timezone() == right.timezone();
- return Status::OK();
- }
-
- Status Visit(const FixedSizeBinaryType& left) {
- const auto& right = checked_cast<const FixedSizeBinaryType&>(right_);
- result_ = left.byte_width() == right.byte_width();
- return Status::OK();
- }
-
- Status Visit(const Decimal128Type& left) {
- const auto& right = checked_cast<const Decimal128Type&>(right_);
- result_ = left.precision() == right.precision() && left.scale() == right.scale();
- return Status::OK();
- }
-
+}
+
+class TypeEqualsVisitor {
+ public:
+ // `right` is the type each visited (left) type is compared against.
+ // `check_metadata` is consulted when child fields or map key/item types are
+ // compared. The result starts out false until a Visit() overload sets it.
+ explicit TypeEqualsVisitor(const DataType& right, bool check_metadata)
+ : right_(right), check_metadata_(check_metadata), result_(false) {}
+
+ // Compares the child fields of `left` and `right_` pairwise. result_ becomes
+ // true only when both types have the same number of fields and every pair of
+ // corresponding fields is Equals() under check_metadata_. Always returns OK.
+ Status VisitChildren(const DataType& left) {
+   bool children_match = (left.num_fields() == right_.num_fields());
+   for (int idx = 0; children_match && idx < left.num_fields(); ++idx) {
+     children_match = left.field(idx)->Equals(right_.field(idx), check_metadata_);
+   }
+   result_ = children_match;
+   return Status::OK();
+ }
+
+ // Null, primitive C-type and base-binary types: no parameter is compared
+ // here, so the visit unconditionally reports equality.
+ // NOTE(review): presumably the caller has already matched the type ids -
+ // confirm at the call site.
+ template <typename T>
+ enable_if_t<is_null_type<T>::value || is_primitive_ctype<T>::value ||
+ is_base_binary_type<T>::value,
+ Status>
+ Visit(const T&) {
+ result_ = true;
+ return Status::OK();
+ }
+
+ // Interval types are equal when they report the same interval_type().
+ template <typename T>
+ enable_if_interval<T, Status> Visit(const T& left) {
+   const auto& other = checked_cast<const IntervalType&>(right_);
+   result_ = (left.interval_type() == other.interval_type());
+   return Status::OK();
+ }
+
+ // Time, date and duration types are equal when their unit() values agree.
+ template <typename T>
+ enable_if_t<is_time_type<T>::value || is_date_type<T>::value ||
+                 is_duration_type<T>::value,
+             Status>
+ Visit(const T& left) {
+   const auto& other = checked_cast<const T&>(right_);
+   const bool same_unit = (left.unit() == other.unit());
+   result_ = same_unit;
+   return Status::OK();
+ }
+
+ // Timestamp types are equal when both the unit and the timezone match.
+ Status Visit(const TimestampType& left) {
+   const auto& other = checked_cast<const TimestampType&>(right_);
+   const bool same_unit = (left.unit() == other.unit());
+   // The timezone is only compared once the units are known to match.
+   result_ = same_unit && (left.timezone() == other.timezone());
+   return Status::OK();
+ }
+
+ // Fixed-size binary types are equal when their byte widths are the same.
+ Status Visit(const FixedSizeBinaryType& left) {
+   const auto& other = checked_cast<const FixedSizeBinaryType&>(right_);
+   const bool same_width = (left.byte_width() == other.byte_width());
+   result_ = same_width;
+   return Status::OK();
+ }
+
+ // Decimal128 types are equal when both precision and scale agree.
+ Status Visit(const Decimal128Type& left) {
+   const auto& other = checked_cast<const Decimal128Type&>(right_);
+   const bool same_precision = (left.precision() == other.precision());
+   // The scale is only compared once the precisions are known to match.
+   result_ = same_precision && (left.scale() == other.scale());
+   return Status::OK();
+ }
+
+// Decimal256 types are equal when both precision and scale agree.
Status Visit(const Decimal256Type& left) {
const auto& right = checked_cast<const Decimal256Type&>(right_);
result_ = left.precision() == right.precision() && left.scale() == right.scale();
return Status::OK();
}
- template <typename T>
- enable_if_t<is_list_like_type<T>::value || is_struct_type<T>::value, Status> Visit(
- const T& left) {
- return VisitChildren(left);
- }
-
- Status Visit(const MapType& left) {
- const auto& right = checked_cast<const MapType&>(right_);
- if (left.keys_sorted() != right.keys_sorted()) {
- result_ = false;
- return Status::OK();
- }
- result_ = left.key_type()->Equals(*right.key_type(), check_metadata_) &&
- left.item_type()->Equals(*right.item_type(), check_metadata_);
- return Status::OK();
- }
-
- Status Visit(const UnionType& left) {
- const auto& right = checked_cast<const UnionType&>(right_);
-
- if (left.mode() != right.mode() || left.type_codes() != right.type_codes()) {
- result_ = false;
- return Status::OK();
- }
-
- result_ = std::equal(
- left.fields().begin(), left.fields().end(), right.fields().begin(),
- [this](const std::shared_ptr<Field>& l, const std::shared_ptr<Field>& r) {
- return l->Equals(r, check_metadata_);
- });
- return Status::OK();
- }
-
- Status Visit(const DictionaryType& left) {
- const auto& right = checked_cast<const DictionaryType&>(right_);
- result_ = left.index_type()->Equals(right.index_type()) &&
- left.value_type()->Equals(right.value_type()) &&
- (left.ordered() == right.ordered());
- return Status::OK();
- }
-
- Status Visit(const ExtensionType& left) {
- result_ = left.ExtensionEquals(static_cast<const ExtensionType&>(right_));
- return Status::OK();
- }
-
- bool result() const { return result_; }
-
- protected:
- const DataType& right_;
- bool check_metadata_;
- bool result_;
-};
-
+ // List-like and struct types are compared through their child fields;
+ // the work is delegated to VisitChildren().
+ template <typename T>
+ enable_if_t<is_list_like_type<T>::value || is_struct_type<T>::value, Status> Visit(
+ const T& left) {
+ return VisitChildren(left);
+ }
+
+ // Map types are equal when keys_sorted() agrees and both the key type and
+ // the item type are Equals() under check_metadata_.
+ Status Visit(const MapType& left) {
+   const auto& other = checked_cast<const MapType&>(right_);
+   if (left.keys_sorted() == other.keys_sorted()) {
+     const bool keys_match =
+         left.key_type()->Equals(*other.key_type(), check_metadata_);
+     // Item types are only compared when the key types already match.
+     result_ = keys_match &&
+               left.item_type()->Equals(*other.item_type(), check_metadata_);
+   } else {
+     result_ = false;
+   }
+   return Status::OK();
+ }
+
+ // Union types are equal when mode and type codes agree and every pair of
+ // corresponding fields is Equals() under check_metadata_.
+ Status Visit(const UnionType& left) {
+   const auto& other = checked_cast<const UnionType&>(right_);
+
+   const bool same_layout =
+       left.mode() == other.mode() && left.type_codes() == other.type_codes();
+   if (!same_layout) {
+     result_ = false;
+     return Status::OK();
+   }
+
+   // Field-by-field comparison, run only once the layouts are known to match.
+   const auto fields_match = [this](const std::shared_ptr<Field>& lhs_field,
+                                    const std::shared_ptr<Field>& rhs_field) {
+     return lhs_field->Equals(rhs_field, check_metadata_);
+   };
+   result_ = std::equal(left.fields().begin(), left.fields().end(),
+                        other.fields().begin(), fields_match);
+   return Status::OK();
+ }
+
+ // Dictionary types are equal when index type, value type and the ordered
+ // flag all agree. The nested Equals() calls use their default arguments;
+ // check_metadata_ is not forwarded here.
+ Status Visit(const DictionaryType& left) {
+   const auto& other = checked_cast<const DictionaryType&>(right_);
+   bool same = left.index_type()->Equals(other.index_type());
+   same = same && left.value_type()->Equals(other.value_type());
+   same = same && (left.ordered() == other.ordered());
+   result_ = same;
+   return Status::OK();
+ }
+
+ // Extension types decide equality themselves through ExtensionEquals();
+ // right_ is static_cast to ExtensionType before being handed over.
+ Status Visit(const ExtensionType& left) {
+ result_ = left.ExtensionEquals(static_cast<const ExtensionType&>(right_));
+ return Status::OK();
+ }
+
+ // Outcome recorded by the most recent Visit(); false if none has run yet.
+ bool result() const { return result_; }
+
+ protected:
+ const DataType& right_;
+ bool check_metadata_;
+ bool result_;
+};
+
bool ArrayEquals(const Array& left, const Array& right, const EqualOptions& opts,
bool floating_approximate);
bool ScalarEquals(const Scalar& left, const Scalar& right, const EqualOptions& options,
bool floating_approximate);
-class ScalarEqualsVisitor {
- public:
+class ScalarEqualsVisitor {
+ public:
// PRE-CONDITIONS:
// - the types are equal
// - the scalars are non-null
@@ -687,121 +687,121 @@ class ScalarEqualsVisitor {
options_(opts),
floating_approximate_(floating_approximate),
result_(false) {}
-
- Status Visit(const NullScalar& left) {
- result_ = true;
- return Status::OK();
- }
-
- Status Visit(const BooleanScalar& left) {
- const auto& right = checked_cast<const BooleanScalar&>(right_);
- result_ = left.value == right.value;
- return Status::OK();
- }
-
- template <typename T>
+
+ Status Visit(const NullScalar& left) {
+ result_ = true;
+ return Status::OK();
+ }
+
+ Status Visit(const BooleanScalar& left) {
+ const auto& right = checked_cast<const BooleanScalar&>(right_);
+ result_ = left.value == right.value;
+ return Status::OK();
+ }
+
+ template <typename T>
typename std::enable_if<(is_primitive_ctype<typename T::TypeClass>::value ||
is_temporal_type<typename T::TypeClass>::value),
- Status>::type
- Visit(const T& left_) {
- const auto& right = checked_cast<const T&>(right_);
- result_ = right.value == left_.value;
- return Status::OK();
- }
-
+ Status>::type
+ Visit(const T& left_) {
+ const auto& right = checked_cast<const T&>(right_);
+ result_ = right.value == left_.value;
+ return Status::OK();
+ }
+
Status Visit(const FloatScalar& left) { return CompareFloating(left); }
Status Visit(const DoubleScalar& left) { return CompareFloating(left); }
- template <typename T>
- typename std::enable_if<std::is_base_of<BaseBinaryScalar, T>::value, Status>::type
- Visit(const T& left) {
- const auto& right = checked_cast<const BaseBinaryScalar&>(right_);
- result_ = internal::SharedPtrEquals(left.value, right.value);
- return Status::OK();
- }
-
- Status Visit(const Decimal128Scalar& left) {
- const auto& right = checked_cast<const Decimal128Scalar&>(right_);
- result_ = left.value == right.value;
- return Status::OK();
- }
-
+ template <typename T>
+ typename std::enable_if<std::is_base_of<BaseBinaryScalar, T>::value, Status>::type
+ Visit(const T& left) {
+ const auto& right = checked_cast<const BaseBinaryScalar&>(right_);
+ result_ = internal::SharedPtrEquals(left.value, right.value);
+ return Status::OK();
+ }
+
+ Status Visit(const Decimal128Scalar& left) {
+ const auto& right = checked_cast<const Decimal128Scalar&>(right_);
+ result_ = left.value == right.value;
+ return Status::OK();
+ }
+
Status Visit(const Decimal256Scalar& left) {
const auto& right = checked_cast<const Decimal256Scalar&>(right_);
result_ = left.value == right.value;
return Status::OK();
}
- Status Visit(const ListScalar& left) {
- const auto& right = checked_cast<const ListScalar&>(right_);
+ Status Visit(const ListScalar& left) {
+ const auto& right = checked_cast<const ListScalar&>(right_);
result_ = ArrayEquals(*left.value, *right.value, options_, floating_approximate_);
- return Status::OK();
- }
-
- Status Visit(const LargeListScalar& left) {
- const auto& right = checked_cast<const LargeListScalar&>(right_);
+ return Status::OK();
+ }
+
+ Status Visit(const LargeListScalar& left) {
+ const auto& right = checked_cast<const LargeListScalar&>(right_);
result_ = ArrayEquals(*left.value, *right.value, options_, floating_approximate_);
- return Status::OK();
- }
-
- Status Visit(const MapScalar& left) {
- const auto& right = checked_cast<const MapScalar&>(right_);
+ return Status::OK();
+ }
+
+ Status Visit(const MapScalar& left) {
+ const auto& right = checked_cast<const MapScalar&>(right_);
result_ = ArrayEquals(*left.value, *right.value, options_, floating_approximate_);
- return Status::OK();
- }
-
- Status Visit(const FixedSizeListScalar& left) {
- const auto& right = checked_cast<const FixedSizeListScalar&>(right_);
+ return Status::OK();
+ }
+
+ Status Visit(const FixedSizeListScalar& left) {
+ const auto& right = checked_cast<const FixedSizeListScalar&>(right_);
result_ = ArrayEquals(*left.value, *right.value, options_, floating_approximate_);
- return Status::OK();
- }
-
- Status Visit(const StructScalar& left) {
- const auto& right = checked_cast<const StructScalar&>(right_);
-
- if (right.value.size() != left.value.size()) {
- result_ = false;
- } else {
- bool all_equals = true;
- for (size_t i = 0; i < left.value.size() && all_equals; i++) {
+ return Status::OK();
+ }
+
+ Status Visit(const StructScalar& left) {
+ const auto& right = checked_cast<const StructScalar&>(right_);
+
+ if (right.value.size() != left.value.size()) {
+ result_ = false;
+ } else {
+ bool all_equals = true;
+ for (size_t i = 0; i < left.value.size() && all_equals; i++) {
all_equals &= ScalarEquals(*left.value[i], *right.value[i], options_,
floating_approximate_);
- }
- result_ = all_equals;
- }
-
- return Status::OK();
- }
-
- Status Visit(const UnionScalar& left) {
- const auto& right = checked_cast<const UnionScalar&>(right_);
- if (left.is_valid && right.is_valid) {
+ }
+ result_ = all_equals;
+ }
+
+ return Status::OK();
+ }
+
+ Status Visit(const UnionScalar& left) {
+ const auto& right = checked_cast<const UnionScalar&>(right_);
+ if (left.is_valid && right.is_valid) {
result_ = ScalarEquals(*left.value, *right.value, options_, floating_approximate_);
- } else if (!left.is_valid && !right.is_valid) {
- result_ = true;
- } else {
- result_ = false;
- }
- return Status::OK();
- }
-
- Status Visit(const DictionaryScalar& left) {
- const auto& right = checked_cast<const DictionaryScalar&>(right_);
+ } else if (!left.is_valid && !right.is_valid) {
+ result_ = true;
+ } else {
+ result_ = false;
+ }
+ return Status::OK();
+ }
+
+ Status Visit(const DictionaryScalar& left) {
+ const auto& right = checked_cast<const DictionaryScalar&>(right_);
result_ = ScalarEquals(*left.value.index, *right.value.index, options_,
floating_approximate_) &&
ArrayEquals(*left.value.dictionary, *right.value.dictionary, options_,
floating_approximate_);
- return Status::OK();
- }
-
- Status Visit(const ExtensionScalar& left) {
- return Status::NotImplemented("extension");
- }
-
- bool result() const { return result_; }
-
- protected:
+ return Status::OK();
+ }
+
+ Status Visit(const ExtensionScalar& left) {
+ return Status::NotImplemented("extension");
+ }
+
+ bool result() const { return result_; }
+
+ protected:
// For CompareFloating (templated local classes or lambdas not supported in C++11)
template <typename ScalarType>
struct ComparatorVisitor {
@@ -825,89 +825,89 @@ class ScalarEqualsVisitor {
return Status::OK();
}
- const Scalar& right_;
+ const Scalar& right_;
const EqualOptions options_;
const bool floating_approximate_;
- bool result_;
-};
-
+ bool result_;
+};
+
Status PrintDiff(const Array& left, const Array& right, std::ostream* os);
+// Writes a description of how `left` and `right` differ to `os`.
+// A null `os` means nothing is written and OK is returned.
+// For non-dictionary arrays, (left_offset, left_length) and
+// (right_offset, right_length) are passed to Slice() to select what is diffed.
Status PrintDiff(const Array& left, const Array& right, int64_t left_offset,
int64_t left_length, int64_t right_offset, int64_t right_length,
std::ostream* os) {
- if (os == nullptr) {
- return Status::OK();
- }
-
- if (!left.type()->Equals(right.type())) {
- *os << "# Array types differed: " << *left.type() << " vs " << *right.type()
- << std::endl;
- return Status::OK();
- }
-
- if (left.type()->id() == Type::DICTIONARY) {
- *os << "# Dictionary arrays differed" << std::endl;
-
- const auto& left_dict = checked_cast<const DictionaryArray&>(left);
- const auto& right_dict = checked_cast<const DictionaryArray&>(right);
-
- *os << "## dictionary diff";
- auto pos = os->tellp();
- RETURN_NOT_OK(PrintDiff(*left_dict.dictionary(), *right_dict.dictionary(), os));
- if (os->tellp() == pos) {
- *os << std::endl;
- }
-
- *os << "## indices diff";
- pos = os->tellp();
- RETURN_NOT_OK(PrintDiff(*left_dict.indices(), *right_dict.indices(), os));
- if (os->tellp() == pos) {
- *os << std::endl;
- }
- return Status::OK();
- }
-
+ if (os == nullptr) {
+ return Status::OK();
+ }
+
+ // Arrays of different types only get a one-line report.
+ if (!left.type()->Equals(right.type())) {
+ *os << "# Array types differed: " << *left.type() << " vs " << *right.type()
+ << std::endl;
+ return Status::OK();
+ }
+
+ // Dictionary arrays: dictionaries and indices are diffed as whole arrays;
+ // the offset/length arguments are not applied in this branch.
+ if (left.type()->id() == Type::DICTIONARY) {
+ *os << "# Dictionary arrays differed" << std::endl;
+
+ const auto& left_dict = checked_cast<const DictionaryArray&>(left);
+ const auto& right_dict = checked_cast<const DictionaryArray&>(right);
+
+ *os << "## dictionary diff";
+ auto pos = os->tellp();
+ RETURN_NOT_OK(PrintDiff(*left_dict.dictionary(), *right_dict.dictionary(), os));
+ // If the nested diff wrote nothing, end the heading line here.
+ if (os->tellp() == pos) {
+ *os << std::endl;
+ }
+
+ *os << "## indices diff";
+ pos = os->tellp();
+ RETURN_NOT_OK(PrintDiff(*left_dict.indices(), *right_dict.indices(), os));
+ if (os->tellp() == pos) {
+ *os << std::endl;
+ }
+ return Status::OK();
+ }
+
+// General case: diff the requested slices and format the resulting edits.
const auto left_slice = left.Slice(left_offset, left_length);
const auto right_slice = right.Slice(right_offset, right_length);
ARROW_ASSIGN_OR_RAISE(auto edits,
Diff(*left_slice, *right_slice, default_memory_pool()));
- ARROW_ASSIGN_OR_RAISE(auto formatter, MakeUnifiedDiffFormatter(*left.type(), os));
+ ARROW_ASSIGN_OR_RAISE(auto formatter, MakeUnifiedDiffFormatter(*left.type(), os));
return formatter(*edits, *left_slice, *right_slice);
-}
-
+}
+
Status PrintDiff(const Array& left, const Array& right, std::ostream* os) {
return PrintDiff(left, right, 0, left.length(), 0, right.length(), os);
}
-
+
bool ArrayRangeEquals(const Array& left, const Array& right, int64_t left_start_idx,
int64_t left_end_idx, int64_t right_start_idx,
const EqualOptions& options, bool floating_approximate) {
bool are_equal =
CompareArrayRanges(*left.data(), *right.data(), left_start_idx, left_end_idx,
right_start_idx, options, floating_approximate);
- if (!are_equal) {
+ if (!are_equal) {
ARROW_IGNORE_EXPR(PrintDiff(
left, right, left_start_idx, left_end_idx, right_start_idx,
right_start_idx + (left_end_idx - left_start_idx), options.diff_sink()));
- }
- return are_equal;
-}
-
+ }
+ return are_equal;
+}
+
bool ArrayEquals(const Array& left, const Array& right, const EqualOptions& opts,
bool floating_approximate) {
if (left.length() != right.length()) {
ARROW_IGNORE_EXPR(PrintDiff(left, right, opts.diff_sink()));
return false;
- }
+ }
return ArrayRangeEquals(left, right, 0, left.length(), 0, opts, floating_approximate);
-}
-
+}
+
bool ScalarEquals(const Scalar& left, const Scalar& right, const EqualOptions& options,
bool floating_approximate) {
if (&left == &right && IdentityImpliesEquality(*left.type, options)) {
return true;
- }
+ }
if (!left.type->Equals(right.type)) {
return false;
}
@@ -921,8 +921,8 @@ bool ScalarEquals(const Scalar& left, const Scalar& right, const EqualOptions& o
auto error = VisitScalarInline(left, &visitor);
DCHECK_OK(error);
return visitor.result();
-}
-
+}
+
} // namespace
bool ArrayRangeEquals(const Array& left, const Array& right, int64_t left_start_idx,
@@ -962,343 +962,343 @@ bool ScalarApproxEquals(const Scalar& left, const Scalar& right,
return ScalarEquals(left, right, options, floating_approximate);
}
-namespace {
-
-bool StridedIntegerTensorContentEquals(const int dim_index, int64_t left_offset,
- int64_t right_offset, int elem_size,
- const Tensor& left, const Tensor& right) {
- const auto n = left.shape()[dim_index];
- const auto left_stride = left.strides()[dim_index];
- const auto right_stride = right.strides()[dim_index];
- if (dim_index == left.ndim() - 1) {
- for (int64_t i = 0; i < n; ++i) {
- if (memcmp(left.raw_data() + left_offset + i * left_stride,
- right.raw_data() + right_offset + i * right_stride, elem_size) != 0) {
- return false;
- }
- }
- return true;
- }
- for (int64_t i = 0; i < n; ++i) {
- if (!StridedIntegerTensorContentEquals(dim_index + 1, left_offset, right_offset,
- elem_size, left, right)) {
- return false;
- }
- left_offset += left_stride;
- right_offset += right_stride;
- }
- return true;
-}
-
-bool IntegerTensorEquals(const Tensor& left, const Tensor& right) {
- bool are_equal;
- // The arrays are the same object
- if (&left == &right) {
- are_equal = true;
- } else {
- const bool left_row_major_p = left.is_row_major();
- const bool left_column_major_p = left.is_column_major();
- const bool right_row_major_p = right.is_row_major();
- const bool right_column_major_p = right.is_column_major();
-
- if (!(left_row_major_p && right_row_major_p) &&
- !(left_column_major_p && right_column_major_p)) {
- const auto& type = checked_cast<const FixedWidthType&>(*left.type());
- are_equal = StridedIntegerTensorContentEquals(0, 0, 0, internal::GetByteWidth(type),
- left, right);
- } else {
- const int byte_width = internal::GetByteWidth(*left.type());
- DCHECK_GT(byte_width, 0);
-
- const uint8_t* left_data = left.data()->data();
- const uint8_t* right_data = right.data()->data();
-
- are_equal = memcmp(left_data, right_data,
- static_cast<size_t>(byte_width * left.size())) == 0;
- }
- }
- return are_equal;
-}
-
-template <typename DataType>
-bool StridedFloatTensorContentEquals(const int dim_index, int64_t left_offset,
- int64_t right_offset, const Tensor& left,
- const Tensor& right, const EqualOptions& opts) {
- using c_type = typename DataType::c_type;
- static_assert(std::is_floating_point<c_type>::value,
- "DataType must be a floating point type");
-
- const auto n = left.shape()[dim_index];
- const auto left_stride = left.strides()[dim_index];
- const auto right_stride = right.strides()[dim_index];
- if (dim_index == left.ndim() - 1) {
- auto left_data = left.raw_data();
- auto right_data = right.raw_data();
- if (opts.nans_equal()) {
- for (int64_t i = 0; i < n; ++i) {
- c_type left_value =
- *reinterpret_cast<const c_type*>(left_data + left_offset + i * left_stride);
- c_type right_value = *reinterpret_cast<const c_type*>(right_data + right_offset +
- i * right_stride);
- if (left_value != right_value &&
- !(std::isnan(left_value) && std::isnan(right_value))) {
- return false;
- }
- }
- } else {
- for (int64_t i = 0; i < n; ++i) {
- c_type left_value =
- *reinterpret_cast<const c_type*>(left_data + left_offset + i * left_stride);
- c_type right_value = *reinterpret_cast<const c_type*>(right_data + right_offset +
- i * right_stride);
- if (left_value != right_value) {
- return false;
- }
- }
- }
- return true;
- }
- for (int64_t i = 0; i < n; ++i) {
- if (!StridedFloatTensorContentEquals<DataType>(dim_index + 1, left_offset,
- right_offset, left, right, opts)) {
- return false;
- }
- left_offset += left_stride;
- right_offset += right_stride;
- }
- return true;
-}
-
-template <typename DataType>
-bool FloatTensorEquals(const Tensor& left, const Tensor& right,
- const EqualOptions& opts) {
- return StridedFloatTensorContentEquals<DataType>(0, 0, 0, left, right, opts);
-}
-
-} // namespace
-
-bool TensorEquals(const Tensor& left, const Tensor& right, const EqualOptions& opts) {
- if (left.type_id() != right.type_id()) {
- return false;
- } else if (left.size() == 0 && right.size() == 0) {
- return true;
- } else if (left.shape() != right.shape()) {
- return false;
- }
-
- switch (left.type_id()) {
- // TODO: Support half-float tensors
- // case Type::HALF_FLOAT:
- case Type::FLOAT:
- return FloatTensorEquals<FloatType>(left, right, opts);
-
- case Type::DOUBLE:
- return FloatTensorEquals<DoubleType>(left, right, opts);
-
- default:
- return IntegerTensorEquals(left, right);
- }
-}
-
-namespace {
-
-template <typename LeftSparseIndexType, typename RightSparseIndexType>
-struct SparseTensorEqualsImpl {
- static bool Compare(const SparseTensorImpl<LeftSparseIndexType>& left,
- const SparseTensorImpl<RightSparseIndexType>& right,
- const EqualOptions&) {
- // TODO(mrkn): should we support the equality among different formats?
- return false;
- }
-};
-
-bool IntegerSparseTensorDataEquals(const uint8_t* left_data, const uint8_t* right_data,
- const int byte_width, const int64_t length) {
- if (left_data == right_data) {
- return true;
- }
- return memcmp(left_data, right_data, static_cast<size_t>(byte_width * length)) == 0;
-}
-
-template <typename DataType>
-bool FloatSparseTensorDataEquals(const typename DataType::c_type* left_data,
- const typename DataType::c_type* right_data,
- const int64_t length, const EqualOptions& opts) {
- using c_type = typename DataType::c_type;
- static_assert(std::is_floating_point<c_type>::value,
- "DataType must be a floating point type");
- if (opts.nans_equal()) {
- if (left_data == right_data) {
- return true;
- }
-
- for (int64_t i = 0; i < length; ++i) {
- const auto left = left_data[i];
- const auto right = right_data[i];
- if (left != right && !(std::isnan(left) && std::isnan(right))) {
- return false;
- }
- }
- } else {
- for (int64_t i = 0; i < length; ++i) {
- if (left_data[i] != right_data[i]) {
- return false;
- }
- }
- }
- return true;
-}
-
-template <typename SparseIndexType>
-struct SparseTensorEqualsImpl<SparseIndexType, SparseIndexType> {
- static bool Compare(const SparseTensorImpl<SparseIndexType>& left,
- const SparseTensorImpl<SparseIndexType>& right,
- const EqualOptions& opts) {
- DCHECK(left.type()->id() == right.type()->id());
- DCHECK(left.shape() == right.shape());
-
- const auto length = left.non_zero_length();
- DCHECK(length == right.non_zero_length());
-
- const auto& left_index = checked_cast<const SparseIndexType&>(*left.sparse_index());
- const auto& right_index = checked_cast<const SparseIndexType&>(*right.sparse_index());
-
- if (!left_index.Equals(right_index)) {
- return false;
- }
-
- const int byte_width = internal::GetByteWidth(*left.type());
- DCHECK_GT(byte_width, 0);
-
- const uint8_t* left_data = left.data()->data();
- const uint8_t* right_data = right.data()->data();
- switch (left.type()->id()) {
- // TODO: Support half-float tensors
- // case Type::HALF_FLOAT:
- case Type::FLOAT:
- return FloatSparseTensorDataEquals<FloatType>(
- reinterpret_cast<const float*>(left_data),
- reinterpret_cast<const float*>(right_data), length, opts);
-
- case Type::DOUBLE:
- return FloatSparseTensorDataEquals<DoubleType>(
- reinterpret_cast<const double*>(left_data),
- reinterpret_cast<const double*>(right_data), length, opts);
-
- default: // Integer cases
- return IntegerSparseTensorDataEquals(left_data, right_data, byte_width, length);
- }
- }
-};
-
-template <typename SparseIndexType>
-inline bool SparseTensorEqualsImplDispatch(const SparseTensorImpl<SparseIndexType>& left,
- const SparseTensor& right,
- const EqualOptions& opts) {
- switch (right.format_id()) {
- case SparseTensorFormat::COO: {
- const auto& right_coo =
- checked_cast<const SparseTensorImpl<SparseCOOIndex>&>(right);
- return SparseTensorEqualsImpl<SparseIndexType, SparseCOOIndex>::Compare(
- left, right_coo, opts);
- }
-
- case SparseTensorFormat::CSR: {
- const auto& right_csr =
- checked_cast<const SparseTensorImpl<SparseCSRIndex>&>(right);
- return SparseTensorEqualsImpl<SparseIndexType, SparseCSRIndex>::Compare(
- left, right_csr, opts);
- }
-
- case SparseTensorFormat::CSC: {
- const auto& right_csc =
- checked_cast<const SparseTensorImpl<SparseCSCIndex>&>(right);
- return SparseTensorEqualsImpl<SparseIndexType, SparseCSCIndex>::Compare(
- left, right_csc, opts);
- }
-
- case SparseTensorFormat::CSF: {
- const auto& right_csf =
- checked_cast<const SparseTensorImpl<SparseCSFIndex>&>(right);
- return SparseTensorEqualsImpl<SparseIndexType, SparseCSFIndex>::Compare(
- left, right_csf, opts);
- }
-
- default:
- return false;
- }
-}
-
-} // namespace
-
-bool SparseTensorEquals(const SparseTensor& left, const SparseTensor& right,
- const EqualOptions& opts) {
- if (left.type()->id() != right.type()->id()) {
- return false;
- } else if (left.size() == 0 && right.size() == 0) {
- return true;
- } else if (left.shape() != right.shape()) {
- return false;
- } else if (left.non_zero_length() != right.non_zero_length()) {
- return false;
- }
-
- switch (left.format_id()) {
- case SparseTensorFormat::COO: {
- const auto& left_coo = checked_cast<const SparseTensorImpl<SparseCOOIndex>&>(left);
- return SparseTensorEqualsImplDispatch(left_coo, right, opts);
- }
-
- case SparseTensorFormat::CSR: {
- const auto& left_csr = checked_cast<const SparseTensorImpl<SparseCSRIndex>&>(left);
- return SparseTensorEqualsImplDispatch(left_csr, right, opts);
- }
-
- case SparseTensorFormat::CSC: {
- const auto& left_csc = checked_cast<const SparseTensorImpl<SparseCSCIndex>&>(left);
- return SparseTensorEqualsImplDispatch(left_csc, right, opts);
- }
-
- case SparseTensorFormat::CSF: {
- const auto& left_csf = checked_cast<const SparseTensorImpl<SparseCSFIndex>&>(left);
- return SparseTensorEqualsImplDispatch(left_csf, right, opts);
- }
-
- default:
- return false;
- }
-}
-
-bool TypeEquals(const DataType& left, const DataType& right, bool check_metadata) {
- // The arrays are the same object
- if (&left == &right) {
- return true;
- } else if (left.id() != right.id()) {
- return false;
- } else {
- // First try to compute fingerprints
- if (check_metadata) {
- const auto& left_metadata_fp = left.metadata_fingerprint();
- const auto& right_metadata_fp = right.metadata_fingerprint();
- if (left_metadata_fp != right_metadata_fp) {
- return false;
- }
- }
-
- const auto& left_fp = left.fingerprint();
- const auto& right_fp = right.fingerprint();
- if (!left_fp.empty() && !right_fp.empty()) {
- return left_fp == right_fp;
- }
-
- // TODO remove check_metadata here?
- TypeEqualsVisitor visitor(right, check_metadata);
- auto error = VisitTypeInline(left, &visitor);
- if (!error.ok()) {
- DCHECK(false) << "Types are not comparable: " << error.ToString();
- }
- return visitor.result();
- }
-}
-
-} // namespace arrow
+namespace {
+
+bool StridedIntegerTensorContentEquals(const int dim_index, int64_t left_offset,
+ int64_t right_offset, int elem_size,
+ const Tensor& left, const Tensor& right) {
+ const auto n = left.shape()[dim_index];
+ const auto left_stride = left.strides()[dim_index];
+ const auto right_stride = right.strides()[dim_index];
+ if (dim_index == left.ndim() - 1) {
+ for (int64_t i = 0; i < n; ++i) {
+ if (memcmp(left.raw_data() + left_offset + i * left_stride,
+ right.raw_data() + right_offset + i * right_stride, elem_size) != 0) {
+ return false;
+ }
+ }
+ return true;
+ }
+ for (int64_t i = 0; i < n; ++i) {
+ if (!StridedIntegerTensorContentEquals(dim_index + 1, left_offset, right_offset,
+ elem_size, left, right)) {
+ return false;
+ }
+ left_offset += left_stride;
+ right_offset += right_stride;
+ }
+ return true;
+}
+
+bool IntegerTensorEquals(const Tensor& left, const Tensor& right) {
+ bool are_equal;
+ // The arrays are the same object
+ if (&left == &right) {
+ are_equal = true;
+ } else {
+ const bool left_row_major_p = left.is_row_major();
+ const bool left_column_major_p = left.is_column_major();
+ const bool right_row_major_p = right.is_row_major();
+ const bool right_column_major_p = right.is_column_major();
+
+ if (!(left_row_major_p && right_row_major_p) &&
+ !(left_column_major_p && right_column_major_p)) {
+ const auto& type = checked_cast<const FixedWidthType&>(*left.type());
+ are_equal = StridedIntegerTensorContentEquals(0, 0, 0, internal::GetByteWidth(type),
+ left, right);
+ } else {
+ const int byte_width = internal::GetByteWidth(*left.type());
+ DCHECK_GT(byte_width, 0);
+
+ const uint8_t* left_data = left.data()->data();
+ const uint8_t* right_data = right.data()->data();
+
+ are_equal = memcmp(left_data, right_data,
+ static_cast<size_t>(byte_width * left.size())) == 0;
+ }
+ }
+ return are_equal;
+}
+
+template <typename DataType>
+bool StridedFloatTensorContentEquals(const int dim_index, int64_t left_offset,
+ int64_t right_offset, const Tensor& left,
+ const Tensor& right, const EqualOptions& opts) {
+ using c_type = typename DataType::c_type;
+ static_assert(std::is_floating_point<c_type>::value,
+ "DataType must be a floating point type");
+
+ const auto n = left.shape()[dim_index];
+ const auto left_stride = left.strides()[dim_index];
+ const auto right_stride = right.strides()[dim_index];
+ if (dim_index == left.ndim() - 1) {
+ auto left_data = left.raw_data();
+ auto right_data = right.raw_data();
+ if (opts.nans_equal()) {
+ for (int64_t i = 0; i < n; ++i) {
+ c_type left_value =
+ *reinterpret_cast<const c_type*>(left_data + left_offset + i * left_stride);
+ c_type right_value = *reinterpret_cast<const c_type*>(right_data + right_offset +
+ i * right_stride);
+ if (left_value != right_value &&
+ !(std::isnan(left_value) && std::isnan(right_value))) {
+ return false;
+ }
+ }
+ } else {
+ for (int64_t i = 0; i < n; ++i) {
+ c_type left_value =
+ *reinterpret_cast<const c_type*>(left_data + left_offset + i * left_stride);
+ c_type right_value = *reinterpret_cast<const c_type*>(right_data + right_offset +
+ i * right_stride);
+ if (left_value != right_value) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+ for (int64_t i = 0; i < n; ++i) {
+ if (!StridedFloatTensorContentEquals<DataType>(dim_index + 1, left_offset,
+ right_offset, left, right, opts)) {
+ return false;
+ }
+ left_offset += left_stride;
+ right_offset += right_stride;
+ }
+ return true;
+}
+
+template <typename DataType>
+bool FloatTensorEquals(const Tensor& left, const Tensor& right,
+ const EqualOptions& opts) {
+ return StridedFloatTensorContentEquals<DataType>(0, 0, 0, left, right, opts);
+}
+
+} // namespace
+
+bool TensorEquals(const Tensor& left, const Tensor& right, const EqualOptions& opts) {
+ if (left.type_id() != right.type_id()) {
+ return false;
+ } else if (left.size() == 0 && right.size() == 0) {
+ return true;
+ } else if (left.shape() != right.shape()) {
+ return false;
+ }
+
+ switch (left.type_id()) {
+ // TODO: Support half-float tensors
+ // case Type::HALF_FLOAT:
+ case Type::FLOAT:
+ return FloatTensorEquals<FloatType>(left, right, opts);
+
+ case Type::DOUBLE:
+ return FloatTensorEquals<DoubleType>(left, right, opts);
+
+ default:
+ return IntegerTensorEquals(left, right);
+ }
+}
+
+namespace {
+
+template <typename LeftSparseIndexType, typename RightSparseIndexType>
+struct SparseTensorEqualsImpl {
+ static bool Compare(const SparseTensorImpl<LeftSparseIndexType>& left,
+ const SparseTensorImpl<RightSparseIndexType>& right,
+ const EqualOptions&) {
+ // TODO(mrkn): should we support the equality among different formats?
+ return false;
+ }
+};
+
+bool IntegerSparseTensorDataEquals(const uint8_t* left_data, const uint8_t* right_data,
+ const int byte_width, const int64_t length) {
+ if (left_data == right_data) {
+ return true;
+ }
+ return memcmp(left_data, right_data, static_cast<size_t>(byte_width * length)) == 0;
+}
+
+template <typename DataType>
+bool FloatSparseTensorDataEquals(const typename DataType::c_type* left_data,
+ const typename DataType::c_type* right_data,
+ const int64_t length, const EqualOptions& opts) {
+ using c_type = typename DataType::c_type;
+ static_assert(std::is_floating_point<c_type>::value,
+ "DataType must be a floating point type");
+ if (opts.nans_equal()) {
+ if (left_data == right_data) {
+ return true;
+ }
+
+ for (int64_t i = 0; i < length; ++i) {
+ const auto left = left_data[i];
+ const auto right = right_data[i];
+ if (left != right && !(std::isnan(left) && std::isnan(right))) {
+ return false;
+ }
+ }
+ } else {
+ for (int64_t i = 0; i < length; ++i) {
+ if (left_data[i] != right_data[i]) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+template <typename SparseIndexType>
+struct SparseTensorEqualsImpl<SparseIndexType, SparseIndexType> {
+ static bool Compare(const SparseTensorImpl<SparseIndexType>& left,
+ const SparseTensorImpl<SparseIndexType>& right,
+ const EqualOptions& opts) {
+ DCHECK(left.type()->id() == right.type()->id());
+ DCHECK(left.shape() == right.shape());
+
+ const auto length = left.non_zero_length();
+ DCHECK(length == right.non_zero_length());
+
+ const auto& left_index = checked_cast<const SparseIndexType&>(*left.sparse_index());
+ const auto& right_index = checked_cast<const SparseIndexType&>(*right.sparse_index());
+
+ if (!left_index.Equals(right_index)) {
+ return false;
+ }
+
+ const int byte_width = internal::GetByteWidth(*left.type());
+ DCHECK_GT(byte_width, 0);
+
+ const uint8_t* left_data = left.data()->data();
+ const uint8_t* right_data = right.data()->data();
+ switch (left.type()->id()) {
+ // TODO: Support half-float tensors
+ // case Type::HALF_FLOAT:
+ case Type::FLOAT:
+ return FloatSparseTensorDataEquals<FloatType>(
+ reinterpret_cast<const float*>(left_data),
+ reinterpret_cast<const float*>(right_data), length, opts);
+
+ case Type::DOUBLE:
+ return FloatSparseTensorDataEquals<DoubleType>(
+ reinterpret_cast<const double*>(left_data),
+ reinterpret_cast<const double*>(right_data), length, opts);
+
+ default: // Integer cases
+ return IntegerSparseTensorDataEquals(left_data, right_data, byte_width, length);
+ }
+ }
+};
+
+template <typename SparseIndexType>
+inline bool SparseTensorEqualsImplDispatch(const SparseTensorImpl<SparseIndexType>& left,
+ const SparseTensor& right,
+ const EqualOptions& opts) {
+ switch (right.format_id()) {
+ case SparseTensorFormat::COO: {
+ const auto& right_coo =
+ checked_cast<const SparseTensorImpl<SparseCOOIndex>&>(right);
+ return SparseTensorEqualsImpl<SparseIndexType, SparseCOOIndex>::Compare(
+ left, right_coo, opts);
+ }
+
+ case SparseTensorFormat::CSR: {
+ const auto& right_csr =
+ checked_cast<const SparseTensorImpl<SparseCSRIndex>&>(right);
+ return SparseTensorEqualsImpl<SparseIndexType, SparseCSRIndex>::Compare(
+ left, right_csr, opts);
+ }
+
+ case SparseTensorFormat::CSC: {
+ const auto& right_csc =
+ checked_cast<const SparseTensorImpl<SparseCSCIndex>&>(right);
+ return SparseTensorEqualsImpl<SparseIndexType, SparseCSCIndex>::Compare(
+ left, right_csc, opts);
+ }
+
+ case SparseTensorFormat::CSF: {
+ const auto& right_csf =
+ checked_cast<const SparseTensorImpl<SparseCSFIndex>&>(right);
+ return SparseTensorEqualsImpl<SparseIndexType, SparseCSFIndex>::Compare(
+ left, right_csf, opts);
+ }
+
+ default:
+ return false;
+ }
+}
+
+} // namespace
+
+bool SparseTensorEquals(const SparseTensor& left, const SparseTensor& right,
+ const EqualOptions& opts) {
+ if (left.type()->id() != right.type()->id()) {
+ return false;
+ } else if (left.size() == 0 && right.size() == 0) {
+ return true;
+ } else if (left.shape() != right.shape()) {
+ return false;
+ } else if (left.non_zero_length() != right.non_zero_length()) {
+ return false;
+ }
+
+ switch (left.format_id()) {
+ case SparseTensorFormat::COO: {
+ const auto& left_coo = checked_cast<const SparseTensorImpl<SparseCOOIndex>&>(left);
+ return SparseTensorEqualsImplDispatch(left_coo, right, opts);
+ }
+
+ case SparseTensorFormat::CSR: {
+ const auto& left_csr = checked_cast<const SparseTensorImpl<SparseCSRIndex>&>(left);
+ return SparseTensorEqualsImplDispatch(left_csr, right, opts);
+ }
+
+ case SparseTensorFormat::CSC: {
+ const auto& left_csc = checked_cast<const SparseTensorImpl<SparseCSCIndex>&>(left);
+ return SparseTensorEqualsImplDispatch(left_csc, right, opts);
+ }
+
+ case SparseTensorFormat::CSF: {
+ const auto& left_csf = checked_cast<const SparseTensorImpl<SparseCSFIndex>&>(left);
+ return SparseTensorEqualsImplDispatch(left_csf, right, opts);
+ }
+
+ default:
+ return false;
+ }
+}
+
+bool TypeEquals(const DataType& left, const DataType& right, bool check_metadata) {
+ // The arrays are the same object
+ if (&left == &right) {
+ return true;
+ } else if (left.id() != right.id()) {
+ return false;
+ } else {
+ // First try to compute fingerprints
+ if (check_metadata) {
+ const auto& left_metadata_fp = left.metadata_fingerprint();
+ const auto& right_metadata_fp = right.metadata_fingerprint();
+ if (left_metadata_fp != right_metadata_fp) {
+ return false;
+ }
+ }
+
+ const auto& left_fp = left.fingerprint();
+ const auto& right_fp = right.fingerprint();
+ if (!left_fp.empty() && !right_fp.empty()) {
+ return left_fp == right_fp;
+ }
+
+ // TODO remove check_metadata here?
+ TypeEqualsVisitor visitor(right, check_metadata);
+ auto error = VisitTypeInline(left, &visitor);
+ if (!error.ok()) {
+ DCHECK(false) << "Types are not comparable: " << error.ToString();
+ }
+ return visitor.result();
+ }
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compare.h b/contrib/libs/apache/arrow/cpp/src/arrow/compare.h
index 6769b23867b..c4c2c7147ca 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compare.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compare.h
@@ -1,99 +1,99 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Functions for comparing Arrow data structures
-
-#pragma once
-
-#include <cstdint>
-#include <iosfwd>
-
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class Array;
-class DataType;
-class Tensor;
-class SparseTensor;
-struct Scalar;
-
-static constexpr double kDefaultAbsoluteTolerance = 1E-5;
-
-/// A container of options for equality comparisons
-class EqualOptions {
- public:
- /// Whether or not NaNs are considered equal.
- bool nans_equal() const { return nans_equal_; }
-
- /// Return a new EqualOptions object with the "nans_equal" property changed.
- EqualOptions nans_equal(bool v) const {
- auto res = EqualOptions(*this);
- res.nans_equal_ = v;
- return res;
- }
-
- /// The absolute tolerance for approximate comparisons of floating-point values.
- double atol() const { return atol_; }
-
- /// Return a new EqualOptions object with the "atol" property changed.
- EqualOptions atol(double v) const {
- auto res = EqualOptions(*this);
- res.atol_ = v;
- return res;
- }
-
- /// The ostream to which a diff will be formatted if arrays disagree.
- /// If this is null (the default) no diff will be formatted.
- std::ostream* diff_sink() const { return diff_sink_; }
-
- /// Return a new EqualOptions object with the "diff_sink" property changed.
- /// This option will be ignored if diff formatting of the types of compared arrays is
- /// not supported.
- EqualOptions diff_sink(std::ostream* diff_sink) const {
- auto res = EqualOptions(*this);
- res.diff_sink_ = diff_sink;
- return res;
- }
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Functions for comparing Arrow data structures
+
+#pragma once
+
+#include <cstdint>
+#include <iosfwd>
+
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Array;
+class DataType;
+class Tensor;
+class SparseTensor;
+struct Scalar;
+
+static constexpr double kDefaultAbsoluteTolerance = 1E-5;
+
+/// A container of options for equality comparisons
+class EqualOptions {
+ public:
+ /// Whether or not NaNs are considered equal.
+ bool nans_equal() const { return nans_equal_; }
+
+ /// Return a new EqualOptions object with the "nans_equal" property changed.
+ EqualOptions nans_equal(bool v) const {
+ auto res = EqualOptions(*this);
+ res.nans_equal_ = v;
+ return res;
+ }
+
+ /// The absolute tolerance for approximate comparisons of floating-point values.
+ double atol() const { return atol_; }
+
+ /// Return a new EqualOptions object with the "atol" property changed.
+ EqualOptions atol(double v) const {
+ auto res = EqualOptions(*this);
+ res.atol_ = v;
+ return res;
+ }
+
+ /// The ostream to which a diff will be formatted if arrays disagree.
+ /// If this is null (the default) no diff will be formatted.
+ std::ostream* diff_sink() const { return diff_sink_; }
+
+ /// Return a new EqualOptions object with the "diff_sink" property changed.
+ /// This option will be ignored if diff formatting of the types of compared arrays is
+ /// not supported.
+ EqualOptions diff_sink(std::ostream* diff_sink) const {
+ auto res = EqualOptions(*this);
+ res.diff_sink_ = diff_sink;
+ return res;
+ }
+
static EqualOptions Defaults() { return {}; }
-
- protected:
- double atol_ = kDefaultAbsoluteTolerance;
- bool nans_equal_ = false;
- std::ostream* diff_sink_ = NULLPTR;
-};
-
-/// Returns true if the arrays are exactly equal
-bool ARROW_EXPORT ArrayEquals(const Array& left, const Array& right,
- const EqualOptions& = EqualOptions::Defaults());
-
-/// Returns true if the arrays are approximately equal. For non-floating point
-/// types, this is equivalent to ArrayEquals(left, right)
-bool ARROW_EXPORT ArrayApproxEquals(const Array& left, const Array& right,
- const EqualOptions& = EqualOptions::Defaults());
-
+
+ protected:
+ double atol_ = kDefaultAbsoluteTolerance;
+ bool nans_equal_ = false;
+ std::ostream* diff_sink_ = NULLPTR;
+};
+
+/// Returns true if the arrays are exactly equal
+bool ARROW_EXPORT ArrayEquals(const Array& left, const Array& right,
+ const EqualOptions& = EqualOptions::Defaults());
+
+/// Returns true if the arrays are approximately equal. For non-floating point
+/// types, this is equivalent to ArrayEquals(left, right)
+bool ARROW_EXPORT ArrayApproxEquals(const Array& left, const Array& right,
+ const EqualOptions& = EqualOptions::Defaults());
+
/// Returns true if indicated equal-length segment of arrays are exactly equal
-bool ARROW_EXPORT ArrayRangeEquals(const Array& left, const Array& right,
- int64_t start_idx, int64_t end_idx,
+bool ARROW_EXPORT ArrayRangeEquals(const Array& left, const Array& right,
+ int64_t start_idx, int64_t end_idx,
int64_t other_start_idx,
const EqualOptions& = EqualOptions::Defaults());
-
+
/// Returns true if indicated equal-length segment of arrays are approximately equal
bool ARROW_EXPORT ArrayRangeApproxEquals(const Array& left, const Array& right,
int64_t start_idx, int64_t end_idx,
@@ -107,21 +107,21 @@ bool ARROW_EXPORT TensorEquals(const Tensor& left, const Tensor& right,
bool ARROW_EXPORT SparseTensorEquals(const SparseTensor& left, const SparseTensor& right,
const EqualOptions& = EqualOptions::Defaults());
-/// Returns true if the type metadata are exactly equal
-/// \param[in] left a DataType
-/// \param[in] right a DataType
-/// \param[in] check_metadata whether to compare KeyValueMetadata for child
-/// fields
-bool ARROW_EXPORT TypeEquals(const DataType& left, const DataType& right,
- bool check_metadata = true);
-
-/// Returns true if scalars are equal
-/// \param[in] left a Scalar
-/// \param[in] right a Scalar
-/// \param[in] options comparison options
-bool ARROW_EXPORT ScalarEquals(const Scalar& left, const Scalar& right,
- const EqualOptions& options = EqualOptions::Defaults());
-
+/// Returns true if the type metadata are exactly equal
+/// \param[in] left a DataType
+/// \param[in] right a DataType
+/// \param[in] check_metadata whether to compare KeyValueMetadata for child
+/// fields
+bool ARROW_EXPORT TypeEquals(const DataType& left, const DataType& right,
+ bool check_metadata = true);
+
+/// Returns true if scalars are equal
+/// \param[in] left a Scalar
+/// \param[in] right a Scalar
+/// \param[in] options comparison options
+bool ARROW_EXPORT ScalarEquals(const Scalar& left, const Scalar& right,
+ const EqualOptions& options = EqualOptions::Defaults());
+
/// Returns true if scalars are approximately equal
/// \param[in] left a Scalar
/// \param[in] right a Scalar
@@ -130,4 +130,4 @@ bool ARROW_EXPORT
ScalarApproxEquals(const Scalar& left, const Scalar& right,
const EqualOptions& options = EqualOptions::Defaults());
-} // namespace arrow
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/config.cc b/contrib/libs/apache/arrow/cpp/src/arrow/config.cc
index b93f207161d..b7ab1800fe3 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/config.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/config.cc
@@ -1,51 +1,51 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/config.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/config.h"
#include <cstdint>
-#include "arrow/util/config.h"
+#include "arrow/util/config.h"
#include "arrow/util/cpu_info.h"
-
-namespace arrow {
-
+
+namespace arrow {
+
using internal::CpuInfo;
namespace {
const BuildInfo kBuildInfo = {
- // clang-format off
- ARROW_VERSION,
- ARROW_VERSION_MAJOR,
- ARROW_VERSION_MINOR,
- ARROW_VERSION_PATCH,
- ARROW_VERSION_STRING,
- ARROW_SO_VERSION,
- ARROW_FULL_SO_VERSION,
- ARROW_CXX_COMPILER_ID,
- ARROW_CXX_COMPILER_VERSION,
- ARROW_CXX_COMPILER_FLAGS,
- ARROW_GIT_ID,
- ARROW_GIT_DESCRIPTION,
- ARROW_PACKAGE_KIND,
- // clang-format on
-};
-
+ // clang-format off
+ ARROW_VERSION,
+ ARROW_VERSION_MAJOR,
+ ARROW_VERSION_MINOR,
+ ARROW_VERSION_PATCH,
+ ARROW_VERSION_STRING,
+ ARROW_SO_VERSION,
+ ARROW_FULL_SO_VERSION,
+ ARROW_CXX_COMPILER_ID,
+ ARROW_CXX_COMPILER_VERSION,
+ ARROW_CXX_COMPILER_FLAGS,
+ ARROW_GIT_ID,
+ ARROW_GIT_DESCRIPTION,
+ ARROW_PACKAGE_KIND,
+ // clang-format on
+};
+
template <typename QueryFlagFunction>
std::string MakeSimdLevelString(QueryFlagFunction&& query_flag) {
if (query_flag(CpuInfo::AVX512)) {
@@ -63,8 +63,8 @@ std::string MakeSimdLevelString(QueryFlagFunction&& query_flag) {
}; // namespace
-const BuildInfo& GetBuildInfo() { return kBuildInfo; }
-
+const BuildInfo& GetBuildInfo() { return kBuildInfo; }
+
RuntimeInfo GetRuntimeInfo() {
RuntimeInfo info;
auto cpu_info = CpuInfo::GetInstance();
@@ -75,4 +75,4 @@ RuntimeInfo GetRuntimeInfo() {
return info;
}
-} // namespace arrow
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/config.h b/contrib/libs/apache/arrow/cpp/src/arrow/config.h
index 5ae7e223164..fab9d2d8e45 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/config.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/config.h
@@ -1,50 +1,50 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <string>
-
-#include "arrow/util/config.h" // IWYU pragma: export
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-struct BuildInfo {
- /// The packed version number, e.g. 1002003 (decimal) for Arrow 1.2.3
- int version;
- /// The "major" version number, e.g. 1 for Arrow 1.2.3
- int version_major;
- /// The "minor" version number, e.g. 2 for Arrow 1.2.3
- int version_minor;
- /// The "patch" version number, e.g. 3 for Arrow 1.2.3
- int version_patch;
- /// The version string, e.g. "1.2.3"
- std::string version_string;
- std::string so_version;
- std::string full_so_version;
- std::string compiler_id;
- std::string compiler_version;
- std::string compiler_flags;
- std::string git_id;
- std::string git_description;
- std::string package_kind;
-};
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <string>
+
+#include "arrow/util/config.h" // IWYU pragma: export
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+struct BuildInfo {
+ /// The packed version number, e.g. 1002003 (decimal) for Arrow 1.2.3
+ int version;
+ /// The "major" version number, e.g. 1 for Arrow 1.2.3
+ int version_major;
+ /// The "minor" version number, e.g. 2 for Arrow 1.2.3
+ int version_minor;
+ /// The "patch" version number, e.g. 3 for Arrow 1.2.3
+ int version_patch;
+ /// The version string, e.g. "1.2.3"
+ std::string version_string;
+ std::string so_version;
+ std::string full_so_version;
+ std::string compiler_id;
+ std::string compiler_version;
+ std::string compiler_flags;
+ std::string git_id;
+ std::string git_description;
+ std::string package_kind;
+};
+
struct RuntimeInfo {
/// The enabled SIMD level
///
@@ -56,17 +56,17 @@ struct RuntimeInfo {
std::string detected_simd_level;
};
-/// \brief Get runtime build info.
-///
-/// The returned values correspond to exact loaded version of the Arrow library,
-/// rather than the values frozen at application compile-time through the `ARROW_*`
-/// preprocessor definitions.
-ARROW_EXPORT
-const BuildInfo& GetBuildInfo();
-
+/// \brief Get runtime build info.
+///
+/// The returned values correspond to exact loaded version of the Arrow library,
+/// rather than the values frozen at application compile-time through the `ARROW_*`
+/// preprocessor definitions.
+ARROW_EXPORT
+const BuildInfo& GetBuildInfo();
+
/// \brief Get runtime info.
///
ARROW_EXPORT
RuntimeInfo GetRuntimeInfo();
-} // namespace arrow
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/datum.cc b/contrib/libs/apache/arrow/cpp/src/arrow/datum.cc
index dd10fce3e4d..5f1c63ba0d8 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/datum.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/datum.cc
@@ -1,62 +1,62 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/datum.h"
-
-#include <cstddef>
-#include <memory>
-#include <sstream>
-#include <vector>
-
-#include "arrow/array/array_base.h"
-#include "arrow/array/util.h"
-#include "arrow/chunked_array.h"
-#include "arrow/record_batch.h"
-#include "arrow/scalar.h"
-#include "arrow/table.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/memory.h"
-
-namespace arrow {
-
-static bool CollectionEquals(const std::vector<Datum>& left,
- const std::vector<Datum>& right) {
- if (left.size() != right.size()) {
- return false;
- }
-
- for (size_t i = 0; i < left.size(); i++) {
- if (!left[i].Equals(right[i])) {
- return false;
- }
- }
- return true;
-}
-
-Datum::Datum(const Array& value) : Datum(value.data()) {}
-
-Datum::Datum(const std::shared_ptr<Array>& value)
- : Datum(value ? value->data() : NULLPTR) {}
-
-Datum::Datum(std::shared_ptr<ChunkedArray> value) : value(std::move(value)) {}
-Datum::Datum(std::shared_ptr<RecordBatch> value) : value(std::move(value)) {}
-Datum::Datum(std::shared_ptr<Table> value) : value(std::move(value)) {}
-Datum::Datum(std::vector<Datum> value) : value(std::move(value)) {}
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/datum.h"
+
+#include <cstddef>
+#include <memory>
+#include <sstream>
+#include <vector>
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/util.h"
+#include "arrow/chunked_array.h"
+#include "arrow/record_batch.h"
+#include "arrow/scalar.h"
+#include "arrow/table.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/memory.h"
+
+namespace arrow {
+
+static bool CollectionEquals(const std::vector<Datum>& left,
+ const std::vector<Datum>& right) {
+ if (left.size() != right.size()) {
+ return false;
+ }
+
+ for (size_t i = 0; i < left.size(); i++) {
+ if (!left[i].Equals(right[i])) {
+ return false;
+ }
+ }
+ return true;
+}
+
+Datum::Datum(const Array& value) : Datum(value.data()) {}
+
+Datum::Datum(const std::shared_ptr<Array>& value)
+ : Datum(value ? value->data() : NULLPTR) {}
+
+Datum::Datum(std::shared_ptr<ChunkedArray> value) : value(std::move(value)) {}
+Datum::Datum(std::shared_ptr<RecordBatch> value) : value(std::move(value)) {}
+Datum::Datum(std::shared_ptr<Table> value) : value(std::move(value)) {}
+Datum::Datum(std::vector<Datum> value) : value(std::move(value)) {}
+
Datum::Datum(bool value) : value(std::make_shared<BooleanScalar>(value)) {}
Datum::Datum(int8_t value) : value(std::make_shared<Int8Scalar>(value)) {}
Datum::Datum(uint8_t value) : value(std::make_shared<UInt8Scalar>(value)) {}
@@ -71,34 +71,34 @@ Datum::Datum(double value) : value(std::make_shared<DoubleScalar>(value)) {}
Datum::Datum(std::string value)
: value(std::make_shared<StringScalar>(std::move(value))) {}
Datum::Datum(const char* value) : value(std::make_shared<StringScalar>(value)) {}
-
-Datum::Datum(const ChunkedArray& value)
- : value(std::make_shared<ChunkedArray>(value.chunks(), value.type())) {}
-
-Datum::Datum(const Table& value)
- : value(Table::Make(value.schema(), value.columns(), value.num_rows())) {}
-
-Datum::Datum(const RecordBatch& value)
- : value(RecordBatch::Make(value.schema(), value.num_rows(), value.columns())) {}
-
-std::shared_ptr<Array> Datum::make_array() const {
- DCHECK_EQ(Datum::ARRAY, this->kind());
- return MakeArray(util::get<std::shared_ptr<ArrayData>>(this->value));
-}
-
-std::shared_ptr<DataType> Datum::type() const {
- if (this->kind() == Datum::ARRAY) {
- return util::get<std::shared_ptr<ArrayData>>(this->value)->type;
+
+Datum::Datum(const ChunkedArray& value)
+ : value(std::make_shared<ChunkedArray>(value.chunks(), value.type())) {}
+
+Datum::Datum(const Table& value)
+ : value(Table::Make(value.schema(), value.columns(), value.num_rows())) {}
+
+Datum::Datum(const RecordBatch& value)
+ : value(RecordBatch::Make(value.schema(), value.num_rows(), value.columns())) {}
+
+std::shared_ptr<Array> Datum::make_array() const {
+ DCHECK_EQ(Datum::ARRAY, this->kind());
+ return MakeArray(util::get<std::shared_ptr<ArrayData>>(this->value));
+}
+
+std::shared_ptr<DataType> Datum::type() const {
+ if (this->kind() == Datum::ARRAY) {
+ return util::get<std::shared_ptr<ArrayData>>(this->value)->type;
}
if (this->kind() == Datum::CHUNKED_ARRAY) {
- return util::get<std::shared_ptr<ChunkedArray>>(this->value)->type();
+ return util::get<std::shared_ptr<ChunkedArray>>(this->value)->type();
}
if (this->kind() == Datum::SCALAR) {
- return util::get<std::shared_ptr<Scalar>>(this->value)->type;
- }
+ return util::get<std::shared_ptr<Scalar>>(this->value)->type;
+ }
return nullptr;
-}
-
+}
+
std::shared_ptr<Schema> Datum::schema() const {
if (this->kind() == Datum::RECORD_BATCH) {
return util::get<std::shared_ptr<RecordBatch>>(this->value)->schema();
@@ -109,108 +109,108 @@ std::shared_ptr<Schema> Datum::schema() const {
return nullptr;
}
-int64_t Datum::length() const {
- if (this->kind() == Datum::ARRAY) {
- return util::get<std::shared_ptr<ArrayData>>(this->value)->length;
- } else if (this->kind() == Datum::CHUNKED_ARRAY) {
- return util::get<std::shared_ptr<ChunkedArray>>(this->value)->length();
- } else if (this->kind() == Datum::SCALAR) {
- return 1;
- }
- return kUnknownLength;
-}
-
-int64_t Datum::null_count() const {
- if (this->kind() == Datum::ARRAY) {
- return util::get<std::shared_ptr<ArrayData>>(this->value)->GetNullCount();
- } else if (this->kind() == Datum::CHUNKED_ARRAY) {
- return util::get<std::shared_ptr<ChunkedArray>>(this->value)->null_count();
- } else if (this->kind() == Datum::SCALAR) {
- const auto& val = *util::get<std::shared_ptr<Scalar>>(this->value);
- return val.is_valid ? 0 : 1;
- } else {
- DCHECK(false) << "This function only valid for array-like values";
- return 0;
- }
-}
-
-ArrayVector Datum::chunks() const {
- if (!this->is_arraylike()) {
- return {};
- }
- if (this->is_array()) {
- return {this->make_array()};
- }
- return this->chunked_array()->chunks();
-}
-
-bool Datum::Equals(const Datum& other) const {
- if (this->kind() != other.kind()) return false;
-
- switch (this->kind()) {
- case Datum::NONE:
- return true;
- case Datum::SCALAR:
- return internal::SharedPtrEquals(this->scalar(), other.scalar());
- case Datum::ARRAY:
- return internal::SharedPtrEquals(this->make_array(), other.make_array());
- case Datum::CHUNKED_ARRAY:
- return internal::SharedPtrEquals(this->chunked_array(), other.chunked_array());
- case Datum::RECORD_BATCH:
- return internal::SharedPtrEquals(this->record_batch(), other.record_batch());
- case Datum::TABLE:
- return internal::SharedPtrEquals(this->table(), other.table());
- case Datum::COLLECTION:
- return CollectionEquals(this->collection(), other.collection());
- default:
- return false;
- }
-}
-
-ValueDescr Datum::descr() const {
- if (this->is_arraylike()) {
- return ValueDescr(this->type(), ValueDescr::ARRAY);
- } else if (this->is_scalar()) {
- return ValueDescr(this->type(), ValueDescr::SCALAR);
- } else {
- DCHECK(false) << "Datum is not value-like, this method should not be called";
- return ValueDescr();
- }
-}
-
-ValueDescr::Shape Datum::shape() const {
- if (this->is_arraylike()) {
- return ValueDescr::ARRAY;
- } else if (this->is_scalar()) {
- return ValueDescr::SCALAR;
- } else {
- DCHECK(false) << "Datum is not value-like, this method should not be called";
- return ValueDescr::ANY;
- }
-}
-
-static std::string FormatValueDescr(const ValueDescr& descr) {
- std::stringstream ss;
- switch (descr.shape) {
- case ValueDescr::ANY:
- ss << "any";
- break;
- case ValueDescr::ARRAY:
- ss << "array";
- break;
- case ValueDescr::SCALAR:
- ss << "scalar";
- break;
- default:
- DCHECK(false);
- break;
- }
- ss << "[" << descr.type->ToString() << "]";
- return ss.str();
-}
-
-std::string ValueDescr::ToString() const { return FormatValueDescr(*this); }
-
+int64_t Datum::length() const {
+ if (this->kind() == Datum::ARRAY) {
+ return util::get<std::shared_ptr<ArrayData>>(this->value)->length;
+ } else if (this->kind() == Datum::CHUNKED_ARRAY) {
+ return util::get<std::shared_ptr<ChunkedArray>>(this->value)->length();
+ } else if (this->kind() == Datum::SCALAR) {
+ return 1;
+ }
+ return kUnknownLength;
+}
+
+int64_t Datum::null_count() const {
+ if (this->kind() == Datum::ARRAY) {
+ return util::get<std::shared_ptr<ArrayData>>(this->value)->GetNullCount();
+ } else if (this->kind() == Datum::CHUNKED_ARRAY) {
+ return util::get<std::shared_ptr<ChunkedArray>>(this->value)->null_count();
+ } else if (this->kind() == Datum::SCALAR) {
+ const auto& val = *util::get<std::shared_ptr<Scalar>>(this->value);
+ return val.is_valid ? 0 : 1;
+ } else {
+ DCHECK(false) << "This function only valid for array-like values";
+ return 0;
+ }
+}
+
+ArrayVector Datum::chunks() const {
+ if (!this->is_arraylike()) {
+ return {};
+ }
+ if (this->is_array()) {
+ return {this->make_array()};
+ }
+ return this->chunked_array()->chunks();
+}
+
+bool Datum::Equals(const Datum& other) const {
+ if (this->kind() != other.kind()) return false;
+
+ switch (this->kind()) {
+ case Datum::NONE:
+ return true;
+ case Datum::SCALAR:
+ return internal::SharedPtrEquals(this->scalar(), other.scalar());
+ case Datum::ARRAY:
+ return internal::SharedPtrEquals(this->make_array(), other.make_array());
+ case Datum::CHUNKED_ARRAY:
+ return internal::SharedPtrEquals(this->chunked_array(), other.chunked_array());
+ case Datum::RECORD_BATCH:
+ return internal::SharedPtrEquals(this->record_batch(), other.record_batch());
+ case Datum::TABLE:
+ return internal::SharedPtrEquals(this->table(), other.table());
+ case Datum::COLLECTION:
+ return CollectionEquals(this->collection(), other.collection());
+ default:
+ return false;
+ }
+}
+
+ValueDescr Datum::descr() const {
+ if (this->is_arraylike()) {
+ return ValueDescr(this->type(), ValueDescr::ARRAY);
+ } else if (this->is_scalar()) {
+ return ValueDescr(this->type(), ValueDescr::SCALAR);
+ } else {
+ DCHECK(false) << "Datum is not value-like, this method should not be called";
+ return ValueDescr();
+ }
+}
+
+ValueDescr::Shape Datum::shape() const {
+ if (this->is_arraylike()) {
+ return ValueDescr::ARRAY;
+ } else if (this->is_scalar()) {
+ return ValueDescr::SCALAR;
+ } else {
+ DCHECK(false) << "Datum is not value-like, this method should not be called";
+ return ValueDescr::ANY;
+ }
+}
+
+static std::string FormatValueDescr(const ValueDescr& descr) {
+ std::stringstream ss;
+ switch (descr.shape) {
+ case ValueDescr::ANY:
+ ss << "any";
+ break;
+ case ValueDescr::ARRAY:
+ ss << "array";
+ break;
+ case ValueDescr::SCALAR:
+ ss << "scalar";
+ break;
+ default:
+ DCHECK(false);
+ break;
+ }
+ ss << "[" << descr.type->ToString() << "]";
+ return ss.str();
+}
+
+std::string ValueDescr::ToString() const { return FormatValueDescr(*this); }
+
std::string ValueDescr::ToString(const std::vector<ValueDescr>& descrs) {
std::stringstream ss;
ss << "(";
@@ -226,48 +226,48 @@ std::string ValueDescr::ToString(const std::vector<ValueDescr>& descrs) {
void PrintTo(const ValueDescr& descr, std::ostream* os) { *os << descr.ToString(); }
-std::string Datum::ToString() const {
- switch (this->kind()) {
- case Datum::NONE:
- return "nullptr";
- case Datum::SCALAR:
- return "Scalar";
- case Datum::ARRAY:
- return "Array";
- case Datum::CHUNKED_ARRAY:
- return "ChunkedArray";
- case Datum::RECORD_BATCH:
- return "RecordBatch";
- case Datum::TABLE:
- return "Table";
- case Datum::COLLECTION: {
- std::stringstream ss;
- ss << "Collection(";
- const auto& values = this->collection();
- for (size_t i = 0; i < values.size(); ++i) {
- if (i > 0) {
- ss << ", ";
- }
- ss << values[i].ToString();
- }
+std::string Datum::ToString() const {
+ switch (this->kind()) {
+ case Datum::NONE:
+ return "nullptr";
+ case Datum::SCALAR:
+ return "Scalar";
+ case Datum::ARRAY:
+ return "Array";
+ case Datum::CHUNKED_ARRAY:
+ return "ChunkedArray";
+ case Datum::RECORD_BATCH:
+ return "RecordBatch";
+ case Datum::TABLE:
+ return "Table";
+ case Datum::COLLECTION: {
+ std::stringstream ss;
+ ss << "Collection(";
+ const auto& values = this->collection();
+ for (size_t i = 0; i < values.size(); ++i) {
+ if (i > 0) {
+ ss << ", ";
+ }
+ ss << values[i].ToString();
+ }
ss << ')';
- return ss.str();
- }
- default:
- DCHECK(false);
- return "";
- }
-}
-
-ValueDescr::Shape GetBroadcastShape(const std::vector<ValueDescr>& args) {
- for (const auto& descr : args) {
- if (descr.shape == ValueDescr::ARRAY) {
+ return ss.str();
+ }
+ default:
+ DCHECK(false);
+ return "";
+ }
+}
+
+ValueDescr::Shape GetBroadcastShape(const std::vector<ValueDescr>& args) {
+ for (const auto& descr : args) {
+ if (descr.shape == ValueDescr::ARRAY) {
return ValueDescr::ARRAY;
- }
- }
+ }
+ }
return ValueDescr::SCALAR;
-}
-
+}
+
void PrintTo(const Datum& datum, std::ostream* os) {
switch (datum.kind()) {
case Datum::SCALAR:
@@ -281,4 +281,4 @@ void PrintTo(const Datum& datum, std::ostream* os) {
}
}
-} // namespace arrow
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/datum.h b/contrib/libs/apache/arrow/cpp/src/arrow/datum.h
index 6ba6af7f79e..af1e6abd7b6 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/datum.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/datum.h
@@ -1,281 +1,281 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <type_traits>
-#include <utility>
-#include <vector>
-
-#include "arrow/array/data.h"
-#include "arrow/scalar.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/variant.h" // IWYU pragma: export
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class Array;
-class ChunkedArray;
-class RecordBatch;
-class Table;
-
-/// \brief A descriptor type that gives the shape (array or scalar) and
-/// DataType of a Value, but without the data
-struct ARROW_EXPORT ValueDescr {
- std::shared_ptr<DataType> type;
- enum Shape {
- /// \brief Either Array or Scalar
- ANY,
-
- /// \brief Array type
- ARRAY,
-
- /// \brief Only Scalar arguments supported
- SCALAR
- };
-
- Shape shape;
-
- ValueDescr() : shape(ANY) {}
-
- ValueDescr(std::shared_ptr<DataType> type, ValueDescr::Shape shape)
- : type(std::move(type)), shape(shape) {}
-
- ValueDescr(std::shared_ptr<DataType> type) // NOLINT implicit conversion
- : type(std::move(type)), shape(ValueDescr::ANY) {}
-
- /// \brief Convenience constructor for ANY descr
- static ValueDescr Any(std::shared_ptr<DataType> type) {
- return ValueDescr(std::move(type), ANY);
- }
-
- /// \brief Convenience constructor for Value::ARRAY descr
- static ValueDescr Array(std::shared_ptr<DataType> type) {
- return ValueDescr(std::move(type), ARRAY);
- }
-
- /// \brief Convenience constructor for Value::SCALAR descr
- static ValueDescr Scalar(std::shared_ptr<DataType> type) {
- return ValueDescr(std::move(type), SCALAR);
- }
-
- bool operator==(const ValueDescr& other) const {
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "arrow/array/data.h"
+#include "arrow/scalar.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/variant.h" // IWYU pragma: export
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Array;
+class ChunkedArray;
+class RecordBatch;
+class Table;
+
+/// \brief A descriptor type that gives the shape (array or scalar) and
+/// DataType of a Value, but without the data
+struct ARROW_EXPORT ValueDescr {
+ std::shared_ptr<DataType> type;
+ enum Shape {
+ /// \brief Either Array or Scalar
+ ANY,
+
+ /// \brief Array type
+ ARRAY,
+
+ /// \brief Only Scalar arguments supported
+ SCALAR
+ };
+
+ Shape shape;
+
+ ValueDescr() : shape(ANY) {}
+
+ ValueDescr(std::shared_ptr<DataType> type, ValueDescr::Shape shape)
+ : type(std::move(type)), shape(shape) {}
+
+ ValueDescr(std::shared_ptr<DataType> type) // NOLINT implicit conversion
+ : type(std::move(type)), shape(ValueDescr::ANY) {}
+
+ /// \brief Convenience constructor for ANY descr
+ static ValueDescr Any(std::shared_ptr<DataType> type) {
+ return ValueDescr(std::move(type), ANY);
+ }
+
+ /// \brief Convenience constructor for Value::ARRAY descr
+ static ValueDescr Array(std::shared_ptr<DataType> type) {
+ return ValueDescr(std::move(type), ARRAY);
+ }
+
+ /// \brief Convenience constructor for Value::SCALAR descr
+ static ValueDescr Scalar(std::shared_ptr<DataType> type) {
+ return ValueDescr(std::move(type), SCALAR);
+ }
+
+ bool operator==(const ValueDescr& other) const {
if (shape != other.shape) return false;
if (type == other.type) return true;
return type && type->Equals(other.type);
- }
-
- bool operator!=(const ValueDescr& other) const { return !(*this == other); }
-
- std::string ToString() const;
+ }
+
+ bool operator!=(const ValueDescr& other) const { return !(*this == other); }
+
+ std::string ToString() const;
static std::string ToString(const std::vector<ValueDescr>&);
ARROW_EXPORT friend void PrintTo(const ValueDescr&, std::ostream*);
-};
-
-/// \brief For use with scalar functions, returns the broadcasted Value::Shape
-/// given a vector of value descriptors. Return SCALAR unless any value is
-/// ARRAY
-ARROW_EXPORT
-ValueDescr::Shape GetBroadcastShape(const std::vector<ValueDescr>& args);
-
-/// \class Datum
-/// \brief Variant type for various Arrow C++ data structures
-struct ARROW_EXPORT Datum {
- enum Kind { NONE, SCALAR, ARRAY, CHUNKED_ARRAY, RECORD_BATCH, TABLE, COLLECTION };
-
+};
+
+/// \brief For use with scalar functions, returns the broadcasted Value::Shape
+/// given a vector of value descriptors. Return SCALAR unless any value is
+/// ARRAY
+ARROW_EXPORT
+ValueDescr::Shape GetBroadcastShape(const std::vector<ValueDescr>& args);
+
+/// \class Datum
+/// \brief Variant type for various Arrow C++ data structures
+struct ARROW_EXPORT Datum {
+ enum Kind { NONE, SCALAR, ARRAY, CHUNKED_ARRAY, RECORD_BATCH, TABLE, COLLECTION };
+
struct Empty {};
- // Datums variants may have a length. This special value indicate that the
- // current variant does not have a length.
- static constexpr int64_t kUnknownLength = -1;
-
+ // Datums variants may have a length. This special value indicate that the
+ // current variant does not have a length.
+ static constexpr int64_t kUnknownLength = -1;
+
util::Variant<Empty, std::shared_ptr<Scalar>, std::shared_ptr<ArrayData>,
- std::shared_ptr<ChunkedArray>, std::shared_ptr<RecordBatch>,
- std::shared_ptr<Table>, std::vector<Datum>>
- value;
-
- /// \brief Empty datum, to be populated elsewhere
+ std::shared_ptr<ChunkedArray>, std::shared_ptr<RecordBatch>,
+ std::shared_ptr<Table>, std::vector<Datum>>
+ value;
+
+ /// \brief Empty datum, to be populated elsewhere
Datum() = default;
-
+
Datum(const Datum& other) = default;
Datum& operator=(const Datum& other) = default;
Datum(Datum&& other) = default;
Datum& operator=(Datum&& other) = default;
- Datum(std::shared_ptr<Scalar> value) // NOLINT implicit conversion
- : value(std::move(value)) {}
-
- Datum(std::shared_ptr<ArrayData> value) // NOLINT implicit conversion
- : value(std::move(value)) {}
-
- Datum(ArrayData arg) // NOLINT implicit conversion
- : value(std::make_shared<ArrayData>(std::move(arg))) {}
-
- Datum(const Array& value); // NOLINT implicit conversion
- Datum(const std::shared_ptr<Array>& value); // NOLINT implicit conversion
- Datum(std::shared_ptr<ChunkedArray> value); // NOLINT implicit conversion
- Datum(std::shared_ptr<RecordBatch> value); // NOLINT implicit conversion
- Datum(std::shared_ptr<Table> value); // NOLINT implicit conversion
- Datum(std::vector<Datum> value); // NOLINT implicit conversion
-
- // Explicit constructors from const-refs. Can be expensive, prefer the
- // shared_ptr constructors
- explicit Datum(const ChunkedArray& value);
- explicit Datum(const RecordBatch& value);
- explicit Datum(const Table& value);
-
- // Cast from subtypes of Array to Datum
- template <typename T, typename = enable_if_t<std::is_base_of<Array, T>::value>>
- Datum(const std::shared_ptr<T>& value) // NOLINT implicit conversion
- : Datum(std::shared_ptr<Array>(value)) {}
-
- // Convenience constructors
- explicit Datum(bool value);
- explicit Datum(int8_t value);
- explicit Datum(uint8_t value);
- explicit Datum(int16_t value);
- explicit Datum(uint16_t value);
- explicit Datum(int32_t value);
- explicit Datum(uint32_t value);
- explicit Datum(int64_t value);
- explicit Datum(uint64_t value);
- explicit Datum(float value);
- explicit Datum(double value);
+ Datum(std::shared_ptr<Scalar> value) // NOLINT implicit conversion
+ : value(std::move(value)) {}
+
+ Datum(std::shared_ptr<ArrayData> value) // NOLINT implicit conversion
+ : value(std::move(value)) {}
+
+ Datum(ArrayData arg) // NOLINT implicit conversion
+ : value(std::make_shared<ArrayData>(std::move(arg))) {}
+
+ Datum(const Array& value); // NOLINT implicit conversion
+ Datum(const std::shared_ptr<Array>& value); // NOLINT implicit conversion
+ Datum(std::shared_ptr<ChunkedArray> value); // NOLINT implicit conversion
+ Datum(std::shared_ptr<RecordBatch> value); // NOLINT implicit conversion
+ Datum(std::shared_ptr<Table> value); // NOLINT implicit conversion
+ Datum(std::vector<Datum> value); // NOLINT implicit conversion
+
+ // Explicit constructors from const-refs. Can be expensive, prefer the
+ // shared_ptr constructors
+ explicit Datum(const ChunkedArray& value);
+ explicit Datum(const RecordBatch& value);
+ explicit Datum(const Table& value);
+
+ // Cast from subtypes of Array to Datum
+ template <typename T, typename = enable_if_t<std::is_base_of<Array, T>::value>>
+ Datum(const std::shared_ptr<T>& value) // NOLINT implicit conversion
+ : Datum(std::shared_ptr<Array>(value)) {}
+
+ // Convenience constructors
+ explicit Datum(bool value);
+ explicit Datum(int8_t value);
+ explicit Datum(uint8_t value);
+ explicit Datum(int16_t value);
+ explicit Datum(uint16_t value);
+ explicit Datum(int32_t value);
+ explicit Datum(uint32_t value);
+ explicit Datum(int64_t value);
+ explicit Datum(uint64_t value);
+ explicit Datum(float value);
+ explicit Datum(double value);
explicit Datum(std::string value);
explicit Datum(const char* value);
-
- Datum::Kind kind() const {
- switch (this->value.index()) {
- case 0:
- return Datum::NONE;
- case 1:
- return Datum::SCALAR;
- case 2:
- return Datum::ARRAY;
- case 3:
- return Datum::CHUNKED_ARRAY;
- case 4:
- return Datum::RECORD_BATCH;
- case 5:
- return Datum::TABLE;
- case 6:
- return Datum::COLLECTION;
- default:
- return Datum::NONE;
- }
- }
-
- const std::shared_ptr<ArrayData>& array() const {
- return util::get<std::shared_ptr<ArrayData>>(this->value);
- }
-
- ArrayData* mutable_array() const { return this->array().get(); }
-
- std::shared_ptr<Array> make_array() const;
-
- const std::shared_ptr<ChunkedArray>& chunked_array() const {
- return util::get<std::shared_ptr<ChunkedArray>>(this->value);
- }
-
- const std::shared_ptr<RecordBatch>& record_batch() const {
- return util::get<std::shared_ptr<RecordBatch>>(this->value);
- }
-
- const std::shared_ptr<Table>& table() const {
- return util::get<std::shared_ptr<Table>>(this->value);
- }
-
- const std::vector<Datum>& collection() const {
- return util::get<std::vector<Datum>>(this->value);
- }
-
- const std::shared_ptr<Scalar>& scalar() const {
- return util::get<std::shared_ptr<Scalar>>(this->value);
- }
-
- template <typename ExactType>
+
+ Datum::Kind kind() const {
+ switch (this->value.index()) {
+ case 0:
+ return Datum::NONE;
+ case 1:
+ return Datum::SCALAR;
+ case 2:
+ return Datum::ARRAY;
+ case 3:
+ return Datum::CHUNKED_ARRAY;
+ case 4:
+ return Datum::RECORD_BATCH;
+ case 5:
+ return Datum::TABLE;
+ case 6:
+ return Datum::COLLECTION;
+ default:
+ return Datum::NONE;
+ }
+ }
+
+ const std::shared_ptr<ArrayData>& array() const {
+ return util::get<std::shared_ptr<ArrayData>>(this->value);
+ }
+
+ ArrayData* mutable_array() const { return this->array().get(); }
+
+ std::shared_ptr<Array> make_array() const;
+
+ const std::shared_ptr<ChunkedArray>& chunked_array() const {
+ return util::get<std::shared_ptr<ChunkedArray>>(this->value);
+ }
+
+ const std::shared_ptr<RecordBatch>& record_batch() const {
+ return util::get<std::shared_ptr<RecordBatch>>(this->value);
+ }
+
+ const std::shared_ptr<Table>& table() const {
+ return util::get<std::shared_ptr<Table>>(this->value);
+ }
+
+ const std::vector<Datum>& collection() const {
+ return util::get<std::vector<Datum>>(this->value);
+ }
+
+ const std::shared_ptr<Scalar>& scalar() const {
+ return util::get<std::shared_ptr<Scalar>>(this->value);
+ }
+
+ template <typename ExactType>
std::shared_ptr<ExactType> array_as() const {
return internal::checked_pointer_cast<ExactType>(this->make_array());
}
template <typename ExactType>
- const ExactType& scalar_as() const {
- return internal::checked_cast<const ExactType&>(*this->scalar());
- }
-
- bool is_array() const { return this->kind() == Datum::ARRAY; }
-
- bool is_arraylike() const {
- return this->kind() == Datum::ARRAY || this->kind() == Datum::CHUNKED_ARRAY;
- }
-
- bool is_scalar() const { return this->kind() == Datum::SCALAR; }
-
- /// \brief True if Datum contains a scalar or array-like data
- bool is_value() const { return this->is_arraylike() || this->is_scalar(); }
-
- bool is_collection() const { return this->kind() == Datum::COLLECTION; }
-
- int64_t null_count() const;
-
- /// \brief Return the shape (array or scalar) and type for supported kinds
- /// (ARRAY, CHUNKED_ARRAY, and SCALAR). Debug asserts otherwise
- ValueDescr descr() const;
-
- /// \brief Return the shape (array or scalar) for supported kinds (ARRAY,
- /// CHUNKED_ARRAY, and SCALAR). Debug asserts otherwise
- ValueDescr::Shape shape() const;
-
- /// \brief The value type of the variant, if any
- ///
- /// \return nullptr if no type
- std::shared_ptr<DataType> type() const;
-
+ const ExactType& scalar_as() const {
+ return internal::checked_cast<const ExactType&>(*this->scalar());
+ }
+
+ bool is_array() const { return this->kind() == Datum::ARRAY; }
+
+ bool is_arraylike() const {
+ return this->kind() == Datum::ARRAY || this->kind() == Datum::CHUNKED_ARRAY;
+ }
+
+ bool is_scalar() const { return this->kind() == Datum::SCALAR; }
+
+ /// \brief True if Datum contains a scalar or array-like data
+ bool is_value() const { return this->is_arraylike() || this->is_scalar(); }
+
+ bool is_collection() const { return this->kind() == Datum::COLLECTION; }
+
+ int64_t null_count() const;
+
+ /// \brief Return the shape (array or scalar) and type for supported kinds
+ /// (ARRAY, CHUNKED_ARRAY, and SCALAR). Debug asserts otherwise
+ ValueDescr descr() const;
+
+ /// \brief Return the shape (array or scalar) for supported kinds (ARRAY,
+ /// CHUNKED_ARRAY, and SCALAR). Debug asserts otherwise
+ ValueDescr::Shape shape() const;
+
+ /// \brief The value type of the variant, if any
+ ///
+ /// \return nullptr if no type
+ std::shared_ptr<DataType> type() const;
+
/// \brief The schema of the variant, if any
///
/// \return nullptr if no schema
std::shared_ptr<Schema> schema() const;
- /// \brief The value length of the variant, if any
- ///
- /// \return kUnknownLength if no type
- int64_t length() const;
-
- /// \brief The array chunks of the variant, if any
- ///
- /// \return empty if not arraylike
- ArrayVector chunks() const;
-
- bool Equals(const Datum& other) const;
-
- bool operator==(const Datum& other) const { return Equals(other); }
- bool operator!=(const Datum& other) const { return !Equals(other); }
-
- std::string ToString() const;
+ /// \brief The value length of the variant, if any
+ ///
+ /// \return kUnknownLength if no type
+ int64_t length() const;
+
+ /// \brief The array chunks of the variant, if any
+ ///
+ /// \return empty if not arraylike
+ ArrayVector chunks() const;
+
+ bool Equals(const Datum& other) const;
+
+ bool operator==(const Datum& other) const { return Equals(other); }
+ bool operator!=(const Datum& other) const { return !Equals(other); }
+
+ std::string ToString() const;
ARROW_EXPORT friend void PrintTo(const Datum&, std::ostream*);
-};
-
-} // namespace arrow
+};
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/device.cc b/contrib/libs/apache/arrow/cpp/src/arrow/device.cc
index 1aead49bfb1..798335d9ff0 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/device.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/device.cc
@@ -1,209 +1,209 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/device.h"
-
-#include <cstring>
-#include <utility>
-
-#include "arrow/buffer.h"
-#include "arrow/io/memory.h"
-#include "arrow/result.h"
-#include "arrow/util/logging.h"
-
-namespace arrow {
-
-MemoryManager::~MemoryManager() {}
-
-Device::~Device() {}
-
-#define COPY_BUFFER_SUCCESS(maybe_buffer) \
- ((maybe_buffer).ok() && *(maybe_buffer) != nullptr)
-
-#define COPY_BUFFER_RETURN(maybe_buffer, to) \
- if (!maybe_buffer.ok()) { \
- return maybe_buffer; \
- } \
- if (COPY_BUFFER_SUCCESS(maybe_buffer)) { \
- DCHECK_EQ(*(**maybe_buffer).device(), *to->device()); \
- return maybe_buffer; \
- }
-
-Result<std::shared_ptr<Buffer>> MemoryManager::CopyBuffer(
- const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& to) {
- const auto& from = buf->memory_manager();
- auto maybe_buffer = to->CopyBufferFrom(buf, from);
- COPY_BUFFER_RETURN(maybe_buffer, to);
- // `to` doesn't support copying from `from`, try the other way
- maybe_buffer = from->CopyBufferTo(buf, to);
- COPY_BUFFER_RETURN(maybe_buffer, to);
- if (!from->is_cpu() && !to->is_cpu()) {
- // Try an intermediate view on the CPU
- auto cpu_mm = default_cpu_memory_manager();
- maybe_buffer = from->ViewBufferTo(buf, cpu_mm);
- if (!COPY_BUFFER_SUCCESS(maybe_buffer)) {
- // View failed, try a copy instead
- // XXX should we have a MemoryManager::IsCopySupportedTo(MemoryManager)
- // to avoid copying to CPU if copy from CPU to dest is unsupported?
- maybe_buffer = from->CopyBufferTo(buf, cpu_mm);
- }
- if (COPY_BUFFER_SUCCESS(maybe_buffer)) {
- // Copy from source to CPU succeeded, now try to copy from CPU into dest
- maybe_buffer = to->CopyBufferFrom(*maybe_buffer, cpu_mm);
- if (COPY_BUFFER_SUCCESS(maybe_buffer)) {
- return maybe_buffer;
- }
- }
- }
-
- return Status::NotImplemented("Copying buffer from ", from->device()->ToString(),
- " to ", to->device()->ToString(), " not supported");
-}
-
-Result<std::shared_ptr<Buffer>> MemoryManager::ViewBuffer(
- const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& to) {
- if (buf->memory_manager() == to) {
- return buf;
- }
- const auto& from = buf->memory_manager();
- auto maybe_buffer = to->ViewBufferFrom(buf, from);
- COPY_BUFFER_RETURN(maybe_buffer, to);
- // `to` doesn't support viewing from `from`, try the other way
- maybe_buffer = from->ViewBufferTo(buf, to);
- COPY_BUFFER_RETURN(maybe_buffer, to);
-
- return Status::NotImplemented("Viewing buffer from ", from->device()->ToString(),
- " on ", to->device()->ToString(), " not supported");
-}
-
-#undef COPY_BUFFER_RETURN
-#undef COPY_BUFFER_SUCCESS
-
-Result<std::shared_ptr<Buffer>> MemoryManager::CopyBufferFrom(
- const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& from) {
- return nullptr;
-}
-
-Result<std::shared_ptr<Buffer>> MemoryManager::CopyBufferTo(
- const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& to) {
- return nullptr;
-}
-
-Result<std::shared_ptr<Buffer>> MemoryManager::ViewBufferFrom(
- const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& from) {
- return nullptr;
-}
-
-Result<std::shared_ptr<Buffer>> MemoryManager::ViewBufferTo(
- const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& to) {
- return nullptr;
-}
-
-// ----------------------------------------------------------------------
-// CPU backend implementation
-
-namespace {
-const char kCPUDeviceTypeName[] = "arrow::CPUDevice";
-}
-
-std::shared_ptr<MemoryManager> CPUMemoryManager::Make(
- const std::shared_ptr<Device>& device, MemoryPool* pool) {
- return std::shared_ptr<MemoryManager>(new CPUMemoryManager(device, pool));
-}
-
-Result<std::shared_ptr<io::RandomAccessFile>> CPUMemoryManager::GetBufferReader(
- std::shared_ptr<Buffer> buf) {
- return std::make_shared<io::BufferReader>(std::move(buf));
-}
-
-Result<std::shared_ptr<io::OutputStream>> CPUMemoryManager::GetBufferWriter(
- std::shared_ptr<Buffer> buf) {
- return std::make_shared<io::FixedSizeBufferWriter>(std::move(buf));
-}
-
-Result<std::shared_ptr<Buffer>> CPUMemoryManager::AllocateBuffer(int64_t size) {
- return ::arrow::AllocateBuffer(size, pool_);
-}
-
-Result<std::shared_ptr<Buffer>> CPUMemoryManager::CopyBufferFrom(
- const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& from) {
- if (!from->is_cpu()) {
- return nullptr;
- }
- ARROW_ASSIGN_OR_RAISE(auto dest, ::arrow::AllocateBuffer(buf->size(), pool_));
- if (buf->size() > 0) {
- memcpy(dest->mutable_data(), buf->data(), static_cast<size_t>(buf->size()));
- }
- return std::move(dest);
-}
-
-Result<std::shared_ptr<Buffer>> CPUMemoryManager::ViewBufferFrom(
- const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& from) {
- if (!from->is_cpu()) {
- return nullptr;
- }
- return buf;
-}
-
-Result<std::shared_ptr<Buffer>> CPUMemoryManager::CopyBufferTo(
- const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& to) {
- if (!to->is_cpu()) {
- return nullptr;
- }
- ARROW_ASSIGN_OR_RAISE(auto dest, ::arrow::AllocateBuffer(buf->size(), pool_));
- if (buf->size() > 0) {
- memcpy(dest->mutable_data(), buf->data(), static_cast<size_t>(buf->size()));
- }
- return std::move(dest);
-}
-
-Result<std::shared_ptr<Buffer>> CPUMemoryManager::ViewBufferTo(
- const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& to) {
- if (!to->is_cpu()) {
- return nullptr;
- }
- return buf;
-}
-
-std::shared_ptr<MemoryManager> default_cpu_memory_manager() {
- static auto instance =
- CPUMemoryManager::Make(CPUDevice::Instance(), default_memory_pool());
- return instance;
-}
-
-std::shared_ptr<Device> CPUDevice::Instance() {
- static auto instance = std::shared_ptr<Device>(new CPUDevice());
- return instance;
-}
-
-const char* CPUDevice::type_name() const { return kCPUDeviceTypeName; }
-
-std::string CPUDevice::ToString() const { return "CPUDevice()"; }
-
-bool CPUDevice::Equals(const Device& other) const {
- return other.type_name() == kCPUDeviceTypeName;
-}
-
-std::shared_ptr<MemoryManager> CPUDevice::memory_manager(MemoryPool* pool) {
- return CPUMemoryManager::Make(Instance(), pool);
-}
-
-std::shared_ptr<MemoryManager> CPUDevice::default_memory_manager() {
- return default_cpu_memory_manager();
-}
-
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/device.h"
+
+#include <cstring>
+#include <utility>
+
+#include "arrow/buffer.h"
+#include "arrow/io/memory.h"
+#include "arrow/result.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+MemoryManager::~MemoryManager() {}
+
+Device::~Device() {}
+
+#define COPY_BUFFER_SUCCESS(maybe_buffer) \
+ ((maybe_buffer).ok() && *(maybe_buffer) != nullptr)
+
+#define COPY_BUFFER_RETURN(maybe_buffer, to) \
+ if (!maybe_buffer.ok()) { \
+ return maybe_buffer; \
+ } \
+ if (COPY_BUFFER_SUCCESS(maybe_buffer)) { \
+ DCHECK_EQ(*(**maybe_buffer).device(), *to->device()); \
+ return maybe_buffer; \
+ }
+
+Result<std::shared_ptr<Buffer>> MemoryManager::CopyBuffer(
+ const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& to) {
+ const auto& from = buf->memory_manager();
+ auto maybe_buffer = to->CopyBufferFrom(buf, from);
+ COPY_BUFFER_RETURN(maybe_buffer, to);
+ // `to` doesn't support copying from `from`, try the other way
+ maybe_buffer = from->CopyBufferTo(buf, to);
+ COPY_BUFFER_RETURN(maybe_buffer, to);
+ if (!from->is_cpu() && !to->is_cpu()) {
+ // Try an intermediate view on the CPU
+ auto cpu_mm = default_cpu_memory_manager();
+ maybe_buffer = from->ViewBufferTo(buf, cpu_mm);
+ if (!COPY_BUFFER_SUCCESS(maybe_buffer)) {
+ // View failed, try a copy instead
+ // XXX should we have a MemoryManager::IsCopySupportedTo(MemoryManager)
+ // to avoid copying to CPU if copy from CPU to dest is unsupported?
+ maybe_buffer = from->CopyBufferTo(buf, cpu_mm);
+ }
+ if (COPY_BUFFER_SUCCESS(maybe_buffer)) {
+ // Copy from source to CPU succeeded, now try to copy from CPU into dest
+ maybe_buffer = to->CopyBufferFrom(*maybe_buffer, cpu_mm);
+ if (COPY_BUFFER_SUCCESS(maybe_buffer)) {
+ return maybe_buffer;
+ }
+ }
+ }
+
+ return Status::NotImplemented("Copying buffer from ", from->device()->ToString(),
+ " to ", to->device()->ToString(), " not supported");
+}
+
+Result<std::shared_ptr<Buffer>> MemoryManager::ViewBuffer(
+ const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& to) {
+ if (buf->memory_manager() == to) {
+ return buf;
+ }
+ const auto& from = buf->memory_manager();
+ auto maybe_buffer = to->ViewBufferFrom(buf, from);
+ COPY_BUFFER_RETURN(maybe_buffer, to);
+ // `to` doesn't support viewing from `from`, try the other way
+ maybe_buffer = from->ViewBufferTo(buf, to);
+ COPY_BUFFER_RETURN(maybe_buffer, to);
+
+ return Status::NotImplemented("Viewing buffer from ", from->device()->ToString(),
+ " on ", to->device()->ToString(), " not supported");
+}
+
+#undef COPY_BUFFER_RETURN
+#undef COPY_BUFFER_SUCCESS
+
+Result<std::shared_ptr<Buffer>> MemoryManager::CopyBufferFrom(
+ const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& from) {
+ return nullptr;
+}
+
+Result<std::shared_ptr<Buffer>> MemoryManager::CopyBufferTo(
+ const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& to) {
+ return nullptr;
+}
+
+Result<std::shared_ptr<Buffer>> MemoryManager::ViewBufferFrom(
+ const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& from) {
+ return nullptr;
+}
+
+Result<std::shared_ptr<Buffer>> MemoryManager::ViewBufferTo(
+ const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& to) {
+ return nullptr;
+}
+
+// ----------------------------------------------------------------------
+// CPU backend implementation
+
+namespace {
+const char kCPUDeviceTypeName[] = "arrow::CPUDevice";
+}
+
+std::shared_ptr<MemoryManager> CPUMemoryManager::Make(
+ const std::shared_ptr<Device>& device, MemoryPool* pool) {
+ return std::shared_ptr<MemoryManager>(new CPUMemoryManager(device, pool));
+}
+
+Result<std::shared_ptr<io::RandomAccessFile>> CPUMemoryManager::GetBufferReader(
+ std::shared_ptr<Buffer> buf) {
+ return std::make_shared<io::BufferReader>(std::move(buf));
+}
+
+Result<std::shared_ptr<io::OutputStream>> CPUMemoryManager::GetBufferWriter(
+ std::shared_ptr<Buffer> buf) {
+ return std::make_shared<io::FixedSizeBufferWriter>(std::move(buf));
+}
+
+Result<std::shared_ptr<Buffer>> CPUMemoryManager::AllocateBuffer(int64_t size) {
+ return ::arrow::AllocateBuffer(size, pool_);
+}
+
+Result<std::shared_ptr<Buffer>> CPUMemoryManager::CopyBufferFrom(
+ const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& from) {
+ if (!from->is_cpu()) {
+ return nullptr;
+ }
+ ARROW_ASSIGN_OR_RAISE(auto dest, ::arrow::AllocateBuffer(buf->size(), pool_));
+ if (buf->size() > 0) {
+ memcpy(dest->mutable_data(), buf->data(), static_cast<size_t>(buf->size()));
+ }
+ return std::move(dest);
+}
+
+Result<std::shared_ptr<Buffer>> CPUMemoryManager::ViewBufferFrom(
+ const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& from) {
+ if (!from->is_cpu()) {
+ return nullptr;
+ }
+ return buf;
+}
+
+Result<std::shared_ptr<Buffer>> CPUMemoryManager::CopyBufferTo(
+ const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& to) {
+ if (!to->is_cpu()) {
+ return nullptr;
+ }
+ ARROW_ASSIGN_OR_RAISE(auto dest, ::arrow::AllocateBuffer(buf->size(), pool_));
+ if (buf->size() > 0) {
+ memcpy(dest->mutable_data(), buf->data(), static_cast<size_t>(buf->size()));
+ }
+ return std::move(dest);
+}
+
+Result<std::shared_ptr<Buffer>> CPUMemoryManager::ViewBufferTo(
+ const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& to) {
+ if (!to->is_cpu()) {
+ return nullptr;
+ }
+ return buf;
+}
+
+std::shared_ptr<MemoryManager> default_cpu_memory_manager() {
+ static auto instance =
+ CPUMemoryManager::Make(CPUDevice::Instance(), default_memory_pool());
+ return instance;
+}
+
+std::shared_ptr<Device> CPUDevice::Instance() {
+ static auto instance = std::shared_ptr<Device>(new CPUDevice());
+ return instance;
+}
+
+const char* CPUDevice::type_name() const { return kCPUDeviceTypeName; }
+
+std::string CPUDevice::ToString() const { return "CPUDevice()"; }
+
+bool CPUDevice::Equals(const Device& other) const {
+ return other.type_name() == kCPUDeviceTypeName;
+}
+
+std::shared_ptr<MemoryManager> CPUDevice::memory_manager(MemoryPool* pool) {
+ return CPUMemoryManager::Make(Instance(), pool);
+}
+
+std::shared_ptr<MemoryManager> CPUDevice::default_memory_manager() {
+ return default_cpu_memory_manager();
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/device.h b/contrib/libs/apache/arrow/cpp/src/arrow/device.h
index 068be483e98..00ad156a3d3 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/device.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/device.h
@@ -1,226 +1,226 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <string>
-
-#include "arrow/io/type_fwd.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/compare.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class MemoryManager;
-
-/// \brief EXPERIMENTAL: Abstract interface for hardware devices
-///
-/// This object represents a device with access to some memory spaces.
-/// When handling a Buffer or raw memory address, it allows deciding in which
-/// context the raw memory address should be interpreted
-/// (e.g. CPU-accessible memory, or embedded memory on some particular GPU).
-class ARROW_EXPORT Device : public std::enable_shared_from_this<Device>,
- public util::EqualityComparable<Device> {
- public:
- virtual ~Device();
-
- /// \brief A shorthand for this device's type.
- ///
- /// The returned value is different for each device class, but is the
- /// same for all instances of a given class. It can be used as a replacement
- /// for RTTI.
- virtual const char* type_name() const = 0;
-
- /// \brief A human-readable description of the device.
- ///
- /// The returned value should be detailed enough to distinguish between
- /// different instances, where necessary.
- virtual std::string ToString() const = 0;
-
- /// \brief Whether this instance points to the same device as another one.
- virtual bool Equals(const Device&) const = 0;
-
- /// \brief Whether this device is the main CPU device.
- ///
- /// This shorthand method is very useful when deciding whether a memory address
- /// is CPU-accessible.
- bool is_cpu() const { return is_cpu_; }
-
- /// \brief Return a MemoryManager instance tied to this device
- ///
- /// The returned instance uses default parameters for this device type's
- /// MemoryManager implementation. Some devices also allow constructing
- /// MemoryManager instances with non-default parameters.
- virtual std::shared_ptr<MemoryManager> default_memory_manager() = 0;
-
- protected:
- ARROW_DISALLOW_COPY_AND_ASSIGN(Device);
- explicit Device(bool is_cpu = false) : is_cpu_(is_cpu) {}
-
- bool is_cpu_;
-};
-
-/// \brief EXPERIMENTAL: An object that provides memory management primitives
-///
-/// A MemoryManager is always tied to a particular Device instance.
-/// It can also have additional parameters (such as a MemoryPool to
-/// allocate CPU memory).
-class ARROW_EXPORT MemoryManager : public std::enable_shared_from_this<MemoryManager> {
- public:
- virtual ~MemoryManager();
-
- /// \brief The device this MemoryManager is tied to
- const std::shared_ptr<Device>& device() const { return device_; }
-
- /// \brief Whether this MemoryManager is tied to the main CPU device.
- ///
- /// This shorthand method is very useful when deciding whether a memory address
- /// is CPU-accessible.
- bool is_cpu() const { return device_->is_cpu(); }
-
- /// \brief Create a RandomAccessFile to read a particular buffer.
- ///
- /// The given buffer must be tied to this MemoryManager.
- ///
- /// See also the Buffer::GetReader shorthand.
- virtual Result<std::shared_ptr<io::RandomAccessFile>> GetBufferReader(
- std::shared_ptr<Buffer> buf) = 0;
-
- /// \brief Create a OutputStream to write to a particular buffer.
- ///
- /// The given buffer must be mutable and tied to this MemoryManager.
- /// The returned stream object writes into the buffer's underlying memory
- /// (but it won't resize it).
- ///
- /// See also the Buffer::GetWriter shorthand.
- virtual Result<std::shared_ptr<io::OutputStream>> GetBufferWriter(
- std::shared_ptr<Buffer> buf) = 0;
-
- /// \brief Allocate a (mutable) Buffer
- ///
- /// The buffer will be allocated in the device's memory.
- virtual Result<std::shared_ptr<Buffer>> AllocateBuffer(int64_t size) = 0;
-
- // XXX Should this take a `const Buffer&` instead
- /// \brief Copy a Buffer to a destination MemoryManager
- ///
- /// See also the Buffer::Copy shorthand.
- static Result<std::shared_ptr<Buffer>> CopyBuffer(
- const std::shared_ptr<Buffer>& source, const std::shared_ptr<MemoryManager>& to);
-
- /// \brief Make a no-copy Buffer view in a destination MemoryManager
- ///
- /// See also the Buffer::View shorthand.
- static Result<std::shared_ptr<Buffer>> ViewBuffer(
- const std::shared_ptr<Buffer>& source, const std::shared_ptr<MemoryManager>& to);
-
- protected:
- ARROW_DISALLOW_COPY_AND_ASSIGN(MemoryManager);
-
- explicit MemoryManager(const std::shared_ptr<Device>& device) : device_(device) {}
-
- // Default implementations always return nullptr, should be overridden
- // by subclasses that support data transfer.
- // (returning nullptr means unsupported copy / view)
- // In CopyBufferFrom and ViewBufferFrom, the `from` parameter is guaranteed to
- // be equal to `buf->memory_manager()`.
- virtual Result<std::shared_ptr<Buffer>> CopyBufferFrom(
- const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& from);
- virtual Result<std::shared_ptr<Buffer>> CopyBufferTo(
- const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& to);
- virtual Result<std::shared_ptr<Buffer>> ViewBufferFrom(
- const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& from);
- virtual Result<std::shared_ptr<Buffer>> ViewBufferTo(
- const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& to);
-
- std::shared_ptr<Device> device_;
-};
-
-// ----------------------------------------------------------------------
-// CPU backend implementation
-
-class ARROW_EXPORT CPUDevice : public Device {
- public:
- const char* type_name() const override;
- std::string ToString() const override;
- bool Equals(const Device&) const override;
-
- std::shared_ptr<MemoryManager> default_memory_manager() override;
-
- /// \brief Return the global CPUDevice instance
- static std::shared_ptr<Device> Instance();
-
- /// \brief Create a MemoryManager
- ///
- /// The returned MemoryManager will use the given MemoryPool for allocations.
- static std::shared_ptr<MemoryManager> memory_manager(MemoryPool* pool);
-
- protected:
- CPUDevice() : Device(true) {}
-};
-
-class ARROW_EXPORT CPUMemoryManager : public MemoryManager {
- public:
- Result<std::shared_ptr<io::RandomAccessFile>> GetBufferReader(
- std::shared_ptr<Buffer> buf) override;
- Result<std::shared_ptr<io::OutputStream>> GetBufferWriter(
- std::shared_ptr<Buffer> buf) override;
-
- Result<std::shared_ptr<Buffer>> AllocateBuffer(int64_t size) override;
-
- /// \brief Return the MemoryPool associated with this MemoryManager.
- MemoryPool* pool() const { return pool_; }
-
- protected:
- CPUMemoryManager(const std::shared_ptr<Device>& device, MemoryPool* pool)
- : MemoryManager(device), pool_(pool) {}
-
- static std::shared_ptr<MemoryManager> Make(const std::shared_ptr<Device>& device,
- MemoryPool* pool = default_memory_pool());
-
- Result<std::shared_ptr<Buffer>> CopyBufferFrom(
- const std::shared_ptr<Buffer>& buf,
- const std::shared_ptr<MemoryManager>& from) override;
- Result<std::shared_ptr<Buffer>> CopyBufferTo(
- const std::shared_ptr<Buffer>& buf,
- const std::shared_ptr<MemoryManager>& to) override;
- Result<std::shared_ptr<Buffer>> ViewBufferFrom(
- const std::shared_ptr<Buffer>& buf,
- const std::shared_ptr<MemoryManager>& from) override;
- Result<std::shared_ptr<Buffer>> ViewBufferTo(
- const std::shared_ptr<Buffer>& buf,
- const std::shared_ptr<MemoryManager>& to) override;
-
- MemoryPool* pool_;
-
- friend std::shared_ptr<MemoryManager> CPUDevice::memory_manager(MemoryPool* pool);
- friend ARROW_EXPORT std::shared_ptr<MemoryManager> default_cpu_memory_manager();
-};
-
-/// \brief Return the default CPU MemoryManager instance
-///
-/// The returned singleton instance uses the default MemoryPool.
-/// This function is a faster spelling of
-/// `CPUDevice::Instance()->default_memory_manager()`.
-ARROW_EXPORT
-std::shared_ptr<MemoryManager> default_cpu_memory_manager();
-
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+#include "arrow/io/type_fwd.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/compare.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class MemoryManager;
+
+/// \brief EXPERIMENTAL: Abstract interface for hardware devices
+///
+/// This object represents a device with access to some memory spaces.
+/// When handling a Buffer or raw memory address, it allows deciding in which
+/// context the raw memory address should be interpreted
+/// (e.g. CPU-accessible memory, or embedded memory on some particular GPU).
+class ARROW_EXPORT Device : public std::enable_shared_from_this<Device>,
+ public util::EqualityComparable<Device> {
+ public:
+ virtual ~Device();
+
+ /// \brief A shorthand for this device's type.
+ ///
+ /// The returned value is different for each device class, but is the
+ /// same for all instances of a given class. It can be used as a replacement
+ /// for RTTI.
+ virtual const char* type_name() const = 0;
+
+ /// \brief A human-readable description of the device.
+ ///
+ /// The returned value should be detailed enough to distinguish between
+ /// different instances, where necessary.
+ virtual std::string ToString() const = 0;
+
+ /// \brief Whether this instance points to the same device as another one.
+ virtual bool Equals(const Device&) const = 0;
+
+ /// \brief Whether this device is the main CPU device.
+ ///
+ /// This shorthand method is very useful when deciding whether a memory address
+ /// is CPU-accessible.
+ bool is_cpu() const { return is_cpu_; }
+
+ /// \brief Return a MemoryManager instance tied to this device
+ ///
+ /// The returned instance uses default parameters for this device type's
+ /// MemoryManager implementation. Some devices also allow constructing
+ /// MemoryManager instances with non-default parameters.
+ virtual std::shared_ptr<MemoryManager> default_memory_manager() = 0;
+
+ protected:
+ ARROW_DISALLOW_COPY_AND_ASSIGN(Device);
+ explicit Device(bool is_cpu = false) : is_cpu_(is_cpu) {}
+
+ bool is_cpu_;
+};
+
+/// \brief EXPERIMENTAL: An object that provides memory management primitives
+///
+/// A MemoryManager is always tied to a particular Device instance.
+/// It can also have additional parameters (such as a MemoryPool to
+/// allocate CPU memory).
+class ARROW_EXPORT MemoryManager : public std::enable_shared_from_this<MemoryManager> {
+ public:
+ virtual ~MemoryManager();
+
+ /// \brief The device this MemoryManager is tied to
+ const std::shared_ptr<Device>& device() const { return device_; }
+
+ /// \brief Whether this MemoryManager is tied to the main CPU device.
+ ///
+ /// This shorthand method is very useful when deciding whether a memory address
+ /// is CPU-accessible.
+ bool is_cpu() const { return device_->is_cpu(); }
+
+ /// \brief Create a RandomAccessFile to read a particular buffer.
+ ///
+ /// The given buffer must be tied to this MemoryManager.
+ ///
+ /// See also the Buffer::GetReader shorthand.
+ virtual Result<std::shared_ptr<io::RandomAccessFile>> GetBufferReader(
+ std::shared_ptr<Buffer> buf) = 0;
+
+ /// \brief Create a OutputStream to write to a particular buffer.
+ ///
+ /// The given buffer must be mutable and tied to this MemoryManager.
+ /// The returned stream object writes into the buffer's underlying memory
+ /// (but it won't resize it).
+ ///
+ /// See also the Buffer::GetWriter shorthand.
+ virtual Result<std::shared_ptr<io::OutputStream>> GetBufferWriter(
+ std::shared_ptr<Buffer> buf) = 0;
+
+ /// \brief Allocate a (mutable) Buffer
+ ///
+ /// The buffer will be allocated in the device's memory.
+ virtual Result<std::shared_ptr<Buffer>> AllocateBuffer(int64_t size) = 0;
+
+ // XXX Should this take a `const Buffer&` instead
+ /// \brief Copy a Buffer to a destination MemoryManager
+ ///
+ /// See also the Buffer::Copy shorthand.
+ static Result<std::shared_ptr<Buffer>> CopyBuffer(
+ const std::shared_ptr<Buffer>& source, const std::shared_ptr<MemoryManager>& to);
+
+ /// \brief Make a no-copy Buffer view in a destination MemoryManager
+ ///
+ /// See also the Buffer::View shorthand.
+ static Result<std::shared_ptr<Buffer>> ViewBuffer(
+ const std::shared_ptr<Buffer>& source, const std::shared_ptr<MemoryManager>& to);
+
+ protected:
+ ARROW_DISALLOW_COPY_AND_ASSIGN(MemoryManager);
+
+ explicit MemoryManager(const std::shared_ptr<Device>& device) : device_(device) {}
+
+ // Default implementations always return nullptr, should be overridden
+ // by subclasses that support data transfer.
+ // (returning nullptr means unsupported copy / view)
+ // In CopyBufferFrom and ViewBufferFrom, the `from` parameter is guaranteed to
+ // be equal to `buf->memory_manager()`.
+ virtual Result<std::shared_ptr<Buffer>> CopyBufferFrom(
+ const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& from);
+ virtual Result<std::shared_ptr<Buffer>> CopyBufferTo(
+ const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& to);
+ virtual Result<std::shared_ptr<Buffer>> ViewBufferFrom(
+ const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& from);
+ virtual Result<std::shared_ptr<Buffer>> ViewBufferTo(
+ const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& to);
+
+ std::shared_ptr<Device> device_;
+};
+
+// ----------------------------------------------------------------------
+// CPU backend implementation
+
+class ARROW_EXPORT CPUDevice : public Device {
+ public:
+ const char* type_name() const override;
+ std::string ToString() const override;
+ bool Equals(const Device&) const override;
+
+ std::shared_ptr<MemoryManager> default_memory_manager() override;
+
+ /// \brief Return the global CPUDevice instance
+ static std::shared_ptr<Device> Instance();
+
+ /// \brief Create a MemoryManager
+ ///
+ /// The returned MemoryManager will use the given MemoryPool for allocations.
+ static std::shared_ptr<MemoryManager> memory_manager(MemoryPool* pool);
+
+ protected:
+ CPUDevice() : Device(true) {}
+};
+
+class ARROW_EXPORT CPUMemoryManager : public MemoryManager {
+ public:
+ Result<std::shared_ptr<io::RandomAccessFile>> GetBufferReader(
+ std::shared_ptr<Buffer> buf) override;
+ Result<std::shared_ptr<io::OutputStream>> GetBufferWriter(
+ std::shared_ptr<Buffer> buf) override;
+
+ Result<std::shared_ptr<Buffer>> AllocateBuffer(int64_t size) override;
+
+ /// \brief Return the MemoryPool associated with this MemoryManager.
+ MemoryPool* pool() const { return pool_; }
+
+ protected:
+ CPUMemoryManager(const std::shared_ptr<Device>& device, MemoryPool* pool)
+ : MemoryManager(device), pool_(pool) {}
+
+ static std::shared_ptr<MemoryManager> Make(const std::shared_ptr<Device>& device,
+ MemoryPool* pool = default_memory_pool());
+
+ Result<std::shared_ptr<Buffer>> CopyBufferFrom(
+ const std::shared_ptr<Buffer>& buf,
+ const std::shared_ptr<MemoryManager>& from) override;
+ Result<std::shared_ptr<Buffer>> CopyBufferTo(
+ const std::shared_ptr<Buffer>& buf,
+ const std::shared_ptr<MemoryManager>& to) override;
+ Result<std::shared_ptr<Buffer>> ViewBufferFrom(
+ const std::shared_ptr<Buffer>& buf,
+ const std::shared_ptr<MemoryManager>& from) override;
+ Result<std::shared_ptr<Buffer>> ViewBufferTo(
+ const std::shared_ptr<Buffer>& buf,
+ const std::shared_ptr<MemoryManager>& to) override;
+
+ MemoryPool* pool_;
+
+ friend std::shared_ptr<MemoryManager> CPUDevice::memory_manager(MemoryPool* pool);
+ friend ARROW_EXPORT std::shared_ptr<MemoryManager> default_cpu_memory_manager();
+};
+
+/// \brief Return the default CPU MemoryManager instance
+///
+/// The returned singleton instance uses the default MemoryPool.
+/// This function is a faster spelling of
+/// `CPUDevice::Instance()->default_memory_manager()`.
+ARROW_EXPORT
+std::shared_ptr<MemoryManager> default_cpu_memory_manager();
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/extension_type.cc b/contrib/libs/apache/arrow/cpp/src/arrow/extension_type.cc
index e579b691023..6181afcc417 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/extension_type.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/extension_type.cc
@@ -1,169 +1,169 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/extension_type.h"
-
-#include <memory>
-#include <mutex>
-#include <sstream>
-#include <string>
-#include <unordered_map>
-#include <utility>
-
-#include "arrow/array/util.h"
-#include "arrow/chunked_array.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/logging.h"
-
-namespace arrow {
-
-using internal::checked_cast;
-
-DataTypeLayout ExtensionType::layout() const { return storage_type_->layout(); }
-
-std::string ExtensionType::ToString() const {
- std::stringstream ss;
- ss << "extension<" << this->extension_name() << ">";
- return ss.str();
-}
-
-std::shared_ptr<Array> ExtensionType::WrapArray(const std::shared_ptr<DataType>& type,
- const std::shared_ptr<Array>& storage) {
- DCHECK_EQ(type->id(), Type::EXTENSION);
- const auto& ext_type = checked_cast<const ExtensionType&>(*type);
- DCHECK_EQ(storage->type_id(), ext_type.storage_type()->id());
- auto data = storage->data()->Copy();
- data->type = type;
- return ext_type.MakeArray(std::move(data));
-}
-
-std::shared_ptr<ChunkedArray> ExtensionType::WrapArray(
- const std::shared_ptr<DataType>& type, const std::shared_ptr<ChunkedArray>& storage) {
- DCHECK_EQ(type->id(), Type::EXTENSION);
- const auto& ext_type = checked_cast<const ExtensionType&>(*type);
- DCHECK_EQ(storage->type()->id(), ext_type.storage_type()->id());
-
- ArrayVector out_chunks(storage->num_chunks());
- for (int i = 0; i < storage->num_chunks(); i++) {
- auto data = storage->chunk(i)->data()->Copy();
- data->type = type;
- out_chunks[i] = ext_type.MakeArray(std::move(data));
- }
- return std::make_shared<ChunkedArray>(std::move(out_chunks));
-}
-
-ExtensionArray::ExtensionArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }
-
-ExtensionArray::ExtensionArray(const std::shared_ptr<DataType>& type,
- const std::shared_ptr<Array>& storage) {
- ARROW_CHECK_EQ(type->id(), Type::EXTENSION);
- ARROW_CHECK(
- storage->type()->Equals(*checked_cast<const ExtensionType&>(*type).storage_type()));
- auto data = storage->data()->Copy();
- // XXX This pointer is reverted below in SetData()...
- data->type = type;
- SetData(data);
-}
-
-void ExtensionArray::SetData(const std::shared_ptr<ArrayData>& data) {
- ARROW_CHECK_EQ(data->type->id(), Type::EXTENSION);
- this->Array::SetData(data);
-
- auto storage_data = data->Copy();
- storage_data->type = (static_cast<const ExtensionType&>(*data->type).storage_type());
- storage_ = MakeArray(storage_data);
-}
-
-class ExtensionTypeRegistryImpl : public ExtensionTypeRegistry {
- public:
- ExtensionTypeRegistryImpl() {}
-
- Status RegisterType(std::shared_ptr<ExtensionType> type) override {
- std::lock_guard<std::mutex> lock(lock_);
- std::string type_name = type->extension_name();
- auto it = name_to_type_.find(type_name);
- if (it != name_to_type_.end()) {
- return Status::KeyError("A type extension with name ", type_name,
- " already defined");
- }
- name_to_type_[type_name] = std::move(type);
- return Status::OK();
- }
-
- Status UnregisterType(const std::string& type_name) override {
- std::lock_guard<std::mutex> lock(lock_);
- auto it = name_to_type_.find(type_name);
- if (it == name_to_type_.end()) {
- return Status::KeyError("No type extension with name ", type_name, " found");
- }
- name_to_type_.erase(it);
- return Status::OK();
- }
-
- std::shared_ptr<ExtensionType> GetType(const std::string& type_name) override {
- std::lock_guard<std::mutex> lock(lock_);
- auto it = name_to_type_.find(type_name);
- if (it == name_to_type_.end()) {
- return nullptr;
- } else {
- return it->second;
- }
- return nullptr;
- }
-
- private:
- std::mutex lock_;
- std::unordered_map<std::string, std::shared_ptr<ExtensionType>> name_to_type_;
-};
-
-static std::shared_ptr<ExtensionTypeRegistry> g_registry;
-static std::once_flag registry_initialized;
-
-namespace internal {
-
-static void CreateGlobalRegistry() {
- g_registry = std::make_shared<ExtensionTypeRegistryImpl>();
-}
-
-} // namespace internal
-
-std::shared_ptr<ExtensionTypeRegistry> ExtensionTypeRegistry::GetGlobalRegistry() {
- std::call_once(registry_initialized, internal::CreateGlobalRegistry);
- return g_registry;
-}
-
-Status RegisterExtensionType(std::shared_ptr<ExtensionType> type) {
- auto registry = ExtensionTypeRegistry::GetGlobalRegistry();
- return registry->RegisterType(type);
-}
-
-Status UnregisterExtensionType(const std::string& type_name) {
- auto registry = ExtensionTypeRegistry::GetGlobalRegistry();
- return registry->UnregisterType(type_name);
-}
-
-std::shared_ptr<ExtensionType> GetExtensionType(const std::string& type_name) {
- auto registry = ExtensionTypeRegistry::GetGlobalRegistry();
- return registry->GetType(type_name);
-}
-
-extern const char kExtensionTypeKeyName[] = "ARROW:extension:name";
-extern const char kExtensionMetadataKeyName[] = "ARROW:extension:metadata";
-
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/extension_type.h"
+
+#include <memory>
+#include <mutex>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <utility>
+
+#include "arrow/array/util.h"
+#include "arrow/chunked_array.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+
+DataTypeLayout ExtensionType::layout() const { return storage_type_->layout(); }
+
+std::string ExtensionType::ToString() const {
+ std::stringstream ss;
+ ss << "extension<" << this->extension_name() << ">";
+ return ss.str();
+}
+
+std::shared_ptr<Array> ExtensionType::WrapArray(const std::shared_ptr<DataType>& type,
+ const std::shared_ptr<Array>& storage) {
+ DCHECK_EQ(type->id(), Type::EXTENSION);
+ const auto& ext_type = checked_cast<const ExtensionType&>(*type);
+ DCHECK_EQ(storage->type_id(), ext_type.storage_type()->id());
+ auto data = storage->data()->Copy();
+ data->type = type;
+ return ext_type.MakeArray(std::move(data));
+}
+
+std::shared_ptr<ChunkedArray> ExtensionType::WrapArray(
+ const std::shared_ptr<DataType>& type, const std::shared_ptr<ChunkedArray>& storage) {
+ DCHECK_EQ(type->id(), Type::EXTENSION);
+ const auto& ext_type = checked_cast<const ExtensionType&>(*type);
+ DCHECK_EQ(storage->type()->id(), ext_type.storage_type()->id());
+
+ ArrayVector out_chunks(storage->num_chunks());
+ for (int i = 0; i < storage->num_chunks(); i++) {
+ auto data = storage->chunk(i)->data()->Copy();
+ data->type = type;
+ out_chunks[i] = ext_type.MakeArray(std::move(data));
+ }
+ return std::make_shared<ChunkedArray>(std::move(out_chunks));
+}
+
+ExtensionArray::ExtensionArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }
+
+ExtensionArray::ExtensionArray(const std::shared_ptr<DataType>& type,
+ const std::shared_ptr<Array>& storage) {
+ ARROW_CHECK_EQ(type->id(), Type::EXTENSION);
+ ARROW_CHECK(
+ storage->type()->Equals(*checked_cast<const ExtensionType&>(*type).storage_type()));
+ auto data = storage->data()->Copy();
+ // XXX This pointer is reverted below in SetData()...
+ data->type = type;
+ SetData(data);
+}
+
+void ExtensionArray::SetData(const std::shared_ptr<ArrayData>& data) {
+ ARROW_CHECK_EQ(data->type->id(), Type::EXTENSION);
+ this->Array::SetData(data);
+
+ auto storage_data = data->Copy();
+ storage_data->type = (static_cast<const ExtensionType&>(*data->type).storage_type());
+ storage_ = MakeArray(storage_data);
+}
+
+class ExtensionTypeRegistryImpl : public ExtensionTypeRegistry {
+ public:
+ ExtensionTypeRegistryImpl() {}
+
+ Status RegisterType(std::shared_ptr<ExtensionType> type) override {
+ std::lock_guard<std::mutex> lock(lock_);
+ std::string type_name = type->extension_name();
+ auto it = name_to_type_.find(type_name);
+ if (it != name_to_type_.end()) {
+ return Status::KeyError("A type extension with name ", type_name,
+ " already defined");
+ }
+ name_to_type_[type_name] = std::move(type);
+ return Status::OK();
+ }
+
+ Status UnregisterType(const std::string& type_name) override {
+ std::lock_guard<std::mutex> lock(lock_);
+ auto it = name_to_type_.find(type_name);
+ if (it == name_to_type_.end()) {
+ return Status::KeyError("No type extension with name ", type_name, " found");
+ }
+ name_to_type_.erase(it);
+ return Status::OK();
+ }
+
+ std::shared_ptr<ExtensionType> GetType(const std::string& type_name) override {
+ std::lock_guard<std::mutex> lock(lock_);
+ auto it = name_to_type_.find(type_name);
+ if (it == name_to_type_.end()) {
+ return nullptr;
+ } else {
+ return it->second;
+ }
+ return nullptr;
+ }
+
+ private:
+ std::mutex lock_;
+ std::unordered_map<std::string, std::shared_ptr<ExtensionType>> name_to_type_;
+};
+
+static std::shared_ptr<ExtensionTypeRegistry> g_registry;
+static std::once_flag registry_initialized;
+
+namespace internal {
+
+static void CreateGlobalRegistry() {
+ g_registry = std::make_shared<ExtensionTypeRegistryImpl>();
+}
+
+} // namespace internal
+
+std::shared_ptr<ExtensionTypeRegistry> ExtensionTypeRegistry::GetGlobalRegistry() {
+ std::call_once(registry_initialized, internal::CreateGlobalRegistry);
+ return g_registry;
+}
+
+Status RegisterExtensionType(std::shared_ptr<ExtensionType> type) {
+ auto registry = ExtensionTypeRegistry::GetGlobalRegistry();
+ return registry->RegisterType(type);
+}
+
+Status UnregisterExtensionType(const std::string& type_name) {
+ auto registry = ExtensionTypeRegistry::GetGlobalRegistry();
+ return registry->UnregisterType(type_name);
+}
+
+std::shared_ptr<ExtensionType> GetExtensionType(const std::string& type_name) {
+ auto registry = ExtensionTypeRegistry::GetGlobalRegistry();
+ return registry->GetType(type_name);
+}
+
+extern const char kExtensionTypeKeyName[] = "ARROW:extension:name";
+extern const char kExtensionMetadataKeyName[] = "ARROW:extension:metadata";
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/extension_type.h b/contrib/libs/apache/arrow/cpp/src/arrow/extension_type.h
index a22d015195d..9286ad1d1df 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/extension_type.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/extension_type.h
@@ -1,161 +1,161 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-/// User-defined extension types. EXPERIMENTAL in 0.13.0
-/// \since 0.13.0
-
-#pragma once
-
-#include <memory>
-#include <string>
-
-#include "arrow/array/array_base.h"
-#include "arrow/array/data.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-/// \brief The base class for custom / user-defined types.
-class ARROW_EXPORT ExtensionType : public DataType {
- public:
- static constexpr Type::type type_id = Type::EXTENSION;
-
- static constexpr const char* type_name() { return "extension"; }
-
- /// \brief The type of array used to represent this extension type's data
- std::shared_ptr<DataType> storage_type() const { return storage_type_; }
-
- DataTypeLayout layout() const override;
-
- std::string ToString() const override;
-
- std::string name() const override { return "extension"; }
-
- /// \brief Unique name of extension type used to identify type for
- /// serialization
- /// \return the string name of the extension
- virtual std::string extension_name() const = 0;
-
- /// \brief Determine if two instances of the same extension types are
- /// equal. Invoked from ExtensionType::Equals
- /// \param[in] other the type to compare this type with
- /// \return bool true if type instances are equal
- virtual bool ExtensionEquals(const ExtensionType& other) const = 0;
-
- /// \brief Wrap built-in Array type in a user-defined ExtensionArray instance
- /// \param[in] data the physical storage for the extension type
- virtual std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const = 0;
-
- /// \brief Create an instance of the ExtensionType given the actual storage
- /// type and the serialized representation
- /// \param[in] storage_type the physical storage type of the extension
- /// \param[in] serialized_data the serialized representation produced by
- /// Serialize
- virtual Result<std::shared_ptr<DataType>> Deserialize(
- std::shared_ptr<DataType> storage_type,
- const std::string& serialized_data) const = 0;
-
- /// \brief Create a serialized representation of the extension type's
- /// metadata. The storage type will be handled automatically in IPC code
- /// paths
- /// \return the serialized representation
- virtual std::string Serialize() const = 0;
-
- /// \brief Wrap the given storage array as an extension array
- static std::shared_ptr<Array> WrapArray(const std::shared_ptr<DataType>& ext_type,
- const std::shared_ptr<Array>& storage);
-
- /// \brief Wrap the given chunked storage array as a chunked extension array
- static std::shared_ptr<ChunkedArray> WrapArray(
- const std::shared_ptr<DataType>& ext_type,
- const std::shared_ptr<ChunkedArray>& storage);
-
- protected:
- explicit ExtensionType(std::shared_ptr<DataType> storage_type)
- : DataType(Type::EXTENSION), storage_type_(storage_type) {}
-
- std::shared_ptr<DataType> storage_type_;
-};
-
-/// \brief Base array class for user-defined extension types
-class ARROW_EXPORT ExtensionArray : public Array {
- public:
- /// \brief Construct an ExtensionArray from an ArrayData.
- ///
- /// The ArrayData must have the right ExtensionType.
- explicit ExtensionArray(const std::shared_ptr<ArrayData>& data);
-
- /// \brief Construct an ExtensionArray from a type and the underlying storage.
- ExtensionArray(const std::shared_ptr<DataType>& type,
- const std::shared_ptr<Array>& storage);
-
- const ExtensionType* extension_type() const {
- return internal::checked_cast<const ExtensionType*>(data_->type.get());
- }
-
- /// \brief The physical storage for the extension array
- std::shared_ptr<Array> storage() const { return storage_; }
-
- protected:
- void SetData(const std::shared_ptr<ArrayData>& data);
- std::shared_ptr<Array> storage_;
-};
-
-class ARROW_EXPORT ExtensionTypeRegistry {
- public:
- /// \brief Provide access to the global registry to allow code to control for
- /// race conditions in registry teardown when some types need to be
- /// unregistered and destroyed first
- static std::shared_ptr<ExtensionTypeRegistry> GetGlobalRegistry();
-
- virtual ~ExtensionTypeRegistry() = default;
-
- virtual Status RegisterType(std::shared_ptr<ExtensionType> type) = 0;
- virtual Status UnregisterType(const std::string& type_name) = 0;
- virtual std::shared_ptr<ExtensionType> GetType(const std::string& type_name) = 0;
-};
-
-/// \brief Register an extension type globally. The name returned by the type's
-/// extension_name() method should be unique. This method is thread-safe
-/// \param[in] type an instance of the extension type
-/// \return Status
-ARROW_EXPORT
-Status RegisterExtensionType(std::shared_ptr<ExtensionType> type);
-
-/// \brief Delete an extension type from the global registry. This method is
-/// thread-safe
-/// \param[in] type_name the unique name of a registered extension type
-/// \return Status error if the type name is unknown
-ARROW_EXPORT
-Status UnregisterExtensionType(const std::string& type_name);
-
-/// \brief Retrieve an extension type from the global registry. Returns nullptr
-/// if not found. This method is thread-safe
-/// \return the globally-registered extension type
-ARROW_EXPORT
-std::shared_ptr<ExtensionType> GetExtensionType(const std::string& type_name);
-
-ARROW_EXPORT extern const char kExtensionTypeKeyName[];
-ARROW_EXPORT extern const char kExtensionMetadataKeyName[];
-
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/// User-defined extension types. EXPERIMENTAL in 0.13.0
+/// \since 0.13.0
+
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/data.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+/// \brief The base class for custom / user-defined types.
+///
+/// Subclasses supply a storage type at construction and implement the pure
+/// virtual methods below (naming, equality, array wrapping, serialization).
+class ARROW_EXPORT ExtensionType : public DataType {
+ public:
+ /// \brief Type id shared by all extension types
+ static constexpr Type::type type_id = Type::EXTENSION;
+
+ /// \brief Generic type name, always "extension"; the per-type name is
+ /// given by extension_name()
+ static constexpr const char* type_name() { return "extension"; }
+
+ /// \brief The type of array used to represent this extension type's data
+ std::shared_ptr<DataType> storage_type() const { return storage_type_; }
+
+ /// \brief Override of DataType::layout(); defined out of line
+ DataTypeLayout layout() const override;
+
+ /// \brief Override of DataType::ToString(); defined out of line
+ std::string ToString() const override;
+
+ /// \brief Always returns "extension", regardless of the concrete subclass
+ std::string name() const override { return "extension"; }
+
+ /// \brief Unique name of extension type used to identify type for
+ /// serialization
+ /// \return the string name of the extension
+ virtual std::string extension_name() const = 0;
+
+ /// \brief Determine if two instances of the same extension type are
+ /// equal. Invoked from ExtensionType::Equals
+ /// \param[in] other the type to compare this type with
+ /// \return bool true if type instances are equal
+ virtual bool ExtensionEquals(const ExtensionType& other) const = 0;
+
+ /// \brief Wrap built-in Array type in a user-defined ExtensionArray instance
+ /// \param[in] data the physical storage for the extension type
+ virtual std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const = 0;
+
+ /// \brief Create an instance of the ExtensionType given the actual storage
+ /// type and the serialized representation
+ /// \param[in] storage_type the physical storage type of the extension
+ /// \param[in] serialized_data the serialized representation produced by
+ /// Serialize
+ /// \return a Result holding the reconstructed type
+ virtual Result<std::shared_ptr<DataType>> Deserialize(
+ std::shared_ptr<DataType> storage_type,
+ const std::string& serialized_data) const = 0;
+
+ /// \brief Create a serialized representation of the extension type's
+ /// metadata. The storage type will be handled automatically in IPC code
+ /// paths
+ /// \return the serialized representation
+ virtual std::string Serialize() const = 0;
+
+ /// \brief Wrap the given storage array as an extension array
+ static std::shared_ptr<Array> WrapArray(const std::shared_ptr<DataType>& ext_type,
+ const std::shared_ptr<Array>& storage);
+
+ /// \brief Wrap the given chunked storage array as a chunked extension array
+ static std::shared_ptr<ChunkedArray> WrapArray(
+ const std::shared_ptr<DataType>& ext_type,
+ const std::shared_ptr<ChunkedArray>& storage);
+
+ protected:
+ /// \brief Construct with the given storage type; the DataType base is
+ /// always initialized with Type::EXTENSION
+ explicit ExtensionType(std::shared_ptr<DataType> storage_type)
+ : DataType(Type::EXTENSION), storage_type_(storage_type) {}
+
+ /// Storage type supplied at construction; returned by storage_type()
+ std::shared_ptr<DataType> storage_type_;
+};
+
+/// \brief Base array class for user-defined extension types
+class ARROW_EXPORT ExtensionArray : public Array {
+ public:
+ /// \brief Construct an ExtensionArray from an ArrayData.
+ ///
+ /// The ArrayData must have the right ExtensionType.
+ explicit ExtensionArray(const std::shared_ptr<ArrayData>& data);
+
+ /// \brief Construct an ExtensionArray from a type and the underlying storage.
+ ExtensionArray(const std::shared_ptr<DataType>& type,
+ const std::shared_ptr<Array>& storage);
+
+ /// \brief The array's type viewed as an ExtensionType
+ ///
+ /// Obtained with checked_cast on data_->type, so the array's type must
+ /// actually be an ExtensionType.
+ const ExtensionType* extension_type() const {
+ return internal::checked_cast<const ExtensionType*>(data_->type.get());
+ }
+
+ /// \brief The physical storage for the extension array
+ std::shared_ptr<Array> storage() const { return storage_; }
+
+ protected:
+ /// Defined out of line.
+ /// NOTE(review): presumably also populates storage_ from `data` -- confirm
+ /// against the implementation.
+ void SetData(const std::shared_ptr<ArrayData>& data);
+ /// Storage array returned by storage()
+ std::shared_ptr<Array> storage_;
+};
+
+/// \brief Abstract interface of a registry mapping names to extension types
+class ARROW_EXPORT ExtensionTypeRegistry {
+ public:
+ /// \brief Provide access to the global registry to allow code to control for
+ /// race conditions in registry teardown when some types need to be
+ /// unregistered and destroyed first
+ static std::shared_ptr<ExtensionTypeRegistry> GetGlobalRegistry();
+
+ virtual ~ExtensionTypeRegistry() = default;
+
+ /// \brief Add an extension type to the registry
+ /// \param[in] type an instance of the extension type
+ virtual Status RegisterType(std::shared_ptr<ExtensionType> type) = 0;
+ /// \brief Remove an extension type from the registry
+ /// \param[in] type_name the name the type was registered under
+ virtual Status UnregisterType(const std::string& type_name) = 0;
+ /// \brief Retrieve an extension type from the registry
+ /// \param[in] type_name the name the type was registered under
+ virtual std::shared_ptr<ExtensionType> GetType(const std::string& type_name) = 0;
+};
+
+/// \brief Register an extension type globally. The name returned by the type's
+/// extension_name() method should be unique. This method is thread-safe
+/// \param[in] type an instance of the extension type
+/// \return Status
+ARROW_EXPORT
+Status RegisterExtensionType(std::shared_ptr<ExtensionType> type);
+
+/// \brief Delete an extension type from the global registry. This method is
+/// thread-safe
+/// \param[in] type_name the unique name of a registered extension type
+/// \return Status error if the type name is unknown
+ARROW_EXPORT
+Status UnregisterExtensionType(const std::string& type_name);
+
+/// \brief Retrieve an extension type from the global registry. Returns nullptr
+/// if not found. This method is thread-safe
+/// \param[in] type_name the unique name of a registered extension type
+/// \return the globally-registered extension type
+ARROW_EXPORT
+std::shared_ptr<ExtensionType> GetExtensionType(const std::string& type_name);
+
+ARROW_EXPORT extern const char kExtensionTypeKeyName[];
+ARROW_EXPORT extern const char kExtensionMetadataKeyName[];
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/buffered.cc b/contrib/libs/apache/arrow/cpp/src/arrow/io/buffered.cc
index 7804c130ca1..5afe74b8730 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/buffered.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/buffered.cc
@@ -1,481 +1,481 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/io/buffered.h"
-
-#include <algorithm>
-#include <cstring>
-#include <memory>
-#include <mutex>
-#include <utility>
-
-#include "arrow/buffer.h"
-#include "arrow/io/util_internal.h"
-#include "arrow/memory_pool.h"
-#include "arrow/status.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/string_view.h"
-
-namespace arrow {
-namespace io {
-
-// ----------------------------------------------------------------------
-// BufferedOutputStream implementation
-
-class BufferedBase {
- public:
- explicit BufferedBase(MemoryPool* pool)
- : pool_(pool),
- is_open_(true),
- buffer_data_(nullptr),
- buffer_pos_(0),
- buffer_size_(0),
- raw_pos_(-1) {}
-
- bool closed() const {
- std::lock_guard<std::mutex> guard(lock_);
- return !is_open_;
- }
-
- Status ResetBuffer() {
- if (!buffer_) {
- // On first invocation, or if the buffer has been released, we allocate a
- // new buffer
- ARROW_ASSIGN_OR_RAISE(buffer_, AllocateResizableBuffer(buffer_size_, pool_));
- } else if (buffer_->size() != buffer_size_) {
- RETURN_NOT_OK(buffer_->Resize(buffer_size_));
- }
- buffer_data_ = buffer_->mutable_data();
- return Status::OK();
- }
-
- Status ResizeBuffer(int64_t new_buffer_size) {
- buffer_size_ = new_buffer_size;
- return ResetBuffer();
- }
-
- void AppendToBuffer(const void* data, int64_t nbytes) {
- DCHECK_LE(buffer_pos_ + nbytes, buffer_size_);
- std::memcpy(buffer_data_ + buffer_pos_, data, nbytes);
- buffer_pos_ += nbytes;
- }
-
- int64_t buffer_size() const { return buffer_size_; }
-
- int64_t buffer_pos() const { return buffer_pos_; }
-
- protected:
- MemoryPool* pool_;
- bool is_open_;
-
- std::shared_ptr<ResizableBuffer> buffer_;
- uint8_t* buffer_data_;
- int64_t buffer_pos_;
- int64_t buffer_size_;
-
- mutable int64_t raw_pos_;
- mutable std::mutex lock_;
-};
-
-class BufferedOutputStream::Impl : public BufferedBase {
- public:
- explicit Impl(std::shared_ptr<OutputStream> raw, MemoryPool* pool)
- : BufferedBase(pool), raw_(std::move(raw)) {}
-
- Status Close() {
- std::lock_guard<std::mutex> guard(lock_);
- if (is_open_) {
- Status st = FlushUnlocked();
- is_open_ = false;
- RETURN_NOT_OK(raw_->Close());
- return st;
- }
- return Status::OK();
- }
-
- Status Abort() {
- std::lock_guard<std::mutex> guard(lock_);
- if (is_open_) {
- is_open_ = false;
- return raw_->Abort();
- }
- return Status::OK();
- }
-
- Result<int64_t> Tell() const {
- std::lock_guard<std::mutex> guard(lock_);
- if (raw_pos_ == -1) {
- ARROW_ASSIGN_OR_RAISE(raw_pos_, raw_->Tell());
- DCHECK_GE(raw_pos_, 0);
- }
- return raw_pos_ + buffer_pos_;
- }
-
- Status Write(const void* data, int64_t nbytes) { return DoWrite(data, nbytes); }
-
- Status Write(const std::shared_ptr<Buffer>& buffer) {
- return DoWrite(buffer->data(), buffer->size(), buffer);
- }
-
- Status DoWrite(const void* data, int64_t nbytes,
- const std::shared_ptr<Buffer>& buffer = nullptr) {
- std::lock_guard<std::mutex> guard(lock_);
- if (nbytes < 0) {
- return Status::Invalid("write count should be >= 0");
- }
- if (nbytes == 0) {
- return Status::OK();
- }
- if (nbytes + buffer_pos_ >= buffer_size_) {
- RETURN_NOT_OK(FlushUnlocked());
- DCHECK_EQ(buffer_pos_, 0);
- if (nbytes >= buffer_size_) {
- // Direct write
- if (buffer) {
- return raw_->Write(buffer);
- } else {
- return raw_->Write(data, nbytes);
- }
- }
- }
- AppendToBuffer(data, nbytes);
- return Status::OK();
- }
-
- Status FlushUnlocked() {
- if (buffer_pos_ > 0) {
- // Invalidate cached raw pos
- raw_pos_ = -1;
- RETURN_NOT_OK(raw_->Write(buffer_data_, buffer_pos_));
- buffer_pos_ = 0;
- }
- return Status::OK();
- }
-
- Status Flush() {
- std::lock_guard<std::mutex> guard(lock_);
- return FlushUnlocked();
- }
-
- Result<std::shared_ptr<OutputStream>> Detach() {
- std::lock_guard<std::mutex> guard(lock_);
- RETURN_NOT_OK(FlushUnlocked());
- is_open_ = false;
- return std::move(raw_);
- }
-
- Status SetBufferSize(int64_t new_buffer_size) {
- std::lock_guard<std::mutex> guard(lock_);
- if (new_buffer_size <= 0) {
- return Status::Invalid("Buffer size should be positive");
- }
- if (buffer_pos_ >= new_buffer_size) {
- // If the buffer is shrinking, first flush to the raw OutputStream
- RETURN_NOT_OK(FlushUnlocked());
- }
- return ResizeBuffer(new_buffer_size);
- }
-
- std::shared_ptr<OutputStream> raw() const { return raw_; }
-
- private:
- std::shared_ptr<OutputStream> raw_;
-};
-
-BufferedOutputStream::BufferedOutputStream(std::shared_ptr<OutputStream> raw,
- MemoryPool* pool) {
- impl_.reset(new Impl(std::move(raw), pool));
-}
-
-Result<std::shared_ptr<BufferedOutputStream>> BufferedOutputStream::Create(
- int64_t buffer_size, MemoryPool* pool, std::shared_ptr<OutputStream> raw) {
- auto result = std::shared_ptr<BufferedOutputStream>(
- new BufferedOutputStream(std::move(raw), pool));
- RETURN_NOT_OK(result->SetBufferSize(buffer_size));
- return result;
-}
-
-BufferedOutputStream::~BufferedOutputStream() { internal::CloseFromDestructor(this); }
-
-Status BufferedOutputStream::SetBufferSize(int64_t new_buffer_size) {
- return impl_->SetBufferSize(new_buffer_size);
-}
-
-int64_t BufferedOutputStream::buffer_size() const { return impl_->buffer_size(); }
-
-int64_t BufferedOutputStream::bytes_buffered() const { return impl_->buffer_pos(); }
-
-Result<std::shared_ptr<OutputStream>> BufferedOutputStream::Detach() {
- return impl_->Detach();
-}
-
-Status BufferedOutputStream::Close() { return impl_->Close(); }
-
-Status BufferedOutputStream::Abort() { return impl_->Abort(); }
-
-bool BufferedOutputStream::closed() const { return impl_->closed(); }
-
-Result<int64_t> BufferedOutputStream::Tell() const { return impl_->Tell(); }
-
-Status BufferedOutputStream::Write(const void* data, int64_t nbytes) {
- return impl_->Write(data, nbytes);
-}
-
-Status BufferedOutputStream::Write(const std::shared_ptr<Buffer>& data) {
- return impl_->Write(data);
-}
-
-Status BufferedOutputStream::Flush() { return impl_->Flush(); }
-
-std::shared_ptr<OutputStream> BufferedOutputStream::raw() const { return impl_->raw(); }
-
-// ----------------------------------------------------------------------
-// BufferedInputStream implementation
-
-class BufferedInputStream::Impl : public BufferedBase {
- public:
- Impl(std::shared_ptr<InputStream> raw, MemoryPool* pool, int64_t raw_total_bytes_bound)
- : BufferedBase(pool),
- raw_(std::move(raw)),
- raw_read_total_(0),
- raw_read_bound_(raw_total_bytes_bound),
- bytes_buffered_(0) {}
-
- Status Close() {
- if (is_open_) {
- is_open_ = false;
- return raw_->Close();
- }
- return Status::OK();
- }
-
- Status Abort() {
- if (is_open_) {
- is_open_ = false;
- return raw_->Abort();
- }
- return Status::OK();
- }
-
- Result<int64_t> Tell() const {
- if (raw_pos_ == -1) {
- ARROW_ASSIGN_OR_RAISE(raw_pos_, raw_->Tell());
- DCHECK_GE(raw_pos_, 0);
- }
- // Shift by bytes_buffered to return semantic stream position
- return raw_pos_ - bytes_buffered_;
- }
-
- Status SetBufferSize(int64_t new_buffer_size) {
- if (new_buffer_size <= 0) {
- return Status::Invalid("Buffer size should be positive");
- }
- if ((buffer_pos_ + bytes_buffered_) >= new_buffer_size) {
- return Status::Invalid("Cannot shrink read buffer if buffered data remains");
- }
- return ResizeBuffer(new_buffer_size);
- }
-
- Result<util::string_view> Peek(int64_t nbytes) {
- if (raw_read_bound_ >= 0) {
- // Do not try to peek more than the total remaining number of bytes.
- nbytes = std::min(nbytes, bytes_buffered_ + (raw_read_bound_ - raw_read_total_));
- }
-
- if (bytes_buffered_ == 0 && nbytes < buffer_size_) {
- // Pre-buffer for small reads
- RETURN_NOT_OK(BufferIfNeeded());
- }
-
- // Increase the buffer size if needed.
- if (nbytes > buffer_->size() - buffer_pos_) {
- RETURN_NOT_OK(SetBufferSize(nbytes + buffer_pos_));
- DCHECK(buffer_->size() - buffer_pos_ >= nbytes);
- }
- // Read more data when buffer has insufficient left
- if (nbytes > bytes_buffered_) {
- int64_t additional_bytes_to_read = nbytes - bytes_buffered_;
- if (raw_read_bound_ >= 0) {
- additional_bytes_to_read =
- std::min(additional_bytes_to_read, raw_read_bound_ - raw_read_total_);
- }
- ARROW_ASSIGN_OR_RAISE(
- int64_t bytes_read,
- raw_->Read(additional_bytes_to_read,
- buffer_->mutable_data() + buffer_pos_ + bytes_buffered_));
- bytes_buffered_ += bytes_read;
- raw_read_total_ += bytes_read;
- nbytes = bytes_buffered_;
- }
- DCHECK(nbytes <= bytes_buffered_); // Enough bytes available
- return util::string_view(reinterpret_cast<const char*>(buffer_data_ + buffer_pos_),
- static_cast<size_t>(nbytes));
- }
-
- int64_t bytes_buffered() const { return bytes_buffered_; }
-
- int64_t buffer_size() const { return buffer_size_; }
-
- std::shared_ptr<InputStream> Detach() {
- is_open_ = false;
- return std::move(raw_);
- }
-
- void RewindBuffer() {
- // Invalidate buffered data, as with a Seek or large Read
- buffer_pos_ = bytes_buffered_ = 0;
- }
-
- Status BufferIfNeeded() {
- if (bytes_buffered_ == 0) {
- // Fill buffer
- if (!buffer_) {
- RETURN_NOT_OK(ResetBuffer());
- }
-
- int64_t bytes_to_buffer = buffer_size_;
- if (raw_read_bound_ >= 0) {
- bytes_to_buffer = std::min(buffer_size_, raw_read_bound_ - raw_read_total_);
- }
- ARROW_ASSIGN_OR_RAISE(bytes_buffered_, raw_->Read(bytes_to_buffer, buffer_data_));
- buffer_pos_ = 0;
- raw_read_total_ += bytes_buffered_;
-
- // Do not make assumptions about the raw stream position
- raw_pos_ = -1;
- }
- return Status::OK();
- }
-
- void ConsumeBuffer(int64_t nbytes) {
- buffer_pos_ += nbytes;
- bytes_buffered_ -= nbytes;
- }
-
- Result<int64_t> Read(int64_t nbytes, void* out) {
- if (ARROW_PREDICT_FALSE(nbytes < 0)) {
- return Status::Invalid("Bytes to read must be positive. Received:", nbytes);
- }
-
- if (nbytes < buffer_size_) {
- // Pre-buffer for small reads
- RETURN_NOT_OK(BufferIfNeeded());
- }
-
- if (nbytes > bytes_buffered_) {
- // Copy buffered bytes into out, then read rest
- memcpy(out, buffer_data_ + buffer_pos_, bytes_buffered_);
-
- int64_t bytes_to_read = nbytes - bytes_buffered_;
- if (raw_read_bound_ >= 0) {
- bytes_to_read = std::min(bytes_to_read, raw_read_bound_ - raw_read_total_);
- }
- ARROW_ASSIGN_OR_RAISE(
- int64_t bytes_read,
- raw_->Read(bytes_to_read, reinterpret_cast<uint8_t*>(out) + bytes_buffered_));
- raw_read_total_ += bytes_read;
-
- // Do not make assumptions about the raw stream position
- raw_pos_ = -1;
- bytes_read += bytes_buffered_;
- RewindBuffer();
- return bytes_read;
- } else {
- memcpy(out, buffer_data_ + buffer_pos_, nbytes);
- ConsumeBuffer(nbytes);
- return nbytes;
- }
- }
-
- Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) {
- ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(nbytes, pool_));
-
- ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, Read(nbytes, buffer->mutable_data()));
-
- if (bytes_read < nbytes) {
- // Change size but do not reallocate internal capacity
- RETURN_NOT_OK(buffer->Resize(bytes_read, false /* shrink_to_fit */));
- buffer->ZeroPadding();
- }
- return std::move(buffer);
- }
-
- // For providing access to the raw file handles
- std::shared_ptr<InputStream> raw() const { return raw_; }
-
- private:
- std::shared_ptr<InputStream> raw_;
- int64_t raw_read_total_;
- int64_t raw_read_bound_;
-
- // Number of remaining bytes in the buffer, to be reduced on each read from
- // the buffer
- int64_t bytes_buffered_;
-};
-
-BufferedInputStream::BufferedInputStream(std::shared_ptr<InputStream> raw,
- MemoryPool* pool,
- int64_t raw_total_bytes_bound) {
- impl_.reset(new Impl(std::move(raw), pool, raw_total_bytes_bound));
-}
-
-BufferedInputStream::~BufferedInputStream() { internal::CloseFromDestructor(this); }
-
-Result<std::shared_ptr<BufferedInputStream>> BufferedInputStream::Create(
- int64_t buffer_size, MemoryPool* pool, std::shared_ptr<InputStream> raw,
- int64_t raw_total_bytes_bound) {
- auto result = std::shared_ptr<BufferedInputStream>(
- new BufferedInputStream(std::move(raw), pool, raw_total_bytes_bound));
- RETURN_NOT_OK(result->SetBufferSize(buffer_size));
- return result;
-}
-
-Status BufferedInputStream::DoClose() { return impl_->Close(); }
-
-Status BufferedInputStream::DoAbort() { return impl_->Abort(); }
-
-bool BufferedInputStream::closed() const { return impl_->closed(); }
-
-std::shared_ptr<InputStream> BufferedInputStream::Detach() { return impl_->Detach(); }
-
-std::shared_ptr<InputStream> BufferedInputStream::raw() const { return impl_->raw(); }
-
-Result<int64_t> BufferedInputStream::DoTell() const { return impl_->Tell(); }
-
-Result<util::string_view> BufferedInputStream::DoPeek(int64_t nbytes) {
- return impl_->Peek(nbytes);
-}
-
-Status BufferedInputStream::SetBufferSize(int64_t new_buffer_size) {
- return impl_->SetBufferSize(new_buffer_size);
-}
-
-int64_t BufferedInputStream::bytes_buffered() const { return impl_->bytes_buffered(); }
-
-int64_t BufferedInputStream::buffer_size() const { return impl_->buffer_size(); }
-
-Result<int64_t> BufferedInputStream::DoRead(int64_t nbytes, void* out) {
- return impl_->Read(nbytes, out);
-}
-
-Result<std::shared_ptr<Buffer>> BufferedInputStream::DoRead(int64_t nbytes) {
- return impl_->Read(nbytes);
-}
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/io/buffered.h"
+
+#include <algorithm>
+#include <cstring>
+#include <memory>
+#include <mutex>
+#include <utility>
+
+#include "arrow/buffer.h"
+#include "arrow/io/util_internal.h"
+#include "arrow/memory_pool.h"
+#include "arrow/status.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/string_view.h"
+
+namespace arrow {
+namespace io {
+
+// ----------------------------------------------------------------------
+// BufferedOutputStream implementation
+
+// State shared by the buffered stream implementations in this file: the
+// open/closed flag, the resizable buffer with its position and size, a cached
+// raw stream position and the mutex used by subclasses.
+class BufferedBase {
+ public:
+ // The stream starts open with no buffer allocated; raw_pos_ == -1 marks the
+ // cached raw stream position as unknown.
+ explicit BufferedBase(MemoryPool* pool)
+ : pool_(pool),
+ is_open_(true),
+ buffer_data_(nullptr),
+ buffer_pos_(0),
+ buffer_size_(0),
+ raw_pos_(-1) {}
+
+ // Reads is_open_ under lock_.
+ bool closed() const {
+ std::lock_guard<std::mutex> guard(lock_);
+ return !is_open_;
+ }
+
+ // Makes buffer_ match buffer_size_ (allocating it if absent) and refreshes
+ // buffer_data_ to point at its storage.
+ Status ResetBuffer() {
+ if (!buffer_) {
+ // On first invocation, or if the buffer has been released, we allocate a
+ // new buffer
+ ARROW_ASSIGN_OR_RAISE(buffer_, AllocateResizableBuffer(buffer_size_, pool_));
+ } else if (buffer_->size() != buffer_size_) {
+ RETURN_NOT_OK(buffer_->Resize(buffer_size_));
+ }
+ buffer_data_ = buffer_->mutable_data();
+ return Status::OK();
+ }
+
+ // Records the new size, then allocates or resizes through ResetBuffer().
+ // Note that buffer_size_ is updated even if ResetBuffer() fails.
+ Status ResizeBuffer(int64_t new_buffer_size) {
+ buffer_size_ = new_buffer_size;
+ return ResetBuffer();
+ }
+
+ // Copies nbytes from data to the current buffer position and advances it.
+ // The caller must ensure the bytes fit; this is only checked by a DCHECK.
+ void AppendToBuffer(const void* data, int64_t nbytes) {
+ DCHECK_LE(buffer_pos_ + nbytes, buffer_size_);
+ std::memcpy(buffer_data_ + buffer_pos_, data, nbytes);
+ buffer_pos_ += nbytes;
+ }
+
+ // Both accessors read their field without taking lock_.
+ int64_t buffer_size() const { return buffer_size_; }
+
+ int64_t buffer_pos() const { return buffer_pos_; }
+
+ protected:
+ // Pool passed to AllocateResizableBuffer
+ MemoryPool* pool_;
+ bool is_open_;
+
+ std::shared_ptr<ResizableBuffer> buffer_;
+ // Cached buffer_->mutable_data(), refreshed by ResetBuffer()
+ uint8_t* buffer_data_;
+ // Current offset into the buffer
+ int64_t buffer_pos_;
+ // Requested buffer size in bytes
+ int64_t buffer_size_;
+
+ // Cached position of the raw stream, or -1 when unknown. Mutable so that
+ // const Tell() implementations can fill it in.
+ mutable int64_t raw_pos_;
+ mutable std::mutex lock_;
+};
+
+// Implementation of BufferedOutputStream: accumulates writes in the buffer
+// inherited from BufferedBase and forwards them to the wrapped raw stream.
+// Every public method except raw() acquires lock_; FlushUnlocked() expects the
+// caller to hold it already.
+class BufferedOutputStream::Impl : public BufferedBase {
+ public:
+  explicit Impl(std::shared_ptr<OutputStream> raw, MemoryPool* pool)
+      : BufferedBase(pool), raw_(std::move(raw)) {}
+
+  // Flushes buffered bytes, marks the stream closed and closes the raw stream.
+  // A failure to close the raw stream is reported in preference to a flush
+  // failure. Does nothing when the stream is already closed.
+  Status Close() {
+    std::lock_guard<std::mutex> guard(lock_);
+    if (is_open_) {
+      Status st = FlushUnlocked();
+      is_open_ = false;
+      RETURN_NOT_OK(raw_->Close());
+      return st;
+    }
+    return Status::OK();
+  }
+
+  // Marks the stream closed and aborts the raw stream without flushing.
+  Status Abort() {
+    std::lock_guard<std::mutex> guard(lock_);
+    if (is_open_) {
+      is_open_ = false;
+      return raw_->Abort();
+    }
+    return Status::OK();
+  }
+
+  // Returns the logical position: raw stream position plus bytes still held in
+  // the buffer. The raw position is queried only when no cached value exists.
+  Result<int64_t> Tell() const {
+    std::lock_guard<std::mutex> guard(lock_);
+    if (raw_pos_ == -1) {
+      ARROW_ASSIGN_OR_RAISE(raw_pos_, raw_->Tell());
+      DCHECK_GE(raw_pos_, 0);
+    }
+    return raw_pos_ + buffer_pos_;
+  }
+
+  Status Write(const void* data, int64_t nbytes) { return DoWrite(data, nbytes); }
+
+  Status Write(const std::shared_ptr<Buffer>& buffer) {
+    return DoWrite(buffer->data(), buffer->size(), buffer);
+  }
+
+  // Common write path.
+  //   data, nbytes: bytes to write; a negative nbytes is rejected and zero is
+  //                 a no-op.
+  //   buffer:       optional owner of `data`; when set, a direct write hands
+  //                 the Buffer itself to the raw stream.
+  // Writes that do not fit in the remaining buffer space first flush it; writes
+  // at least as large as the whole buffer bypass it entirely.
+  Status DoWrite(const void* data, int64_t nbytes,
+                 const std::shared_ptr<Buffer>& buffer = nullptr) {
+    std::lock_guard<std::mutex> guard(lock_);
+    if (nbytes < 0) {
+      return Status::Invalid("write count should be >= 0");
+    }
+    if (nbytes == 0) {
+      return Status::OK();
+    }
+    if (nbytes + buffer_pos_ >= buffer_size_) {
+      RETURN_NOT_OK(FlushUnlocked());
+      DCHECK_EQ(buffer_pos_, 0);
+      if (nbytes >= buffer_size_) {
+        // Direct write: the raw stream advances without going through the
+        // buffer, so a position cached by Tell() would become stale.
+        // FlushUnlocked() only drops the cache when bytes were buffered, hence
+        // it has to be invalidated here as well.
+        raw_pos_ = -1;
+        if (buffer) {
+          return raw_->Write(buffer);
+        } else {
+          return raw_->Write(data, nbytes);
+        }
+      }
+    }
+    AppendToBuffer(data, nbytes);
+    return Status::OK();
+  }
+
+  // Writes any buffered bytes to the raw stream. Must be called with lock_ held.
+  Status FlushUnlocked() {
+    if (buffer_pos_ > 0) {
+      // Invalidate cached raw pos
+      raw_pos_ = -1;
+      RETURN_NOT_OK(raw_->Write(buffer_data_, buffer_pos_));
+      buffer_pos_ = 0;
+    }
+    return Status::OK();
+  }
+
+  Status Flush() {
+    std::lock_guard<std::mutex> guard(lock_);
+    return FlushUnlocked();
+  }
+
+  // Flushes, marks this stream closed and hands the raw stream to the caller.
+  // raw_ is left empty afterwards. If the flush fails the stream stays open and
+  // keeps the raw stream.
+  Result<std::shared_ptr<OutputStream>> Detach() {
+    std::lock_guard<std::mutex> guard(lock_);
+    RETURN_NOT_OK(FlushUnlocked());
+    is_open_ = false;
+    return std::move(raw_);
+  }
+
+  // Changes the buffer size; non-positive sizes are rejected.
+  Status SetBufferSize(int64_t new_buffer_size) {
+    std::lock_guard<std::mutex> guard(lock_);
+    if (new_buffer_size <= 0) {
+      return Status::Invalid("Buffer size should be positive");
+    }
+    if (buffer_pos_ >= new_buffer_size) {
+      // If the buffer is shrinking, first flush to the raw OutputStream
+      RETURN_NOT_OK(FlushUnlocked());
+    }
+    return ResizeBuffer(new_buffer_size);
+  }
+
+  // Returns the wrapped stream without taking lock_; empty after Detach().
+  std::shared_ptr<OutputStream> raw() const { return raw_; }
+
+ private:
+  std::shared_ptr<OutputStream> raw_;
+};
+
+// Wrap `raw` in a freshly allocated Impl. The buffer is sized afterwards
+// through SetBufferSize(), as done by Create().
+BufferedOutputStream::BufferedOutputStream(std::shared_ptr<OutputStream> raw,
+                                           MemoryPool* pool) {
+  impl_ = std::unique_ptr<Impl>(new Impl(std::move(raw), pool));
+}
+
+// Factory: construct the stream around `raw`, then size its buffer. A failure
+// from SetBufferSize() (e.g. a non-positive buffer_size) is returned instead
+// of the stream.
+Result<std::shared_ptr<BufferedOutputStream>> BufferedOutputStream::Create(
+    int64_t buffer_size, MemoryPool* pool, std::shared_ptr<OutputStream> raw) {
+  std::shared_ptr<BufferedOutputStream> stream(
+      new BufferedOutputStream(std::move(raw), pool));
+  RETURN_NOT_OK(stream->SetBufferSize(buffer_size));
+  return stream;
+}
+
+// Destruction hands the stream to internal::CloseFromDestructor (defined
+// elsewhere).
+BufferedOutputStream::~BufferedOutputStream() { internal::CloseFromDestructor(this); }
+
+// The remaining public methods are thin forwarders to the Impl object.
+
+Status BufferedOutputStream::SetBufferSize(int64_t new_buffer_size) {
+ return impl_->SetBufferSize(new_buffer_size);
+}
+
+int64_t BufferedOutputStream::buffer_size() const { return impl_->buffer_size(); }
+
+// The number of buffered bytes is the implementation's current buffer position.
+int64_t BufferedOutputStream::bytes_buffered() const { return impl_->buffer_pos(); }
+
+Result<std::shared_ptr<OutputStream>> BufferedOutputStream::Detach() {
+ return impl_->Detach();
+}
+
+Status BufferedOutputStream::Close() { return impl_->Close(); }
+
+Status BufferedOutputStream::Abort() { return impl_->Abort(); }
+
+bool BufferedOutputStream::closed() const { return impl_->closed(); }
+
+Result<int64_t> BufferedOutputStream::Tell() const { return impl_->Tell(); }
+
+Status BufferedOutputStream::Write(const void* data, int64_t nbytes) {
+ return impl_->Write(data, nbytes);
+}
+
+Status BufferedOutputStream::Write(const std::shared_ptr<Buffer>& data) {
+ return impl_->Write(data);
+}
+
+Status BufferedOutputStream::Flush() { return impl_->Flush(); }
+
+std::shared_ptr<OutputStream> BufferedOutputStream::raw() const { return impl_->raw(); }
+
+// ----------------------------------------------------------------------
+// BufferedInputStream implementation
+
+// Private implementation of BufferedInputStream. The buffer storage and the
+// cached raw position (buffer_, buffer_data_, buffer_pos_, buffer_size_,
+// raw_pos_, is_open_, pool_) come from BufferedBase.
+class BufferedInputStream::Impl : public BufferedBase {
+ public:
+  // \param raw the stream to read from
+  // \param pool forwarded to BufferedBase
+  // \param raw_total_bytes_bound upper bound on the number of bytes ever read
+  //   from `raw`; a negative value disables the bound
+  Impl(std::shared_ptr<InputStream> raw, MemoryPool* pool, int64_t raw_total_bytes_bound)
+      : BufferedBase(pool),
+        raw_(std::move(raw)),
+        raw_read_total_(0),
+        raw_read_bound_(raw_total_bytes_bound),
+        bytes_buffered_(0) {}
+
+  // Close the raw stream the first time this is called while open; later
+  // calls are no-ops returning OK.
+  Status Close() {
+    if (is_open_) {
+      is_open_ = false;
+      return raw_->Close();
+    }
+    return Status::OK();
+  }
+
+  // Same as Close(), but calls Abort() on the raw stream.
+  Status Abort() {
+    if (is_open_) {
+      is_open_ = false;
+      return raw_->Abort();
+    }
+    return Status::OK();
+  }
+
+  // Position of the buffered stream: the raw stream's position minus the
+  // bytes that were read ahead but not yet consumed.
+  Result<int64_t> Tell() const {
+    if (raw_pos_ == -1) {
+      ARROW_ASSIGN_OR_RAISE(raw_pos_, raw_->Tell());
+      DCHECK_GE(raw_pos_, 0);
+    }
+    // Shift by bytes_buffered to return semantic stream position
+    return raw_pos_ - bytes_buffered_;
+  }
+
+  // Resize the read buffer. Fails if the size is not positive or if the new
+  // size does not exceed the end offset of the data currently buffered.
+  Status SetBufferSize(int64_t new_buffer_size) {
+    if (new_buffer_size <= 0) {
+      return Status::Invalid("Buffer size should be positive");
+    }
+    if ((buffer_pos_ + bytes_buffered_) >= new_buffer_size) {
+      return Status::Invalid("Cannot shrink read buffer if buffered data remains");
+    }
+    return ResizeBuffer(new_buffer_size);
+  }
+
+  // Return a view of up to `nbytes` upcoming bytes without consuming them.
+  // Reads from the raw stream and grows the buffer when needed; the view may
+  // be shorter than requested if the raw stream or the read bound runs out.
+  //
+  // \return Invalid if `nbytes` is negative, otherwise the view or the error
+  //   of the underlying buffer resize / raw read
+  Result<util::string_view> Peek(int64_t nbytes) {
+    if (ARROW_PREDICT_FALSE(nbytes < 0)) {
+      // A negative count would otherwise be cast to a huge size_t for the
+      // returned view.
+      return Status::Invalid("Bytes to peek must be non-negative. Received:", nbytes);
+    }
+
+    if (raw_read_bound_ >= 0) {
+      // Do not try to peek more than the total remaining number of bytes.
+      nbytes = std::min(nbytes, bytes_buffered_ + (raw_read_bound_ - raw_read_total_));
+    }
+
+    if (bytes_buffered_ == 0 && nbytes < buffer_size_) {
+      // Pre-buffer for small reads
+      RETURN_NOT_OK(BufferIfNeeded());
+    }
+
+    // Increase the buffer size if needed.
+    if (nbytes > buffer_->size() - buffer_pos_) {
+      RETURN_NOT_OK(SetBufferSize(nbytes + buffer_pos_));
+      DCHECK(buffer_->size() - buffer_pos_ >= nbytes);
+    }
+    // Read more data when buffer has insufficient left
+    if (nbytes > bytes_buffered_) {
+      int64_t additional_bytes_to_read = nbytes - bytes_buffered_;
+      if (raw_read_bound_ >= 0) {
+        additional_bytes_to_read =
+            std::min(additional_bytes_to_read, raw_read_bound_ - raw_read_total_);
+      }
+      ARROW_ASSIGN_OR_RAISE(
+          int64_t bytes_read,
+          raw_->Read(additional_bytes_to_read,
+                     buffer_->mutable_data() + buffer_pos_ + bytes_buffered_));
+      bytes_buffered_ += bytes_read;
+      raw_read_total_ += bytes_read;
+      // The raw stream advanced, so a position cached by Tell() is stale and
+      // would make Tell() under-report by `bytes_read`.
+      raw_pos_ = -1;
+      nbytes = bytes_buffered_;
+    }
+    DCHECK(nbytes <= bytes_buffered_);  // Enough bytes available
+    return util::string_view(reinterpret_cast<const char*>(buffer_data_ + buffer_pos_),
+                             static_cast<size_t>(nbytes));
+  }
+
+  int64_t bytes_buffered() const { return bytes_buffered_; }
+
+  int64_t buffer_size() const { return buffer_size_; }
+
+  // Mark the stream as not open and give up the raw stream. Buffered data is
+  // not handed back to the caller.
+  std::shared_ptr<InputStream> Detach() {
+    is_open_ = false;
+    return std::move(raw_);
+  }
+
+  void RewindBuffer() {
+    // Invalidate buffered data, as with a Seek or large Read
+    buffer_pos_ = bytes_buffered_ = 0;
+  }
+
+  // When no bytes are buffered, read up to buffer_size_ bytes (limited by the
+  // read bound, if any) from the raw stream into the start of the buffer.
+  Status BufferIfNeeded() {
+    if (bytes_buffered_ == 0) {
+      // Fill buffer
+      if (!buffer_) {
+        RETURN_NOT_OK(ResetBuffer());
+      }
+
+      int64_t bytes_to_buffer = buffer_size_;
+      if (raw_read_bound_ >= 0) {
+        bytes_to_buffer = std::min(buffer_size_, raw_read_bound_ - raw_read_total_);
+      }
+      ARROW_ASSIGN_OR_RAISE(bytes_buffered_, raw_->Read(bytes_to_buffer, buffer_data_));
+      buffer_pos_ = 0;
+      raw_read_total_ += bytes_buffered_;
+
+      // Do not make assumptions about the raw stream position
+      raw_pos_ = -1;
+    }
+    return Status::OK();
+  }
+
+  // Advance past `nbytes` buffered bytes; the caller ensures that many bytes
+  // are buffered.
+  void ConsumeBuffer(int64_t nbytes) {
+    buffer_pos_ += nbytes;
+    bytes_buffered_ -= nbytes;
+  }
+
+  // Read up to `nbytes` bytes into `out`.
+  //
+  // Requests smaller than the buffer size are served through the buffer.
+  // Requests exceeding what is buffered copy the buffered bytes and read the
+  // remainder directly from the raw stream, honoring the read bound.
+  //
+  // \return the number of bytes stored in `out`, or Invalid if `nbytes` is
+  //   negative
+  Result<int64_t> Read(int64_t nbytes, void* out) {
+    if (ARROW_PREDICT_FALSE(nbytes < 0)) {
+      return Status::Invalid("Bytes to read must be positive. Received:", nbytes);
+    }
+
+    if (nbytes < buffer_size_) {
+      // Pre-buffer for small reads
+      RETURN_NOT_OK(BufferIfNeeded());
+    }
+
+    if (nbytes > bytes_buffered_) {
+      // Copy buffered bytes into out, then read rest
+      memcpy(out, buffer_data_ + buffer_pos_, bytes_buffered_);
+
+      int64_t bytes_to_read = nbytes - bytes_buffered_;
+      if (raw_read_bound_ >= 0) {
+        bytes_to_read = std::min(bytes_to_read, raw_read_bound_ - raw_read_total_);
+      }
+      ARROW_ASSIGN_OR_RAISE(
+          int64_t bytes_read,
+          raw_->Read(bytes_to_read, reinterpret_cast<uint8_t*>(out) + bytes_buffered_));
+      raw_read_total_ += bytes_read;
+
+      // Do not make assumptions about the raw stream position
+      raw_pos_ = -1;
+      bytes_read += bytes_buffered_;
+      RewindBuffer();
+      return bytes_read;
+    } else {
+      memcpy(out, buffer_data_ + buffer_pos_, nbytes);
+      ConsumeBuffer(nbytes);
+      return nbytes;
+    }
+  }
+
+  // Read up to `nbytes` bytes into a newly allocated buffer, shrinking its
+  // size (but not its capacity) when fewer bytes were available.
+  Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) {
+    ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(nbytes, pool_));
+
+    ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, Read(nbytes, buffer->mutable_data()));
+
+    if (bytes_read < nbytes) {
+      // Change size but do not reallocate internal capacity
+      RETURN_NOT_OK(buffer->Resize(bytes_read, false /* shrink_to_fit */));
+      buffer->ZeroPadding();
+    }
+    return std::move(buffer);
+  }
+
+  // For providing access to the raw file handles
+  std::shared_ptr<InputStream> raw() const { return raw_; }
+
+ private:
+  std::shared_ptr<InputStream> raw_;
+  // Total number of bytes read from raw_ so far
+  int64_t raw_read_total_;
+  // Maximum number of bytes to read from raw_; negative means unbounded
+  int64_t raw_read_bound_;
+
+  // Number of remaining bytes in the buffer, to be reduced on each read from
+  // the buffer
+  int64_t bytes_buffered_;
+};
+
+// Wrap `raw` in a freshly allocated Impl that carries the read bound. The
+// buffer is sized afterwards through SetBufferSize(), as done by Create().
+BufferedInputStream::BufferedInputStream(std::shared_ptr<InputStream> raw,
+                                         MemoryPool* pool,
+                                         int64_t raw_total_bytes_bound) {
+  impl_ = std::unique_ptr<Impl>(new Impl(std::move(raw), pool, raw_total_bytes_bound));
+}
+
+// Destruction hands the stream to internal::CloseFromDestructor (defined
+// elsewhere).
+BufferedInputStream::~BufferedInputStream() { internal::CloseFromDestructor(this); }
+
+// Factory: construct the stream around `raw` with the given read bound, then
+// size its buffer. A failure from SetBufferSize() (e.g. a non-positive
+// buffer_size) is returned instead of the stream.
+Result<std::shared_ptr<BufferedInputStream>> BufferedInputStream::Create(
+    int64_t buffer_size, MemoryPool* pool, std::shared_ptr<InputStream> raw,
+    int64_t raw_total_bytes_bound) {
+  std::shared_ptr<BufferedInputStream> stream(
+      new BufferedInputStream(std::move(raw), pool, raw_total_bytes_bound));
+  RETURN_NOT_OK(stream->SetBufferSize(buffer_size));
+  return stream;
+}
+
+// The methods below are thin forwarders to the Impl object. The Do* variants
+// are the hooks named by the InputStreamConcurrencyWrapper base class.
+
+Status BufferedInputStream::DoClose() { return impl_->Close(); }
+
+Status BufferedInputStream::DoAbort() { return impl_->Abort(); }
+
+bool BufferedInputStream::closed() const { return impl_->closed(); }
+
+std::shared_ptr<InputStream> BufferedInputStream::Detach() { return impl_->Detach(); }
+
+std::shared_ptr<InputStream> BufferedInputStream::raw() const { return impl_->raw(); }
+
+Result<int64_t> BufferedInputStream::DoTell() const { return impl_->Tell(); }
+
+Result<util::string_view> BufferedInputStream::DoPeek(int64_t nbytes) {
+ return impl_->Peek(nbytes);
+}
+
+Status BufferedInputStream::SetBufferSize(int64_t new_buffer_size) {
+ return impl_->SetBufferSize(new_buffer_size);
+}
+
+int64_t BufferedInputStream::bytes_buffered() const { return impl_->bytes_buffered(); }
+
+int64_t BufferedInputStream::buffer_size() const { return impl_->buffer_size(); }
+
+// Reads into caller-provided memory; returns the number of bytes read.
+Result<int64_t> BufferedInputStream::DoRead(int64_t nbytes, void* out) {
+ return impl_->Read(nbytes, out);
+}
+
+// Reads into a newly allocated Buffer.
+Result<std::shared_ptr<Buffer>> BufferedInputStream::DoRead(int64_t nbytes) {
+ return impl_->Read(nbytes);
+}
+
Result<std::shared_ptr<const KeyValueMetadata>> BufferedInputStream::ReadMetadata() {
return impl_->raw()->ReadMetadata();
}
@@ -485,5 +485,5 @@ Future<std::shared_ptr<const KeyValueMetadata>> BufferedInputStream::ReadMetadat
return impl_->raw()->ReadMetadataAsync(io_context);
}
-} // namespace io
-} // namespace arrow
+} // namespace io
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/buffered.h b/contrib/libs/apache/arrow/cpp/src/arrow/io/buffered.h
index 8116613fa4e..3a9a634a739 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/buffered.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/buffered.h
@@ -1,167 +1,167 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Buffered stream implementations
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-
-#include "arrow/io/concurrency.h"
-#include "arrow/io/interfaces.h"
-#include "arrow/util/string_view.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class Buffer;
-class MemoryPool;
-class Status;
-
-namespace io {
-
-class ARROW_EXPORT BufferedOutputStream : public OutputStream {
- public:
- ~BufferedOutputStream() override;
-
- /// \brief Create a buffered output stream wrapping the given output stream.
- /// \param[in] buffer_size the size of the temporary write buffer
- /// \param[in] pool a MemoryPool to use for allocations
- /// \param[in] raw another OutputStream
- /// \return the created BufferedOutputStream
- static Result<std::shared_ptr<BufferedOutputStream>> Create(
- int64_t buffer_size, MemoryPool* pool, std::shared_ptr<OutputStream> raw);
-
- /// \brief Resize internal buffer
- /// \param[in] new_buffer_size the new buffer size
- /// \return Status
- Status SetBufferSize(int64_t new_buffer_size);
-
- /// \brief Return the current size of the internal buffer
- int64_t buffer_size() const;
-
- /// \brief Return the number of remaining bytes that have not been flushed to
- /// the raw OutputStream
- int64_t bytes_buffered() const;
-
- /// \brief Flush any buffered writes and release the raw
- /// OutputStream. Further operations on this object are invalid
- /// \return the underlying OutputStream
- Result<std::shared_ptr<OutputStream>> Detach();
-
- // OutputStream interface
-
- /// \brief Close the buffered output stream. This implicitly closes the
- /// underlying raw output stream.
- Status Close() override;
- Status Abort() override;
- bool closed() const override;
-
- Result<int64_t> Tell() const override;
- // Write bytes to the stream. Thread-safe
- Status Write(const void* data, int64_t nbytes) override;
- Status Write(const std::shared_ptr<Buffer>& data) override;
-
- Status Flush() override;
-
- /// \brief Return the underlying raw output stream.
- std::shared_ptr<OutputStream> raw() const;
-
- private:
- explicit BufferedOutputStream(std::shared_ptr<OutputStream> raw, MemoryPool* pool);
-
- class ARROW_NO_EXPORT Impl;
- std::unique_ptr<Impl> impl_;
-};
-
-/// \class BufferedInputStream
-/// \brief An InputStream that performs buffered reads from an unbuffered
-/// InputStream, which can mitigate the overhead of many small reads in some
-/// cases
-class ARROW_EXPORT BufferedInputStream
- : public internal::InputStreamConcurrencyWrapper<BufferedInputStream> {
- public:
- ~BufferedInputStream() override;
-
- /// \brief Create a BufferedInputStream from a raw InputStream
- /// \param[in] buffer_size the size of the temporary read buffer
- /// \param[in] pool a MemoryPool to use for allocations
- /// \param[in] raw a raw InputStream
- /// \param[in] raw_read_bound a bound on the maximum number of bytes
- /// to read from the raw input stream. The default -1 indicates that
- /// it is unbounded
- /// \return the created BufferedInputStream
- static Result<std::shared_ptr<BufferedInputStream>> Create(
- int64_t buffer_size, MemoryPool* pool, std::shared_ptr<InputStream> raw,
- int64_t raw_read_bound = -1);
-
- /// \brief Resize internal read buffer; calls to Read(...) will read at least
- /// \param[in] new_buffer_size the new read buffer size
- /// \return Status
- Status SetBufferSize(int64_t new_buffer_size);
-
- /// \brief Return the number of remaining bytes in the read buffer
- int64_t bytes_buffered() const;
-
- /// \brief Return the current size of the internal buffer
- int64_t buffer_size() const;
-
- /// \brief Release the raw InputStream. Any data buffered will be
- /// discarded. Further operations on this object are invalid
- /// \return raw the underlying InputStream
- std::shared_ptr<InputStream> Detach();
-
- /// \brief Return the unbuffered InputStream
- std::shared_ptr<InputStream> raw() const;
-
- // InputStream APIs
-
- bool closed() const override;
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Buffered stream implementations
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+
+#include "arrow/io/concurrency.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Buffer;
+class MemoryPool;
+class Status;
+
+namespace io {
+
+/// \class BufferedOutputStream
+/// \brief An OutputStream that collects writes in an in-memory buffer before
+/// forwarding them to a wrapped raw OutputStream
+class ARROW_EXPORT BufferedOutputStream : public OutputStream {
+ public:
+ ~BufferedOutputStream() override;
+
+ /// \brief Create a buffered output stream wrapping the given output stream.
+ /// \param[in] buffer_size the size of the temporary write buffer; must be
+ /// positive
+ /// \param[in] pool a MemoryPool to use for allocations
+ /// \param[in] raw another OutputStream
+ /// \return the created BufferedOutputStream
+ static Result<std::shared_ptr<BufferedOutputStream>> Create(
+ int64_t buffer_size, MemoryPool* pool, std::shared_ptr<OutputStream> raw);
+
+ /// \brief Resize internal buffer. If at least new_buffer_size bytes are
+ /// currently buffered, they are flushed to the raw OutputStream first
+ /// \param[in] new_buffer_size the new buffer size; must be positive
+ /// \return Status
+ Status SetBufferSize(int64_t new_buffer_size);
+
+ /// \brief Return the current size of the internal buffer
+ int64_t buffer_size() const;
+
+ /// \brief Return the number of remaining bytes that have not been flushed to
+ /// the raw OutputStream
+ int64_t bytes_buffered() const;
+
+ /// \brief Flush any buffered writes and release the raw
+ /// OutputStream. Further operations on this object are invalid
+ /// \return the underlying OutputStream
+ Result<std::shared_ptr<OutputStream>> Detach();
+
+ // OutputStream interface
+
+ /// \brief Close the buffered output stream. This implicitly closes the
+ /// underlying raw output stream.
+ Status Close() override;
+ Status Abort() override;
+ bool closed() const override;
+
+ /// \brief Return the position of the raw stream plus the number of bytes
+ /// currently buffered
+ Result<int64_t> Tell() const override;
+ // Write bytes to the stream. Thread-safe
+ Status Write(const void* data, int64_t nbytes) override;
+ Status Write(const std::shared_ptr<Buffer>& data) override;
+
+ /// \brief Write any buffered bytes to the raw OutputStream
+ Status Flush() override;
+
+ /// \brief Return the underlying raw output stream.
+ std::shared_ptr<OutputStream> raw() const;
+
+ private:
+ explicit BufferedOutputStream(std::shared_ptr<OutputStream> raw, MemoryPool* pool);
+
+ // Implementation object; the public methods forward to it.
+ class ARROW_NO_EXPORT Impl;
+ std::unique_ptr<Impl> impl_;
+};
+
+/// \class BufferedInputStream
+/// \brief An InputStream that performs buffered reads from an unbuffered
+/// InputStream, which can mitigate the overhead of many small reads in some
+/// cases
+class ARROW_EXPORT BufferedInputStream
+ : public internal::InputStreamConcurrencyWrapper<BufferedInputStream> {
+ public:
+ ~BufferedInputStream() override;
+
+ /// \brief Create a BufferedInputStream from a raw InputStream
+ /// \param[in] buffer_size the size of the temporary read buffer; must be
+ /// positive
+ /// \param[in] pool a MemoryPool to use for allocations
+ /// \param[in] raw a raw InputStream
+ /// \param[in] raw_read_bound a bound on the maximum number of bytes
+ /// to read from the raw input stream. The default -1 indicates that
+ /// it is unbounded
+ /// \return the created BufferedInputStream
+ static Result<std::shared_ptr<BufferedInputStream>> Create(
+ int64_t buffer_size, MemoryPool* pool, std::shared_ptr<InputStream> raw,
+ int64_t raw_read_bound = -1);
+
+ /// \brief Resize internal read buffer. Reads smaller than the buffer size
+ /// first fill the buffer with up to buffer_size bytes from the raw stream
+ /// (fewer if raw_read_bound is nearly exhausted)
+ /// \param[in] new_buffer_size the new read buffer size; must be positive
+ /// and large enough to keep the data currently held in the buffer
+ /// \return Status
+ Status SetBufferSize(int64_t new_buffer_size);
+
+ /// \brief Return the number of remaining bytes in the read buffer
+ int64_t bytes_buffered() const;
+
+ /// \brief Return the current size of the internal buffer
+ int64_t buffer_size() const;
+
+ /// \brief Release the raw InputStream. Any data buffered will be
+ /// discarded. Further operations on this object are invalid
+ /// \return raw the underlying InputStream
+ std::shared_ptr<InputStream> Detach();
+
+ /// \brief Return the unbuffered InputStream
+ std::shared_ptr<InputStream> raw() const;
+
+ // InputStream APIs
+
+ bool closed() const override;
Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata() override;
Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync(
const IOContext& io_context) override;
+
+ private:
+ friend InputStreamConcurrencyWrapper<BufferedInputStream>;
+
+ explicit BufferedInputStream(std::shared_ptr<InputStream> raw, MemoryPool* pool,
+ int64_t raw_total_bytes_bound);
+
+ /// \brief Close the raw InputStream if this stream is still open.
+ Status DoClose();
+ /// \brief Abort the raw InputStream if this stream is still open.
+ Status DoAbort() override;
+
+ /// \brief Returns the position of the buffered stream, though the position
+ /// of the unbuffered stream may be further advanced.
+ Result<int64_t> DoTell() const;
+
+ /// \brief Read up to nbytes into out.
+ /// \return the number of bytes actually read
+ Result<int64_t> DoRead(int64_t nbytes, void* out);
+
+ /// \brief Read into buffer.
+ Result<std::shared_ptr<Buffer>> DoRead(int64_t nbytes);
+
+ /// \brief Return a zero-copy string view referencing buffered data,
+ /// but do not advance the position of the stream. Buffers data and
+ /// expands the buffer size if necessary
+ Result<util::string_view> DoPeek(int64_t nbytes) override;
+
+ // Implementation object; the public methods forward to it.
+ class ARROW_NO_EXPORT Impl;
+ std::unique_ptr<Impl> impl_;
+};
+
+} // namespace io
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/caching.cc b/contrib/libs/apache/arrow/cpp/src/arrow/io/caching.cc
index 722026ccd9b..8a911cc9429 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/caching.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/caching.cc
@@ -1,156 +1,156 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <algorithm>
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <algorithm>
#include <atomic>
-#include <cmath>
+#include <cmath>
#include <mutex>
-#include <utility>
+#include <utility>
#include <vector>
-
-#include "arrow/buffer.h"
-#include "arrow/io/caching.h"
-#include "arrow/io/util_internal.h"
-#include "arrow/result.h"
-#include "arrow/util/future.h"
-#include "arrow/util/logging.h"
-
-namespace arrow {
-namespace io {
-
-CacheOptions CacheOptions::Defaults() {
- return CacheOptions{internal::ReadRangeCache::kDefaultHoleSizeLimit,
+
+#include "arrow/buffer.h"
+#include "arrow/io/caching.h"
+#include "arrow/io/util_internal.h"
+#include "arrow/result.h"
+#include "arrow/util/future.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+namespace io {
+
+CacheOptions CacheOptions::Defaults() {
+ return CacheOptions{internal::ReadRangeCache::kDefaultHoleSizeLimit,
internal::ReadRangeCache::kDefaultRangeSizeLimit,
/*lazy=*/false};
-}
-
+}
+
CacheOptions CacheOptions::LazyDefaults() {
return CacheOptions{internal::ReadRangeCache::kDefaultHoleSizeLimit,
internal::ReadRangeCache::kDefaultRangeSizeLimit,
/*lazy=*/true};
}
-CacheOptions CacheOptions::MakeFromNetworkMetrics(int64_t time_to_first_byte_millis,
- int64_t transfer_bandwidth_mib_per_sec,
- double ideal_bandwidth_utilization_frac,
- int64_t max_ideal_request_size_mib) {
- //
- // The I/O coalescing algorithm uses two parameters:
- // 1. hole_size_limit (a.k.a max_io_gap): Max I/O gap/hole size in bytes
- // 2. range_size_limit (a.k.a ideal_request_size): Ideal I/O Request size in bytes
- //
- // These parameters can be derived from network metrics (e.g. S3) as described below:
- //
- // In an S3 compatible storage, there are two main metrics:
- // 1. Seek-time or Time-To-First-Byte (TTFB) in seconds: call setup latency of a new
- // S3 request
- // 2. Transfer Bandwidth (BW) for data in bytes/sec
- //
- // 1. Computing hole_size_limit:
- //
- // hole_size_limit = TTFB * BW
- //
- // This is also called Bandwidth-Delay-Product (BDP).
- // Two byte ranges that have a gap can still be mapped to the same read
- // if the gap is less than the bandwidth-delay product [TTFB * TransferBandwidth],
- // i.e. if the Time-To-First-Byte (or call setup latency of a new S3 request) is
- // expected to be greater than just reading and discarding the extra bytes on an
- // existing HTTP request.
- //
- // 2. Computing range_size_limit:
- //
- // We want to have high bandwidth utilization per S3 connections,
- // i.e. transfer large amounts of data to amortize the seek overhead.
- // But, we also want to leverage parallelism by slicing very large IO chunks.
- // We define two more config parameters with suggested default values to control
- // the slice size and seek to balance the two effects with the goal of maximizing
- // net data load performance.
- //
- // BW_util_frac (ideal bandwidth utilization): Transfer bandwidth utilization fraction
- // (per connection) to maximize the net data load. 90% is a good default number for
- // an effective transfer bandwidth.
- //
- // MAX_IDEAL_REQUEST_SIZE: The maximum single data request size (in MiB) to maximize
- // the net data load. 64 MiB is a good default number for the ideal request size.
- //
- // The amount of data that needs to be transferred in a single S3 get_object
- // request to achieve effective bandwidth eff_BW = BW_util_frac * BW is as follows:
- // eff_BW = range_size_limit / (TTFB + range_size_limit / BW)
- //
- // Substituting TTFB = hole_size_limit / BW and eff_BW = BW_util_frac * BW, we get the
- // following result:
- // range_size_limit = hole_size_limit * BW_util_frac / (1 - BW_util_frac)
- //
- // Applying the MAX_IDEAL_REQUEST_SIZE, we get the following:
- // range_size_limit = min(MAX_IDEAL_REQUEST_SIZE,
- // hole_size_limit * BW_util_frac / (1 - BW_util_frac))
- //
- DCHECK_GT(time_to_first_byte_millis, 0) << "TTFB must be > 0";
- DCHECK_GT(transfer_bandwidth_mib_per_sec, 0) << "Transfer bandwidth must be > 0";
- DCHECK_GT(ideal_bandwidth_utilization_frac, 0)
- << "Ideal bandwidth utilization fraction must be > 0";
- DCHECK_LT(ideal_bandwidth_utilization_frac, 1.0)
- << "Ideal bandwidth utilization fraction must be < 1";
- DCHECK_GT(max_ideal_request_size_mib, 0) << "Max Ideal request size must be > 0";
-
- const double time_to_first_byte_sec = time_to_first_byte_millis / 1000.0;
- const int64_t transfer_bandwidth_bytes_per_sec =
- transfer_bandwidth_mib_per_sec * 1024 * 1024;
- const int64_t max_ideal_request_size_bytes = max_ideal_request_size_mib * 1024 * 1024;
-
- // hole_size_limit = TTFB * BW
- const auto hole_size_limit = static_cast<int64_t>(
- std::round(time_to_first_byte_sec * transfer_bandwidth_bytes_per_sec));
- DCHECK_GT(hole_size_limit, 0) << "Computed hole_size_limit must be > 0";
-
- // range_size_limit = min(MAX_IDEAL_REQUEST_SIZE,
- // hole_size_limit * BW_util_frac / (1 - BW_util_frac))
- const int64_t range_size_limit = std::min(
- max_ideal_request_size_bytes,
- static_cast<int64_t>(std::round(hole_size_limit * ideal_bandwidth_utilization_frac /
- (1 - ideal_bandwidth_utilization_frac))));
- DCHECK_GT(range_size_limit, 0) << "Computed range_size_limit must be > 0";
-
+CacheOptions CacheOptions::MakeFromNetworkMetrics(int64_t time_to_first_byte_millis,
+ int64_t transfer_bandwidth_mib_per_sec,
+ double ideal_bandwidth_utilization_frac,
+ int64_t max_ideal_request_size_mib) {
+ //
+ // The I/O coalescing algorithm uses two parameters:
+ // 1. hole_size_limit (a.k.a max_io_gap): Max I/O gap/hole size in bytes
+ // 2. range_size_limit (a.k.a ideal_request_size): Ideal I/O Request size in bytes
+ //
+ // These parameters can be derived from network metrics (e.g. S3) as described below:
+ //
+ // In an S3 compatible storage, there are two main metrics:
+ // 1. Seek-time or Time-To-First-Byte (TTFB) in seconds: call setup latency of a new
+ // S3 request
+ // 2. Transfer Bandwidth (BW) for data in bytes/sec
+ //
+ // 1. Computing hole_size_limit:
+ //
+ // hole_size_limit = TTFB * BW
+ //
+ // This is also called Bandwidth-Delay-Product (BDP).
+ // Two byte ranges that have a gap can still be mapped to the same read
+ // if the gap is less than the bandwidth-delay product [TTFB * TransferBandwidth],
+ // i.e. if the Time-To-First-Byte (or call setup latency of a new S3 request) is
+ // expected to be greater than just reading and discarding the extra bytes on an
+ // existing HTTP request.
+ //
+ // 2. Computing range_size_limit:
+ //
+ // We want to have high bandwidth utilization per S3 connections,
+ // i.e. transfer large amounts of data to amortize the seek overhead.
+ // But, we also want to leverage parallelism by slicing very large IO chunks.
+ // We define two more config parameters with suggested default values to control
+ // the slice size and seek to balance the two effects with the goal of maximizing
+ // net data load performance.
+ //
+ // BW_util_frac (ideal bandwidth utilization): Transfer bandwidth utilization fraction
+ // (per connection) to maximize the net data load. 90% is a good default number for
+ // an effective transfer bandwidth.
+ //
+ // MAX_IDEAL_REQUEST_SIZE: The maximum single data request size (in MiB) to maximize
+ // the net data load. 64 MiB is a good default number for the ideal request size.
+ //
+ // The amount of data that needs to be transferred in a single S3 get_object
+ // request to achieve effective bandwidth eff_BW = BW_util_frac * BW is as follows:
+ // eff_BW = range_size_limit / (TTFB + range_size_limit / BW)
+ //
+ // Substituting TTFB = hole_size_limit / BW and eff_BW = BW_util_frac * BW, we get the
+ // following result:
+ // range_size_limit = hole_size_limit * BW_util_frac / (1 - BW_util_frac)
+ //
+ // Applying the MAX_IDEAL_REQUEST_SIZE, we get the following:
+ // range_size_limit = min(MAX_IDEAL_REQUEST_SIZE,
+ // hole_size_limit * BW_util_frac / (1 - BW_util_frac))
+ //
+ DCHECK_GT(time_to_first_byte_millis, 0) << "TTFB must be > 0";
+ DCHECK_GT(transfer_bandwidth_mib_per_sec, 0) << "Transfer bandwidth must be > 0";
+ DCHECK_GT(ideal_bandwidth_utilization_frac, 0)
+ << "Ideal bandwidth utilization fraction must be > 0";
+ DCHECK_LT(ideal_bandwidth_utilization_frac, 1.0)
+ << "Ideal bandwidth utilization fraction must be < 1";
+ DCHECK_GT(max_ideal_request_size_mib, 0) << "Max Ideal request size must be > 0";
+
+ const double time_to_first_byte_sec = time_to_first_byte_millis / 1000.0;
+ const int64_t transfer_bandwidth_bytes_per_sec =
+ transfer_bandwidth_mib_per_sec * 1024 * 1024;
+ const int64_t max_ideal_request_size_bytes = max_ideal_request_size_mib * 1024 * 1024;
+
+ // hole_size_limit = TTFB * BW
+ const auto hole_size_limit = static_cast<int64_t>(
+ std::round(time_to_first_byte_sec * transfer_bandwidth_bytes_per_sec));
+ DCHECK_GT(hole_size_limit, 0) << "Computed hole_size_limit must be > 0";
+
+ // range_size_limit = min(MAX_IDEAL_REQUEST_SIZE,
+ // hole_size_limit * BW_util_frac / (1 - BW_util_frac))
+ const int64_t range_size_limit = std::min(
+ max_ideal_request_size_bytes,
+ static_cast<int64_t>(std::round(hole_size_limit * ideal_bandwidth_utilization_frac /
+ (1 - ideal_bandwidth_utilization_frac))));
+ DCHECK_GT(range_size_limit, 0) << "Computed range_size_limit must be > 0";
+
return {hole_size_limit, range_size_limit, false};
-}
-
-namespace internal {
-
-struct RangeCacheEntry {
- ReadRange range;
- Future<std::shared_ptr<Buffer>> future;
-
+}
+
+namespace internal {
+
+struct RangeCacheEntry {
+ ReadRange range;
+ Future<std::shared_ptr<Buffer>> future;
+
RangeCacheEntry() = default;
RangeCacheEntry(const ReadRange& range_, Future<std::shared_ptr<Buffer>> future_)
: range(range_), future(std::move(future_)) {}
- friend bool operator<(const RangeCacheEntry& left, const RangeCacheEntry& right) {
- return left.range.offset < right.range.offset;
- }
-};
-
-struct ReadRangeCache::Impl {
- std::shared_ptr<RandomAccessFile> file;
+ friend bool operator<(const RangeCacheEntry& left, const RangeCacheEntry& right) {
+ return left.range.offset < right.range.offset;
+ }
+};
+
+struct ReadRangeCache::Impl {
+ std::shared_ptr<RandomAccessFile> file;
IOContext ctx;
- CacheOptions options;
-
- // Ordered by offset (so as to find a matching region by binary search)
- std::vector<RangeCacheEntry> entries;
-
+ CacheOptions options;
+
+ // Ordered by offset (so as to find a matching region by binary search)
+ std::vector<RangeCacheEntry> entries;
+
virtual ~Impl() = default;
// Get the future corresponding to a range
@@ -175,17 +175,17 @@ struct ReadRangeCache::Impl {
options.range_size_limit);
std::vector<RangeCacheEntry> new_entries = MakeCacheEntries(ranges);
// Add new entries, themselves ordered by offset
- if (entries.size() > 0) {
- std::vector<RangeCacheEntry> merged(entries.size() + new_entries.size());
- std::merge(entries.begin(), entries.end(), new_entries.begin(), new_entries.end(),
- merged.begin());
- entries = std::move(merged);
- } else {
- entries = std::move(new_entries);
- }
+ if (entries.size() > 0) {
+ std::vector<RangeCacheEntry> merged(entries.size() + new_entries.size());
+ std::merge(entries.begin(), entries.end(), new_entries.begin(), new_entries.end(),
+ merged.begin());
+ entries = std::move(merged);
+ } else {
+ entries = std::move(new_entries);
+ }
// Prefetch immediately, regardless of executor availability, if possible
return file->WillNeed(ranges);
- }
+ }
// Read the given range from the cache, blocking if needed. Cannot read a range
// that spans cache entries.
@@ -238,8 +238,8 @@ struct ReadRangeCache::Impl {
}
return AllComplete(futures);
}
-};
-
+};
+
// Don't read ranges when they're first added. Instead, wait until they're requested
// (either through Read or WaitFor).
struct ReadRangeCache::LazyImpl : public ReadRangeCache::Impl {
@@ -290,29 +290,29 @@ struct ReadRangeCache::LazyImpl : public ReadRangeCache::Impl {
};
ReadRangeCache::ReadRangeCache(std::shared_ptr<RandomAccessFile> file, IOContext ctx,
- CacheOptions options)
+ CacheOptions options)
: impl_(options.lazy ? new LazyImpl() : new Impl()) {
- impl_->file = std::move(file);
- impl_->ctx = std::move(ctx);
- impl_->options = options;
-}
-
+ impl_->file = std::move(file);
+ impl_->ctx = std::move(ctx);
+ impl_->options = options;
+}
+
ReadRangeCache::~ReadRangeCache() = default;
-
-Status ReadRangeCache::Cache(std::vector<ReadRange> ranges) {
+
+Status ReadRangeCache::Cache(std::vector<ReadRange> ranges) {
return impl_->Cache(std::move(ranges));
-}
-
-Result<std::shared_ptr<Buffer>> ReadRangeCache::Read(ReadRange range) {
+}
+
+Result<std::shared_ptr<Buffer>> ReadRangeCache::Read(ReadRange range) {
return impl_->Read(range);
}
-
+
Future<> ReadRangeCache::Wait() { return impl_->Wait(); }
Future<> ReadRangeCache::WaitFor(std::vector<ReadRange> ranges) {
return impl_->WaitFor(std::move(ranges));
-}
-
-} // namespace internal
-} // namespace io
-} // namespace arrow
+}
+
+} // namespace internal
+} // namespace io
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/caching.h b/contrib/libs/apache/arrow/cpp/src/arrow/io/caching.h
index 59a9b60e82f..f6c5733d164 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/caching.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/caching.h
@@ -1,81 +1,81 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "arrow/io/interfaces.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/io/interfaces.h"
#include "arrow/util/type_fwd.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace io {
-
-struct ARROW_EXPORT CacheOptions {
- static constexpr double kDefaultIdealBandwidthUtilizationFrac = 0.9;
- static constexpr int64_t kDefaultMaxIdealRequestSizeMib = 64;
-
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace io {
+
+struct ARROW_EXPORT CacheOptions {
+ static constexpr double kDefaultIdealBandwidthUtilizationFrac = 0.9;
+ static constexpr int64_t kDefaultMaxIdealRequestSizeMib = 64;
+
/// \brief The maximum distance in bytes between two consecutive
- /// ranges; beyond this value, ranges are not combined
- int64_t hole_size_limit;
+ /// ranges; beyond this value, ranges are not combined
+ int64_t hole_size_limit;
/// \brief The maximum size in bytes of a combined range; if
- /// combining two consecutive ranges would produce a range of a
- /// size greater than this, they are not combined
- int64_t range_size_limit;
+ /// combining two consecutive ranges would produce a range of a
+ /// size greater than this, they are not combined
+ int64_t range_size_limit;
/// \brief A lazy cache does not perform any I/O until requested.
bool lazy;
-
- bool operator==(const CacheOptions& other) const {
- return hole_size_limit == other.hole_size_limit &&
+
+ bool operator==(const CacheOptions& other) const {
+ return hole_size_limit == other.hole_size_limit &&
range_size_limit == other.range_size_limit && lazy == other.lazy;
- }
-
- /// \brief Construct CacheOptions from network storage metrics (e.g. S3).
- ///
- /// \param[in] time_to_first_byte_millis Seek-time or Time-To-First-Byte (TTFB) in
- /// milliseconds, also called call setup latency of a new S3 request.
- /// The value is a positive integer.
- /// \param[in] transfer_bandwidth_mib_per_sec Data transfer Bandwidth (BW) in MiB/sec.
- /// The value is a positive integer.
- /// \param[in] ideal_bandwidth_utilization_frac Transfer bandwidth utilization fraction
- /// (per connection) to maximize the net data load.
- /// The value is a positive double precision number less than 1.
- /// \param[in] max_ideal_request_size_mib The maximum single data request size (in MiB)
- /// to maximize the net data load.
- /// The value is a positive integer.
- /// \return A new instance of CacheOptions.
- static CacheOptions MakeFromNetworkMetrics(
- int64_t time_to_first_byte_millis, int64_t transfer_bandwidth_mib_per_sec,
- double ideal_bandwidth_utilization_frac = kDefaultIdealBandwidthUtilizationFrac,
- int64_t max_ideal_request_size_mib = kDefaultMaxIdealRequestSizeMib);
-
- static CacheOptions Defaults();
+ }
+
+ /// \brief Construct CacheOptions from network storage metrics (e.g. S3).
+ ///
+ /// \param[in] time_to_first_byte_millis Seek-time or Time-To-First-Byte (TTFB) in
+ /// milliseconds, also called call setup latency of a new S3 request.
+ /// The value is a positive integer.
+ /// \param[in] transfer_bandwidth_mib_per_sec Data transfer Bandwidth (BW) in MiB/sec.
+ /// The value is a positive integer.
+ /// \param[in] ideal_bandwidth_utilization_frac Transfer bandwidth utilization fraction
+ /// (per connection) to maximize the net data load.
+ /// The value is a positive double precision number less than 1.
+ /// \param[in] max_ideal_request_size_mib The maximum single data request size (in MiB)
+ /// to maximize the net data load.
+ /// The value is a positive integer.
+ /// \return A new instance of CacheOptions.
+ static CacheOptions MakeFromNetworkMetrics(
+ int64_t time_to_first_byte_millis, int64_t transfer_bandwidth_mib_per_sec,
+ double ideal_bandwidth_utilization_frac = kDefaultIdealBandwidthUtilizationFrac,
+ int64_t max_ideal_request_size_mib = kDefaultMaxIdealRequestSizeMib);
+
+ static CacheOptions Defaults();
static CacheOptions LazyDefaults();
-};
-
-namespace internal {
-
-/// \brief A read cache designed to hide IO latencies when reading.
-///
+};
+
+namespace internal {
+
+/// \brief A read cache designed to hide IO latencies when reading.
+///
/// This class takes multiple byte ranges that an application expects to read, and
/// coalesces them into fewer, larger read requests, which benefits performance on some
/// filesystems, particularly remote ones like Amazon S3. By default, it also issues
@@ -97,42 +97,42 @@ namespace internal {
/// 3. Call Read() to retrieve the actual data for the given ranges.
/// A synchronous application may skip WaitFor() and just call Read() - it will still
/// benefit from coalescing and parallel fetching.
-class ARROW_EXPORT ReadRangeCache {
- public:
- static constexpr int64_t kDefaultHoleSizeLimit = 8192;
- static constexpr int64_t kDefaultRangeSizeLimit = 32 * 1024 * 1024;
-
- /// Construct a read cache with default
+class ARROW_EXPORT ReadRangeCache {
+ public:
+ static constexpr int64_t kDefaultHoleSizeLimit = 8192;
+ static constexpr int64_t kDefaultRangeSizeLimit = 32 * 1024 * 1024;
+
+ /// Construct a read cache with default
explicit ReadRangeCache(std::shared_ptr<RandomAccessFile> file, IOContext ctx)
- : ReadRangeCache(file, std::move(ctx), CacheOptions::Defaults()) {}
-
- /// Construct a read cache with given options
+ : ReadRangeCache(file, std::move(ctx), CacheOptions::Defaults()) {}
+
+ /// Construct a read cache with given options
explicit ReadRangeCache(std::shared_ptr<RandomAccessFile> file, IOContext ctx,
- CacheOptions options);
- ~ReadRangeCache();
-
- /// \brief Cache the given ranges in the background.
- ///
- /// The caller must ensure that the ranges do not overlap with each other,
- /// nor with previously cached ranges. Otherwise, behaviour will be undefined.
- Status Cache(std::vector<ReadRange> ranges);
-
- /// \brief Read a range previously given to Cache().
- Result<std::shared_ptr<Buffer>> Read(ReadRange range);
-
+ CacheOptions options);
+ ~ReadRangeCache();
+
+ /// \brief Cache the given ranges in the background.
+ ///
+ /// The caller must ensure that the ranges do not overlap with each other,
+ /// nor with previously cached ranges. Otherwise, behaviour will be undefined.
+ Status Cache(std::vector<ReadRange> ranges);
+
+ /// \brief Read a range previously given to Cache().
+ Result<std::shared_ptr<Buffer>> Read(ReadRange range);
+
/// \brief Wait until all ranges added so far have been cached.
Future<> Wait();
/// \brief Wait until all given ranges have been cached.
Future<> WaitFor(std::vector<ReadRange> ranges);
- protected:
- struct Impl;
+ protected:
+ struct Impl;
struct LazyImpl;
- std::unique_ptr<Impl> impl_;
-};
-
-} // namespace internal
-} // namespace io
-} // namespace arrow
+ std::unique_ptr<Impl> impl_;
+};
+
+} // namespace internal
+} // namespace io
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/compressed.cc b/contrib/libs/apache/arrow/cpp/src/arrow/io/compressed.cc
index 72977f0f297..e163b91c072 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/compressed.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/compressed.cc
@@ -1,442 +1,442 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/io/compressed.h"
-
-#include <algorithm>
-#include <cstring>
-#include <memory>
-#include <mutex>
-#include <string>
-#include <utility>
-
-#include "arrow/buffer.h"
-#include "arrow/io/util_internal.h"
-#include "arrow/memory_pool.h"
-#include "arrow/status.h"
-#include "arrow/util/compression.h"
-#include "arrow/util/logging.h"
-
-namespace arrow {
-
-using util::Codec;
-using util::Compressor;
-using util::Decompressor;
-
-namespace io {
-
-// ----------------------------------------------------------------------
-// CompressedOutputStream implementation
-
-class CompressedOutputStream::Impl {
- public:
- Impl(MemoryPool* pool, const std::shared_ptr<OutputStream>& raw)
- : pool_(pool), raw_(raw), is_open_(false), compressed_pos_(0), total_pos_(0) {}
-
- Status Init(Codec* codec) {
- ARROW_ASSIGN_OR_RAISE(compressor_, codec->MakeCompressor());
- ARROW_ASSIGN_OR_RAISE(compressed_, AllocateResizableBuffer(kChunkSize, pool_));
- compressed_pos_ = 0;
- is_open_ = true;
- return Status::OK();
- }
-
- Result<int64_t> Tell() const {
- std::lock_guard<std::mutex> guard(lock_);
- return total_pos_;
- }
-
- std::shared_ptr<OutputStream> raw() const { return raw_; }
-
- Status FlushCompressed() {
- if (compressed_pos_ > 0) {
- RETURN_NOT_OK(raw_->Write(compressed_->data(), compressed_pos_));
- compressed_pos_ = 0;
- }
- return Status::OK();
- }
-
- Status Write(const void* data, int64_t nbytes) {
- std::lock_guard<std::mutex> guard(lock_);
-
- auto input = reinterpret_cast<const uint8_t*>(data);
- while (nbytes > 0) {
- int64_t input_len = nbytes;
- int64_t output_len = compressed_->size() - compressed_pos_;
- uint8_t* output = compressed_->mutable_data() + compressed_pos_;
- ARROW_ASSIGN_OR_RAISE(auto result,
- compressor_->Compress(input_len, input, output_len, output));
- compressed_pos_ += result.bytes_written;
-
- if (result.bytes_read == 0) {
- // Not enough output, try to flush it and retry
- if (compressed_pos_ > 0) {
- RETURN_NOT_OK(FlushCompressed());
- output_len = compressed_->size() - compressed_pos_;
- output = compressed_->mutable_data() + compressed_pos_;
- ARROW_ASSIGN_OR_RAISE(
- result, compressor_->Compress(input_len, input, output_len, output));
- compressed_pos_ += result.bytes_written;
- }
- }
- input += result.bytes_read;
- nbytes -= result.bytes_read;
- total_pos_ += result.bytes_read;
- if (compressed_pos_ == compressed_->size()) {
- // Output buffer full, flush it
- RETURN_NOT_OK(FlushCompressed());
- }
- if (result.bytes_read == 0) {
- // Need to enlarge output buffer
- RETURN_NOT_OK(compressed_->Resize(compressed_->size() * 2));
- }
- }
- return Status::OK();
- }
-
- Status Flush() {
- std::lock_guard<std::mutex> guard(lock_);
-
- while (true) {
- // Flush compressor
- int64_t output_len = compressed_->size() - compressed_pos_;
- uint8_t* output = compressed_->mutable_data() + compressed_pos_;
- ARROW_ASSIGN_OR_RAISE(auto result, compressor_->Flush(output_len, output));
- compressed_pos_ += result.bytes_written;
-
- // Flush compressed output
- RETURN_NOT_OK(FlushCompressed());
-
- if (result.should_retry) {
- // Need to enlarge output buffer
- RETURN_NOT_OK(compressed_->Resize(compressed_->size() * 2));
- } else {
- break;
- }
- }
- return Status::OK();
- }
-
- Status FinalizeCompression() {
- while (true) {
- // Try to end compressor
- int64_t output_len = compressed_->size() - compressed_pos_;
- uint8_t* output = compressed_->mutable_data() + compressed_pos_;
- ARROW_ASSIGN_OR_RAISE(auto result, compressor_->End(output_len, output));
- compressed_pos_ += result.bytes_written;
-
- // Flush compressed output
- RETURN_NOT_OK(FlushCompressed());
-
- if (result.should_retry) {
- // Need to enlarge output buffer
- RETURN_NOT_OK(compressed_->Resize(compressed_->size() * 2));
- } else {
- // Done
- break;
- }
- }
- return Status::OK();
- }
-
- Status Close() {
- std::lock_guard<std::mutex> guard(lock_);
-
- if (is_open_) {
- is_open_ = false;
- RETURN_NOT_OK(FinalizeCompression());
- return raw_->Close();
- } else {
- return Status::OK();
- }
- }
-
- Status Abort() {
- std::lock_guard<std::mutex> guard(lock_);
-
- if (is_open_) {
- is_open_ = false;
- return raw_->Abort();
- } else {
- return Status::OK();
- }
- }
-
- bool closed() {
- std::lock_guard<std::mutex> guard(lock_);
- return !is_open_;
- }
-
- private:
- // Write 64 KB compressed data at a time
- static const int64_t kChunkSize = 64 * 1024;
-
- MemoryPool* pool_;
- std::shared_ptr<OutputStream> raw_;
- bool is_open_;
- std::shared_ptr<Compressor> compressor_;
- std::shared_ptr<ResizableBuffer> compressed_;
- int64_t compressed_pos_;
- // Total number of bytes compressed
- int64_t total_pos_;
-
- mutable std::mutex lock_;
-};
-
-Result<std::shared_ptr<CompressedOutputStream>> CompressedOutputStream::Make(
- util::Codec* codec, const std::shared_ptr<OutputStream>& raw, MemoryPool* pool) {
- // CAUTION: codec is not owned
- std::shared_ptr<CompressedOutputStream> res(new CompressedOutputStream);
- res->impl_.reset(new Impl(pool, std::move(raw)));
- RETURN_NOT_OK(res->impl_->Init(codec));
- return res;
-}
-
-CompressedOutputStream::~CompressedOutputStream() { internal::CloseFromDestructor(this); }
-
-Status CompressedOutputStream::Close() { return impl_->Close(); }
-
-Status CompressedOutputStream::Abort() { return impl_->Abort(); }
-
-bool CompressedOutputStream::closed() const { return impl_->closed(); }
-
-Result<int64_t> CompressedOutputStream::Tell() const { return impl_->Tell(); }
-
-Status CompressedOutputStream::Write(const void* data, int64_t nbytes) {
- return impl_->Write(data, nbytes);
-}
-
-Status CompressedOutputStream::Flush() { return impl_->Flush(); }
-
-std::shared_ptr<OutputStream> CompressedOutputStream::raw() const { return impl_->raw(); }
-
-// ----------------------------------------------------------------------
-// CompressedInputStream implementation
-
-class CompressedInputStream::Impl {
- public:
- Impl(MemoryPool* pool, const std::shared_ptr<InputStream>& raw)
- : pool_(pool),
- raw_(raw),
- is_open_(true),
- compressed_pos_(0),
- decompressed_pos_(0),
- total_pos_(0) {}
-
- Status Init(Codec* codec) {
- ARROW_ASSIGN_OR_RAISE(decompressor_, codec->MakeDecompressor());
- fresh_decompressor_ = true;
- return Status::OK();
- }
-
- Status Close() {
- if (is_open_) {
- is_open_ = false;
- return raw_->Close();
- } else {
- return Status::OK();
- }
- }
-
- Status Abort() {
- if (is_open_) {
- is_open_ = false;
- return raw_->Abort();
- } else {
- return Status::OK();
- }
- }
-
- bool closed() { return !is_open_; }
-
- Result<int64_t> Tell() const { return total_pos_; }
-
- // Read compressed data if necessary
- Status EnsureCompressedData() {
- int64_t compressed_avail = compressed_ ? compressed_->size() - compressed_pos_ : 0;
- if (compressed_avail == 0) {
- // No compressed data available, read a full chunk
- ARROW_ASSIGN_OR_RAISE(compressed_, raw_->Read(kChunkSize));
- compressed_pos_ = 0;
- }
- return Status::OK();
- }
-
- // Decompress some data from the compressed_ buffer.
- // Call this function only if the decompressed_ buffer is empty.
- Status DecompressData() {
- int64_t decompress_size = kDecompressSize;
-
- while (true) {
- ARROW_ASSIGN_OR_RAISE(decompressed_,
- AllocateResizableBuffer(decompress_size, pool_));
- decompressed_pos_ = 0;
-
- int64_t input_len = compressed_->size() - compressed_pos_;
- const uint8_t* input = compressed_->data() + compressed_pos_;
- int64_t output_len = decompressed_->size();
- uint8_t* output = decompressed_->mutable_data();
-
- ARROW_ASSIGN_OR_RAISE(
- auto result, decompressor_->Decompress(input_len, input, output_len, output));
- compressed_pos_ += result.bytes_read;
- if (result.bytes_read > 0) {
- fresh_decompressor_ = false;
- }
- if (result.bytes_written > 0 || !result.need_more_output || input_len == 0) {
- RETURN_NOT_OK(decompressed_->Resize(result.bytes_written));
- break;
- }
- DCHECK_EQ(result.bytes_written, 0);
- // Need to enlarge output buffer
- decompress_size *= 2;
- }
- return Status::OK();
- }
-
- // Read a given number of bytes from the decompressed_ buffer.
- int64_t ReadFromDecompressed(int64_t nbytes, uint8_t* out) {
- int64_t readable = decompressed_ ? (decompressed_->size() - decompressed_pos_) : 0;
- int64_t read_bytes = std::min(readable, nbytes);
-
- if (read_bytes > 0) {
- memcpy(out, decompressed_->data() + decompressed_pos_, read_bytes);
- decompressed_pos_ += read_bytes;
-
- if (decompressed_pos_ == decompressed_->size()) {
- // Decompressed data is exhausted, release buffer
- decompressed_.reset();
- }
- }
-
- return read_bytes;
- }
-
- // Try to feed more data into the decompressed_ buffer.
- Status RefillDecompressed(bool* has_data) {
- // First try to read data from the decompressor
- if (compressed_) {
- if (decompressor_->IsFinished()) {
- // We just went over the end of a previous compressed stream.
- RETURN_NOT_OK(decompressor_->Reset());
- fresh_decompressor_ = true;
- }
- RETURN_NOT_OK(DecompressData());
- }
- if (!decompressed_ || decompressed_->size() == 0) {
- // Got nothing, need to read more compressed data
- RETURN_NOT_OK(EnsureCompressedData());
- if (compressed_pos_ == compressed_->size()) {
- // No more data to decompress
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/io/compressed.h"
+
+#include <algorithm>
+#include <cstring>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <utility>
+
+#include "arrow/buffer.h"
+#include "arrow/io/util_internal.h"
+#include "arrow/memory_pool.h"
+#include "arrow/status.h"
+#include "arrow/util/compression.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+using util::Codec;
+using util::Compressor;
+using util::Decompressor;
+
+namespace io {
+
+// ----------------------------------------------------------------------
+// CompressedOutputStream implementation
+
+class CompressedOutputStream::Impl {
+ public:
+ Impl(MemoryPool* pool, const std::shared_ptr<OutputStream>& raw)
+ : pool_(pool), raw_(raw), is_open_(false), compressed_pos_(0), total_pos_(0) {}
+
+ Status Init(Codec* codec) {
+ ARROW_ASSIGN_OR_RAISE(compressor_, codec->MakeCompressor());
+ ARROW_ASSIGN_OR_RAISE(compressed_, AllocateResizableBuffer(kChunkSize, pool_));
+ compressed_pos_ = 0;
+ is_open_ = true;
+ return Status::OK();
+ }
+
+ Result<int64_t> Tell() const {
+ std::lock_guard<std::mutex> guard(lock_);
+ return total_pos_;
+ }
+
+ std::shared_ptr<OutputStream> raw() const { return raw_; }
+
+ Status FlushCompressed() {
+ if (compressed_pos_ > 0) {
+ RETURN_NOT_OK(raw_->Write(compressed_->data(), compressed_pos_));
+ compressed_pos_ = 0;
+ }
+ return Status::OK();
+ }
+
+ Status Write(const void* data, int64_t nbytes) {
+ std::lock_guard<std::mutex> guard(lock_);
+
+ auto input = reinterpret_cast<const uint8_t*>(data);
+ while (nbytes > 0) {
+ int64_t input_len = nbytes;
+ int64_t output_len = compressed_->size() - compressed_pos_;
+ uint8_t* output = compressed_->mutable_data() + compressed_pos_;
+ ARROW_ASSIGN_OR_RAISE(auto result,
+ compressor_->Compress(input_len, input, output_len, output));
+ compressed_pos_ += result.bytes_written;
+
+ if (result.bytes_read == 0) {
+ // Not enough output, try to flush it and retry
+ if (compressed_pos_ > 0) {
+ RETURN_NOT_OK(FlushCompressed());
+ output_len = compressed_->size() - compressed_pos_;
+ output = compressed_->mutable_data() + compressed_pos_;
+ ARROW_ASSIGN_OR_RAISE(
+ result, compressor_->Compress(input_len, input, output_len, output));
+ compressed_pos_ += result.bytes_written;
+ }
+ }
+ input += result.bytes_read;
+ nbytes -= result.bytes_read;
+ total_pos_ += result.bytes_read;
+ if (compressed_pos_ == compressed_->size()) {
+ // Output buffer full, flush it
+ RETURN_NOT_OK(FlushCompressed());
+ }
+ if (result.bytes_read == 0) {
+ // Need to enlarge output buffer
+ RETURN_NOT_OK(compressed_->Resize(compressed_->size() * 2));
+ }
+ }
+ return Status::OK();
+ }
+
+ Status Flush() {
+ std::lock_guard<std::mutex> guard(lock_);
+
+ while (true) {
+ // Flush compressor
+ int64_t output_len = compressed_->size() - compressed_pos_;
+ uint8_t* output = compressed_->mutable_data() + compressed_pos_;
+ ARROW_ASSIGN_OR_RAISE(auto result, compressor_->Flush(output_len, output));
+ compressed_pos_ += result.bytes_written;
+
+ // Flush compressed output
+ RETURN_NOT_OK(FlushCompressed());
+
+ if (result.should_retry) {
+ // Need to enlarge output buffer
+ RETURN_NOT_OK(compressed_->Resize(compressed_->size() * 2));
+ } else {
+ break;
+ }
+ }
+ return Status::OK();
+ }
+
+ Status FinalizeCompression() {
+ while (true) {
+ // Try to end compressor
+ int64_t output_len = compressed_->size() - compressed_pos_;
+ uint8_t* output = compressed_->mutable_data() + compressed_pos_;
+ ARROW_ASSIGN_OR_RAISE(auto result, compressor_->End(output_len, output));
+ compressed_pos_ += result.bytes_written;
+
+ // Flush compressed output
+ RETURN_NOT_OK(FlushCompressed());
+
+ if (result.should_retry) {
+ // Need to enlarge output buffer
+ RETURN_NOT_OK(compressed_->Resize(compressed_->size() * 2));
+ } else {
+ // Done
+ break;
+ }
+ }
+ return Status::OK();
+ }
+
+ Status Close() {
+ std::lock_guard<std::mutex> guard(lock_);
+
+ if (is_open_) {
+ is_open_ = false;
+ RETURN_NOT_OK(FinalizeCompression());
+ return raw_->Close();
+ } else {
+ return Status::OK();
+ }
+ }
+
+ Status Abort() {
+ std::lock_guard<std::mutex> guard(lock_);
+
+ if (is_open_) {
+ is_open_ = false;
+ return raw_->Abort();
+ } else {
+ return Status::OK();
+ }
+ }
+
+ bool closed() {
+ std::lock_guard<std::mutex> guard(lock_);
+ return !is_open_;
+ }
+
+ private:
+ // Write 64 KB compressed data at a time
+ static const int64_t kChunkSize = 64 * 1024;
+
+ MemoryPool* pool_;
+ std::shared_ptr<OutputStream> raw_;
+ bool is_open_;
+ std::shared_ptr<Compressor> compressor_;
+ std::shared_ptr<ResizableBuffer> compressed_;
+ int64_t compressed_pos_;
+ // Total number of bytes compressed
+ int64_t total_pos_;
+
+ mutable std::mutex lock_;
+};
+
+Result<std::shared_ptr<CompressedOutputStream>> CompressedOutputStream::Make(
+ util::Codec* codec, const std::shared_ptr<OutputStream>& raw, MemoryPool* pool) {
+ // CAUTION: codec is not owned
+ std::shared_ptr<CompressedOutputStream> res(new CompressedOutputStream);
+ res->impl_.reset(new Impl(pool, std::move(raw)));
+ RETURN_NOT_OK(res->impl_->Init(codec));
+ return res;
+}
+
+CompressedOutputStream::~CompressedOutputStream() { internal::CloseFromDestructor(this); }
+
+Status CompressedOutputStream::Close() { return impl_->Close(); }
+
+Status CompressedOutputStream::Abort() { return impl_->Abort(); }
+
+bool CompressedOutputStream::closed() const { return impl_->closed(); }
+
+Result<int64_t> CompressedOutputStream::Tell() const { return impl_->Tell(); }
+
+Status CompressedOutputStream::Write(const void* data, int64_t nbytes) {
+ return impl_->Write(data, nbytes);
+}
+
+Status CompressedOutputStream::Flush() { return impl_->Flush(); }
+
+std::shared_ptr<OutputStream> CompressedOutputStream::raw() const { return impl_->raw(); }
+
+// ----------------------------------------------------------------------
+// CompressedInputStream implementation
+
+class CompressedInputStream::Impl {
+ public:
+ Impl(MemoryPool* pool, const std::shared_ptr<InputStream>& raw)
+ : pool_(pool),
+ raw_(raw),
+ is_open_(true),
+ compressed_pos_(0),
+ decompressed_pos_(0),
+ total_pos_(0) {}
+
+ Status Init(Codec* codec) {
+ ARROW_ASSIGN_OR_RAISE(decompressor_, codec->MakeDecompressor());
+ fresh_decompressor_ = true;
+ return Status::OK();
+ }
+
+ Status Close() {
+ if (is_open_) {
+ is_open_ = false;
+ return raw_->Close();
+ } else {
+ return Status::OK();
+ }
+ }
+
+ Status Abort() {
+ if (is_open_) {
+ is_open_ = false;
+ return raw_->Abort();
+ } else {
+ return Status::OK();
+ }
+ }
+
+ bool closed() { return !is_open_; }
+
+ Result<int64_t> Tell() const { return total_pos_; }
+
+ // Read compressed data if necessary
+ Status EnsureCompressedData() {
+ int64_t compressed_avail = compressed_ ? compressed_->size() - compressed_pos_ : 0;
+ if (compressed_avail == 0) {
+ // No compressed data available, read a full chunk
+ ARROW_ASSIGN_OR_RAISE(compressed_, raw_->Read(kChunkSize));
+ compressed_pos_ = 0;
+ }
+ return Status::OK();
+ }
+
+ // Decompress some data from the compressed_ buffer.
+ // Call this function only if the decompressed_ buffer is empty.
+ Status DecompressData() {
+ int64_t decompress_size = kDecompressSize;
+
+ while (true) {
+ ARROW_ASSIGN_OR_RAISE(decompressed_,
+ AllocateResizableBuffer(decompress_size, pool_));
+ decompressed_pos_ = 0;
+
+ int64_t input_len = compressed_->size() - compressed_pos_;
+ const uint8_t* input = compressed_->data() + compressed_pos_;
+ int64_t output_len = decompressed_->size();
+ uint8_t* output = decompressed_->mutable_data();
+
+ ARROW_ASSIGN_OR_RAISE(
+ auto result, decompressor_->Decompress(input_len, input, output_len, output));
+ compressed_pos_ += result.bytes_read;
+ if (result.bytes_read > 0) {
+ fresh_decompressor_ = false;
+ }
+ if (result.bytes_written > 0 || !result.need_more_output || input_len == 0) {
+ RETURN_NOT_OK(decompressed_->Resize(result.bytes_written));
+ break;
+ }
+ DCHECK_EQ(result.bytes_written, 0);
+ // Need to enlarge output buffer
+ decompress_size *= 2;
+ }
+ return Status::OK();
+ }
+
+ // Read a given number of bytes from the decompressed_ buffer.
+ int64_t ReadFromDecompressed(int64_t nbytes, uint8_t* out) {
+ int64_t readable = decompressed_ ? (decompressed_->size() - decompressed_pos_) : 0;
+ int64_t read_bytes = std::min(readable, nbytes);
+
+ if (read_bytes > 0) {
+ memcpy(out, decompressed_->data() + decompressed_pos_, read_bytes);
+ decompressed_pos_ += read_bytes;
+
+ if (decompressed_pos_ == decompressed_->size()) {
+ // Decompressed data is exhausted, release buffer
+ decompressed_.reset();
+ }
+ }
+
+ return read_bytes;
+ }
+
+ // Try to feed more data into the decompressed_ buffer.
+ Status RefillDecompressed(bool* has_data) {
+ // First try to read data from the decompressor
+ if (compressed_) {
+ if (decompressor_->IsFinished()) {
+ // We just went over the end of a previous compressed stream.
+ RETURN_NOT_OK(decompressor_->Reset());
+ fresh_decompressor_ = true;
+ }
+ RETURN_NOT_OK(DecompressData());
+ }
+ if (!decompressed_ || decompressed_->size() == 0) {
+ // Got nothing, need to read more compressed data
+ RETURN_NOT_OK(EnsureCompressedData());
+ if (compressed_pos_ == compressed_->size()) {
+ // No more data to decompress
if (!fresh_decompressor_ && !decompressor_->IsFinished()) {
- return Status::IOError("Truncated compressed stream");
- }
- *has_data = false;
- return Status::OK();
- }
- RETURN_NOT_OK(DecompressData());
- }
- *has_data = true;
- return Status::OK();
- }
-
- Result<int64_t> Read(int64_t nbytes, void* out) {
- auto out_data = reinterpret_cast<uint8_t*>(out);
-
- int64_t total_read = 0;
- bool decompressor_has_data = true;
-
- while (nbytes - total_read > 0 && decompressor_has_data) {
- total_read += ReadFromDecompressed(nbytes - total_read, out_data + total_read);
-
- if (nbytes == total_read) {
- break;
- }
-
- // At this point, no more decompressed data remains, so we need to
- // decompress more
- RETURN_NOT_OK(RefillDecompressed(&decompressor_has_data));
- }
-
- total_pos_ += total_read;
- return total_read;
- }
-
- Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) {
- ARROW_ASSIGN_OR_RAISE(auto buf, AllocateResizableBuffer(nbytes, pool_));
- ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, Read(nbytes, buf->mutable_data()));
- RETURN_NOT_OK(buf->Resize(bytes_read));
- return std::move(buf);
- }
-
- std::shared_ptr<InputStream> raw() const { return raw_; }
-
- private:
- // Read 64 KB compressed data at a time
- static const int64_t kChunkSize = 64 * 1024;
- // Decompress 1 MB at a time
- static const int64_t kDecompressSize = 1024 * 1024;
-
- MemoryPool* pool_;
- std::shared_ptr<InputStream> raw_;
- bool is_open_;
- std::shared_ptr<Decompressor> decompressor_;
- std::shared_ptr<Buffer> compressed_;
- // Position in compressed buffer
- int64_t compressed_pos_;
- std::shared_ptr<ResizableBuffer> decompressed_;
- // Position in decompressed buffer
- int64_t decompressed_pos_;
- // True if the decompressor hasn't read any data yet.
- bool fresh_decompressor_;
- // Total number of bytes decompressed
- int64_t total_pos_;
-};
-
-Result<std::shared_ptr<CompressedInputStream>> CompressedInputStream::Make(
- Codec* codec, const std::shared_ptr<InputStream>& raw, MemoryPool* pool) {
- // CAUTION: codec is not owned
- std::shared_ptr<CompressedInputStream> res(new CompressedInputStream);
- res->impl_.reset(new Impl(pool, std::move(raw)));
- RETURN_NOT_OK(res->impl_->Init(codec));
- return res;
- return Status::OK();
-}
-
-CompressedInputStream::~CompressedInputStream() { internal::CloseFromDestructor(this); }
-
-Status CompressedInputStream::DoClose() { return impl_->Close(); }
-
-Status CompressedInputStream::DoAbort() { return impl_->Abort(); }
-
-bool CompressedInputStream::closed() const { return impl_->closed(); }
-
-Result<int64_t> CompressedInputStream::DoTell() const { return impl_->Tell(); }
-
-Result<int64_t> CompressedInputStream::DoRead(int64_t nbytes, void* out) {
- return impl_->Read(nbytes, out);
-}
-
-Result<std::shared_ptr<Buffer>> CompressedInputStream::DoRead(int64_t nbytes) {
- return impl_->Read(nbytes);
-}
-
-std::shared_ptr<InputStream> CompressedInputStream::raw() const { return impl_->raw(); }
-
+ return Status::IOError("Truncated compressed stream");
+ }
+ *has_data = false;
+ return Status::OK();
+ }
+ RETURN_NOT_OK(DecompressData());
+ }
+ *has_data = true;
+ return Status::OK();
+ }
+
+ Result<int64_t> Read(int64_t nbytes, void* out) {
+ auto out_data = reinterpret_cast<uint8_t*>(out);
+
+ int64_t total_read = 0;
+ bool decompressor_has_data = true;
+
+ while (nbytes - total_read > 0 && decompressor_has_data) {
+ total_read += ReadFromDecompressed(nbytes - total_read, out_data + total_read);
+
+ if (nbytes == total_read) {
+ break;
+ }
+
+ // At this point, no more decompressed data remains, so we need to
+ // decompress more
+ RETURN_NOT_OK(RefillDecompressed(&decompressor_has_data));
+ }
+
+ total_pos_ += total_read;
+ return total_read;
+ }
+
+ Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) {
+ ARROW_ASSIGN_OR_RAISE(auto buf, AllocateResizableBuffer(nbytes, pool_));
+ ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, Read(nbytes, buf->mutable_data()));
+ RETURN_NOT_OK(buf->Resize(bytes_read));
+ return std::move(buf);
+ }
+
+ std::shared_ptr<InputStream> raw() const { return raw_; }
+
+ private:
+ // Read 64 KB compressed data at a time
+ static const int64_t kChunkSize = 64 * 1024;
+ // Decompress 1 MB at a time
+ static const int64_t kDecompressSize = 1024 * 1024;
+
+ MemoryPool* pool_;
+ std::shared_ptr<InputStream> raw_;
+ bool is_open_;
+ std::shared_ptr<Decompressor> decompressor_;
+ std::shared_ptr<Buffer> compressed_;
+ // Position in compressed buffer
+ int64_t compressed_pos_;
+ std::shared_ptr<ResizableBuffer> decompressed_;
+ // Position in decompressed buffer
+ int64_t decompressed_pos_;
+ // True if the decompressor hasn't read any data yet.
+ bool fresh_decompressor_;
+ // Total number of bytes decompressed
+ int64_t total_pos_;
+};
+
+Result<std::shared_ptr<CompressedInputStream>> CompressedInputStream::Make(
+ Codec* codec, const std::shared_ptr<InputStream>& raw, MemoryPool* pool) {
+ // CAUTION: codec is not owned
+ std::shared_ptr<CompressedInputStream> res(new CompressedInputStream);
+ res->impl_.reset(new Impl(pool, std::move(raw)));
+ RETURN_NOT_OK(res->impl_->Init(codec));
+ return res;
+ return Status::OK();
+}
+
+CompressedInputStream::~CompressedInputStream() { internal::CloseFromDestructor(this); }
+
+Status CompressedInputStream::DoClose() { return impl_->Close(); }
+
+Status CompressedInputStream::DoAbort() { return impl_->Abort(); }
+
+bool CompressedInputStream::closed() const { return impl_->closed(); }
+
+Result<int64_t> CompressedInputStream::DoTell() const { return impl_->Tell(); }
+
+Result<int64_t> CompressedInputStream::DoRead(int64_t nbytes, void* out) {
+ return impl_->Read(nbytes, out);
+}
+
+Result<std::shared_ptr<Buffer>> CompressedInputStream::DoRead(int64_t nbytes) {
+ return impl_->Read(nbytes);
+}
+
+std::shared_ptr<InputStream> CompressedInputStream::raw() const { return impl_->raw(); }
+
Result<std::shared_ptr<const KeyValueMetadata>> CompressedInputStream::ReadMetadata() {
return impl_->raw()->ReadMetadata();
}
@@ -446,5 +446,5 @@ Future<std::shared_ptr<const KeyValueMetadata>> CompressedInputStream::ReadMetad
return impl_->raw()->ReadMetadataAsync(io_context);
}
-} // namespace io
-} // namespace arrow
+} // namespace io
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/compressed.h b/contrib/libs/apache/arrow/cpp/src/arrow/io/compressed.h
index cd1a7f673ce..3b7a8e30596 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/compressed.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/compressed.h
@@ -1,118 +1,118 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Compressed stream implementations
-
-#pragma once
-
-#include <memory>
-#include <string>
-
-#include "arrow/io/concurrency.h"
-#include "arrow/io/interfaces.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class MemoryPool;
-class Status;
-
-namespace util {
-
-class Codec;
-
-} // namespace util
-
-namespace io {
-
-class ARROW_EXPORT CompressedOutputStream : public OutputStream {
- public:
- ~CompressedOutputStream() override;
-
- /// \brief Create a compressed output stream wrapping the given output stream.
- static Result<std::shared_ptr<CompressedOutputStream>> Make(
- util::Codec* codec, const std::shared_ptr<OutputStream>& raw,
- MemoryPool* pool = default_memory_pool());
-
- // OutputStream interface
-
- /// \brief Close the compressed output stream. This implicitly closes the
- /// underlying raw output stream.
- Status Close() override;
- Status Abort() override;
- bool closed() const override;
-
- Result<int64_t> Tell() const override;
-
- Status Write(const void* data, int64_t nbytes) override;
- /// \cond FALSE
- using Writable::Write;
- /// \endcond
- Status Flush() override;
-
- /// \brief Return the underlying raw output stream.
- std::shared_ptr<OutputStream> raw() const;
-
- private:
- ARROW_DISALLOW_COPY_AND_ASSIGN(CompressedOutputStream);
-
- CompressedOutputStream() = default;
-
- class ARROW_NO_EXPORT Impl;
- std::unique_ptr<Impl> impl_;
-};
-
-class ARROW_EXPORT CompressedInputStream
- : public internal::InputStreamConcurrencyWrapper<CompressedInputStream> {
- public:
- ~CompressedInputStream() override;
-
- /// \brief Create a compressed input stream wrapping the given input stream.
- static Result<std::shared_ptr<CompressedInputStream>> Make(
- util::Codec* codec, const std::shared_ptr<InputStream>& raw,
- MemoryPool* pool = default_memory_pool());
-
- // InputStream interface
-
- bool closed() const override;
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Compressed stream implementations
+
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "arrow/io/concurrency.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class MemoryPool;
+class Status;
+
+namespace util {
+
+class Codec;
+
+} // namespace util
+
+namespace io {
+
+class ARROW_EXPORT CompressedOutputStream : public OutputStream {
+ public:
+ ~CompressedOutputStream() override;
+
+ /// \brief Create a compressed output stream wrapping the given output stream.
+ static Result<std::shared_ptr<CompressedOutputStream>> Make(
+ util::Codec* codec, const std::shared_ptr<OutputStream>& raw,
+ MemoryPool* pool = default_memory_pool());
+
+ // OutputStream interface
+
+ /// \brief Close the compressed output stream. This implicitly closes the
+ /// underlying raw output stream.
+ Status Close() override;
+ Status Abort() override;
+ bool closed() const override;
+
+ Result<int64_t> Tell() const override;
+
+ Status Write(const void* data, int64_t nbytes) override;
+ /// \cond FALSE
+ using Writable::Write;
+ /// \endcond
+ Status Flush() override;
+
+ /// \brief Return the underlying raw output stream.
+ std::shared_ptr<OutputStream> raw() const;
+
+ private:
+ ARROW_DISALLOW_COPY_AND_ASSIGN(CompressedOutputStream);
+
+ CompressedOutputStream() = default;
+
+ class ARROW_NO_EXPORT Impl;
+ std::unique_ptr<Impl> impl_;
+};
+
+class ARROW_EXPORT CompressedInputStream
+ : public internal::InputStreamConcurrencyWrapper<CompressedInputStream> {
+ public:
+ ~CompressedInputStream() override;
+
+ /// \brief Create a compressed input stream wrapping the given input stream.
+ static Result<std::shared_ptr<CompressedInputStream>> Make(
+ util::Codec* codec, const std::shared_ptr<InputStream>& raw,
+ MemoryPool* pool = default_memory_pool());
+
+ // InputStream interface
+
+ bool closed() const override;
Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata() override;
Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync(
const IOContext& io_context) override;
-
- /// \brief Return the underlying raw input stream.
- std::shared_ptr<InputStream> raw() const;
-
- private:
- friend InputStreamConcurrencyWrapper<CompressedInputStream>;
- ARROW_DISALLOW_COPY_AND_ASSIGN(CompressedInputStream);
-
- CompressedInputStream() = default;
-
- /// \brief Close the compressed input stream. This implicitly closes the
- /// underlying raw input stream.
- Status DoClose();
- Status DoAbort() override;
- Result<int64_t> DoTell() const;
- Result<int64_t> DoRead(int64_t nbytes, void* out);
- Result<std::shared_ptr<Buffer>> DoRead(int64_t nbytes);
-
- class ARROW_NO_EXPORT Impl;
- std::unique_ptr<Impl> impl_;
-};
-
-} // namespace io
-} // namespace arrow
+
+ /// \brief Return the underlying raw input stream.
+ std::shared_ptr<InputStream> raw() const;
+
+ private:
+ friend InputStreamConcurrencyWrapper<CompressedInputStream>;
+ ARROW_DISALLOW_COPY_AND_ASSIGN(CompressedInputStream);
+
+ CompressedInputStream() = default;
+
+ /// \brief Close the compressed input stream. This implicitly closes the
+ /// underlying raw input stream.
+ Status DoClose();
+ Status DoAbort() override;
+ Result<int64_t> DoTell() const;
+ Result<int64_t> DoRead(int64_t nbytes, void* out);
+ Result<std::shared_ptr<Buffer>> DoRead(int64_t nbytes);
+
+ class ARROW_NO_EXPORT Impl;
+ std::unique_ptr<Impl> impl_;
+};
+
+} // namespace io
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/concurrency.h b/contrib/libs/apache/arrow/cpp/src/arrow/io/concurrency.h
index b41ad2c1350..f501991f924 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/concurrency.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/concurrency.h
@@ -1,263 +1,263 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <memory>
-
-#include "arrow/io/interfaces.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace io {
-namespace internal {
-
-template <class LockType>
-class SharedLockGuard {
- public:
- explicit SharedLockGuard(LockType* lock) : lock_(lock) { lock_->LockShared(); }
-
- ~SharedLockGuard() { lock_->UnlockShared(); }
-
- protected:
- LockType* lock_;
-};
-
-template <class LockType>
-class ExclusiveLockGuard {
- public:
- explicit ExclusiveLockGuard(LockType* lock) : lock_(lock) { lock_->LockExclusive(); }
-
- ~ExclusiveLockGuard() { lock_->UnlockExclusive(); }
-
- protected:
- LockType* lock_;
-};
-
-// Debug concurrency checker that marks "shared" and "exclusive" code sections,
-// aborting if the concurrency rules get violated. Does nothing in release mode.
-// Note that we intentionally use the same class declaration in debug and
-// release builds in order to avoid runtime failures when e.g. loading a
-// release-built DLL with a debug-built application, or the reverse.
-
-class ARROW_EXPORT SharedExclusiveChecker {
- public:
- SharedExclusiveChecker();
- void LockShared();
- void UnlockShared();
- void LockExclusive();
- void UnlockExclusive();
-
- SharedLockGuard<SharedExclusiveChecker> shared_guard() {
- return SharedLockGuard<SharedExclusiveChecker>(this);
- }
-
- ExclusiveLockGuard<SharedExclusiveChecker> exclusive_guard() {
- return ExclusiveLockGuard<SharedExclusiveChecker>(this);
- }
-
- protected:
- struct Impl;
- std::shared_ptr<Impl> impl_;
-};
-
-// Concurrency wrappers for IO classes that check the correctness of
-// concurrent calls to various methods. It is not necessary to wrap all
-// IO classes with these, only a few core classes that get used in tests.
-//
-// We're not using virtual inheritance here as virtual bases have poorly
-// understood semantic overhead which we'd be passing on to implementers
-// and users of these interfaces. Instead, we just duplicate the method
-// wrappers between those two classes.
-
-template <class Derived>
-class ARROW_EXPORT InputStreamConcurrencyWrapper : public InputStream {
- public:
- Status Close() final {
- auto guard = lock_.exclusive_guard();
- return derived()->DoClose();
- }
-
- Status Abort() final {
- auto guard = lock_.exclusive_guard();
- return derived()->DoAbort();
- }
-
- Result<int64_t> Tell() const final {
- auto guard = lock_.exclusive_guard();
- return derived()->DoTell();
- }
-
- Result<int64_t> Read(int64_t nbytes, void* out) final {
- auto guard = lock_.exclusive_guard();
- return derived()->DoRead(nbytes, out);
- }
-
- Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) final {
- auto guard = lock_.exclusive_guard();
- return derived()->DoRead(nbytes);
- }
-
- Result<util::string_view> Peek(int64_t nbytes) final {
- auto guard = lock_.exclusive_guard();
- return derived()->DoPeek(nbytes);
- }
-
- /*
- Methods to implement in derived class:
-
- Status DoClose();
- Result<int64_t> DoTell() const;
- Result<int64_t> DoRead(int64_t nbytes, void* out);
- Result<std::shared_ptr<Buffer>> DoRead(int64_t nbytes);
-
- And optionally:
-
- Status DoAbort() override;
- Result<util::string_view> DoPeek(int64_t nbytes) override;
-
- These methods should be protected in the derived class and
- InputStreamConcurrencyWrapper declared as a friend with
-
- friend InputStreamConcurrencyWrapper<derived>;
- */
-
- protected:
- // Default implementations. They are virtual because the derived class may
- // have derived classes itself.
- virtual Status DoAbort() { return derived()->DoClose(); }
-
- virtual Result<util::string_view> DoPeek(int64_t ARROW_ARG_UNUSED(nbytes)) {
- return Status::NotImplemented("Peek not implemented");
- }
-
- Derived* derived() { return ::arrow::internal::checked_cast<Derived*>(this); }
-
- const Derived* derived() const {
- return ::arrow::internal::checked_cast<const Derived*>(this);
- }
-
- mutable SharedExclusiveChecker lock_;
-};
-
-template <class Derived>
-class ARROW_EXPORT RandomAccessFileConcurrencyWrapper : public RandomAccessFile {
- public:
- Status Close() final {
- auto guard = lock_.exclusive_guard();
- return derived()->DoClose();
- }
-
- Status Abort() final {
- auto guard = lock_.exclusive_guard();
- return derived()->DoAbort();
- }
-
- Result<int64_t> Tell() const final {
- auto guard = lock_.exclusive_guard();
- return derived()->DoTell();
- }
-
- Result<int64_t> Read(int64_t nbytes, void* out) final {
- auto guard = lock_.exclusive_guard();
- return derived()->DoRead(nbytes, out);
- }
-
- Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) final {
- auto guard = lock_.exclusive_guard();
- return derived()->DoRead(nbytes);
- }
-
- Result<util::string_view> Peek(int64_t nbytes) final {
- auto guard = lock_.exclusive_guard();
- return derived()->DoPeek(nbytes);
- }
-
- Status Seek(int64_t position) final {
- auto guard = lock_.exclusive_guard();
- return derived()->DoSeek(position);
- }
-
- Result<int64_t> GetSize() final {
- auto guard = lock_.shared_guard();
- return derived()->DoGetSize();
- }
-
- // NOTE: ReadAt doesn't use stream pointer, but it is allowed to update it
- // (it's the case on Windows when using ReadFileEx).
- // So any method that relies on the current position (even if it doesn't
- // update it, such as Peek) cannot run in parallel with ReadAt and has
- // to use the exclusive_guard.
-
- Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out) final {
- auto guard = lock_.shared_guard();
- return derived()->DoReadAt(position, nbytes, out);
- }
-
- Result<std::shared_ptr<Buffer>> ReadAt(int64_t position, int64_t nbytes) final {
- auto guard = lock_.shared_guard();
- return derived()->DoReadAt(position, nbytes);
- }
-
- /*
- Methods to implement in derived class:
-
- Status DoClose();
- Result<int64_t> DoTell() const;
- Result<int64_t> DoRead(int64_t nbytes, void* out);
- Result<std::shared_ptr<Buffer>> DoRead(int64_t nbytes);
- Status DoSeek(int64_t position);
- Result<int64_t> DoGetSize()
- Result<int64_t> DoReadAt(int64_t position, int64_t nbytes, void* out);
- Result<std::shared_ptr<Buffer>> DoReadAt(int64_t position, int64_t nbytes);
-
- And optionally:
-
- Status DoAbort() override;
- Result<util::string_view> DoPeek(int64_t nbytes) override;
-
- These methods should be protected in the derived class and
- RandomAccessFileConcurrencyWrapper declared as a friend with
-
- friend RandomAccessFileConcurrencyWrapper<derived>;
- */
-
- protected:
- // Default implementations. They are virtual because the derived class may
- // have derived classes itself.
- virtual Status DoAbort() { return derived()->DoClose(); }
-
- virtual Result<util::string_view> DoPeek(int64_t ARROW_ARG_UNUSED(nbytes)) {
- return Status::NotImplemented("Peek not implemented");
- }
-
- Derived* derived() { return ::arrow::internal::checked_cast<Derived*>(this); }
-
- const Derived* derived() const {
- return ::arrow::internal::checked_cast<const Derived*>(this);
- }
-
- mutable SharedExclusiveChecker lock_;
-};
-
-} // namespace internal
-} // namespace io
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+
+#include "arrow/io/interfaces.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace io {
+namespace internal {
+
+template <class LockType>
+class SharedLockGuard {
+ public:
+ explicit SharedLockGuard(LockType* lock) : lock_(lock) { lock_->LockShared(); }
+
+ ~SharedLockGuard() { lock_->UnlockShared(); }
+
+ protected:
+ LockType* lock_;
+};
+
+template <class LockType>
+class ExclusiveLockGuard {
+ public:
+ explicit ExclusiveLockGuard(LockType* lock) : lock_(lock) { lock_->LockExclusive(); }
+
+ ~ExclusiveLockGuard() { lock_->UnlockExclusive(); }
+
+ protected:
+ LockType* lock_;
+};
+
+// Debug concurrency checker that marks "shared" and "exclusive" code sections,
+// aborting if the concurrency rules get violated. Does nothing in release mode.
+// Note that we intentionally use the same class declaration in debug and
+// release builds in order to avoid runtime failures when e.g. loading a
+// release-built DLL with a debug-built application, or the reverse.
+
+class ARROW_EXPORT SharedExclusiveChecker {
+ public:
+ SharedExclusiveChecker();
+ void LockShared();
+ void UnlockShared();
+ void LockExclusive();
+ void UnlockExclusive();
+
+ SharedLockGuard<SharedExclusiveChecker> shared_guard() {
+ return SharedLockGuard<SharedExclusiveChecker>(this);
+ }
+
+ ExclusiveLockGuard<SharedExclusiveChecker> exclusive_guard() {
+ return ExclusiveLockGuard<SharedExclusiveChecker>(this);
+ }
+
+ protected:
+ struct Impl;
+ std::shared_ptr<Impl> impl_;
+};
+
+// Concurrency wrappers for IO classes that check the correctness of
+// concurrent calls to various methods. It is not necessary to wrap all
+// IO classes with these, only a few core classes that get used in tests.
+//
+// We're not using virtual inheritance here as virtual bases have poorly
+// understood semantic overhead which we'd be passing on to implementers
+// and users of these interfaces. Instead, we just duplicate the method
+// wrappers between those two classes.
+
+template <class Derived>
+class ARROW_EXPORT InputStreamConcurrencyWrapper : public InputStream {
+ public:
+ Status Close() final {
+ auto guard = lock_.exclusive_guard();
+ return derived()->DoClose();
+ }
+
+ Status Abort() final {
+ auto guard = lock_.exclusive_guard();
+ return derived()->DoAbort();
+ }
+
+ Result<int64_t> Tell() const final {
+ auto guard = lock_.exclusive_guard();
+ return derived()->DoTell();
+ }
+
+ Result<int64_t> Read(int64_t nbytes, void* out) final {
+ auto guard = lock_.exclusive_guard();
+ return derived()->DoRead(nbytes, out);
+ }
+
+ Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) final {
+ auto guard = lock_.exclusive_guard();
+ return derived()->DoRead(nbytes);
+ }
+
+ Result<util::string_view> Peek(int64_t nbytes) final {
+ auto guard = lock_.exclusive_guard();
+ return derived()->DoPeek(nbytes);
+ }
+
+ /*
+ Methods to implement in derived class:
+
+ Status DoClose();
+ Result<int64_t> DoTell() const;
+ Result<int64_t> DoRead(int64_t nbytes, void* out);
+ Result<std::shared_ptr<Buffer>> DoRead(int64_t nbytes);
+
+ And optionally:
+
+ Status DoAbort() override;
+ Result<util::string_view> DoPeek(int64_t nbytes) override;
+
+ These methods should be protected in the derived class and
+ InputStreamConcurrencyWrapper declared as a friend with
+
+ friend InputStreamConcurrencyWrapper<derived>;
+ */
+
+ protected:
+ // Default implementations. They are virtual because the derived class may
+ // have derived classes itself.
+ virtual Status DoAbort() { return derived()->DoClose(); }
+
+ virtual Result<util::string_view> DoPeek(int64_t ARROW_ARG_UNUSED(nbytes)) {
+ return Status::NotImplemented("Peek not implemented");
+ }
+
+ Derived* derived() { return ::arrow::internal::checked_cast<Derived*>(this); }
+
+ const Derived* derived() const {
+ return ::arrow::internal::checked_cast<const Derived*>(this);
+ }
+
+ mutable SharedExclusiveChecker lock_;
+};
+
+template <class Derived>
+class ARROW_EXPORT RandomAccessFileConcurrencyWrapper : public RandomAccessFile {
+ public:
+ Status Close() final {
+ auto guard = lock_.exclusive_guard();
+ return derived()->DoClose();
+ }
+
+ Status Abort() final {
+ auto guard = lock_.exclusive_guard();
+ return derived()->DoAbort();
+ }
+
+ Result<int64_t> Tell() const final {
+ auto guard = lock_.exclusive_guard();
+ return derived()->DoTell();
+ }
+
+ Result<int64_t> Read(int64_t nbytes, void* out) final {
+ auto guard = lock_.exclusive_guard();
+ return derived()->DoRead(nbytes, out);
+ }
+
+ Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) final {
+ auto guard = lock_.exclusive_guard();
+ return derived()->DoRead(nbytes);
+ }
+
+ Result<util::string_view> Peek(int64_t nbytes) final {
+ auto guard = lock_.exclusive_guard();
+ return derived()->DoPeek(nbytes);
+ }
+
+ Status Seek(int64_t position) final {
+ auto guard = lock_.exclusive_guard();
+ return derived()->DoSeek(position);
+ }
+
+ Result<int64_t> GetSize() final {
+ auto guard = lock_.shared_guard();
+ return derived()->DoGetSize();
+ }
+
+ // NOTE: ReadAt doesn't use stream pointer, but it is allowed to update it
+ // (it's the case on Windows when using ReadFileEx).
+ // So any method that relies on the current position (even if it doesn't
+ // update it, such as Peek) cannot run in parallel with ReadAt and has
+ // to use the exclusive_guard.
+
+ Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out) final {
+ auto guard = lock_.shared_guard();
+ return derived()->DoReadAt(position, nbytes, out);
+ }
+
+ Result<std::shared_ptr<Buffer>> ReadAt(int64_t position, int64_t nbytes) final {
+ auto guard = lock_.shared_guard();
+ return derived()->DoReadAt(position, nbytes);
+ }
+
+ /*
+ Methods to implement in derived class:
+
+ Status DoClose();
+ Result<int64_t> DoTell() const;
+ Result<int64_t> DoRead(int64_t nbytes, void* out);
+ Result<std::shared_ptr<Buffer>> DoRead(int64_t nbytes);
+ Status DoSeek(int64_t position);
+ Result<int64_t> DoGetSize()
+ Result<int64_t> DoReadAt(int64_t position, int64_t nbytes, void* out);
+ Result<std::shared_ptr<Buffer>> DoReadAt(int64_t position, int64_t nbytes);
+
+ And optionally:
+
+ Status DoAbort() override;
+ Result<util::string_view> DoPeek(int64_t nbytes) override;
+
+ These methods should be protected in the derived class and
+ RandomAccessFileConcurrencyWrapper declared as a friend with
+
+ friend RandomAccessFileConcurrencyWrapper<derived>;
+ */
+
+ protected:
+ // Default implementations. They are virtual because the derived class may
+ // have derived classes itself.
+ virtual Status DoAbort() { return derived()->DoClose(); }
+
+ virtual Result<util::string_view> DoPeek(int64_t ARROW_ARG_UNUSED(nbytes)) {
+ return Status::NotImplemented("Peek not implemented");
+ }
+
+ Derived* derived() { return ::arrow::internal::checked_cast<Derived*>(this); }
+
+ const Derived* derived() const {
+ return ::arrow::internal::checked_cast<const Derived*>(this);
+ }
+
+ mutable SharedExclusiveChecker lock_;
+};
+
+} // namespace internal
+} // namespace io
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/file.cc b/contrib/libs/apache/arrow/cpp/src/arrow/io/file.cc
index 70e15335af2..6eb049f235c 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/file.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/file.cc
@@ -1,772 +1,772 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/windows_compatibility.h" // IWYU pragma: keep
-
-// sys/mman.h not present in Visual Studio or Cygwin
-#ifdef _WIN32
-#ifndef NOMINMAX
-#define NOMINMAX
-#endif
-#include "arrow/io/mman.h"
-#undef Realloc
-#undef Free
-#else
-#include <fcntl.h>
-#include <sys/mman.h>
-#include <unistd.h> // IWYU pragma: keep
-#endif
-
-#include <algorithm>
-#include <atomic>
-#include <cerrno>
-#include <cstdint>
-#include <cstring>
-#include <memory>
-#include <mutex>
-#include <sstream>
-#include <string>
-#include <utility>
-
-// ----------------------------------------------------------------------
-// Other Arrow includes
-
-#include "arrow/io/file.h"
-#include "arrow/io/interfaces.h"
-#include "arrow/io/util_internal.h"
-
-#include "arrow/buffer.h"
-#include "arrow/memory_pool.h"
-#include "arrow/status.h"
-#include "arrow/util/future.h"
-#include "arrow/util/io_util.h"
-#include "arrow/util/logging.h"
-
-namespace arrow {
-
-using internal::IOErrorFromErrno;
-
-namespace io {
-
-class OSFile {
- public:
- OSFile() : fd_(-1), is_open_(false), size_(-1), need_seeking_(false) {}
-
- ~OSFile() {}
-
- // Note: only one of the Open* methods below may be called on a given instance
-
- Status OpenWritable(const std::string& path, bool truncate, bool append,
- bool write_only) {
- RETURN_NOT_OK(SetFileName(path));
-
- ARROW_ASSIGN_OR_RAISE(fd_, ::arrow::internal::FileOpenWritable(file_name_, write_only,
- truncate, append));
- is_open_ = true;
- mode_ = write_only ? FileMode::WRITE : FileMode::READWRITE;
-
- if (!truncate) {
- ARROW_ASSIGN_OR_RAISE(size_, ::arrow::internal::FileGetSize(fd_));
- } else {
- size_ = 0;
- }
- return Status::OK();
- }
-
- // This is different from OpenWritable(string, ...) in that it doesn't
- // truncate nor mandate a seekable file
- Status OpenWritable(int fd) {
- auto result = ::arrow::internal::FileGetSize(fd);
- if (result.ok()) {
- size_ = *result;
- } else {
- // Non-seekable file
- size_ = -1;
- }
- RETURN_NOT_OK(SetFileName(fd));
- is_open_ = true;
- mode_ = FileMode::WRITE;
- fd_ = fd;
- return Status::OK();
- }
-
- Status OpenReadable(const std::string& path) {
- RETURN_NOT_OK(SetFileName(path));
-
- ARROW_ASSIGN_OR_RAISE(fd_, ::arrow::internal::FileOpenReadable(file_name_));
- ARROW_ASSIGN_OR_RAISE(size_, ::arrow::internal::FileGetSize(fd_));
-
- is_open_ = true;
- mode_ = FileMode::READ;
- return Status::OK();
- }
-
- Status OpenReadable(int fd) {
- ARROW_ASSIGN_OR_RAISE(size_, ::arrow::internal::FileGetSize(fd));
- RETURN_NOT_OK(SetFileName(fd));
- is_open_ = true;
- mode_ = FileMode::READ;
- fd_ = fd;
- return Status::OK();
- }
-
- Status CheckClosed() const {
- if (!is_open_) {
- return Status::Invalid("Invalid operation on closed file");
- }
- return Status::OK();
- }
-
- Status Close() {
- if (is_open_) {
- // Even if closing fails, the fd will likely be closed (perhaps it's
- // already closed).
- is_open_ = false;
- int fd = fd_;
- fd_ = -1;
- RETURN_NOT_OK(::arrow::internal::FileClose(fd));
- }
- return Status::OK();
- }
-
- Result<int64_t> Read(int64_t nbytes, void* out) {
- RETURN_NOT_OK(CheckClosed());
- RETURN_NOT_OK(CheckPositioned());
- return ::arrow::internal::FileRead(fd_, reinterpret_cast<uint8_t*>(out), nbytes);
- }
-
- Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out) {
- RETURN_NOT_OK(CheckClosed());
- RETURN_NOT_OK(internal::ValidateRange(position, nbytes));
- // ReadAt() leaves the file position undefined, so require that we seek
- // before calling Read() or Write().
- need_seeking_.store(true);
- return ::arrow::internal::FileReadAt(fd_, reinterpret_cast<uint8_t*>(out), position,
- nbytes);
- }
-
- Status Seek(int64_t pos) {
- RETURN_NOT_OK(CheckClosed());
- if (pos < 0) {
- return Status::Invalid("Invalid position");
- }
- Status st = ::arrow::internal::FileSeek(fd_, pos);
- if (st.ok()) {
- need_seeking_.store(false);
- }
- return st;
- }
-
- Result<int64_t> Tell() const {
- RETURN_NOT_OK(CheckClosed());
- return ::arrow::internal::FileTell(fd_);
- }
-
- Status Write(const void* data, int64_t length) {
- RETURN_NOT_OK(CheckClosed());
-
- std::lock_guard<std::mutex> guard(lock_);
- RETURN_NOT_OK(CheckPositioned());
- if (length < 0) {
- return Status::IOError("Length must be non-negative");
- }
- return ::arrow::internal::FileWrite(fd_, reinterpret_cast<const uint8_t*>(data),
- length);
- }
-
- int fd() const { return fd_; }
-
- bool is_open() const { return is_open_; }
-
- int64_t size() const { return size_; }
-
- FileMode::type mode() const { return mode_; }
-
- std::mutex& lock() { return lock_; }
-
- protected:
- Status SetFileName(const std::string& file_name) {
- return ::arrow::internal::PlatformFilename::FromString(file_name).Value(&file_name_);
- }
-
- Status SetFileName(int fd) {
- std::stringstream ss;
- ss << "<fd " << fd << ">";
- return SetFileName(ss.str());
- }
-
- Status CheckPositioned() {
- if (need_seeking_.load()) {
- return Status::Invalid(
- "Need seeking after ReadAt() before "
- "calling implicitly-positioned operation");
- }
- return Status::OK();
- }
-
- ::arrow::internal::PlatformFilename file_name_;
-
- std::mutex lock_;
-
- // File descriptor
- int fd_;
-
- FileMode::type mode_;
-
- bool is_open_;
- int64_t size_;
- // Whether ReadAt made the file position non-deterministic.
- std::atomic<bool> need_seeking_;
-};
-
-// ----------------------------------------------------------------------
-// ReadableFile implementation
-
-class ReadableFile::ReadableFileImpl : public OSFile {
- public:
- explicit ReadableFileImpl(MemoryPool* pool) : OSFile(), pool_(pool) {}
-
- Status Open(const std::string& path) { return OpenReadable(path); }
- Status Open(int fd) { return OpenReadable(fd); }
-
- Result<std::shared_ptr<Buffer>> ReadBuffer(int64_t nbytes) {
- ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(nbytes, pool_));
-
- ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, Read(nbytes, buffer->mutable_data()));
- if (bytes_read < nbytes) {
- RETURN_NOT_OK(buffer->Resize(bytes_read));
- buffer->ZeroPadding();
- }
- return std::move(buffer);
- }
-
- Result<std::shared_ptr<Buffer>> ReadBufferAt(int64_t position, int64_t nbytes) {
- ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(nbytes, pool_));
-
- ARROW_ASSIGN_OR_RAISE(int64_t bytes_read,
- ReadAt(position, nbytes, buffer->mutable_data()));
- if (bytes_read < nbytes) {
- RETURN_NOT_OK(buffer->Resize(bytes_read));
- buffer->ZeroPadding();
- }
- return std::move(buffer);
- }
-
- Status WillNeed(const std::vector<ReadRange>& ranges) {
- RETURN_NOT_OK(CheckClosed());
- for (const auto& range : ranges) {
- RETURN_NOT_OK(internal::ValidateRange(range.offset, range.length));
-#if defined(POSIX_FADV_WILLNEED)
- if (posix_fadvise(fd_, range.offset, range.length, POSIX_FADV_WILLNEED)) {
- return IOErrorFromErrno(errno, "posix_fadvise failed");
- }
-#elif defined(F_RDADVISE) // macOS, BSD?
- struct {
- off_t ra_offset;
- int ra_count;
- } radvisory{range.offset, static_cast<int>(range.length)};
- if (radvisory.ra_count > 0 && fcntl(fd_, F_RDADVISE, &radvisory) == -1) {
- return IOErrorFromErrno(errno, "fcntl(fd, F_RDADVISE, ...) failed");
- }
-#endif
- }
- return Status::OK();
- }
-
- private:
- MemoryPool* pool_;
-};
-
-ReadableFile::ReadableFile(MemoryPool* pool) { impl_.reset(new ReadableFileImpl(pool)); }
-
-ReadableFile::~ReadableFile() { internal::CloseFromDestructor(this); }
-
-Result<std::shared_ptr<ReadableFile>> ReadableFile::Open(const std::string& path,
- MemoryPool* pool) {
- auto file = std::shared_ptr<ReadableFile>(new ReadableFile(pool));
- RETURN_NOT_OK(file->impl_->Open(path));
- return file;
-}
-
-Result<std::shared_ptr<ReadableFile>> ReadableFile::Open(int fd, MemoryPool* pool) {
- auto file = std::shared_ptr<ReadableFile>(new ReadableFile(pool));
- RETURN_NOT_OK(file->impl_->Open(fd));
- return file;
-}
-
-Status ReadableFile::DoClose() { return impl_->Close(); }
-
-bool ReadableFile::closed() const { return !impl_->is_open(); }
-
-Status ReadableFile::WillNeed(const std::vector<ReadRange>& ranges) {
- return impl_->WillNeed(ranges);
-}
-
-Result<int64_t> ReadableFile::DoTell() const { return impl_->Tell(); }
-
-Result<int64_t> ReadableFile::DoRead(int64_t nbytes, void* out) {
- return impl_->Read(nbytes, out);
-}
-
-Result<int64_t> ReadableFile::DoReadAt(int64_t position, int64_t nbytes, void* out) {
- return impl_->ReadAt(position, nbytes, out);
-}
-
-Result<std::shared_ptr<Buffer>> ReadableFile::DoReadAt(int64_t position, int64_t nbytes) {
- return impl_->ReadBufferAt(position, nbytes);
-}
-
-Result<std::shared_ptr<Buffer>> ReadableFile::DoRead(int64_t nbytes) {
- return impl_->ReadBuffer(nbytes);
-}
-
-Result<int64_t> ReadableFile::DoGetSize() { return impl_->size(); }
-
-Status ReadableFile::DoSeek(int64_t pos) { return impl_->Seek(pos); }
-
-int ReadableFile::file_descriptor() const { return impl_->fd(); }
-
-// ----------------------------------------------------------------------
-// FileOutputStream
-
-class FileOutputStream::FileOutputStreamImpl : public OSFile {
- public:
- Status Open(const std::string& path, bool append) {
- const bool truncate = !append;
- return OpenWritable(path, truncate, append, true /* write_only */);
- }
- Status Open(int fd) { return OpenWritable(fd); }
-};
-
-FileOutputStream::FileOutputStream() { impl_.reset(new FileOutputStreamImpl()); }
-
-FileOutputStream::~FileOutputStream() { internal::CloseFromDestructor(this); }
-
-Result<std::shared_ptr<FileOutputStream>> FileOutputStream::Open(const std::string& path,
- bool append) {
- auto stream = std::shared_ptr<FileOutputStream>(new FileOutputStream());
- RETURN_NOT_OK(stream->impl_->Open(path, append));
- return stream;
-}
-
-Result<std::shared_ptr<FileOutputStream>> FileOutputStream::Open(int fd) {
- auto stream = std::shared_ptr<FileOutputStream>(new FileOutputStream());
- RETURN_NOT_OK(stream->impl_->Open(fd));
- return stream;
-}
-
-Status FileOutputStream::Close() { return impl_->Close(); }
-
-bool FileOutputStream::closed() const { return !impl_->is_open(); }
-
-Result<int64_t> FileOutputStream::Tell() const { return impl_->Tell(); }
-
-Status FileOutputStream::Write(const void* data, int64_t length) {
- return impl_->Write(data, length);
-}
-
-int FileOutputStream::file_descriptor() const { return impl_->fd(); }
-
-// ----------------------------------------------------------------------
-// Implement MemoryMappedFile
-
-class MemoryMappedFile::MemoryMap
- : public std::enable_shared_from_this<MemoryMappedFile::MemoryMap> {
- public:
- // An object representing the entire memory-mapped region.
- // It can be sliced in order to return individual subregions, which
- // will then keep the original region alive as long as necessary.
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/windows_compatibility.h" // IWYU pragma: keep
+
+// sys/mman.h not present in Visual Studio or Cygwin
+#ifdef _WIN32
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
+#include "arrow/io/mman.h"
+#undef Realloc
+#undef Free
+#else
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h> // IWYU pragma: keep
+#endif
+
+#include <algorithm>
+#include <atomic>
+#include <cerrno>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <mutex>
+#include <sstream>
+#include <string>
+#include <utility>
+
+// ----------------------------------------------------------------------
+// Other Arrow includes
+
+#include "arrow/io/file.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/io/util_internal.h"
+
+#include "arrow/buffer.h"
+#include "arrow/memory_pool.h"
+#include "arrow/status.h"
+#include "arrow/util/future.h"
+#include "arrow/util/io_util.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+using internal::IOErrorFromErrno;
+
+namespace io {
+
+class OSFile {
+ public:
+ OSFile() : fd_(-1), is_open_(false), size_(-1), need_seeking_(false) {}
+
+ ~OSFile() {}
+
+ // Note: only one of the Open* methods below may be called on a given instance
+
+ Status OpenWritable(const std::string& path, bool truncate, bool append,
+ bool write_only) {
+ RETURN_NOT_OK(SetFileName(path));
+
+ ARROW_ASSIGN_OR_RAISE(fd_, ::arrow::internal::FileOpenWritable(file_name_, write_only,
+ truncate, append));
+ is_open_ = true;
+ mode_ = write_only ? FileMode::WRITE : FileMode::READWRITE;
+
+ if (!truncate) {
+ ARROW_ASSIGN_OR_RAISE(size_, ::arrow::internal::FileGetSize(fd_));
+ } else {
+ size_ = 0;
+ }
+ return Status::OK();
+ }
+
+ // Unlike OpenWritable(string, ...), this overload neither truncates the
+ // file nor requires it to be seekable.
+ Status OpenWritable(int fd) {
+ auto result = ::arrow::internal::FileGetSize(fd);
+ if (result.ok()) {
+ size_ = *result;
+ } else {
+ // Non-seekable file
+ size_ = -1;
+ }
+ RETURN_NOT_OK(SetFileName(fd));
+ is_open_ = true;
+ mode_ = FileMode::WRITE;
+ fd_ = fd;
+ return Status::OK();
+ }
+
+ Status OpenReadable(const std::string& path) {
+ RETURN_NOT_OK(SetFileName(path));
+
+ ARROW_ASSIGN_OR_RAISE(fd_, ::arrow::internal::FileOpenReadable(file_name_));
+ ARROW_ASSIGN_OR_RAISE(size_, ::arrow::internal::FileGetSize(fd_));
+
+ is_open_ = true;
+ mode_ = FileMode::READ;
+ return Status::OK();
+ }
+
+ Status OpenReadable(int fd) {
+ ARROW_ASSIGN_OR_RAISE(size_, ::arrow::internal::FileGetSize(fd));
+ RETURN_NOT_OK(SetFileName(fd));
+ is_open_ = true;
+ mode_ = FileMode::READ;
+ fd_ = fd;
+ return Status::OK();
+ }
+
+ Status CheckClosed() const {
+ if (!is_open_) {
+ return Status::Invalid("Invalid operation on closed file");
+ }
+ return Status::OK();
+ }
+
+ Status Close() {
+ if (is_open_) {
+ // Even if closing fails, the fd will likely be closed (perhaps it's
+ // already closed).
+ is_open_ = false;
+ int fd = fd_;
+ fd_ = -1;
+ RETURN_NOT_OK(::arrow::internal::FileClose(fd));
+ }
+ return Status::OK();
+ }
+
+ Result<int64_t> Read(int64_t nbytes, void* out) {
+ RETURN_NOT_OK(CheckClosed());
+ RETURN_NOT_OK(CheckPositioned());
+ return ::arrow::internal::FileRead(fd_, reinterpret_cast<uint8_t*>(out), nbytes);
+ }
+
+ Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out) {
+ RETURN_NOT_OK(CheckClosed());
+ RETURN_NOT_OK(internal::ValidateRange(position, nbytes));
+ // ReadAt() leaves the file position undefined, so require that we seek
+ // before calling Read() or Write().
+ need_seeking_.store(true);
+ return ::arrow::internal::FileReadAt(fd_, reinterpret_cast<uint8_t*>(out), position,
+ nbytes);
+ }
+
+ Status Seek(int64_t pos) {
+ RETURN_NOT_OK(CheckClosed());
+ if (pos < 0) {
+ return Status::Invalid("Invalid position");
+ }
+ Status st = ::arrow::internal::FileSeek(fd_, pos);
+ if (st.ok()) {
+ need_seeking_.store(false);
+ }
+ return st;
+ }
+
+ Result<int64_t> Tell() const {
+ RETURN_NOT_OK(CheckClosed());
+ return ::arrow::internal::FileTell(fd_);
+ }
+
+ Status Write(const void* data, int64_t length) {
+ RETURN_NOT_OK(CheckClosed());
+
+ std::lock_guard<std::mutex> guard(lock_);
+ RETURN_NOT_OK(CheckPositioned());
+ if (length < 0) {
+ return Status::IOError("Length must be non-negative");
+ }
+ return ::arrow::internal::FileWrite(fd_, reinterpret_cast<const uint8_t*>(data),
+ length);
+ }
+
+ int fd() const { return fd_; }
+
+ bool is_open() const { return is_open_; }
+
+ int64_t size() const { return size_; }
+
+ FileMode::type mode() const { return mode_; }
+
+ std::mutex& lock() { return lock_; }
+
+ protected:
+ Status SetFileName(const std::string& file_name) {
+ return ::arrow::internal::PlatformFilename::FromString(file_name).Value(&file_name_);
+ }
+
+ Status SetFileName(int fd) {
+ std::stringstream ss;
+ ss << "<fd " << fd << ">";
+ return SetFileName(ss.str());
+ }
+
+ Status CheckPositioned() {
+ if (need_seeking_.load()) {
+ return Status::Invalid(
+ "Need seeking after ReadAt() before "
+ "calling implicitly-positioned operation");
+ }
+ return Status::OK();
+ }
+
+ ::arrow::internal::PlatformFilename file_name_;
+
+ std::mutex lock_;
+
+ // File descriptor
+ int fd_;
+
+ FileMode::type mode_;
+
+ bool is_open_;
+ int64_t size_;
+ // Whether ReadAt made the file position non-deterministic.
+ std::atomic<bool> need_seeking_;
+};
+
+// ----------------------------------------------------------------------
+// ReadableFile implementation
+
+class ReadableFile::ReadableFileImpl : public OSFile {
+ public:
+ explicit ReadableFileImpl(MemoryPool* pool) : OSFile(), pool_(pool) {}
+
+ Status Open(const std::string& path) { return OpenReadable(path); }
+ Status Open(int fd) { return OpenReadable(fd); }
+
+ Result<std::shared_ptr<Buffer>> ReadBuffer(int64_t nbytes) {
+ ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(nbytes, pool_));
+
+ ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, Read(nbytes, buffer->mutable_data()));
+ if (bytes_read < nbytes) {
+ RETURN_NOT_OK(buffer->Resize(bytes_read));
+ buffer->ZeroPadding();
+ }
+ return std::move(buffer);
+ }
+
+ Result<std::shared_ptr<Buffer>> ReadBufferAt(int64_t position, int64_t nbytes) {
+ ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(nbytes, pool_));
+
+ ARROW_ASSIGN_OR_RAISE(int64_t bytes_read,
+ ReadAt(position, nbytes, buffer->mutable_data()));
+ if (bytes_read < nbytes) {
+ RETURN_NOT_OK(buffer->Resize(bytes_read));
+ buffer->ZeroPadding();
+ }
+ return std::move(buffer);
+ }
+
+ Status WillNeed(const std::vector<ReadRange>& ranges) {
+ RETURN_NOT_OK(CheckClosed());
+ for (const auto& range : ranges) {
+ RETURN_NOT_OK(internal::ValidateRange(range.offset, range.length));
+#if defined(POSIX_FADV_WILLNEED)
+ if (posix_fadvise(fd_, range.offset, range.length, POSIX_FADV_WILLNEED)) {
+ return IOErrorFromErrno(errno, "posix_fadvise failed");
+ }
+#elif defined(F_RDADVISE) // macOS, BSD?
+ struct {
+ off_t ra_offset;
+ int ra_count;
+ } radvisory{range.offset, static_cast<int>(range.length)};
+ if (radvisory.ra_count > 0 && fcntl(fd_, F_RDADVISE, &radvisory) == -1) {
+ return IOErrorFromErrno(errno, "fcntl(fd, F_RDADVISE, ...) failed");
+ }
+#endif
+ }
+ return Status::OK();
+ }
+
+ private:
+ MemoryPool* pool_;
+};
+
+ReadableFile::ReadableFile(MemoryPool* pool) { impl_.reset(new ReadableFileImpl(pool)); }
+
+ReadableFile::~ReadableFile() { internal::CloseFromDestructor(this); }
+
+Result<std::shared_ptr<ReadableFile>> ReadableFile::Open(const std::string& path,
+ MemoryPool* pool) {
+ auto file = std::shared_ptr<ReadableFile>(new ReadableFile(pool));
+ RETURN_NOT_OK(file->impl_->Open(path));
+ return file;
+}
+
+Result<std::shared_ptr<ReadableFile>> ReadableFile::Open(int fd, MemoryPool* pool) {
+ auto file = std::shared_ptr<ReadableFile>(new ReadableFile(pool));
+ RETURN_NOT_OK(file->impl_->Open(fd));
+ return file;
+}
+
+Status ReadableFile::DoClose() { return impl_->Close(); }
+
+bool ReadableFile::closed() const { return !impl_->is_open(); }
+
+Status ReadableFile::WillNeed(const std::vector<ReadRange>& ranges) {
+ return impl_->WillNeed(ranges);
+}
+
+Result<int64_t> ReadableFile::DoTell() const { return impl_->Tell(); }
+
+Result<int64_t> ReadableFile::DoRead(int64_t nbytes, void* out) {
+ return impl_->Read(nbytes, out);
+}
+
+Result<int64_t> ReadableFile::DoReadAt(int64_t position, int64_t nbytes, void* out) {
+ return impl_->ReadAt(position, nbytes, out);
+}
+
+Result<std::shared_ptr<Buffer>> ReadableFile::DoReadAt(int64_t position, int64_t nbytes) {
+ return impl_->ReadBufferAt(position, nbytes);
+}
+
+Result<std::shared_ptr<Buffer>> ReadableFile::DoRead(int64_t nbytes) {
+ return impl_->ReadBuffer(nbytes);
+}
+
+Result<int64_t> ReadableFile::DoGetSize() { return impl_->size(); }
+
+Status ReadableFile::DoSeek(int64_t pos) { return impl_->Seek(pos); }
+
+int ReadableFile::file_descriptor() const { return impl_->fd(); }
+
+// ----------------------------------------------------------------------
+// FileOutputStream
+
+class FileOutputStream::FileOutputStreamImpl : public OSFile {
+ public:
+ Status Open(const std::string& path, bool append) {
+ const bool truncate = !append;
+ return OpenWritable(path, truncate, append, true /* write_only */);
+ }
+ Status Open(int fd) { return OpenWritable(fd); }
+};
+
+FileOutputStream::FileOutputStream() { impl_.reset(new FileOutputStreamImpl()); }
+
+FileOutputStream::~FileOutputStream() { internal::CloseFromDestructor(this); }
+
+Result<std::shared_ptr<FileOutputStream>> FileOutputStream::Open(const std::string& path,
+ bool append) {
+ auto stream = std::shared_ptr<FileOutputStream>(new FileOutputStream());
+ RETURN_NOT_OK(stream->impl_->Open(path, append));
+ return stream;
+}
+
+Result<std::shared_ptr<FileOutputStream>> FileOutputStream::Open(int fd) {
+ auto stream = std::shared_ptr<FileOutputStream>(new FileOutputStream());
+ RETURN_NOT_OK(stream->impl_->Open(fd));
+ return stream;
+}
+
+Status FileOutputStream::Close() { return impl_->Close(); }
+
+bool FileOutputStream::closed() const { return !impl_->is_open(); }
+
+Result<int64_t> FileOutputStream::Tell() const { return impl_->Tell(); }
+
+Status FileOutputStream::Write(const void* data, int64_t length) {
+ return impl_->Write(data, length);
+}
+
+int FileOutputStream::file_descriptor() const { return impl_->fd(); }
+
+// ----------------------------------------------------------------------
+// Implement MemoryMappedFile
+
+class MemoryMappedFile::MemoryMap
+ : public std::enable_shared_from_this<MemoryMappedFile::MemoryMap> {
+ public:
+ // An object representing the entire memory-mapped region.
+ // It can be sliced in order to return individual subregions, which
+ // will then keep the original region alive as long as necessary.
class Region : public Buffer {
- public:
- Region(std::shared_ptr<MemoryMappedFile::MemoryMap> memory_map, uint8_t* data,
- int64_t size)
+ public:
+ Region(std::shared_ptr<MemoryMappedFile::MemoryMap> memory_map, uint8_t* data,
+ int64_t size)
: Buffer(data, size) {
- is_mutable_ = memory_map->writable();
- }
-
- ~Region() {
- if (data_ != nullptr) {
- int result = munmap(data(), static_cast<size_t>(size_));
- ARROW_CHECK_EQ(result, 0) << "munmap failed";
- }
- }
-
- // For convenience
- uint8_t* data() { return const_cast<uint8_t*>(data_); }
-
- void Detach() { data_ = nullptr; }
- };
-
- MemoryMap() : file_size_(0), map_len_(0) {}
-
- ~MemoryMap() { ARROW_CHECK_OK(Close()); }
-
- Status Close() {
- if (file_->is_open()) {
- // Lose our reference to the MemoryMappedRegion, so that munmap()
- // is called as soon as all buffer exports are released.
- region_.reset();
- return file_->Close();
- } else {
- return Status::OK();
- }
- }
-
- bool closed() const { return !file_->is_open(); }
-
- Status CheckClosed() const {
- if (closed()) {
- return Status::Invalid("Invalid operation on closed file");
- }
- return Status::OK();
- }
-
- Status Open(const std::string& path, FileMode::type mode, const int64_t offset = 0,
- const int64_t length = -1) {
- file_.reset(new OSFile());
-
- if (mode != FileMode::READ) {
- // Memory mapping has permission failures if PROT_READ not set
- prot_flags_ = PROT_READ | PROT_WRITE;
- map_mode_ = MAP_SHARED;
- constexpr bool append = false;
- constexpr bool truncate = false;
- constexpr bool write_only = false;
- RETURN_NOT_OK(file_->OpenWritable(path, truncate, append, write_only));
- } else {
- prot_flags_ = PROT_READ;
- map_mode_ = MAP_PRIVATE; // Changes are not to be committed back to the file
- RETURN_NOT_OK(file_->OpenReadable(path));
- }
- map_len_ = offset_ = 0;
-
- // Memory mapping fails when file size is 0
- // delay it until the first resize
- if (file_->size() > 0) {
- RETURN_NOT_OK(InitMMap(file_->size(), false, offset, length));
- }
-
- position_ = 0;
-
- return Status::OK();
- }
-
- // Resize the mmap and file to the specified size.
- // Resize on memory mapped file region is not supported.
- Status Resize(const int64_t new_size) {
- if (!writable()) {
- return Status::IOError("Cannot resize a readonly memory map");
- }
- if (map_len_ != file_size_) {
- return Status::IOError("Cannot resize a partial memory map");
- }
- if (region_.use_count() > 1) {
- // There are buffer exports currently, the MemoryMapRemap() call
- // would make the buffers invalid
- return Status::IOError("Cannot resize memory map while there are active readers");
- }
-
- if (new_size == 0) {
- if (map_len_ > 0) {
- // Just unmap the mmap and truncate the file to 0 size
- region_.reset();
- RETURN_NOT_OK(::arrow::internal::FileTruncate(file_->fd(), 0));
- map_len_ = offset_ = file_size_ = 0;
- }
- position_ = 0;
- return Status::OK();
- }
-
- if (map_len_ > 0) {
- void* result;
- auto data = region_->data();
- RETURN_NOT_OK(::arrow::internal::MemoryMapRemap(data, map_len_, new_size,
- file_->fd(), &result));
- region_->Detach(); // avoid munmap() on destruction
- region_ = std::make_shared<Region>(shared_from_this(),
- static_cast<uint8_t*>(result), new_size);
- map_len_ = file_size_ = new_size;
- offset_ = 0;
- if (position_ > map_len_) {
- position_ = map_len_;
- }
- } else {
- DCHECK_EQ(position_, 0);
- // the mmap is not yet initialized, resize the underlying
- // file, since it might have been 0-sized
- RETURN_NOT_OK(InitMMap(new_size, /*resize_file*/ true));
- }
- return Status::OK();
- }
-
- Status Seek(int64_t position) {
- if (position < 0) {
- return Status::Invalid("position is out of bounds");
- }
- position_ = position;
- return Status::OK();
- }
-
- Result<std::shared_ptr<Buffer>> Slice(int64_t offset, int64_t length) {
- length = std::max<int64_t>(0, std::min(length, map_len_ - offset));
-
- if (length > 0) {
- DCHECK_NE(region_, nullptr);
- return SliceBuffer(region_, offset, length);
- } else {
- return std::make_shared<Buffer>(nullptr, 0);
- }
- }
-
- // map_len_ == file_size_ if memory mapping on the whole file
- int64_t size() const { return map_len_; }
-
- int64_t position() { return position_; }
-
- void advance(int64_t nbytes) { position_ = position_ + nbytes; }
-
+ is_mutable_ = memory_map->writable();
+ }
+
+ ~Region() {
+ if (data_ != nullptr) {
+ int result = munmap(data(), static_cast<size_t>(size_));
+ ARROW_CHECK_EQ(result, 0) << "munmap failed";
+ }
+ }
+
+ // For convenience
+ uint8_t* data() { return const_cast<uint8_t*>(data_); }
+
+ void Detach() { data_ = nullptr; }
+ };
+
+ MemoryMap() : file_size_(0), map_len_(0) {}
+
+ ~MemoryMap() { ARROW_CHECK_OK(Close()); }
+
+ Status Close() {
+ if (file_->is_open()) {
+ // Drop our reference to the mapped Region, so that munmap()
+ // is called as soon as all buffer exports are released.
+ region_.reset();
+ return file_->Close();
+ } else {
+ return Status::OK();
+ }
+ }
+
+ bool closed() const { return !file_->is_open(); }
+
+ Status CheckClosed() const {
+ if (closed()) {
+ return Status::Invalid("Invalid operation on closed file");
+ }
+ return Status::OK();
+ }
+
+ Status Open(const std::string& path, FileMode::type mode, const int64_t offset = 0,
+ const int64_t length = -1) {
+ file_.reset(new OSFile());
+
+ if (mode != FileMode::READ) {
+ // Memory mapping has permission failures if PROT_READ not set
+ prot_flags_ = PROT_READ | PROT_WRITE;
+ map_mode_ = MAP_SHARED;
+ constexpr bool append = false;
+ constexpr bool truncate = false;
+ constexpr bool write_only = false;
+ RETURN_NOT_OK(file_->OpenWritable(path, truncate, append, write_only));
+ } else {
+ prot_flags_ = PROT_READ;
+ map_mode_ = MAP_PRIVATE; // Changes are not to be committed back to the file
+ RETURN_NOT_OK(file_->OpenReadable(path));
+ }
+ map_len_ = offset_ = 0;
+
+ // Memory mapping fails when the file size is 0, so
+ // delay it until the first resize.
+ if (file_->size() > 0) {
+ RETURN_NOT_OK(InitMMap(file_->size(), false, offset, length));
+ }
+
+ position_ = 0;
+
+ return Status::OK();
+ }
+
+ // Resize the mmap and file to the specified size.
+ // Resizing a partial mapping (a sub-region of the file) is not supported.
+ Status Resize(const int64_t new_size) {
+ if (!writable()) {
+ return Status::IOError("Cannot resize a readonly memory map");
+ }
+ if (map_len_ != file_size_) {
+ return Status::IOError("Cannot resize a partial memory map");
+ }
+ if (region_.use_count() > 1) {
+ // There are buffer exports currently, the MemoryMapRemap() call
+ // would make the buffers invalid
+ return Status::IOError("Cannot resize memory map while there are active readers");
+ }
+
+ if (new_size == 0) {
+ if (map_len_ > 0) {
+ // Just unmap the mmap and truncate the file to 0 size
+ region_.reset();
+ RETURN_NOT_OK(::arrow::internal::FileTruncate(file_->fd(), 0));
+ map_len_ = offset_ = file_size_ = 0;
+ }
+ position_ = 0;
+ return Status::OK();
+ }
+
+ if (map_len_ > 0) {
+ void* result;
+ auto data = region_->data();
+ RETURN_NOT_OK(::arrow::internal::MemoryMapRemap(data, map_len_, new_size,
+ file_->fd(), &result));
+ region_->Detach(); // avoid munmap() on destruction
+ region_ = std::make_shared<Region>(shared_from_this(),
+ static_cast<uint8_t*>(result), new_size);
+ map_len_ = file_size_ = new_size;
+ offset_ = 0;
+ if (position_ > map_len_) {
+ position_ = map_len_;
+ }
+ } else {
+ DCHECK_EQ(position_, 0);
+ // the mmap is not yet initialized, resize the underlying
+ // file, since it might have been 0-sized
+ RETURN_NOT_OK(InitMMap(new_size, /*resize_file*/ true));
+ }
+ return Status::OK();
+ }
+
+ Status Seek(int64_t position) {
+ if (position < 0) {
+ return Status::Invalid("position is out of bounds");
+ }
+ position_ = position;
+ return Status::OK();
+ }
+
+ Result<std::shared_ptr<Buffer>> Slice(int64_t offset, int64_t length) {
+ length = std::max<int64_t>(0, std::min(length, map_len_ - offset));
+
+ if (length > 0) {
+ DCHECK_NE(region_, nullptr);
+ return SliceBuffer(region_, offset, length);
+ } else {
+ return std::make_shared<Buffer>(nullptr, 0);
+ }
+ }
+
+ // map_len_ == file_size_ when the whole file is memory-mapped
+ int64_t size() const { return map_len_; }
+
+ int64_t position() { return position_; }
+
+ void advance(int64_t nbytes) { position_ = position_ + nbytes; }
+
uint8_t* data() { return region_ ? region_->data() : nullptr; }
- uint8_t* head() { return data() + position_; }
-
- bool writable() { return file_->mode() != FileMode::READ; }
-
- bool opened() { return file_->is_open(); }
-
- int fd() const { return file_->fd(); }
-
- std::mutex& write_lock() { return file_->lock(); }
-
- std::mutex& resize_lock() { return resize_lock_; }
-
- private:
- // Initialize the mmap and set size, capacity and the data pointers
- Status InitMMap(int64_t initial_size, bool resize_file = false,
- const int64_t offset = 0, const int64_t length = -1) {
- DCHECK(!region_);
-
- if (resize_file) {
- RETURN_NOT_OK(::arrow::internal::FileTruncate(file_->fd(), initial_size));
- }
-
- size_t mmap_length = static_cast<size_t>(initial_size);
- if (length > initial_size) {
- return Status::Invalid("mapping length is beyond file size");
- }
- if (length >= 0 && length < initial_size) {
- // memory mapping a file region
- mmap_length = static_cast<size_t>(length);
- }
-
- void* result = mmap(nullptr, mmap_length, prot_flags_, map_mode_, file_->fd(),
- static_cast<off_t>(offset));
- if (result == MAP_FAILED) {
- return Status::IOError("Memory mapping file failed: ",
- ::arrow::internal::ErrnoMessage(errno));
- }
- map_len_ = mmap_length;
- offset_ = offset;
- region_ = std::make_shared<Region>(shared_from_this(), static_cast<uint8_t*>(result),
- map_len_);
- file_size_ = initial_size;
-
- return Status::OK();
- }
-
- std::unique_ptr<OSFile> file_;
- int prot_flags_;
- int map_mode_;
-
- std::shared_ptr<Region> region_;
- int64_t file_size_;
- int64_t position_;
- int64_t offset_;
- int64_t map_len_;
- std::mutex resize_lock_;
-};
-
-MemoryMappedFile::MemoryMappedFile() {}
-
-MemoryMappedFile::~MemoryMappedFile() { internal::CloseFromDestructor(this); }
-
-Result<std::shared_ptr<MemoryMappedFile>> MemoryMappedFile::Create(
- const std::string& path, int64_t size) {
- ARROW_ASSIGN_OR_RAISE(auto file, FileOutputStream::Open(path));
- RETURN_NOT_OK(::arrow::internal::FileTruncate(file->file_descriptor(), size));
- RETURN_NOT_OK(file->Close());
- return MemoryMappedFile::Open(path, FileMode::READWRITE);
-}
-
-Result<std::shared_ptr<MemoryMappedFile>> MemoryMappedFile::Open(const std::string& path,
- FileMode::type mode) {
- std::shared_ptr<MemoryMappedFile> result(new MemoryMappedFile());
-
- result->memory_map_.reset(new MemoryMap());
- RETURN_NOT_OK(result->memory_map_->Open(path, mode));
- return result;
-}
-
-Result<std::shared_ptr<MemoryMappedFile>> MemoryMappedFile::Open(const std::string& path,
- FileMode::type mode,
- const int64_t offset,
- const int64_t length) {
- std::shared_ptr<MemoryMappedFile> result(new MemoryMappedFile());
-
- result->memory_map_.reset(new MemoryMap());
- RETURN_NOT_OK(result->memory_map_->Open(path, mode, offset, length));
- return result;
-}
-
-Result<int64_t> MemoryMappedFile::GetSize() {
- RETURN_NOT_OK(memory_map_->CheckClosed());
- return memory_map_->size();
-}
-
-Result<int64_t> MemoryMappedFile::Tell() const {
- RETURN_NOT_OK(memory_map_->CheckClosed());
- return memory_map_->position();
-}
-
-Status MemoryMappedFile::Seek(int64_t position) {
- RETURN_NOT_OK(memory_map_->CheckClosed());
- return memory_map_->Seek(position);
-}
-
-Status MemoryMappedFile::Close() { return memory_map_->Close(); }
-
-bool MemoryMappedFile::closed() const { return memory_map_->closed(); }
-
-Result<std::shared_ptr<Buffer>> MemoryMappedFile::ReadAt(int64_t position,
- int64_t nbytes) {
- RETURN_NOT_OK(memory_map_->CheckClosed());
- // if the file is writable, we acquire the lock before creating any slices
- // in case a resize is triggered concurrently, otherwise we wouldn't detect
- // a change in the use count
- auto guard_resize = memory_map_->writable()
- ? std::unique_lock<std::mutex>(memory_map_->resize_lock())
- : std::unique_lock<std::mutex>();
-
- ARROW_ASSIGN_OR_RAISE(
- nbytes, internal::ValidateReadRange(position, nbytes, memory_map_->size()));
- // Arrange to page data in
- RETURN_NOT_OK(::arrow::internal::MemoryAdviseWillNeed(
- {{memory_map_->data() + position, static_cast<size_t>(nbytes)}}));
- return memory_map_->Slice(position, nbytes);
-}
-
-Result<int64_t> MemoryMappedFile::ReadAt(int64_t position, int64_t nbytes, void* out) {
- RETURN_NOT_OK(memory_map_->CheckClosed());
- auto guard_resize = memory_map_->writable()
- ? std::unique_lock<std::mutex>(memory_map_->resize_lock())
- : std::unique_lock<std::mutex>();
-
- ARROW_ASSIGN_OR_RAISE(
- nbytes, internal::ValidateReadRange(position, nbytes, memory_map_->size()));
- if (nbytes > 0) {
- memcpy(out, memory_map_->data() + position, static_cast<size_t>(nbytes));
- }
- return nbytes;
-}
-
-Result<int64_t> MemoryMappedFile::Read(int64_t nbytes, void* out) {
- RETURN_NOT_OK(memory_map_->CheckClosed());
- ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, ReadAt(memory_map_->position(), nbytes, out));
- memory_map_->advance(bytes_read);
- return bytes_read;
-}
-
-Result<std::shared_ptr<Buffer>> MemoryMappedFile::Read(int64_t nbytes) {
- RETURN_NOT_OK(memory_map_->CheckClosed());
- ARROW_ASSIGN_OR_RAISE(auto buffer, ReadAt(memory_map_->position(), nbytes));
- memory_map_->advance(buffer->size());
- return buffer;
-}
-
+ uint8_t* head() { return data() + position_; }  // pointer to the byte at the current position
+
+ bool writable() { return file_->mode() != FileMode::READ; }  // true for every mode other than FileMode::READ
+
+ bool opened() { return file_->is_open(); }  // forwards to the underlying OSFile
+
+ int fd() const { return file_->fd(); }  // file descriptor of the underlying OSFile
+
+ std::mutex& write_lock() { return file_->lock(); }  // the mutex owned by the underlying OSFile
+
+ std::mutex& resize_lock() { return resize_lock_; }  // separate mutex held by Resize() and by readers of writable maps
+
+ private:
+ // Create the mmap and record map_len_, offset_, region_ and file_size_
+ Status InitMMap(int64_t initial_size, bool resize_file = false,  // resize_file: call FileTruncate(fd, initial_size) first
+ const int64_t offset = 0, const int64_t length = -1) {  // length < 0 (the default) maps initial_size bytes
+ DCHECK(!region_);  // a mapping must not already exist
+
+ if (resize_file) {
+ RETURN_NOT_OK(::arrow::internal::FileTruncate(file_->fd(), initial_size));
+ }
+
+ size_t mmap_length = static_cast<size_t>(initial_size);
+ if (length > initial_size) {
+ return Status::Invalid("mapping length is beyond file size");
+ }
+ if (length >= 0 && length < initial_size) {
+ // memory mapping a file region: map only `length` bytes instead of initial_size
+ mmap_length = static_cast<size_t>(length);
+ }
+
+ void* result = mmap(nullptr, mmap_length, prot_flags_, map_mode_, file_->fd(),
+ static_cast<off_t>(offset));
+ if (result == MAP_FAILED) {
+ return Status::IOError("Memory mapping file failed: ",
+ ::arrow::internal::ErrnoMessage(errno));
+ }
+ map_len_ = mmap_length;
+ offset_ = offset;
+ region_ = std::make_shared<Region>(shared_from_this(), static_cast<uint8_t*>(result),
+ map_len_);
+ file_size_ = initial_size;  // records initial_size even when only a sub-range was mapped
+
+ return Status::OK();
+ }
+
+ std::unique_ptr<OSFile> file_;
+ int prot_flags_;
+ int map_mode_;
+
+ std::shared_ptr<Region> region_;
+ int64_t file_size_;
+ int64_t position_;
+ int64_t offset_;
+ int64_t map_len_;
+ std::mutex resize_lock_;
+};
+
+MemoryMappedFile::MemoryMappedFile() {}  // memory_map_ is assigned afterwards by the Open() factories
+
+MemoryMappedFile::~MemoryMappedFile() { internal::CloseFromDestructor(this); }  // closing is delegated to the CloseFromDestructor helper
+
+Result<std::shared_ptr<MemoryMappedFile>> MemoryMappedFile::Create(
+ const std::string& path, int64_t size) {
+ ARROW_ASSIGN_OR_RAISE(auto file, FileOutputStream::Open(path));  // open the path through a FileOutputStream
+ RETURN_NOT_OK(::arrow::internal::FileTruncate(file->file_descriptor(), size));  // resize the file to `size` via FileTruncate
+ RETURN_NOT_OK(file->Close());  // close the stream before the path is re-opened as a map
+ return MemoryMappedFile::Open(path, FileMode::READWRITE);  // result is always opened in READWRITE mode
+}
+
+Result<std::shared_ptr<MemoryMappedFile>> MemoryMappedFile::Open(const std::string& path,
+ FileMode::type mode) {
+ std::shared_ptr<MemoryMappedFile> result(new MemoryMappedFile());  // plain `new`: the constructor is private
+
+ result->memory_map_.reset(new MemoryMap());
+ RETURN_NOT_OK(result->memory_map_->Open(path, mode));  // on failure the error Status is returned instead of the file
+ return result;
+}
+
+Result<std::shared_ptr<MemoryMappedFile>> MemoryMappedFile::Open(const std::string& path,
+ FileMode::type mode,
+ const int64_t offset,
+ const int64_t length) {  // offset/length are forwarded unchanged to MemoryMap::Open
+ std::shared_ptr<MemoryMappedFile> result(new MemoryMappedFile());
+
+ result->memory_map_.reset(new MemoryMap());
+ RETURN_NOT_OK(result->memory_map_->Open(path, mode, offset, length));  // on failure the error Status is returned
+ return result;
+}
+
+Result<int64_t> MemoryMappedFile::GetSize() {
+ RETURN_NOT_OK(memory_map_->CheckClosed());  // propagate the error Status from CheckClosed(), if any
+ return memory_map_->size();  // size of the mapping (map_len_), which may be a sub-range of the file
+}
+
+Result<int64_t> MemoryMappedFile::Tell() const {
+ RETURN_NOT_OK(memory_map_->CheckClosed());  // propagate the error Status from CheckClosed(), if any
+ return memory_map_->position();  // current position within the map
+}
+
+Status MemoryMappedFile::Seek(int64_t position) {
+ RETURN_NOT_OK(memory_map_->CheckClosed());  // propagate the error Status from CheckClosed(), if any
+ return memory_map_->Seek(position);  // MemoryMap::Seek rejects only negative positions
+}
+
+Status MemoryMappedFile::Close() { return memory_map_->Close(); }  // forwards to MemoryMap::Close() without a CheckClosed() guard
+
+bool MemoryMappedFile::closed() const { return memory_map_->closed(); }  // forwards to MemoryMap::closed()
+
+Result<std::shared_ptr<Buffer>> MemoryMappedFile::ReadAt(int64_t position,
+ int64_t nbytes) {
+ RETURN_NOT_OK(memory_map_->CheckClosed());
+ // if the file is writable, we acquire the lock before creating any slices
+ // in case a resize is triggered concurrently, otherwise we wouldn't detect
+ // a change in the use count
+ auto guard_resize = memory_map_->writable()
+ ? std::unique_lock<std::mutex>(memory_map_->resize_lock())
+ : std::unique_lock<std::mutex>();  // read-only map: default-constructed lock, no mutex is held
+
+ ARROW_ASSIGN_OR_RAISE(
+ nbytes, internal::ValidateReadRange(position, nbytes, memory_map_->size()));  // nbytes is replaced by the validated length
+ // Arrange to page data in
+ RETURN_NOT_OK(::arrow::internal::MemoryAdviseWillNeed(
+ {{memory_map_->data() + position, static_cast<size_t>(nbytes)}}));
+ return memory_map_->Slice(position, nbytes);  // the buffer comes from slicing the map (via MemoryMap::Slice)
+}
+
+Result<int64_t> MemoryMappedFile::ReadAt(int64_t position, int64_t nbytes, void* out) {
+ RETURN_NOT_OK(memory_map_->CheckClosed());
+ auto guard_resize = memory_map_->writable()
+ ? std::unique_lock<std::mutex>(memory_map_->resize_lock())
+ : std::unique_lock<std::mutex>();  // read-only map: default-constructed lock, no mutex is held
+
+ ARROW_ASSIGN_OR_RAISE(
+ nbytes, internal::ValidateReadRange(position, nbytes, memory_map_->size()));  // nbytes is replaced by the validated length
+ if (nbytes > 0) {  // skip memcpy entirely for an empty read
+ memcpy(out, memory_map_->data() + position, static_cast<size_t>(nbytes));
+ }
+ return nbytes;  // number of bytes copied into `out`
+}
+
+Result<int64_t> MemoryMappedFile::Read(int64_t nbytes, void* out) {
+ RETURN_NOT_OK(memory_map_->CheckClosed());
+ ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, ReadAt(memory_map_->position(), nbytes, out));  // copy starting at the current position
+ memory_map_->advance(bytes_read);  // advance by the count actually read, not the count requested
+ return bytes_read;
+}
+
+Result<std::shared_ptr<Buffer>> MemoryMappedFile::Read(int64_t nbytes) {
+ RETURN_NOT_OK(memory_map_->CheckClosed());
+ ARROW_ASSIGN_OR_RAISE(auto buffer, ReadAt(memory_map_->position(), nbytes));  // slice starting at the current position
+ memory_map_->advance(buffer->size());  // advance by the size of the returned buffer
+ return buffer;
+}
+
Future<std::shared_ptr<Buffer>> MemoryMappedFile::ReadAsync(const IOContext&,
- int64_t position,
- int64_t nbytes) {
- return Future<std::shared_ptr<Buffer>>::MakeFinished(ReadAt(position, nbytes));
-}
-
-Status MemoryMappedFile::WillNeed(const std::vector<ReadRange>& ranges) {
- using ::arrow::internal::MemoryRegion;
-
- RETURN_NOT_OK(memory_map_->CheckClosed());
- auto guard_resize = memory_map_->writable()
- ? std::unique_lock<std::mutex>(memory_map_->resize_lock())
- : std::unique_lock<std::mutex>();
-
- std::vector<MemoryRegion> regions(ranges.size());
- for (size_t i = 0; i < ranges.size(); ++i) {
- const auto& range = ranges[i];
- ARROW_ASSIGN_OR_RAISE(
- auto size,
- internal::ValidateReadRange(range.offset, range.length, memory_map_->size()));
- DCHECK_NE(memory_map_->data(), nullptr);
- regions[i] = {const_cast<uint8_t*>(memory_map_->data() + range.offset),
- static_cast<size_t>(size)};
- }
- return ::arrow::internal::MemoryAdviseWillNeed(regions);
-}
-
-bool MemoryMappedFile::supports_zero_copy() const { return true; }
-
-Status MemoryMappedFile::WriteAt(int64_t position, const void* data, int64_t nbytes) {
- RETURN_NOT_OK(memory_map_->CheckClosed());
- std::lock_guard<std::mutex> guard(memory_map_->write_lock());
-
- if (!memory_map_->opened() || !memory_map_->writable()) {
- return Status::IOError("Unable to write");
- }
- RETURN_NOT_OK(internal::ValidateWriteRange(position, nbytes, memory_map_->size()));
-
- RETURN_NOT_OK(memory_map_->Seek(position));
- return WriteInternal(data, nbytes);
-}
-
-Status MemoryMappedFile::Write(const void* data, int64_t nbytes) {
- RETURN_NOT_OK(memory_map_->CheckClosed());
- std::lock_guard<std::mutex> guard(memory_map_->write_lock());
-
- if (!memory_map_->opened() || !memory_map_->writable()) {
- return Status::IOError("Unable to write");
- }
- RETURN_NOT_OK(
- internal::ValidateWriteRange(memory_map_->position(), nbytes, memory_map_->size()));
-
- return WriteInternal(data, nbytes);
-}
-
-Status MemoryMappedFile::WriteInternal(const void* data, int64_t nbytes) {
- memcpy(memory_map_->head(), data, static_cast<size_t>(nbytes));
- memory_map_->advance(nbytes);
- return Status::OK();
-}
-
-Status MemoryMappedFile::Resize(int64_t new_size) {
- RETURN_NOT_OK(memory_map_->CheckClosed());
- std::unique_lock<std::mutex> write_guard(memory_map_->write_lock(), std::defer_lock);
- std::unique_lock<std::mutex> resize_guard(memory_map_->resize_lock(), std::defer_lock);
- std::lock(write_guard, resize_guard);
- RETURN_NOT_OK(memory_map_->Resize(new_size));
- return Status::OK();
-}
-
-int MemoryMappedFile::file_descriptor() const { return memory_map_->fd(); }
-
-} // namespace io
-} // namespace arrow
+ int64_t position,
+ int64_t nbytes) {
+ return Future<std::shared_ptr<Buffer>>::MakeFinished(ReadAt(position, nbytes));
+}
+
+Status MemoryMappedFile::WillNeed(const std::vector<ReadRange>& ranges) {  // validate each range, then advise all of them in one call
+ using ::arrow::internal::MemoryRegion;
+
+ RETURN_NOT_OK(memory_map_->CheckClosed());
+ auto guard_resize = memory_map_->writable()
+ ? std::unique_lock<std::mutex>(memory_map_->resize_lock())
+ : std::unique_lock<std::mutex>();  // read-only map: default-constructed lock, no mutex is held
+
+ std::vector<MemoryRegion> regions(ranges.size());
+ for (size_t i = 0; i < ranges.size(); ++i) {
+ const auto& range = ranges[i];
+ ARROW_ASSIGN_OR_RAISE(
+ auto size,
+ internal::ValidateReadRange(range.offset, range.length, memory_map_->size()));  // an invalid range aborts the whole call
+ DCHECK_NE(memory_map_->data(), nullptr);
+ regions[i] = {const_cast<uint8_t*>(memory_map_->data() + range.offset),
+ static_cast<size_t>(size)};  // region = address inside the map + validated length
+ }
+ return ::arrow::internal::MemoryAdviseWillNeed(regions);
+}
+
+bool MemoryMappedFile::supports_zero_copy() const { return true; }  // always true for this class
+
+Status MemoryMappedFile::WriteAt(int64_t position, const void* data, int64_t nbytes) {
+ RETURN_NOT_OK(memory_map_->CheckClosed());
+ std::lock_guard<std::mutex> guard(memory_map_->write_lock());  // held until the function returns
+
+ if (!memory_map_->opened() || !memory_map_->writable()) {
+ return Status::IOError("Unable to write");
+ }
+ RETURN_NOT_OK(internal::ValidateWriteRange(position, nbytes, memory_map_->size()));  // validated against the current map size
+
+ RETURN_NOT_OK(memory_map_->Seek(position));  // moves the shared position; it ends up at position + nbytes
+ return WriteInternal(data, nbytes);
+}
+
+Status MemoryMappedFile::Write(const void* data, int64_t nbytes) {
+ RETURN_NOT_OK(memory_map_->CheckClosed());
+ std::lock_guard<std::mutex> guard(memory_map_->write_lock());  // held until the function returns
+
+ if (!memory_map_->opened() || !memory_map_->writable()) {
+ return Status::IOError("Unable to write");
+ }
+ RETURN_NOT_OK(
+ internal::ValidateWriteRange(memory_map_->position(), nbytes, memory_map_->size()));  // range starts at the current position
+
+ return WriteInternal(data, nbytes);
+}
+
+Status MemoryMappedFile::WriteInternal(const void* data, int64_t nbytes) {  // no checks here: Write()/WriteAt() validate the range first
+ memcpy(memory_map_->head(), data, static_cast<size_t>(nbytes));  // head() is data() + current position
+ memory_map_->advance(nbytes);  // position moves past the bytes just written
+ return Status::OK();
+}
+
+Status MemoryMappedFile::Resize(int64_t new_size) {
+ RETURN_NOT_OK(memory_map_->CheckClosed());
+ std::unique_lock<std::mutex> write_guard(memory_map_->write_lock(), std::defer_lock);  // deferred: not locked yet
+ std::unique_lock<std::mutex> resize_guard(memory_map_->resize_lock(), std::defer_lock);  // deferred: not locked yet
+ std::lock(write_guard, resize_guard);  // acquire both mutexes together using std::lock's deadlock avoidance
+ RETURN_NOT_OK(memory_map_->Resize(new_size));
+ return Status::OK();
+}
+
+int MemoryMappedFile::file_descriptor() const { return memory_map_->fd(); }  // descriptor of the file backing the map
+
+} // namespace io
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/file.h b/contrib/libs/apache/arrow/cpp/src/arrow/io/file.h
index 50d4f2c4dfc..5265c546a32 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/file.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/file.h
@@ -1,221 +1,221 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// IO interface implementations for OS files
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/io/concurrency.h"
-#include "arrow/io/interfaces.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class Buffer;
-class MemoryPool;
-class Status;
-
-namespace io {
-
-/// \brief An operating system file open in write-only mode.
-class ARROW_EXPORT FileOutputStream : public OutputStream {
- public:
- ~FileOutputStream() override;
-
- /// \brief Open a local file for writing, truncating any existing file
- /// \param[in] path with UTF8 encoding
- /// \param[in] append append to existing file, otherwise truncate to 0 bytes
- /// \return an open FileOutputStream
- ///
- /// When opening a new file, any existing file with the indicated path is
- /// truncated to 0 bytes, deleting any existing data
- static Result<std::shared_ptr<FileOutputStream>> Open(const std::string& path,
- bool append = false);
-
- /// \brief Open a file descriptor for writing. The underlying file isn't
- /// truncated.
- /// \param[in] fd file descriptor
- /// \return an open FileOutputStream
- ///
- /// The file descriptor becomes owned by the OutputStream, and will be closed
- /// on Close() or destruction.
- static Result<std::shared_ptr<FileOutputStream>> Open(int fd);
-
- // OutputStream interface
- Status Close() override;
- bool closed() const override;
- Result<int64_t> Tell() const override;
-
- // Write bytes to the stream. Thread-safe
- Status Write(const void* data, int64_t nbytes) override;
- /// \cond FALSE
- using Writable::Write;
- /// \endcond
-
- int file_descriptor() const;
-
- private:
- FileOutputStream();
-
- class ARROW_NO_EXPORT FileOutputStreamImpl;
- std::unique_ptr<FileOutputStreamImpl> impl_;
-};
-
-/// \brief An operating system file open in read-only mode.
-///
-/// Reads through this implementation are unbuffered. If many small reads
-/// need to be issued, it is recommended to use a buffering layer for good
-/// performance.
-class ARROW_EXPORT ReadableFile
- : public internal::RandomAccessFileConcurrencyWrapper<ReadableFile> {
- public:
- ~ReadableFile() override;
-
- /// \brief Open a local file for reading
- /// \param[in] path with UTF8 encoding
- /// \param[in] pool a MemoryPool for memory allocations
- /// \return ReadableFile instance
- static Result<std::shared_ptr<ReadableFile>> Open(
- const std::string& path, MemoryPool* pool = default_memory_pool());
-
- /// \brief Open a local file for reading
- /// \param[in] fd file descriptor
- /// \param[in] pool a MemoryPool for memory allocations
- /// \return ReadableFile instance
- ///
- /// The file descriptor becomes owned by the ReadableFile, and will be closed
- /// on Close() or destruction.
- static Result<std::shared_ptr<ReadableFile>> Open(
- int fd, MemoryPool* pool = default_memory_pool());
-
- bool closed() const override;
-
- int file_descriptor() const;
-
- Status WillNeed(const std::vector<ReadRange>& ranges) override;
-
- private:
- friend RandomAccessFileConcurrencyWrapper<ReadableFile>;
-
- explicit ReadableFile(MemoryPool* pool);
-
- Status DoClose();
- Result<int64_t> DoTell() const;
- Result<int64_t> DoRead(int64_t nbytes, void* buffer);
- Result<std::shared_ptr<Buffer>> DoRead(int64_t nbytes);
-
- /// \brief Thread-safe implementation of ReadAt
- Result<int64_t> DoReadAt(int64_t position, int64_t nbytes, void* out);
-
- /// \brief Thread-safe implementation of ReadAt
- Result<std::shared_ptr<Buffer>> DoReadAt(int64_t position, int64_t nbytes);
-
- Result<int64_t> DoGetSize();
- Status DoSeek(int64_t position);
-
- class ARROW_NO_EXPORT ReadableFileImpl;
- std::unique_ptr<ReadableFileImpl> impl_;
-};
-
-/// \brief A file interface that uses memory-mapped files for memory interactions
-///
-/// This implementation supports zero-copy reads. The same class is used
-/// for both reading and writing.
-///
-/// If opening a file in a writable mode, it is not truncated first as with
-/// FileOutputStream.
-class ARROW_EXPORT MemoryMappedFile : public ReadWriteFileInterface {
- public:
- ~MemoryMappedFile() override;
-
- /// Create new file with indicated size, return in read/write mode
- static Result<std::shared_ptr<MemoryMappedFile>> Create(const std::string& path,
- int64_t size);
-
- // mmap() with whole file
- static Result<std::shared_ptr<MemoryMappedFile>> Open(const std::string& path,
- FileMode::type mode);
-
- // mmap() with a region of file, the offset must be a multiple of the page size
- static Result<std::shared_ptr<MemoryMappedFile>> Open(const std::string& path,
- FileMode::type mode,
- const int64_t offset,
- const int64_t length);
-
- Status Close() override;
-
- bool closed() const override;
-
- Result<int64_t> Tell() const override;
-
- Status Seek(int64_t position) override;
-
- // Required by RandomAccessFile, copies memory into out. Not thread-safe
- Result<int64_t> Read(int64_t nbytes, void* out) override;
-
- // Zero copy read, moves position pointer. Not thread-safe
- Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
-
- // Zero-copy read, leaves position unchanged. Acquires a reader lock
- // for the duration of slice creation (typically very short). Is thread-safe.
- Result<std::shared_ptr<Buffer>> ReadAt(int64_t position, int64_t nbytes) override;
-
- // Raw copy of the memory at specified position. Thread-safe, but
- // locks out other readers for the duration of memcpy. Prefer the
- // zero copy method
- Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out) override;
-
- // Synchronous ReadAsync override
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// IO interface implementations for OS files
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/io/concurrency.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Buffer;
+class MemoryPool;
+class Status;
+
+namespace io {
+
+/// \brief An operating system file open in write-only mode.
+class ARROW_EXPORT FileOutputStream : public OutputStream {
+ public:
+ ~FileOutputStream() override;
+
+ /// \brief Open a local file for writing, truncating any existing file unless `append` is true
+ /// \param[in] path with UTF8 encoding
+ /// \param[in] append append to existing file, otherwise truncate to 0 bytes
+ /// \return an open FileOutputStream
+ ///
+ /// When opening without `append`, any existing file with the indicated path is
+ /// truncated to 0 bytes, deleting any existing data
+ static Result<std::shared_ptr<FileOutputStream>> Open(const std::string& path,
+ bool append = false);
+
+ /// \brief Open a file descriptor for writing. The underlying file isn't
+ /// truncated.
+ /// \param[in] fd file descriptor
+ /// \return an open FileOutputStream
+ ///
+ /// The file descriptor becomes owned by the OutputStream, and will be closed
+ /// on Close() or destruction.
+ static Result<std::shared_ptr<FileOutputStream>> Open(int fd);
+
+ // OutputStream interface
+ Status Close() override;
+ bool closed() const override;
+ Result<int64_t> Tell() const override;
+
+ // Write bytes to the stream. Thread-safe
+ Status Write(const void* data, int64_t nbytes) override;
+ /// \cond FALSE
+ using Writable::Write;  // keep the other Writable::Write overloads visible alongside the override above
+ /// \endcond
+
+ int file_descriptor() const;
+
+ private:
+ FileOutputStream();  // private: instances are obtained through the static Open() functions
+
+ class ARROW_NO_EXPORT FileOutputStreamImpl;  // implementation class, only forward-declared in this header
+ std::unique_ptr<FileOutputStreamImpl> impl_;
+};
+
+/// \brief An operating system file open in read-only mode.
+///
+/// Reads through this implementation are unbuffered. If many small reads
+/// need to be issued, it is recommended to use a buffering layer for good
+/// performance.
+class ARROW_EXPORT ReadableFile
+ : public internal::RandomAccessFileConcurrencyWrapper<ReadableFile> {
+ public:
+ ~ReadableFile() override;
+
+ /// \brief Open a local file for reading
+ /// \param[in] path with UTF8 encoding
+ /// \param[in] pool a MemoryPool for memory allocations
+ /// \return ReadableFile instance
+ static Result<std::shared_ptr<ReadableFile>> Open(
+ const std::string& path, MemoryPool* pool = default_memory_pool());
+
+ /// \brief Open a local file for reading
+ /// \param[in] fd file descriptor
+ /// \param[in] pool a MemoryPool for memory allocations
+ /// \return ReadableFile instance
+ ///
+ /// The file descriptor becomes owned by the ReadableFile, and will be closed
+ /// on Close() or destruction.
+ static Result<std::shared_ptr<ReadableFile>> Open(
+ int fd, MemoryPool* pool = default_memory_pool());
+
+ bool closed() const override;
+
+ int file_descriptor() const;
+
+ Status WillNeed(const std::vector<ReadRange>& ranges) override;
+
+ private:
+ friend RandomAccessFileConcurrencyWrapper<ReadableFile>;  // presumably lets the wrapper base call the private Do* methods below
+
+ explicit ReadableFile(MemoryPool* pool);  // private: instances are obtained through the static Open() functions
+
+ Status DoClose();
+ Result<int64_t> DoTell() const;
+ Result<int64_t> DoRead(int64_t nbytes, void* buffer);
+ Result<std::shared_ptr<Buffer>> DoRead(int64_t nbytes);
+
+ /// \brief Thread-safe implementation of ReadAt that copies into the caller-provided `out`
+ Result<int64_t> DoReadAt(int64_t position, int64_t nbytes, void* out);
+
+ /// \brief Thread-safe implementation of ReadAt that returns the bytes as a Buffer
+ Result<std::shared_ptr<Buffer>> DoReadAt(int64_t position, int64_t nbytes);
+
+ Result<int64_t> DoGetSize();
+ Status DoSeek(int64_t position);
+
+ class ARROW_NO_EXPORT ReadableFileImpl;  // implementation class, only forward-declared in this header
+ std::unique_ptr<ReadableFileImpl> impl_;
+};
+
+/// \brief A file interface that uses memory-mapped files for memory interactions
+///
+/// This implementation supports zero-copy reads. The same class is used
+/// for both reading and writing.
+///
+/// If opening a file in a writable mode, it is not truncated first as with
+/// FileOutputStream.
+class ARROW_EXPORT MemoryMappedFile : public ReadWriteFileInterface {
+ public:
+ ~MemoryMappedFile() override;
+
+ /// Create new file with indicated size, return in read/write mode
+ static Result<std::shared_ptr<MemoryMappedFile>> Create(const std::string& path,
+ int64_t size);
+
+ // mmap() with whole file
+ static Result<std::shared_ptr<MemoryMappedFile>> Open(const std::string& path,
+ FileMode::type mode);
+
+ // mmap() with a region of file, the offset must be a multiple of the page size
+ static Result<std::shared_ptr<MemoryMappedFile>> Open(const std::string& path,
+ FileMode::type mode,
+ const int64_t offset,
+ const int64_t length);
+
+ Status Close() override;
+
+ bool closed() const override;
+
+ Result<int64_t> Tell() const override;
+
+ Status Seek(int64_t position) override;
+
+ // Required by RandomAccessFile, copies memory into out. Not thread-safe
+ Result<int64_t> Read(int64_t nbytes, void* out) override;
+
+ // Zero copy read, moves position pointer. Not thread-safe
+ Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
+
+ // Zero-copy read, leaves position unchanged. Acquires a reader lock
+ // for the duration of slice creation (typically very short). Is thread-safe.
+ Result<std::shared_ptr<Buffer>> ReadAt(int64_t position, int64_t nbytes) override;
+
+ // Raw copy of the memory at specified position. Thread-safe, but
+ // locks out other readers for the duration of memcpy. Prefer the
+ // zero copy method
+ Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out) override;
+
+ // Synchronous ReadAsync override
Future<std::shared_ptr<Buffer>> ReadAsync(const IOContext&, int64_t position,
- int64_t nbytes) override;
-
- Status WillNeed(const std::vector<ReadRange>& ranges) override;
-
- bool supports_zero_copy() const override;
-
- /// Write data at the current position in the file. Thread-safe
- Status Write(const void* data, int64_t nbytes) override;
- /// \cond FALSE
- using Writable::Write;
- /// \endcond
-
- /// Set the size of the map to new_size.
- Status Resize(int64_t new_size);
-
- /// Write data at a particular position in the file. Thread-safe
- Status WriteAt(int64_t position, const void* data, int64_t nbytes) override;
-
- Result<int64_t> GetSize() override;
-
- int file_descriptor() const;
-
- private:
- MemoryMappedFile();
-
- Status WriteInternal(const void* data, int64_t nbytes);
-
- class ARROW_NO_EXPORT MemoryMap;
- std::shared_ptr<MemoryMap> memory_map_;
-};
-
-} // namespace io
-} // namespace arrow
+ int64_t nbytes) override;
+
+ Status WillNeed(const std::vector<ReadRange>& ranges) override;
+
+ bool supports_zero_copy() const override;
+
+ /// Write data at the current position in the file. Thread-safe
+ Status Write(const void* data, int64_t nbytes) override;
+ /// \cond FALSE
+ using Writable::Write;
+ /// \endcond
+
+ /// Set the size of the map to new_size.
+ Status Resize(int64_t new_size);
+
+ /// Write data at a particular position in the file. Thread-safe
+ Status WriteAt(int64_t position, const void* data, int64_t nbytes) override;
+
+ Result<int64_t> GetSize() override;
+
+ int file_descriptor() const;
+
+ private:
+ MemoryMappedFile();
+
+ Status WriteInternal(const void* data, int64_t nbytes);
+
+ class ARROW_NO_EXPORT MemoryMap;
+ std::shared_ptr<MemoryMap> memory_map_;
+};
+
+} // namespace io
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/interfaces.cc b/contrib/libs/apache/arrow/cpp/src/arrow/io/interfaces.cc
index 954c0f37b2d..02d9ea3269e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/interfaces.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/interfaces.cc
@@ -1,59 +1,59 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/io/interfaces.h"
-
-#include <algorithm>
-#include <cstdint>
-#include <iterator>
-#include <list>
-#include <memory>
-#include <mutex>
-#include <sstream>
-#include <typeinfo>
-#include <utility>
-
-#include "arrow/buffer.h"
-#include "arrow/io/concurrency.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/io/interfaces.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <iterator>
+#include <list>
+#include <memory>
+#include <mutex>
+#include <sstream>
+#include <typeinfo>
+#include <utility>
+
+#include "arrow/buffer.h"
+#include "arrow/io/concurrency.h"
#include "arrow/io/type_fwd.h"
-#include "arrow/io/util_internal.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
+#include "arrow/io/util_internal.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
#include "arrow/util/checked_cast.h"
-#include "arrow/util/future.h"
-#include "arrow/util/iterator.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/string_view.h"
-#include "arrow/util/thread_pool.h"
-
-namespace arrow {
-
+#include "arrow/util/future.h"
+#include "arrow/util/iterator.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/thread_pool.h"
+
+namespace arrow {
+
using internal::checked_pointer_cast;
-using internal::Executor;
-using internal::TaskHints;
-using internal::ThreadPool;
-
-namespace io {
-
+using internal::Executor;
+using internal::TaskHints;
+using internal::ThreadPool;
+
+namespace io {
+
static IOContext g_default_io_context{};
-
+
IOContext::IOContext(MemoryPool* pool, StopToken stop_token)
: IOContext(pool, internal::GetIOThreadPool(), std::move(stop_token)) {}
-
+
const IOContext& default_io_context() { return g_default_io_context; }
int GetIOThreadPoolCapacity() { return internal::GetIOThreadPool()->GetCapacity(); }
@@ -62,51 +62,51 @@ Status SetIOThreadPoolCapacity(int threads) {
return internal::GetIOThreadPool()->SetCapacity(threads);
}
-FileInterface::~FileInterface() = default;
-
-Status FileInterface::Abort() { return Close(); }
-
+FileInterface::~FileInterface() = default;
+
+Status FileInterface::Abort() { return Close(); }
+
namespace {
-class InputStreamBlockIterator {
- public:
- InputStreamBlockIterator(std::shared_ptr<InputStream> stream, int64_t block_size)
+class InputStreamBlockIterator {
+ public:
+ InputStreamBlockIterator(std::shared_ptr<InputStream> stream, int64_t block_size)
: stream_(std::move(stream)), block_size_(block_size) {}
-
- Result<std::shared_ptr<Buffer>> Next() {
- if (done_) {
- return nullptr;
- }
-
- ARROW_ASSIGN_OR_RAISE(auto out, stream_->Read(block_size_));
-
- if (out->size() == 0) {
- done_ = true;
- stream_.reset();
- out.reset();
- }
-
- return out;
- }
-
- protected:
- std::shared_ptr<InputStream> stream_;
- int64_t block_size_;
- bool done_ = false;
-};
-
+
+ Result<std::shared_ptr<Buffer>> Next() {
+ if (done_) {
+ return nullptr;
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto out, stream_->Read(block_size_));
+
+ if (out->size() == 0) {
+ done_ = true;
+ stream_.reset();
+ out.reset();
+ }
+
+ return out;
+ }
+
+ protected:
+ std::shared_ptr<InputStream> stream_;
+ int64_t block_size_;
+ bool done_ = false;
+};
+
} // namespace
const IOContext& Readable::io_context() const { return g_default_io_context; }
-Status InputStream::Advance(int64_t nbytes) { return Read(nbytes).status(); }
-
-Result<util::string_view> InputStream::Peek(int64_t ARROW_ARG_UNUSED(nbytes)) {
- return Status::NotImplemented("Peek not implemented");
-}
-
-bool InputStream::supports_zero_copy() const { return false; }
-
+Status InputStream::Advance(int64_t nbytes) { return Read(nbytes).status(); }
+
+Result<util::string_view> InputStream::Peek(int64_t ARROW_ARG_UNUSED(nbytes)) {
+ return Status::NotImplemented("Peek not implemented");
+}
+
+bool InputStream::supports_zero_copy() const { return false; }
+
Result<std::shared_ptr<const KeyValueMetadata>> InputStream::ReadMetadata() {
return std::shared_ptr<const KeyValueMetadata>{};
}
@@ -123,265 +123,265 @@ Future<std::shared_ptr<const KeyValueMetadata>> InputStream::ReadMetadataAsync()
return ReadMetadataAsync(io_context());
}
-Result<Iterator<std::shared_ptr<Buffer>>> MakeInputStreamIterator(
- std::shared_ptr<InputStream> stream, int64_t block_size) {
- if (stream->closed()) {
- return Status::Invalid("Cannot take iterator on closed stream");
- }
- DCHECK_GT(block_size, 0);
- return Iterator<std::shared_ptr<Buffer>>(InputStreamBlockIterator(stream, block_size));
-}
-
+Result<Iterator<std::shared_ptr<Buffer>>> MakeInputStreamIterator(
+ std::shared_ptr<InputStream> stream, int64_t block_size) {
+ if (stream->closed()) {
+ return Status::Invalid("Cannot take iterator on closed stream");
+ }
+ DCHECK_GT(block_size, 0);
+ return Iterator<std::shared_ptr<Buffer>>(InputStreamBlockIterator(stream, block_size));
+}
+
struct RandomAccessFile::Impl {
- std::mutex lock_;
-};
-
-RandomAccessFile::~RandomAccessFile() = default;
-
+ std::mutex lock_;
+};
+
+RandomAccessFile::~RandomAccessFile() = default;
+
RandomAccessFile::RandomAccessFile() : interface_impl_(new Impl()) {}
-
-Result<int64_t> RandomAccessFile::ReadAt(int64_t position, int64_t nbytes, void* out) {
- std::lock_guard<std::mutex> lock(interface_impl_->lock_);
- RETURN_NOT_OK(Seek(position));
- return Read(nbytes, out);
-}
-
-Result<std::shared_ptr<Buffer>> RandomAccessFile::ReadAt(int64_t position,
- int64_t nbytes) {
- std::lock_guard<std::mutex> lock(interface_impl_->lock_);
- RETURN_NOT_OK(Seek(position));
- return Read(nbytes);
-}
-
-// Default ReadAsync() implementation: simply issue the read on the context's executor
+
+Result<int64_t> RandomAccessFile::ReadAt(int64_t position, int64_t nbytes, void* out) {
+ std::lock_guard<std::mutex> lock(interface_impl_->lock_);
+ RETURN_NOT_OK(Seek(position));
+ return Read(nbytes, out);
+}
+
+Result<std::shared_ptr<Buffer>> RandomAccessFile::ReadAt(int64_t position,
+ int64_t nbytes) {
+ std::lock_guard<std::mutex> lock(interface_impl_->lock_);
+ RETURN_NOT_OK(Seek(position));
+ return Read(nbytes);
+}
+
+// Default ReadAsync() implementation: simply issue the read on the context's executor
Future<std::shared_ptr<Buffer>> RandomAccessFile::ReadAsync(const IOContext& ctx,
- int64_t position,
- int64_t nbytes) {
+ int64_t position,
+ int64_t nbytes) {
auto self = checked_pointer_cast<RandomAccessFile>(shared_from_this());
return DeferNotOk(internal::SubmitIO(
ctx, [self, position, nbytes] { return self->ReadAt(position, nbytes); }));
-}
-
+}
+
Future<std::shared_ptr<Buffer>> RandomAccessFile::ReadAsync(int64_t position,
int64_t nbytes) {
return ReadAsync(io_context(), position, nbytes);
}
-// Default WillNeed() implementation: no-op
-Status RandomAccessFile::WillNeed(const std::vector<ReadRange>& ranges) {
- return Status::OK();
-}
-
+// Default WillNeed() implementation: no-op
+Status RandomAccessFile::WillNeed(const std::vector<ReadRange>& ranges) {
+ return Status::OK();
+}
+
Status Writable::Write(util::string_view data) {
return Write(data.data(), static_cast<int64_t>(data.size()));
-}
-
-Status Writable::Write(const std::shared_ptr<Buffer>& data) {
- return Write(data->data(), data->size());
-}
-
-Status Writable::Flush() { return Status::OK(); }
-
-// An InputStream that reads from a delimited range of a RandomAccessFile
-class FileSegmentReader
- : public internal::InputStreamConcurrencyWrapper<FileSegmentReader> {
- public:
- FileSegmentReader(std::shared_ptr<RandomAccessFile> file, int64_t file_offset,
- int64_t nbytes)
- : file_(std::move(file)),
- closed_(false),
- position_(0),
- file_offset_(file_offset),
- nbytes_(nbytes) {
- FileInterface::set_mode(FileMode::READ);
- }
-
- Status CheckOpen() const {
- if (closed_) {
- return Status::IOError("Stream is closed");
- }
- return Status::OK();
- }
-
- Status DoClose() {
- closed_ = true;
- return Status::OK();
- }
-
- Result<int64_t> DoTell() const {
- RETURN_NOT_OK(CheckOpen());
- return position_;
- }
-
- bool closed() const override { return closed_; }
-
- Result<int64_t> DoRead(int64_t nbytes, void* out) {
- RETURN_NOT_OK(CheckOpen());
- int64_t bytes_to_read = std::min(nbytes, nbytes_ - position_);
- ARROW_ASSIGN_OR_RAISE(int64_t bytes_read,
- file_->ReadAt(file_offset_ + position_, bytes_to_read, out));
- position_ += bytes_read;
- return bytes_read;
- }
-
- Result<std::shared_ptr<Buffer>> DoRead(int64_t nbytes) {
- RETURN_NOT_OK(CheckOpen());
- int64_t bytes_to_read = std::min(nbytes, nbytes_ - position_);
- ARROW_ASSIGN_OR_RAISE(auto buffer,
- file_->ReadAt(file_offset_ + position_, bytes_to_read));
- position_ += buffer->size();
- return buffer;
- }
-
- private:
- std::shared_ptr<RandomAccessFile> file_;
- bool closed_;
- int64_t position_;
- int64_t file_offset_;
- int64_t nbytes_;
-};
-
-std::shared_ptr<InputStream> RandomAccessFile::GetStream(
- std::shared_ptr<RandomAccessFile> file, int64_t file_offset, int64_t nbytes) {
- return std::make_shared<FileSegmentReader>(std::move(file), file_offset, nbytes);
-}
-
-// -----------------------------------------------------------------------
-// Implement utilities exported from concurrency.h and util_internal.h
-
-namespace internal {
-
-void CloseFromDestructor(FileInterface* file) {
- Status st = file->Close();
- if (!st.ok()) {
- auto file_type = typeid(*file).name();
-#ifdef NDEBUG
- ARROW_LOG(ERROR) << "Error ignored when destroying file of type " << file_type << ": "
- << st;
-#else
- std::stringstream ss;
- ss << "When destroying file of type " << file_type << ": " << st.message();
- ARROW_LOG(FATAL) << st.WithMessage(ss.str());
-#endif
- }
-}
-
-Result<int64_t> ValidateReadRange(int64_t offset, int64_t size, int64_t file_size) {
- if (offset < 0 || size < 0) {
- return Status::Invalid("Invalid read (offset = ", offset, ", size = ", size, ")");
- }
- if (offset > file_size) {
- return Status::IOError("Read out of bounds (offset = ", offset, ", size = ", size,
- ") in file of size ", file_size);
- }
- return std::min(size, file_size - offset);
-}
-
-Status ValidateWriteRange(int64_t offset, int64_t size, int64_t file_size) {
- if (offset < 0 || size < 0) {
- return Status::Invalid("Invalid write (offset = ", offset, ", size = ", size, ")");
- }
- if (offset + size > file_size) {
- return Status::IOError("Write out of bounds (offset = ", offset, ", size = ", size,
- ") in file of size ", file_size);
- }
- return Status::OK();
-}
-
-Status ValidateRange(int64_t offset, int64_t size) {
- if (offset < 0 || size < 0) {
- return Status::Invalid("Invalid IO range (offset = ", offset, ", size = ", size, ")");
- }
- return Status::OK();
-}
-
-#ifndef NDEBUG
-
-// Debug mode concurrency checking
-
-struct SharedExclusiveChecker::Impl {
- std::mutex mutex;
- int64_t n_shared = 0;
- int64_t n_exclusive = 0;
-};
-
-SharedExclusiveChecker::SharedExclusiveChecker() : impl_(new Impl) {}
-
-void SharedExclusiveChecker::LockShared() {
- std::lock_guard<std::mutex> lock(impl_->mutex);
- // XXX The error message doesn't really describe the actual situation
- // (e.g. ReadAt() called while Read() call in progress)
- ARROW_CHECK_EQ(impl_->n_exclusive, 0)
- << "Attempted to take shared lock while locked exclusive";
- ++impl_->n_shared;
-}
-
-void SharedExclusiveChecker::UnlockShared() {
- std::lock_guard<std::mutex> lock(impl_->mutex);
- ARROW_CHECK_GT(impl_->n_shared, 0);
- --impl_->n_shared;
-}
-
-void SharedExclusiveChecker::LockExclusive() {
- std::lock_guard<std::mutex> lock(impl_->mutex);
- ARROW_CHECK_EQ(impl_->n_shared, 0)
- << "Attempted to take exclusive lock while locked shared";
- ARROW_CHECK_EQ(impl_->n_exclusive, 0)
- << "Attempted to take exclusive lock while already locked exclusive";
- ++impl_->n_exclusive;
-}
-
-void SharedExclusiveChecker::UnlockExclusive() {
- std::lock_guard<std::mutex> lock(impl_->mutex);
- ARROW_CHECK_EQ(impl_->n_exclusive, 1);
- --impl_->n_exclusive;
-}
-
-#else
-
-// Release mode no-op concurrency checking
-
-struct SharedExclusiveChecker::Impl {};
-
-SharedExclusiveChecker::SharedExclusiveChecker() {}
-
-void SharedExclusiveChecker::LockShared() {}
-void SharedExclusiveChecker::UnlockShared() {}
-void SharedExclusiveChecker::LockExclusive() {}
-void SharedExclusiveChecker::UnlockExclusive() {}
-
-#endif
-
-static std::shared_ptr<ThreadPool> MakeIOThreadPool() {
- auto maybe_pool = ThreadPool::MakeEternal(/*threads=*/8);
- if (!maybe_pool.ok()) {
- maybe_pool.status().Abort("Failed to create global IO thread pool");
- }
- return *std::move(maybe_pool);
-}
-
-ThreadPool* GetIOThreadPool() {
- static std::shared_ptr<ThreadPool> pool = MakeIOThreadPool();
- return pool.get();
-}
-
-// -----------------------------------------------------------------------
-// CoalesceReadRanges
-
-namespace {
-
-struct ReadRangeCombiner {
- std::vector<ReadRange> Coalesce(std::vector<ReadRange> ranges) {
- if (ranges.empty()) {
- return ranges;
- }
-
- // Remove zero-sized ranges
- auto end = std::remove_if(ranges.begin(), ranges.end(),
- [](const ReadRange& range) { return range.length == 0; });
- // Sort in position order
+}
+
+Status Writable::Write(const std::shared_ptr<Buffer>& data) {
+ return Write(data->data(), data->size());
+}
+
+Status Writable::Flush() { return Status::OK(); }
+
+// An InputStream that reads from a delimited range of a RandomAccessFile
+class FileSegmentReader
+ : public internal::InputStreamConcurrencyWrapper<FileSegmentReader> {
+ public:
+ FileSegmentReader(std::shared_ptr<RandomAccessFile> file, int64_t file_offset,
+ int64_t nbytes)
+ : file_(std::move(file)),
+ closed_(false),
+ position_(0),
+ file_offset_(file_offset),
+ nbytes_(nbytes) {
+ FileInterface::set_mode(FileMode::READ);
+ }
+
+ Status CheckOpen() const {
+ if (closed_) {
+ return Status::IOError("Stream is closed");
+ }
+ return Status::OK();
+ }
+
+ Status DoClose() {
+ closed_ = true;
+ return Status::OK();
+ }
+
+ Result<int64_t> DoTell() const {
+ RETURN_NOT_OK(CheckOpen());
+ return position_;
+ }
+
+ bool closed() const override { return closed_; }
+
+ Result<int64_t> DoRead(int64_t nbytes, void* out) {
+ RETURN_NOT_OK(CheckOpen());
+ int64_t bytes_to_read = std::min(nbytes, nbytes_ - position_);
+ ARROW_ASSIGN_OR_RAISE(int64_t bytes_read,
+ file_->ReadAt(file_offset_ + position_, bytes_to_read, out));
+ position_ += bytes_read;
+ return bytes_read;
+ }
+
+ Result<std::shared_ptr<Buffer>> DoRead(int64_t nbytes) {
+ RETURN_NOT_OK(CheckOpen());
+ int64_t bytes_to_read = std::min(nbytes, nbytes_ - position_);
+ ARROW_ASSIGN_OR_RAISE(auto buffer,
+ file_->ReadAt(file_offset_ + position_, bytes_to_read));
+ position_ += buffer->size();
+ return buffer;
+ }
+
+ private:
+ std::shared_ptr<RandomAccessFile> file_;
+ bool closed_;
+ int64_t position_;
+ int64_t file_offset_;
+ int64_t nbytes_;
+};
+
+std::shared_ptr<InputStream> RandomAccessFile::GetStream(
+ std::shared_ptr<RandomAccessFile> file, int64_t file_offset, int64_t nbytes) {
+ return std::make_shared<FileSegmentReader>(std::move(file), file_offset, nbytes);
+}
+
+// -----------------------------------------------------------------------
+// Implement utilities exported from concurrency.h and util_internal.h
+
+namespace internal {
+
+void CloseFromDestructor(FileInterface* file) {
+ Status st = file->Close();
+ if (!st.ok()) {
+ auto file_type = typeid(*file).name();
+#ifdef NDEBUG
+ ARROW_LOG(ERROR) << "Error ignored when destroying file of type " << file_type << ": "
+ << st;
+#else
+ std::stringstream ss;
+ ss << "When destroying file of type " << file_type << ": " << st.message();
+ ARROW_LOG(FATAL) << st.WithMessage(ss.str());
+#endif
+ }
+}
+
+Result<int64_t> ValidateReadRange(int64_t offset, int64_t size, int64_t file_size) {
+ if (offset < 0 || size < 0) {
+ return Status::Invalid("Invalid read (offset = ", offset, ", size = ", size, ")");
+ }
+ if (offset > file_size) {
+ return Status::IOError("Read out of bounds (offset = ", offset, ", size = ", size,
+ ") in file of size ", file_size);
+ }
+ return std::min(size, file_size - offset);
+}
+
+Status ValidateWriteRange(int64_t offset, int64_t size, int64_t file_size) {
+ if (offset < 0 || size < 0) {
+ return Status::Invalid("Invalid write (offset = ", offset, ", size = ", size, ")");
+ }
+ if (offset + size > file_size) {
+ return Status::IOError("Write out of bounds (offset = ", offset, ", size = ", size,
+ ") in file of size ", file_size);
+ }
+ return Status::OK();
+}
+
+Status ValidateRange(int64_t offset, int64_t size) {
+ if (offset < 0 || size < 0) {
+ return Status::Invalid("Invalid IO range (offset = ", offset, ", size = ", size, ")");
+ }
+ return Status::OK();
+}
+
+#ifndef NDEBUG
+
+// Debug mode concurrency checking
+
+struct SharedExclusiveChecker::Impl {
+ std::mutex mutex;
+ int64_t n_shared = 0;
+ int64_t n_exclusive = 0;
+};
+
+SharedExclusiveChecker::SharedExclusiveChecker() : impl_(new Impl) {}
+
+void SharedExclusiveChecker::LockShared() {
+ std::lock_guard<std::mutex> lock(impl_->mutex);
+ // XXX The error message doesn't really describe the actual situation
+ // (e.g. ReadAt() called while Read() call in progress)
+ ARROW_CHECK_EQ(impl_->n_exclusive, 0)
+ << "Attempted to take shared lock while locked exclusive";
+ ++impl_->n_shared;
+}
+
+void SharedExclusiveChecker::UnlockShared() {
+ std::lock_guard<std::mutex> lock(impl_->mutex);
+ ARROW_CHECK_GT(impl_->n_shared, 0);
+ --impl_->n_shared;
+}
+
+void SharedExclusiveChecker::LockExclusive() {
+ std::lock_guard<std::mutex> lock(impl_->mutex);
+ ARROW_CHECK_EQ(impl_->n_shared, 0)
+ << "Attempted to take exclusive lock while locked shared";
+ ARROW_CHECK_EQ(impl_->n_exclusive, 0)
+ << "Attempted to take exclusive lock while already locked exclusive";
+ ++impl_->n_exclusive;
+}
+
+void SharedExclusiveChecker::UnlockExclusive() {
+ std::lock_guard<std::mutex> lock(impl_->mutex);
+ ARROW_CHECK_EQ(impl_->n_exclusive, 1);
+ --impl_->n_exclusive;
+}
+
+#else
+
+// Release mode no-op concurrency checking
+
+struct SharedExclusiveChecker::Impl {};
+
+SharedExclusiveChecker::SharedExclusiveChecker() {}
+
+void SharedExclusiveChecker::LockShared() {}
+void SharedExclusiveChecker::UnlockShared() {}
+void SharedExclusiveChecker::LockExclusive() {}
+void SharedExclusiveChecker::UnlockExclusive() {}
+
+#endif
+
+static std::shared_ptr<ThreadPool> MakeIOThreadPool() {
+ auto maybe_pool = ThreadPool::MakeEternal(/*threads=*/8);
+ if (!maybe_pool.ok()) {
+ maybe_pool.status().Abort("Failed to create global IO thread pool");
+ }
+ return *std::move(maybe_pool);
+}
+
+ThreadPool* GetIOThreadPool() {
+ static std::shared_ptr<ThreadPool> pool = MakeIOThreadPool();
+ return pool.get();
+}
+
+// -----------------------------------------------------------------------
+// CoalesceReadRanges
+
+namespace {
+
+struct ReadRangeCombiner {
+ std::vector<ReadRange> Coalesce(std::vector<ReadRange> ranges) {
+ if (ranges.empty()) {
+ return ranges;
+ }
+
+ // Remove zero-sized ranges
+ auto end = std::remove_if(ranges.begin(), ranges.end(),
+ [](const ReadRange& range) { return range.length == 0; });
+ // Sort in position order
std::sort(ranges.begin(), end,
- [](const ReadRange& a, const ReadRange& b) { return a.offset < b.offset; });
+ [](const ReadRange& a, const ReadRange& b) { return a.offset < b.offset; });
// Remove ranges that overlap 100%
end = std::unique(ranges.begin(), end,
[](const ReadRange& left, const ReadRange& right) {
@@ -389,81 +389,81 @@ struct ReadRangeCombiner {
right.offset + right.length <= left.offset + left.length;
});
ranges.resize(end - ranges.begin());
-
- // Skip further processing if ranges is empty after removing zero-sized ranges.
- if (ranges.empty()) {
- return ranges;
- }
-
-#ifndef NDEBUG
- for (size_t i = 0; i < ranges.size() - 1; ++i) {
- const auto& left = ranges[i];
- const auto& right = ranges[i + 1];
- DCHECK_LE(left.offset, right.offset);
- DCHECK_LE(left.offset + left.length, right.offset) << "Some read ranges overlap";
- }
-#endif
-
- std::vector<ReadRange> coalesced;
-
- auto itr = ranges.begin();
- // Ensure ranges is not empty.
- DCHECK_LE(itr, ranges.end());
- // Start of the current coalesced range and end (exclusive) of previous range.
- // Both are initialized with the start of first range which is a placeholder value.
- int64_t coalesced_start = itr->offset;
- int64_t prev_range_end = coalesced_start;
-
- for (; itr < ranges.end(); ++itr) {
- const int64_t current_range_start = itr->offset;
- const int64_t current_range_end = current_range_start + itr->length;
- // We don't expect to have 0 sized ranges.
- DCHECK_LT(current_range_start, current_range_end);
-
- // At this point, the coalesced range is [coalesced_start, prev_range_end).
- // Stop coalescing if:
- // - coalesced range is too large, or
- // - distance (hole/gap) between consecutive ranges is too large.
- if (current_range_end - coalesced_start > range_size_limit_ ||
- current_range_start - prev_range_end > hole_size_limit_) {
- DCHECK_LE(coalesced_start, prev_range_end);
- // Append the coalesced range only if coalesced range size > 0.
- if (prev_range_end > coalesced_start) {
- coalesced.push_back({coalesced_start, prev_range_end - coalesced_start});
- }
- // Start a new coalesced range.
- coalesced_start = current_range_start;
- }
-
- // Update the prev_range_end with the current range.
- prev_range_end = current_range_end;
- }
- // Append the coalesced range only if coalesced range size > 0.
- if (prev_range_end > coalesced_start) {
- coalesced.push_back({coalesced_start, prev_range_end - coalesced_start});
- }
-
- DCHECK_EQ(coalesced.front().offset, ranges.front().offset);
- DCHECK_EQ(coalesced.back().offset + coalesced.back().length,
- ranges.back().offset + ranges.back().length);
- return coalesced;
- }
-
- const int64_t hole_size_limit_;
- const int64_t range_size_limit_;
-};
-
-}; // namespace
-
-std::vector<ReadRange> CoalesceReadRanges(std::vector<ReadRange> ranges,
- int64_t hole_size_limit,
- int64_t range_size_limit) {
- DCHECK_GT(range_size_limit, hole_size_limit);
-
- ReadRangeCombiner combiner{hole_size_limit, range_size_limit};
- return combiner.Coalesce(std::move(ranges));
-}
-
-} // namespace internal
-} // namespace io
-} // namespace arrow
+
+ // Skip further processing if ranges is empty after removing zero-sized ranges.
+ if (ranges.empty()) {
+ return ranges;
+ }
+
+#ifndef NDEBUG
+ for (size_t i = 0; i < ranges.size() - 1; ++i) {
+ const auto& left = ranges[i];
+ const auto& right = ranges[i + 1];
+ DCHECK_LE(left.offset, right.offset);
+ DCHECK_LE(left.offset + left.length, right.offset) << "Some read ranges overlap";
+ }
+#endif
+
+ std::vector<ReadRange> coalesced;
+
+ auto itr = ranges.begin();
+ // Ensure ranges is not empty.
+ DCHECK_LE(itr, ranges.end());
+ // Start of the current coalesced range and end (exclusive) of previous range.
+ // Both are initialized with the start of first range which is a placeholder value.
+ int64_t coalesced_start = itr->offset;
+ int64_t prev_range_end = coalesced_start;
+
+ for (; itr < ranges.end(); ++itr) {
+ const int64_t current_range_start = itr->offset;
+ const int64_t current_range_end = current_range_start + itr->length;
+ // We don't expect to have 0 sized ranges.
+ DCHECK_LT(current_range_start, current_range_end);
+
+ // At this point, the coalesced range is [coalesced_start, prev_range_end).
+ // Stop coalescing if:
+ // - coalesced range is too large, or
+ // - distance (hole/gap) between consecutive ranges is too large.
+ if (current_range_end - coalesced_start > range_size_limit_ ||
+ current_range_start - prev_range_end > hole_size_limit_) {
+ DCHECK_LE(coalesced_start, prev_range_end);
+ // Append the coalesced range only if coalesced range size > 0.
+ if (prev_range_end > coalesced_start) {
+ coalesced.push_back({coalesced_start, prev_range_end - coalesced_start});
+ }
+ // Start a new coalesced range.
+ coalesced_start = current_range_start;
+ }
+
+ // Update the prev_range_end with the current range.
+ prev_range_end = current_range_end;
+ }
+ // Append the coalesced range only if coalesced range size > 0.
+ if (prev_range_end > coalesced_start) {
+ coalesced.push_back({coalesced_start, prev_range_end - coalesced_start});
+ }
+
+ DCHECK_EQ(coalesced.front().offset, ranges.front().offset);
+ DCHECK_EQ(coalesced.back().offset + coalesced.back().length,
+ ranges.back().offset + ranges.back().length);
+ return coalesced;
+ }
+
+ const int64_t hole_size_limit_;
+ const int64_t range_size_limit_;
+};
+
+}; // namespace
+
+std::vector<ReadRange> CoalesceReadRanges(std::vector<ReadRange> ranges,
+ int64_t hole_size_limit,
+ int64_t range_size_limit) {
+ DCHECK_GT(range_size_limit, hole_size_limit);
+
+ ReadRangeCombiner combiner{hole_size_limit, range_size_limit};
+ return combiner.Coalesce(std::move(ranges));
+}
+
+} // namespace internal
+} // namespace io
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/interfaces.h b/contrib/libs/apache/arrow/cpp/src/arrow/io/interfaces.h
index e524afa99a3..01c43f6dfe0 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/interfaces.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/interfaces.h
@@ -1,54 +1,54 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/io/type_fwd.h"
-#include "arrow/type_fwd.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/io/type_fwd.h"
+#include "arrow/type_fwd.h"
#include "arrow/util/cancel.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/string_view.h"
-#include "arrow/util/type_fwd.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace io {
-
-struct ReadRange {
- int64_t offset;
- int64_t length;
-
- friend bool operator==(const ReadRange& left, const ReadRange& right) {
- return (left.offset == right.offset && left.length == right.length);
- }
- friend bool operator!=(const ReadRange& left, const ReadRange& right) {
- return !(left == right);
- }
-
- bool Contains(const ReadRange& other) const {
- return (offset <= other.offset && offset + length >= other.offset + other.length);
- }
-};
-
+#include "arrow/util/macros.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/type_fwd.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace io {
+
+struct ReadRange {
+ int64_t offset;
+ int64_t length;
+
+ friend bool operator==(const ReadRange& left, const ReadRange& right) {
+ return (left.offset == right.offset && left.length == right.length);
+ }
+ friend bool operator!=(const ReadRange& left, const ReadRange& right) {
+ return !(left == right);
+ }
+
+ bool Contains(const ReadRange& other) const {
+ return (offset <= other.offset && offset + length >= other.offset + other.length);
+ }
+};
+
/// EXPERIMENTAL: options provider for IO tasks
///
/// Includes an Executor (which will be used to execute asynchronous reads),
@@ -84,9 +84,9 @@ struct ARROW_EXPORT IOContext {
::arrow::internal::Executor* executor() const { return executor_; }
- // An application-specific ID, forwarded to executor task submissions
+ // An application-specific ID, forwarded to executor task submissions
int64_t external_id() const { return external_id_; }
-
+
StopToken stop_token() const { return stop_token_; }
private:
@@ -94,139 +94,139 @@ struct ARROW_EXPORT IOContext {
::arrow::internal::Executor* executor_;
int64_t external_id_;
StopToken stop_token_;
-};
-
+};
+
struct ARROW_DEPRECATED("renamed to IOContext in 4.0.0") AsyncContext : public IOContext {
using IOContext::IOContext;
};
-class ARROW_EXPORT FileInterface {
- public:
- virtual ~FileInterface() = 0;
-
- /// \brief Close the stream cleanly
- ///
- /// For writable streams, this will attempt to flush any pending data
- /// before releasing the underlying resource.
- ///
- /// After Close() is called, closed() returns true and the stream is not
- /// available for further operations.
- virtual Status Close() = 0;
-
- /// \brief Close the stream abruptly
- ///
- /// This method does not guarantee that any pending data is flushed.
- /// It merely releases any underlying resource used by the stream for
- /// its operation.
- ///
- /// After Abort() is called, closed() returns true and the stream is not
- /// available for further operations.
- virtual Status Abort();
-
- /// \brief Return the position in this stream
- virtual Result<int64_t> Tell() const = 0;
-
- /// \brief Return whether the stream is closed
- virtual bool closed() const = 0;
-
- FileMode::type mode() const { return mode_; }
-
- protected:
- FileInterface() : mode_(FileMode::READ) {}
- FileMode::type mode_;
- void set_mode(FileMode::type mode) { mode_ = mode; }
-
- private:
- ARROW_DISALLOW_COPY_AND_ASSIGN(FileInterface);
-};
-
-class ARROW_EXPORT Seekable {
- public:
- virtual ~Seekable() = default;
- virtual Status Seek(int64_t position) = 0;
-};
-
-class ARROW_EXPORT Writable {
- public:
- virtual ~Writable() = default;
-
- /// \brief Write the given data to the stream
- ///
- /// This method always processes the bytes in full. Depending on the
- /// semantics of the stream, the data may be written out immediately,
- /// held in a buffer, or written asynchronously. In the case where
- /// the stream buffers the data, it will be copied. To avoid potentially
- /// large copies, use the Write variant that takes an owned Buffer.
- virtual Status Write(const void* data, int64_t nbytes) = 0;
-
- /// \brief Write the given data to the stream
- ///
- /// Since the Buffer owns its memory, this method can avoid a copy if
- /// buffering is required. See Write(const void*, int64_t) for details.
- virtual Status Write(const std::shared_ptr<Buffer>& data);
-
- /// \brief Flush buffered bytes, if any
- virtual Status Flush();
-
+class ARROW_EXPORT FileInterface {
+ public:
+ virtual ~FileInterface() = 0;
+
+ /// \brief Close the stream cleanly
+ ///
+ /// For writable streams, this will attempt to flush any pending data
+ /// before releasing the underlying resource.
+ ///
+ /// After Close() is called, closed() returns true and the stream is not
+ /// available for further operations.
+ virtual Status Close() = 0;
+
+ /// \brief Close the stream abruptly
+ ///
+ /// This method does not guarantee that any pending data is flushed.
+ /// It merely releases any underlying resource used by the stream for
+ /// its operation.
+ ///
+ /// After Abort() is called, closed() returns true and the stream is not
+ /// available for further operations.
+ virtual Status Abort();
+
+ /// \brief Return the position in this stream
+ virtual Result<int64_t> Tell() const = 0;
+
+ /// \brief Return whether the stream is closed
+ virtual bool closed() const = 0;
+
+ FileMode::type mode() const { return mode_; }
+
+ protected:
+ FileInterface() : mode_(FileMode::READ) {}
+ FileMode::type mode_;
+ void set_mode(FileMode::type mode) { mode_ = mode; }
+
+ private:
+ ARROW_DISALLOW_COPY_AND_ASSIGN(FileInterface);
+};
+
+class ARROW_EXPORT Seekable {
+ public:
+ virtual ~Seekable() = default;
+ virtual Status Seek(int64_t position) = 0;
+};
+
+class ARROW_EXPORT Writable {
+ public:
+ virtual ~Writable() = default;
+
+ /// \brief Write the given data to the stream
+ ///
+ /// This method always processes the bytes in full. Depending on the
+ /// semantics of the stream, the data may be written out immediately,
+ /// held in a buffer, or written asynchronously. In the case where
+ /// the stream buffers the data, it will be copied. To avoid potentially
+ /// large copies, use the Write variant that takes an owned Buffer.
+ virtual Status Write(const void* data, int64_t nbytes) = 0;
+
+ /// \brief Write the given data to the stream
+ ///
+ /// Since the Buffer owns its memory, this method can avoid a copy if
+ /// buffering is required. See Write(const void*, int64_t) for details.
+ virtual Status Write(const std::shared_ptr<Buffer>& data);
+
+ /// \brief Flush buffered bytes, if any
+ virtual Status Flush();
+
Status Write(util::string_view data);
-};
-
-class ARROW_EXPORT Readable {
- public:
- virtual ~Readable() = default;
-
- /// \brief Read data from current file position.
- ///
- /// Read at most `nbytes` from the current file position into `out`.
- /// The number of bytes read is returned.
- virtual Result<int64_t> Read(int64_t nbytes, void* out) = 0;
-
- /// \brief Read data from current file position.
- ///
- /// Read at most `nbytes` from the current file position. Less bytes may
- /// be read if EOF is reached. This method updates the current file position.
- ///
- /// In some cases (e.g. a memory-mapped file), this method may avoid a
- /// memory copy.
- virtual Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) = 0;
+};
+
+class ARROW_EXPORT Readable {
+ public:
+ virtual ~Readable() = default;
+
+ /// \brief Read data from current file position.
+ ///
+ /// Read at most `nbytes` from the current file position into `out`.
+ /// The number of bytes read is returned.
+ virtual Result<int64_t> Read(int64_t nbytes, void* out) = 0;
+
+ /// \brief Read data from current file position.
+ ///
+ /// Read at most `nbytes` from the current file position. Less bytes may
+ /// be read if EOF is reached. This method updates the current file position.
+ ///
+ /// In some cases (e.g. a memory-mapped file), this method may avoid a
+ /// memory copy.
+ virtual Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) = 0;
/// EXPERIMENTAL: The IOContext associated with this file.
///
/// By default, this is the same as default_io_context(), but it may be
/// overriden by subclasses.
virtual const IOContext& io_context() const;
-};
-
-class ARROW_EXPORT OutputStream : virtual public FileInterface, public Writable {
- protected:
- OutputStream() = default;
-};
-
+};
+
+class ARROW_EXPORT OutputStream : virtual public FileInterface, public Writable {
+ protected:
+ OutputStream() = default;
+};
+
class ARROW_EXPORT InputStream : virtual public FileInterface,
virtual public Readable,
public std::enable_shared_from_this<InputStream> {
- public:
- /// \brief Advance or skip stream indicated number of bytes
- /// \param[in] nbytes the number to move forward
- /// \return Status
- Status Advance(int64_t nbytes);
-
- /// \brief Return zero-copy string_view to upcoming bytes.
- ///
- /// Do not modify the stream position. The view becomes invalid after
- /// any operation on the stream. May trigger buffering if the requested
- /// size is larger than the number of buffered bytes.
- ///
- /// May return NotImplemented on streams that don't support it.
- ///
- /// \param[in] nbytes the maximum number of bytes to see
- virtual Result<util::string_view> Peek(int64_t nbytes);
-
- /// \brief Return true if InputStream is capable of zero copy Buffer reads
- ///
- /// Zero copy reads imply the use of Buffer-returning Read() overloads.
- virtual bool supports_zero_copy() const;
-
+ public:
+ /// \brief Advance or skip stream indicated number of bytes
+ /// \param[in] nbytes the number to move forward
+ /// \return Status
+ Status Advance(int64_t nbytes);
+
+ /// \brief Return zero-copy string_view to upcoming bytes.
+ ///
+ /// Do not modify the stream position. The view becomes invalid after
+ /// any operation on the stream. May trigger buffering if the requested
+ /// size is larger than the number of buffered bytes.
+ ///
+ /// May return NotImplemented on streams that don't support it.
+ ///
+ /// \param[in] nbytes the maximum number of bytes to see
+ virtual Result<util::string_view> Peek(int64_t nbytes);
+
+ /// \brief Return true if InputStream is capable of zero copy Buffer reads
+ ///
+ /// Zero copy reads imply the use of Buffer-returning Read() overloads.
+ virtual bool supports_zero_copy() const;
+
/// \brief Read and return stream metadata
///
/// If the stream implementation doesn't support metadata, empty metadata
@@ -239,102 +239,102 @@ class ARROW_EXPORT InputStream : virtual public FileInterface,
const IOContext& io_context);
Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync();
- protected:
- InputStream() = default;
-};
-
+ protected:
+ InputStream() = default;
+};
+
class ARROW_EXPORT RandomAccessFile : public InputStream, public Seekable {
- public:
- /// Necessary because we hold a std::unique_ptr
- ~RandomAccessFile() override;
-
- /// \brief Create an isolated InputStream that reads a segment of a
- /// RandomAccessFile. Multiple such stream can be created and used
- /// independently without interference
- /// \param[in] file a file instance
- /// \param[in] file_offset the starting position in the file
- /// \param[in] nbytes the extent of bytes to read. The file should have
- /// sufficient bytes available
- static std::shared_ptr<InputStream> GetStream(std::shared_ptr<RandomAccessFile> file,
- int64_t file_offset, int64_t nbytes);
-
- /// \brief Return the total file size in bytes.
- ///
- /// This method does not read or move the current file position, so is safe
- /// to call concurrently with e.g. ReadAt().
- virtual Result<int64_t> GetSize() = 0;
-
- /// \brief Read data from given file position.
- ///
- /// At most `nbytes` bytes are read. The number of bytes read is returned
- /// (it can be less than `nbytes` if EOF is reached).
- ///
- /// This method can be safely called from multiple threads concurrently.
- /// It is unspecified whether this method updates the file position or not.
- ///
- /// The default RandomAccessFile-provided implementation uses Seek() and Read(),
- /// but subclasses may override it with a more efficient implementation
- /// that doesn't depend on implicit file positioning.
- ///
- /// \param[in] position Where to read bytes from
- /// \param[in] nbytes The number of bytes to read
- /// \param[out] out The buffer to read bytes into
- /// \return The number of bytes read, or an error
- virtual Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out);
-
- /// \brief Read data from given file position.
- ///
- /// At most `nbytes` bytes are read, but it can be less if EOF is reached.
- ///
- /// \param[in] position Where to read bytes from
- /// \param[in] nbytes The number of bytes to read
- /// \return A buffer containing the bytes read, or an error
- virtual Result<std::shared_ptr<Buffer>> ReadAt(int64_t position, int64_t nbytes);
-
- /// EXPERIMENTAL: Read data asynchronously.
+ public:
+ /// Necessary because we hold a std::unique_ptr
+ ~RandomAccessFile() override;
+
+ /// \brief Create an isolated InputStream that reads a segment of a
+ /// RandomAccessFile. Multiple such stream can be created and used
+ /// independently without interference
+ /// \param[in] file a file instance
+ /// \param[in] file_offset the starting position in the file
+ /// \param[in] nbytes the extent of bytes to read. The file should have
+ /// sufficient bytes available
+ static std::shared_ptr<InputStream> GetStream(std::shared_ptr<RandomAccessFile> file,
+ int64_t file_offset, int64_t nbytes);
+
+ /// \brief Return the total file size in bytes.
+ ///
+ /// This method does not read or move the current file position, so is safe
+ /// to call concurrently with e.g. ReadAt().
+ virtual Result<int64_t> GetSize() = 0;
+
+ /// \brief Read data from given file position.
+ ///
+ /// At most `nbytes` bytes are read. The number of bytes read is returned
+ /// (it can be less than `nbytes` if EOF is reached).
+ ///
+ /// This method can be safely called from multiple threads concurrently.
+ /// It is unspecified whether this method updates the file position or not.
+ ///
+ /// The default RandomAccessFile-provided implementation uses Seek() and Read(),
+ /// but subclasses may override it with a more efficient implementation
+ /// that doesn't depend on implicit file positioning.
+ ///
+ /// \param[in] position Where to read bytes from
+ /// \param[in] nbytes The number of bytes to read
+ /// \param[out] out The buffer to read bytes into
+ /// \return The number of bytes read, or an error
+ virtual Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out);
+
+ /// \brief Read data from given file position.
+ ///
+ /// At most `nbytes` bytes are read, but it can be less if EOF is reached.
+ ///
+ /// \param[in] position Where to read bytes from
+ /// \param[in] nbytes The number of bytes to read
+ /// \return A buffer containing the bytes read, or an error
+ virtual Result<std::shared_ptr<Buffer>> ReadAt(int64_t position, int64_t nbytes);
+
+ /// EXPERIMENTAL: Read data asynchronously.
virtual Future<std::shared_ptr<Buffer>> ReadAsync(const IOContext&, int64_t position,
- int64_t nbytes);
-
+ int64_t nbytes);
+
/// EXPERIMENTAL: Read data asynchronously, using the file's IOContext.
Future<std::shared_ptr<Buffer>> ReadAsync(int64_t position, int64_t nbytes);
- /// EXPERIMENTAL: Inform that the given ranges may be read soon.
- ///
- /// Some implementations might arrange to prefetch some of the data.
- /// However, no guarantee is made and the default implementation does nothing.
- /// For robust prefetching, use ReadAt() or ReadAsync().
- virtual Status WillNeed(const std::vector<ReadRange>& ranges);
-
- protected:
- RandomAccessFile();
-
- private:
+ /// EXPERIMENTAL: Inform that the given ranges may be read soon.
+ ///
+ /// Some implementations might arrange to prefetch some of the data.
+ /// However, no guarantee is made and the default implementation does nothing.
+ /// For robust prefetching, use ReadAt() or ReadAsync().
+ virtual Status WillNeed(const std::vector<ReadRange>& ranges);
+
+ protected:
+ RandomAccessFile();
+
+ private:
struct ARROW_NO_EXPORT Impl;
std::unique_ptr<Impl> interface_impl_;
-};
-
-class ARROW_EXPORT WritableFile : public OutputStream, public Seekable {
- public:
- virtual Status WriteAt(int64_t position, const void* data, int64_t nbytes) = 0;
-
- protected:
- WritableFile() = default;
-};
-
-class ARROW_EXPORT ReadWriteFileInterface : public RandomAccessFile, public WritableFile {
- protected:
- ReadWriteFileInterface() { RandomAccessFile::set_mode(FileMode::READWRITE); }
-};
-
-/// \brief Return an iterator on an input stream
-///
-/// The iterator yields a fixed-size block on each Next() call, except the
-/// last block in the stream which may be smaller.
-/// Once the end of stream is reached, Next() returns nullptr
-/// (unlike InputStream::Read() which returns an empty buffer).
-ARROW_EXPORT
-Result<Iterator<std::shared_ptr<Buffer>>> MakeInputStreamIterator(
- std::shared_ptr<InputStream> stream, int64_t block_size);
-
-} // namespace io
-} // namespace arrow
+};
+
+class ARROW_EXPORT WritableFile : public OutputStream, public Seekable {
+ public:
+ virtual Status WriteAt(int64_t position, const void* data, int64_t nbytes) = 0;
+
+ protected:
+ WritableFile() = default;
+};
+
+class ARROW_EXPORT ReadWriteFileInterface : public RandomAccessFile, public WritableFile {
+ protected:
+ ReadWriteFileInterface() { RandomAccessFile::set_mode(FileMode::READWRITE); }
+};
+
+/// \brief Return an iterator on an input stream
+///
+/// The iterator yields a fixed-size block on each Next() call, except the
+/// last block in the stream which may be smaller.
+/// Once the end of stream is reached, Next() returns nullptr
+/// (unlike InputStream::Read() which returns an empty buffer).
+ARROW_EXPORT
+Result<Iterator<std::shared_ptr<Buffer>>> MakeInputStreamIterator(
+ std::shared_ptr<InputStream> stream, int64_t block_size);
+
+} // namespace io
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/memory.cc b/contrib/libs/apache/arrow/cpp/src/arrow/io/memory.cc
index 6495242e63b..258c6b1403f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/memory.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/memory.cc
@@ -1,388 +1,388 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/io/memory.h"
-
-#include <algorithm>
-#include <cstdint>
-#include <cstring>
-#include <mutex>
-#include <utility>
-
-#include "arrow/buffer.h"
-#include "arrow/io/util_internal.h"
-#include "arrow/memory_pool.h"
-#include "arrow/status.h"
-#include "arrow/util/future.h"
-#include "arrow/util/io_util.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/memory.h"
-
-namespace arrow {
-namespace io {
-
-// ----------------------------------------------------------------------
-// OutputStream that writes to resizable buffer
-
-static constexpr int64_t kBufferMinimumSize = 256;
-
-BufferOutputStream::BufferOutputStream()
- : is_open_(false), capacity_(0), position_(0), mutable_data_(nullptr) {}
-
-BufferOutputStream::BufferOutputStream(const std::shared_ptr<ResizableBuffer>& buffer)
- : buffer_(buffer),
- is_open_(true),
- capacity_(buffer->size()),
- position_(0),
- mutable_data_(buffer->mutable_data()) {}
-
-Result<std::shared_ptr<BufferOutputStream>> BufferOutputStream::Create(
- int64_t initial_capacity, MemoryPool* pool) {
- // ctor is private, so cannot use make_shared
- auto ptr = std::shared_ptr<BufferOutputStream>(new BufferOutputStream);
- RETURN_NOT_OK(ptr->Reset(initial_capacity, pool));
- return ptr;
-}
-
-Status BufferOutputStream::Reset(int64_t initial_capacity, MemoryPool* pool) {
- ARROW_ASSIGN_OR_RAISE(buffer_, AllocateResizableBuffer(initial_capacity, pool));
- is_open_ = true;
- capacity_ = initial_capacity;
- position_ = 0;
- mutable_data_ = buffer_->mutable_data();
- return Status::OK();
-}
-
-BufferOutputStream::~BufferOutputStream() {
- if (buffer_) {
- internal::CloseFromDestructor(this);
- }
-}
-
-Status BufferOutputStream::Close() {
- if (is_open_) {
- is_open_ = false;
- if (position_ < capacity_) {
- RETURN_NOT_OK(buffer_->Resize(position_, false));
- }
- }
- return Status::OK();
-}
-
-bool BufferOutputStream::closed() const { return !is_open_; }
-
-Result<std::shared_ptr<Buffer>> BufferOutputStream::Finish() {
- RETURN_NOT_OK(Close());
- buffer_->ZeroPadding();
- is_open_ = false;
- return std::move(buffer_);
-}
-
-Result<int64_t> BufferOutputStream::Tell() const { return position_; }
-
-Status BufferOutputStream::Write(const void* data, int64_t nbytes) {
- if (ARROW_PREDICT_FALSE(!is_open_)) {
- return Status::IOError("OutputStream is closed");
- }
- DCHECK(buffer_);
- if (ARROW_PREDICT_TRUE(nbytes > 0)) {
- if (ARROW_PREDICT_FALSE(position_ + nbytes >= capacity_)) {
- RETURN_NOT_OK(Reserve(nbytes));
- }
- memcpy(mutable_data_ + position_, data, nbytes);
- position_ += nbytes;
- }
- return Status::OK();
-}
-
-Status BufferOutputStream::Reserve(int64_t nbytes) {
- // Always overallocate by doubling. It seems that it is a better growth
- // strategy, at least for memory_benchmark.cc.
- // This may be because it helps match the allocator's allocation buckets
- // more exactly. Or perhaps it hits a sweet spot in jemalloc.
- int64_t new_capacity = std::max(kBufferMinimumSize, capacity_);
- while (new_capacity < position_ + nbytes) {
- new_capacity = new_capacity * 2;
- }
- if (new_capacity > capacity_) {
- RETURN_NOT_OK(buffer_->Resize(new_capacity));
- capacity_ = new_capacity;
- mutable_data_ = buffer_->mutable_data();
- }
- return Status::OK();
-}
-
-// ----------------------------------------------------------------------
-// OutputStream that doesn't write anything
-
-Status MockOutputStream::Close() {
- is_open_ = false;
- return Status::OK();
-}
-
-bool MockOutputStream::closed() const { return !is_open_; }
-
-Result<int64_t> MockOutputStream::Tell() const { return extent_bytes_written_; }
-
-Status MockOutputStream::Write(const void* data, int64_t nbytes) {
- extent_bytes_written_ += nbytes;
- return Status::OK();
-}
-
-// ----------------------------------------------------------------------
-// In-memory buffer writer
-
-static constexpr int kMemcopyDefaultNumThreads = 1;
-static constexpr int64_t kMemcopyDefaultBlocksize = 64;
-static constexpr int64_t kMemcopyDefaultThreshold = 1024 * 1024;
-
-class FixedSizeBufferWriter::FixedSizeBufferWriterImpl {
- public:
- /// Input buffer must be mutable, will abort if not
-
- /// Input buffer must be mutable, will abort if not
- explicit FixedSizeBufferWriterImpl(const std::shared_ptr<Buffer>& buffer)
- : is_open_(true),
- memcopy_num_threads_(kMemcopyDefaultNumThreads),
- memcopy_blocksize_(kMemcopyDefaultBlocksize),
- memcopy_threshold_(kMemcopyDefaultThreshold) {
- buffer_ = buffer;
- ARROW_CHECK(buffer->is_mutable()) << "Must pass mutable buffer";
- mutable_data_ = buffer->mutable_data();
- size_ = buffer->size();
- position_ = 0;
- }
-
- Status Close() {
- is_open_ = false;
- return Status::OK();
- }
-
- bool closed() const { return !is_open_; }
-
- Status Seek(int64_t position) {
- if (position < 0 || position > size_) {
- return Status::IOError("Seek out of bounds");
- }
- position_ = position;
- return Status::OK();
- }
-
- Result<int64_t> Tell() { return position_; }
-
- Status Write(const void* data, int64_t nbytes) {
- RETURN_NOT_OK(internal::ValidateWriteRange(position_, nbytes, size_));
- if (nbytes > memcopy_threshold_ && memcopy_num_threads_ > 1) {
- ::arrow::internal::parallel_memcopy(mutable_data_ + position_,
- reinterpret_cast<const uint8_t*>(data), nbytes,
- memcopy_blocksize_, memcopy_num_threads_);
- } else {
- memcpy(mutable_data_ + position_, data, nbytes);
- }
- position_ += nbytes;
- return Status::OK();
- }
-
- Status WriteAt(int64_t position, const void* data, int64_t nbytes) {
- std::lock_guard<std::mutex> guard(lock_);
- RETURN_NOT_OK(internal::ValidateWriteRange(position, nbytes, size_));
- RETURN_NOT_OK(Seek(position));
- return Write(data, nbytes);
- }
-
- void set_memcopy_threads(int num_threads) { memcopy_num_threads_ = num_threads; }
-
- void set_memcopy_blocksize(int64_t blocksize) { memcopy_blocksize_ = blocksize; }
-
- void set_memcopy_threshold(int64_t threshold) { memcopy_threshold_ = threshold; }
-
- private:
- std::mutex lock_;
- std::shared_ptr<Buffer> buffer_;
- uint8_t* mutable_data_;
- int64_t size_;
- int64_t position_;
- bool is_open_;
-
- int memcopy_num_threads_;
- int64_t memcopy_blocksize_;
- int64_t memcopy_threshold_;
-};
-
-FixedSizeBufferWriter::FixedSizeBufferWriter(const std::shared_ptr<Buffer>& buffer)
- : impl_(new FixedSizeBufferWriterImpl(buffer)) {}
-
-FixedSizeBufferWriter::~FixedSizeBufferWriter() = default;
-
-Status FixedSizeBufferWriter::Close() { return impl_->Close(); }
-
-bool FixedSizeBufferWriter::closed() const { return impl_->closed(); }
-
-Status FixedSizeBufferWriter::Seek(int64_t position) { return impl_->Seek(position); }
-
-Result<int64_t> FixedSizeBufferWriter::Tell() const { return impl_->Tell(); }
-
-Status FixedSizeBufferWriter::Write(const void* data, int64_t nbytes) {
- return impl_->Write(data, nbytes);
-}
-
-Status FixedSizeBufferWriter::WriteAt(int64_t position, const void* data,
- int64_t nbytes) {
- return impl_->WriteAt(position, data, nbytes);
-}
-
-void FixedSizeBufferWriter::set_memcopy_threads(int num_threads) {
- impl_->set_memcopy_threads(num_threads);
-}
-
-void FixedSizeBufferWriter::set_memcopy_blocksize(int64_t blocksize) {
- impl_->set_memcopy_blocksize(blocksize);
-}
-
-void FixedSizeBufferWriter::set_memcopy_threshold(int64_t threshold) {
- impl_->set_memcopy_threshold(threshold);
-}
-
-// ----------------------------------------------------------------------
-// In-memory buffer reader
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/io/memory.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <mutex>
+#include <utility>
+
+#include "arrow/buffer.h"
+#include "arrow/io/util_internal.h"
+#include "arrow/memory_pool.h"
+#include "arrow/status.h"
+#include "arrow/util/future.h"
+#include "arrow/util/io_util.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/memory.h"
+
+namespace arrow {
+namespace io {
+
+// ----------------------------------------------------------------------
+// OutputStream that writes to resizable buffer
+
+static constexpr int64_t kBufferMinimumSize = 256;
+
+BufferOutputStream::BufferOutputStream()
+ : is_open_(false), capacity_(0), position_(0), mutable_data_(nullptr) {}
+
+BufferOutputStream::BufferOutputStream(const std::shared_ptr<ResizableBuffer>& buffer)
+ : buffer_(buffer),
+ is_open_(true),
+ capacity_(buffer->size()),
+ position_(0),
+ mutable_data_(buffer->mutable_data()) {}
+
+Result<std::shared_ptr<BufferOutputStream>> BufferOutputStream::Create(
+ int64_t initial_capacity, MemoryPool* pool) {
+ // ctor is private, so cannot use make_shared
+ auto ptr = std::shared_ptr<BufferOutputStream>(new BufferOutputStream);
+ RETURN_NOT_OK(ptr->Reset(initial_capacity, pool));
+ return ptr;
+}
+
+Status BufferOutputStream::Reset(int64_t initial_capacity, MemoryPool* pool) {
+ ARROW_ASSIGN_OR_RAISE(buffer_, AllocateResizableBuffer(initial_capacity, pool));
+ is_open_ = true;
+ capacity_ = initial_capacity;
+ position_ = 0;
+ mutable_data_ = buffer_->mutable_data();
+ return Status::OK();
+}
+
+BufferOutputStream::~BufferOutputStream() {
+ if (buffer_) {
+ internal::CloseFromDestructor(this);
+ }
+}
+
+Status BufferOutputStream::Close() {
+ if (is_open_) {
+ is_open_ = false;
+ if (position_ < capacity_) {
+ RETURN_NOT_OK(buffer_->Resize(position_, false));
+ }
+ }
+ return Status::OK();
+}
+
+bool BufferOutputStream::closed() const { return !is_open_; }
+
+Result<std::shared_ptr<Buffer>> BufferOutputStream::Finish() {
+ RETURN_NOT_OK(Close());
+ buffer_->ZeroPadding();
+ is_open_ = false;
+ return std::move(buffer_);
+}
+
+Result<int64_t> BufferOutputStream::Tell() const { return position_; }
+
+Status BufferOutputStream::Write(const void* data, int64_t nbytes) {
+ if (ARROW_PREDICT_FALSE(!is_open_)) {
+ return Status::IOError("OutputStream is closed");
+ }
+ DCHECK(buffer_);
+ if (ARROW_PREDICT_TRUE(nbytes > 0)) {
+ if (ARROW_PREDICT_FALSE(position_ + nbytes >= capacity_)) {
+ RETURN_NOT_OK(Reserve(nbytes));
+ }
+ memcpy(mutable_data_ + position_, data, nbytes);
+ position_ += nbytes;
+ }
+ return Status::OK();
+}
+
+Status BufferOutputStream::Reserve(int64_t nbytes) {
+ // Always overallocate by doubling. It seems that it is a better growth
+ // strategy, at least for memory_benchmark.cc.
+ // This may be because it helps match the allocator's allocation buckets
+ // more exactly. Or perhaps it hits a sweet spot in jemalloc.
+ int64_t new_capacity = std::max(kBufferMinimumSize, capacity_);
+ while (new_capacity < position_ + nbytes) {
+ new_capacity = new_capacity * 2;
+ }
+ if (new_capacity > capacity_) {
+ RETURN_NOT_OK(buffer_->Resize(new_capacity));
+ capacity_ = new_capacity;
+ mutable_data_ = buffer_->mutable_data();
+ }
+ return Status::OK();
+}
+
+// ----------------------------------------------------------------------
+// OutputStream that doesn't write anything
+
+Status MockOutputStream::Close() {
+ is_open_ = false;
+ return Status::OK();
+}
+
+bool MockOutputStream::closed() const { return !is_open_; }
+
+Result<int64_t> MockOutputStream::Tell() const { return extent_bytes_written_; }
+
+Status MockOutputStream::Write(const void* data, int64_t nbytes) {
+ extent_bytes_written_ += nbytes;
+ return Status::OK();
+}
+
+// ----------------------------------------------------------------------
+// In-memory buffer writer
+
+static constexpr int kMemcopyDefaultNumThreads = 1;
+static constexpr int64_t kMemcopyDefaultBlocksize = 64;
+static constexpr int64_t kMemcopyDefaultThreshold = 1024 * 1024;
+
+class FixedSizeBufferWriter::FixedSizeBufferWriterImpl {
+ public:
+ /// Input buffer must be mutable, will abort if not
+
+ /// Input buffer must be mutable, will abort if not
+ explicit FixedSizeBufferWriterImpl(const std::shared_ptr<Buffer>& buffer)
+ : is_open_(true),
+ memcopy_num_threads_(kMemcopyDefaultNumThreads),
+ memcopy_blocksize_(kMemcopyDefaultBlocksize),
+ memcopy_threshold_(kMemcopyDefaultThreshold) {
+ buffer_ = buffer;
+ ARROW_CHECK(buffer->is_mutable()) << "Must pass mutable buffer";
+ mutable_data_ = buffer->mutable_data();
+ size_ = buffer->size();
+ position_ = 0;
+ }
+
+ Status Close() {
+ is_open_ = false;
+ return Status::OK();
+ }
+
+ bool closed() const { return !is_open_; }
+
+ Status Seek(int64_t position) {
+ if (position < 0 || position > size_) {
+ return Status::IOError("Seek out of bounds");
+ }
+ position_ = position;
+ return Status::OK();
+ }
+
+ Result<int64_t> Tell() { return position_; }
+
+ Status Write(const void* data, int64_t nbytes) {
+ RETURN_NOT_OK(internal::ValidateWriteRange(position_, nbytes, size_));
+ if (nbytes > memcopy_threshold_ && memcopy_num_threads_ > 1) {
+ ::arrow::internal::parallel_memcopy(mutable_data_ + position_,
+ reinterpret_cast<const uint8_t*>(data), nbytes,
+ memcopy_blocksize_, memcopy_num_threads_);
+ } else {
+ memcpy(mutable_data_ + position_, data, nbytes);
+ }
+ position_ += nbytes;
+ return Status::OK();
+ }
+
+ Status WriteAt(int64_t position, const void* data, int64_t nbytes) {
+ std::lock_guard<std::mutex> guard(lock_);
+ RETURN_NOT_OK(internal::ValidateWriteRange(position, nbytes, size_));
+ RETURN_NOT_OK(Seek(position));
+ return Write(data, nbytes);
+ }
+
+ void set_memcopy_threads(int num_threads) { memcopy_num_threads_ = num_threads; }
+
+ void set_memcopy_blocksize(int64_t blocksize) { memcopy_blocksize_ = blocksize; }
+
+ void set_memcopy_threshold(int64_t threshold) { memcopy_threshold_ = threshold; }
+
+ private:
+ std::mutex lock_;
+ std::shared_ptr<Buffer> buffer_;
+ uint8_t* mutable_data_;
+ int64_t size_;
+ int64_t position_;
+ bool is_open_;
+
+ int memcopy_num_threads_;
+ int64_t memcopy_blocksize_;
+ int64_t memcopy_threshold_;
+};
+
+FixedSizeBufferWriter::FixedSizeBufferWriter(const std::shared_ptr<Buffer>& buffer)
+ : impl_(new FixedSizeBufferWriterImpl(buffer)) {}
+
+FixedSizeBufferWriter::~FixedSizeBufferWriter() = default;
+
+Status FixedSizeBufferWriter::Close() { return impl_->Close(); }
+
+bool FixedSizeBufferWriter::closed() const { return impl_->closed(); }
+
+Status FixedSizeBufferWriter::Seek(int64_t position) { return impl_->Seek(position); }
+
+Result<int64_t> FixedSizeBufferWriter::Tell() const { return impl_->Tell(); }
+
+Status FixedSizeBufferWriter::Write(const void* data, int64_t nbytes) {
+ return impl_->Write(data, nbytes);
+}
+
+Status FixedSizeBufferWriter::WriteAt(int64_t position, const void* data,
+ int64_t nbytes) {
+ return impl_->WriteAt(position, data, nbytes);
+}
+
+void FixedSizeBufferWriter::set_memcopy_threads(int num_threads) {
+ impl_->set_memcopy_threads(num_threads);
+}
+
+void FixedSizeBufferWriter::set_memcopy_blocksize(int64_t blocksize) {
+ impl_->set_memcopy_blocksize(blocksize);
+}
+
+void FixedSizeBufferWriter::set_memcopy_threshold(int64_t threshold) {
+ impl_->set_memcopy_threshold(threshold);
+}
+
+// ----------------------------------------------------------------------
+// In-memory buffer reader
+
BufferReader::BufferReader(std::shared_ptr<Buffer> buffer)
: buffer_(std::move(buffer)),
data_(buffer_ ? buffer_->data() : reinterpret_cast<const uint8_t*>("")),
size_(buffer_ ? buffer_->size() : 0),
- position_(0),
- is_open_(true) {}
-
-BufferReader::BufferReader(const uint8_t* data, int64_t size)
- : buffer_(nullptr), data_(data), size_(size), position_(0), is_open_(true) {}
-
-BufferReader::BufferReader(const Buffer& buffer)
- : BufferReader(buffer.data(), buffer.size()) {}
-
-BufferReader::BufferReader(const util::string_view& data)
- : BufferReader(reinterpret_cast<const uint8_t*>(data.data()),
- static_cast<int64_t>(data.size())) {}
-
-Status BufferReader::DoClose() {
- is_open_ = false;
- return Status::OK();
-}
-
-bool BufferReader::closed() const { return !is_open_; }
-
-Result<int64_t> BufferReader::DoTell() const {
- RETURN_NOT_OK(CheckClosed());
- return position_;
-}
-
-Result<util::string_view> BufferReader::DoPeek(int64_t nbytes) {
- RETURN_NOT_OK(CheckClosed());
-
- const int64_t bytes_available = std::min(nbytes, size_ - position_);
- return util::string_view(reinterpret_cast<const char*>(data_) + position_,
- static_cast<size_t>(bytes_available));
-}
-
-bool BufferReader::supports_zero_copy() const { return true; }
-
-Status BufferReader::WillNeed(const std::vector<ReadRange>& ranges) {
- using ::arrow::internal::MemoryRegion;
-
- RETURN_NOT_OK(CheckClosed());
-
- std::vector<MemoryRegion> regions(ranges.size());
- for (size_t i = 0; i < ranges.size(); ++i) {
- const auto& range = ranges[i];
- ARROW_ASSIGN_OR_RAISE(auto size,
- internal::ValidateReadRange(range.offset, range.length, size_));
- regions[i] = {const_cast<uint8_t*>(data_ + range.offset), static_cast<size_t>(size)};
- }
- const auto st = ::arrow::internal::MemoryAdviseWillNeed(regions);
- if (st.IsIOError()) {
- // Ignore any system-level errors, in case the memory area isn't madvise()-able
- return Status::OK();
- }
- return st;
-}
-
+ position_(0),
+ is_open_(true) {}
+
+BufferReader::BufferReader(const uint8_t* data, int64_t size)
+ : buffer_(nullptr), data_(data), size_(size), position_(0), is_open_(true) {}
+
+BufferReader::BufferReader(const Buffer& buffer)
+ : BufferReader(buffer.data(), buffer.size()) {}
+
+BufferReader::BufferReader(const util::string_view& data)
+ : BufferReader(reinterpret_cast<const uint8_t*>(data.data()),
+ static_cast<int64_t>(data.size())) {}
+
+Status BufferReader::DoClose() {
+ is_open_ = false;
+ return Status::OK();
+}
+
+bool BufferReader::closed() const { return !is_open_; }
+
+Result<int64_t> BufferReader::DoTell() const {
+ RETURN_NOT_OK(CheckClosed());
+ return position_;
+}
+
+Result<util::string_view> BufferReader::DoPeek(int64_t nbytes) {
+ RETURN_NOT_OK(CheckClosed());
+
+ const int64_t bytes_available = std::min(nbytes, size_ - position_);
+ return util::string_view(reinterpret_cast<const char*>(data_) + position_,
+ static_cast<size_t>(bytes_available));
+}
+
+bool BufferReader::supports_zero_copy() const { return true; }
+
+Status BufferReader::WillNeed(const std::vector<ReadRange>& ranges) {
+ using ::arrow::internal::MemoryRegion;
+
+ RETURN_NOT_OK(CheckClosed());
+
+ std::vector<MemoryRegion> regions(ranges.size());
+ for (size_t i = 0; i < ranges.size(); ++i) {
+ const auto& range = ranges[i];
+ ARROW_ASSIGN_OR_RAISE(auto size,
+ internal::ValidateReadRange(range.offset, range.length, size_));
+ regions[i] = {const_cast<uint8_t*>(data_ + range.offset), static_cast<size_t>(size)};
+ }
+ const auto st = ::arrow::internal::MemoryAdviseWillNeed(regions);
+ if (st.IsIOError()) {
+ // Ignore any system-level errors, in case the memory area isn't madvise()-able
+ return Status::OK();
+ }
+ return st;
+}
+
Future<std::shared_ptr<Buffer>> BufferReader::ReadAsync(const IOContext&,
- int64_t position,
- int64_t nbytes) {
- return Future<std::shared_ptr<Buffer>>::MakeFinished(DoReadAt(position, nbytes));
-}
-
-Result<int64_t> BufferReader::DoReadAt(int64_t position, int64_t nbytes, void* buffer) {
- RETURN_NOT_OK(CheckClosed());
-
- ARROW_ASSIGN_OR_RAISE(nbytes, internal::ValidateReadRange(position, nbytes, size_));
- DCHECK_GE(nbytes, 0);
- if (nbytes) {
- memcpy(buffer, data_ + position, nbytes);
- }
- return nbytes;
-}
-
-Result<std::shared_ptr<Buffer>> BufferReader::DoReadAt(int64_t position, int64_t nbytes) {
- RETURN_NOT_OK(CheckClosed());
-
- ARROW_ASSIGN_OR_RAISE(nbytes, internal::ValidateReadRange(position, nbytes, size_));
- DCHECK_GE(nbytes, 0);
-
- // Arrange for data to be paged in
+ int64_t position,
+ int64_t nbytes) {
+ return Future<std::shared_ptr<Buffer>>::MakeFinished(DoReadAt(position, nbytes));
+}
+
+Result<int64_t> BufferReader::DoReadAt(int64_t position, int64_t nbytes, void* buffer) {
+ RETURN_NOT_OK(CheckClosed());
+
+ ARROW_ASSIGN_OR_RAISE(nbytes, internal::ValidateReadRange(position, nbytes, size_));
+ DCHECK_GE(nbytes, 0);
+ if (nbytes) {
+ memcpy(buffer, data_ + position, nbytes);
+ }
+ return nbytes;
+}
+
+Result<std::shared_ptr<Buffer>> BufferReader::DoReadAt(int64_t position, int64_t nbytes) {
+ RETURN_NOT_OK(CheckClosed());
+
+ ARROW_ASSIGN_OR_RAISE(nbytes, internal::ValidateReadRange(position, nbytes, size_));
+ DCHECK_GE(nbytes, 0);
+
+ // Arrange for data to be paged in
// RETURN_NOT_OK(::arrow::internal::MemoryAdviseWillNeed(
// {{const_cast<uint8_t*>(data_ + position), static_cast<size_t>(nbytes)}}));
-
- if (nbytes > 0 && buffer_ != nullptr) {
- return SliceBuffer(buffer_, position, nbytes);
- } else {
- return std::make_shared<Buffer>(data_ + position, nbytes);
- }
-}
-
-Result<int64_t> BufferReader::DoRead(int64_t nbytes, void* out) {
- RETURN_NOT_OK(CheckClosed());
- ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, DoReadAt(position_, nbytes, out));
- position_ += bytes_read;
- return bytes_read;
-}
-
-Result<std::shared_ptr<Buffer>> BufferReader::DoRead(int64_t nbytes) {
- RETURN_NOT_OK(CheckClosed());
- ARROW_ASSIGN_OR_RAISE(auto buffer, DoReadAt(position_, nbytes));
- position_ += buffer->size();
- return buffer;
-}
-
-Result<int64_t> BufferReader::DoGetSize() {
- RETURN_NOT_OK(CheckClosed());
- return size_;
-}
-
-Status BufferReader::DoSeek(int64_t position) {
- RETURN_NOT_OK(CheckClosed());
-
- if (position < 0 || position > size_) {
- return Status::IOError("Seek out of bounds");
- }
-
- position_ = position;
- return Status::OK();
-}
-
-} // namespace io
-} // namespace arrow
+
+ if (nbytes > 0 && buffer_ != nullptr) {
+ return SliceBuffer(buffer_, position, nbytes);
+ } else {
+ return std::make_shared<Buffer>(data_ + position, nbytes);
+ }
+}
+
+Result<int64_t> BufferReader::DoRead(int64_t nbytes, void* out) {
+ RETURN_NOT_OK(CheckClosed());
+ ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, DoReadAt(position_, nbytes, out));
+ position_ += bytes_read;
+ return bytes_read;
+}
+
+Result<std::shared_ptr<Buffer>> BufferReader::DoRead(int64_t nbytes) {
+ RETURN_NOT_OK(CheckClosed());
+ ARROW_ASSIGN_OR_RAISE(auto buffer, DoReadAt(position_, nbytes));
+ position_ += buffer->size();
+ return buffer;
+}
+
+Result<int64_t> BufferReader::DoGetSize() {
+ RETURN_NOT_OK(CheckClosed());
+ return size_;
+}
+
+Status BufferReader::DoSeek(int64_t position) {
+ RETURN_NOT_OK(CheckClosed());
+
+ if (position < 0 || position > size_) {
+ return Status::IOError("Seek out of bounds");
+ }
+
+ position_ = position;
+ return Status::OK();
+}
+
+} // namespace io
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/memory.h b/contrib/libs/apache/arrow/cpp/src/arrow/io/memory.h
index 8213439ef74..7d2f68797d0 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/memory.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/memory.h
@@ -1,197 +1,197 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Public API for different memory sharing / IO mechanisms
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <vector>
-
-#include "arrow/io/concurrency.h"
-#include "arrow/io/interfaces.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/string_view.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class Status;
-
-namespace io {
-
-/// \brief An output stream that writes to a resizable buffer
-class ARROW_EXPORT BufferOutputStream : public OutputStream {
- public:
- explicit BufferOutputStream(const std::shared_ptr<ResizableBuffer>& buffer);
-
- /// \brief Create in-memory output stream with indicated capacity using a
- /// memory pool
- /// \param[in] initial_capacity the initial allocated internal capacity of
- /// the OutputStream
- /// \param[in,out] pool a MemoryPool to use for allocations
- /// \return the created stream
- static Result<std::shared_ptr<BufferOutputStream>> Create(
- int64_t initial_capacity = 4096, MemoryPool* pool = default_memory_pool());
-
- ~BufferOutputStream() override;
-
- // Implement the OutputStream interface
-
- /// Close the stream, preserving the buffer (retrieve it with Finish()).
- Status Close() override;
- bool closed() const override;
- Result<int64_t> Tell() const override;
- Status Write(const void* data, int64_t nbytes) override;
-
- /// \cond FALSE
- using OutputStream::Write;
- /// \endcond
-
- /// Close the stream and return the buffer
- Result<std::shared_ptr<Buffer>> Finish();
-
- /// \brief Initialize state of OutputStream with newly allocated memory and
- /// set position to 0
- /// \param[in] initial_capacity the starting allocated capacity
- /// \param[in,out] pool the memory pool to use for allocations
- /// \return Status
- Status Reset(int64_t initial_capacity = 1024, MemoryPool* pool = default_memory_pool());
-
- int64_t capacity() const { return capacity_; }
-
- private:
- BufferOutputStream();
-
- // Ensures there is sufficient space available to write nbytes
- Status Reserve(int64_t nbytes);
-
- std::shared_ptr<ResizableBuffer> buffer_;
- bool is_open_;
- int64_t capacity_;
- int64_t position_;
- uint8_t* mutable_data_;
-};
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Public API for different memory sharing / IO mechanisms
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "arrow/io/concurrency.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Status;
+
+namespace io {
+
+/// \brief An output stream that writes to a resizable buffer
+class ARROW_EXPORT BufferOutputStream : public OutputStream {
+ public:
+ explicit BufferOutputStream(const std::shared_ptr<ResizableBuffer>& buffer);
+
+ /// \brief Create in-memory output stream with indicated capacity using a
+ /// memory pool
+ /// \param[in] initial_capacity the initial allocated internal capacity of
+ /// the OutputStream
+ /// \param[in,out] pool a MemoryPool to use for allocations
+ /// \return the created stream
+ static Result<std::shared_ptr<BufferOutputStream>> Create(
+ int64_t initial_capacity = 4096, MemoryPool* pool = default_memory_pool());
+
+ ~BufferOutputStream() override;
+
+ // Implement the OutputStream interface
+
+ /// Close the stream, preserving the buffer (retrieve it with Finish()).
+ Status Close() override;
+ bool closed() const override;
+ Result<int64_t> Tell() const override;
+ Status Write(const void* data, int64_t nbytes) override;
+
+ /// \cond FALSE
+ using OutputStream::Write;
+ /// \endcond
+
+ /// Close the stream and return the buffer
+ Result<std::shared_ptr<Buffer>> Finish();
+
+ /// \brief Initialize state of OutputStream with newly allocated memory and
+ /// set position to 0
+ /// \param[in] initial_capacity the starting allocated capacity
+ /// \param[in,out] pool the memory pool to use for allocations
+ /// \return Status
+ Status Reset(int64_t initial_capacity = 1024, MemoryPool* pool = default_memory_pool());
+
+ int64_t capacity() const { return capacity_; }
+
+ private:
+ BufferOutputStream();
+
+ // Ensures there is sufficient space available to write nbytes
+ Status Reserve(int64_t nbytes);
+
+ std::shared_ptr<ResizableBuffer> buffer_;
+ bool is_open_;
+ int64_t capacity_;
+ int64_t position_;
+ uint8_t* mutable_data_;
+};
+
/// \brief A helper class to track the size of allocations
-///
-/// Writes to this stream do not copy or retain any data, they just bump
-/// a size counter that can be later used to know exactly which data size
-/// needs to be allocated for actual writing.
-class ARROW_EXPORT MockOutputStream : public OutputStream {
- public:
- MockOutputStream() : extent_bytes_written_(0), is_open_(true) {}
-
- // Implement the OutputStream interface
- Status Close() override;
- bool closed() const override;
- Result<int64_t> Tell() const override;
- Status Write(const void* data, int64_t nbytes) override;
- /// \cond FALSE
- using Writable::Write;
- /// \endcond
-
- int64_t GetExtentBytesWritten() const { return extent_bytes_written_; }
-
- private:
- int64_t extent_bytes_written_;
- bool is_open_;
-};
-
-/// \brief An output stream that writes into a fixed-size mutable buffer
-class ARROW_EXPORT FixedSizeBufferWriter : public WritableFile {
- public:
- /// Input buffer must be mutable, will abort if not
- explicit FixedSizeBufferWriter(const std::shared_ptr<Buffer>& buffer);
- ~FixedSizeBufferWriter() override;
-
- Status Close() override;
- bool closed() const override;
- Status Seek(int64_t position) override;
- Result<int64_t> Tell() const override;
- Status Write(const void* data, int64_t nbytes) override;
- /// \cond FALSE
- using Writable::Write;
- /// \endcond
-
- Status WriteAt(int64_t position, const void* data, int64_t nbytes) override;
-
- void set_memcopy_threads(int num_threads);
- void set_memcopy_blocksize(int64_t blocksize);
- void set_memcopy_threshold(int64_t threshold);
-
- protected:
- class FixedSizeBufferWriterImpl;
- std::unique_ptr<FixedSizeBufferWriterImpl> impl_;
-};
-
-/// \class BufferReader
-/// \brief Random access zero-copy reads on an arrow::Buffer
-class ARROW_EXPORT BufferReader
- : public internal::RandomAccessFileConcurrencyWrapper<BufferReader> {
- public:
+///
+/// Writes to this stream do not copy or retain any data, they just bump
+/// a size counter that can be later used to know exactly which data size
+/// needs to be allocated for actual writing.
+class ARROW_EXPORT MockOutputStream : public OutputStream {
+ public:
+ MockOutputStream() : extent_bytes_written_(0), is_open_(true) {}
+
+ // Implement the OutputStream interface
+ Status Close() override;
+ bool closed() const override;
+ Result<int64_t> Tell() const override;
+ Status Write(const void* data, int64_t nbytes) override;
+ /// \cond FALSE
+ using Writable::Write;
+ /// \endcond
+
+ int64_t GetExtentBytesWritten() const { return extent_bytes_written_; }
+
+ private:
+ int64_t extent_bytes_written_;
+ bool is_open_;
+};
+
+/// \brief An output stream that writes into a fixed-size mutable buffer
+class ARROW_EXPORT FixedSizeBufferWriter : public WritableFile {
+ public:
+ /// Input buffer must be mutable, will abort if not
+ explicit FixedSizeBufferWriter(const std::shared_ptr<Buffer>& buffer);
+ ~FixedSizeBufferWriter() override;
+
+ Status Close() override;
+ bool closed() const override;
+ Status Seek(int64_t position) override;
+ Result<int64_t> Tell() const override;
+ Status Write(const void* data, int64_t nbytes) override;
+ /// \cond FALSE
+ using Writable::Write;
+ /// \endcond
+
+ Status WriteAt(int64_t position, const void* data, int64_t nbytes) override;
+
+ void set_memcopy_threads(int num_threads);
+ void set_memcopy_blocksize(int64_t blocksize);
+ void set_memcopy_threshold(int64_t threshold);
+
+ protected:
+ class FixedSizeBufferWriterImpl;
+ std::unique_ptr<FixedSizeBufferWriterImpl> impl_;
+};
+
+/// \class BufferReader
+/// \brief Random access zero-copy reads on an arrow::Buffer
+class ARROW_EXPORT BufferReader
+ : public internal::RandomAccessFileConcurrencyWrapper<BufferReader> {
+ public:
explicit BufferReader(std::shared_ptr<Buffer> buffer);
- explicit BufferReader(const Buffer& buffer);
- BufferReader(const uint8_t* data, int64_t size);
-
- /// \brief Instantiate from std::string or arrow::util::string_view. Does not
- /// own data
- explicit BufferReader(const util::string_view& data);
-
- bool closed() const override;
-
- bool supports_zero_copy() const override;
-
- std::shared_ptr<Buffer> buffer() const { return buffer_; }
-
- // Synchronous ReadAsync override
+ explicit BufferReader(const Buffer& buffer);
+ BufferReader(const uint8_t* data, int64_t size);
+
+ /// \brief Instantiate from std::string or arrow::util::string_view. Does not
+ /// own data
+ explicit BufferReader(const util::string_view& data);
+
+ bool closed() const override;
+
+ bool supports_zero_copy() const override;
+
+ std::shared_ptr<Buffer> buffer() const { return buffer_; }
+
+ // Synchronous ReadAsync override
Future<std::shared_ptr<Buffer>> ReadAsync(const IOContext&, int64_t position,
- int64_t nbytes) override;
- Status WillNeed(const std::vector<ReadRange>& ranges) override;
-
- protected:
- friend RandomAccessFileConcurrencyWrapper<BufferReader>;
-
- Status DoClose();
-
- Result<int64_t> DoRead(int64_t nbytes, void* buffer);
- Result<std::shared_ptr<Buffer>> DoRead(int64_t nbytes);
- Result<int64_t> DoReadAt(int64_t position, int64_t nbytes, void* out);
- Result<std::shared_ptr<Buffer>> DoReadAt(int64_t position, int64_t nbytes);
- Result<util::string_view> DoPeek(int64_t nbytes) override;
-
- Result<int64_t> DoTell() const;
- Status DoSeek(int64_t position);
- Result<int64_t> DoGetSize();
-
- Status CheckClosed() const {
- if (!is_open_) {
- return Status::Invalid("Operation forbidden on closed BufferReader");
- }
- return Status::OK();
- }
-
- std::shared_ptr<Buffer> buffer_;
- const uint8_t* data_;
- int64_t size_;
- int64_t position_;
- bool is_open_;
-};
-
-} // namespace io
-} // namespace arrow
+ int64_t nbytes) override;
+ Status WillNeed(const std::vector<ReadRange>& ranges) override;
+
+ protected:
+ friend RandomAccessFileConcurrencyWrapper<BufferReader>;
+
+ Status DoClose();
+
+ Result<int64_t> DoRead(int64_t nbytes, void* buffer);
+ Result<std::shared_ptr<Buffer>> DoRead(int64_t nbytes);
+ Result<int64_t> DoReadAt(int64_t position, int64_t nbytes, void* out);
+ Result<std::shared_ptr<Buffer>> DoReadAt(int64_t position, int64_t nbytes);
+ Result<util::string_view> DoPeek(int64_t nbytes) override;
+
+ Result<int64_t> DoTell() const;
+ Status DoSeek(int64_t position);
+ Result<int64_t> DoGetSize();
+
+ Status CheckClosed() const {
+ if (!is_open_) {
+ return Status::Invalid("Operation forbidden on closed BufferReader");
+ }
+ return Status::OK();
+ }
+
+ std::shared_ptr<Buffer> buffer_;
+ const uint8_t* data_;
+ int64_t size_;
+ int64_t position_;
+ bool is_open_;
+};
+
+} // namespace io
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/mman.h b/contrib/libs/apache/arrow/cpp/src/arrow/io/mman.h
index 9b06ac8e7b5..ce6dc3be645 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/mman.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/mman.h
@@ -1,169 +1,169 @@
-// Copyright https://code.google.com/p/mman-win32/
-//
-// Licensed under the MIT License;
-// You may obtain a copy of the License at
-//
-// https://opensource.org/licenses/MIT
-
-#pragma once
-
-#include "arrow/util/windows_compatibility.h"
-
-#include <errno.h>
-#include <io.h>
-#include <sys/types.h>
-
-#include <cstdint>
-
-#define PROT_NONE 0
-#define PROT_READ 1
-#define PROT_WRITE 2
-#define PROT_EXEC 4
-
-#define MAP_FILE 0
-#define MAP_SHARED 1
-#define MAP_PRIVATE 2
-#define MAP_TYPE 0xf
-#define MAP_FIXED 0x10
-#define MAP_ANONYMOUS 0x20
-#define MAP_ANON MAP_ANONYMOUS
-
-#define MAP_FAILED ((void*)-1)
-
-/* Flags for msync. */
-#define MS_ASYNC 1
-#define MS_SYNC 2
-#define MS_INVALIDATE 4
-
-#ifndef FILE_MAP_EXECUTE
-#define FILE_MAP_EXECUTE 0x0020
-#endif
-
-static inline int __map_mman_error(const DWORD err, const int deferr) {
- if (err == 0) return 0;
- // TODO: implement
- return err;
-}
-
-static inline DWORD __map_mmap_prot_page(const int prot) {
- DWORD protect = 0;
-
- if (prot == PROT_NONE) return protect;
-
- if ((prot & PROT_EXEC) != 0) {
- protect = ((prot & PROT_WRITE) != 0) ? PAGE_EXECUTE_READWRITE : PAGE_EXECUTE_READ;
- } else {
- protect = ((prot & PROT_WRITE) != 0) ? PAGE_READWRITE : PAGE_READONLY;
- }
-
- return protect;
-}
-
-static inline DWORD __map_mmap_prot_file(const int prot) {
- DWORD desiredAccess = 0;
-
- if (prot == PROT_NONE) return desiredAccess;
-
- if ((prot & PROT_READ) != 0) desiredAccess |= FILE_MAP_READ;
- if ((prot & PROT_WRITE) != 0) desiredAccess |= FILE_MAP_WRITE;
- if ((prot & PROT_EXEC) != 0) desiredAccess |= FILE_MAP_EXECUTE;
-
- return desiredAccess;
-}
-
-static inline void* mmap(void* addr, size_t len, int prot, int flags, int fildes,
- off_t off) {
- HANDLE fm, h;
-
- void* map = MAP_FAILED;
- const uint64_t off64 = static_cast<uint64_t>(off);
- const uint64_t maxSize = off64 + len;
-
- const DWORD dwFileOffsetLow = static_cast<DWORD>(off64 & 0xFFFFFFFFUL);
- const DWORD dwFileOffsetHigh = static_cast<DWORD>((off64 >> 32) & 0xFFFFFFFFUL);
- const DWORD dwMaxSizeLow = static_cast<DWORD>(maxSize & 0xFFFFFFFFUL);
- const DWORD dwMaxSizeHigh = static_cast<DWORD>((maxSize >> 32) & 0xFFFFFFFFUL);
-
- const DWORD protect = __map_mmap_prot_page(prot);
- const DWORD desiredAccess = __map_mmap_prot_file(prot);
-
- errno = 0;
-
- if (len == 0
- /* Unsupported flag combinations */
- || (flags & MAP_FIXED) != 0
- /* Unsupported protection combinations */
- || prot == PROT_EXEC) {
- errno = EINVAL;
- return MAP_FAILED;
- }
-
- h = ((flags & MAP_ANONYMOUS) == 0) ? (HANDLE)_get_osfhandle(fildes)
- : INVALID_HANDLE_VALUE;
-
- if ((flags & MAP_ANONYMOUS) == 0 && h == INVALID_HANDLE_VALUE) {
- errno = EBADF;
- return MAP_FAILED;
- }
-
- fm = CreateFileMapping(h, NULL, protect, dwMaxSizeHigh, dwMaxSizeLow, NULL);
-
- if (fm == NULL) {
- errno = __map_mman_error(GetLastError(), EPERM);
- return MAP_FAILED;
- }
-
- map = MapViewOfFile(fm, desiredAccess, dwFileOffsetHigh, dwFileOffsetLow, len);
-
- CloseHandle(fm);
-
- if (map == NULL) {
- errno = __map_mman_error(GetLastError(), EPERM);
- return MAP_FAILED;
- }
-
- return map;
-}
-
-static inline int munmap(void* addr, size_t len) {
- if (UnmapViewOfFile(addr)) return 0;
-
- errno = __map_mman_error(GetLastError(), EPERM);
-
- return -1;
-}
-
-static inline int mprotect(void* addr, size_t len, int prot) {
- DWORD newProtect = __map_mmap_prot_page(prot);
- DWORD oldProtect = 0;
-
- if (VirtualProtect(addr, len, newProtect, &oldProtect)) return 0;
-
- errno = __map_mman_error(GetLastError(), EPERM);
-
- return -1;
-}
-
-static inline int msync(void* addr, size_t len, int flags) {
- if (FlushViewOfFile(addr, len)) return 0;
-
- errno = __map_mman_error(GetLastError(), EPERM);
-
- return -1;
-}
-
-static inline int mlock(const void* addr, size_t len) {
- if (VirtualLock((LPVOID)addr, len)) return 0;
-
- errno = __map_mman_error(GetLastError(), EPERM);
-
- return -1;
-}
-
-static inline int munlock(const void* addr, size_t len) {
- if (VirtualUnlock((LPVOID)addr, len)) return 0;
-
- errno = __map_mman_error(GetLastError(), EPERM);
-
- return -1;
-}
+// Copyright https://code.google.com/p/mman-win32/
+//
+// Licensed under the MIT License;
+// You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/MIT
+
+#pragma once
+
+#include "arrow/util/windows_compatibility.h"
+
+#include <errno.h>
+#include <io.h>
+#include <sys/types.h>
+
+#include <cstdint>
+
+#define PROT_NONE 0
+#define PROT_READ 1
+#define PROT_WRITE 2
+#define PROT_EXEC 4
+
+#define MAP_FILE 0
+#define MAP_SHARED 1
+#define MAP_PRIVATE 2
+#define MAP_TYPE 0xf
+#define MAP_FIXED 0x10
+#define MAP_ANONYMOUS 0x20
+#define MAP_ANON MAP_ANONYMOUS
+
+#define MAP_FAILED ((void*)-1)
+
+/* Flags for msync. */
+#define MS_ASYNC 1
+#define MS_SYNC 2
+#define MS_INVALIDATE 4
+
+#ifndef FILE_MAP_EXECUTE
+#define FILE_MAP_EXECUTE 0x0020
+#endif
+
+static inline int __map_mman_error(const DWORD err, const int deferr) {
+ if (err == 0) return 0;
+ // TODO: implement
+ return err;
+}
+
+static inline DWORD __map_mmap_prot_page(const int prot) {
+ DWORD protect = 0;
+
+ if (prot == PROT_NONE) return protect;
+
+ if ((prot & PROT_EXEC) != 0) {
+ protect = ((prot & PROT_WRITE) != 0) ? PAGE_EXECUTE_READWRITE : PAGE_EXECUTE_READ;
+ } else {
+ protect = ((prot & PROT_WRITE) != 0) ? PAGE_READWRITE : PAGE_READONLY;
+ }
+
+ return protect;
+}
+
+static inline DWORD __map_mmap_prot_file(const int prot) {
+ DWORD desiredAccess = 0;
+
+ if (prot == PROT_NONE) return desiredAccess;
+
+ if ((prot & PROT_READ) != 0) desiredAccess |= FILE_MAP_READ;
+ if ((prot & PROT_WRITE) != 0) desiredAccess |= FILE_MAP_WRITE;
+ if ((prot & PROT_EXEC) != 0) desiredAccess |= FILE_MAP_EXECUTE;
+
+ return desiredAccess;
+}
+
+static inline void* mmap(void* addr, size_t len, int prot, int flags, int fildes,
+ off_t off) {
+ HANDLE fm, h;
+
+ void* map = MAP_FAILED;
+ const uint64_t off64 = static_cast<uint64_t>(off);
+ const uint64_t maxSize = off64 + len;
+
+ const DWORD dwFileOffsetLow = static_cast<DWORD>(off64 & 0xFFFFFFFFUL);
+ const DWORD dwFileOffsetHigh = static_cast<DWORD>((off64 >> 32) & 0xFFFFFFFFUL);
+ const DWORD dwMaxSizeLow = static_cast<DWORD>(maxSize & 0xFFFFFFFFUL);
+ const DWORD dwMaxSizeHigh = static_cast<DWORD>((maxSize >> 32) & 0xFFFFFFFFUL);
+
+ const DWORD protect = __map_mmap_prot_page(prot);
+ const DWORD desiredAccess = __map_mmap_prot_file(prot);
+
+ errno = 0;
+
+ if (len == 0
+ /* Unsupported flag combinations */
+ || (flags & MAP_FIXED) != 0
+ /* Unsupported protection combinations */
+ || prot == PROT_EXEC) {
+ errno = EINVAL;
+ return MAP_FAILED;
+ }
+
+ h = ((flags & MAP_ANONYMOUS) == 0) ? (HANDLE)_get_osfhandle(fildes)
+ : INVALID_HANDLE_VALUE;
+
+ if ((flags & MAP_ANONYMOUS) == 0 && h == INVALID_HANDLE_VALUE) {
+ errno = EBADF;
+ return MAP_FAILED;
+ }
+
+ fm = CreateFileMapping(h, NULL, protect, dwMaxSizeHigh, dwMaxSizeLow, NULL);
+
+ if (fm == NULL) {
+ errno = __map_mman_error(GetLastError(), EPERM);
+ return MAP_FAILED;
+ }
+
+ map = MapViewOfFile(fm, desiredAccess, dwFileOffsetHigh, dwFileOffsetLow, len);
+
+ CloseHandle(fm);
+
+ if (map == NULL) {
+ errno = __map_mman_error(GetLastError(), EPERM);
+ return MAP_FAILED;
+ }
+
+ return map;
+}
+
+static inline int munmap(void* addr, size_t len) {
+ if (UnmapViewOfFile(addr)) return 0;
+
+ errno = __map_mman_error(GetLastError(), EPERM);
+
+ return -1;
+}
+
+static inline int mprotect(void* addr, size_t len, int prot) {
+ DWORD newProtect = __map_mmap_prot_page(prot);
+ DWORD oldProtect = 0;
+
+ if (VirtualProtect(addr, len, newProtect, &oldProtect)) return 0;
+
+ errno = __map_mman_error(GetLastError(), EPERM);
+
+ return -1;
+}
+
+static inline int msync(void* addr, size_t len, int flags) {
+ if (FlushViewOfFile(addr, len)) return 0;
+
+ errno = __map_mman_error(GetLastError(), EPERM);
+
+ return -1;
+}
+
+static inline int mlock(const void* addr, size_t len) {
+ if (VirtualLock((LPVOID)addr, len)) return 0;
+
+ errno = __map_mman_error(GetLastError(), EPERM);
+
+ return -1;
+}
+
+static inline int munlock(const void* addr, size_t len) {
+ if (VirtualUnlock((LPVOID)addr, len)) return 0;
+
+ errno = __map_mman_error(GetLastError(), EPERM);
+
+ return -1;
+}
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/slow.cc b/contrib/libs/apache/arrow/cpp/src/arrow/io/slow.cc
index 1042691fa59..17e2708df3b 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/slow.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/slow.cc
@@ -1,148 +1,148 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/io/slow.h"
-
-#include <algorithm>
-#include <cstring>
-#include <mutex>
-#include <random>
-#include <thread>
-#include <utility>
-
-#include "arrow/buffer.h"
-#include "arrow/io/util_internal.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/util/io_util.h"
-#include "arrow/util/logging.h"
-
-namespace arrow {
-namespace io {
-
-// Multiply the average by this ratio to get the intended standard deviation
-static constexpr double kStandardDeviationRatio = 0.1;
-
-class LatencyGeneratorImpl : public LatencyGenerator {
- public:
- ~LatencyGeneratorImpl() override = default;
-
- LatencyGeneratorImpl(double average_latency, int32_t seed)
- : gen_(static_cast<decltype(gen_)::result_type>(seed)),
- latency_dist_(average_latency, average_latency * kStandardDeviationRatio) {}
-
- double NextLatency() override {
- // std::random distributions are unlikely to be thread-safe, and
- // a RandomAccessFile may be called from multiple threads
- std::lock_guard<std::mutex> lock(mutex_);
- return std::max<double>(0.0, latency_dist_(gen_));
- }
-
- private:
- std::default_random_engine gen_;
- std::normal_distribution<double> latency_dist_;
- std::mutex mutex_;
-};
-
-LatencyGenerator::~LatencyGenerator() {}
-
-void LatencyGenerator::Sleep() {
- std::this_thread::sleep_for(std::chrono::duration<double>(NextLatency()));
-}
-
-std::shared_ptr<LatencyGenerator> LatencyGenerator::Make(double average_latency) {
- return std::make_shared<LatencyGeneratorImpl>(
- average_latency, static_cast<int32_t>(::arrow::internal::GetRandomSeed()));
-}
-
-std::shared_ptr<LatencyGenerator> LatencyGenerator::Make(double average_latency,
- int32_t seed) {
- return std::make_shared<LatencyGeneratorImpl>(average_latency, seed);
-}
-
-//////////////////////////////////////////////////////////////////////////
-// SlowInputStream implementation
-
-SlowInputStream::~SlowInputStream() { internal::CloseFromDestructor(this); }
-
-Status SlowInputStream::Close() { return stream_->Close(); }
-
-Status SlowInputStream::Abort() { return stream_->Abort(); }
-
-bool SlowInputStream::closed() const { return stream_->closed(); }
-
-Result<int64_t> SlowInputStream::Tell() const { return stream_->Tell(); }
-
-Result<int64_t> SlowInputStream::Read(int64_t nbytes, void* out) {
- latencies_->Sleep();
- return stream_->Read(nbytes, out);
-}
-
-Result<std::shared_ptr<Buffer>> SlowInputStream::Read(int64_t nbytes) {
- latencies_->Sleep();
- return stream_->Read(nbytes);
-}
-
-Result<util::string_view> SlowInputStream::Peek(int64_t nbytes) {
- return stream_->Peek(nbytes);
-}
-
-//////////////////////////////////////////////////////////////////////////
-// SlowRandomAccessFile implementation
-
-SlowRandomAccessFile::~SlowRandomAccessFile() { internal::CloseFromDestructor(this); }
-
-Status SlowRandomAccessFile::Close() { return stream_->Close(); }
-
-Status SlowRandomAccessFile::Abort() { return stream_->Abort(); }
-
-bool SlowRandomAccessFile::closed() const { return stream_->closed(); }
-
-Result<int64_t> SlowRandomAccessFile::GetSize() { return stream_->GetSize(); }
-
-Status SlowRandomAccessFile::Seek(int64_t position) { return stream_->Seek(position); }
-
-Result<int64_t> SlowRandomAccessFile::Tell() const { return stream_->Tell(); }
-
-Result<int64_t> SlowRandomAccessFile::Read(int64_t nbytes, void* out) {
- latencies_->Sleep();
- return stream_->Read(nbytes, out);
-}
-
-Result<std::shared_ptr<Buffer>> SlowRandomAccessFile::Read(int64_t nbytes) {
- latencies_->Sleep();
- return stream_->Read(nbytes);
-}
-
-Result<int64_t> SlowRandomAccessFile::ReadAt(int64_t position, int64_t nbytes,
- void* out) {
- latencies_->Sleep();
- return stream_->ReadAt(position, nbytes, out);
-}
-
-Result<std::shared_ptr<Buffer>> SlowRandomAccessFile::ReadAt(int64_t position,
- int64_t nbytes) {
- latencies_->Sleep();
- return stream_->ReadAt(position, nbytes);
-}
-
-Result<util::string_view> SlowRandomAccessFile::Peek(int64_t nbytes) {
- return stream_->Peek(nbytes);
-}
-
-} // namespace io
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/io/slow.h"
+
+#include <algorithm>
+#include <cstring>
+#include <mutex>
+#include <random>
+#include <thread>
+#include <utility>
+
+#include "arrow/buffer.h"
+#include "arrow/io/util_internal.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/io_util.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+namespace io {
+
+// Multiply the average by this ratio to get the intended standard deviation
+static constexpr double kStandardDeviationRatio = 0.1;
+
+class LatencyGeneratorImpl : public LatencyGenerator {
+ public:
+ ~LatencyGeneratorImpl() override = default;
+
+ LatencyGeneratorImpl(double average_latency, int32_t seed)
+ : gen_(static_cast<decltype(gen_)::result_type>(seed)),
+ latency_dist_(average_latency, average_latency * kStandardDeviationRatio) {}
+
+ double NextLatency() override {
+ // std::random distributions are unlikely to be thread-safe, and
+ // a RandomAccessFile may be called from multiple threads
+ std::lock_guard<std::mutex> lock(mutex_);
+ return std::max<double>(0.0, latency_dist_(gen_));
+ }
+
+ private:
+ std::default_random_engine gen_;
+ std::normal_distribution<double> latency_dist_;
+ std::mutex mutex_;
+};
+
+LatencyGenerator::~LatencyGenerator() {}
+
+void LatencyGenerator::Sleep() {
+ std::this_thread::sleep_for(std::chrono::duration<double>(NextLatency()));
+}
+
+std::shared_ptr<LatencyGenerator> LatencyGenerator::Make(double average_latency) {
+ return std::make_shared<LatencyGeneratorImpl>(
+ average_latency, static_cast<int32_t>(::arrow::internal::GetRandomSeed()));
+}
+
+std::shared_ptr<LatencyGenerator> LatencyGenerator::Make(double average_latency,
+ int32_t seed) {
+ return std::make_shared<LatencyGeneratorImpl>(average_latency, seed);
+}
+
+//////////////////////////////////////////////////////////////////////////
+// SlowInputStream implementation
+
+SlowInputStream::~SlowInputStream() { internal::CloseFromDestructor(this); }
+
+Status SlowInputStream::Close() { return stream_->Close(); }
+
+Status SlowInputStream::Abort() { return stream_->Abort(); }
+
+bool SlowInputStream::closed() const { return stream_->closed(); }
+
+Result<int64_t> SlowInputStream::Tell() const { return stream_->Tell(); }
+
+Result<int64_t> SlowInputStream::Read(int64_t nbytes, void* out) {
+ latencies_->Sleep();
+ return stream_->Read(nbytes, out);
+}
+
+Result<std::shared_ptr<Buffer>> SlowInputStream::Read(int64_t nbytes) {
+ latencies_->Sleep();
+ return stream_->Read(nbytes);
+}
+
+Result<util::string_view> SlowInputStream::Peek(int64_t nbytes) {
+ return stream_->Peek(nbytes);
+}
+
+//////////////////////////////////////////////////////////////////////////
+// SlowRandomAccessFile implementation
+
+SlowRandomAccessFile::~SlowRandomAccessFile() { internal::CloseFromDestructor(this); }
+
+Status SlowRandomAccessFile::Close() { return stream_->Close(); }
+
+Status SlowRandomAccessFile::Abort() { return stream_->Abort(); }
+
+bool SlowRandomAccessFile::closed() const { return stream_->closed(); }
+
+Result<int64_t> SlowRandomAccessFile::GetSize() { return stream_->GetSize(); }
+
+Status SlowRandomAccessFile::Seek(int64_t position) { return stream_->Seek(position); }
+
+Result<int64_t> SlowRandomAccessFile::Tell() const { return stream_->Tell(); }
+
+Result<int64_t> SlowRandomAccessFile::Read(int64_t nbytes, void* out) {
+ latencies_->Sleep();
+ return stream_->Read(nbytes, out);
+}
+
+Result<std::shared_ptr<Buffer>> SlowRandomAccessFile::Read(int64_t nbytes) {
+ latencies_->Sleep();
+ return stream_->Read(nbytes);
+}
+
+Result<int64_t> SlowRandomAccessFile::ReadAt(int64_t position, int64_t nbytes,
+ void* out) {
+ latencies_->Sleep();
+ return stream_->ReadAt(position, nbytes, out);
+}
+
+Result<std::shared_ptr<Buffer>> SlowRandomAccessFile::ReadAt(int64_t position,
+ int64_t nbytes) {
+ latencies_->Sleep();
+ return stream_->ReadAt(position, nbytes);
+}
+
+Result<util::string_view> SlowRandomAccessFile::Peek(int64_t nbytes) {
+ return stream_->Peek(nbytes);
+}
+
+} // namespace io
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/slow.h b/contrib/libs/apache/arrow/cpp/src/arrow/io/slow.h
index b0c02a85ac6..256b795db52 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/slow.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/slow.h
@@ -1,118 +1,118 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Slow stream implementations, mainly for testing and benchmarking
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <utility>
-
-#include "arrow/io/interfaces.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class Buffer;
-class Status;
-
-namespace io {
-
-class ARROW_EXPORT LatencyGenerator {
- public:
- virtual ~LatencyGenerator();
-
- void Sleep();
-
- virtual double NextLatency() = 0;
-
- static std::shared_ptr<LatencyGenerator> Make(double average_latency);
- static std::shared_ptr<LatencyGenerator> Make(double average_latency, int32_t seed);
-};
-
-// XXX use ConcurrencyWrapper? It could increase chances of finding a race.
-
-template <class StreamType>
-class ARROW_EXPORT SlowInputStreamBase : public StreamType {
- public:
- SlowInputStreamBase(std::shared_ptr<StreamType> stream,
- std::shared_ptr<LatencyGenerator> latencies)
- : stream_(std::move(stream)), latencies_(std::move(latencies)) {}
-
- SlowInputStreamBase(std::shared_ptr<StreamType> stream, double average_latency)
- : stream_(std::move(stream)), latencies_(LatencyGenerator::Make(average_latency)) {}
-
- SlowInputStreamBase(std::shared_ptr<StreamType> stream, double average_latency,
- int32_t seed)
- : stream_(std::move(stream)),
- latencies_(LatencyGenerator::Make(average_latency, seed)) {}
-
- protected:
- std::shared_ptr<StreamType> stream_;
- std::shared_ptr<LatencyGenerator> latencies_;
-};
-
-/// \brief An InputStream wrapper that makes reads slower.
-///
-/// Read() calls are made slower by an average latency (in seconds).
-/// Actual latencies form a normal distribution closely centered
-/// on the average latency.
-/// Other calls are forwarded directly.
-class ARROW_EXPORT SlowInputStream : public SlowInputStreamBase<InputStream> {
- public:
- ~SlowInputStream() override;
-
- using SlowInputStreamBase<InputStream>::SlowInputStreamBase;
-
- Status Close() override;
- Status Abort() override;
- bool closed() const override;
-
- Result<int64_t> Read(int64_t nbytes, void* out) override;
- Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
- Result<util::string_view> Peek(int64_t nbytes) override;
-
- Result<int64_t> Tell() const override;
-};
-
-/// \brief A RandomAccessFile wrapper that makes reads slower.
-///
-/// Similar to SlowInputStream, but allows random access and seeking.
-class ARROW_EXPORT SlowRandomAccessFile : public SlowInputStreamBase<RandomAccessFile> {
- public:
- ~SlowRandomAccessFile() override;
-
- using SlowInputStreamBase<RandomAccessFile>::SlowInputStreamBase;
-
- Status Close() override;
- Status Abort() override;
- bool closed() const override;
-
- Result<int64_t> Read(int64_t nbytes, void* out) override;
- Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
- Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out) override;
- Result<std::shared_ptr<Buffer>> ReadAt(int64_t position, int64_t nbytes) override;
- Result<util::string_view> Peek(int64_t nbytes) override;
-
- Result<int64_t> GetSize() override;
- Status Seek(int64_t position) override;
- Result<int64_t> Tell() const override;
-};
-
-} // namespace io
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Slow stream implementations, mainly for testing and benchmarking
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <utility>
+
+#include "arrow/io/interfaces.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Buffer;
+class Status;
+
+namespace io {
+
+class ARROW_EXPORT LatencyGenerator {
+ public:
+ virtual ~LatencyGenerator();
+
+ void Sleep();
+
+ virtual double NextLatency() = 0;
+
+ static std::shared_ptr<LatencyGenerator> Make(double average_latency);
+ static std::shared_ptr<LatencyGenerator> Make(double average_latency, int32_t seed);
+};
+
+// XXX use ConcurrencyWrapper? It could increase chances of finding a race.
+
+template <class StreamType>
+class ARROW_EXPORT SlowInputStreamBase : public StreamType {
+ public:
+ SlowInputStreamBase(std::shared_ptr<StreamType> stream,
+ std::shared_ptr<LatencyGenerator> latencies)
+ : stream_(std::move(stream)), latencies_(std::move(latencies)) {}
+
+ SlowInputStreamBase(std::shared_ptr<StreamType> stream, double average_latency)
+ : stream_(std::move(stream)), latencies_(LatencyGenerator::Make(average_latency)) {}
+
+ SlowInputStreamBase(std::shared_ptr<StreamType> stream, double average_latency,
+ int32_t seed)
+ : stream_(std::move(stream)),
+ latencies_(LatencyGenerator::Make(average_latency, seed)) {}
+
+ protected:
+ std::shared_ptr<StreamType> stream_;
+ std::shared_ptr<LatencyGenerator> latencies_;
+};
+
+/// \brief An InputStream wrapper that makes reads slower.
+///
+/// Read() calls are made slower by an average latency (in seconds).
+/// Actual latencies form a normal distribution closely centered
+/// on the average latency.
+/// Other calls are forwarded directly.
+class ARROW_EXPORT SlowInputStream : public SlowInputStreamBase<InputStream> {
+ public:
+ ~SlowInputStream() override;
+
+ using SlowInputStreamBase<InputStream>::SlowInputStreamBase;
+
+ Status Close() override;
+ Status Abort() override;
+ bool closed() const override;
+
+ Result<int64_t> Read(int64_t nbytes, void* out) override;
+ Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
+ Result<util::string_view> Peek(int64_t nbytes) override;
+
+ Result<int64_t> Tell() const override;
+};
+
+/// \brief A RandomAccessFile wrapper that makes reads slower.
+///
+/// Similar to SlowInputStream, but allows random access and seeking.
+class ARROW_EXPORT SlowRandomAccessFile : public SlowInputStreamBase<RandomAccessFile> {
+ public:
+ ~SlowRandomAccessFile() override;
+
+ using SlowInputStreamBase<RandomAccessFile>::SlowInputStreamBase;
+
+ Status Close() override;
+ Status Abort() override;
+ bool closed() const override;
+
+ Result<int64_t> Read(int64_t nbytes, void* out) override;
+ Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
+ Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out) override;
+ Result<std::shared_ptr<Buffer>> ReadAt(int64_t position, int64_t nbytes) override;
+ Result<util::string_view> Peek(int64_t nbytes) override;
+
+ Result<int64_t> GetSize() override;
+ Status Seek(int64_t position) override;
+ Result<int64_t> Tell() const override;
+};
+
+} // namespace io
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/transform.cc b/contrib/libs/apache/arrow/cpp/src/arrow/io/transform.cc
index 3fdf5a7a9ba..61aef85701f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/transform.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/transform.cc
@@ -1,150 +1,150 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/io/transform.h"
-
-#include <algorithm>
-#include <cstring>
-#include <mutex>
-#include <random>
-#include <thread>
-#include <utility>
-
-#include "arrow/buffer.h"
-#include "arrow/io/util_internal.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/util/logging.h"
-
-namespace arrow {
-namespace io {
-
-struct TransformInputStream::Impl {
- std::shared_ptr<InputStream> wrapped_;
- TransformInputStream::TransformFunc transform_;
- std::shared_ptr<Buffer> pending_;
- int64_t pos_ = 0;
- bool closed_ = false;
-
- Impl(std::shared_ptr<InputStream> wrapped,
- TransformInputStream::TransformFunc transform)
- : wrapped_(std::move(wrapped)), transform_(std::move(transform)) {}
-
- void Close() {
- closed_ = true;
- pending_.reset();
- }
-
- Status CheckClosed() const {
- if (closed_) {
- return Status::Invalid("Operation on closed file");
- }
- return Status::OK();
- }
-};
-
-TransformInputStream::TransformInputStream(std::shared_ptr<InputStream> wrapped,
- TransformInputStream::TransformFunc transform)
- : impl_(new Impl{std::move(wrapped), std::move(transform)}) {}
-
-TransformInputStream::~TransformInputStream() {}
-
-Status TransformInputStream::Close() {
- impl_->Close();
- return impl_->wrapped_->Close();
-}
-
-Status TransformInputStream::Abort() { return impl_->wrapped_->Abort(); }
-
-bool TransformInputStream::closed() const { return impl_->closed_; }
-
-Result<std::shared_ptr<Buffer>> TransformInputStream::Read(int64_t nbytes) {
- RETURN_NOT_OK(impl_->CheckClosed());
-
- ARROW_ASSIGN_OR_RAISE(auto buf, AllocateResizableBuffer(nbytes));
- ARROW_ASSIGN_OR_RAISE(auto bytes_read, this->Read(nbytes, buf->mutable_data()));
- if (bytes_read < nbytes) {
- RETURN_NOT_OK(buf->Resize(bytes_read, /*shrink_to_fit=*/true));
- }
- return std::shared_ptr<Buffer>(std::move(buf));
-}
-
-Result<int64_t> TransformInputStream::Read(int64_t nbytes, void* out) {
- RETURN_NOT_OK(impl_->CheckClosed());
-
- if (nbytes == 0) {
- return 0;
- }
-
- int64_t avail_size = 0;
- std::vector<std::shared_ptr<Buffer>> avail;
- if (impl_->pending_) {
- avail.push_back(impl_->pending_);
- avail_size += impl_->pending_->size();
- }
- // Accumulate enough transformed data to satisfy read
- while (avail_size < nbytes) {
- ARROW_ASSIGN_OR_RAISE(auto buf, impl_->wrapped_->Read(nbytes));
- const bool have_eof = (buf->size() == 0);
- // Even if EOF is met, let the transform function run a last time
- // (for example to flush internal buffers)
- ARROW_ASSIGN_OR_RAISE(buf, impl_->transform_(std::move(buf)));
- avail_size += buf->size();
- avail.push_back(std::move(buf));
- if (have_eof) {
- break;
- }
- }
- DCHECK(!avail.empty());
-
- // Coalesce buffer data
- uint8_t* out_data = reinterpret_cast<uint8_t*>(out);
- int64_t copied_bytes = 0;
- for (size_t i = 0; i < avail.size() - 1; ++i) {
- // All buffers except the last fit fully into `nbytes`
- const auto buf = std::move(avail[i]);
- DCHECK_LE(buf->size(), nbytes);
- memcpy(out_data, buf->data(), static_cast<size_t>(buf->size()));
- out_data += buf->size();
- nbytes -= buf->size();
- copied_bytes += buf->size();
- }
- {
- // Last buffer: splice into `out` and `pending_`
- const auto buf = std::move(avail.back());
- const int64_t to_copy = std::min(buf->size(), nbytes);
- memcpy(out_data, buf->data(), static_cast<size_t>(to_copy));
- copied_bytes += to_copy;
- if (buf->size() > to_copy) {
- impl_->pending_ = SliceBuffer(buf, to_copy);
- } else {
- impl_->pending_.reset();
- }
- }
- impl_->pos_ += copied_bytes;
- return copied_bytes;
-}
-
-Result<int64_t> TransformInputStream::Tell() const {
- RETURN_NOT_OK(impl_->CheckClosed());
-
- return impl_->pos_;
-}
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/io/transform.h"
+
+#include <algorithm>
+#include <cstring>
+#include <mutex>
+#include <random>
+#include <thread>
+#include <utility>
+
+#include "arrow/buffer.h"
+#include "arrow/io/util_internal.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+namespace io {
+
+struct TransformInputStream::Impl {
+ std::shared_ptr<InputStream> wrapped_;
+ TransformInputStream::TransformFunc transform_;
+ std::shared_ptr<Buffer> pending_;
+ int64_t pos_ = 0;
+ bool closed_ = false;
+
+ Impl(std::shared_ptr<InputStream> wrapped,
+ TransformInputStream::TransformFunc transform)
+ : wrapped_(std::move(wrapped)), transform_(std::move(transform)) {}
+
+ void Close() {
+ closed_ = true;
+ pending_.reset();
+ }
+
+ Status CheckClosed() const {
+ if (closed_) {
+ return Status::Invalid("Operation on closed file");
+ }
+ return Status::OK();
+ }
+};
+
+TransformInputStream::TransformInputStream(std::shared_ptr<InputStream> wrapped,
+ TransformInputStream::TransformFunc transform)
+ : impl_(new Impl{std::move(wrapped), std::move(transform)}) {}
+
+TransformInputStream::~TransformInputStream() {}
+
+Status TransformInputStream::Close() {
+ impl_->Close();
+ return impl_->wrapped_->Close();
+}
+
+Status TransformInputStream::Abort() { return impl_->wrapped_->Abort(); }
+
+bool TransformInputStream::closed() const { return impl_->closed_; }
+
+Result<std::shared_ptr<Buffer>> TransformInputStream::Read(int64_t nbytes) {
+ RETURN_NOT_OK(impl_->CheckClosed());
+
+ ARROW_ASSIGN_OR_RAISE(auto buf, AllocateResizableBuffer(nbytes));
+ ARROW_ASSIGN_OR_RAISE(auto bytes_read, this->Read(nbytes, buf->mutable_data()));
+ if (bytes_read < nbytes) {
+ RETURN_NOT_OK(buf->Resize(bytes_read, /*shrink_to_fit=*/true));
+ }
+ return std::shared_ptr<Buffer>(std::move(buf));
+}
+
+Result<int64_t> TransformInputStream::Read(int64_t nbytes, void* out) {
+ RETURN_NOT_OK(impl_->CheckClosed());
+
+ if (nbytes == 0) {
+ return 0;
+ }
+
+ int64_t avail_size = 0;
+ std::vector<std::shared_ptr<Buffer>> avail;
+ if (impl_->pending_) {
+ avail.push_back(impl_->pending_);
+ avail_size += impl_->pending_->size();
+ }
+ // Accumulate enough transformed data to satisfy read
+ while (avail_size < nbytes) {
+ ARROW_ASSIGN_OR_RAISE(auto buf, impl_->wrapped_->Read(nbytes));
+ const bool have_eof = (buf->size() == 0);
+ // Even if EOF is met, let the transform function run a last time
+ // (for example to flush internal buffers)
+ ARROW_ASSIGN_OR_RAISE(buf, impl_->transform_(std::move(buf)));
+ avail_size += buf->size();
+ avail.push_back(std::move(buf));
+ if (have_eof) {
+ break;
+ }
+ }
+ DCHECK(!avail.empty());
+
+ // Coalesce buffer data
+ uint8_t* out_data = reinterpret_cast<uint8_t*>(out);
+ int64_t copied_bytes = 0;
+ for (size_t i = 0; i < avail.size() - 1; ++i) {
+ // All buffers except the last fit fully into `nbytes`
+ const auto buf = std::move(avail[i]);
+ DCHECK_LE(buf->size(), nbytes);
+ memcpy(out_data, buf->data(), static_cast<size_t>(buf->size()));
+ out_data += buf->size();
+ nbytes -= buf->size();
+ copied_bytes += buf->size();
+ }
+ {
+ // Last buffer: splice into `out` and `pending_`
+ const auto buf = std::move(avail.back());
+ const int64_t to_copy = std::min(buf->size(), nbytes);
+ memcpy(out_data, buf->data(), static_cast<size_t>(to_copy));
+ copied_bytes += to_copy;
+ if (buf->size() > to_copy) {
+ impl_->pending_ = SliceBuffer(buf, to_copy);
+ } else {
+ impl_->pending_.reset();
+ }
+ }
+ impl_->pos_ += copied_bytes;
+ return copied_bytes;
+}
+
+Result<int64_t> TransformInputStream::Tell() const {
+ RETURN_NOT_OK(impl_->CheckClosed());
+
+ return impl_->pos_;
+}
+
Result<std::shared_ptr<const KeyValueMetadata>> TransformInputStream::ReadMetadata() {
RETURN_NOT_OK(impl_->CheckClosed());
@@ -158,5 +158,5 @@ Future<std::shared_ptr<const KeyValueMetadata>> TransformInputStream::ReadMetada
return impl_->wrapped_->ReadMetadataAsync(io_context);
}
-} // namespace io
-} // namespace arrow
+} // namespace io
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/transform.h b/contrib/libs/apache/arrow/cpp/src/arrow/io/transform.h
index c117f275929..aed795f141a 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/transform.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/transform.h
@@ -1,60 +1,60 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Slow stream implementations, mainly for testing and benchmarking
-
-#pragma once
-
-#include <cstdint>
-#include <functional>
-#include <memory>
-#include <utility>
-
-#include "arrow/io/interfaces.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace io {
-
-class ARROW_EXPORT TransformInputStream : public InputStream {
- public:
- using TransformFunc =
- std::function<Result<std::shared_ptr<Buffer>>(const std::shared_ptr<Buffer>&)>;
-
- TransformInputStream(std::shared_ptr<InputStream> wrapped, TransformFunc transform);
- ~TransformInputStream() override;
-
- Status Close() override;
- Status Abort() override;
- bool closed() const override;
-
- Result<int64_t> Read(int64_t nbytes, void* out) override;
- Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Transform stream implementations
+
+#pragma once
+
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <utility>
+
+#include "arrow/io/interfaces.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace io {
+
+class ARROW_EXPORT TransformInputStream : public InputStream {
+ public:
+ using TransformFunc =
+ std::function<Result<std::shared_ptr<Buffer>>(const std::shared_ptr<Buffer>&)>;
+
+ TransformInputStream(std::shared_ptr<InputStream> wrapped, TransformFunc transform);
+ ~TransformInputStream() override;
+
+ Status Close() override;
+ Status Abort() override;
+ bool closed() const override;
+
+ Result<int64_t> Read(int64_t nbytes, void* out) override;
+ Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
+
Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata() override;
Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync(
const IOContext& io_context) override;
- Result<int64_t> Tell() const override;
-
- protected:
- struct Impl;
- std::unique_ptr<Impl> impl_;
-};
-
-} // namespace io
-} // namespace arrow
+ Result<int64_t> Tell() const override;
+
+ protected:
+ struct Impl;
+ std::unique_ptr<Impl> impl_;
+};
+
+} // namespace io
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/type_fwd.h b/contrib/libs/apache/arrow/cpp/src/arrow/io/type_fwd.h
index a2fd33bf360..09c37927de7 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/type_fwd.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/type_fwd.h
@@ -1,32 +1,32 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
#include "arrow/type_fwd.h"
#include "arrow/util/visibility.h"
-namespace arrow {
-namespace io {
-
-struct FileMode {
- enum type { READ, WRITE, READWRITE };
-};
-
+namespace arrow {
+namespace io {
+
+struct FileMode {
+ enum type { READ, WRITE, READWRITE };
+};
+
struct IOContext;
struct CacheOptions;
@@ -51,29 +51,29 @@ ARROW_EXPORT int GetIOThreadPoolCapacity();
/// The current number is returned by GetIOThreadPoolCapacity().
ARROW_EXPORT Status SetIOThreadPoolCapacity(int threads);
-class FileInterface;
-class Seekable;
-class Writable;
-class Readable;
-class OutputStream;
-class FileOutputStream;
-class InputStream;
-class ReadableFile;
-class RandomAccessFile;
-class MemoryMappedFile;
-class WritableFile;
-class ReadWriteFileInterface;
-
-class LatencyGenerator;
-
-class BufferReader;
-
-class BufferInputStream;
-class BufferOutputStream;
-class CompressedInputStream;
-class CompressedOutputStream;
-class BufferedInputStream;
-class BufferedOutputStream;
-
-} // namespace io
-} // namespace arrow
+class FileInterface;
+class Seekable;
+class Writable;
+class Readable;
+class OutputStream;
+class FileOutputStream;
+class InputStream;
+class ReadableFile;
+class RandomAccessFile;
+class MemoryMappedFile;
+class WritableFile;
+class ReadWriteFileInterface;
+
+class LatencyGenerator;
+
+class BufferReader;
+
+class BufferInputStream;
+class BufferOutputStream;
+class CompressedInputStream;
+class CompressedOutputStream;
+class BufferedInputStream;
+class BufferedOutputStream;
+
+} // namespace io
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/util_internal.h b/contrib/libs/apache/arrow/cpp/src/arrow/io/util_internal.h
index b1d75d1d0bd..adaaf428859 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/util_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/util_internal.h
@@ -1,57 +1,57 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <memory>
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
#include <utility>
-#include <vector>
-
-#include "arrow/io/interfaces.h"
+#include <vector>
+
+#include "arrow/io/interfaces.h"
#include "arrow/util/thread_pool.h"
-#include "arrow/util/type_fwd.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace io {
-namespace internal {
-
-ARROW_EXPORT void CloseFromDestructor(FileInterface* file);
-
-// Validate a (offset, size) region (as given to ReadAt) against
-// the file size. Return the actual read size.
-ARROW_EXPORT Result<int64_t> ValidateReadRange(int64_t offset, int64_t size,
- int64_t file_size);
-// Validate a (offset, size) region (as given to WriteAt) against
-// the file size. Short writes are not allowed.
-ARROW_EXPORT Status ValidateWriteRange(int64_t offset, int64_t size, int64_t file_size);
-
-// Validate a (offset, size) region (as given to ReadAt or WriteAt), without
-// knowing the file size.
-ARROW_EXPORT Status ValidateRange(int64_t offset, int64_t size);
-
-ARROW_EXPORT
-std::vector<ReadRange> CoalesceReadRanges(std::vector<ReadRange> ranges,
- int64_t hole_size_limit,
- int64_t range_size_limit);
-
-ARROW_EXPORT
-::arrow::internal::ThreadPool* GetIOThreadPool();
-
+#include "arrow/util/type_fwd.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace io {
+namespace internal {
+
+ARROW_EXPORT void CloseFromDestructor(FileInterface* file);
+
+// Validate a (offset, size) region (as given to ReadAt) against
+// the file size. Return the actual read size.
+ARROW_EXPORT Result<int64_t> ValidateReadRange(int64_t offset, int64_t size,
+ int64_t file_size);
+// Validate a (offset, size) region (as given to WriteAt) against
+// the file size. Short writes are not allowed.
+ARROW_EXPORT Status ValidateWriteRange(int64_t offset, int64_t size, int64_t file_size);
+
+// Validate a (offset, size) region (as given to ReadAt or WriteAt), without
+// knowing the file size.
+ARROW_EXPORT Status ValidateRange(int64_t offset, int64_t size);
+
+ARROW_EXPORT
+std::vector<ReadRange> CoalesceReadRanges(std::vector<ReadRange> ranges,
+ int64_t hole_size_limit,
+ int64_t range_size_limit);
+
+ARROW_EXPORT
+::arrow::internal::ThreadPool* GetIOThreadPool();
+
template <typename... SubmitArgs>
auto SubmitIO(IOContext io_context, SubmitArgs&&... submit_args)
-> decltype(std::declval<::arrow::internal::Executor*>()->Submit(submit_args...)) {
@@ -61,6 +61,6 @@ auto SubmitIO(IOContext io_context, SubmitArgs&&... submit_args)
std::forward<SubmitArgs>(submit_args)...);
}
-} // namespace internal
-} // namespace io
-} // namespace arrow
+} // namespace internal
+} // namespace io
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/dictionary.cc b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/dictionary.cc
index 3ab2c8b3847..59e28253e90 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/dictionary.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/dictionary.cc
@@ -1,93 +1,93 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/ipc/dictionary.h"
-
-#include <algorithm>
-#include <cstdint>
-#include <memory>
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/ipc/dictionary.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
#include <set>
-#include <unordered_map>
-#include <utility>
-#include <vector>
-
-#include "arrow/array.h"
-#include "arrow/array/concatenate.h"
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/array/concatenate.h"
#include "arrow/array/validate.h"
-#include "arrow/extension_type.h"
-#include "arrow/record_batch.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/logging.h"
-
-namespace std {
-template <>
-struct hash<arrow::FieldPath> {
- size_t operator()(const arrow::FieldPath& path) const { return path.hash(); }
-};
-} // namespace std
-
-namespace arrow {
-
-using internal::checked_cast;
-
-namespace ipc {
-
-using internal::FieldPosition;
-
-// ----------------------------------------------------------------------
-// DictionaryFieldMapper implementation
-
-struct DictionaryFieldMapper::Impl {
- using FieldPathMap = std::unordered_map<FieldPath, int64_t>;
-
- FieldPathMap field_path_to_id;
-
- void ImportSchema(const Schema& schema) {
- ImportFields(FieldPosition(), schema.fields());
- }
-
- Status AddSchemaFields(const Schema& schema) {
- if (!field_path_to_id.empty()) {
- return Status::Invalid("Non-empty DictionaryFieldMapper");
- }
- ImportSchema(schema);
- return Status::OK();
- }
-
- Status AddField(int64_t id, std::vector<int> field_path) {
- const auto pair = field_path_to_id.emplace(FieldPath(std::move(field_path)), id);
- if (!pair.second) {
- return Status::KeyError("Field already mapped to id");
- }
- return Status::OK();
- }
-
- Result<int64_t> GetFieldId(std::vector<int> field_path) const {
- const auto it = field_path_to_id.find(FieldPath(std::move(field_path)));
- if (it == field_path_to_id.end()) {
- return Status::KeyError("Dictionary field not found");
- }
- return it->second;
- }
-
- int num_fields() const { return static_cast<int>(field_path_to_id.size()); }
-
+#include "arrow/extension_type.h"
+#include "arrow/record_batch.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+
+namespace std {
+template <>
+struct hash<arrow::FieldPath> {
+ size_t operator()(const arrow::FieldPath& path) const { return path.hash(); }
+};
+} // namespace std
+
+namespace arrow {
+
+using internal::checked_cast;
+
+namespace ipc {
+
+using internal::FieldPosition;
+
+// ----------------------------------------------------------------------
+// DictionaryFieldMapper implementation
+
+struct DictionaryFieldMapper::Impl {
+ using FieldPathMap = std::unordered_map<FieldPath, int64_t>;
+
+ FieldPathMap field_path_to_id;
+
+ void ImportSchema(const Schema& schema) {
+ ImportFields(FieldPosition(), schema.fields());
+ }
+
+ Status AddSchemaFields(const Schema& schema) {
+ if (!field_path_to_id.empty()) {
+ return Status::Invalid("Non-empty DictionaryFieldMapper");
+ }
+ ImportSchema(schema);
+ return Status::OK();
+ }
+
+ Status AddField(int64_t id, std::vector<int> field_path) {
+ const auto pair = field_path_to_id.emplace(FieldPath(std::move(field_path)), id);
+ if (!pair.second) {
+ return Status::KeyError("Field already mapped to id");
+ }
+ return Status::OK();
+ }
+
+ Result<int64_t> GetFieldId(std::vector<int> field_path) const {
+ const auto it = field_path_to_id.find(FieldPath(std::move(field_path)));
+ if (it == field_path_to_id.end()) {
+ return Status::KeyError("Dictionary field not found");
+ }
+ return it->second;
+ }
+
+ int num_fields() const { return static_cast<int>(field_path_to_id.size()); }
+
int num_dicts() const {
std::set<int64_t> uniqueIds;
@@ -98,64 +98,64 @@ struct DictionaryFieldMapper::Impl {
return static_cast<int>(uniqueIds.size());
}
- private:
- void ImportFields(const FieldPosition& pos,
- const std::vector<std::shared_ptr<Field>>& fields) {
- for (int i = 0; i < static_cast<int>(fields.size()); ++i) {
- ImportField(pos.child(i), *fields[i]);
- }
- }
-
- void ImportField(const FieldPosition& pos, const Field& field) {
- const DataType* type = field.type().get();
- if (type->id() == Type::EXTENSION) {
- type = checked_cast<const ExtensionType&>(*type).storage_type().get();
- }
- if (type->id() == Type::DICTIONARY) {
- InsertPath(pos);
- // Import nested dictionaries
- ImportFields(pos,
- checked_cast<const DictionaryType&>(*type).value_type()->fields());
- } else {
- ImportFields(pos, type->fields());
- }
- }
-
- void InsertPath(const FieldPosition& pos) {
- const int64_t id = field_path_to_id.size();
- const auto pair = field_path_to_id.emplace(FieldPath(pos.path()), id);
- DCHECK(pair.second); // was inserted
- ARROW_UNUSED(pair);
- }
-};
-
-DictionaryFieldMapper::DictionaryFieldMapper() : impl_(new Impl) {}
-
-DictionaryFieldMapper::DictionaryFieldMapper(const Schema& schema) : impl_(new Impl) {
- impl_->ImportSchema(schema);
-}
-
-DictionaryFieldMapper::~DictionaryFieldMapper() {}
-
-Status DictionaryFieldMapper::AddSchemaFields(const Schema& schema) {
- return impl_->AddSchemaFields(schema);
-}
-
-Status DictionaryFieldMapper::AddField(int64_t id, std::vector<int> field_path) {
- return impl_->AddField(id, std::move(field_path));
-}
-
-Result<int64_t> DictionaryFieldMapper::GetFieldId(std::vector<int> field_path) const {
- return impl_->GetFieldId(std::move(field_path));
-}
-
-int DictionaryFieldMapper::num_fields() const { return impl_->num_fields(); }
-
+ private:
+ void ImportFields(const FieldPosition& pos,
+ const std::vector<std::shared_ptr<Field>>& fields) {
+ for (int i = 0; i < static_cast<int>(fields.size()); ++i) {
+ ImportField(pos.child(i), *fields[i]);
+ }
+ }
+
+ void ImportField(const FieldPosition& pos, const Field& field) {
+ const DataType* type = field.type().get();
+ if (type->id() == Type::EXTENSION) {
+ type = checked_cast<const ExtensionType&>(*type).storage_type().get();
+ }
+ if (type->id() == Type::DICTIONARY) {
+ InsertPath(pos);
+ // Import nested dictionaries
+ ImportFields(pos,
+ checked_cast<const DictionaryType&>(*type).value_type()->fields());
+ } else {
+ ImportFields(pos, type->fields());
+ }
+ }
+
+ void InsertPath(const FieldPosition& pos) {
+ const int64_t id = field_path_to_id.size();
+ const auto pair = field_path_to_id.emplace(FieldPath(pos.path()), id);
+ DCHECK(pair.second); // was inserted
+ ARROW_UNUSED(pair);
+ }
+};
+
+DictionaryFieldMapper::DictionaryFieldMapper() : impl_(new Impl) {}
+
+DictionaryFieldMapper::DictionaryFieldMapper(const Schema& schema) : impl_(new Impl) {
+ impl_->ImportSchema(schema);
+}
+
+DictionaryFieldMapper::~DictionaryFieldMapper() {}
+
+Status DictionaryFieldMapper::AddSchemaFields(const Schema& schema) {
+ return impl_->AddSchemaFields(schema);
+}
+
+Status DictionaryFieldMapper::AddField(int64_t id, std::vector<int> field_path) {
+ return impl_->AddField(id, std::move(field_path));
+}
+
+Result<int64_t> DictionaryFieldMapper::GetFieldId(std::vector<int> field_path) const {
+ return impl_->GetFieldId(std::move(field_path));
+}
+
+int DictionaryFieldMapper::num_fields() const { return impl_->num_fields(); }
+
int DictionaryFieldMapper::num_dicts() const { return impl_->num_dicts(); }
-// ----------------------------------------------------------------------
-// DictionaryMemo implementation
-
+// ----------------------------------------------------------------------
+// DictionaryMemo implementation
+
namespace {
bool HasUnresolvedNestedDict(const ArrayData& data) {
@@ -177,236 +177,236 @@ bool HasUnresolvedNestedDict(const ArrayData& data) {
} // namespace
-struct DictionaryMemo::Impl {
- // Map of dictionary id to dictionary array(s) (several in case of deltas)
- std::unordered_map<int64_t, ArrayDataVector> id_to_dictionary_;
- std::unordered_map<int64_t, std::shared_ptr<DataType>> id_to_type_;
- DictionaryFieldMapper mapper_;
-
- Result<decltype(id_to_dictionary_)::iterator> FindDictionary(int64_t id) {
- auto it = id_to_dictionary_.find(id);
- if (it == id_to_dictionary_.end()) {
- return Status::KeyError("Dictionary with id ", id, " not found");
- }
- return it;
- }
-
- Result<std::shared_ptr<ArrayData>> ReifyDictionary(int64_t id, MemoryPool* pool) {
- ARROW_ASSIGN_OR_RAISE(auto it, FindDictionary(id));
- ArrayDataVector* data_vector = &it->second;
-
- DCHECK(!data_vector->empty());
- if (data_vector->size() > 1) {
- // There are deltas, we need to concatenate them to the first dictionary.
- ArrayVector to_combine;
- to_combine.reserve(data_vector->size());
- // IMPORTANT: At this point, the dictionary data may be untrusted.
- // We need to validate it, as concatenation can crash on invalid or
- // corrupted data. Full validation is necessary for certain types
- // (for example nested dictionaries).
- for (const auto& data : *data_vector) {
+struct DictionaryMemo::Impl {
+ // Map of dictionary id to dictionary array(s) (several in case of deltas)
+ std::unordered_map<int64_t, ArrayDataVector> id_to_dictionary_;
+ std::unordered_map<int64_t, std::shared_ptr<DataType>> id_to_type_;
+ DictionaryFieldMapper mapper_;
+
+ Result<decltype(id_to_dictionary_)::iterator> FindDictionary(int64_t id) {
+ auto it = id_to_dictionary_.find(id);
+ if (it == id_to_dictionary_.end()) {
+ return Status::KeyError("Dictionary with id ", id, " not found");
+ }
+ return it;
+ }
+
+ Result<std::shared_ptr<ArrayData>> ReifyDictionary(int64_t id, MemoryPool* pool) {
+ ARROW_ASSIGN_OR_RAISE(auto it, FindDictionary(id));
+ ArrayDataVector* data_vector = &it->second;
+
+ DCHECK(!data_vector->empty());
+ if (data_vector->size() > 1) {
+ // There are deltas, we need to concatenate them to the first dictionary.
+ ArrayVector to_combine;
+ to_combine.reserve(data_vector->size());
+ // IMPORTANT: At this point, the dictionary data may be untrusted.
+ // We need to validate it, as concatenation can crash on invalid or
+ // corrupted data. Full validation is necessary for certain types
+ // (for example nested dictionaries).
+ for (const auto& data : *data_vector) {
if (HasUnresolvedNestedDict(*data)) {
return Status::NotImplemented(
"Encountered delta dictionary with an unresolved nested dictionary");
}
RETURN_NOT_OK(::arrow::internal::ValidateArray(*data));
RETURN_NOT_OK(::arrow::internal::ValidateArrayFull(*data));
- to_combine.push_back(MakeArray(data));
- }
- ARROW_ASSIGN_OR_RAISE(auto combined_dict, Concatenate(to_combine, pool));
- *data_vector = {combined_dict->data()};
- }
-
- return data_vector->back();
- }
-};
-
-DictionaryMemo::DictionaryMemo() : impl_(new Impl()) {}
-
-DictionaryMemo::~DictionaryMemo() {}
-
-DictionaryFieldMapper& DictionaryMemo::fields() { return impl_->mapper_; }
-
-const DictionaryFieldMapper& DictionaryMemo::fields() const { return impl_->mapper_; }
-
-Result<std::shared_ptr<DataType>> DictionaryMemo::GetDictionaryType(int64_t id) const {
- const auto it = impl_->id_to_type_.find(id);
- if (it == impl_->id_to_type_.end()) {
- return Status::KeyError("No record of dictionary type with id ", id);
- }
- return it->second;
-}
-
-// Returns KeyError if dictionary not found
-Result<std::shared_ptr<ArrayData>> DictionaryMemo::GetDictionary(int64_t id,
- MemoryPool* pool) const {
- return impl_->ReifyDictionary(id, pool);
-}
-
-Status DictionaryMemo::AddDictionaryType(int64_t id,
- const std::shared_ptr<DataType>& type) {
- // AddDictionaryType expects the dict value type
- DCHECK_NE(type->id(), Type::DICTIONARY);
- const auto pair = impl_->id_to_type_.emplace(id, type);
- if (!pair.second && !pair.first->second->Equals(*type)) {
- return Status::KeyError("Conflicting dictionary types for id ", id);
- }
- return Status::OK();
-}
-
-bool DictionaryMemo::HasDictionary(int64_t id) const {
- const auto it = impl_->id_to_dictionary_.find(id);
- return it != impl_->id_to_dictionary_.end();
-}
-
-Status DictionaryMemo::AddDictionary(int64_t id,
- const std::shared_ptr<ArrayData>& dictionary) {
- const auto pair = impl_->id_to_dictionary_.emplace(id, ArrayDataVector{dictionary});
- if (!pair.second) {
- return Status::KeyError("Dictionary with id ", id, " already exists");
- }
- return Status::OK();
-}
-
-Status DictionaryMemo::AddDictionaryDelta(int64_t id,
- const std::shared_ptr<ArrayData>& dictionary) {
- ARROW_ASSIGN_OR_RAISE(auto it, impl_->FindDictionary(id));
- it->second.push_back(dictionary);
- return Status::OK();
-}
-
-Result<bool> DictionaryMemo::AddOrReplaceDictionary(
- int64_t id, const std::shared_ptr<ArrayData>& dictionary) {
- ArrayDataVector value{dictionary};
-
- auto pair = impl_->id_to_dictionary_.emplace(id, value);
- if (pair.second) {
- // Inserted
- return true;
- } else {
- // Update existing value
- pair.first->second = std::move(value);
- return false;
- }
-}
-
-// ----------------------------------------------------------------------
-// CollectDictionaries implementation
-
-namespace {
-
-struct DictionaryCollector {
- const DictionaryFieldMapper& mapper_;
- DictionaryVector dictionaries_;
-
- Status WalkChildren(const FieldPosition& position, const DataType& type,
- const Array& array) {
- for (int i = 0; i < type.num_fields(); ++i) {
- auto boxed_child = MakeArray(array.data()->child_data[i]);
- RETURN_NOT_OK(Visit(position.child(i), type.field(i), boxed_child.get()));
- }
- return Status::OK();
- }
-
- Status Visit(const FieldPosition& position, const std::shared_ptr<Field>& field,
- const Array* array) {
- const DataType* type = array->type().get();
-
- if (type->id() == Type::EXTENSION) {
- type = checked_cast<const ExtensionType&>(*type).storage_type().get();
- array = checked_cast<const ExtensionArray&>(*array).storage().get();
- }
- if (type->id() == Type::DICTIONARY) {
- const auto& dict_array = checked_cast<const DictionaryArray&>(*array);
- auto dictionary = dict_array.dictionary();
-
- // Traverse the dictionary to first gather any nested dictionaries
- // (so that they appear in the output before their parent)
- const auto& dict_type = checked_cast<const DictionaryType&>(*type);
- RETURN_NOT_OK(WalkChildren(position, *dict_type.value_type(), *dictionary));
-
- // Then record the dictionary itself
- ARROW_ASSIGN_OR_RAISE(int64_t id, mapper_.GetFieldId(position.path()));
- dictionaries_.emplace_back(id, dictionary);
- } else {
- RETURN_NOT_OK(WalkChildren(position, *type, *array));
- }
- return Status::OK();
- }
-
- Status Collect(const RecordBatch& batch) {
- FieldPosition position;
- const Schema& schema = *batch.schema();
- dictionaries_.reserve(mapper_.num_fields());
-
- for (int i = 0; i < schema.num_fields(); ++i) {
- RETURN_NOT_OK(Visit(position.child(i), schema.field(i), batch.column(i).get()));
- }
- return Status::OK();
- }
-};
-
-struct DictionaryResolver {
- const DictionaryMemo& memo_;
- MemoryPool* pool_;
-
- Status VisitChildren(const ArrayDataVector& data_vector, FieldPosition parent_pos) {
- int i = 0;
- for (const auto& data : data_vector) {
- // Some data entries may be missing if reading only a subset of the schema
- if (data != nullptr) {
- RETURN_NOT_OK(VisitField(parent_pos.child(i), data.get()));
- }
- ++i;
- }
- return Status::OK();
- }
-
- Status VisitField(FieldPosition field_pos, ArrayData* data) {
- const DataType* type = data->type.get();
- if (type->id() == Type::EXTENSION) {
- type = checked_cast<const ExtensionType&>(*type).storage_type().get();
- }
- if (type->id() == Type::DICTIONARY) {
- ARROW_ASSIGN_OR_RAISE(const int64_t id,
- memo_.fields().GetFieldId(field_pos.path()));
- ARROW_ASSIGN_OR_RAISE(data->dictionary, memo_.GetDictionary(id, pool_));
- // Resolve nested dictionary data
- RETURN_NOT_OK(VisitField(field_pos, data->dictionary.get()));
- }
- // Resolve child data
- return VisitChildren(data->child_data, field_pos);
- }
-};
-
-} // namespace
-
-Result<DictionaryVector> CollectDictionaries(const RecordBatch& batch,
- const DictionaryFieldMapper& mapper) {
- DictionaryCollector collector{mapper, {}};
- RETURN_NOT_OK(collector.Collect(batch));
- return std::move(collector.dictionaries_);
-}
-
-namespace internal {
-
-Status CollectDictionaries(const RecordBatch& batch, DictionaryMemo* memo) {
- RETURN_NOT_OK(memo->fields().AddSchemaFields(*batch.schema()));
- ARROW_ASSIGN_OR_RAISE(const auto dictionaries,
- CollectDictionaries(batch, memo->fields()));
- for (const auto& pair : dictionaries) {
- RETURN_NOT_OK(memo->AddDictionary(pair.first, pair.second->data()));
- }
- return Status::OK();
-}
-
-} // namespace internal
-
-Status ResolveDictionaries(const ArrayDataVector& columns, const DictionaryMemo& memo,
- MemoryPool* pool) {
- DictionaryResolver resolver{memo, pool};
- return resolver.VisitChildren(columns, FieldPosition());
-}
-
-} // namespace ipc
-} // namespace arrow
+ to_combine.push_back(MakeArray(data));
+ }
+ ARROW_ASSIGN_OR_RAISE(auto combined_dict, Concatenate(to_combine, pool));
+ *data_vector = {combined_dict->data()};
+ }
+
+ return data_vector->back();
+ }
+};
+
+DictionaryMemo::DictionaryMemo() : impl_(new Impl()) {}
+
+DictionaryMemo::~DictionaryMemo() {}
+
+DictionaryFieldMapper& DictionaryMemo::fields() { return impl_->mapper_; }
+
+const DictionaryFieldMapper& DictionaryMemo::fields() const { return impl_->mapper_; }
+
+Result<std::shared_ptr<DataType>> DictionaryMemo::GetDictionaryType(int64_t id) const {
+ const auto it = impl_->id_to_type_.find(id);
+ if (it == impl_->id_to_type_.end()) {
+ return Status::KeyError("No record of dictionary type with id ", id);
+ }
+ return it->second;
+}
+
+// Returns KeyError if dictionary not found
+Result<std::shared_ptr<ArrayData>> DictionaryMemo::GetDictionary(int64_t id,
+ MemoryPool* pool) const {
+ return impl_->ReifyDictionary(id, pool);
+}
+
+Status DictionaryMemo::AddDictionaryType(int64_t id,
+ const std::shared_ptr<DataType>& type) {
+ // AddDictionaryType expects the dict value type
+ DCHECK_NE(type->id(), Type::DICTIONARY);
+ const auto pair = impl_->id_to_type_.emplace(id, type);
+ if (!pair.second && !pair.first->second->Equals(*type)) {
+ return Status::KeyError("Conflicting dictionary types for id ", id);
+ }
+ return Status::OK();
+}
+
+bool DictionaryMemo::HasDictionary(int64_t id) const {
+ const auto it = impl_->id_to_dictionary_.find(id);
+ return it != impl_->id_to_dictionary_.end();
+}
+
+Status DictionaryMemo::AddDictionary(int64_t id,
+ const std::shared_ptr<ArrayData>& dictionary) {
+ const auto pair = impl_->id_to_dictionary_.emplace(id, ArrayDataVector{dictionary});
+ if (!pair.second) {
+ return Status::KeyError("Dictionary with id ", id, " already exists");
+ }
+ return Status::OK();
+}
+
+Status DictionaryMemo::AddDictionaryDelta(int64_t id,
+ const std::shared_ptr<ArrayData>& dictionary) {
+ ARROW_ASSIGN_OR_RAISE(auto it, impl_->FindDictionary(id));
+ it->second.push_back(dictionary);
+ return Status::OK();
+}
+
+Result<bool> DictionaryMemo::AddOrReplaceDictionary(
+ int64_t id, const std::shared_ptr<ArrayData>& dictionary) {
+ ArrayDataVector value{dictionary};
+
+ auto pair = impl_->id_to_dictionary_.emplace(id, value);
+ if (pair.second) {
+ // Inserted
+ return true;
+ } else {
+ // Update existing value
+ pair.first->second = std::move(value);
+ return false;
+ }
+}
+
+// ----------------------------------------------------------------------
+// CollectDictionaries implementation
+
+namespace {
+
+struct DictionaryCollector {
+ const DictionaryFieldMapper& mapper_;
+ DictionaryVector dictionaries_;
+
+ Status WalkChildren(const FieldPosition& position, const DataType& type,
+ const Array& array) {
+ for (int i = 0; i < type.num_fields(); ++i) {
+ auto boxed_child = MakeArray(array.data()->child_data[i]);
+ RETURN_NOT_OK(Visit(position.child(i), type.field(i), boxed_child.get()));
+ }
+ return Status::OK();
+ }
+
+ Status Visit(const FieldPosition& position, const std::shared_ptr<Field>& field,
+ const Array* array) {
+ const DataType* type = array->type().get();
+
+ if (type->id() == Type::EXTENSION) {
+ type = checked_cast<const ExtensionType&>(*type).storage_type().get();
+ array = checked_cast<const ExtensionArray&>(*array).storage().get();
+ }
+ if (type->id() == Type::DICTIONARY) {
+ const auto& dict_array = checked_cast<const DictionaryArray&>(*array);
+ auto dictionary = dict_array.dictionary();
+
+ // Traverse the dictionary to first gather any nested dictionaries
+ // (so that they appear in the output before their parent)
+ const auto& dict_type = checked_cast<const DictionaryType&>(*type);
+ RETURN_NOT_OK(WalkChildren(position, *dict_type.value_type(), *dictionary));
+
+ // Then record the dictionary itself
+ ARROW_ASSIGN_OR_RAISE(int64_t id, mapper_.GetFieldId(position.path()));
+ dictionaries_.emplace_back(id, dictionary);
+ } else {
+ RETURN_NOT_OK(WalkChildren(position, *type, *array));
+ }
+ return Status::OK();
+ }
+
+ Status Collect(const RecordBatch& batch) {
+ FieldPosition position;
+ const Schema& schema = *batch.schema();
+ dictionaries_.reserve(mapper_.num_fields());
+
+ for (int i = 0; i < schema.num_fields(); ++i) {
+ RETURN_NOT_OK(Visit(position.child(i), schema.field(i), batch.column(i).get()));
+ }
+ return Status::OK();
+ }
+};
+
+struct DictionaryResolver {
+ const DictionaryMemo& memo_;
+ MemoryPool* pool_;
+
+ Status VisitChildren(const ArrayDataVector& data_vector, FieldPosition parent_pos) {
+ int i = 0;
+ for (const auto& data : data_vector) {
+ // Some data entries may be missing if reading only a subset of the schema
+ if (data != nullptr) {
+ RETURN_NOT_OK(VisitField(parent_pos.child(i), data.get()));
+ }
+ ++i;
+ }
+ return Status::OK();
+ }
+
+ Status VisitField(FieldPosition field_pos, ArrayData* data) {
+ const DataType* type = data->type.get();
+ if (type->id() == Type::EXTENSION) {
+ type = checked_cast<const ExtensionType&>(*type).storage_type().get();
+ }
+ if (type->id() == Type::DICTIONARY) {
+ ARROW_ASSIGN_OR_RAISE(const int64_t id,
+ memo_.fields().GetFieldId(field_pos.path()));
+ ARROW_ASSIGN_OR_RAISE(data->dictionary, memo_.GetDictionary(id, pool_));
+ // Resolve nested dictionary data
+ RETURN_NOT_OK(VisitField(field_pos, data->dictionary.get()));
+ }
+ // Resolve child data
+ return VisitChildren(data->child_data, field_pos);
+ }
+};
+
+} // namespace
+
+Result<DictionaryVector> CollectDictionaries(const RecordBatch& batch,
+ const DictionaryFieldMapper& mapper) {
+ DictionaryCollector collector{mapper, {}};
+ RETURN_NOT_OK(collector.Collect(batch));
+ return std::move(collector.dictionaries_);
+}
+
+namespace internal {
+
+Status CollectDictionaries(const RecordBatch& batch, DictionaryMemo* memo) {
+ RETURN_NOT_OK(memo->fields().AddSchemaFields(*batch.schema()));
+ ARROW_ASSIGN_OR_RAISE(const auto dictionaries,
+ CollectDictionaries(batch, memo->fields()));
+ for (const auto& pair : dictionaries) {
+ RETURN_NOT_OK(memo->AddDictionary(pair.first, pair.second->data()));
+ }
+ return Status::OK();
+}
+
+} // namespace internal
+
+Status ResolveDictionaries(const ArrayDataVector& columns, const DictionaryMemo& memo,
+ MemoryPool* pool) {
+ DictionaryResolver resolver{memo, pool};
+ return resolver.VisitChildren(columns, FieldPosition());
+}
+
+} // namespace ipc
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/dictionary.h b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/dictionary.h
index e4287cb1974..29f8144372b 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/dictionary.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/dictionary.h
@@ -1,177 +1,177 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Tools for dictionaries in IPC context
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <utility>
-#include <vector>
-
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace ipc {
-
-namespace internal {
-
-class FieldPosition {
- public:
- FieldPosition() : parent_(NULLPTR), index_(-1), depth_(0) {}
-
- FieldPosition child(int index) const { return {this, index}; }
-
- std::vector<int> path() const {
- std::vector<int> path(depth_);
- const FieldPosition* cur = this;
- for (int i = depth_ - 1; i >= 0; --i) {
- path[i] = cur->index_;
- cur = cur->parent_;
- }
- return path;
- }
-
- protected:
- FieldPosition(const FieldPosition* parent, int index)
- : parent_(parent), index_(index), depth_(parent->depth_ + 1) {}
-
- const FieldPosition* parent_;
- int index_;
- int depth_;
-};
-
-} // namespace internal
-
-/// \brief Map fields in a schema to dictionary ids
-///
-/// The mapping is structural, i.e. the field path (as a vector of indices)
-/// is associated to the dictionary id. A dictionary id may be associated
-/// to multiple fields.
-class ARROW_EXPORT DictionaryFieldMapper {
- public:
- DictionaryFieldMapper();
- explicit DictionaryFieldMapper(const Schema& schema);
- ~DictionaryFieldMapper();
-
- Status AddSchemaFields(const Schema& schema);
- Status AddField(int64_t id, std::vector<int> field_path);
-
- Result<int64_t> GetFieldId(std::vector<int> field_path) const;
-
- int num_fields() const;
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Tools for dictionaries in IPC context
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace ipc {
+
+namespace internal {
+
+class FieldPosition {
+ public:
+ FieldPosition() : parent_(NULLPTR), index_(-1), depth_(0) {}
+
+ FieldPosition child(int index) const { return {this, index}; }
+
+ std::vector<int> path() const {
+ std::vector<int> path(depth_);
+ const FieldPosition* cur = this;
+ for (int i = depth_ - 1; i >= 0; --i) {
+ path[i] = cur->index_;
+ cur = cur->parent_;
+ }
+ return path;
+ }
+
+ protected:
+ FieldPosition(const FieldPosition* parent, int index)
+ : parent_(parent), index_(index), depth_(parent->depth_ + 1) {}
+
+ const FieldPosition* parent_;
+ int index_;
+ int depth_;
+};
+
+} // namespace internal
+
+/// \brief Map fields in a schema to dictionary ids
+///
+/// The mapping is structural, i.e. the field path (as a vector of indices)
+/// is associated to the dictionary id. A dictionary id may be associated
+/// to multiple fields.
+class ARROW_EXPORT DictionaryFieldMapper {
+ public:
+ DictionaryFieldMapper();
+ explicit DictionaryFieldMapper(const Schema& schema);
+ ~DictionaryFieldMapper();
+
+ Status AddSchemaFields(const Schema& schema);
+ Status AddField(int64_t id, std::vector<int> field_path);
+
+ Result<int64_t> GetFieldId(std::vector<int> field_path) const;
+
+ int num_fields() const;
+
/// \brief Returns number of unique dictionaries, taking into
/// account that different fields can share the same dictionary.
int num_dicts() const;
- private:
- struct Impl;
- std::unique_ptr<Impl> impl_;
-};
-
-using DictionaryVector = std::vector<std::pair<int64_t, std::shared_ptr<Array>>>;
-
-/// \brief Memoization data structure for reading dictionaries from IPC streams
-///
-/// This structure tracks the following associations:
-/// - field position (structural) -> dictionary id
-/// - dictionary id -> value type
-/// - dictionary id -> dictionary (value) data
-///
-/// Together, they allow resolving dictionary data when reading an IPC stream,
-/// using metadata recorded in the schema message and data recorded in the
-/// dictionary batch messages (see ResolveDictionaries).
-///
-/// This structure isn't useful for writing an IPC stream, where only
-/// DictionaryFieldMapper is necessary.
-class ARROW_EXPORT DictionaryMemo {
- public:
- DictionaryMemo();
- ~DictionaryMemo();
-
- DictionaryFieldMapper& fields();
- const DictionaryFieldMapper& fields() const;
-
- /// \brief Return current dictionary corresponding to a particular
- /// id. Returns KeyError if id not found
- Result<std::shared_ptr<ArrayData>> GetDictionary(int64_t id, MemoryPool* pool) const;
-
- /// \brief Return dictionary value type corresponding to a
- /// particular dictionary id.
- Result<std::shared_ptr<DataType>> GetDictionaryType(int64_t id) const;
-
- /// \brief Return true if we have a dictionary for the input id
- bool HasDictionary(int64_t id) const;
-
- /// \brief Add a dictionary value type to the memo with a particular id.
- /// Returns KeyError if a different type is already registered with the same id.
- Status AddDictionaryType(int64_t id, const std::shared_ptr<DataType>& type);
-
- /// \brief Add a dictionary to the memo with a particular id. Returns
- /// KeyError if that dictionary already exists
- Status AddDictionary(int64_t id, const std::shared_ptr<ArrayData>& dictionary);
-
- /// \brief Append a dictionary delta to the memo with a particular id. Returns
- /// KeyError if that dictionary does not exists
- Status AddDictionaryDelta(int64_t id, const std::shared_ptr<ArrayData>& dictionary);
-
- /// \brief Add a dictionary to the memo if it does not have one with the id,
- /// otherwise, replace the dictionary with the new one.
- ///
- /// Return true if the dictionary was added, false if replaced.
- Result<bool> AddOrReplaceDictionary(int64_t id,
- const std::shared_ptr<ArrayData>& dictionary);
-
- private:
- struct Impl;
- std::unique_ptr<Impl> impl_;
-};
-
-// For writing: collect dictionary entries to write to the IPC stream, in order
-// (i.e. inner dictionaries before dependent outer dictionaries).
-ARROW_EXPORT
-Result<DictionaryVector> CollectDictionaries(const RecordBatch& batch,
- const DictionaryFieldMapper& mapper);
-
-// For reading: resolve all dictionaries in columns, according to the field
-// mapping and dictionary arrays stored in memo.
-// Columns may be sparse, i.e. some entries may be left null
-// (e.g. if an inclusion mask was used).
-ARROW_EXPORT
-Status ResolveDictionaries(const ArrayDataVector& columns, const DictionaryMemo& memo,
- MemoryPool* pool);
-
-namespace internal {
-
-// Like CollectDictionaries above, but uses the memo's DictionaryFieldMapper
-// and all collected dictionaries are added to the memo using AddDictionary.
-//
-// This is used as a shortcut in some roundtripping tests (to avoid emitting
-// any actual dictionary batches).
-ARROW_EXPORT
-Status CollectDictionaries(const RecordBatch& batch, DictionaryMemo* memo);
-
-} // namespace internal
-
-} // namespace ipc
-} // namespace arrow
+ private:
+ struct Impl;
+ std::unique_ptr<Impl> impl_;
+};
+
+using DictionaryVector = std::vector<std::pair<int64_t, std::shared_ptr<Array>>>;
+
+/// \brief Memoization data structure for reading dictionaries from IPC streams
+///
+/// This structure tracks the following associations:
+/// - field position (structural) -> dictionary id
+/// - dictionary id -> value type
+/// - dictionary id -> dictionary (value) data
+///
+/// Together, they allow resolving dictionary data when reading an IPC stream,
+/// using metadata recorded in the schema message and data recorded in the
+/// dictionary batch messages (see ResolveDictionaries).
+///
+/// This structure isn't useful for writing an IPC stream, where only
+/// DictionaryFieldMapper is necessary.
+class ARROW_EXPORT DictionaryMemo {
+ public:
+ DictionaryMemo();
+ ~DictionaryMemo();
+
+ DictionaryFieldMapper& fields();
+ const DictionaryFieldMapper& fields() const;
+
+ /// \brief Return current dictionary corresponding to a particular
+ /// id. Returns KeyError if id not found
+ Result<std::shared_ptr<ArrayData>> GetDictionary(int64_t id, MemoryPool* pool) const;
+
+ /// \brief Return dictionary value type corresponding to a
+ /// particular dictionary id.
+ Result<std::shared_ptr<DataType>> GetDictionaryType(int64_t id) const;
+
+ /// \brief Return true if we have a dictionary for the input id
+ bool HasDictionary(int64_t id) const;
+
+ /// \brief Add a dictionary value type to the memo with a particular id.
+ /// Returns KeyError if a different type is already registered with the same id.
+ Status AddDictionaryType(int64_t id, const std::shared_ptr<DataType>& type);
+
+ /// \brief Add a dictionary to the memo with a particular id. Returns
+ /// KeyError if that dictionary already exists
+ Status AddDictionary(int64_t id, const std::shared_ptr<ArrayData>& dictionary);
+
+ /// \brief Append a dictionary delta to the memo with a particular id. Returns
+ /// KeyError if that dictionary does not exists
+ Status AddDictionaryDelta(int64_t id, const std::shared_ptr<ArrayData>& dictionary);
+
+ /// \brief Add a dictionary to the memo if it does not have one with the id,
+ /// otherwise, replace the dictionary with the new one.
+ ///
+ /// Return true if the dictionary was added, false if replaced.
+ Result<bool> AddOrReplaceDictionary(int64_t id,
+ const std::shared_ptr<ArrayData>& dictionary);
+
+ private:
+ struct Impl;
+ std::unique_ptr<Impl> impl_;
+};
+
+// For writing: collect dictionary entries to write to the IPC stream, in order
+// (i.e. inner dictionaries before dependent outer dictionaries).
+ARROW_EXPORT
+Result<DictionaryVector> CollectDictionaries(const RecordBatch& batch,
+ const DictionaryFieldMapper& mapper);
+
+// For reading: resolve all dictionaries in columns, according to the field
+// mapping and dictionary arrays stored in memo.
+// Columns may be sparse, i.e. some entries may be left null
+// (e.g. if an inclusion mask was used).
+ARROW_EXPORT
+Status ResolveDictionaries(const ArrayDataVector& columns, const DictionaryMemo& memo,
+ MemoryPool* pool);
+
+namespace internal {
+
+// Like CollectDictionaries above, but uses the memo's DictionaryFieldMapper
+// and all collected dictionaries are added to the memo using AddDictionary.
+//
+// This is used as a shortcut in some roundtripping tests (to avoid emitting
+// any actual dictionary batches).
+ARROW_EXPORT
+Status CollectDictionaries(const RecordBatch& batch, DictionaryMemo* memo);
+
+} // namespace internal
+
+} // namespace ipc
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/feather.cc b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/feather.cc
index b1c30eec0b3..0e242550845 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/feather.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/feather.cc
@@ -1,819 +1,819 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/ipc/feather.h"
-
-#include <algorithm>
-#include <cstdint>
-#include <cstring>
-#include <memory>
-#include <sstream> // IWYU pragma: keep
-#include <string>
-#include <type_traits>
-#include <utility>
-#include <vector>
-
-#include <flatbuffers/flatbuffers.h>
-
-#include "arrow/array.h"
-#include "arrow/buffer.h"
-#include "arrow/chunked_array.h"
-#include "arrow/io/interfaces.h"
-#include "arrow/ipc/metadata_internal.h"
-#include "arrow/ipc/options.h"
-#include "arrow/ipc/reader.h"
-#include "arrow/ipc/util.h"
-#include "arrow/ipc/writer.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/table.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/make_unique.h"
-#include "arrow/visitor_inline.h"
-
-#include "generated/feather_generated.h"
-
-namespace arrow {
-
-using internal::checked_cast;
-using internal::make_unique;
-
-class ExtensionType;
-
-namespace ipc {
-namespace feather {
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/ipc/feather.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <sstream> // IWYU pragma: keep
+#include <string>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include <flatbuffers/flatbuffers.h>
+
+#include "arrow/array.h"
+#include "arrow/buffer.h"
+#include "arrow/chunked_array.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/ipc/metadata_internal.h"
+#include "arrow/ipc/options.h"
+#include "arrow/ipc/reader.h"
+#include "arrow/ipc/util.h"
+#include "arrow/ipc/writer.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/table.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/make_unique.h"
+#include "arrow/visitor_inline.h"
+
+#include "generated/feather_generated.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+using internal::make_unique;
+
+class ExtensionType;
+
+namespace ipc {
+namespace feather {
+
namespace {
-
+
using FBB = flatbuffers::FlatBufferBuilder;
-
+
constexpr const char* kFeatherV1MagicBytes = "FEA1";
constexpr const int kFeatherDefaultAlignment = 8;
const uint8_t kPaddingBytes[kFeatherDefaultAlignment] = {0};
inline int64_t PaddedLength(int64_t nbytes) {
- static const int64_t alignment = kFeatherDefaultAlignment;
- return ((nbytes + alignment - 1) / alignment) * alignment;
-}
-
-Status WritePaddedWithOffset(io::OutputStream* stream, const uint8_t* data,
- int64_t bit_offset, const int64_t length,
- int64_t* bytes_written) {
- data = data + bit_offset / 8;
- uint8_t bit_shift = static_cast<uint8_t>(bit_offset % 8);
- if (bit_offset == 0) {
- RETURN_NOT_OK(stream->Write(data, length));
- } else {
- constexpr int64_t buffersize = 256;
- uint8_t buffer[buffersize];
- const uint8_t lshift = static_cast<uint8_t>(8 - bit_shift);
- const uint8_t* buffer_end = buffer + buffersize;
- uint8_t* buffer_it = buffer;
-
- for (const uint8_t* end = data + length; data != end;) {
- uint8_t r = static_cast<uint8_t>(*data++ >> bit_shift);
- uint8_t l = static_cast<uint8_t>(*data << lshift);
- uint8_t value = l | r;
- *buffer_it++ = value;
- if (buffer_it == buffer_end) {
- RETURN_NOT_OK(stream->Write(buffer, buffersize));
- buffer_it = buffer;
- }
- }
- if (buffer_it != buffer) {
- RETURN_NOT_OK(stream->Write(buffer, buffer_it - buffer));
- }
- }
-
- int64_t remainder = PaddedLength(length) - length;
- if (remainder != 0) {
- RETURN_NOT_OK(stream->Write(kPaddingBytes, remainder));
- }
- *bytes_written = length + remainder;
- return Status::OK();
-}
-
-Status WritePadded(io::OutputStream* stream, const uint8_t* data, int64_t length,
- int64_t* bytes_written) {
- return WritePaddedWithOffset(stream, data, /*bit_offset=*/0, length, bytes_written);
-}
-
-struct ColumnType {
- enum type { PRIMITIVE, CATEGORY, TIMESTAMP, DATE, TIME };
-};
-
+ static const int64_t alignment = kFeatherDefaultAlignment;
+ return ((nbytes + alignment - 1) / alignment) * alignment;
+}
+
+Status WritePaddedWithOffset(io::OutputStream* stream, const uint8_t* data,
+ int64_t bit_offset, const int64_t length,
+ int64_t* bytes_written) {
+ data = data + bit_offset / 8;
+ uint8_t bit_shift = static_cast<uint8_t>(bit_offset % 8);
+ if (bit_offset == 0) {
+ RETURN_NOT_OK(stream->Write(data, length));
+ } else {
+ constexpr int64_t buffersize = 256;
+ uint8_t buffer[buffersize];
+ const uint8_t lshift = static_cast<uint8_t>(8 - bit_shift);
+ const uint8_t* buffer_end = buffer + buffersize;
+ uint8_t* buffer_it = buffer;
+
+ for (const uint8_t* end = data + length; data != end;) {
+ uint8_t r = static_cast<uint8_t>(*data++ >> bit_shift);
+ uint8_t l = static_cast<uint8_t>(*data << lshift);
+ uint8_t value = l | r;
+ *buffer_it++ = value;
+ if (buffer_it == buffer_end) {
+ RETURN_NOT_OK(stream->Write(buffer, buffersize));
+ buffer_it = buffer;
+ }
+ }
+ if (buffer_it != buffer) {
+ RETURN_NOT_OK(stream->Write(buffer, buffer_it - buffer));
+ }
+ }
+
+ int64_t remainder = PaddedLength(length) - length;
+ if (remainder != 0) {
+ RETURN_NOT_OK(stream->Write(kPaddingBytes, remainder));
+ }
+ *bytes_written = length + remainder;
+ return Status::OK();
+}
+
+Status WritePadded(io::OutputStream* stream, const uint8_t* data, int64_t length,
+ int64_t* bytes_written) {
+ return WritePaddedWithOffset(stream, data, /*bit_offset=*/0, length, bytes_written);
+}
+
+struct ColumnType {
+ enum type { PRIMITIVE, CATEGORY, TIMESTAMP, DATE, TIME };
+};
+
inline TimeUnit::type FromFlatbufferEnum(fbs::TimeUnit unit) {
- return static_cast<TimeUnit::type>(static_cast<int>(unit));
-}
-
-/// For compatibility, we need to write any data sometimes just to keep producing
-/// files that can be read with an older reader.
+ return static_cast<TimeUnit::type>(static_cast<int>(unit));
+}
+
+/// For compatibility, we sometimes need to write placeholder zero bytes just to keep
+/// producing files that can be read by older readers.
Status WritePaddedBlank(io::OutputStream* stream, int64_t length,
int64_t* bytes_written) {
- const uint8_t null = 0;
- for (int64_t i = 0; i < length; i++) {
- RETURN_NOT_OK(stream->Write(&null, 1));
- }
- int64_t remainder = PaddedLength(length) - length;
- if (remainder != 0) {
- RETURN_NOT_OK(stream->Write(kPaddingBytes, remainder));
- }
- *bytes_written = length + remainder;
- return Status::OK();
-}
-
-// ----------------------------------------------------------------------
-// ReaderV1
-
-class ReaderV1 : public Reader {
- public:
- Status Open(const std::shared_ptr<io::RandomAccessFile>& source) {
- source_ = source;
-
- ARROW_ASSIGN_OR_RAISE(int64_t size, source->GetSize());
- int magic_size = static_cast<int>(strlen(kFeatherV1MagicBytes));
- int footer_size = magic_size + static_cast<int>(sizeof(uint32_t));
-
- // Now get the footer and verify
- ARROW_ASSIGN_OR_RAISE(auto buffer, source->ReadAt(size - footer_size, footer_size));
-
- if (memcmp(buffer->data() + sizeof(uint32_t), kFeatherV1MagicBytes, magic_size)) {
- return Status::Invalid("Feather file footer incomplete");
- }
-
- uint32_t metadata_length = *reinterpret_cast<const uint32_t*>(buffer->data());
- if (size < magic_size + footer_size + metadata_length) {
- return Status::Invalid("File is smaller than indicated metadata size");
- }
- ARROW_ASSIGN_OR_RAISE(
- metadata_buffer_,
- source->ReadAt(size - footer_size - metadata_length, metadata_length));
-
- metadata_ = fbs::GetCTable(metadata_buffer_->data());
- return ReadSchema();
- }
-
- Status ReadSchema() {
- std::vector<std::shared_ptr<Field>> fields;
- for (int i = 0; i < static_cast<int>(metadata_->columns()->size()); ++i) {
- const fbs::Column* col = metadata_->columns()->Get(i);
- std::shared_ptr<DataType> type;
- RETURN_NOT_OK(
- GetDataType(col->values(), col->metadata_type(), col->metadata(), &type));
- fields.push_back(::arrow::field(col->name()->str(), type));
- }
+ const uint8_t null = 0;
+ for (int64_t i = 0; i < length; i++) {
+ RETURN_NOT_OK(stream->Write(&null, 1));
+ }
+ int64_t remainder = PaddedLength(length) - length;
+ if (remainder != 0) {
+ RETURN_NOT_OK(stream->Write(kPaddingBytes, remainder));
+ }
+ *bytes_written = length + remainder;
+ return Status::OK();
+}
+
+// ----------------------------------------------------------------------
+// ReaderV1
+
+class ReaderV1 : public Reader {
+ public:
+ Status Open(const std::shared_ptr<io::RandomAccessFile>& source) {
+ source_ = source;
+
+ ARROW_ASSIGN_OR_RAISE(int64_t size, source->GetSize());
+ int magic_size = static_cast<int>(strlen(kFeatherV1MagicBytes));
+ int footer_size = magic_size + static_cast<int>(sizeof(uint32_t));
+
+ // Now get the footer and verify
+ ARROW_ASSIGN_OR_RAISE(auto buffer, source->ReadAt(size - footer_size, footer_size));
+
+ if (memcmp(buffer->data() + sizeof(uint32_t), kFeatherV1MagicBytes, magic_size)) {
+ return Status::Invalid("Feather file footer incomplete");
+ }
+
+ uint32_t metadata_length = *reinterpret_cast<const uint32_t*>(buffer->data());
+ if (size < magic_size + footer_size + metadata_length) {
+ return Status::Invalid("File is smaller than indicated metadata size");
+ }
+ ARROW_ASSIGN_OR_RAISE(
+ metadata_buffer_,
+ source->ReadAt(size - footer_size - metadata_length, metadata_length));
+
+ metadata_ = fbs::GetCTable(metadata_buffer_->data());
+ return ReadSchema();
+ }
+
+ Status ReadSchema() {
+ std::vector<std::shared_ptr<Field>> fields;
+ for (int i = 0; i < static_cast<int>(metadata_->columns()->size()); ++i) {
+ const fbs::Column* col = metadata_->columns()->Get(i);
+ std::shared_ptr<DataType> type;
+ RETURN_NOT_OK(
+ GetDataType(col->values(), col->metadata_type(), col->metadata(), &type));
+ fields.push_back(::arrow::field(col->name()->str(), type));
+ }
schema_ = ::arrow::schema(std::move(fields));
- return Status::OK();
- }
-
- Status GetDataType(const fbs::PrimitiveArray* values, fbs::TypeMetadata metadata_type,
- const void* metadata, std::shared_ptr<DataType>* out) {
-#define PRIMITIVE_CASE(CAP_TYPE, FACTORY_FUNC) \
- case fbs::Type::CAP_TYPE: \
- *out = FACTORY_FUNC(); \
- break;
-
- switch (metadata_type) {
- case fbs::TypeMetadata::CategoryMetadata: {
- auto meta = static_cast<const fbs::CategoryMetadata*>(metadata);
-
- std::shared_ptr<DataType> index_type, dict_type;
- RETURN_NOT_OK(GetDataType(values, fbs::TypeMetadata::NONE, nullptr, &index_type));
- RETURN_NOT_OK(
- GetDataType(meta->levels(), fbs::TypeMetadata::NONE, nullptr, &dict_type));
- *out = dictionary(index_type, dict_type, meta->ordered());
- break;
- }
- case fbs::TypeMetadata::TimestampMetadata: {
- auto meta = static_cast<const fbs::TimestampMetadata*>(metadata);
- TimeUnit::type unit = FromFlatbufferEnum(meta->unit());
- std::string tz;
- // flatbuffer non-null
- if (meta->timezone() != 0) {
- tz = meta->timezone()->str();
- } else {
- tz = "";
- }
- *out = timestamp(unit, tz);
- } break;
- case fbs::TypeMetadata::DateMetadata:
- *out = date32();
- break;
- case fbs::TypeMetadata::TimeMetadata: {
- auto meta = static_cast<const fbs::TimeMetadata*>(metadata);
- *out = time32(FromFlatbufferEnum(meta->unit()));
- } break;
- default:
- switch (values->type()) {
- PRIMITIVE_CASE(BOOL, boolean);
- PRIMITIVE_CASE(INT8, int8);
- PRIMITIVE_CASE(INT16, int16);
- PRIMITIVE_CASE(INT32, int32);
- PRIMITIVE_CASE(INT64, int64);
- PRIMITIVE_CASE(UINT8, uint8);
- PRIMITIVE_CASE(UINT16, uint16);
- PRIMITIVE_CASE(UINT32, uint32);
- PRIMITIVE_CASE(UINT64, uint64);
- PRIMITIVE_CASE(FLOAT, float32);
- PRIMITIVE_CASE(DOUBLE, float64);
- PRIMITIVE_CASE(UTF8, utf8);
- PRIMITIVE_CASE(BINARY, binary);
- PRIMITIVE_CASE(LARGE_UTF8, large_utf8);
- PRIMITIVE_CASE(LARGE_BINARY, large_binary);
- default:
- return Status::Invalid("Unrecognized type");
- }
- break;
- }
-
-#undef PRIMITIVE_CASE
-
- return Status::OK();
- }
-
- int64_t GetOutputLength(int64_t nbytes) {
- // XXX: Hack for Feather 0.3.0 for backwards compatibility with old files
- // Size in-file of written byte buffer
- if (version() < 2) {
- // Feather files < 0.3.0
- return nbytes;
- } else {
- return PaddedLength(nbytes);
- }
- }
-
- // Retrieve a primitive array from the data source
- //
- // @returns: a Buffer instance, the precise type will depend on the kind of
- // input data source (which may or may not have memory-map like semantics)
- Status LoadValues(std::shared_ptr<DataType> type, const fbs::PrimitiveArray* meta,
- fbs::TypeMetadata metadata_type, const void* metadata,
- std::shared_ptr<ArrayData>* out) {
- std::vector<std::shared_ptr<Buffer>> buffers;
-
- // Buffer data from the source (may or may not perform a copy depending on
- // input source)
- ARROW_ASSIGN_OR_RAISE(auto buffer,
- source_->ReadAt(meta->offset(), meta->total_bytes()));
-
- int64_t offset = 0;
-
- if (type->id() == Type::DICTIONARY) {
- // Load the index type values
- type = checked_cast<const DictionaryType&>(*type).index_type();
- }
-
- // If there are nulls, the null bitmask is first
- if (meta->null_count() > 0) {
- int64_t null_bitmap_size = GetOutputLength(BitUtil::BytesForBits(meta->length()));
- buffers.push_back(SliceBuffer(buffer, offset, null_bitmap_size));
- offset += null_bitmap_size;
- } else {
- buffers.push_back(nullptr);
- }
-
- if (is_binary_like(type->id())) {
- int64_t offsets_size = GetOutputLength((meta->length() + 1) * sizeof(int32_t));
- buffers.push_back(SliceBuffer(buffer, offset, offsets_size));
- offset += offsets_size;
- } else if (is_large_binary_like(type->id())) {
- int64_t offsets_size = GetOutputLength((meta->length() + 1) * sizeof(int64_t));
- buffers.push_back(SliceBuffer(buffer, offset, offsets_size));
- offset += offsets_size;
- }
-
- buffers.push_back(SliceBuffer(buffer, offset, buffer->size() - offset));
-
- *out = ArrayData::Make(type, meta->length(), std::move(buffers), meta->null_count());
- return Status::OK();
- }
-
- int version() const override { return metadata_->version(); }
- int64_t num_rows() const { return metadata_->num_rows(); }
-
- std::shared_ptr<Schema> schema() const override { return schema_; }
-
- Status GetDictionary(int field_index, std::shared_ptr<ArrayData>* out) {
- const fbs::Column* col_meta = metadata_->columns()->Get(field_index);
- auto dict_meta = col_meta->metadata_as<fbs::CategoryMetadata>();
- const auto& dict_type =
- checked_cast<const DictionaryType&>(*schema_->field(field_index)->type());
-
- return LoadValues(dict_type.value_type(), dict_meta->levels(),
- fbs::TypeMetadata::NONE, nullptr, out);
- }
-
- Status GetColumn(int field_index, std::shared_ptr<ChunkedArray>* out) {
- const fbs::Column* col_meta = metadata_->columns()->Get(field_index);
- std::shared_ptr<ArrayData> data;
-
- auto type = schema_->field(field_index)->type();
- RETURN_NOT_OK(LoadValues(type, col_meta->values(), col_meta->metadata_type(),
- col_meta->metadata(), &data));
-
- if (type->id() == Type::DICTIONARY) {
- RETURN_NOT_OK(GetDictionary(field_index, &data->dictionary));
- data->type = type;
- }
- *out = std::make_shared<ChunkedArray>(MakeArray(data));
- return Status::OK();
- }
-
- Status Read(std::shared_ptr<Table>* out) override {
- std::vector<std::shared_ptr<ChunkedArray>> columns;
- for (int i = 0; i < static_cast<int>(metadata_->columns()->size()); ++i) {
- columns.emplace_back();
- RETURN_NOT_OK(GetColumn(i, &columns.back()));
- }
+ return Status::OK();
+ }
+
+ Status GetDataType(const fbs::PrimitiveArray* values, fbs::TypeMetadata metadata_type,
+ const void* metadata, std::shared_ptr<DataType>* out) {
+#define PRIMITIVE_CASE(CAP_TYPE, FACTORY_FUNC) \
+ case fbs::Type::CAP_TYPE: \
+ *out = FACTORY_FUNC(); \
+ break;
+
+ switch (metadata_type) {
+ case fbs::TypeMetadata::CategoryMetadata: {
+ auto meta = static_cast<const fbs::CategoryMetadata*>(metadata);
+
+ std::shared_ptr<DataType> index_type, dict_type;
+ RETURN_NOT_OK(GetDataType(values, fbs::TypeMetadata::NONE, nullptr, &index_type));
+ RETURN_NOT_OK(
+ GetDataType(meta->levels(), fbs::TypeMetadata::NONE, nullptr, &dict_type));
+ *out = dictionary(index_type, dict_type, meta->ordered());
+ break;
+ }
+ case fbs::TypeMetadata::TimestampMetadata: {
+ auto meta = static_cast<const fbs::TimestampMetadata*>(metadata);
+ TimeUnit::type unit = FromFlatbufferEnum(meta->unit());
+ std::string tz;
+ // flatbuffer non-null
+ if (meta->timezone() != 0) {
+ tz = meta->timezone()->str();
+ } else {
+ tz = "";
+ }
+ *out = timestamp(unit, tz);
+ } break;
+ case fbs::TypeMetadata::DateMetadata:
+ *out = date32();
+ break;
+ case fbs::TypeMetadata::TimeMetadata: {
+ auto meta = static_cast<const fbs::TimeMetadata*>(metadata);
+ *out = time32(FromFlatbufferEnum(meta->unit()));
+ } break;
+ default:
+ switch (values->type()) {
+ PRIMITIVE_CASE(BOOL, boolean);
+ PRIMITIVE_CASE(INT8, int8);
+ PRIMITIVE_CASE(INT16, int16);
+ PRIMITIVE_CASE(INT32, int32);
+ PRIMITIVE_CASE(INT64, int64);
+ PRIMITIVE_CASE(UINT8, uint8);
+ PRIMITIVE_CASE(UINT16, uint16);
+ PRIMITIVE_CASE(UINT32, uint32);
+ PRIMITIVE_CASE(UINT64, uint64);
+ PRIMITIVE_CASE(FLOAT, float32);
+ PRIMITIVE_CASE(DOUBLE, float64);
+ PRIMITIVE_CASE(UTF8, utf8);
+ PRIMITIVE_CASE(BINARY, binary);
+ PRIMITIVE_CASE(LARGE_UTF8, large_utf8);
+ PRIMITIVE_CASE(LARGE_BINARY, large_binary);
+ default:
+ return Status::Invalid("Unrecognized type");
+ }
+ break;
+ }
+
+#undef PRIMITIVE_CASE
+
+ return Status::OK();
+ }
+
+ int64_t GetOutputLength(int64_t nbytes) {
+ // XXX: Hack for Feather 0.3.0 for backwards compatibility with old files
+ // Size in-file of written byte buffer
+ if (version() < 2) {
+ // Feather files < 0.3.0
+ return nbytes;
+ } else {
+ return PaddedLength(nbytes);
+ }
+ }
+
+ // Retrieve a primitive array from the data source
+ //
+ // @returns: a Buffer instance, the precise type will depend on the kind of
+ // input data source (which may or may not have memory-map like semantics)
+ Status LoadValues(std::shared_ptr<DataType> type, const fbs::PrimitiveArray* meta,
+ fbs::TypeMetadata metadata_type, const void* metadata,
+ std::shared_ptr<ArrayData>* out) {
+ std::vector<std::shared_ptr<Buffer>> buffers;
+
+ // Buffer data from the source (may or may not perform a copy depending on
+ // input source)
+ ARROW_ASSIGN_OR_RAISE(auto buffer,
+ source_->ReadAt(meta->offset(), meta->total_bytes()));
+
+ int64_t offset = 0;
+
+ if (type->id() == Type::DICTIONARY) {
+ // Load the index type values
+ type = checked_cast<const DictionaryType&>(*type).index_type();
+ }
+
+ // If there are nulls, the null bitmask is first
+ if (meta->null_count() > 0) {
+ int64_t null_bitmap_size = GetOutputLength(BitUtil::BytesForBits(meta->length()));
+ buffers.push_back(SliceBuffer(buffer, offset, null_bitmap_size));
+ offset += null_bitmap_size;
+ } else {
+ buffers.push_back(nullptr);
+ }
+
+ if (is_binary_like(type->id())) {
+ int64_t offsets_size = GetOutputLength((meta->length() + 1) * sizeof(int32_t));
+ buffers.push_back(SliceBuffer(buffer, offset, offsets_size));
+ offset += offsets_size;
+ } else if (is_large_binary_like(type->id())) {
+ int64_t offsets_size = GetOutputLength((meta->length() + 1) * sizeof(int64_t));
+ buffers.push_back(SliceBuffer(buffer, offset, offsets_size));
+ offset += offsets_size;
+ }
+
+ buffers.push_back(SliceBuffer(buffer, offset, buffer->size() - offset));
+
+ *out = ArrayData::Make(type, meta->length(), std::move(buffers), meta->null_count());
+ return Status::OK();
+ }
+
+ int version() const override { return metadata_->version(); }
+ int64_t num_rows() const { return metadata_->num_rows(); }
+
+ std::shared_ptr<Schema> schema() const override { return schema_; }
+
+ Status GetDictionary(int field_index, std::shared_ptr<ArrayData>* out) {
+ const fbs::Column* col_meta = metadata_->columns()->Get(field_index);
+ auto dict_meta = col_meta->metadata_as<fbs::CategoryMetadata>();
+ const auto& dict_type =
+ checked_cast<const DictionaryType&>(*schema_->field(field_index)->type());
+
+ return LoadValues(dict_type.value_type(), dict_meta->levels(),
+ fbs::TypeMetadata::NONE, nullptr, out);
+ }
+
+ Status GetColumn(int field_index, std::shared_ptr<ChunkedArray>* out) {
+ const fbs::Column* col_meta = metadata_->columns()->Get(field_index);
+ std::shared_ptr<ArrayData> data;
+
+ auto type = schema_->field(field_index)->type();
+ RETURN_NOT_OK(LoadValues(type, col_meta->values(), col_meta->metadata_type(),
+ col_meta->metadata(), &data));
+
+ if (type->id() == Type::DICTIONARY) {
+ RETURN_NOT_OK(GetDictionary(field_index, &data->dictionary));
+ data->type = type;
+ }
+ *out = std::make_shared<ChunkedArray>(MakeArray(data));
+ return Status::OK();
+ }
+
+ Status Read(std::shared_ptr<Table>* out) override {
+ std::vector<std::shared_ptr<ChunkedArray>> columns;
+ for (int i = 0; i < static_cast<int>(metadata_->columns()->size()); ++i) {
+ columns.emplace_back();
+ RETURN_NOT_OK(GetColumn(i, &columns.back()));
+ }
*out = Table::Make(this->schema(), std::move(columns), this->num_rows());
- return Status::OK();
- }
-
- Status Read(const std::vector<int>& indices, std::shared_ptr<Table>* out) override {
- std::vector<std::shared_ptr<Field>> fields;
- std::vector<std::shared_ptr<ChunkedArray>> columns;
-
- auto my_schema = this->schema();
- for (auto field_index : indices) {
- if (field_index < 0 || field_index >= my_schema->num_fields()) {
- return Status::Invalid("Field index ", field_index, " is out of bounds");
- }
- columns.emplace_back();
- RETURN_NOT_OK(GetColumn(field_index, &columns.back()));
- fields.push_back(my_schema->field(field_index));
- }
+ return Status::OK();
+ }
+
+ Status Read(const std::vector<int>& indices, std::shared_ptr<Table>* out) override {
+ std::vector<std::shared_ptr<Field>> fields;
+ std::vector<std::shared_ptr<ChunkedArray>> columns;
+
+ auto my_schema = this->schema();
+ for (auto field_index : indices) {
+ if (field_index < 0 || field_index >= my_schema->num_fields()) {
+ return Status::Invalid("Field index ", field_index, " is out of bounds");
+ }
+ columns.emplace_back();
+ RETURN_NOT_OK(GetColumn(field_index, &columns.back()));
+ fields.push_back(my_schema->field(field_index));
+ }
*out = Table::Make(::arrow::schema(std::move(fields)), std::move(columns),
this->num_rows());
- return Status::OK();
- }
-
- Status Read(const std::vector<std::string>& names,
- std::shared_ptr<Table>* out) override {
- std::vector<std::shared_ptr<Field>> fields;
- std::vector<std::shared_ptr<ChunkedArray>> columns;
-
- std::shared_ptr<Schema> sch = this->schema();
- for (auto name : names) {
- int field_index = sch->GetFieldIndex(name);
- if (field_index == -1) {
- return Status::Invalid("Field named ", name, " is not found");
- }
- columns.emplace_back();
- RETURN_NOT_OK(GetColumn(field_index, &columns.back()));
- fields.push_back(sch->field(field_index));
- }
+ return Status::OK();
+ }
+
+ Status Read(const std::vector<std::string>& names,
+ std::shared_ptr<Table>* out) override {
+ std::vector<std::shared_ptr<Field>> fields;
+ std::vector<std::shared_ptr<ChunkedArray>> columns;
+
+ std::shared_ptr<Schema> sch = this->schema();
+ for (auto name : names) {
+ int field_index = sch->GetFieldIndex(name);
+ if (field_index == -1) {
+ return Status::Invalid("Field named ", name, " is not found");
+ }
+ columns.emplace_back();
+ RETURN_NOT_OK(GetColumn(field_index, &columns.back()));
+ fields.push_back(sch->field(field_index));
+ }
*out = Table::Make(::arrow::schema(std::move(fields)), std::move(columns),
this->num_rows());
- return Status::OK();
- }
-
- private:
- std::shared_ptr<io::RandomAccessFile> source_;
- std::shared_ptr<Buffer> metadata_buffer_;
- const fbs::CTable* metadata_;
- std::shared_ptr<Schema> schema_;
-};
-
-// ----------------------------------------------------------------------
-// WriterV1
-
-struct ArrayMetadata {
- fbs::Type type;
- int64_t offset;
- int64_t length;
- int64_t null_count;
- int64_t total_bytes;
-};
-
-#define TO_FLATBUFFER_CASE(TYPE) \
- case Type::TYPE: \
- return fbs::Type::TYPE;
-
-Result<fbs::Type> ToFlatbufferType(const DataType& type) {
- switch (type.id()) {
- TO_FLATBUFFER_CASE(BOOL);
- TO_FLATBUFFER_CASE(INT8);
- TO_FLATBUFFER_CASE(INT16);
- TO_FLATBUFFER_CASE(INT32);
- TO_FLATBUFFER_CASE(INT64);
- TO_FLATBUFFER_CASE(UINT8);
- TO_FLATBUFFER_CASE(UINT16);
- TO_FLATBUFFER_CASE(UINT32);
- TO_FLATBUFFER_CASE(UINT64);
- TO_FLATBUFFER_CASE(FLOAT);
- TO_FLATBUFFER_CASE(DOUBLE);
- TO_FLATBUFFER_CASE(LARGE_BINARY);
- TO_FLATBUFFER_CASE(BINARY);
- case Type::STRING:
- return fbs::Type::UTF8;
- case Type::LARGE_STRING:
- return fbs::Type::LARGE_UTF8;
- case Type::DATE32:
- return fbs::Type::INT32;
- case Type::TIMESTAMP:
- return fbs::Type::INT64;
- case Type::TIME32:
- return fbs::Type::INT32;
- case Type::TIME64:
- return fbs::Type::INT64;
- default:
- return Status::TypeError("Unsupported Feather V1 type: ", type.ToString(),
- ". Use V2 format to serialize all Arrow types.");
- }
-}
-
+ return Status::OK();
+ }
+
+ private:
+ std::shared_ptr<io::RandomAccessFile> source_;
+ std::shared_ptr<Buffer> metadata_buffer_;
+ const fbs::CTable* metadata_;
+ std::shared_ptr<Schema> schema_;
+};
+
+// ----------------------------------------------------------------------
+// WriterV1
+
+struct ArrayMetadata {
+ fbs::Type type;
+ int64_t offset;
+ int64_t length;
+ int64_t null_count;
+ int64_t total_bytes;
+};
+
+#define TO_FLATBUFFER_CASE(TYPE) \
+ case Type::TYPE: \
+ return fbs::Type::TYPE;
+
+Result<fbs::Type> ToFlatbufferType(const DataType& type) {
+ switch (type.id()) {
+ TO_FLATBUFFER_CASE(BOOL);
+ TO_FLATBUFFER_CASE(INT8);
+ TO_FLATBUFFER_CASE(INT16);
+ TO_FLATBUFFER_CASE(INT32);
+ TO_FLATBUFFER_CASE(INT64);
+ TO_FLATBUFFER_CASE(UINT8);
+ TO_FLATBUFFER_CASE(UINT16);
+ TO_FLATBUFFER_CASE(UINT32);
+ TO_FLATBUFFER_CASE(UINT64);
+ TO_FLATBUFFER_CASE(FLOAT);
+ TO_FLATBUFFER_CASE(DOUBLE);
+ TO_FLATBUFFER_CASE(LARGE_BINARY);
+ TO_FLATBUFFER_CASE(BINARY);
+ case Type::STRING:
+ return fbs::Type::UTF8;
+ case Type::LARGE_STRING:
+ return fbs::Type::LARGE_UTF8;
+ case Type::DATE32:
+ return fbs::Type::INT32;
+ case Type::TIMESTAMP:
+ return fbs::Type::INT64;
+ case Type::TIME32:
+ return fbs::Type::INT32;
+ case Type::TIME64:
+ return fbs::Type::INT64;
+ default:
+ return Status::TypeError("Unsupported Feather V1 type: ", type.ToString(),
+ ". Use V2 format to serialize all Arrow types.");
+ }
+}
+
inline flatbuffers::Offset<fbs::PrimitiveArray> GetPrimitiveArray(
- FBB& fbb, const ArrayMetadata& array) {
- return fbs::CreatePrimitiveArray(fbb, array.type, fbs::Encoding::PLAIN, array.offset,
- array.length, array.null_count, array.total_bytes);
-}
-
-// Convert Feather enums to Flatbuffer enums
+ FBB& fbb, const ArrayMetadata& array) {
+ return fbs::CreatePrimitiveArray(fbb, array.type, fbs::Encoding::PLAIN, array.offset,
+ array.length, array.null_count, array.total_bytes);
+}
+
+// Convert Feather enums to Flatbuffer enums
inline fbs::TimeUnit ToFlatbufferEnum(TimeUnit::type unit) {
- return static_cast<fbs::TimeUnit>(static_cast<int>(unit));
-}
-
-const fbs::TypeMetadata COLUMN_TYPE_ENUM_MAPPING[] = {
- fbs::TypeMetadata::NONE, // PRIMITIVE
- fbs::TypeMetadata::CategoryMetadata, // CATEGORY
- fbs::TypeMetadata::TimestampMetadata, // TIMESTAMP
- fbs::TypeMetadata::DateMetadata, // DATE
- fbs::TypeMetadata::TimeMetadata // TIME
-};
-
+ return static_cast<fbs::TimeUnit>(static_cast<int>(unit));
+}
+
+const fbs::TypeMetadata COLUMN_TYPE_ENUM_MAPPING[] = {
+ fbs::TypeMetadata::NONE, // PRIMITIVE
+ fbs::TypeMetadata::CategoryMetadata, // CATEGORY
+ fbs::TypeMetadata::TimestampMetadata, // TIMESTAMP
+ fbs::TypeMetadata::DateMetadata, // DATE
+ fbs::TypeMetadata::TimeMetadata // TIME
+};
+
inline fbs::TypeMetadata ToFlatbufferEnum(ColumnType::type column_type) {
- return COLUMN_TYPE_ENUM_MAPPING[column_type];
-}
-
-struct ColumnMetadata {
- flatbuffers::Offset<void> WriteMetadata(FBB& fbb) { // NOLINT
- switch (this->meta_type) {
- case ColumnType::PRIMITIVE:
- // flatbuffer void
- return 0;
- case ColumnType::CATEGORY: {
- auto cat_meta = fbs::CreateCategoryMetadata(
- fbb, GetPrimitiveArray(fbb, this->category_levels), this->category_ordered);
- return cat_meta.Union();
- }
- case ColumnType::TIMESTAMP: {
- // flatbuffer void
- flatbuffers::Offset<flatbuffers::String> tz = 0;
- if (!this->timezone.empty()) {
- tz = fbb.CreateString(this->timezone);
- }
-
- auto ts_meta =
- fbs::CreateTimestampMetadata(fbb, ToFlatbufferEnum(this->temporal_unit), tz);
- return ts_meta.Union();
- }
- case ColumnType::DATE: {
- auto date_meta = fbs::CreateDateMetadata(fbb);
- return date_meta.Union();
- }
- case ColumnType::TIME: {
- auto time_meta =
- fbs::CreateTimeMetadata(fbb, ToFlatbufferEnum(this->temporal_unit));
- return time_meta.Union();
- }
- default:
- // null
- DCHECK(false);
- return 0;
- }
- }
-
- ArrayMetadata values;
- ColumnType::type meta_type;
-
- ArrayMetadata category_levels;
- bool category_ordered;
-
- TimeUnit::type temporal_unit;
-
- // A timezone name known to the Olson timezone database. For display purposes
- // because the actual data is all UTC
- std::string timezone;
-};
-
-Status WriteArrayV1(const Array& values, io::OutputStream* dst, ArrayMetadata* meta);
-
-struct ArrayWriterV1 {
- const Array& values;
- io::OutputStream* dst;
- ArrayMetadata* meta;
-
- Status WriteBuffer(const uint8_t* buffer, int64_t length, int64_t bit_offset) {
- int64_t bytes_written = 0;
- if (buffer) {
- RETURN_NOT_OK(
- WritePaddedWithOffset(dst, buffer, bit_offset, length, &bytes_written));
- } else {
- RETURN_NOT_OK(WritePaddedBlank(dst, length, &bytes_written));
- }
- meta->total_bytes += bytes_written;
- return Status::OK();
- }
-
- template <typename T>
- typename std::enable_if<
- is_nested_type<T>::value || is_null_type<T>::value || is_decimal_type<T>::value ||
- std::is_same<DictionaryType, T>::value || is_duration_type<T>::value ||
- is_interval_type<T>::value || is_fixed_size_binary_type<T>::value ||
- std::is_same<Date64Type, T>::value || std::is_same<Time64Type, T>::value ||
- std::is_same<ExtensionType, T>::value,
- Status>::type
- Visit(const T& type) {
- return Status::NotImplemented(type.ToString());
- }
-
- template <typename T>
- typename std::enable_if<is_number_type<T>::value ||
- std::is_same<Date32Type, T>::value ||
- std::is_same<Time32Type, T>::value ||
- is_timestamp_type<T>::value || is_boolean_type<T>::value,
- Status>::type
- Visit(const T&) {
- const auto& prim_values = checked_cast<const PrimitiveArray&>(values);
- const auto& fw_type = checked_cast<const FixedWidthType&>(*values.type());
-
- if (prim_values.values()) {
- const uint8_t* buffer =
- prim_values.values()->data() + (prim_values.offset() * fw_type.bit_width() / 8);
- int64_t bit_offset = (prim_values.offset() * fw_type.bit_width()) % 8;
- return WriteBuffer(buffer,
- BitUtil::BytesForBits(values.length() * fw_type.bit_width()),
- bit_offset);
- } else {
- return Status::OK();
- }
- return Status::OK();
- }
-
- template <typename T>
- enable_if_base_binary<T, Status> Visit(const T&) {
- using ArrayType = typename TypeTraits<T>::ArrayType;
- const auto& ty_values = checked_cast<const ArrayType&>(values);
-
- using offset_type = typename T::offset_type;
- const offset_type* offsets_data = nullptr;
- int64_t values_bytes = 0;
- if (ty_values.value_offsets()) {
- offsets_data = ty_values.raw_value_offsets();
- // All of the data has to be written because we don't have offset
- // shifting implemented here as with the IPC format
- values_bytes = offsets_data[values.length()];
- }
- RETURN_NOT_OK(WriteBuffer(reinterpret_cast<const uint8_t*>(offsets_data),
- sizeof(offset_type) * (values.length() + 1),
- /*bit_offset=*/0));
-
- const uint8_t* values_buffer = nullptr;
- if (ty_values.value_data()) {
- values_buffer = ty_values.value_data()->data();
- }
- return WriteBuffer(values_buffer, values_bytes, /*bit_offset=*/0);
- }
-
- Status Write() {
- if (values.type_id() == Type::DICTIONARY) {
- return WriteArrayV1(*(checked_cast<const DictionaryArray&>(values).indices()), dst,
- meta);
- }
-
- ARROW_ASSIGN_OR_RAISE(meta->type, ToFlatbufferType(*values.type()));
- ARROW_ASSIGN_OR_RAISE(meta->offset, dst->Tell());
- meta->length = values.length();
- meta->null_count = values.null_count();
- meta->total_bytes = 0;
-
- // Write the null bitmask
- if (values.null_count() > 0) {
- RETURN_NOT_OK(WriteBuffer(values.null_bitmap_data(),
- BitUtil::BytesForBits(values.length()), values.offset()));
- }
- // Write data buffer(s)
- return VisitTypeInline(*values.type(), this);
- }
-};
-
-Status WriteArrayV1(const Array& values, io::OutputStream* dst, ArrayMetadata* meta) {
- std::shared_ptr<Array> sanitized;
- if (values.type_id() == Type::NA) {
- // As long as R doesn't support NA, we write this as a StringColumn
- // to ensure stable roundtrips.
- sanitized = std::make_shared<StringArray>(values.length(), nullptr, nullptr,
- values.null_bitmap(), values.null_count());
- } else {
- sanitized = MakeArray(values.data());
- }
- ArrayWriterV1 visitor{*sanitized, dst, meta};
- return visitor.Write();
-}
-
-Status WriteColumnV1(const ChunkedArray& values, io::OutputStream* dst,
- ColumnMetadata* out) {
- if (values.num_chunks() > 1) {
- return Status::Invalid("Writing chunked arrays not supported in Feather V1");
- }
- const Array& chunk = *values.chunk(0);
- RETURN_NOT_OK(WriteArrayV1(chunk, dst, &out->values));
- switch (chunk.type_id()) {
- case Type::DICTIONARY: {
- out->meta_type = ColumnType::CATEGORY;
- auto dictionary = checked_cast<const DictionaryArray&>(chunk).dictionary();
- RETURN_NOT_OK(WriteArrayV1(*dictionary, dst, &out->category_levels));
- out->category_ordered =
- checked_cast<const DictionaryType&>(*chunk.type()).ordered();
- } break;
- case Type::DATE32:
- out->meta_type = ColumnType::DATE;
- break;
- case Type::TIME32: {
- out->meta_type = ColumnType::TIME;
- out->temporal_unit = checked_cast<const Time32Type&>(*chunk.type()).unit();
- } break;
- case Type::TIMESTAMP: {
- const auto& ts_type = checked_cast<const TimestampType&>(*chunk.type());
- out->meta_type = ColumnType::TIMESTAMP;
- out->temporal_unit = ts_type.unit();
- out->timezone = ts_type.timezone();
- } break;
- default:
- out->meta_type = ColumnType::PRIMITIVE;
- break;
- }
- return Status::OK();
-}
-
-Status WriteFeatherV1(const Table& table, io::OutputStream* dst) {
- // Preamble
- int64_t bytes_written;
- RETURN_NOT_OK(WritePadded(dst, reinterpret_cast<const uint8_t*>(kFeatherV1MagicBytes),
- strlen(kFeatherV1MagicBytes), &bytes_written));
-
- // Write columns
- flatbuffers::FlatBufferBuilder fbb;
- std::vector<flatbuffers::Offset<fbs::Column>> fb_columns;
- for (int i = 0; i < table.num_columns(); ++i) {
- ColumnMetadata col;
- RETURN_NOT_OK(WriteColumnV1(*table.column(i), dst, &col));
- auto fb_column = fbs::CreateColumn(
- fbb, fbb.CreateString(table.field(i)->name()), GetPrimitiveArray(fbb, col.values),
- ToFlatbufferEnum(col.meta_type), col.WriteMetadata(fbb),
- /*user_metadata=*/0);
- fb_columns.push_back(fb_column);
- }
-
- // Finalize file footer
- auto root = fbs::CreateCTable(fbb, /*description=*/0, table.num_rows(),
- fbb.CreateVector(fb_columns), kFeatherV1Version,
- /*metadata=*/0);
- fbb.Finish(root);
- auto buffer = std::make_shared<Buffer>(fbb.GetBufferPointer(),
- static_cast<int64_t>(fbb.GetSize()));
-
- // Writer metadata
- RETURN_NOT_OK(WritePadded(dst, buffer->data(), buffer->size(), &bytes_written));
- uint32_t metadata_size = static_cast<uint32_t>(bytes_written);
-
- // Footer: metadata length, magic bytes
- RETURN_NOT_OK(dst->Write(&metadata_size, sizeof(uint32_t)));
- return dst->Write(kFeatherV1MagicBytes, strlen(kFeatherV1MagicBytes));
-}
-
-// ----------------------------------------------------------------------
-// Reader V2
-
-class ReaderV2 : public Reader {
- public:
- Status Open(const std::shared_ptr<io::RandomAccessFile>& source) {
- source_ = source;
- ARROW_ASSIGN_OR_RAISE(auto reader, RecordBatchFileReader::Open(source_));
- schema_ = reader->schema();
- return Status::OK();
- }
-
- int version() const override { return kFeatherV2Version; }
-
- std::shared_ptr<Schema> schema() const override { return schema_; }
-
- Status Read(const IpcReadOptions& options, std::shared_ptr<Table>* out) {
- ARROW_ASSIGN_OR_RAISE(auto reader, RecordBatchFileReader::Open(source_, options));
- RecordBatchVector batches(reader->num_record_batches());
- for (int i = 0; i < reader->num_record_batches(); ++i) {
- ARROW_ASSIGN_OR_RAISE(batches[i], reader->ReadRecordBatch(i));
- }
-
- return Table::FromRecordBatches(reader->schema(), batches).Value(out);
- }
-
- Status Read(std::shared_ptr<Table>* out) override {
- return Read(IpcReadOptions::Defaults(), out);
- }
-
- Status Read(const std::vector<int>& indices, std::shared_ptr<Table>* out) override {
- auto options = IpcReadOptions::Defaults();
- options.included_fields = indices;
- return Read(options, out);
- }
-
- Status Read(const std::vector<std::string>& names,
- std::shared_ptr<Table>* out) override {
- std::vector<int> indices;
- std::shared_ptr<Schema> sch = this->schema();
- for (auto name : names) {
- int field_index = sch->GetFieldIndex(name);
- if (field_index == -1) {
- return Status::Invalid("Field named ", name, " is not found");
- }
- indices.push_back(field_index);
- }
- return Read(indices, out);
- }
-
- private:
- std::shared_ptr<io::RandomAccessFile> source_;
- std::shared_ptr<Schema> schema_;
-};
-
+ return COLUMN_TYPE_ENUM_MAPPING[column_type];
+}
+
+// Column-level metadata collected while a Feather V1 column is written.
+// WriteMetadata() turns the type-specific part of it into a flatbuffer union.
+struct ColumnMetadata {
+  // Build the metadata table matching `meta_type` inside `fbb` and return it
+  // as a union offset. PRIMITIVE columns carry no extra metadata, so a zero
+  // (void) offset is returned for them.
+  flatbuffers::Offset<void> WriteMetadata(FBB& fbb) {  // NOLINT
+    switch (meta_type) {
+      case ColumnType::PRIMITIVE:
+        // flatbuffer void
+        return 0;
+      case ColumnType::CATEGORY:
+        return fbs::CreateCategoryMetadata(fbb, GetPrimitiveArray(fbb, category_levels),
+                                           category_ordered)
+            .Union();
+      case ColumnType::TIMESTAMP: {
+        // The timezone string is optional: keep a void offset when it is empty
+        flatbuffers::Offset<flatbuffers::String> tz_offset = 0;
+        if (!timezone.empty()) {
+          tz_offset = fbb.CreateString(timezone);
+        }
+        return fbs::CreateTimestampMetadata(fbb, ToFlatbufferEnum(temporal_unit),
+                                            tz_offset)
+            .Union();
+      }
+      case ColumnType::DATE:
+        return fbs::CreateDateMetadata(fbb).Union();
+      case ColumnType::TIME:
+        return fbs::CreateTimeMetadata(fbb, ToFlatbufferEnum(temporal_unit)).Union();
+      default:
+        // null
+        DCHECK(false);
+        return 0;
+    }
+  }
+
+  // Metadata of the column's own values
+  ArrayMetadata values;
+  // Selects which of the fields below WriteMetadata() reads
+  ColumnType::type meta_type;
+
+  // CATEGORY columns only: metadata of the written levels and the ordered flag
+  ArrayMetadata category_levels;
+  bool category_ordered;
+
+  // TIME and TIMESTAMP columns only
+  TimeUnit::type temporal_unit;
+
+  // A timezone name known to the Olson timezone database. For display purposes
+  // because the actual data is all UTC
+  std::string timezone;
+};
+
+Status WriteArrayV1(const Array& values, io::OutputStream* dst, ArrayMetadata* meta);
+
+// Visitor that serializes one array for Feather V1: an optional validity
+// bitmap followed by the type-specific data buffer(s). Each write is added to
+// meta->total_bytes. Members are aggregate-initialized in this order.
+struct ArrayWriterV1 {
+  const Array& values;
+  io::OutputStream* dst;
+  ArrayMetadata* meta;
+
+  // Write `length` bytes to `dst` and add the number of bytes reported as
+  // written to meta->total_bytes. A non-null `buffer` goes through
+  // WritePaddedWithOffset (with `bit_offset`); a null one goes through
+  // WritePaddedBlank.
+  Status WriteBuffer(const uint8_t* buffer, int64_t length, int64_t bit_offset) {
+    int64_t bytes_written = 0;
+    if (buffer) {
+      RETURN_NOT_OK(
+          WritePaddedWithOffset(dst, buffer, bit_offset, length, &bytes_written));
+    } else {
+      RETURN_NOT_OK(WritePaddedBlank(dst, length, &bytes_written));
+    }
+    meta->total_bytes += bytes_written;
+    return Status::OK();
+  }
+
+  // Types this writer does not handle: report NotImplemented with the type name
+  template <typename T>
+  typename std::enable_if<
+      is_nested_type<T>::value || is_null_type<T>::value || is_decimal_type<T>::value ||
+          std::is_same<DictionaryType, T>::value || is_duration_type<T>::value ||
+          is_interval_type<T>::value || is_fixed_size_binary_type<T>::value ||
+          std::is_same<Date64Type, T>::value || std::is_same<Time64Type, T>::value ||
+          std::is_same<ExtensionType, T>::value,
+      Status>::type
+  Visit(const T& type) {
+    return Status::NotImplemented(type.ToString());
+  }
+
+  // Fixed-width types: write the values buffer, starting at the array's offset
+  template <typename T>
+  typename std::enable_if<is_number_type<T>::value ||
+                              std::is_same<Date32Type, T>::value ||
+                              std::is_same<Time32Type, T>::value ||
+                              is_timestamp_type<T>::value || is_boolean_type<T>::value,
+                          Status>::type
+  Visit(const T&) {
+    const auto& prim_values = checked_cast<const PrimitiveArray&>(values);
+    const auto& fw_type = checked_cast<const FixedWidthType&>(*values.type());
+
+    if (!prim_values.values()) {
+      // No values buffer: nothing to write
+      return Status::OK();
+    }
+    // Position of the first value, in bits from the start of the buffer; split
+    // into a whole-byte pointer advance and a remaining sub-byte bit offset
+    const int64_t first_bit = prim_values.offset() * fw_type.bit_width();
+    const uint8_t* buffer = prim_values.values()->data() + first_bit / 8;
+    return WriteBuffer(buffer,
+                       BitUtil::BytesForBits(values.length() * fw_type.bit_width()),
+                       first_bit % 8);
+  }
+
+  // Binary-like types: write the offsets buffer, then the value bytes
+  template <typename T>
+  enable_if_base_binary<T, Status> Visit(const T&) {
+    using ArrayType = typename TypeTraits<T>::ArrayType;
+    const auto& ty_values = checked_cast<const ArrayType&>(values);
+
+    using offset_type = typename T::offset_type;
+    const offset_type* offsets_data = nullptr;
+    int64_t values_bytes = 0;
+    if (ty_values.value_offsets()) {
+      offsets_data = ty_values.raw_value_offsets();
+      // All of the data has to be written because we don't have offset
+      // shifting implemented here as with the IPC format
+      values_bytes = offsets_data[values.length()];
+    }
+    RETURN_NOT_OK(WriteBuffer(reinterpret_cast<const uint8_t*>(offsets_data),
+                              sizeof(offset_type) * (values.length() + 1),
+                              /*bit_offset=*/0));
+
+    const uint8_t* values_buffer = nullptr;
+    if (ty_values.value_data()) {
+      values_buffer = ty_values.value_data()->data();
+    }
+    return WriteBuffer(values_buffer, values_bytes, /*bit_offset=*/0);
+  }
+
+  // Entry point: fill in `meta`, write the validity bitmap when there are
+  // nulls, then dispatch on the array type to write the data buffer(s).
+  Status Write() {
+    if (values.type_id() == Type::DICTIONARY) {
+      // Only the indices are written here
+      return WriteArrayV1(*(checked_cast<const DictionaryArray&>(values).indices()), dst,
+                          meta);
+    }
+
+    ARROW_ASSIGN_OR_RAISE(meta->type, ToFlatbufferType(*values.type()));
+    ARROW_ASSIGN_OR_RAISE(meta->offset, dst->Tell());
+    meta->length = values.length();
+    meta->null_count = values.null_count();
+    meta->total_bytes = 0;
+
+    // Write the null bitmask
+    if (values.null_count() > 0) {
+      RETURN_NOT_OK(WriteBuffer(values.null_bitmap_data(),
+                                BitUtil::BytesForBits(values.length()), values.offset()));
+    }
+    // Write data buffer(s)
+    return VisitTypeInline(*values.type(), this);
+  }
+};
+
+// Write one array to `dst` and record its location and size in `meta`.
+Status WriteArrayV1(const Array& values, io::OutputStream* dst, ArrayMetadata* meta) {
+  // Owns the array handed to the writer for the duration of the write
+  std::shared_ptr<Array> to_write;
+  if (values.type_id() != Type::NA) {
+    to_write = MakeArray(values.data());
+  } else {
+    // As long as R doesn't support NA, we write this as a StringColumn
+    // to ensure stable roundtrips.
+    to_write = std::make_shared<StringArray>(values.length(), nullptr, nullptr,
+                                             values.null_bitmap(), values.null_count());
+  }
+  return ArrayWriterV1{*to_write, dst, meta}.Write();
+}
+
+// Write a single-chunk column to `dst` and fill `out` with its metadata.
+//
+// Returns Status::Invalid unless `values` holds exactly one chunk, and
+// propagates any error from writing the values or the dictionary.
+Status WriteColumnV1(const ChunkedArray& values, io::OutputStream* dst,
+                     ColumnMetadata* out) {
+  if (values.num_chunks() > 1) {
+    return Status::Invalid("Writing chunked arrays not supported in Feather V1");
+  }
+  if (values.num_chunks() < 1) {
+    // chunk(0) below would read past the end of an empty chunk list
+    return Status::Invalid("Writing columns without chunks not supported in Feather V1");
+  }
+  const Array& chunk = *values.chunk(0);
+  RETURN_NOT_OK(WriteArrayV1(chunk, dst, &out->values));
+  switch (chunk.type_id()) {
+    case Type::DICTIONARY: {
+      // The indices were written above; the dictionary follows as the levels
+      out->meta_type = ColumnType::CATEGORY;
+      auto dictionary = checked_cast<const DictionaryArray&>(chunk).dictionary();
+      RETURN_NOT_OK(WriteArrayV1(*dictionary, dst, &out->category_levels));
+      out->category_ordered =
+          checked_cast<const DictionaryType&>(*chunk.type()).ordered();
+    } break;
+    case Type::DATE32:
+      out->meta_type = ColumnType::DATE;
+      break;
+    case Type::TIME32: {
+      out->meta_type = ColumnType::TIME;
+      out->temporal_unit = checked_cast<const Time32Type&>(*chunk.type()).unit();
+    } break;
+    case Type::TIMESTAMP: {
+      const auto& ts_type = checked_cast<const TimestampType&>(*chunk.type());
+      out->meta_type = ColumnType::TIMESTAMP;
+      out->temporal_unit = ts_type.unit();
+      out->timezone = ts_type.timezone();
+    } break;
+    default:
+      out->meta_type = ColumnType::PRIMITIVE;
+      break;
+  }
+  return Status::OK();
+}
+
+// Write `table` as a Feather V1 file: leading magic bytes, one data section
+// per column, the flatbuffer metadata, then its size and trailing magic bytes.
+Status WriteFeatherV1(const Table& table, io::OutputStream* dst) {
+  const auto magic_len = strlen(kFeatherV1MagicBytes);
+
+  // Preamble
+  int64_t bytes_written = 0;
+  RETURN_NOT_OK(WritePadded(dst, reinterpret_cast<const uint8_t*>(kFeatherV1MagicBytes),
+                            magic_len, &bytes_written));
+
+  // Write columns
+  flatbuffers::FlatBufferBuilder fbb;
+  std::vector<flatbuffers::Offset<fbs::Column>> fb_columns;
+  const int num_columns = table.num_columns();
+  for (int col_idx = 0; col_idx < num_columns; ++col_idx) {
+    ColumnMetadata col_meta;
+    RETURN_NOT_OK(WriteColumnV1(*table.column(col_idx), dst, &col_meta));
+    fb_columns.push_back(fbs::CreateColumn(
+        fbb, fbb.CreateString(table.field(col_idx)->name()),
+        GetPrimitiveArray(fbb, col_meta.values), ToFlatbufferEnum(col_meta.meta_type),
+        col_meta.WriteMetadata(fbb),
+        /*user_metadata=*/0));
+  }
+
+  // Finalize file footer
+  fbb.Finish(fbs::CreateCTable(fbb, /*description=*/0, table.num_rows(),
+                               fbb.CreateVector(fb_columns), kFeatherV1Version,
+                               /*metadata=*/0));
+
+  // Writer metadata
+  RETURN_NOT_OK(WritePadded(dst, fbb.GetBufferPointer(),
+                            static_cast<int64_t>(fbb.GetSize()), &bytes_written));
+  const uint32_t metadata_size = static_cast<uint32_t>(bytes_written);
+
+  // Footer: metadata length, magic bytes
+  RETURN_NOT_OK(dst->Write(&metadata_size, sizeof(uint32_t)));
+  return dst->Write(kFeatherV1MagicBytes, magic_len);
+}
+
+// ----------------------------------------------------------------------
+// Reader V2
+
+// Reader for Feather V2 files, implemented on top of RecordBatchFileReader.
+class ReaderV2 : public Reader {
+ public:
+  // Remember `source` and cache the schema reported by the file reader.
+  Status Open(const std::shared_ptr<io::RandomAccessFile>& source) {
+    source_ = source;
+    ARROW_ASSIGN_OR_RAISE(auto reader, RecordBatchFileReader::Open(source_));
+    schema_ = reader->schema();
+    return Status::OK();
+  }
+
+  int version() const override { return kFeatherV2Version; }
+
+  std::shared_ptr<Schema> schema() const override { return schema_; }
+
+  // Re-open the source with `options`, read every record batch and assemble
+  // them into `out`.
+  Status Read(const IpcReadOptions& options, std::shared_ptr<Table>* out) {
+    ARROW_ASSIGN_OR_RAISE(auto reader, RecordBatchFileReader::Open(source_, options));
+    RecordBatchVector batches(reader->num_record_batches());
+    for (int i = 0; i < reader->num_record_batches(); ++i) {
+      ARROW_ASSIGN_OR_RAISE(batches[i], reader->ReadRecordBatch(i));
+    }
+
+    return Table::FromRecordBatches(reader->schema(), batches).Value(out);
+  }
+
+  Status Read(std::shared_ptr<Table>* out) override {
+    return Read(IpcReadOptions::Defaults(), out);
+  }
+
+  // Read only the fields at `indices`.
+  Status Read(const std::vector<int>& indices, std::shared_ptr<Table>* out) override {
+    auto options = IpcReadOptions::Defaults();
+    options.included_fields = indices;
+    return Read(options, out);
+  }
+
+  // Resolve `names` against the cached schema, then read by index. Returns
+  // Status::Invalid for the first name that is not a field of the schema.
+  Status Read(const std::vector<std::string>& names,
+              std::shared_ptr<Table>* out) override {
+    std::vector<int> indices;
+    indices.reserve(names.size());
+    std::shared_ptr<Schema> sch = this->schema();
+    // Iterate by reference: the names are only looked up, never modified
+    for (const auto& name : names) {
+      int field_index = sch->GetFieldIndex(name);
+      if (field_index == -1) {
+        return Status::Invalid("Field named ", name, " is not found");
+      }
+      indices.push_back(field_index);
+    }
+    return Read(indices, out);
+  }
+
+ private:
+  std::shared_ptr<io::RandomAccessFile> source_;
+  std::shared_ptr<Schema> schema_;
+};
+
} // namespace
-Result<std::shared_ptr<Reader>> Reader::Open(
- const std::shared_ptr<io::RandomAccessFile>& source) {
- // Pathological issue where the file is smaller than header and footer
- // combined
- ARROW_ASSIGN_OR_RAISE(int64_t size, source->GetSize());
- if (size < /* 2 * 4 + 4 */ 12) {
- return Status::Invalid("File is too small to be a well-formed file");
- }
-
- // Determine what kind of file we have. 6 is the max of len(FEA1) and
- // len(ARROW1)
- constexpr int magic_size = 6;
- ARROW_ASSIGN_OR_RAISE(auto buffer, source->ReadAt(0, magic_size));
-
- if (memcmp(buffer->data(), kFeatherV1MagicBytes, strlen(kFeatherV1MagicBytes)) == 0) {
- std::shared_ptr<ReaderV1> result = std::make_shared<ReaderV1>();
- RETURN_NOT_OK(result->Open(source));
- return result;
- } else if (memcmp(buffer->data(), internal::kArrowMagicBytes,
- strlen(internal::kArrowMagicBytes)) == 0) {
- std::shared_ptr<ReaderV2> result = std::make_shared<ReaderV2>();
- RETURN_NOT_OK(result->Open(source));
- return result;
- } else {
- return Status::Invalid("Not a Feather V1 or Arrow IPC file");
- }
-}
-
-WriteProperties WriteProperties::Defaults() {
- WriteProperties result;
-#ifdef ARROW_WITH_LZ4
- result.compression = Compression::LZ4_FRAME;
-#else
- result.compression = Compression::UNCOMPRESSED;
-#endif
- return result;
-}
-
-Status WriteTable(const Table& table, io::OutputStream* dst,
- const WriteProperties& properties) {
- if (properties.version == kFeatherV1Version) {
- return WriteFeatherV1(table, dst);
- } else {
- IpcWriteOptions ipc_options = IpcWriteOptions::Defaults();
+// Open `source` with the reader matching its leading magic bytes.
+Result<std::shared_ptr<Reader>> Reader::Open(
+    const std::shared_ptr<io::RandomAccessFile>& source) {
+  // Pathological issue where the file is smaller than header and footer
+  // combined
+  constexpr int64_t kMinFileSize = /* 2 * 4 + 4 */ 12;
+  ARROW_ASSIGN_OR_RAISE(int64_t size, source->GetSize());
+  if (size < kMinFileSize) {
+    return Status::Invalid("File is too small to be a well-formed file");
+  }
+
+  // Determine what kind of file we have. 6 is the max of len(FEA1) and
+  // len(ARROW1)
+  constexpr int magic_size = 6;
+  ARROW_ASSIGN_OR_RAISE(auto buffer, source->ReadAt(0, magic_size));
+  const uint8_t* magic = buffer->data();
+
+  const bool is_feather_v1 =
+      memcmp(magic, kFeatherV1MagicBytes, strlen(kFeatherV1MagicBytes)) == 0;
+  if (is_feather_v1) {
+    std::shared_ptr<ReaderV1> v1_reader = std::make_shared<ReaderV1>();
+    RETURN_NOT_OK(v1_reader->Open(source));
+    return v1_reader;
+  }
+
+  const bool is_arrow_ipc = memcmp(magic, internal::kArrowMagicBytes,
+                                   strlen(internal::kArrowMagicBytes)) == 0;
+  if (is_arrow_ipc) {
+    std::shared_ptr<ReaderV2> v2_reader = std::make_shared<ReaderV2>();
+    RETURN_NOT_OK(v2_reader->Open(source));
+    return v2_reader;
+  }
+
+  return Status::Invalid("Not a Feather V1 or Arrow IPC file");
+}
+
+// Default write properties: LZ4_FRAME compression when the build defines
+// ARROW_WITH_LZ4, UNCOMPRESSED otherwise.
+WriteProperties WriteProperties::Defaults() {
+#ifdef ARROW_WITH_LZ4
+  constexpr Compression::type kDefaultCompression = Compression::LZ4_FRAME;
+#else
+  constexpr Compression::type kDefaultCompression = Compression::UNCOMPRESSED;
+#endif
+  WriteProperties props;
+  props.compression = kDefaultCompression;
+  return props;
+}
+
+// Write `table` to `dst` in the format selected by `properties.version`.
+//
+// kFeatherV1Version delegates to WriteFeatherV1. Any other version writes the
+// table through an IPC file writer with dictionary unification and 64-bit
+// lengths enabled, compressed with a codec created from
+// `properties.compression` / `properties.compression_level` and split into
+// chunks of `properties.chunksize` rows.
+Status WriteTable(const Table& table, io::OutputStream* dst,
+ const WriteProperties& properties) {
+ if (properties.version == kFeatherV1Version) {
+ return WriteFeatherV1(table, dst);
+ } else {
+ IpcWriteOptions ipc_options = IpcWriteOptions::Defaults();
ipc_options.unify_dictionaries = true;
ipc_options.allow_64bit = true;
- ARROW_ASSIGN_OR_RAISE(
- ipc_options.codec,
- util::Codec::Create(properties.compression, properties.compression_level));
-
- std::shared_ptr<RecordBatchWriter> writer;
- ARROW_ASSIGN_OR_RAISE(writer, MakeFileWriter(dst, table.schema(), ipc_options));
- RETURN_NOT_OK(writer->WriteTable(table, properties.chunksize));
- return writer->Close();
- }
-}
-
-} // namespace feather
-} // namespace ipc
-} // namespace arrow
+ // Codec creation can fail; the error is returned before anything is written
+ ARROW_ASSIGN_OR_RAISE(
+ ipc_options.codec,
+ util::Codec::Create(properties.compression, properties.compression_level));
+
+ std::shared_ptr<RecordBatchWriter> writer;
+ ARROW_ASSIGN_OR_RAISE(writer, MakeFileWriter(dst, table.schema(), ipc_options));
+ RETURN_NOT_OK(writer->WriteTable(table, properties.chunksize));
+ return writer->Close();
+ }
+}
+
+} // namespace feather
+} // namespace ipc
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/feather.h b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/feather.h
index a32ff6d0a5a..5e1dcfbce9d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/feather.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/feather.h
@@ -1,140 +1,140 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Public API for the "Feather" file format, originally created at
-// http://github.com/wesm/feather
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Public API for the "Feather" file format, originally created at
+// http://github.com/wesm/feather
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
#include "arrow/type_fwd.h"
-#include "arrow/util/compression.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class Schema;
-class Status;
-class Table;
-
-namespace io {
-
-class OutputStream;
-class RandomAccessFile;
-
-} // namespace io
-
-namespace ipc {
-namespace feather {
-
-static constexpr const int kFeatherV1Version = 2;
-static constexpr const int kFeatherV2Version = 3;
-
-// ----------------------------------------------------------------------
-// Metadata accessor classes
-
-/// \class Reader
-/// \brief An interface for reading columns from Feather files
-class ARROW_EXPORT Reader {
- public:
- virtual ~Reader() = default;
-
- /// \brief Open a Feather file from a RandomAccessFile interface
- ///
- /// \param[in] source a RandomAccessFile instance
- /// \return the table reader
- static Result<std::shared_ptr<Reader>> Open(
- const std::shared_ptr<io::RandomAccessFile>& source);
-
- /// \brief Return the version number of the Feather file
- virtual int version() const = 0;
-
- virtual std::shared_ptr<Schema> schema() const = 0;
-
- /// \brief Read all columns from the file as an arrow::Table.
- ///
- /// \param[out] out the returned table
- /// \return Status
- ///
- /// This function is zero-copy if the file source supports zero-copy reads
- virtual Status Read(std::shared_ptr<Table>* out) = 0;
-
- /// \brief Read only the specified columns from the file as an arrow::Table.
- ///
- /// \param[in] indices the column indices to read
- /// \param[out] out the returned table
- /// \return Status
- ///
- /// This function is zero-copy if the file source supports zero-copy reads
- virtual Status Read(const std::vector<int>& indices, std::shared_ptr<Table>* out) = 0;
-
- /// \brief Read only the specified columns from the file as an arrow::Table.
- ///
- /// \param[in] names the column names to read
- /// \param[out] out the returned table
- /// \return Status
- ///
- /// This function is zero-copy if the file source supports zero-copy reads
- virtual Status Read(const std::vector<std::string>& names,
- std::shared_ptr<Table>* out) = 0;
-};
-
-struct ARROW_EXPORT WriteProperties {
- static WriteProperties Defaults();
-
- static WriteProperties DefaultsV1() {
- WriteProperties props = Defaults();
- props.version = kFeatherV1Version;
- return props;
- }
-
- /// Feather file version number
- ///
- /// version 2: "Feather V1" Apache Arrow <= 0.16.0
- /// version 3: "Feather V2" Apache Arrow > 0.16.0
- int version = kFeatherV2Version;
-
- // Parameters for Feather V2 only
-
- /// Number of rows per intra-file chunk. Use smaller chunksize when you need
- /// faster random row access
- int64_t chunksize = 1LL << 16;
-
- /// Compression type to use. Only UNCOMPRESSED, LZ4_FRAME, and ZSTD are
- /// supported. The default compression returned by Defaults() is LZ4 if the
- /// project is built with support for it, otherwise
- /// UNCOMPRESSED. UNCOMPRESSED is set as the object default here so that if
- /// WriteProperties::Defaults() is not used, the default constructor for
- /// WriteProperties will work regardless of the options used to build the C++
- /// project.
- Compression::type compression = Compression::UNCOMPRESSED;
-
- /// Compressor-specific compression level
+#include "arrow/util/compression.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Schema;
+class Status;
+class Table;
+
+namespace io {
+
+class OutputStream;
+class RandomAccessFile;
+
+} // namespace io
+
+namespace ipc {
+namespace feather {
+
+static constexpr const int kFeatherV1Version = 2;
+static constexpr const int kFeatherV2Version = 3;
+
+// ----------------------------------------------------------------------
+// Metadata accessor classes
+
+/// \class Reader
+/// \brief An interface for reading columns from Feather files
+///
+/// Instances are obtained from Open(), which selects the concrete reader from
+/// the magic bytes at the start of the file.
+class ARROW_EXPORT Reader {
+ public:
+ virtual ~Reader() = default;
+
+ /// \brief Open a Feather file from a RandomAccessFile interface
+ ///
+ /// \param[in] source a RandomAccessFile instance
+ /// \return the table reader, or an Invalid status if the file is shorter
+ /// than 12 bytes or starts with neither the Feather V1 nor the Arrow IPC
+ /// magic bytes
+ static Result<std::shared_ptr<Reader>> Open(
+ const std::shared_ptr<io::RandomAccessFile>& source);
+
+ /// \brief Return the version number of the Feather file
+ ///
+ /// See kFeatherV1Version and kFeatherV2Version for the known values.
+ virtual int version() const = 0;
+
+ /// \brief Return the schema of the opened file
+ virtual std::shared_ptr<Schema> schema() const = 0;
+
+ /// \brief Read all columns from the file as an arrow::Table.
+ ///
+ /// \param[out] out the returned table
+ /// \return Status
+ ///
+ /// This function is zero-copy if the file source supports zero-copy reads
+ virtual Status Read(std::shared_ptr<Table>* out) = 0;
+
+ /// \brief Read only the specified columns from the file as an arrow::Table.
+ ///
+ /// \param[in] indices the column indices to read
+ /// \param[out] out the returned table
+ /// \return Status
+ ///
+ /// This function is zero-copy if the file source supports zero-copy reads
+ virtual Status Read(const std::vector<int>& indices, std::shared_ptr<Table>* out) = 0;
+
+ /// \brief Read only the specified columns from the file as an arrow::Table.
+ ///
+ /// \param[in] names the column names to read
+ /// \param[out] out the returned table
+ /// \return Status; the Feather V2 reader returns Invalid when a name is not
+ /// a field of the schema
+ ///
+ /// This function is zero-copy if the file source supports zero-copy reads
+ virtual Status Read(const std::vector<std::string>& names,
+ std::shared_ptr<Table>* out) = 0;
+};
+
+/// \brief Options controlling how WriteTable() serializes a table
+struct ARROW_EXPORT WriteProperties {
+ /// \brief Properties for Feather V2, using LZ4_FRAME compression when the
+ /// library is built with ARROW_WITH_LZ4 and UNCOMPRESSED otherwise
+ static WriteProperties Defaults();
+
+ /// \brief Same as Defaults(), but with the version set to Feather V1
+ static WriteProperties DefaultsV1() {
+ WriteProperties props = Defaults();
+ props.version = kFeatherV1Version;
+ return props;
+ }
+
+ /// Feather file version number
+ ///
+ /// version 2: "Feather V1" Apache Arrow <= 0.16.0
+ /// version 3: "Feather V2" Apache Arrow > 0.16.0
+ int version = kFeatherV2Version;
+
+ // Parameters for Feather V2 only
+
+ /// Number of rows per intra-file chunk. Use smaller chunksize when you need
+ /// faster random row access
+ int64_t chunksize = 1LL << 16;
+
+ /// Compression type to use. Only UNCOMPRESSED, LZ4_FRAME, and ZSTD are
+ /// supported. The default compression returned by Defaults() is LZ4 if the
+ /// project is built with support for it, otherwise
+ /// UNCOMPRESSED. UNCOMPRESSED is set as the object default here so that if
+ /// WriteProperties::Defaults() is not used, the default constructor for
+ /// WriteProperties will work regardless of the options used to build the C++
+ /// project.
+ Compression::type compression = Compression::UNCOMPRESSED;
+
+ /// Compressor-specific compression level
int compression_level = ::arrow::util::kUseDefaultCompressionLevel;
-};
-
-ARROW_EXPORT
-Status WriteTable(const Table& table, io::OutputStream* dst,
- const WriteProperties& properties = WriteProperties::Defaults());
-
-} // namespace feather
-} // namespace ipc
-} // namespace arrow
+};
+
+/// \brief Write a table to an output stream as a Feather file
+///
+/// \param[in] table the table to write
+/// \param[in] dst the output stream to write to
+/// \param[in] properties the file version and, for Feather V2, the chunk
+/// size and compression settings
+/// \return Status
+ARROW_EXPORT
+Status WriteTable(const Table& table, io::OutputStream* dst,
+ const WriteProperties& properties = WriteProperties::Defaults());
+
+} // namespace feather
+} // namespace ipc
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/message.cc b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/message.cc
index 197556efcea..aeaca258a71 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/message.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/message.cc
@@ -1,330 +1,330 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/ipc/message.h"
-
-#include <algorithm>
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "arrow/buffer.h"
-#include "arrow/device.h"
-#include "arrow/io/interfaces.h"
-#include "arrow/ipc/metadata_internal.h"
-#include "arrow/ipc/options.h"
-#include "arrow/ipc/util.h"
-#include "arrow/status.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/ipc/message.h"
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/buffer.h"
+#include "arrow/device.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/ipc/metadata_internal.h"
+#include "arrow/ipc/options.h"
+#include "arrow/ipc/util.h"
+#include "arrow/status.h"
#include "arrow/util/endian.h"
#include "arrow/util/future.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/ubsan.h"
-
-#include "generated/Message_generated.h"
-
-namespace arrow {
-
-class KeyValueMetadata;
-class MemoryPool;
-
-namespace ipc {
-
-class Message::MessageImpl {
- public:
- explicit MessageImpl(std::shared_ptr<Buffer> metadata, std::shared_ptr<Buffer> body)
- : metadata_(std::move(metadata)), message_(nullptr), body_(std::move(body)) {}
-
- Status Open() {
- RETURN_NOT_OK(
- internal::VerifyMessage(metadata_->data(), metadata_->size(), &message_));
-
- // Check that the metadata version is supported
- if (message_->version() < internal::kMinMetadataVersion) {
- return Status::Invalid("Old metadata version not supported");
- }
-
- if (message_->version() > flatbuf::MetadataVersion::MAX) {
- return Status::Invalid("Unsupported future MetadataVersion: ",
- static_cast<int16_t>(message_->version()));
- }
-
- if (message_->custom_metadata() != nullptr) {
- // Deserialize from Flatbuffers if first time called
- std::shared_ptr<KeyValueMetadata> md;
- RETURN_NOT_OK(internal::GetKeyValueMetadata(message_->custom_metadata(), &md));
- custom_metadata_ = std::move(md); // const-ify
- }
-
- return Status::OK();
- }
-
- MessageType type() const {
- switch (message_->header_type()) {
- case flatbuf::MessageHeader::Schema:
- return MessageType::SCHEMA;
- case flatbuf::MessageHeader::DictionaryBatch:
- return MessageType::DICTIONARY_BATCH;
- case flatbuf::MessageHeader::RecordBatch:
- return MessageType::RECORD_BATCH;
- case flatbuf::MessageHeader::Tensor:
- return MessageType::TENSOR;
- case flatbuf::MessageHeader::SparseTensor:
- return MessageType::SPARSE_TENSOR;
- default:
- return MessageType::NONE;
- }
- }
-
- MetadataVersion version() const {
- return internal::GetMetadataVersion(message_->version());
- }
-
- const void* header() const { return message_->header(); }
-
- int64_t body_length() const { return message_->bodyLength(); }
-
- std::shared_ptr<Buffer> body() const { return body_; }
-
- std::shared_ptr<Buffer> metadata() const { return metadata_; }
-
- const std::shared_ptr<const KeyValueMetadata>& custom_metadata() const {
- return custom_metadata_;
- }
-
- private:
- // The Flatbuffer metadata
- std::shared_ptr<Buffer> metadata_;
- const flatbuf::Message* message_;
-
- // The reconstructed custom_metadata field from the Message Flatbuffer
- std::shared_ptr<const KeyValueMetadata> custom_metadata_;
-
- // The message body, if any
- std::shared_ptr<Buffer> body_;
-};
-
-Message::Message(std::shared_ptr<Buffer> metadata, std::shared_ptr<Buffer> body) {
- impl_.reset(new MessageImpl(std::move(metadata), std::move(body)));
-}
-
-Result<std::unique_ptr<Message>> Message::Open(std::shared_ptr<Buffer> metadata,
- std::shared_ptr<Buffer> body) {
- std::unique_ptr<Message> result(new Message(std::move(metadata), std::move(body)));
- RETURN_NOT_OK(result->impl_->Open());
- return std::move(result);
-}
-
-Message::~Message() {}
-
-std::shared_ptr<Buffer> Message::body() const { return impl_->body(); }
-
-int64_t Message::body_length() const { return impl_->body_length(); }
-
-std::shared_ptr<Buffer> Message::metadata() const { return impl_->metadata(); }
-
-MessageType Message::type() const { return impl_->type(); }
-
-MetadataVersion Message::metadata_version() const { return impl_->version(); }
-
-const void* Message::header() const { return impl_->header(); }
-
-const std::shared_ptr<const KeyValueMetadata>& Message::custom_metadata() const {
- return impl_->custom_metadata();
-}
-
-bool Message::Equals(const Message& other) const {
- int64_t metadata_bytes = std::min(metadata()->size(), other.metadata()->size());
-
- if (!metadata()->Equals(*other.metadata(), metadata_bytes)) {
- return false;
- }
-
- // Compare bodies, if they have them
- auto this_body = body();
- auto other_body = other.body();
-
- const bool this_has_body = (this_body != nullptr) && (this_body->size() > 0);
- const bool other_has_body = (other_body != nullptr) && (other_body->size() > 0);
-
- if (this_has_body && other_has_body) {
- return this_body->Equals(*other_body);
- } else if (this_has_body ^ other_has_body) {
- // One has a body but not the other
- return false;
- } else {
- // Neither has a body
- return true;
- }
-}
-
-Status MaybeAlignMetadata(std::shared_ptr<Buffer>* metadata) {
- if (reinterpret_cast<uintptr_t>((*metadata)->data()) % 8 != 0) {
- // If the metadata memory is not aligned, we copy it here to avoid
- // potential UBSAN issues from Flatbuffers
- ARROW_ASSIGN_OR_RAISE(*metadata, (*metadata)->CopySlice(0, (*metadata)->size()));
- }
- return Status::OK();
-}
-
-Status CheckMetadataAndGetBodyLength(const Buffer& metadata, int64_t* body_length) {
- const flatbuf::Message* fb_message = nullptr;
- RETURN_NOT_OK(internal::VerifyMessage(metadata.data(), metadata.size(), &fb_message));
- *body_length = fb_message->bodyLength();
- if (*body_length < 0) {
- return Status::IOError("Invalid IPC message: negative bodyLength");
- }
- return Status::OK();
-}
-
-Result<std::unique_ptr<Message>> Message::ReadFrom(std::shared_ptr<Buffer> metadata,
- io::InputStream* stream) {
- std::unique_ptr<Message> result;
- auto listener = std::make_shared<AssignMessageDecoderListener>(&result);
- MessageDecoder decoder(listener, MessageDecoder::State::METADATA, metadata->size());
- ARROW_RETURN_NOT_OK(decoder.Consume(metadata));
-
- ARROW_ASSIGN_OR_RAISE(auto body, stream->Read(decoder.next_required_size()));
- if (body->size() < decoder.next_required_size()) {
- return Status::IOError("Expected to be able to read ", decoder.next_required_size(),
- " bytes for message body, got ", body->size());
- }
- RETURN_NOT_OK(decoder.Consume(body));
- return std::move(result);
-}
-
-Result<std::unique_ptr<Message>> Message::ReadFrom(const int64_t offset,
- std::shared_ptr<Buffer> metadata,
- io::RandomAccessFile* file) {
- std::unique_ptr<Message> result;
- auto listener = std::make_shared<AssignMessageDecoderListener>(&result);
- MessageDecoder decoder(listener, MessageDecoder::State::METADATA, metadata->size());
- ARROW_RETURN_NOT_OK(decoder.Consume(metadata));
-
- ARROW_ASSIGN_OR_RAISE(auto body, file->ReadAt(offset, decoder.next_required_size()));
- if (body->size() < decoder.next_required_size()) {
- return Status::IOError("Expected to be able to read ", decoder.next_required_size(),
- " bytes for message body, got ", body->size());
- }
- RETURN_NOT_OK(decoder.Consume(body));
- return std::move(result);
-}
-
-Status WritePadding(io::OutputStream* stream, int64_t nbytes) {
- while (nbytes > 0) {
- const int64_t bytes_to_write = std::min<int64_t>(nbytes, kArrowAlignment);
- RETURN_NOT_OK(stream->Write(kPaddingBytes, bytes_to_write));
- nbytes -= bytes_to_write;
- }
- return Status::OK();
-}
-
-Status Message::SerializeTo(io::OutputStream* stream, const IpcWriteOptions& options,
- int64_t* output_length) const {
- int32_t metadata_length = 0;
- RETURN_NOT_OK(WriteMessage(*metadata(), options, stream, &metadata_length));
-
- *output_length = metadata_length;
-
- auto body_buffer = body();
- if (body_buffer) {
- RETURN_NOT_OK(stream->Write(body_buffer));
- *output_length += body_buffer->size();
-
- DCHECK_GE(this->body_length(), body_buffer->size());
-
- int64_t remainder = this->body_length() - body_buffer->size();
- RETURN_NOT_OK(WritePadding(stream, remainder));
- *output_length += remainder;
- }
- return Status::OK();
-}
-
-bool Message::Verify() const {
- const flatbuf::Message* unused;
- return internal::VerifyMessage(metadata()->data(), metadata()->size(), &unused).ok();
-}
-
-std::string FormatMessageType(MessageType type) {
- switch (type) {
- case MessageType::SCHEMA:
- return "schema";
- case MessageType::RECORD_BATCH:
- return "record batch";
- case MessageType::DICTIONARY_BATCH:
- return "dictionary";
+#include "arrow/util/logging.h"
+#include "arrow/util/ubsan.h"
+
+#include "generated/Message_generated.h"
+
+namespace arrow {
+
+class KeyValueMetadata;
+class MemoryPool;
+
+namespace ipc {
+
+class Message::MessageImpl {
+ public:
+ explicit MessageImpl(std::shared_ptr<Buffer> metadata, std::shared_ptr<Buffer> body)
+ : metadata_(std::move(metadata)), message_(nullptr), body_(std::move(body)) {}
+
+ Status Open() {
+ RETURN_NOT_OK(
+ internal::VerifyMessage(metadata_->data(), metadata_->size(), &message_));
+
+ // Check that the metadata version is supported
+ if (message_->version() < internal::kMinMetadataVersion) {
+ return Status::Invalid("Old metadata version not supported");
+ }
+
+ if (message_->version() > flatbuf::MetadataVersion::MAX) {
+ return Status::Invalid("Unsupported future MetadataVersion: ",
+ static_cast<int16_t>(message_->version()));
+ }
+
+ if (message_->custom_metadata() != nullptr) {
+ // Deserialize from Flatbuffers if first time called
+ std::shared_ptr<KeyValueMetadata> md;
+ RETURN_NOT_OK(internal::GetKeyValueMetadata(message_->custom_metadata(), &md));
+ custom_metadata_ = std::move(md); // const-ify
+ }
+
+ return Status::OK();
+ }
+
+ MessageType type() const {
+ switch (message_->header_type()) {
+ case flatbuf::MessageHeader::Schema:
+ return MessageType::SCHEMA;
+ case flatbuf::MessageHeader::DictionaryBatch:
+ return MessageType::DICTIONARY_BATCH;
+ case flatbuf::MessageHeader::RecordBatch:
+ return MessageType::RECORD_BATCH;
+ case flatbuf::MessageHeader::Tensor:
+ return MessageType::TENSOR;
+ case flatbuf::MessageHeader::SparseTensor:
+ return MessageType::SPARSE_TENSOR;
+ default:
+ return MessageType::NONE;
+ }
+ }
+
+ MetadataVersion version() const {
+ return internal::GetMetadataVersion(message_->version());
+ }
+
+ const void* header() const { return message_->header(); }
+
+ int64_t body_length() const { return message_->bodyLength(); }
+
+ std::shared_ptr<Buffer> body() const { return body_; }
+
+ std::shared_ptr<Buffer> metadata() const { return metadata_; }
+
+ const std::shared_ptr<const KeyValueMetadata>& custom_metadata() const {
+ return custom_metadata_;
+ }
+
+ private:
+ // The Flatbuffer metadata
+ std::shared_ptr<Buffer> metadata_;
+ const flatbuf::Message* message_;
+
+ // The reconstructed custom_metadata field from the Message Flatbuffer
+ std::shared_ptr<const KeyValueMetadata> custom_metadata_;
+
+ // The message body, if any
+ std::shared_ptr<Buffer> body_;
+};
+
+Message::Message(std::shared_ptr<Buffer> metadata, std::shared_ptr<Buffer> body) {
+ impl_.reset(new MessageImpl(std::move(metadata), std::move(body)));
+}
+
+Result<std::unique_ptr<Message>> Message::Open(std::shared_ptr<Buffer> metadata,
+ std::shared_ptr<Buffer> body) {
+ std::unique_ptr<Message> result(new Message(std::move(metadata), std::move(body)));
+ RETURN_NOT_OK(result->impl_->Open());
+ return std::move(result);
+}
+
+Message::~Message() {}
+
+std::shared_ptr<Buffer> Message::body() const { return impl_->body(); }
+
+int64_t Message::body_length() const { return impl_->body_length(); }
+
+std::shared_ptr<Buffer> Message::metadata() const { return impl_->metadata(); }
+
+MessageType Message::type() const { return impl_->type(); }
+
+MetadataVersion Message::metadata_version() const { return impl_->version(); }
+
+const void* Message::header() const { return impl_->header(); }
+
+const std::shared_ptr<const KeyValueMetadata>& Message::custom_metadata() const {
+ return impl_->custom_metadata();
+}
+
+bool Message::Equals(const Message& other) const {
+ int64_t metadata_bytes = std::min(metadata()->size(), other.metadata()->size());
+
+ if (!metadata()->Equals(*other.metadata(), metadata_bytes)) {
+ return false;
+ }
+
+ // Compare bodies, if they have them
+ auto this_body = body();
+ auto other_body = other.body();
+
+ const bool this_has_body = (this_body != nullptr) && (this_body->size() > 0);
+ const bool other_has_body = (other_body != nullptr) && (other_body->size() > 0);
+
+ if (this_has_body && other_has_body) {
+ return this_body->Equals(*other_body);
+ } else if (this_has_body ^ other_has_body) {
+ // One has a body but not the other
+ return false;
+ } else {
+ // Neither has a body
+ return true;
+ }
+}
+
+Status MaybeAlignMetadata(std::shared_ptr<Buffer>* metadata) {
+ if (reinterpret_cast<uintptr_t>((*metadata)->data()) % 8 != 0) {
+ // If the metadata memory is not aligned, we copy it here to avoid
+ // potential UBSAN issues from Flatbuffers
+ ARROW_ASSIGN_OR_RAISE(*metadata, (*metadata)->CopySlice(0, (*metadata)->size()));
+ }
+ return Status::OK();
+}
+
+Status CheckMetadataAndGetBodyLength(const Buffer& metadata, int64_t* body_length) {
+ const flatbuf::Message* fb_message = nullptr;
+ RETURN_NOT_OK(internal::VerifyMessage(metadata.data(), metadata.size(), &fb_message));
+ *body_length = fb_message->bodyLength();
+ if (*body_length < 0) {
+ return Status::IOError("Invalid IPC message: negative bodyLength");
+ }
+ return Status::OK();
+}
+
+Result<std::unique_ptr<Message>> Message::ReadFrom(std::shared_ptr<Buffer> metadata,
+ io::InputStream* stream) {
+ std::unique_ptr<Message> result;
+ auto listener = std::make_shared<AssignMessageDecoderListener>(&result);
+ MessageDecoder decoder(listener, MessageDecoder::State::METADATA, metadata->size());
+ ARROW_RETURN_NOT_OK(decoder.Consume(metadata));
+
+ ARROW_ASSIGN_OR_RAISE(auto body, stream->Read(decoder.next_required_size()));
+ if (body->size() < decoder.next_required_size()) {
+ return Status::IOError("Expected to be able to read ", decoder.next_required_size(),
+ " bytes for message body, got ", body->size());
+ }
+ RETURN_NOT_OK(decoder.Consume(body));
+ return std::move(result);
+}
+
+Result<std::unique_ptr<Message>> Message::ReadFrom(const int64_t offset,
+ std::shared_ptr<Buffer> metadata,
+ io::RandomAccessFile* file) {
+ std::unique_ptr<Message> result;
+ auto listener = std::make_shared<AssignMessageDecoderListener>(&result);
+ MessageDecoder decoder(listener, MessageDecoder::State::METADATA, metadata->size());
+ ARROW_RETURN_NOT_OK(decoder.Consume(metadata));
+
+ ARROW_ASSIGN_OR_RAISE(auto body, file->ReadAt(offset, decoder.next_required_size()));
+ if (body->size() < decoder.next_required_size()) {
+ return Status::IOError("Expected to be able to read ", decoder.next_required_size(),
+ " bytes for message body, got ", body->size());
+ }
+ RETURN_NOT_OK(decoder.Consume(body));
+ return std::move(result);
+}
+
+Status WritePadding(io::OutputStream* stream, int64_t nbytes) {
+ while (nbytes > 0) {
+ const int64_t bytes_to_write = std::min<int64_t>(nbytes, kArrowAlignment);
+ RETURN_NOT_OK(stream->Write(kPaddingBytes, bytes_to_write));
+ nbytes -= bytes_to_write;
+ }
+ return Status::OK();
+}
+
+Status Message::SerializeTo(io::OutputStream* stream, const IpcWriteOptions& options,
+ int64_t* output_length) const {
+ int32_t metadata_length = 0;
+ RETURN_NOT_OK(WriteMessage(*metadata(), options, stream, &metadata_length));
+
+ *output_length = metadata_length;
+
+ auto body_buffer = body();
+ if (body_buffer) {
+ RETURN_NOT_OK(stream->Write(body_buffer));
+ *output_length += body_buffer->size();
+
+ DCHECK_GE(this->body_length(), body_buffer->size());
+
+ int64_t remainder = this->body_length() - body_buffer->size();
+ RETURN_NOT_OK(WritePadding(stream, remainder));
+ *output_length += remainder;
+ }
+ return Status::OK();
+}
+
+bool Message::Verify() const {
+ const flatbuf::Message* unused;
+ return internal::VerifyMessage(metadata()->data(), metadata()->size(), &unused).ok();
+}
+
+std::string FormatMessageType(MessageType type) {
+ switch (type) {
+ case MessageType::SCHEMA:
+ return "schema";
+ case MessageType::RECORD_BATCH:
+ return "record batch";
+ case MessageType::DICTIONARY_BATCH:
+ return "dictionary";
case MessageType::TENSOR:
return "tensor";
case MessageType::SPARSE_TENSOR:
return "sparse tensor";
- default:
- break;
- }
- return "unknown";
-}
-
-Result<std::unique_ptr<Message>> ReadMessage(int64_t offset, int32_t metadata_length,
- io::RandomAccessFile* file) {
- std::unique_ptr<Message> result;
- auto listener = std::make_shared<AssignMessageDecoderListener>(&result);
- MessageDecoder decoder(listener);
-
- if (metadata_length < decoder.next_required_size()) {
- return Status::Invalid("metadata_length should be at least ",
- decoder.next_required_size());
- }
-
- ARROW_ASSIGN_OR_RAISE(auto metadata, file->ReadAt(offset, metadata_length));
- if (metadata->size() < metadata_length) {
- return Status::Invalid("Expected to read ", metadata_length,
- " metadata bytes but got ", metadata->size());
- }
- ARROW_RETURN_NOT_OK(decoder.Consume(metadata));
-
- switch (decoder.state()) {
- case MessageDecoder::State::INITIAL:
- return std::move(result);
- case MessageDecoder::State::METADATA_LENGTH:
- return Status::Invalid("metadata length is missing. File offset: ", offset,
- ", metadata length: ", metadata_length);
- case MessageDecoder::State::METADATA:
- return Status::Invalid("flatbuffer size ", decoder.next_required_size(),
- " invalid. File offset: ", offset,
- ", metadata length: ", metadata_length);
- case MessageDecoder::State::BODY: {
- ARROW_ASSIGN_OR_RAISE(auto body, file->ReadAt(offset + metadata_length,
- decoder.next_required_size()));
- if (body->size() < decoder.next_required_size()) {
- return Status::IOError("Expected to be able to read ",
- decoder.next_required_size(),
- " bytes for message body, got ", body->size());
- }
- RETURN_NOT_OK(decoder.Consume(body));
- return std::move(result);
- }
- case MessageDecoder::State::EOS:
- return Status::Invalid("Unexpected empty message in IPC file format");
- default:
- return Status::Invalid("Unexpected state: ", decoder.state());
- }
-}
-
+ default:
+ break;
+ }
+ return "unknown";
+}
+
+Result<std::unique_ptr<Message>> ReadMessage(int64_t offset, int32_t metadata_length,
+ io::RandomAccessFile* file) {
+ std::unique_ptr<Message> result;
+ auto listener = std::make_shared<AssignMessageDecoderListener>(&result);
+ MessageDecoder decoder(listener);
+
+ if (metadata_length < decoder.next_required_size()) {
+ return Status::Invalid("metadata_length should be at least ",
+ decoder.next_required_size());
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto metadata, file->ReadAt(offset, metadata_length));
+ if (metadata->size() < metadata_length) {
+ return Status::Invalid("Expected to read ", metadata_length,
+ " metadata bytes but got ", metadata->size());
+ }
+ ARROW_RETURN_NOT_OK(decoder.Consume(metadata));
+
+ switch (decoder.state()) {
+ case MessageDecoder::State::INITIAL:
+ return std::move(result);
+ case MessageDecoder::State::METADATA_LENGTH:
+ return Status::Invalid("metadata length is missing. File offset: ", offset,
+ ", metadata length: ", metadata_length);
+ case MessageDecoder::State::METADATA:
+ return Status::Invalid("flatbuffer size ", decoder.next_required_size(),
+ " invalid. File offset: ", offset,
+ ", metadata length: ", metadata_length);
+ case MessageDecoder::State::BODY: {
+ ARROW_ASSIGN_OR_RAISE(auto body, file->ReadAt(offset + metadata_length,
+ decoder.next_required_size()));
+ if (body->size() < decoder.next_required_size()) {
+ return Status::IOError("Expected to be able to read ",
+ decoder.next_required_size(),
+ " bytes for message body, got ", body->size());
+ }
+ RETURN_NOT_OK(decoder.Consume(body));
+ return std::move(result);
+ }
+ case MessageDecoder::State::EOS:
+ return Status::Invalid("Unexpected empty message in IPC file format");
+ default:
+ return Status::Invalid("Unexpected state: ", decoder.state());
+ }
+}
+
Future<std::shared_ptr<Message>> ReadMessageAsync(int64_t offset, int32_t metadata_length,
int64_t body_length,
io::RandomAccessFile* file,
@@ -379,553 +379,553 @@ Future<std::shared_ptr<Message>> ReadMessageAsync(int64_t offset, int32_t metada
});
}
-Status AlignStream(io::InputStream* stream, int32_t alignment) {
- ARROW_ASSIGN_OR_RAISE(int64_t position, stream->Tell());
- return stream->Advance(PaddedLength(position, alignment) - position);
-}
-
-Status AlignStream(io::OutputStream* stream, int32_t alignment) {
- ARROW_ASSIGN_OR_RAISE(int64_t position, stream->Tell());
- int64_t remainder = PaddedLength(position, alignment) - position;
- if (remainder > 0) {
- return stream->Write(kPaddingBytes, remainder);
- }
- return Status::OK();
-}
-
-Status CheckAligned(io::FileInterface* stream, int32_t alignment) {
- ARROW_ASSIGN_OR_RAISE(int64_t position, stream->Tell());
- if (position % alignment != 0) {
- return Status::Invalid("Stream is not aligned pos: ", position,
- " alignment: ", alignment);
- } else {
- return Status::OK();
- }
-}
-
-Status DecodeMessage(MessageDecoder* decoder, io::InputStream* file) {
- if (decoder->state() == MessageDecoder::State::INITIAL) {
- uint8_t continuation[sizeof(int32_t)];
- ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, file->Read(sizeof(int32_t), &continuation));
- if (bytes_read == 0) {
- // EOS without indication
- return Status::OK();
- } else if (bytes_read != decoder->next_required_size()) {
- return Status::Invalid("Corrupted message, only ", bytes_read, " bytes available");
- }
- ARROW_RETURN_NOT_OK(decoder->Consume(continuation, bytes_read));
- }
-
- if (decoder->state() == MessageDecoder::State::METADATA_LENGTH) {
- // Valid IPC message, read the message length now
- uint8_t metadata_length[sizeof(int32_t)];
- ARROW_ASSIGN_OR_RAISE(int64_t bytes_read,
- file->Read(sizeof(int32_t), &metadata_length));
- if (bytes_read != decoder->next_required_size()) {
- return Status::Invalid("Corrupted metadata length, only ", bytes_read,
- " bytes available");
- }
- ARROW_RETURN_NOT_OK(decoder->Consume(metadata_length, bytes_read));
- }
-
- if (decoder->state() == MessageDecoder::State::EOS) {
- return Status::OK();
- }
-
- auto metadata_length = decoder->next_required_size();
- ARROW_ASSIGN_OR_RAISE(auto metadata, file->Read(metadata_length));
- if (metadata->size() != metadata_length) {
- return Status::Invalid("Expected to read ", metadata_length, " metadata bytes, but ",
- "only read ", metadata->size());
- }
- ARROW_RETURN_NOT_OK(decoder->Consume(metadata));
-
- if (decoder->state() == MessageDecoder::State::BODY) {
- ARROW_ASSIGN_OR_RAISE(auto body, file->Read(decoder->next_required_size()));
- if (body->size() < decoder->next_required_size()) {
- return Status::IOError("Expected to be able to read ",
- decoder->next_required_size(),
- " bytes for message body, got ", body->size());
- }
- ARROW_RETURN_NOT_OK(decoder->Consume(body));
- }
-
- if (decoder->state() == MessageDecoder::State::INITIAL ||
- decoder->state() == MessageDecoder::State::EOS) {
- return Status::OK();
- } else {
- return Status::Invalid("Failed to decode message");
- }
-}
-
-Result<std::unique_ptr<Message>> ReadMessage(io::InputStream* file, MemoryPool* pool) {
- std::unique_ptr<Message> message;
- auto listener = std::make_shared<AssignMessageDecoderListener>(&message);
- MessageDecoder decoder(listener, pool);
- ARROW_RETURN_NOT_OK(DecodeMessage(&decoder, file));
- if (!message) {
- return nullptr;
- } else {
- return std::move(message);
- }
-}
-
-Status WriteMessage(const Buffer& message, const IpcWriteOptions& options,
- io::OutputStream* file, int32_t* message_length) {
- const int32_t prefix_size = options.write_legacy_ipc_format ? 4 : 8;
- const int32_t flatbuffer_size = static_cast<int32_t>(message.size());
-
- int32_t padded_message_length = static_cast<int32_t>(
- PaddedLength(flatbuffer_size + prefix_size, options.alignment));
-
- int32_t padding = padded_message_length - flatbuffer_size - prefix_size;
-
- // The returned message size includes the length prefix, the flatbuffer,
- // plus padding
- *message_length = padded_message_length;
-
- // ARROW-6314: Write continuation / padding token
- if (!options.write_legacy_ipc_format) {
- RETURN_NOT_OK(file->Write(&internal::kIpcContinuationToken, sizeof(int32_t)));
- }
-
- // Write the flatbuffer size prefix including padding in little endian
- int32_t padded_flatbuffer_size =
- BitUtil::ToLittleEndian(padded_message_length - prefix_size);
- RETURN_NOT_OK(file->Write(&padded_flatbuffer_size, sizeof(int32_t)));
-
- // Write the flatbuffer
- RETURN_NOT_OK(file->Write(message.data(), flatbuffer_size));
- if (padding > 0) {
- RETURN_NOT_OK(file->Write(kPaddingBytes, padding));
- }
-
- return Status::OK();
-}
-
-// ----------------------------------------------------------------------
-// Implement MessageDecoder
-
-Status MessageDecoderListener::OnInitial() { return Status::OK(); }
-Status MessageDecoderListener::OnMetadataLength() { return Status::OK(); }
-Status MessageDecoderListener::OnMetadata() { return Status::OK(); }
-Status MessageDecoderListener::OnBody() { return Status::OK(); }
-Status MessageDecoderListener::OnEOS() { return Status::OK(); }
-
-static constexpr auto kMessageDecoderNextRequiredSizeInitial = sizeof(int32_t);
-static constexpr auto kMessageDecoderNextRequiredSizeMetadataLength = sizeof(int32_t);
-
-class MessageDecoder::MessageDecoderImpl {
- public:
- explicit MessageDecoderImpl(std::shared_ptr<MessageDecoderListener> listener,
- State initial_state, int64_t initial_next_required_size,
- MemoryPool* pool)
- : listener_(std::move(listener)),
- pool_(pool),
- state_(initial_state),
- next_required_size_(initial_next_required_size),
- chunks_(),
- buffered_size_(0),
- metadata_(nullptr) {}
-
- Status ConsumeData(const uint8_t* data, int64_t size) {
- if (buffered_size_ == 0) {
- while (size > 0 && size >= next_required_size_) {
- auto used_size = next_required_size_;
- switch (state_) {
- case State::INITIAL:
- RETURN_NOT_OK(ConsumeInitialData(data, next_required_size_));
- break;
- case State::METADATA_LENGTH:
- RETURN_NOT_OK(ConsumeMetadataLengthData(data, next_required_size_));
- break;
- case State::METADATA: {
- auto buffer = std::make_shared<Buffer>(data, next_required_size_);
- RETURN_NOT_OK(ConsumeMetadataBuffer(buffer));
- } break;
- case State::BODY: {
- auto buffer = std::make_shared<Buffer>(data, next_required_size_);
- RETURN_NOT_OK(ConsumeBodyBuffer(buffer));
- } break;
- case State::EOS:
- return Status::OK();
- }
- data += used_size;
- size -= used_size;
- }
- }
-
- if (size == 0) {
- return Status::OK();
- }
-
- chunks_.push_back(std::make_shared<Buffer>(data, size));
- buffered_size_ += size;
- return ConsumeChunks();
- }
-
- Status ConsumeBuffer(std::shared_ptr<Buffer> buffer) {
- if (buffered_size_ == 0) {
- while (buffer->size() >= next_required_size_) {
- auto used_size = next_required_size_;
- switch (state_) {
- case State::INITIAL:
- RETURN_NOT_OK(ConsumeInitialBuffer(buffer));
- break;
- case State::METADATA_LENGTH:
- RETURN_NOT_OK(ConsumeMetadataLengthBuffer(buffer));
- break;
- case State::METADATA:
- if (buffer->size() == next_required_size_) {
- return ConsumeMetadataBuffer(buffer);
- } else {
- auto sliced_buffer = SliceBuffer(buffer, 0, next_required_size_);
- RETURN_NOT_OK(ConsumeMetadataBuffer(sliced_buffer));
- }
- break;
- case State::BODY:
- if (buffer->size() == next_required_size_) {
- return ConsumeBodyBuffer(buffer);
- } else {
- auto sliced_buffer = SliceBuffer(buffer, 0, next_required_size_);
- RETURN_NOT_OK(ConsumeBodyBuffer(sliced_buffer));
- }
- break;
- case State::EOS:
- return Status::OK();
- }
- if (buffer->size() == used_size) {
- return Status::OK();
- }
- buffer = SliceBuffer(buffer, used_size);
- }
- }
-
- if (buffer->size() == 0) {
- return Status::OK();
- }
-
- buffered_size_ += buffer->size();
- chunks_.push_back(std::move(buffer));
- return ConsumeChunks();
- }
-
- int64_t next_required_size() const { return next_required_size_ - buffered_size_; }
-
- MessageDecoder::State state() const { return state_; }
-
- private:
- Status ConsumeChunks() {
- while (state_ != State::EOS) {
- if (buffered_size_ < next_required_size_) {
- return Status::OK();
- }
-
- switch (state_) {
- case State::INITIAL:
- RETURN_NOT_OK(ConsumeInitialChunks());
- break;
- case State::METADATA_LENGTH:
- RETURN_NOT_OK(ConsumeMetadataLengthChunks());
- break;
- case State::METADATA:
- RETURN_NOT_OK(ConsumeMetadataChunks());
- break;
- case State::BODY:
- RETURN_NOT_OK(ConsumeBodyChunks());
- break;
- case State::EOS:
- return Status::OK();
- }
- }
-
- return Status::OK();
- }
-
- Status ConsumeInitialData(const uint8_t* data, int64_t size) {
- return ConsumeInitial(BitUtil::FromLittleEndian(util::SafeLoadAs<int32_t>(data)));
- }
-
- Status ConsumeInitialBuffer(const std::shared_ptr<Buffer>& buffer) {
- ARROW_ASSIGN_OR_RAISE(auto continuation, ConsumeDataBufferInt32(buffer));
- return ConsumeInitial(BitUtil::FromLittleEndian(continuation));
- }
-
- Status ConsumeInitialChunks() {
- int32_t continuation = 0;
- RETURN_NOT_OK(ConsumeDataChunks(sizeof(int32_t), &continuation));
- return ConsumeInitial(BitUtil::FromLittleEndian(continuation));
- }
-
- Status ConsumeInitial(int32_t continuation) {
- if (continuation == internal::kIpcContinuationToken) {
- state_ = State::METADATA_LENGTH;
- next_required_size_ = kMessageDecoderNextRequiredSizeMetadataLength;
- RETURN_NOT_OK(listener_->OnMetadataLength());
- // Valid IPC message, read the message length now
- return Status::OK();
- } else if (continuation == 0) {
- state_ = State::EOS;
- next_required_size_ = 0;
- RETURN_NOT_OK(listener_->OnEOS());
- return Status::OK();
- } else if (continuation > 0) {
- state_ = State::METADATA;
- // ARROW-6314: Backwards compatibility for reading old IPC
- // messages produced prior to version 0.15.0
- next_required_size_ = continuation;
- RETURN_NOT_OK(listener_->OnMetadata());
- return Status::OK();
- } else {
- return Status::IOError("Invalid IPC stream: negative continuation token");
- }
- }
-
- Status ConsumeMetadataLengthData(const uint8_t* data, int64_t size) {
- return ConsumeMetadataLength(
- BitUtil::FromLittleEndian(util::SafeLoadAs<int32_t>(data)));
- }
-
- Status ConsumeMetadataLengthBuffer(const std::shared_ptr<Buffer>& buffer) {
- ARROW_ASSIGN_OR_RAISE(auto metadata_length, ConsumeDataBufferInt32(buffer));
- return ConsumeMetadataLength(BitUtil::FromLittleEndian(metadata_length));
- }
-
- Status ConsumeMetadataLengthChunks() {
- int32_t metadata_length = 0;
- RETURN_NOT_OK(ConsumeDataChunks(sizeof(int32_t), &metadata_length));
- return ConsumeMetadataLength(BitUtil::FromLittleEndian(metadata_length));
- }
-
- Status ConsumeMetadataLength(int32_t metadata_length) {
- if (metadata_length == 0) {
- state_ = State::EOS;
- next_required_size_ = 0;
- RETURN_NOT_OK(listener_->OnEOS());
- return Status::OK();
- } else if (metadata_length > 0) {
- state_ = State::METADATA;
- next_required_size_ = metadata_length;
- RETURN_NOT_OK(listener_->OnMetadata());
- return Status::OK();
- } else {
- return Status::IOError("Invalid IPC message: negative metadata length");
- }
- }
-
- Status ConsumeMetadataBuffer(const std::shared_ptr<Buffer>& buffer) {
- if (buffer->is_cpu()) {
- metadata_ = buffer;
- } else {
- ARROW_ASSIGN_OR_RAISE(metadata_,
- Buffer::ViewOrCopy(buffer, CPUDevice::memory_manager(pool_)));
- }
- return ConsumeMetadata();
- }
-
- Status ConsumeMetadataChunks() {
- if (chunks_[0]->size() >= next_required_size_) {
- if (chunks_[0]->size() == next_required_size_) {
- if (chunks_[0]->is_cpu()) {
- metadata_ = std::move(chunks_[0]);
- } else {
- ARROW_ASSIGN_OR_RAISE(
- metadata_,
- Buffer::ViewOrCopy(chunks_[0], CPUDevice::memory_manager(pool_)));
- }
- chunks_.erase(chunks_.begin());
- } else {
- metadata_ = SliceBuffer(chunks_[0], 0, next_required_size_);
- if (!chunks_[0]->is_cpu()) {
- ARROW_ASSIGN_OR_RAISE(
- metadata_, Buffer::ViewOrCopy(metadata_, CPUDevice::memory_manager(pool_)));
- }
- chunks_[0] = SliceBuffer(chunks_[0], next_required_size_);
- }
- buffered_size_ -= next_required_size_;
- } else {
- ARROW_ASSIGN_OR_RAISE(auto metadata, AllocateBuffer(next_required_size_, pool_));
- metadata_ = std::shared_ptr<Buffer>(metadata.release());
- RETURN_NOT_OK(ConsumeDataChunks(next_required_size_, metadata_->mutable_data()));
- }
- return ConsumeMetadata();
- }
-
- Status ConsumeMetadata() {
- RETURN_NOT_OK(MaybeAlignMetadata(&metadata_));
- int64_t body_length = -1;
- RETURN_NOT_OK(CheckMetadataAndGetBodyLength(*metadata_, &body_length));
-
- state_ = State::BODY;
- next_required_size_ = body_length;
- RETURN_NOT_OK(listener_->OnBody());
- if (next_required_size_ == 0) {
- ARROW_ASSIGN_OR_RAISE(auto body, AllocateBuffer(0, pool_));
- std::shared_ptr<Buffer> shared_body(body.release());
- return ConsumeBody(&shared_body);
- } else {
- return Status::OK();
- }
- }
-
- Status ConsumeBodyBuffer(std::shared_ptr<Buffer> buffer) {
- return ConsumeBody(&buffer);
- }
-
- Status ConsumeBodyChunks() {
- if (chunks_[0]->size() >= next_required_size_) {
- auto used_size = next_required_size_;
- if (chunks_[0]->size() == next_required_size_) {
- RETURN_NOT_OK(ConsumeBody(&chunks_[0]));
- chunks_.erase(chunks_.begin());
- } else {
- auto body = SliceBuffer(chunks_[0], 0, next_required_size_);
- RETURN_NOT_OK(ConsumeBody(&body));
- chunks_[0] = SliceBuffer(chunks_[0], used_size);
- }
- buffered_size_ -= used_size;
- return Status::OK();
- } else {
- ARROW_ASSIGN_OR_RAISE(auto body, AllocateBuffer(next_required_size_, pool_));
- RETURN_NOT_OK(ConsumeDataChunks(next_required_size_, body->mutable_data()));
- std::shared_ptr<Buffer> shared_body(body.release());
- return ConsumeBody(&shared_body);
- }
- }
-
- Status ConsumeBody(std::shared_ptr<Buffer>* buffer) {
- ARROW_ASSIGN_OR_RAISE(std::unique_ptr<Message> message,
- Message::Open(metadata_, *buffer));
-
- RETURN_NOT_OK(listener_->OnMessageDecoded(std::move(message)));
- state_ = State::INITIAL;
- next_required_size_ = kMessageDecoderNextRequiredSizeInitial;
- RETURN_NOT_OK(listener_->OnInitial());
- return Status::OK();
- }
-
- Result<int32_t> ConsumeDataBufferInt32(const std::shared_ptr<Buffer>& buffer) {
- if (buffer->is_cpu()) {
- return util::SafeLoadAs<int32_t>(buffer->data());
- } else {
- ARROW_ASSIGN_OR_RAISE(auto cpu_buffer,
- Buffer::ViewOrCopy(buffer, CPUDevice::memory_manager(pool_)));
- return util::SafeLoadAs<int32_t>(cpu_buffer->data());
- }
- }
-
- Status ConsumeDataChunks(int64_t nbytes, void* out) {
- size_t offset = 0;
- size_t n_used_chunks = 0;
- auto required_size = nbytes;
- std::shared_ptr<Buffer> last_chunk;
- for (auto& chunk : chunks_) {
- if (!chunk->is_cpu()) {
- ARROW_ASSIGN_OR_RAISE(
- chunk, Buffer::ViewOrCopy(chunk, CPUDevice::memory_manager(pool_)));
- }
- auto data = chunk->data();
- auto data_size = chunk->size();
- auto copy_size = std::min(required_size, data_size);
- memcpy(static_cast<uint8_t*>(out) + offset, data, copy_size);
- n_used_chunks++;
- offset += copy_size;
- required_size -= copy_size;
- if (required_size == 0) {
- if (data_size != copy_size) {
- last_chunk = SliceBuffer(chunk, copy_size);
- }
- break;
- }
- }
- chunks_.erase(chunks_.begin(), chunks_.begin() + n_used_chunks);
- if (last_chunk.get() != nullptr) {
- chunks_.insert(chunks_.begin(), std::move(last_chunk));
- }
- buffered_size_ -= offset;
- return Status::OK();
- }
-
- std::shared_ptr<MessageDecoderListener> listener_;
- MemoryPool* pool_;
- State state_;
- int64_t next_required_size_;
- std::vector<std::shared_ptr<Buffer>> chunks_;
- int64_t buffered_size_;
- std::shared_ptr<Buffer> metadata_; // Must be CPU buffer
-};
-
-MessageDecoder::MessageDecoder(std::shared_ptr<MessageDecoderListener> listener,
- MemoryPool* pool) {
- impl_.reset(new MessageDecoderImpl(std::move(listener), State::INITIAL,
- kMessageDecoderNextRequiredSizeInitial, pool));
-}
-
-MessageDecoder::MessageDecoder(std::shared_ptr<MessageDecoderListener> listener,
- State initial_state, int64_t initial_next_required_size,
- MemoryPool* pool) {
- impl_.reset(new MessageDecoderImpl(std::move(listener), initial_state,
- initial_next_required_size, pool));
-}
-
-MessageDecoder::~MessageDecoder() {}
-
-Status MessageDecoder::Consume(const uint8_t* data, int64_t size) {
- return impl_->ConsumeData(data, size);
-}
-
-Status MessageDecoder::Consume(std::shared_ptr<Buffer> buffer) {
- return impl_->ConsumeBuffer(buffer);
-}
-
-int64_t MessageDecoder::next_required_size() const { return impl_->next_required_size(); }
-
-MessageDecoder::State MessageDecoder::state() const { return impl_->state(); }
-
-// ----------------------------------------------------------------------
-// Implement InputStream message reader
-
-/// \brief Implementation of MessageReader that reads from InputStream
-class InputStreamMessageReader : public MessageReader, public MessageDecoderListener {
- public:
- explicit InputStreamMessageReader(io::InputStream* stream)
- : stream_(stream),
- owned_stream_(),
- message_(),
- decoder_(std::shared_ptr<InputStreamMessageReader>(this, [](void*) {})) {}
-
- explicit InputStreamMessageReader(const std::shared_ptr<io::InputStream>& owned_stream)
- : InputStreamMessageReader(owned_stream.get()) {
- owned_stream_ = owned_stream;
- }
-
- ~InputStreamMessageReader() {}
-
- Status OnMessageDecoded(std::unique_ptr<Message> message) override {
- message_ = std::move(message);
- return Status::OK();
- }
-
- Result<std::unique_ptr<Message>> ReadNextMessage() override {
- ARROW_RETURN_NOT_OK(DecodeMessage(&decoder_, stream_));
- return std::move(message_);
- }
-
- private:
- io::InputStream* stream_;
- std::shared_ptr<io::InputStream> owned_stream_;
- std::unique_ptr<Message> message_;
- MessageDecoder decoder_;
-};
-
-std::unique_ptr<MessageReader> MessageReader::Open(io::InputStream* stream) {
- return std::unique_ptr<MessageReader>(new InputStreamMessageReader(stream));
-}
-
-std::unique_ptr<MessageReader> MessageReader::Open(
- const std::shared_ptr<io::InputStream>& owned_stream) {
- return std::unique_ptr<MessageReader>(new InputStreamMessageReader(owned_stream));
-}
-
-} // namespace ipc
-} // namespace arrow
+Status AlignStream(io::InputStream* stream, int32_t alignment) {
+ ARROW_ASSIGN_OR_RAISE(int64_t position, stream->Tell());
+ return stream->Advance(PaddedLength(position, alignment) - position);
+}
+
+Status AlignStream(io::OutputStream* stream, int32_t alignment) {
+ ARROW_ASSIGN_OR_RAISE(int64_t position, stream->Tell());
+ int64_t remainder = PaddedLength(position, alignment) - position;
+ if (remainder > 0) {
+ return stream->Write(kPaddingBytes, remainder);
+ }
+ return Status::OK();
+}
+
+Status CheckAligned(io::FileInterface* stream, int32_t alignment) {
+ ARROW_ASSIGN_OR_RAISE(int64_t position, stream->Tell());
+ if (position % alignment != 0) {
+ return Status::Invalid("Stream is not aligned pos: ", position,
+ " alignment: ", alignment);
+ } else {
+ return Status::OK();
+ }
+}
+
+Status DecodeMessage(MessageDecoder* decoder, io::InputStream* file) {
+ if (decoder->state() == MessageDecoder::State::INITIAL) {
+ uint8_t continuation[sizeof(int32_t)];
+ ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, file->Read(sizeof(int32_t), &continuation));
+ if (bytes_read == 0) {
+ // EOS without indication
+ return Status::OK();
+ } else if (bytes_read != decoder->next_required_size()) {
+ return Status::Invalid("Corrupted message, only ", bytes_read, " bytes available");
+ }
+ ARROW_RETURN_NOT_OK(decoder->Consume(continuation, bytes_read));
+ }
+
+ if (decoder->state() == MessageDecoder::State::METADATA_LENGTH) {
+ // Valid IPC message, read the message length now
+ uint8_t metadata_length[sizeof(int32_t)];
+ ARROW_ASSIGN_OR_RAISE(int64_t bytes_read,
+ file->Read(sizeof(int32_t), &metadata_length));
+ if (bytes_read != decoder->next_required_size()) {
+ return Status::Invalid("Corrupted metadata length, only ", bytes_read,
+ " bytes available");
+ }
+ ARROW_RETURN_NOT_OK(decoder->Consume(metadata_length, bytes_read));
+ }
+
+ if (decoder->state() == MessageDecoder::State::EOS) {
+ return Status::OK();
+ }
+
+ auto metadata_length = decoder->next_required_size();
+ ARROW_ASSIGN_OR_RAISE(auto metadata, file->Read(metadata_length));
+ if (metadata->size() != metadata_length) {
+ return Status::Invalid("Expected to read ", metadata_length, " metadata bytes, but ",
+ "only read ", metadata->size());
+ }
+ ARROW_RETURN_NOT_OK(decoder->Consume(metadata));
+
+ if (decoder->state() == MessageDecoder::State::BODY) {
+ ARROW_ASSIGN_OR_RAISE(auto body, file->Read(decoder->next_required_size()));
+ if (body->size() < decoder->next_required_size()) {
+ return Status::IOError("Expected to be able to read ",
+ decoder->next_required_size(),
+ " bytes for message body, got ", body->size());
+ }
+ ARROW_RETURN_NOT_OK(decoder->Consume(body));
+ }
+
+ if (decoder->state() == MessageDecoder::State::INITIAL ||
+ decoder->state() == MessageDecoder::State::EOS) {
+ return Status::OK();
+ } else {
+ return Status::Invalid("Failed to decode message");
+ }
+}
+
+Result<std::unique_ptr<Message>> ReadMessage(io::InputStream* file, MemoryPool* pool) {
+ std::unique_ptr<Message> message;
+ auto listener = std::make_shared<AssignMessageDecoderListener>(&message);
+ MessageDecoder decoder(listener, pool);
+ ARROW_RETURN_NOT_OK(DecodeMessage(&decoder, file));
+ if (!message) {
+ return nullptr;
+ } else {
+ return std::move(message);
+ }
+}
+
+Status WriteMessage(const Buffer& message, const IpcWriteOptions& options,
+ io::OutputStream* file, int32_t* message_length) {
+ const int32_t prefix_size = options.write_legacy_ipc_format ? 4 : 8;
+ const int32_t flatbuffer_size = static_cast<int32_t>(message.size());
+
+ int32_t padded_message_length = static_cast<int32_t>(
+ PaddedLength(flatbuffer_size + prefix_size, options.alignment));
+
+ int32_t padding = padded_message_length - flatbuffer_size - prefix_size;
+
+ // The returned message size includes the length prefix, the flatbuffer,
+ // plus padding
+ *message_length = padded_message_length;
+
+ // ARROW-6314: Write continuation / padding token
+ if (!options.write_legacy_ipc_format) {
+ RETURN_NOT_OK(file->Write(&internal::kIpcContinuationToken, sizeof(int32_t)));
+ }
+
+ // Write the flatbuffer size prefix including padding in little endian
+ int32_t padded_flatbuffer_size =
+ BitUtil::ToLittleEndian(padded_message_length - prefix_size);
+ RETURN_NOT_OK(file->Write(&padded_flatbuffer_size, sizeof(int32_t)));
+
+ // Write the flatbuffer
+ RETURN_NOT_OK(file->Write(message.data(), flatbuffer_size));
+ if (padding > 0) {
+ RETURN_NOT_OK(file->Write(kPaddingBytes, padding));
+ }
+
+ return Status::OK();
+}
+
+// ----------------------------------------------------------------------
+// Implement MessageDecoder
+
+Status MessageDecoderListener::OnInitial() { return Status::OK(); }
+Status MessageDecoderListener::OnMetadataLength() { return Status::OK(); }
+Status MessageDecoderListener::OnMetadata() { return Status::OK(); }
+Status MessageDecoderListener::OnBody() { return Status::OK(); }
+Status MessageDecoderListener::OnEOS() { return Status::OK(); }
+
+static constexpr auto kMessageDecoderNextRequiredSizeInitial = sizeof(int32_t);
+static constexpr auto kMessageDecoderNextRequiredSizeMetadataLength = sizeof(int32_t);
+
+class MessageDecoder::MessageDecoderImpl {
+ public:
+ explicit MessageDecoderImpl(std::shared_ptr<MessageDecoderListener> listener,
+ State initial_state, int64_t initial_next_required_size,
+ MemoryPool* pool)
+ : listener_(std::move(listener)),
+ pool_(pool),
+ state_(initial_state),
+ next_required_size_(initial_next_required_size),
+ chunks_(),
+ buffered_size_(0),
+ metadata_(nullptr) {}
+
+ Status ConsumeData(const uint8_t* data, int64_t size) {
+ if (buffered_size_ == 0) {
+ while (size > 0 && size >= next_required_size_) {
+ auto used_size = next_required_size_;
+ switch (state_) {
+ case State::INITIAL:
+ RETURN_NOT_OK(ConsumeInitialData(data, next_required_size_));
+ break;
+ case State::METADATA_LENGTH:
+ RETURN_NOT_OK(ConsumeMetadataLengthData(data, next_required_size_));
+ break;
+ case State::METADATA: {
+ auto buffer = std::make_shared<Buffer>(data, next_required_size_);
+ RETURN_NOT_OK(ConsumeMetadataBuffer(buffer));
+ } break;
+ case State::BODY: {
+ auto buffer = std::make_shared<Buffer>(data, next_required_size_);
+ RETURN_NOT_OK(ConsumeBodyBuffer(buffer));
+ } break;
+ case State::EOS:
+ return Status::OK();
+ }
+ data += used_size;
+ size -= used_size;
+ }
+ }
+
+ if (size == 0) {
+ return Status::OK();
+ }
+
+ chunks_.push_back(std::make_shared<Buffer>(data, size));
+ buffered_size_ += size;
+ return ConsumeChunks();
+ }
+
+ Status ConsumeBuffer(std::shared_ptr<Buffer> buffer) {
+ if (buffered_size_ == 0) {
+ while (buffer->size() >= next_required_size_) {
+ auto used_size = next_required_size_;
+ switch (state_) {
+ case State::INITIAL:
+ RETURN_NOT_OK(ConsumeInitialBuffer(buffer));
+ break;
+ case State::METADATA_LENGTH:
+ RETURN_NOT_OK(ConsumeMetadataLengthBuffer(buffer));
+ break;
+ case State::METADATA:
+ if (buffer->size() == next_required_size_) {
+ return ConsumeMetadataBuffer(buffer);
+ } else {
+ auto sliced_buffer = SliceBuffer(buffer, 0, next_required_size_);
+ RETURN_NOT_OK(ConsumeMetadataBuffer(sliced_buffer));
+ }
+ break;
+ case State::BODY:
+ if (buffer->size() == next_required_size_) {
+ return ConsumeBodyBuffer(buffer);
+ } else {
+ auto sliced_buffer = SliceBuffer(buffer, 0, next_required_size_);
+ RETURN_NOT_OK(ConsumeBodyBuffer(sliced_buffer));
+ }
+ break;
+ case State::EOS:
+ return Status::OK();
+ }
+ if (buffer->size() == used_size) {
+ return Status::OK();
+ }
+ buffer = SliceBuffer(buffer, used_size);
+ }
+ }
+
+ if (buffer->size() == 0) {
+ return Status::OK();
+ }
+
+ buffered_size_ += buffer->size();
+ chunks_.push_back(std::move(buffer));
+ return ConsumeChunks();
+ }
+
+ int64_t next_required_size() const { return next_required_size_ - buffered_size_; }
+
+ MessageDecoder::State state() const { return state_; }
+
+ private:
+ Status ConsumeChunks() {
+ while (state_ != State::EOS) {
+ if (buffered_size_ < next_required_size_) {
+ return Status::OK();
+ }
+
+ switch (state_) {
+ case State::INITIAL:
+ RETURN_NOT_OK(ConsumeInitialChunks());
+ break;
+ case State::METADATA_LENGTH:
+ RETURN_NOT_OK(ConsumeMetadataLengthChunks());
+ break;
+ case State::METADATA:
+ RETURN_NOT_OK(ConsumeMetadataChunks());
+ break;
+ case State::BODY:
+ RETURN_NOT_OK(ConsumeBodyChunks());
+ break;
+ case State::EOS:
+ return Status::OK();
+ }
+ }
+
+ return Status::OK();
+ }
+
+ Status ConsumeInitialData(const uint8_t* data, int64_t size) {
+ return ConsumeInitial(BitUtil::FromLittleEndian(util::SafeLoadAs<int32_t>(data)));
+ }
+
+ Status ConsumeInitialBuffer(const std::shared_ptr<Buffer>& buffer) {
+ ARROW_ASSIGN_OR_RAISE(auto continuation, ConsumeDataBufferInt32(buffer));
+ return ConsumeInitial(BitUtil::FromLittleEndian(continuation));
+ }
+
+ Status ConsumeInitialChunks() {
+ int32_t continuation = 0;
+ RETURN_NOT_OK(ConsumeDataChunks(sizeof(int32_t), &continuation));
+ return ConsumeInitial(BitUtil::FromLittleEndian(continuation));
+ }
+
+ Status ConsumeInitial(int32_t continuation) {
+ if (continuation == internal::kIpcContinuationToken) {
+ state_ = State::METADATA_LENGTH;
+ next_required_size_ = kMessageDecoderNextRequiredSizeMetadataLength;
+ RETURN_NOT_OK(listener_->OnMetadataLength());
+ // Valid IPC message, read the message length now
+ return Status::OK();
+ } else if (continuation == 0) {
+ state_ = State::EOS;
+ next_required_size_ = 0;
+ RETURN_NOT_OK(listener_->OnEOS());
+ return Status::OK();
+ } else if (continuation > 0) {
+ state_ = State::METADATA;
+ // ARROW-6314: Backwards compatibility for reading old IPC
+ // messages produced prior to version 0.15.0
+ next_required_size_ = continuation;
+ RETURN_NOT_OK(listener_->OnMetadata());
+ return Status::OK();
+ } else {
+ return Status::IOError("Invalid IPC stream: negative continuation token");
+ }
+ }
+
+ Status ConsumeMetadataLengthData(const uint8_t* data, int64_t size) {
+ return ConsumeMetadataLength(
+ BitUtil::FromLittleEndian(util::SafeLoadAs<int32_t>(data)));
+ }
+
+ Status ConsumeMetadataLengthBuffer(const std::shared_ptr<Buffer>& buffer) {
+ ARROW_ASSIGN_OR_RAISE(auto metadata_length, ConsumeDataBufferInt32(buffer));
+ return ConsumeMetadataLength(BitUtil::FromLittleEndian(metadata_length));
+ }
+
+ Status ConsumeMetadataLengthChunks() {
+ int32_t metadata_length = 0;
+ RETURN_NOT_OK(ConsumeDataChunks(sizeof(int32_t), &metadata_length));
+ return ConsumeMetadataLength(BitUtil::FromLittleEndian(metadata_length));
+ }
+
+ Status ConsumeMetadataLength(int32_t metadata_length) {
+ if (metadata_length == 0) {
+ state_ = State::EOS;
+ next_required_size_ = 0;
+ RETURN_NOT_OK(listener_->OnEOS());
+ return Status::OK();
+ } else if (metadata_length > 0) {
+ state_ = State::METADATA;
+ next_required_size_ = metadata_length;
+ RETURN_NOT_OK(listener_->OnMetadata());
+ return Status::OK();
+ } else {
+ return Status::IOError("Invalid IPC message: negative metadata length");
+ }
+ }
+
+ Status ConsumeMetadataBuffer(const std::shared_ptr<Buffer>& buffer) {
+ if (buffer->is_cpu()) {
+ metadata_ = buffer;
+ } else {
+ ARROW_ASSIGN_OR_RAISE(metadata_,
+ Buffer::ViewOrCopy(buffer, CPUDevice::memory_manager(pool_)));
+ }
+ return ConsumeMetadata();
+ }
+
+ Status ConsumeMetadataChunks() {
+ if (chunks_[0]->size() >= next_required_size_) {
+ if (chunks_[0]->size() == next_required_size_) {
+ if (chunks_[0]->is_cpu()) {
+ metadata_ = std::move(chunks_[0]);
+ } else {
+ ARROW_ASSIGN_OR_RAISE(
+ metadata_,
+ Buffer::ViewOrCopy(chunks_[0], CPUDevice::memory_manager(pool_)));
+ }
+ chunks_.erase(chunks_.begin());
+ } else {
+ metadata_ = SliceBuffer(chunks_[0], 0, next_required_size_);
+ if (!chunks_[0]->is_cpu()) {
+ ARROW_ASSIGN_OR_RAISE(
+ metadata_, Buffer::ViewOrCopy(metadata_, CPUDevice::memory_manager(pool_)));
+ }
+ chunks_[0] = SliceBuffer(chunks_[0], next_required_size_);
+ }
+ buffered_size_ -= next_required_size_;
+ } else {
+ ARROW_ASSIGN_OR_RAISE(auto metadata, AllocateBuffer(next_required_size_, pool_));
+ metadata_ = std::shared_ptr<Buffer>(metadata.release());
+ RETURN_NOT_OK(ConsumeDataChunks(next_required_size_, metadata_->mutable_data()));
+ }
+ return ConsumeMetadata();
+ }
+
+ Status ConsumeMetadata() {
+ RETURN_NOT_OK(MaybeAlignMetadata(&metadata_));
+ int64_t body_length = -1;
+ RETURN_NOT_OK(CheckMetadataAndGetBodyLength(*metadata_, &body_length));
+
+ state_ = State::BODY;
+ next_required_size_ = body_length;
+ RETURN_NOT_OK(listener_->OnBody());
+ if (next_required_size_ == 0) {
+ ARROW_ASSIGN_OR_RAISE(auto body, AllocateBuffer(0, pool_));
+ std::shared_ptr<Buffer> shared_body(body.release());
+ return ConsumeBody(&shared_body);
+ } else {
+ return Status::OK();
+ }
+ }
+
+ Status ConsumeBodyBuffer(std::shared_ptr<Buffer> buffer) {
+ return ConsumeBody(&buffer);
+ }
+
+ Status ConsumeBodyChunks() {
+ if (chunks_[0]->size() >= next_required_size_) {
+ auto used_size = next_required_size_;
+ if (chunks_[0]->size() == next_required_size_) {
+ RETURN_NOT_OK(ConsumeBody(&chunks_[0]));
+ chunks_.erase(chunks_.begin());
+ } else {
+ auto body = SliceBuffer(chunks_[0], 0, next_required_size_);
+ RETURN_NOT_OK(ConsumeBody(&body));
+ chunks_[0] = SliceBuffer(chunks_[0], used_size);
+ }
+ buffered_size_ -= used_size;
+ return Status::OK();
+ } else {
+ ARROW_ASSIGN_OR_RAISE(auto body, AllocateBuffer(next_required_size_, pool_));
+ RETURN_NOT_OK(ConsumeDataChunks(next_required_size_, body->mutable_data()));
+ std::shared_ptr<Buffer> shared_body(body.release());
+ return ConsumeBody(&shared_body);
+ }
+ }
+
+ Status ConsumeBody(std::shared_ptr<Buffer>* buffer) {
+ ARROW_ASSIGN_OR_RAISE(std::unique_ptr<Message> message,
+ Message::Open(metadata_, *buffer));
+
+ RETURN_NOT_OK(listener_->OnMessageDecoded(std::move(message)));
+ state_ = State::INITIAL;
+ next_required_size_ = kMessageDecoderNextRequiredSizeInitial;
+ RETURN_NOT_OK(listener_->OnInitial());
+ return Status::OK();
+ }
+
+ Result<int32_t> ConsumeDataBufferInt32(const std::shared_ptr<Buffer>& buffer) {
+ if (buffer->is_cpu()) {
+ return util::SafeLoadAs<int32_t>(buffer->data());
+ } else {
+ ARROW_ASSIGN_OR_RAISE(auto cpu_buffer,
+ Buffer::ViewOrCopy(buffer, CPUDevice::memory_manager(pool_)));
+ return util::SafeLoadAs<int32_t>(cpu_buffer->data());
+ }
+ }
+
+ Status ConsumeDataChunks(int64_t nbytes, void* out) {
+ size_t offset = 0;
+ size_t n_used_chunks = 0;
+ auto required_size = nbytes;
+ std::shared_ptr<Buffer> last_chunk;
+ for (auto& chunk : chunks_) {
+ if (!chunk->is_cpu()) {
+ ARROW_ASSIGN_OR_RAISE(
+ chunk, Buffer::ViewOrCopy(chunk, CPUDevice::memory_manager(pool_)));
+ }
+ auto data = chunk->data();
+ auto data_size = chunk->size();
+ auto copy_size = std::min(required_size, data_size);
+ memcpy(static_cast<uint8_t*>(out) + offset, data, copy_size);
+ n_used_chunks++;
+ offset += copy_size;
+ required_size -= copy_size;
+ if (required_size == 0) {
+ if (data_size != copy_size) {
+ last_chunk = SliceBuffer(chunk, copy_size);
+ }
+ break;
+ }
+ }
+ chunks_.erase(chunks_.begin(), chunks_.begin() + n_used_chunks);
+ if (last_chunk.get() != nullptr) {
+ chunks_.insert(chunks_.begin(), std::move(last_chunk));
+ }
+ buffered_size_ -= offset;
+ return Status::OK();
+ }
+
+ std::shared_ptr<MessageDecoderListener> listener_;
+ MemoryPool* pool_;
+ State state_;
+ int64_t next_required_size_;
+ std::vector<std::shared_ptr<Buffer>> chunks_;
+ int64_t buffered_size_;
+ std::shared_ptr<Buffer> metadata_; // Must be CPU buffer
+};
+
+MessageDecoder::MessageDecoder(std::shared_ptr<MessageDecoderListener> listener,
+ MemoryPool* pool) {
+ impl_.reset(new MessageDecoderImpl(std::move(listener), State::INITIAL,
+ kMessageDecoderNextRequiredSizeInitial, pool));
+}
+
+MessageDecoder::MessageDecoder(std::shared_ptr<MessageDecoderListener> listener,
+ State initial_state, int64_t initial_next_required_size,
+ MemoryPool* pool) {
+ impl_.reset(new MessageDecoderImpl(std::move(listener), initial_state,
+ initial_next_required_size, pool));
+}
+
+MessageDecoder::~MessageDecoder() {}
+
+Status MessageDecoder::Consume(const uint8_t* data, int64_t size) {
+ return impl_->ConsumeData(data, size);
+}
+
+Status MessageDecoder::Consume(std::shared_ptr<Buffer> buffer) {
+ return impl_->ConsumeBuffer(buffer);
+}
+
+int64_t MessageDecoder::next_required_size() const { return impl_->next_required_size(); }
+
+MessageDecoder::State MessageDecoder::state() const { return impl_->state(); }
+
+// ----------------------------------------------------------------------
+// Implement InputStream message reader
+
+/// \brief Implementation of MessageReader that reads from InputStream
+class InputStreamMessageReader : public MessageReader, public MessageDecoderListener {
+ public:
+ explicit InputStreamMessageReader(io::InputStream* stream)
+ : stream_(stream),
+ owned_stream_(),
+ message_(),
+ decoder_(std::shared_ptr<InputStreamMessageReader>(this, [](void*) {})) {}
+
+ explicit InputStreamMessageReader(const std::shared_ptr<io::InputStream>& owned_stream)
+ : InputStreamMessageReader(owned_stream.get()) {
+ owned_stream_ = owned_stream;
+ }
+
+ ~InputStreamMessageReader() {}
+
+ Status OnMessageDecoded(std::unique_ptr<Message> message) override {
+ message_ = std::move(message);
+ return Status::OK();
+ }
+
+ Result<std::unique_ptr<Message>> ReadNextMessage() override {
+ ARROW_RETURN_NOT_OK(DecodeMessage(&decoder_, stream_));
+ return std::move(message_);
+ }
+
+ private:
+ io::InputStream* stream_;
+ std::shared_ptr<io::InputStream> owned_stream_;
+ std::unique_ptr<Message> message_;
+ MessageDecoder decoder_;
+};
+
+std::unique_ptr<MessageReader> MessageReader::Open(io::InputStream* stream) {
+ return std::unique_ptr<MessageReader>(new InputStreamMessageReader(stream));
+}
+
+std::unique_ptr<MessageReader> MessageReader::Open(
+ const std::shared_ptr<io::InputStream>& owned_stream) {
+ return std::unique_ptr<MessageReader>(new InputStreamMessageReader(owned_stream));
+}
+
+} // namespace ipc
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/message.h b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/message.h
index b2683259cb4..da7ac40a2f7 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/message.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/message.h
@@ -1,536 +1,536 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// C++ object model and user API for interprocess schema messaging
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <utility>
-
-#include "arrow/io/type_fwd.h"
-#include "arrow/ipc/type_fwd.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace ipc {
-
-struct IpcWriteOptions;
-
-// Read interface classes. We do not fully deserialize the flatbuffers so that
-// individual fields metadata can be retrieved from very large schema without
-//
-
-/// \class Message
-/// \brief An IPC message including metadata and body
-class ARROW_EXPORT Message {
- public:
- /// \brief Construct message, but do not validate
- ///
- /// Use at your own risk; Message::Open has more metadata validation
- Message(std::shared_ptr<Buffer> metadata, std::shared_ptr<Buffer> body);
-
- ~Message();
-
- /// \brief Create and validate a Message instance from two buffers
- ///
- /// \param[in] metadata a buffer containing the Flatbuffer metadata
- /// \param[in] body a buffer containing the message body, which may be null
- /// \return the created message
- static Result<std::unique_ptr<Message>> Open(std::shared_ptr<Buffer> metadata,
- std::shared_ptr<Buffer> body);
-
- /// \brief Read message body and create Message given Flatbuffer metadata
- /// \param[in] metadata containing a serialized Message flatbuffer
- /// \param[in] stream an InputStream
- /// \return the created Message
- ///
- /// \note If stream supports zero-copy, this is zero-copy
- static Result<std::unique_ptr<Message>> ReadFrom(std::shared_ptr<Buffer> metadata,
- io::InputStream* stream);
-
- /// \brief Read message body from position in file, and create Message given
- /// the Flatbuffer metadata
- /// \param[in] offset the position in the file where the message body starts.
- /// \param[in] metadata containing a serialized Message flatbuffer
- /// \param[in] file the seekable file interface to read from
- /// \return the created Message
- ///
- /// \note If file supports zero-copy, this is zero-copy
- static Result<std::unique_ptr<Message>> ReadFrom(const int64_t offset,
- std::shared_ptr<Buffer> metadata,
- io::RandomAccessFile* file);
-
- /// \brief Return true if message type and contents are equal
- ///
- /// \param other another message
- /// \return true if contents equal
- bool Equals(const Message& other) const;
-
- /// \brief the Message metadata
- ///
- /// \return buffer
- std::shared_ptr<Buffer> metadata() const;
-
- /// \brief Custom metadata serialized in metadata Flatbuffer. Returns nullptr
- /// when none set
- const std::shared_ptr<const KeyValueMetadata>& custom_metadata() const;
-
- /// \brief the Message body, if any
- ///
- /// \return buffer is null if no body
- std::shared_ptr<Buffer> body() const;
-
- /// \brief The expected body length according to the metadata, for
- /// verification purposes
- int64_t body_length() const;
-
- /// \brief The Message type
- MessageType type() const;
-
- /// \brief The Message metadata version
- MetadataVersion metadata_version() const;
-
- const void* header() const;
-
- /// \brief Write length-prefixed metadata and body to output stream
- ///
- /// \param[in] file output stream to write to
- /// \param[in] options IPC writing options including alignment
- /// \param[out] output_length the number of bytes written
- /// \return Status
- Status SerializeTo(io::OutputStream* file, const IpcWriteOptions& options,
- int64_t* output_length) const;
-
- /// \brief Return true if the Message metadata passes Flatbuffer validation
- bool Verify() const;
-
- /// \brief Whether a given message type needs a body.
- static bool HasBody(MessageType type) {
- return type != MessageType::NONE && type != MessageType::SCHEMA;
- }
-
- private:
- // Hide serialization details from user API
- class MessageImpl;
- std::unique_ptr<MessageImpl> impl_;
-
- ARROW_DISALLOW_COPY_AND_ASSIGN(Message);
-};
-
-ARROW_EXPORT std::string FormatMessageType(MessageType type);
-
-/// \class MessageDecoderListener
-/// \brief An abstract class to listen events from MessageDecoder.
-///
-/// This API is EXPERIMENTAL.
-///
-/// \since 0.17.0
-class ARROW_EXPORT MessageDecoderListener {
- public:
- virtual ~MessageDecoderListener() = default;
-
- /// \brief Called when a message is decoded.
- ///
- /// MessageDecoder calls this method when it decodes a message. This
- /// method is called multiple times when the target stream has
- /// multiple messages.
- ///
- /// \param[in] message a decoded message
- /// \return Status
- virtual Status OnMessageDecoded(std::unique_ptr<Message> message) = 0;
-
- /// \brief Called when the decoder state is changed to
- /// MessageDecoder::State::INITIAL.
- ///
- /// The default implementation just returns arrow::Status::OK().
- ///
- /// \return Status
- virtual Status OnInitial();
-
- /// \brief Called when the decoder state is changed to
- /// MessageDecoder::State::METADATA_LENGTH.
- ///
- /// The default implementation just returns arrow::Status::OK().
- ///
- /// \return Status
- virtual Status OnMetadataLength();
-
- /// \brief Called when the decoder state is changed to
- /// MessageDecoder::State::METADATA.
- ///
- /// The default implementation just returns arrow::Status::OK().
- ///
- /// \return Status
- virtual Status OnMetadata();
-
- /// \brief Called when the decoder state is changed to
- /// MessageDecoder::State::BODY.
- ///
- /// The default implementation just returns arrow::Status::OK().
- ///
- /// \return Status
- virtual Status OnBody();
-
- /// \brief Called when the decoder state is changed to
- /// MessageDecoder::State::EOS.
- ///
- /// The default implementation just returns arrow::Status::OK().
- ///
- /// \return Status
- virtual Status OnEOS();
-};
-
-/// \class AssignMessageDecoderListener
-/// \brief Assign a message decoded by MessageDecoder.
-///
-/// This API is EXPERIMENTAL.
-///
-/// \since 0.17.0
-class ARROW_EXPORT AssignMessageDecoderListener : public MessageDecoderListener {
- public:
- /// \brief Construct a listener that assigns a decoded message to the
- /// specified location.
- ///
- /// \param[in] message a location to store the received message
- explicit AssignMessageDecoderListener(std::unique_ptr<Message>* message)
- : message_(message) {}
-
- virtual ~AssignMessageDecoderListener() = default;
-
- Status OnMessageDecoded(std::unique_ptr<Message> message) override {
- *message_ = std::move(message);
- return Status::OK();
- }
-
- private:
- std::unique_ptr<Message>* message_;
-
- ARROW_DISALLOW_COPY_AND_ASSIGN(AssignMessageDecoderListener);
-};
-
-/// \class MessageDecoder
-/// \brief Push style message decoder that receives data from user.
-///
-/// This API is EXPERIMENTAL.
-///
-/// \since 0.17.0
-class ARROW_EXPORT MessageDecoder {
- public:
- /// \brief State for reading a message
- enum State {
- /// The initial state. It requires one of the followings as the next data:
- ///
- /// * int32_t continuation token
- /// * int32_t end-of-stream mark (== 0)
- /// * int32_t metadata length (backward compatibility for
- /// reading old IPC messages produced prior to version 0.15.0
- INITIAL,
-
- /// It requires int32_t metadata length.
- METADATA_LENGTH,
-
- /// It requires metadata.
- METADATA,
-
- /// It requires message body.
- BODY,
-
- /// The end-of-stream state. No more data is processed.
- EOS,
- };
-
- /// \brief Construct a message decoder.
- ///
- /// \param[in] listener a MessageDecoderListener that responds events from
- /// the decoder
- /// \param[in] pool an optional MemoryPool to copy metadata on the
- /// CPU, if required
- explicit MessageDecoder(std::shared_ptr<MessageDecoderListener> listener,
- MemoryPool* pool = default_memory_pool());
-
- /// \brief Construct a message decoder with the specified state.
- ///
- /// This is a construct for advanced users that know how to decode
- /// Message.
- ///
- /// \param[in] listener a MessageDecoderListener that responds events from
- /// the decoder
- /// \param[in] initial_state an initial state of the decode
- /// \param[in] initial_next_required_size the number of bytes needed
- /// to run the next action
- /// \param[in] pool an optional MemoryPool to copy metadata on the
- /// CPU, if required
- MessageDecoder(std::shared_ptr<MessageDecoderListener> listener, State initial_state,
- int64_t initial_next_required_size,
- MemoryPool* pool = default_memory_pool());
-
- virtual ~MessageDecoder();
-
- /// \brief Feed data to the decoder as a raw data.
- ///
- /// If the decoder can decode one or more messages by the data, the
- /// decoder calls listener->OnMessageDecoded() with a decoded
- /// message multiple times.
- ///
- /// If the state of the decoder is changed, corresponding callbacks
- /// on listener is called:
- ///
- /// * MessageDecoder::State::INITIAL: listener->OnInitial()
- /// * MessageDecoder::State::METADATA_LENGTH: listener->OnMetadataLength()
- /// * MessageDecoder::State::METADATA: listener->OnMetadata()
- /// * MessageDecoder::State::BODY: listener->OnBody()
- /// * MessageDecoder::State::EOS: listener->OnEOS()
- ///
- /// \param[in] data a raw data to be processed. This data isn't
- /// copied. The passed memory must be kept alive through message
- /// processing.
- /// \param[in] size raw data size.
- /// \return Status
- Status Consume(const uint8_t* data, int64_t size);
-
- /// \brief Feed data to the decoder as a Buffer.
- ///
- /// If the decoder can decode one or more messages by the Buffer,
- /// the decoder calls listener->OnMessageDecoded() with a decoded
- /// message multiple times.
- ///
- /// \param[in] buffer a Buffer to be processed.
- /// \return Status
- Status Consume(std::shared_ptr<Buffer> buffer);
-
- /// \brief Return the number of bytes needed to advance the state of
- /// the decoder.
- ///
- /// This method is provided for users who want to optimize performance.
- /// Normal users don't need to use this method.
- ///
- /// Here is an example usage for normal users:
- ///
- /// ~~~{.cpp}
- /// decoder.Consume(buffer1);
- /// decoder.Consume(buffer2);
- /// decoder.Consume(buffer3);
- /// ~~~
- ///
- /// Decoder has internal buffer. If consumed data isn't enough to
- /// advance the state of the decoder, consumed data is buffered to
- /// the internal buffer. It causes performance overhead.
- ///
- /// If you pass next_required_size() size data to each Consume()
- /// call, the decoder doesn't use its internal buffer. It improves
- /// performance.
- ///
- /// Here is an example usage to avoid using internal buffer:
- ///
- /// ~~~{.cpp}
- /// buffer1 = get_data(decoder.next_required_size());
- /// decoder.Consume(buffer1);
- /// buffer2 = get_data(decoder.next_required_size());
- /// decoder.Consume(buffer2);
- /// ~~~
- ///
- /// Users can use this method to avoid creating small
- /// chunks. Message body must be contiguous data. If users pass
- /// small chunks to the decoder, the decoder needs concatenate small
- /// chunks internally. It causes performance overhead.
- ///
- /// Here is an example usage to reduce small chunks:
- ///
- /// ~~~{.cpp}
- /// buffer = AllocateResizableBuffer();
- /// while ((small_chunk = get_data(&small_chunk_size))) {
- /// auto current_buffer_size = buffer->size();
- /// buffer->Resize(current_buffer_size + small_chunk_size);
- /// memcpy(buffer->mutable_data() + current_buffer_size,
- /// small_chunk,
- /// small_chunk_size);
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// C++ object model and user API for interprocess schema messaging
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "arrow/io/type_fwd.h"
+#include "arrow/ipc/type_fwd.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace ipc {
+
+struct IpcWriteOptions;
+
+// Read interface classes. We do not fully deserialize the flatbuffers so that
+// individual field metadata can be retrieved from a very large schema without
+// deserializing the whole schema.
+
+/// \class Message
+/// \brief An IPC message including metadata and body
+class ARROW_EXPORT Message {
+ public:
+ /// \brief Construct message, but do not validate
+ ///
+ /// Use at your own risk; Message::Open has more metadata validation
+ Message(std::shared_ptr<Buffer> metadata, std::shared_ptr<Buffer> body);
+
+ ~Message();
+
+ /// \brief Create and validate a Message instance from two buffers
+ ///
+ /// \param[in] metadata a buffer containing the Flatbuffer metadata
+ /// \param[in] body a buffer containing the message body, which may be null
+ /// \return the created message
+ static Result<std::unique_ptr<Message>> Open(std::shared_ptr<Buffer> metadata,
+ std::shared_ptr<Buffer> body);
+
+ /// \brief Read message body and create Message given Flatbuffer metadata
+ /// \param[in] metadata containing a serialized Message flatbuffer
+ /// \param[in] stream an InputStream
+ /// \return the created Message
+ ///
+ /// \note If stream supports zero-copy, this is zero-copy
+ static Result<std::unique_ptr<Message>> ReadFrom(std::shared_ptr<Buffer> metadata,
+ io::InputStream* stream);
+
+ /// \brief Read message body from position in file, and create Message given
+ /// the Flatbuffer metadata
+ /// \param[in] offset the position in the file where the message body starts.
+ /// \param[in] metadata containing a serialized Message flatbuffer
+ /// \param[in] file the seekable file interface to read from
+ /// \return the created Message
+ ///
+ /// \note If file supports zero-copy, this is zero-copy
+ static Result<std::unique_ptr<Message>> ReadFrom(const int64_t offset,
+ std::shared_ptr<Buffer> metadata,
+ io::RandomAccessFile* file);
+
+ /// \brief Return true if message type and contents are equal
+ ///
+ /// \param other another message
+ /// \return true if contents equal
+ bool Equals(const Message& other) const;
+
+ /// \brief the Message metadata
+ ///
+ /// \return buffer
+ std::shared_ptr<Buffer> metadata() const;
+
+ /// \brief Custom metadata serialized in metadata Flatbuffer. Returns nullptr
+ /// when none set
+ const std::shared_ptr<const KeyValueMetadata>& custom_metadata() const;
+
+ /// \brief the Message body, if any
+ ///
+ /// \return buffer is null if no body
+ std::shared_ptr<Buffer> body() const;
+
+ /// \brief The expected body length according to the metadata, for
+ /// verification purposes
+ int64_t body_length() const;
+
+ /// \brief The Message type
+ MessageType type() const;
+
+ /// \brief The Message metadata version
+ MetadataVersion metadata_version() const;
+
+ const void* header() const;
+
+ /// \brief Write length-prefixed metadata and body to output stream
+ ///
+ /// \param[in] file output stream to write to
+ /// \param[in] options IPC writing options including alignment
+ /// \param[out] output_length the number of bytes written
+ /// \return Status
+ Status SerializeTo(io::OutputStream* file, const IpcWriteOptions& options,
+ int64_t* output_length) const;
+
+ /// \brief Return true if the Message metadata passes Flatbuffer validation
+ bool Verify() const;
+
+ /// \brief Whether a given message type needs a body.
+ static bool HasBody(MessageType type) {
+ return type != MessageType::NONE && type != MessageType::SCHEMA;
+ }
+
+ private:
+ // Hide serialization details from user API
+ class MessageImpl;
+ std::unique_ptr<MessageImpl> impl_;
+
+ ARROW_DISALLOW_COPY_AND_ASSIGN(Message);
+};
+
+ARROW_EXPORT std::string FormatMessageType(MessageType type);
+
+/// \class MessageDecoderListener
+/// \brief An abstract class to listen to events from MessageDecoder.
+///
+/// This API is EXPERIMENTAL.
+///
+/// \since 0.17.0
+class ARROW_EXPORT MessageDecoderListener {
+ public:
+ virtual ~MessageDecoderListener() = default;
+
+ /// \brief Called when a message is decoded.
+ ///
+ /// MessageDecoder calls this method when it decodes a message. This
+ /// method is called multiple times when the target stream has
+ /// multiple messages.
+ ///
+ /// \param[in] message a decoded message
+ /// \return Status
+ virtual Status OnMessageDecoded(std::unique_ptr<Message> message) = 0;
+
+ /// \brief Called when the decoder state is changed to
+ /// MessageDecoder::State::INITIAL.
+ ///
+ /// The default implementation just returns arrow::Status::OK().
+ ///
+ /// \return Status
+ virtual Status OnInitial();
+
+ /// \brief Called when the decoder state is changed to
+ /// MessageDecoder::State::METADATA_LENGTH.
+ ///
+ /// The default implementation just returns arrow::Status::OK().
+ ///
+ /// \return Status
+ virtual Status OnMetadataLength();
+
+ /// \brief Called when the decoder state is changed to
+ /// MessageDecoder::State::METADATA.
+ ///
+ /// The default implementation just returns arrow::Status::OK().
+ ///
+ /// \return Status
+ virtual Status OnMetadata();
+
+ /// \brief Called when the decoder state is changed to
+ /// MessageDecoder::State::BODY.
+ ///
+ /// The default implementation just returns arrow::Status::OK().
+ ///
+ /// \return Status
+ virtual Status OnBody();
+
+ /// \brief Called when the decoder state is changed to
+ /// MessageDecoder::State::EOS.
+ ///
+ /// The default implementation just returns arrow::Status::OK().
+ ///
+ /// \return Status
+ virtual Status OnEOS();
+};
+
+/// \class AssignMessageDecoderListener
+/// \brief Assign a message decoded by MessageDecoder.
+///
+/// This API is EXPERIMENTAL.
+///
+/// \since 0.17.0
+class ARROW_EXPORT AssignMessageDecoderListener : public MessageDecoderListener {
+ public:
+ /// \brief Construct a listener that assigns a decoded message to the
+ /// specified location.
+ ///
+ /// \param[in] message a location to store the received message
+ explicit AssignMessageDecoderListener(std::unique_ptr<Message>* message)
+ : message_(message) {}
+
+ virtual ~AssignMessageDecoderListener() = default;
+
+ Status OnMessageDecoded(std::unique_ptr<Message> message) override {
+ *message_ = std::move(message);
+ return Status::OK();
+ }
+
+ private:
+ std::unique_ptr<Message>* message_;
+
+ ARROW_DISALLOW_COPY_AND_ASSIGN(AssignMessageDecoderListener);
+};
+
+/// \class MessageDecoder
+/// \brief Push style message decoder that receives data from user.
+///
+/// This API is EXPERIMENTAL.
+///
+/// \since 0.17.0
+class ARROW_EXPORT MessageDecoder {
+ public:
+ /// \brief State for reading a message
+ enum State {
+ /// The initial state. It requires one of the following as the next data:
+ ///
+ /// * int32_t continuation token
+ /// * int32_t end-of-stream mark (== 0)
+ /// * int32_t metadata length (backward compatibility for
+ /// reading old IPC messages produced prior to version 0.15.0)
+ INITIAL,
+
+ /// It requires int32_t metadata length.
+ METADATA_LENGTH,
+
+ /// It requires metadata.
+ METADATA,
+
+ /// It requires message body.
+ BODY,
+
+ /// The end-of-stream state. No more data is processed.
+ EOS,
+ };
+
+ /// \brief Construct a message decoder.
+ ///
+ /// \param[in] listener a MessageDecoderListener that responds to events from
+ /// the decoder
+ /// \param[in] pool an optional MemoryPool to copy metadata on the
+ /// CPU, if required
+ explicit MessageDecoder(std::shared_ptr<MessageDecoderListener> listener,
+ MemoryPool* pool = default_memory_pool());
+
+ /// \brief Construct a message decoder with the specified state.
+ ///
+ /// This is a constructor for advanced users who know how to decode a
+ /// Message.
+ ///
+ /// \param[in] listener a MessageDecoderListener that responds to events from
+ /// the decoder
+ /// \param[in] initial_state the initial state of the decoder
+ /// \param[in] initial_next_required_size the number of bytes needed
+ /// to run the next action
+ /// \param[in] pool an optional MemoryPool to copy metadata on the
+ /// CPU, if required
+ MessageDecoder(std::shared_ptr<MessageDecoderListener> listener, State initial_state,
+ int64_t initial_next_required_size,
+ MemoryPool* pool = default_memory_pool());
+
+ virtual ~MessageDecoder();
+
+ /// \brief Feed data to the decoder as a raw data.
+ ///
+ /// If the decoder can decode one or more messages by the data, the
+ /// decoder calls listener->OnMessageDecoded() with a decoded
+ /// message multiple times.
+ ///
+ /// If the state of the decoder is changed, the corresponding callback
+ /// on the listener is called:
+ ///
+ /// * MessageDecoder::State::INITIAL: listener->OnInitial()
+ /// * MessageDecoder::State::METADATA_LENGTH: listener->OnMetadataLength()
+ /// * MessageDecoder::State::METADATA: listener->OnMetadata()
+ /// * MessageDecoder::State::BODY: listener->OnBody()
+ /// * MessageDecoder::State::EOS: listener->OnEOS()
+ ///
+ /// \param[in] data a raw data to be processed. This data isn't
+ /// copied. The passed memory must be kept alive through message
+ /// processing.
+ /// \param[in] size raw data size.
+ /// \return Status
+ Status Consume(const uint8_t* data, int64_t size);
+
+ /// \brief Feed data to the decoder as a Buffer.
+ ///
+ /// If the decoder can decode one or more messages by the Buffer,
+ /// the decoder calls listener->OnMessageDecoded() with a decoded
+ /// message multiple times.
+ ///
+ /// \param[in] buffer a Buffer to be processed.
+ /// \return Status
+ Status Consume(std::shared_ptr<Buffer> buffer);
+
+ /// \brief Return the number of bytes needed to advance the state of
+ /// the decoder.
+ ///
+ /// This method is provided for users who want to optimize performance.
+ /// Normal users don't need to use this method.
+ ///
+ /// Here is an example usage for normal users:
+ ///
+ /// ~~~{.cpp}
+ /// decoder.Consume(buffer1);
+ /// decoder.Consume(buffer2);
+ /// decoder.Consume(buffer3);
+ /// ~~~
+ ///
+ /// Decoder has internal buffer. If consumed data isn't enough to
+ /// advance the state of the decoder, consumed data is buffered to
+ /// the internal buffer. It causes performance overhead.
+ ///
+ /// If you pass next_required_size() size data to each Consume()
+ /// call, the decoder doesn't use its internal buffer. It improves
+ /// performance.
+ ///
+ /// Here is an example usage to avoid using internal buffer:
+ ///
+ /// ~~~{.cpp}
+ /// buffer1 = get_data(decoder.next_required_size());
+ /// decoder.Consume(buffer1);
+ /// buffer2 = get_data(decoder.next_required_size());
+ /// decoder.Consume(buffer2);
+ /// ~~~
+ ///
+ /// Users can use this method to avoid creating small
+ /// chunks. Message body must be contiguous data. If users pass
+ /// small chunks to the decoder, the decoder needs to concatenate small
+ /// chunks internally. It causes performance overhead.
+ ///
+ /// Here is an example usage to reduce small chunks:
+ ///
+ /// ~~~{.cpp}
+ /// buffer = AllocateResizableBuffer();
+ /// while ((small_chunk = get_data(&small_chunk_size))) {
+ /// auto current_buffer_size = buffer->size();
+ /// buffer->Resize(current_buffer_size + small_chunk_size);
+ /// memcpy(buffer->mutable_data() + current_buffer_size,
+ /// small_chunk,
+ /// small_chunk_size);
/// if (buffer->size() < decoder.next_required_size()) {
- /// continue;
- /// }
- /// std::shared_ptr<arrow::Buffer> chunk(buffer.release());
- /// decoder.Consume(chunk);
- /// buffer = AllocateResizableBuffer();
- /// }
- /// if (buffer->size() > 0) {
- /// std::shared_ptr<arrow::Buffer> chunk(buffer.release());
- /// decoder.Consume(chunk);
- /// }
- /// ~~~
- ///
- /// \return the number of bytes needed to advance the state of the
- /// decoder
- int64_t next_required_size() const;
-
- /// \brief Return the current state of the decoder.
- ///
- /// This method is provided for users who want to optimize performance.
- /// Normal users don't need to use this method.
- ///
- /// Decoder doesn't need Buffer to process data on the
- /// MessageDecoder::State::INITIAL state and the
- /// MessageDecoder::State::METADATA_LENGTH. Creating Buffer has
- /// performance overhead. Advanced users can avoid creating Buffer
- /// by checking the current state of the decoder:
- ///
- /// ~~~{.cpp}
- /// switch (decoder.state()) {
- /// MessageDecoder::State::INITIAL:
- /// MessageDecoder::State::METADATA_LENGTH:
- /// {
- /// uint8_t data[sizeof(int32_t)];
- /// auto data_size = input->Read(decoder.next_required_size(), data);
- /// decoder.Consume(data, data_size);
- /// }
- /// break;
- /// default:
- /// {
- /// auto buffer = input->Read(decoder.next_required_size());
- /// decoder.Consume(buffer);
- /// }
- /// break;
- /// }
- /// ~~~
- ///
- /// \return the current state
- State state() const;
-
- private:
- class MessageDecoderImpl;
- std::unique_ptr<MessageDecoderImpl> impl_;
-
- ARROW_DISALLOW_COPY_AND_ASSIGN(MessageDecoder);
-};
-
-/// \brief Abstract interface for a sequence of messages
-/// \since 0.5.0
-class ARROW_EXPORT MessageReader {
- public:
- virtual ~MessageReader() = default;
-
- /// \brief Create MessageReader that reads from InputStream
- static std::unique_ptr<MessageReader> Open(io::InputStream* stream);
-
- /// \brief Create MessageReader that reads from owned InputStream
- static std::unique_ptr<MessageReader> Open(
- const std::shared_ptr<io::InputStream>& owned_stream);
-
- /// \brief Read next Message from the interface
- ///
- /// \return an arrow::ipc::Message instance
- virtual Result<std::unique_ptr<Message>> ReadNextMessage() = 0;
-};
-
-/// \brief Read encapsulated RPC message from position in file
-///
-/// Read a length-prefixed message flatbuffer starting at the indicated file
-/// offset. If the message has a body with non-zero length, it will also be
-/// read
-///
-/// The metadata_length includes at least the length prefix and the flatbuffer
-///
-/// \param[in] offset the position in the file where the message starts. The
-/// first 4 bytes after the offset are the message length
-/// \param[in] metadata_length the total number of bytes to read from file
-/// \param[in] file the seekable file interface to read from
-/// \return the message read
-ARROW_EXPORT
-Result<std::unique_ptr<Message>> ReadMessage(const int64_t offset,
- const int32_t metadata_length,
- io::RandomAccessFile* file);
-
+ /// continue;
+ /// }
+ /// std::shared_ptr<arrow::Buffer> chunk(buffer.release());
+ /// decoder.Consume(chunk);
+ /// buffer = AllocateResizableBuffer();
+ /// }
+ /// if (buffer->size() > 0) {
+ /// std::shared_ptr<arrow::Buffer> chunk(buffer.release());
+ /// decoder.Consume(chunk);
+ /// }
+ /// ~~~
+ ///
+ /// \return the number of bytes needed to advance the state of the
+ /// decoder
+ int64_t next_required_size() const;
+
+ /// \brief Return the current state of the decoder.
+ ///
+ /// This method is provided for users who want to optimize performance.
+ /// Normal users don't need to use this method.
+ ///
+ /// Decoder doesn't need Buffer to process data on the
+ /// MessageDecoder::State::INITIAL state and the
+ /// MessageDecoder::State::METADATA_LENGTH. Creating Buffer has
+ /// performance overhead. Advanced users can avoid creating Buffer
+ /// by checking the current state of the decoder:
+ ///
+ /// ~~~{.cpp}
+ /// switch (decoder.state()) {
+ /// case MessageDecoder::State::INITIAL:
+ /// case MessageDecoder::State::METADATA_LENGTH:
+ /// {
+ /// uint8_t data[sizeof(int32_t)];
+ /// auto data_size = input->Read(decoder.next_required_size(), data);
+ /// decoder.Consume(data, data_size);
+ /// }
+ /// break;
+ /// default:
+ /// {
+ /// auto buffer = input->Read(decoder.next_required_size());
+ /// decoder.Consume(buffer);
+ /// }
+ /// break;
+ /// }
+ /// ~~~
+ ///
+ /// \return the current state
+ State state() const;
+
+ private:
+ class MessageDecoderImpl;
+ std::unique_ptr<MessageDecoderImpl> impl_;
+
+ ARROW_DISALLOW_COPY_AND_ASSIGN(MessageDecoder);
+};
+
+/// \brief Abstract interface for a sequence of messages
+/// \since 0.5.0
+class ARROW_EXPORT MessageReader {
+ public:
+ virtual ~MessageReader() = default;
+
+ /// \brief Create MessageReader that reads from InputStream
+ static std::unique_ptr<MessageReader> Open(io::InputStream* stream);
+
+ /// \brief Create MessageReader that reads from owned InputStream
+ static std::unique_ptr<MessageReader> Open(
+ const std::shared_ptr<io::InputStream>& owned_stream);
+
+ /// \brief Read next Message from the interface
+ ///
+ /// \return an arrow::ipc::Message instance
+ virtual Result<std::unique_ptr<Message>> ReadNextMessage() = 0;
+};
+
+/// \brief Read encapsulated IPC message from position in file
+///
+/// Read a length-prefixed message flatbuffer starting at the indicated file
+/// offset. If the message has a body with non-zero length, it will also be
+/// read
+///
+/// The metadata_length includes at least the length prefix and the flatbuffer
+///
+/// \param[in] offset the position in the file where the message starts. The
+/// first 4 bytes after the offset are the message length
+/// \param[in] metadata_length the total number of bytes to read from file
+/// \param[in] file the seekable file interface to read from
+/// \return the message read
+ARROW_EXPORT
+Result<std::unique_ptr<Message>> ReadMessage(const int64_t offset,
+ const int32_t metadata_length,
+ io::RandomAccessFile* file);
+
ARROW_EXPORT
Future<std::shared_ptr<Message>> ReadMessageAsync(
const int64_t offset, const int32_t metadata_length, const int64_t body_length,
io::RandomAccessFile* file, const io::IOContext& context = io::default_io_context());
-/// \brief Advance stream to an 8-byte offset if its position is not a multiple
-/// of 8 already
-/// \param[in] stream an input stream
-/// \param[in] alignment the byte multiple for the metadata prefix, usually 8
-/// or 64, to ensure the body starts on a multiple of that alignment
-/// \return Status
-ARROW_EXPORT
-Status AlignStream(io::InputStream* stream, int32_t alignment = 8);
-
-/// \brief Advance stream to an 8-byte offset if its position is not a multiple
-/// of 8 already
-/// \param[in] stream an output stream
-/// \param[in] alignment the byte multiple for the metadata prefix, usually 8
-/// or 64, to ensure the body starts on a multiple of that alignment
-/// \return Status
-ARROW_EXPORT
-Status AlignStream(io::OutputStream* stream, int32_t alignment = 8);
-
-/// \brief Return error Status if file position is not a multiple of the
-/// indicated alignment
-ARROW_EXPORT
-Status CheckAligned(io::FileInterface* stream, int32_t alignment = 8);
-
-/// \brief Read encapsulated IPC message (metadata and body) from InputStream
-///
-/// Returns null if there are not enough bytes available or the
-/// message length is 0 (e.g. EOS in a stream)
-///
-/// \param[in] stream an input stream
-/// \param[in] pool an optional MemoryPool to copy metadata on the CPU, if required
-/// \return Message
-ARROW_EXPORT
-Result<std::unique_ptr<Message>> ReadMessage(io::InputStream* stream,
- MemoryPool* pool = default_memory_pool());
-
-/// \brief Feed data from InputStream to MessageDecoder to decode an
-/// encapsulated IPC message (metadata and body)
-///
-/// This API is EXPERIMENTAL.
-///
-/// \param[in] decoder a decoder
-/// \param[in] stream an input stream
-/// \return Status
-///
-/// \since 0.17.0
-ARROW_EXPORT
-Status DecodeMessage(MessageDecoder* decoder, io::InputStream* stream);
-
-/// Write encapsulated IPC message Does not make assumptions about
-/// whether the stream is aligned already. Can write legacy (pre
-/// version 0.15.0) IPC message if option set
-///
-/// continuation: 0xFFFFFFFF
-/// message_size: int32
-/// message: const void*
-/// padding
-///
-///
-/// \param[in] message a buffer containing the metadata to write
-/// \param[in] options IPC writing options, including alignment and
-/// legacy message support
-/// \param[in,out] file the OutputStream to write to
-/// \param[out] message_length the total size of the payload written including
-/// padding
-/// \return Status
-Status WriteMessage(const Buffer& message, const IpcWriteOptions& options,
- io::OutputStream* file, int32_t* message_length);
-
-} // namespace ipc
-} // namespace arrow
+/// \brief Advance stream to an 8-byte offset if its position is not a multiple
+/// of 8 already
+/// \param[in] stream an input stream
+/// \param[in] alignment the byte multiple for the metadata prefix, usually 8
+/// or 64, to ensure the body starts on a multiple of that alignment
+/// \return Status
+ARROW_EXPORT
+Status AlignStream(io::InputStream* stream, int32_t alignment = 8);
+
+/// \brief Advance stream to an 8-byte offset if its position is not a multiple
+/// of 8 already
+/// \param[in] stream an output stream
+/// \param[in] alignment the byte multiple for the metadata prefix, usually 8
+/// or 64, to ensure the body starts on a multiple of that alignment
+/// \return Status
+ARROW_EXPORT
+Status AlignStream(io::OutputStream* stream, int32_t alignment = 8);
+
+/// \brief Return error Status if file position is not a multiple of the
+/// indicated alignment
+ARROW_EXPORT
+Status CheckAligned(io::FileInterface* stream, int32_t alignment = 8);
+
+/// \brief Read encapsulated IPC message (metadata and body) from InputStream
+///
+/// Returns null if there are not enough bytes available or the
+/// message length is 0 (e.g. EOS in a stream)
+///
+/// \param[in] stream an input stream
+/// \param[in] pool an optional MemoryPool to copy metadata on the CPU, if required
+/// \return Message
+ARROW_EXPORT
+Result<std::unique_ptr<Message>> ReadMessage(io::InputStream* stream,
+ MemoryPool* pool = default_memory_pool());
+
+/// \brief Feed data from InputStream to MessageDecoder to decode an
+/// encapsulated IPC message (metadata and body)
+///
+/// This API is EXPERIMENTAL.
+///
+/// \param[in] decoder a decoder
+/// \param[in] stream an input stream
+/// \return Status
+///
+/// \since 0.17.0
+ARROW_EXPORT
+Status DecodeMessage(MessageDecoder* decoder, io::InputStream* stream);
+
+/// Write encapsulated IPC message. Does not make assumptions about
+/// whether the stream is aligned already. Can write a legacy (pre
+/// version 0.15.0) IPC message if the option is set.
+///
+/// continuation: 0xFFFFFFFF
+/// message_size: int32
+/// message: const void*
+/// padding
+///
+///
+/// \param[in] message a buffer containing the metadata to write
+/// \param[in] options IPC writing options, including alignment and
+/// legacy message support
+/// \param[in,out] file the OutputStream to write to
+/// \param[out] message_length the total size of the payload written including
+/// padding
+/// \return Status
+Status WriteMessage(const Buffer& message, const IpcWriteOptions& options,
+ io::OutputStream* file, int32_t* message_length);
+
+} // namespace ipc
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/metadata_internal.cc b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/metadata_internal.cc
index 4b332bd9e1e..2e80d72660c 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/metadata_internal.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/metadata_internal.cc
@@ -1,608 +1,608 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/ipc/metadata_internal.h"
-
-#include <cstdint>
-#include <memory>
-#include <sstream>
-#include <unordered_map>
-#include <utility>
-
-#include <flatbuffers/flatbuffers.h>
-
-#include "arrow/extension_type.h"
-#include "arrow/io/interfaces.h"
-#include "arrow/ipc/dictionary.h"
-#include "arrow/ipc/message.h"
-#include "arrow/ipc/options.h"
-#include "arrow/ipc/util.h"
-#include "arrow/sparse_tensor.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/key_value_metadata.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/ubsan.h"
-#include "arrow/visitor_inline.h"
-
-#include "generated/File_generated.h"
-#include "generated/Message_generated.h"
-#include "generated/Schema_generated.h"
-#include "generated/SparseTensor_generated.h"
-#include "generated/Tensor_generated.h"
-
-namespace arrow {
-
-namespace flatbuf = org::apache::arrow::flatbuf;
-using internal::checked_cast;
-using internal::GetByteWidth;
-
-namespace ipc {
-namespace internal {
-
-using FBB = flatbuffers::FlatBufferBuilder;
-using DictionaryOffset = flatbuffers::Offset<flatbuf::DictionaryEncoding>;
-using FieldOffset = flatbuffers::Offset<flatbuf::Field>;
-using RecordBatchOffset = flatbuffers::Offset<flatbuf::RecordBatch>;
-using SparseTensorOffset = flatbuffers::Offset<flatbuf::SparseTensor>;
-using Offset = flatbuffers::Offset<void>;
-using FBString = flatbuffers::Offset<flatbuffers::String>;
-
-MetadataVersion GetMetadataVersion(flatbuf::MetadataVersion version) {
- switch (version) {
- case flatbuf::MetadataVersion::V1:
- // Arrow 0.1
- return MetadataVersion::V1;
- case flatbuf::MetadataVersion::V2:
- // Arrow 0.2
- return MetadataVersion::V2;
- case flatbuf::MetadataVersion::V3:
- // Arrow 0.3 to 0.7.1
- return MetadataVersion::V4;
- case flatbuf::MetadataVersion::V4:
- // Arrow 0.8 to 0.17
- return MetadataVersion::V4;
- case flatbuf::MetadataVersion::V5:
- // Arrow >= 1.0
- return MetadataVersion::V5;
- // Add cases as other versions become available
- default:
- return MetadataVersion::V5;
- }
-}
-
-flatbuf::MetadataVersion MetadataVersionToFlatbuffer(MetadataVersion version) {
- switch (version) {
- case MetadataVersion::V1:
- return flatbuf::MetadataVersion::V1;
- case MetadataVersion::V2:
- return flatbuf::MetadataVersion::V2;
- case MetadataVersion::V3:
- return flatbuf::MetadataVersion::V3;
- case MetadataVersion::V4:
- return flatbuf::MetadataVersion::V4;
- case MetadataVersion::V5:
- return flatbuf::MetadataVersion::V5;
- // Add cases as other versions become available
- default:
- return flatbuf::MetadataVersion::V5;
- }
-}
-
-bool HasValidityBitmap(Type::type type_id, MetadataVersion version) {
- // In V4, null types have no validity bitmap
- // In V5 and later, null and union types have no validity bitmap
- return (version < MetadataVersion::V5) ? (type_id != Type::NA)
- : ::arrow::internal::HasValidityBitmap(type_id);
-}
-
-namespace {
-
-Status IntFromFlatbuffer(const flatbuf::Int* int_data, std::shared_ptr<DataType>* out) {
- if (int_data->bitWidth() > 64) {
- return Status::NotImplemented("Integers with more than 64 bits not implemented");
- }
- if (int_data->bitWidth() < 8) {
- return Status::NotImplemented("Integers with less than 8 bits not implemented");
- }
-
- switch (int_data->bitWidth()) {
- case 8:
- *out = int_data->is_signed() ? int8() : uint8();
- break;
- case 16:
- *out = int_data->is_signed() ? int16() : uint16();
- break;
- case 32:
- *out = int_data->is_signed() ? int32() : uint32();
- break;
- case 64:
- *out = int_data->is_signed() ? int64() : uint64();
- break;
- default:
- return Status::NotImplemented("Integers not in cstdint are not implemented");
- }
- return Status::OK();
-}
-
-Status FloatFromFlatbuffer(const flatbuf::FloatingPoint* float_data,
- std::shared_ptr<DataType>* out) {
- if (float_data->precision() == flatbuf::Precision::HALF) {
- *out = float16();
- } else if (float_data->precision() == flatbuf::Precision::SINGLE) {
- *out = float32();
- } else {
- *out = float64();
- }
- return Status::OK();
-}
-
-Offset IntToFlatbuffer(FBB& fbb, int bitWidth, bool is_signed) {
- return flatbuf::CreateInt(fbb, bitWidth, is_signed).Union();
-}
-
-Offset FloatToFlatbuffer(FBB& fbb, flatbuf::Precision precision) {
- return flatbuf::CreateFloatingPoint(fbb, precision).Union();
-}
-
-// ----------------------------------------------------------------------
-// Union implementation
-
-Status UnionFromFlatbuffer(const flatbuf::Union* union_data,
- const std::vector<std::shared_ptr<Field>>& children,
- std::shared_ptr<DataType>* out) {
- UnionMode::type mode =
- (union_data->mode() == flatbuf::UnionMode::Sparse ? UnionMode::SPARSE
- : UnionMode::DENSE);
-
- std::vector<int8_t> type_codes;
-
- const flatbuffers::Vector<int32_t>* fb_type_ids = union_data->typeIds();
- if (fb_type_ids == nullptr) {
- for (int8_t i = 0; i < static_cast<int8_t>(children.size()); ++i) {
- type_codes.push_back(i);
- }
- } else {
- for (int32_t id : (*fb_type_ids)) {
- const auto type_code = static_cast<int8_t>(id);
- if (id != type_code) {
- return Status::Invalid("union type id out of bounds");
- }
- type_codes.push_back(type_code);
- }
- }
-
- if (mode == UnionMode::SPARSE) {
- ARROW_ASSIGN_OR_RAISE(
- *out, SparseUnionType::Make(std::move(children), std::move(type_codes)));
- } else {
- ARROW_ASSIGN_OR_RAISE(
- *out, DenseUnionType::Make(std::move(children), std::move(type_codes)));
- }
- return Status::OK();
-}
-
-#define INT_TO_FB_CASE(BIT_WIDTH, IS_SIGNED) \
- *out_type = flatbuf::Type::Int; \
- *offset = IntToFlatbuffer(fbb, BIT_WIDTH, IS_SIGNED); \
- break;
-
-static inline flatbuf::TimeUnit ToFlatbufferUnit(TimeUnit::type unit) {
- switch (unit) {
- case TimeUnit::SECOND:
- return flatbuf::TimeUnit::SECOND;
- case TimeUnit::MILLI:
- return flatbuf::TimeUnit::MILLISECOND;
- case TimeUnit::MICRO:
- return flatbuf::TimeUnit::MICROSECOND;
- case TimeUnit::NANO:
- return flatbuf::TimeUnit::NANOSECOND;
- default:
- break;
- }
- return flatbuf::TimeUnit::MIN;
-}
-
-static inline TimeUnit::type FromFlatbufferUnit(flatbuf::TimeUnit unit) {
- switch (unit) {
- case flatbuf::TimeUnit::SECOND:
- return TimeUnit::SECOND;
- case flatbuf::TimeUnit::MILLISECOND:
- return TimeUnit::MILLI;
- case flatbuf::TimeUnit::MICROSECOND:
- return TimeUnit::MICRO;
- case flatbuf::TimeUnit::NANOSECOND:
- return TimeUnit::NANO;
- default:
- break;
- }
- // cannot reach
- return TimeUnit::SECOND;
-}
-
-Status ConcreteTypeFromFlatbuffer(flatbuf::Type type, const void* type_data,
- const std::vector<std::shared_ptr<Field>>& children,
- std::shared_ptr<DataType>* out) {
- switch (type) {
- case flatbuf::Type::NONE:
- return Status::Invalid("Type metadata cannot be none");
- case flatbuf::Type::Null:
- *out = null();
- return Status::OK();
- case flatbuf::Type::Int:
- return IntFromFlatbuffer(static_cast<const flatbuf::Int*>(type_data), out);
- case flatbuf::Type::FloatingPoint:
- return FloatFromFlatbuffer(static_cast<const flatbuf::FloatingPoint*>(type_data),
- out);
- case flatbuf::Type::Binary:
- *out = binary();
- return Status::OK();
- case flatbuf::Type::LargeBinary:
- *out = large_binary();
- return Status::OK();
- case flatbuf::Type::FixedSizeBinary: {
- auto fw_binary = static_cast<const flatbuf::FixedSizeBinary*>(type_data);
- return FixedSizeBinaryType::Make(fw_binary->byteWidth()).Value(out);
- }
- case flatbuf::Type::Utf8:
- *out = utf8();
- return Status::OK();
- case flatbuf::Type::LargeUtf8:
- *out = large_utf8();
- return Status::OK();
- case flatbuf::Type::Bool:
- *out = boolean();
- return Status::OK();
- case flatbuf::Type::Decimal: {
- auto dec_type = static_cast<const flatbuf::Decimal*>(type_data);
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/ipc/metadata_internal.h"
+
+#include <cstdint>
+#include <memory>
+#include <sstream>
+#include <unordered_map>
+#include <utility>
+
+#include <flatbuffers/flatbuffers.h>
+
+#include "arrow/extension_type.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/ipc/dictionary.h"
+#include "arrow/ipc/message.h"
+#include "arrow/ipc/options.h"
+#include "arrow/ipc/util.h"
+#include "arrow/sparse_tensor.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/ubsan.h"
+#include "arrow/visitor_inline.h"
+
+#include "generated/File_generated.h"
+#include "generated/Message_generated.h"
+#include "generated/Schema_generated.h"
+#include "generated/SparseTensor_generated.h"
+#include "generated/Tensor_generated.h"
+
+namespace arrow {
+
+namespace flatbuf = org::apache::arrow::flatbuf;
+using internal::checked_cast;
+using internal::GetByteWidth;
+
+namespace ipc {
+namespace internal {
+
+using FBB = flatbuffers::FlatBufferBuilder;
+using DictionaryOffset = flatbuffers::Offset<flatbuf::DictionaryEncoding>;
+using FieldOffset = flatbuffers::Offset<flatbuf::Field>;
+using RecordBatchOffset = flatbuffers::Offset<flatbuf::RecordBatch>;
+using SparseTensorOffset = flatbuffers::Offset<flatbuf::SparseTensor>;
+using Offset = flatbuffers::Offset<void>;
+using FBString = flatbuffers::Offset<flatbuffers::String>;
+
+// Translates a flatbuffer-encoded metadata version into the library's
+// MetadataVersion enum.
+//
+// V1, V2 and V5 map to their namesakes, V3 is reported as V4 (the same
+// result as V4 itself), and any value not listed here is reported as V5.
+//
+// NOTE(review): the V3 -> V4 mapping is kept exactly as found; confirm
+// whether it is intentional before changing it.
+MetadataVersion GetMetadataVersion(flatbuf::MetadataVersion version) {
+  switch (version) {
+    case flatbuf::MetadataVersion::V1:  // Arrow 0.1
+      return MetadataVersion::V1;
+    case flatbuf::MetadataVersion::V2:  // Arrow 0.2
+      return MetadataVersion::V2;
+    case flatbuf::MetadataVersion::V3:  // Arrow 0.3 to 0.7.1
+    case flatbuf::MetadataVersion::V4:  // Arrow 0.8 to 0.17
+      return MetadataVersion::V4;
+    case flatbuf::MetadataVersion::V5:  // Arrow >= 1.0
+    default:
+      // Add cases as other versions become available
+      return MetadataVersion::V5;
+  }
+}
+
+// Translates the library's MetadataVersion enum into its flatbuffer
+// counterpart. Versions without an explicit case are written as V5.
+flatbuf::MetadataVersion MetadataVersionToFlatbuffer(MetadataVersion version) {
+  // Start from the fallback and overwrite it for every known older version.
+  flatbuf::MetadataVersion fb_version = flatbuf::MetadataVersion::V5;
+  switch (version) {
+    case MetadataVersion::V1:
+      fb_version = flatbuf::MetadataVersion::V1;
+      break;
+    case MetadataVersion::V2:
+      fb_version = flatbuf::MetadataVersion::V2;
+      break;
+    case MetadataVersion::V3:
+      fb_version = flatbuf::MetadataVersion::V3;
+      break;
+    case MetadataVersion::V4:
+      fb_version = flatbuf::MetadataVersion::V4;
+      break;
+    case MetadataVersion::V5:
+    default:
+      // Add cases as other versions become available
+      break;
+  }
+  return fb_version;
+}
+
+// Tells whether arrays of `type_id` carry a validity bitmap when written
+// with the given metadata version.
+bool HasValidityBitmap(Type::type type_id, MetadataVersion version) {
+  if (version < MetadataVersion::V5) {
+    // Before V5, only the null type has no validity bitmap.
+    return type_id != Type::NA;
+  }
+  // In V5 and later, null and union types have no validity bitmap; the
+  // generic helper makes that decision.
+  return ::arrow::internal::HasValidityBitmap(type_id);
+}
+
+namespace {
+
+// Builds the Arrow integer type described by a flatbuffer Int table.
+//
+// \param[in] int_data flatbuffer Int table (bit width and signedness)
+// \param[out] out receives the matching (u)int8/16/32/64 type
+// \return NotImplemented for any bit width other than 8, 16, 32 or 64
+Status IntFromFlatbuffer(const flatbuf::Int* int_data, std::shared_ptr<DataType>* out) {
+  const auto bit_width = int_data->bitWidth();
+  if (bit_width > 64) {
+    return Status::NotImplemented("Integers with more than 64 bits not implemented");
+  }
+  if (bit_width < 8) {
+    return Status::NotImplemented("Integers with less than 8 bits not implemented");
+  }
+
+  const bool is_signed = int_data->is_signed();
+  switch (bit_width) {
+    case 8:
+      *out = is_signed ? int8() : uint8();
+      return Status::OK();
+    case 16:
+      *out = is_signed ? int16() : uint16();
+      return Status::OK();
+    case 32:
+      *out = is_signed ? int32() : uint32();
+      return Status::OK();
+    case 64:
+      *out = is_signed ? int64() : uint64();
+      return Status::OK();
+    default:
+      // Widths inside [8, 64] that are not one of the four sizes above.
+      return Status::NotImplemented("Integers not in cstdint are not implemented");
+  }
+}
+
+// Builds the Arrow floating point type described by a flatbuffer
+// FloatingPoint table.
+//
+// HALF maps to float16 and SINGLE to float32; every other precision value
+// yields float64. Always returns OK.
+Status FloatFromFlatbuffer(const flatbuf::FloatingPoint* float_data,
+                           std::shared_ptr<DataType>* out) {
+  switch (float_data->precision()) {
+    case flatbuf::Precision::HALF:
+      *out = float16();
+      break;
+    case flatbuf::Precision::SINGLE:
+      *out = float32();
+      break;
+    default:
+      *out = float64();
+      break;
+  }
+  return Status::OK();
+}
+
+// Serializes an Int table with the given width and signedness into `fbb`
+// and returns its offset in type-erased (union) form.
+Offset IntToFlatbuffer(FBB& fbb, int bitWidth, bool is_signed) {
+  auto int_offset = flatbuf::CreateInt(fbb, bitWidth, is_signed);
+  return int_offset.Union();
+}
+
+// Serializes a FloatingPoint table with the given precision into `fbb`
+// and returns its offset in type-erased (union) form.
+Offset FloatToFlatbuffer(FBB& fbb, flatbuf::Precision precision) {
+  auto float_offset = flatbuf::CreateFloatingPoint(fbb, precision);
+  return float_offset.Union();
+}
+
+// ----------------------------------------------------------------------
+// Union implementation
+
+// Builds a sparse or dense union type from a flatbuffer Union table.
+//
+// \param[in] union_data flatbuffer Union table (mode and optional type ids)
+// \param[in] children child fields of the union
+// \param[out] out receives the union type on success
+// \return Invalid if an explicit type id does not fit in int8_t, otherwise
+//         whatever SparseUnionType::Make / DenseUnionType::Make reports
+Status UnionFromFlatbuffer(const flatbuf::Union* union_data,
+                           const std::vector<std::shared_ptr<Field>>& children,
+                           std::shared_ptr<DataType>* out) {
+  std::vector<int8_t> type_codes;
+
+  if (const auto* explicit_ids = union_data->typeIds()) {
+    // Explicit ids are stored as int32 and must survive narrowing to int8.
+    for (const int32_t wide_id : *explicit_ids) {
+      const auto narrow_id = static_cast<int8_t>(wide_id);
+      if (wide_id != narrow_id) {
+        return Status::Invalid("union type id out of bounds");
+      }
+      type_codes.push_back(narrow_id);
+    }
+  } else {
+    // No explicit ids: number the children 0, 1, 2, ...
+    // NOTE(review): the child count is narrowed to int8_t here, so this
+    // relies on Make() rejecting unions with too many children - confirm.
+    const auto num_children = static_cast<int8_t>(children.size());
+    for (int8_t code = 0; code < num_children; ++code) {
+      type_codes.push_back(code);
+    }
+  }
+
+  // NOTE(review): `children` is a const reference, so std::move on it
+  // presumably still results in a copy - confirm against Make()'s signature.
+  if (union_data->mode() == flatbuf::UnionMode::Sparse) {
+    return SparseUnionType::Make(std::move(children), std::move(type_codes)).Value(out);
+  }
+  return DenseUnionType::Make(std::move(children), std::move(type_codes)).Value(out);
+}
+
+#define INT_TO_FB_CASE(BIT_WIDTH, IS_SIGNED) \
+ *out_type = flatbuf::Type::Int; \
+ *offset = IntToFlatbuffer(fbb, BIT_WIDTH, IS_SIGNED); \
+ break;
+
+// Maps an Arrow time unit onto the flatbuffer TimeUnit enum. Values
+// without an explicit case map to flatbuf::TimeUnit::MIN.
+static inline flatbuf::TimeUnit ToFlatbufferUnit(TimeUnit::type unit) {
+  switch (unit) {
+    case TimeUnit::SECOND:
+      return flatbuf::TimeUnit::SECOND;
+    case TimeUnit::MILLI:
+      return flatbuf::TimeUnit::MILLISECOND;
+    case TimeUnit::MICRO:
+      return flatbuf::TimeUnit::MICROSECOND;
+    case TimeUnit::NANO:
+      return flatbuf::TimeUnit::NANOSECOND;
+    default:
+      return flatbuf::TimeUnit::MIN;
+  }
+}
+
+// Maps a flatbuffer TimeUnit onto the Arrow time unit enum. Values without
+// an explicit case fall back to TimeUnit::SECOND.
+static inline TimeUnit::type FromFlatbufferUnit(flatbuf::TimeUnit unit) {
+  switch (unit) {
+    case flatbuf::TimeUnit::SECOND:
+      return TimeUnit::SECOND;
+    case flatbuf::TimeUnit::MILLISECOND:
+      return TimeUnit::MILLI;
+    case flatbuf::TimeUnit::MICROSECOND:
+      return TimeUnit::MICRO;
+    case flatbuf::TimeUnit::NANOSECOND:
+      return TimeUnit::NANO;
+    default:
+      // Only taken for a unit value outside the four known enumerators.
+      return TimeUnit::SECOND;
+  }
+}
+
+Status ConcreteTypeFromFlatbuffer(flatbuf::Type type, const void* type_data,
+ const std::vector<std::shared_ptr<Field>>& children,
+ std::shared_ptr<DataType>* out) {
+ switch (type) {
+ case flatbuf::Type::NONE:
+ return Status::Invalid("Type metadata cannot be none");
+ case flatbuf::Type::Null:
+ *out = null();
+ return Status::OK();
+ case flatbuf::Type::Int:
+ return IntFromFlatbuffer(static_cast<const flatbuf::Int*>(type_data), out);
+ case flatbuf::Type::FloatingPoint:
+ return FloatFromFlatbuffer(static_cast<const flatbuf::FloatingPoint*>(type_data),
+ out);
+ case flatbuf::Type::Binary:
+ *out = binary();
+ return Status::OK();
+ case flatbuf::Type::LargeBinary:
+ *out = large_binary();
+ return Status::OK();
+ case flatbuf::Type::FixedSizeBinary: {
+ auto fw_binary = static_cast<const flatbuf::FixedSizeBinary*>(type_data);
+ return FixedSizeBinaryType::Make(fw_binary->byteWidth()).Value(out);
+ }
+ case flatbuf::Type::Utf8:
+ *out = utf8();
+ return Status::OK();
+ case flatbuf::Type::LargeUtf8:
+ *out = large_utf8();
+ return Status::OK();
+ case flatbuf::Type::Bool:
+ *out = boolean();
+ return Status::OK();
+ case flatbuf::Type::Decimal: {
+ auto dec_type = static_cast<const flatbuf::Decimal*>(type_data);
if (dec_type->bitWidth() == 128) {
return Decimal128Type::Make(dec_type->precision(), dec_type->scale()).Value(out);
} else if (dec_type->bitWidth() == 256) {
return Decimal256Type::Make(dec_type->precision(), dec_type->scale()).Value(out);
} else {
return Status::Invalid("Library only supports 128-bit or 256-bit decimal values");
- }
- }
- case flatbuf::Type::Date: {
- auto date_type = static_cast<const flatbuf::Date*>(type_data);
- if (date_type->unit() == flatbuf::DateUnit::DAY) {
- *out = date32();
- } else {
- *out = date64();
- }
- return Status::OK();
- }
- case flatbuf::Type::Time: {
- auto time_type = static_cast<const flatbuf::Time*>(type_data);
- TimeUnit::type unit = FromFlatbufferUnit(time_type->unit());
- int32_t bit_width = time_type->bitWidth();
- switch (unit) {
- case TimeUnit::SECOND:
- case TimeUnit::MILLI:
- if (bit_width != 32) {
- return Status::Invalid("Time is 32 bits for second/milli unit");
- }
- *out = time32(unit);
- break;
- default:
- if (bit_width != 64) {
- return Status::Invalid("Time is 64 bits for micro/nano unit");
- }
- *out = time64(unit);
- break;
- }
- return Status::OK();
- }
- case flatbuf::Type::Timestamp: {
- auto ts_type = static_cast<const flatbuf::Timestamp*>(type_data);
- TimeUnit::type unit = FromFlatbufferUnit(ts_type->unit());
- *out = timestamp(unit, StringFromFlatbuffers(ts_type->timezone()));
- return Status::OK();
- }
- case flatbuf::Type::Duration: {
- auto duration = static_cast<const flatbuf::Duration*>(type_data);
- TimeUnit::type unit = FromFlatbufferUnit(duration->unit());
- *out = arrow::duration(unit);
- return Status::OK();
- }
-
- case flatbuf::Type::Interval: {
- auto i_type = static_cast<const flatbuf::Interval*>(type_data);
- switch (i_type->unit()) {
- case flatbuf::IntervalUnit::YEAR_MONTH: {
- *out = month_interval();
- return Status::OK();
- }
- case flatbuf::IntervalUnit::DAY_TIME: {
- *out = day_time_interval();
- return Status::OK();
- }
- }
- return Status::NotImplemented("Unrecognized interval type.");
- }
-
- case flatbuf::Type::List:
- if (children.size() != 1) {
- return Status::Invalid("List must have exactly 1 child field");
- }
- *out = std::make_shared<ListType>(children[0]);
- return Status::OK();
- case flatbuf::Type::LargeList:
- if (children.size() != 1) {
- return Status::Invalid("LargeList must have exactly 1 child field");
- }
- *out = std::make_shared<LargeListType>(children[0]);
- return Status::OK();
- case flatbuf::Type::Map:
- if (children.size() != 1) {
- return Status::Invalid("Map must have exactly 1 child field");
- }
- if (children[0]->nullable() || children[0]->type()->id() != Type::STRUCT ||
- children[0]->type()->num_fields() != 2) {
- return Status::Invalid("Map's key-item pairs must be non-nullable structs");
- }
- if (children[0]->type()->field(0)->nullable()) {
- return Status::Invalid("Map's keys must be non-nullable");
- } else {
- auto map = static_cast<const flatbuf::Map*>(type_data);
- *out = std::make_shared<MapType>(children[0]->type()->field(0)->type(),
- children[0]->type()->field(1)->type(),
- map->keysSorted());
- }
- return Status::OK();
- case flatbuf::Type::FixedSizeList:
- if (children.size() != 1) {
- return Status::Invalid("FixedSizeList must have exactly 1 child field");
- } else {
- auto fs_list = static_cast<const flatbuf::FixedSizeList*>(type_data);
- *out = std::make_shared<FixedSizeListType>(children[0], fs_list->listSize());
- }
- return Status::OK();
- case flatbuf::Type::Struct_:
- *out = std::make_shared<StructType>(children);
- return Status::OK();
- case flatbuf::Type::Union:
- return UnionFromFlatbuffer(static_cast<const flatbuf::Union*>(type_data), children,
- out);
- default:
- return Status::Invalid("Unrecognized type:" +
- std::to_string(static_cast<int>(type)));
- }
-}
-
-Status TensorTypeToFlatbuffer(FBB& fbb, const DataType& type, flatbuf::Type* out_type,
- Offset* offset) {
- switch (type.id()) {
- case Type::UINT8:
- INT_TO_FB_CASE(8, false);
- case Type::INT8:
- INT_TO_FB_CASE(8, true);
- case Type::UINT16:
- INT_TO_FB_CASE(16, false);
- case Type::INT16:
- INT_TO_FB_CASE(16, true);
- case Type::UINT32:
- INT_TO_FB_CASE(32, false);
- case Type::INT32:
- INT_TO_FB_CASE(32, true);
- case Type::UINT64:
- INT_TO_FB_CASE(64, false);
- case Type::INT64:
- INT_TO_FB_CASE(64, true);
- case Type::HALF_FLOAT:
- *out_type = flatbuf::Type::FloatingPoint;
- *offset = FloatToFlatbuffer(fbb, flatbuf::Precision::HALF);
- break;
- case Type::FLOAT:
- *out_type = flatbuf::Type::FloatingPoint;
- *offset = FloatToFlatbuffer(fbb, flatbuf::Precision::SINGLE);
- break;
- case Type::DOUBLE:
- *out_type = flatbuf::Type::FloatingPoint;
- *offset = FloatToFlatbuffer(fbb, flatbuf::Precision::DOUBLE);
- break;
- default:
- *out_type = flatbuf::Type::NONE; // Make clang-tidy happy
- return Status::NotImplemented("Unable to convert type: ", type.ToString());
- }
- return Status::OK();
-}
-
-static Status GetDictionaryEncoding(FBB& fbb, const std::shared_ptr<Field>& field,
- const DictionaryType& type, int64_t dictionary_id,
- DictionaryOffset* out) {
- // We assume that the dictionary index type (as an integer) has already been
+ }
+ }
+ case flatbuf::Type::Date: {
+ auto date_type = static_cast<const flatbuf::Date*>(type_data);
+ if (date_type->unit() == flatbuf::DateUnit::DAY) {
+ *out = date32();
+ } else {
+ *out = date64();
+ }
+ return Status::OK();
+ }
+ case flatbuf::Type::Time: {
+ auto time_type = static_cast<const flatbuf::Time*>(type_data);
+ TimeUnit::type unit = FromFlatbufferUnit(time_type->unit());
+ int32_t bit_width = time_type->bitWidth();
+ switch (unit) {
+ case TimeUnit::SECOND:
+ case TimeUnit::MILLI:
+ if (bit_width != 32) {
+ return Status::Invalid("Time is 32 bits for second/milli unit");
+ }
+ *out = time32(unit);
+ break;
+ default:
+ if (bit_width != 64) {
+ return Status::Invalid("Time is 64 bits for micro/nano unit");
+ }
+ *out = time64(unit);
+ break;
+ }
+ return Status::OK();
+ }
+ case flatbuf::Type::Timestamp: {
+ auto ts_type = static_cast<const flatbuf::Timestamp*>(type_data);
+ TimeUnit::type unit = FromFlatbufferUnit(ts_type->unit());
+ *out = timestamp(unit, StringFromFlatbuffers(ts_type->timezone()));
+ return Status::OK();
+ }
+ case flatbuf::Type::Duration: {
+ auto duration = static_cast<const flatbuf::Duration*>(type_data);
+ TimeUnit::type unit = FromFlatbufferUnit(duration->unit());
+ *out = arrow::duration(unit);
+ return Status::OK();
+ }
+
+ case flatbuf::Type::Interval: {
+ auto i_type = static_cast<const flatbuf::Interval*>(type_data);
+ switch (i_type->unit()) {
+ case flatbuf::IntervalUnit::YEAR_MONTH: {
+ *out = month_interval();
+ return Status::OK();
+ }
+ case flatbuf::IntervalUnit::DAY_TIME: {
+ *out = day_time_interval();
+ return Status::OK();
+ }
+ }
+ return Status::NotImplemented("Unrecognized interval type.");
+ }
+
+ case flatbuf::Type::List:
+ if (children.size() != 1) {
+ return Status::Invalid("List must have exactly 1 child field");
+ }
+ *out = std::make_shared<ListType>(children[0]);
+ return Status::OK();
+ case flatbuf::Type::LargeList:
+ if (children.size() != 1) {
+ return Status::Invalid("LargeList must have exactly 1 child field");
+ }
+ *out = std::make_shared<LargeListType>(children[0]);
+ return Status::OK();
+ case flatbuf::Type::Map:
+ if (children.size() != 1) {
+ return Status::Invalid("Map must have exactly 1 child field");
+ }
+ if (children[0]->nullable() || children[0]->type()->id() != Type::STRUCT ||
+ children[0]->type()->num_fields() != 2) {
+ return Status::Invalid("Map's key-item pairs must be non-nullable structs");
+ }
+ if (children[0]->type()->field(0)->nullable()) {
+ return Status::Invalid("Map's keys must be non-nullable");
+ } else {
+ auto map = static_cast<const flatbuf::Map*>(type_data);
+ *out = std::make_shared<MapType>(children[0]->type()->field(0)->type(),
+ children[0]->type()->field(1)->type(),
+ map->keysSorted());
+ }
+ return Status::OK();
+ case flatbuf::Type::FixedSizeList:
+ if (children.size() != 1) {
+ return Status::Invalid("FixedSizeList must have exactly 1 child field");
+ } else {
+ auto fs_list = static_cast<const flatbuf::FixedSizeList*>(type_data);
+ *out = std::make_shared<FixedSizeListType>(children[0], fs_list->listSize());
+ }
+ return Status::OK();
+ case flatbuf::Type::Struct_:
+ *out = std::make_shared<StructType>(children);
+ return Status::OK();
+ case flatbuf::Type::Union:
+ return UnionFromFlatbuffer(static_cast<const flatbuf::Union*>(type_data), children,
+ out);
+ default:
+ return Status::Invalid("Unrecognized type:" +
+ std::to_string(static_cast<int>(type)));
+ }
+}
+
+// Serializes the element type of a tensor into `fbb`.
+//
+// Only fixed-width integers (8 to 64 bits, signed or unsigned) and
+// half/single/double floats are handled.
+//
+// \param[in,out] fbb builder receiving the serialized type table
+// \param[in] type tensor element type
+// \param[out] out_type flatbuffer type tag (NONE when unsupported)
+// \param[out] offset offset of the serialized type table
+// \return NotImplemented for every other type
+Status TensorTypeToFlatbuffer(FBB& fbb, const DataType& type, flatbuf::Type* out_type,
+                              Offset* offset) {
+  // Emits an Int table of the requested width and signedness.
+  const auto emit_int = [&](int bit_width, bool is_signed) {
+    *out_type = flatbuf::Type::Int;
+    *offset = IntToFlatbuffer(fbb, bit_width, is_signed);
+    return Status::OK();
+  };
+  // Emits a FloatingPoint table of the requested precision.
+  const auto emit_float = [&](flatbuf::Precision precision) {
+    *out_type = flatbuf::Type::FloatingPoint;
+    *offset = FloatToFlatbuffer(fbb, precision);
+    return Status::OK();
+  };
+
+  switch (type.id()) {
+    case Type::UINT8:
+      return emit_int(8, false);
+    case Type::INT8:
+      return emit_int(8, true);
+    case Type::UINT16:
+      return emit_int(16, false);
+    case Type::INT16:
+      return emit_int(16, true);
+    case Type::UINT32:
+      return emit_int(32, false);
+    case Type::INT32:
+      return emit_int(32, true);
+    case Type::UINT64:
+      return emit_int(64, false);
+    case Type::INT64:
+      return emit_int(64, true);
+    case Type::HALF_FLOAT:
+      return emit_float(flatbuf::Precision::HALF);
+    case Type::FLOAT:
+      return emit_float(flatbuf::Precision::SINGLE);
+    case Type::DOUBLE:
+      return emit_float(flatbuf::Precision::DOUBLE);
+    default:
+      *out_type = flatbuf::Type::NONE;  // Make clang-tidy happy
+      return Status::NotImplemented("Unable to convert type: ", type.ToString());
+  }
+}
+
+static Status GetDictionaryEncoding(FBB& fbb, const std::shared_ptr<Field>& field,
+ const DictionaryType& type, int64_t dictionary_id,
+ DictionaryOffset* out) {
+ // We assume that the dictionary index type (as an integer) has already been
// validated elsewhere, and can safely assume we are dealing with integers
- const auto& index_type = checked_cast<const IntegerType&>(*type.index_type());
-
- auto index_type_offset =
- flatbuf::CreateInt(fbb, index_type.bit_width(), index_type.is_signed());
-
- *out = flatbuf::CreateDictionaryEncoding(fbb, dictionary_id, index_type_offset,
- type.ordered());
- return Status::OK();
-}
-
-static KeyValueOffset AppendKeyValue(FBB& fbb, const std::string& key,
- const std::string& value) {
- return flatbuf::CreateKeyValue(fbb, fbb.CreateString(key), fbb.CreateString(value));
-}
-
-static void AppendKeyValueMetadata(FBB& fbb, const KeyValueMetadata& metadata,
- std::vector<KeyValueOffset>* key_values) {
- key_values->reserve(metadata.size());
- for (int i = 0; i < metadata.size(); ++i) {
- key_values->push_back(AppendKeyValue(fbb, metadata.key(i), metadata.value(i)));
- }
-}
-
-class FieldToFlatbufferVisitor {
- public:
- FieldToFlatbufferVisitor(FBB& fbb, const DictionaryFieldMapper& mapper,
- const FieldPosition& field_pos)
- : fbb_(fbb), mapper_(mapper), field_pos_(field_pos) {}
-
- Status VisitType(const DataType& type) { return VisitTypeInline(type, this); }
-
- Status Visit(const NullType& type) {
- fb_type_ = flatbuf::Type::Null;
- type_offset_ = flatbuf::CreateNull(fbb_).Union();
- return Status::OK();
- }
-
- Status Visit(const BooleanType& type) {
- fb_type_ = flatbuf::Type::Bool;
- type_offset_ = flatbuf::CreateBool(fbb_).Union();
- return Status::OK();
- }
-
- template <int BitWidth, bool IsSigned, typename T>
- Status Visit(const T& type) {
- fb_type_ = flatbuf::Type::Int;
- type_offset_ = IntToFlatbuffer(fbb_, BitWidth, IsSigned);
- return Status::OK();
- }
-
- template <typename T>
- enable_if_integer<T, Status> Visit(const T& type) {
- constexpr bool is_signed = is_signed_integer_type<T>::value;
- return Visit<sizeof(typename T::c_type) * 8, is_signed>(type);
- }
-
- Status Visit(const HalfFloatType& type) {
- fb_type_ = flatbuf::Type::FloatingPoint;
- type_offset_ = FloatToFlatbuffer(fbb_, flatbuf::Precision::HALF);
- return Status::OK();
- }
-
- Status Visit(const FloatType& type) {
- fb_type_ = flatbuf::Type::FloatingPoint;
- type_offset_ = FloatToFlatbuffer(fbb_, flatbuf::Precision::SINGLE);
- return Status::OK();
- }
-
- Status Visit(const DoubleType& type) {
- fb_type_ = flatbuf::Type::FloatingPoint;
- type_offset_ = FloatToFlatbuffer(fbb_, flatbuf::Precision::DOUBLE);
- return Status::OK();
- }
-
- Status Visit(const FixedSizeBinaryType& type) {
- const auto& fw_type = checked_cast<const FixedSizeBinaryType&>(type);
- fb_type_ = flatbuf::Type::FixedSizeBinary;
- type_offset_ = flatbuf::CreateFixedSizeBinary(fbb_, fw_type.byte_width()).Union();
- return Status::OK();
- }
-
- Status Visit(const BinaryType& type) {
- fb_type_ = flatbuf::Type::Binary;
- type_offset_ = flatbuf::CreateBinary(fbb_).Union();
- return Status::OK();
- }
-
- Status Visit(const LargeBinaryType& type) {
- fb_type_ = flatbuf::Type::LargeBinary;
- type_offset_ = flatbuf::CreateLargeBinary(fbb_).Union();
- return Status::OK();
- }
-
- Status Visit(const StringType& type) {
- fb_type_ = flatbuf::Type::Utf8;
- type_offset_ = flatbuf::CreateUtf8(fbb_).Union();
- return Status::OK();
- }
-
- Status Visit(const LargeStringType& type) {
- fb_type_ = flatbuf::Type::LargeUtf8;
- type_offset_ = flatbuf::CreateLargeUtf8(fbb_).Union();
- return Status::OK();
- }
-
- Status Visit(const Date32Type& type) {
- fb_type_ = flatbuf::Type::Date;
- type_offset_ = flatbuf::CreateDate(fbb_, flatbuf::DateUnit::DAY).Union();
- return Status::OK();
- }
-
- Status Visit(const Date64Type& type) {
- fb_type_ = flatbuf::Type::Date;
- type_offset_ = flatbuf::CreateDate(fbb_, flatbuf::DateUnit::MILLISECOND).Union();
- return Status::OK();
- }
-
- Status Visit(const Time32Type& type) {
- const auto& time_type = checked_cast<const Time32Type&>(type);
- fb_type_ = flatbuf::Type::Time;
- type_offset_ =
- flatbuf::CreateTime(fbb_, ToFlatbufferUnit(time_type.unit()), 32).Union();
- return Status::OK();
- }
-
- Status Visit(const Time64Type& type) {
- const auto& time_type = checked_cast<const Time64Type&>(type);
- fb_type_ = flatbuf::Type::Time;
- type_offset_ =
- flatbuf::CreateTime(fbb_, ToFlatbufferUnit(time_type.unit()), 64).Union();
- return Status::OK();
- }
-
- Status Visit(const TimestampType& type) {
- const auto& ts_type = checked_cast<const TimestampType&>(type);
- fb_type_ = flatbuf::Type::Timestamp;
- flatbuf::TimeUnit fb_unit = ToFlatbufferUnit(ts_type.unit());
- FBString fb_timezone = 0;
- if (ts_type.timezone().size() > 0) {
- fb_timezone = fbb_.CreateString(ts_type.timezone());
- }
- type_offset_ = flatbuf::CreateTimestamp(fbb_, fb_unit, fb_timezone).Union();
- return Status::OK();
- }
-
- Status Visit(const DurationType& type) {
- fb_type_ = flatbuf::Type::Duration;
- flatbuf::TimeUnit fb_unit = ToFlatbufferUnit(type.unit());
- type_offset_ = flatbuf::CreateDuration(fbb_, fb_unit).Union();
- return Status::OK();
- }
-
- Status Visit(const DayTimeIntervalType& type) {
- fb_type_ = flatbuf::Type::Interval;
- type_offset_ = flatbuf::CreateInterval(fbb_, flatbuf::IntervalUnit::DAY_TIME).Union();
- return Status::OK();
- }
-
- Status Visit(const MonthIntervalType& type) {
- fb_type_ = flatbuf::Type::Interval;
- type_offset_ =
- flatbuf::CreateInterval(fbb_, flatbuf::IntervalUnit::YEAR_MONTH).Union();
- return Status::OK();
- }
-
+ const auto& index_type = checked_cast<const IntegerType&>(*type.index_type());
+
+ auto index_type_offset =
+ flatbuf::CreateInt(fbb, index_type.bit_width(), index_type.is_signed());
+
+ *out = flatbuf::CreateDictionaryEncoding(fbb, dictionary_id, index_type_offset,
+ type.ordered());
+ return Status::OK();
+}
+
+static KeyValueOffset AppendKeyValue(FBB& fbb, const std::string& key,
+ const std::string& value) {
+ return flatbuf::CreateKeyValue(fbb, fbb.CreateString(key), fbb.CreateString(value));
+}
+
+static void AppendKeyValueMetadata(FBB& fbb, const KeyValueMetadata& metadata,
+ std::vector<KeyValueOffset>* key_values) {
+ key_values->reserve(metadata.size());
+ for (int i = 0; i < metadata.size(); ++i) {
+ key_values->push_back(AppendKeyValue(fbb, metadata.key(i), metadata.value(i)));
+ }
+}
+
+class FieldToFlatbufferVisitor {
+ public:
+ FieldToFlatbufferVisitor(FBB& fbb, const DictionaryFieldMapper& mapper,
+ const FieldPosition& field_pos)
+ : fbb_(fbb), mapper_(mapper), field_pos_(field_pos) {}
+
+ Status VisitType(const DataType& type) { return VisitTypeInline(type, this); }
+
+ Status Visit(const NullType& type) {
+ fb_type_ = flatbuf::Type::Null;
+ type_offset_ = flatbuf::CreateNull(fbb_).Union();
+ return Status::OK();
+ }
+
+ Status Visit(const BooleanType& type) {
+ fb_type_ = flatbuf::Type::Bool;
+ type_offset_ = flatbuf::CreateBool(fbb_).Union();
+ return Status::OK();
+ }
+
+ template <int BitWidth, bool IsSigned, typename T>
+ Status Visit(const T& type) {
+ fb_type_ = flatbuf::Type::Int;
+ type_offset_ = IntToFlatbuffer(fbb_, BitWidth, IsSigned);
+ return Status::OK();
+ }
+
+ template <typename T>
+ enable_if_integer<T, Status> Visit(const T& type) {
+ constexpr bool is_signed = is_signed_integer_type<T>::value;
+ return Visit<sizeof(typename T::c_type) * 8, is_signed>(type);
+ }
+
+ Status Visit(const HalfFloatType& type) {
+ fb_type_ = flatbuf::Type::FloatingPoint;
+ type_offset_ = FloatToFlatbuffer(fbb_, flatbuf::Precision::HALF);
+ return Status::OK();
+ }
+
+ Status Visit(const FloatType& type) {
+ fb_type_ = flatbuf::Type::FloatingPoint;
+ type_offset_ = FloatToFlatbuffer(fbb_, flatbuf::Precision::SINGLE);
+ return Status::OK();
+ }
+
+ Status Visit(const DoubleType& type) {
+ fb_type_ = flatbuf::Type::FloatingPoint;
+ type_offset_ = FloatToFlatbuffer(fbb_, flatbuf::Precision::DOUBLE);
+ return Status::OK();
+ }
+
+ Status Visit(const FixedSizeBinaryType& type) {
+ const auto& fw_type = checked_cast<const FixedSizeBinaryType&>(type);
+ fb_type_ = flatbuf::Type::FixedSizeBinary;
+ type_offset_ = flatbuf::CreateFixedSizeBinary(fbb_, fw_type.byte_width()).Union();
+ return Status::OK();
+ }
+
+ Status Visit(const BinaryType& type) {
+ fb_type_ = flatbuf::Type::Binary;
+ type_offset_ = flatbuf::CreateBinary(fbb_).Union();
+ return Status::OK();
+ }
+
+ Status Visit(const LargeBinaryType& type) {
+ fb_type_ = flatbuf::Type::LargeBinary;
+ type_offset_ = flatbuf::CreateLargeBinary(fbb_).Union();
+ return Status::OK();
+ }
+
+ Status Visit(const StringType& type) {
+ fb_type_ = flatbuf::Type::Utf8;
+ type_offset_ = flatbuf::CreateUtf8(fbb_).Union();
+ return Status::OK();
+ }
+
+ Status Visit(const LargeStringType& type) {
+ fb_type_ = flatbuf::Type::LargeUtf8;
+ type_offset_ = flatbuf::CreateLargeUtf8(fbb_).Union();
+ return Status::OK();
+ }
+
+ Status Visit(const Date32Type& type) {
+ fb_type_ = flatbuf::Type::Date;
+ type_offset_ = flatbuf::CreateDate(fbb_, flatbuf::DateUnit::DAY).Union();
+ return Status::OK();
+ }
+
+ Status Visit(const Date64Type& type) {
+ fb_type_ = flatbuf::Type::Date;
+ type_offset_ = flatbuf::CreateDate(fbb_, flatbuf::DateUnit::MILLISECOND).Union();
+ return Status::OK();
+ }
+
+ Status Visit(const Time32Type& type) {
+ const auto& time_type = checked_cast<const Time32Type&>(type);
+ fb_type_ = flatbuf::Type::Time;
+ type_offset_ =
+ flatbuf::CreateTime(fbb_, ToFlatbufferUnit(time_type.unit()), 32).Union();
+ return Status::OK();
+ }
+
+ Status Visit(const Time64Type& type) {
+ const auto& time_type = checked_cast<const Time64Type&>(type);
+ fb_type_ = flatbuf::Type::Time;
+ type_offset_ =
+ flatbuf::CreateTime(fbb_, ToFlatbufferUnit(time_type.unit()), 64).Union();
+ return Status::OK();
+ }
+
+ Status Visit(const TimestampType& type) {
+ const auto& ts_type = checked_cast<const TimestampType&>(type);
+ fb_type_ = flatbuf::Type::Timestamp;
+ flatbuf::TimeUnit fb_unit = ToFlatbufferUnit(ts_type.unit());
+ FBString fb_timezone = 0;
+ if (ts_type.timezone().size() > 0) {
+ fb_timezone = fbb_.CreateString(ts_type.timezone());
+ }
+ type_offset_ = flatbuf::CreateTimestamp(fbb_, fb_unit, fb_timezone).Union();
+ return Status::OK();
+ }
+
+ Status Visit(const DurationType& type) {
+ fb_type_ = flatbuf::Type::Duration;
+ flatbuf::TimeUnit fb_unit = ToFlatbufferUnit(type.unit());
+ type_offset_ = flatbuf::CreateDuration(fbb_, fb_unit).Union();
+ return Status::OK();
+ }
+
+ Status Visit(const DayTimeIntervalType& type) {
+ fb_type_ = flatbuf::Type::Interval;
+ type_offset_ = flatbuf::CreateInterval(fbb_, flatbuf::IntervalUnit::DAY_TIME).Union();
+ return Status::OK();
+ }
+
+ Status Visit(const MonthIntervalType& type) {
+ fb_type_ = flatbuf::Type::Interval;
+ type_offset_ =
+ flatbuf::CreateInterval(fbb_, flatbuf::IntervalUnit::YEAR_MONTH).Union();
+ return Status::OK();
+ }
+
Status Visit(const Decimal128Type& type) {
- const auto& dec_type = checked_cast<const Decimal128Type&>(type);
- fb_type_ = flatbuf::Type::Decimal;
+ const auto& dec_type = checked_cast<const Decimal128Type&>(type);
+ fb_type_ = flatbuf::Type::Decimal;
type_offset_ = flatbuf::CreateDecimal(fbb_, dec_type.precision(), dec_type.scale(),
/*bitWidth=*/128)
.Union();
- return Status::OK();
- }
-
+ return Status::OK();
+ }
+
Status Visit(const Decimal256Type& type) {
const auto& dec_type = checked_cast<const Decimal256Type&>(type);
fb_type_ = flatbuf::Type::Decimal;
@@ -612,149 +612,149 @@ class FieldToFlatbufferVisitor {
return Status::OK();
}
- Status Visit(const ListType& type) {
- fb_type_ = flatbuf::Type::List;
- RETURN_NOT_OK(VisitChildFields(type));
- type_offset_ = flatbuf::CreateList(fbb_).Union();
- return Status::OK();
- }
-
- Status Visit(const LargeListType& type) {
- fb_type_ = flatbuf::Type::LargeList;
- RETURN_NOT_OK(VisitChildFields(type));
- type_offset_ = flatbuf::CreateLargeList(fbb_).Union();
- return Status::OK();
- }
-
- Status Visit(const MapType& type) {
- fb_type_ = flatbuf::Type::Map;
- RETURN_NOT_OK(VisitChildFields(type));
- type_offset_ = flatbuf::CreateMap(fbb_, type.keys_sorted()).Union();
- return Status::OK();
- }
-
- Status Visit(const FixedSizeListType& type) {
- fb_type_ = flatbuf::Type::FixedSizeList;
- RETURN_NOT_OK(VisitChildFields(type));
- type_offset_ = flatbuf::CreateFixedSizeList(fbb_, type.list_size()).Union();
- return Status::OK();
- }
-
- Status Visit(const StructType& type) {
- fb_type_ = flatbuf::Type::Struct_;
- RETURN_NOT_OK(VisitChildFields(type));
- type_offset_ = flatbuf::CreateStruct_(fbb_).Union();
- return Status::OK();
- }
-
- Status Visit(const UnionType& type) {
- fb_type_ = flatbuf::Type::Union;
- RETURN_NOT_OK(VisitChildFields(type));
-
- const auto& union_type = checked_cast<const UnionType&>(type);
-
- flatbuf::UnionMode mode = union_type.mode() == UnionMode::SPARSE
- ? flatbuf::UnionMode::Sparse
- : flatbuf::UnionMode::Dense;
-
- std::vector<int32_t> type_ids;
- type_ids.reserve(union_type.type_codes().size());
- for (uint8_t code : union_type.type_codes()) {
- type_ids.push_back(code);
- }
-
- auto fb_type_ids = fbb_.CreateVector(type_ids.data(), type_ids.size());
-
- type_offset_ = flatbuf::CreateUnion(fbb_, mode, fb_type_ids).Union();
- return Status::OK();
- }
-
- Status Visit(const DictionaryType& type) {
- // In this library, the dictionary "type" is a logical construct. Here we
- // pass through to the value type, as we've already captured the index
- // type in the DictionaryEncoding metadata in the parent field
- return VisitType(*checked_cast<const DictionaryType&>(type).value_type());
- }
-
- Status Visit(const ExtensionType& type) {
- RETURN_NOT_OK(VisitType(*type.storage_type()));
- extra_type_metadata_[kExtensionTypeKeyName] = type.extension_name();
- extra_type_metadata_[kExtensionMetadataKeyName] = type.Serialize();
- return Status::OK();
- }
-
- Status VisitChildFields(const DataType& type) {
- for (int i = 0; i < type.num_fields(); ++i) {
- FieldOffset child_offset;
- FieldToFlatbufferVisitor child_visitor(fbb_, mapper_, field_pos_.child(i));
- RETURN_NOT_OK(child_visitor.GetResult(type.field(i), &child_offset));
- children_.push_back(child_offset);
- }
- return Status::OK();
- }
-
- Status GetResult(const std::shared_ptr<Field>& field, FieldOffset* offset) {
- RETURN_NOT_OK(VisitType(*field->type()));
-
- DictionaryOffset dictionary = 0;
- const DataType* storage_type = field->type().get();
- if (storage_type->id() == Type::EXTENSION) {
- storage_type =
- checked_cast<const ExtensionType&>(*storage_type).storage_type().get();
- }
- if (storage_type->id() == Type::DICTIONARY) {
- ARROW_ASSIGN_OR_RAISE(const auto dictionary_id,
- mapper_.GetFieldId(field_pos_.path()));
- RETURN_NOT_OK(GetDictionaryEncoding(
- fbb_, field, checked_cast<const DictionaryType&>(*storage_type), dictionary_id,
- &dictionary));
- }
-
- auto metadata = field->metadata();
-
- flatbuffers::Offset<KVVector> fb_custom_metadata;
- std::vector<KeyValueOffset> key_values;
- if (metadata != nullptr) {
- AppendKeyValueMetadata(fbb_, *metadata, &key_values);
- }
-
- for (const auto& pair : extra_type_metadata_) {
- key_values.push_back(AppendKeyValue(fbb_, pair.first, pair.second));
- }
-
- if (key_values.size() > 0) {
- fb_custom_metadata = fbb_.CreateVector(key_values);
- }
-
- auto fb_name = fbb_.CreateString(field->name());
- auto fb_children = fbb_.CreateVector(children_.data(), children_.size());
- *offset =
- flatbuf::CreateField(fbb_, fb_name, field->nullable(), fb_type_, type_offset_,
- dictionary, fb_children, fb_custom_metadata);
- return Status::OK();
- }
-
- private:
- FBB& fbb_;
- const DictionaryFieldMapper& mapper_;
- FieldPosition field_pos_;
- flatbuf::Type fb_type_;
- Offset type_offset_;
- std::vector<FieldOffset> children_;
- std::unordered_map<std::string, std::string> extra_type_metadata_;
-};
-
-Status FieldFromFlatbuffer(const flatbuf::Field* field, FieldPosition field_pos,
- DictionaryMemo* dictionary_memo, std::shared_ptr<Field>* out) {
- std::shared_ptr<DataType> type;
-
- std::shared_ptr<KeyValueMetadata> metadata;
- RETURN_NOT_OK(internal::GetKeyValueMetadata(field->custom_metadata(), &metadata));
-
- // Reconstruct the data type
- // 1. Data type children
+ Status Visit(const ListType& type) {
+ fb_type_ = flatbuf::Type::List;
+ RETURN_NOT_OK(VisitChildFields(type));
+ type_offset_ = flatbuf::CreateList(fbb_).Union();
+ return Status::OK();
+ }
+
+ Status Visit(const LargeListType& type) {
+ fb_type_ = flatbuf::Type::LargeList;
+ RETURN_NOT_OK(VisitChildFields(type));
+ type_offset_ = flatbuf::CreateLargeList(fbb_).Union();
+ return Status::OK();
+ }
+
+ Status Visit(const MapType& type) {
+ fb_type_ = flatbuf::Type::Map;
+ RETURN_NOT_OK(VisitChildFields(type));
+ type_offset_ = flatbuf::CreateMap(fbb_, type.keys_sorted()).Union();
+ return Status::OK();
+ }
+
+ Status Visit(const FixedSizeListType& type) {
+ fb_type_ = flatbuf::Type::FixedSizeList;
+ RETURN_NOT_OK(VisitChildFields(type));
+ type_offset_ = flatbuf::CreateFixedSizeList(fbb_, type.list_size()).Union();
+ return Status::OK();
+ }
+
+ Status Visit(const StructType& type) {
+ fb_type_ = flatbuf::Type::Struct_;
+ RETURN_NOT_OK(VisitChildFields(type));
+ type_offset_ = flatbuf::CreateStruct_(fbb_).Union();
+ return Status::OK();
+ }
+
+ Status Visit(const UnionType& type) {
+ fb_type_ = flatbuf::Type::Union;
+ RETURN_NOT_OK(VisitChildFields(type));
+
+ const auto& union_type = checked_cast<const UnionType&>(type);
+
+ flatbuf::UnionMode mode = union_type.mode() == UnionMode::SPARSE
+ ? flatbuf::UnionMode::Sparse
+ : flatbuf::UnionMode::Dense;
+
+ std::vector<int32_t> type_ids;
+ type_ids.reserve(union_type.type_codes().size());
+ for (uint8_t code : union_type.type_codes()) {
+ type_ids.push_back(code);
+ }
+
+ auto fb_type_ids = fbb_.CreateVector(type_ids.data(), type_ids.size());
+
+ type_offset_ = flatbuf::CreateUnion(fbb_, mode, fb_type_ids).Union();
+ return Status::OK();
+ }
+
+ Status Visit(const DictionaryType& type) {
+ // In this library, the dictionary "type" is a logical construct. Here we
+ // pass through to the value type, as we've already captured the index
+ // type in the DictionaryEncoding metadata in the parent field
+ return VisitType(*checked_cast<const DictionaryType&>(type).value_type());
+ }
+
+ Status Visit(const ExtensionType& type) {
+ RETURN_NOT_OK(VisitType(*type.storage_type()));
+ extra_type_metadata_[kExtensionTypeKeyName] = type.extension_name();
+ extra_type_metadata_[kExtensionMetadataKeyName] = type.Serialize();
+ return Status::OK();
+ }
+
+ Status VisitChildFields(const DataType& type) {
+ for (int i = 0; i < type.num_fields(); ++i) {
+ FieldOffset child_offset;
+ FieldToFlatbufferVisitor child_visitor(fbb_, mapper_, field_pos_.child(i));
+ RETURN_NOT_OK(child_visitor.GetResult(type.field(i), &child_offset));
+ children_.push_back(child_offset);
+ }
+ return Status::OK();
+ }
+
+ Status GetResult(const std::shared_ptr<Field>& field, FieldOffset* offset) {
+ RETURN_NOT_OK(VisitType(*field->type()));
+
+ DictionaryOffset dictionary = 0;
+ const DataType* storage_type = field->type().get();
+ if (storage_type->id() == Type::EXTENSION) {
+ storage_type =
+ checked_cast<const ExtensionType&>(*storage_type).storage_type().get();
+ }
+ if (storage_type->id() == Type::DICTIONARY) {
+ ARROW_ASSIGN_OR_RAISE(const auto dictionary_id,
+ mapper_.GetFieldId(field_pos_.path()));
+ RETURN_NOT_OK(GetDictionaryEncoding(
+ fbb_, field, checked_cast<const DictionaryType&>(*storage_type), dictionary_id,
+ &dictionary));
+ }
+
+ auto metadata = field->metadata();
+
+ flatbuffers::Offset<KVVector> fb_custom_metadata;
+ std::vector<KeyValueOffset> key_values;
+ if (metadata != nullptr) {
+ AppendKeyValueMetadata(fbb_, *metadata, &key_values);
+ }
+
+ for (const auto& pair : extra_type_metadata_) {
+ key_values.push_back(AppendKeyValue(fbb_, pair.first, pair.second));
+ }
+
+ if (key_values.size() > 0) {
+ fb_custom_metadata = fbb_.CreateVector(key_values);
+ }
+
+ auto fb_name = fbb_.CreateString(field->name());
+ auto fb_children = fbb_.CreateVector(children_.data(), children_.size());
+ *offset =
+ flatbuf::CreateField(fbb_, fb_name, field->nullable(), fb_type_, type_offset_,
+ dictionary, fb_children, fb_custom_metadata);
+ return Status::OK();
+ }
+
+ private:
+ FBB& fbb_;
+ const DictionaryFieldMapper& mapper_;
+ FieldPosition field_pos_;
+ flatbuf::Type fb_type_;
+ Offset type_offset_;
+ std::vector<FieldOffset> children_;
+ std::unordered_map<std::string, std::string> extra_type_metadata_;
+};
+
+Status FieldFromFlatbuffer(const flatbuf::Field* field, FieldPosition field_pos,
+ DictionaryMemo* dictionary_memo, std::shared_ptr<Field>* out) {
+ std::shared_ptr<DataType> type;
+
+ std::shared_ptr<KeyValueMetadata> metadata;
+ RETURN_NOT_OK(internal::GetKeyValueMetadata(field->custom_metadata(), &metadata));
+
+ // Reconstruct the data type
+ // 1. Data type children
FieldVector child_fields;
- const auto& children = field->children();
+ const auto& children = field->children();
// As a tolerance, allow for a null children field meaning "no children" (ARROW-12100)
if (children != nullptr) {
child_fields.resize(children->size());
@@ -762,725 +762,725 @@ Status FieldFromFlatbuffer(const flatbuf::Field* field, FieldPosition field_pos,
RETURN_NOT_OK(FieldFromFlatbuffer(children->Get(i), field_pos.child(i),
dictionary_memo, &child_fields[i]));
}
- }
-
- // 2. Top-level concrete data type
- auto type_data = field->type();
- CHECK_FLATBUFFERS_NOT_NULL(type_data, "Field.type");
- RETURN_NOT_OK(
- ConcreteTypeFromFlatbuffer(field->type_type(), type_data, child_fields, &type));
-
- // 3. Is it a dictionary type?
- int64_t dictionary_id = -1;
- std::shared_ptr<DataType> dict_value_type;
- const flatbuf::DictionaryEncoding* encoding = field->dictionary();
- if (encoding != nullptr) {
- // The field is dictionary-encoded. Construct the DictionaryType
- // based on the DictionaryEncoding metadata and record in the
- // dictionary_memo
- std::shared_ptr<DataType> index_type;
- auto int_data = encoding->indexType();
- CHECK_FLATBUFFERS_NOT_NULL(int_data, "DictionaryEncoding.indexType");
- RETURN_NOT_OK(IntFromFlatbuffer(int_data, &index_type));
- dict_value_type = type;
- ARROW_ASSIGN_OR_RAISE(type,
- DictionaryType::Make(index_type, type, encoding->isOrdered()));
- dictionary_id = encoding->id();
- }
-
- // 4. Is it an extension type?
- if (metadata != nullptr) {
- // Look for extension metadata in custom_metadata field
- int name_index = metadata->FindKey(kExtensionTypeKeyName);
- if (name_index != -1) {
- std::shared_ptr<ExtensionType> ext_type =
- GetExtensionType(metadata->value(name_index));
- if (ext_type != nullptr) {
- int data_index = metadata->FindKey(kExtensionMetadataKeyName);
- std::string type_data = data_index == -1 ? "" : metadata->value(data_index);
-
- ARROW_ASSIGN_OR_RAISE(type, ext_type->Deserialize(type, type_data));
- // Remove the metadata, for faithful roundtripping
- if (data_index != -1) {
- RETURN_NOT_OK(metadata->DeleteMany({name_index, data_index}));
- } else {
- RETURN_NOT_OK(metadata->Delete(name_index));
- }
- }
- // NOTE: if extension type is unknown, we do not raise here and
- // simply return the storage type.
- }
- }
-
- // Reconstruct field
- auto field_name = StringFromFlatbuffers(field->name());
- *out =
- ::arrow::field(std::move(field_name), type, field->nullable(), std::move(metadata));
- if (dictionary_id != -1) {
- // We need both the id -> type mapping (to find the value type when
- // reading a dictionary batch)
- // and the field path -> id mapping (to find the dictionary when
- // reading a record batch)
- RETURN_NOT_OK(dictionary_memo->fields().AddField(dictionary_id, field_pos.path()));
- RETURN_NOT_OK(dictionary_memo->AddDictionaryType(dictionary_id, dict_value_type));
- }
- return Status::OK();
-}
-
-// will return the endianness of the system we are running on
-// based the NUMPY_API function. See NOTICE.txt
-flatbuf::Endianness endianness() {
- union {
- uint32_t i;
- char c[4];
- } bint = {0x01020304};
-
- return bint.c[0] == 1 ? flatbuf::Endianness::Big : flatbuf::Endianness::Little;
-}
-
-flatbuffers::Offset<KVVector> SerializeCustomMetadata(
- FBB& fbb, const std::shared_ptr<const KeyValueMetadata>& metadata) {
- std::vector<KeyValueOffset> key_values;
- if (metadata != nullptr) {
- AppendKeyValueMetadata(fbb, *metadata, &key_values);
- return fbb.CreateVector(key_values);
- } else {
- // null
- return 0;
- }
-}
-
-Status SchemaToFlatbuffer(FBB& fbb, const Schema& schema,
- const DictionaryFieldMapper& mapper,
- flatbuffers::Offset<flatbuf::Schema>* out) {
- std::vector<FieldOffset> field_offsets;
- FieldPosition pos;
- for (int i = 0; i < schema.num_fields(); ++i) {
- FieldOffset offset;
- FieldToFlatbufferVisitor field_visitor(fbb, mapper, pos.child(i));
- RETURN_NOT_OK(field_visitor.GetResult(schema.field(i), &offset));
- field_offsets.push_back(offset);
- }
-
- auto fb_offsets = fbb.CreateVector(field_offsets);
- *out = flatbuf::CreateSchema(fbb, endianness(), fb_offsets,
- SerializeCustomMetadata(fbb, schema.metadata()));
- return Status::OK();
-}
-
-Result<std::shared_ptr<Buffer>> WriteFBMessage(
- FBB& fbb, flatbuf::MessageHeader header_type, flatbuffers::Offset<void> header,
- int64_t body_length, MetadataVersion version,
+ }
+
+ // 2. Top-level concrete data type
+ auto type_data = field->type();
+ CHECK_FLATBUFFERS_NOT_NULL(type_data, "Field.type");
+ RETURN_NOT_OK(
+ ConcreteTypeFromFlatbuffer(field->type_type(), type_data, child_fields, &type));
+
+ // 3. Is it a dictionary type?
+ int64_t dictionary_id = -1;
+ std::shared_ptr<DataType> dict_value_type;
+ const flatbuf::DictionaryEncoding* encoding = field->dictionary();
+ if (encoding != nullptr) {
+ // The field is dictionary-encoded. Construct the DictionaryType
+ // based on the DictionaryEncoding metadata and record in the
+ // dictionary_memo
+ std::shared_ptr<DataType> index_type;
+ auto int_data = encoding->indexType();
+ CHECK_FLATBUFFERS_NOT_NULL(int_data, "DictionaryEncoding.indexType");
+ RETURN_NOT_OK(IntFromFlatbuffer(int_data, &index_type));
+ dict_value_type = type;
+ ARROW_ASSIGN_OR_RAISE(type,
+ DictionaryType::Make(index_type, type, encoding->isOrdered()));
+ dictionary_id = encoding->id();
+ }
+
+ // 4. Is it an extension type?
+ if (metadata != nullptr) {
+ // Look for extension metadata in custom_metadata field
+ int name_index = metadata->FindKey(kExtensionTypeKeyName);
+ if (name_index != -1) {
+ std::shared_ptr<ExtensionType> ext_type =
+ GetExtensionType(metadata->value(name_index));
+ if (ext_type != nullptr) {
+ int data_index = metadata->FindKey(kExtensionMetadataKeyName);
+ std::string type_data = data_index == -1 ? "" : metadata->value(data_index);
+
+ ARROW_ASSIGN_OR_RAISE(type, ext_type->Deserialize(type, type_data));
+ // Remove the metadata, for faithful roundtripping
+ if (data_index != -1) {
+ RETURN_NOT_OK(metadata->DeleteMany({name_index, data_index}));
+ } else {
+ RETURN_NOT_OK(metadata->Delete(name_index));
+ }
+ }
+ // NOTE: if extension type is unknown, we do not raise here and
+ // simply return the storage type.
+ }
+ }
+
+ // Reconstruct field
+ auto field_name = StringFromFlatbuffers(field->name());
+ *out =
+ ::arrow::field(std::move(field_name), type, field->nullable(), std::move(metadata));
+ if (dictionary_id != -1) {
+ // We need both the id -> type mapping (to find the value type when
+ // reading a dictionary batch)
+ // and the field path -> id mapping (to find the dictionary when
+ // reading a record batch)
+ RETURN_NOT_OK(dictionary_memo->fields().AddField(dictionary_id, field_pos.path()));
+ RETURN_NOT_OK(dictionary_memo->AddDictionaryType(dictionary_id, dict_value_type));
+ }
+ return Status::OK();
+}
+
+// will return the endianness of the system we are running on
+// based the NUMPY_API function. See NOTICE.txt
+flatbuf::Endianness endianness() {
+ union {
+ uint32_t i;
+ char c[4];
+ } bint = {0x01020304};
+
+ return bint.c[0] == 1 ? flatbuf::Endianness::Big : flatbuf::Endianness::Little;
+}
+
+flatbuffers::Offset<KVVector> SerializeCustomMetadata(
+ FBB& fbb, const std::shared_ptr<const KeyValueMetadata>& metadata) {
+ std::vector<KeyValueOffset> key_values;
+ if (metadata != nullptr) {
+ AppendKeyValueMetadata(fbb, *metadata, &key_values);
+ return fbb.CreateVector(key_values);
+ } else {
+ // null
+ return 0;
+ }
+}
+
+Status SchemaToFlatbuffer(FBB& fbb, const Schema& schema,
+ const DictionaryFieldMapper& mapper,
+ flatbuffers::Offset<flatbuf::Schema>* out) {
+ std::vector<FieldOffset> field_offsets;
+ FieldPosition pos;
+ for (int i = 0; i < schema.num_fields(); ++i) {
+ FieldOffset offset;
+ FieldToFlatbufferVisitor field_visitor(fbb, mapper, pos.child(i));
+ RETURN_NOT_OK(field_visitor.GetResult(schema.field(i), &offset));
+ field_offsets.push_back(offset);
+ }
+
+ auto fb_offsets = fbb.CreateVector(field_offsets);
+ *out = flatbuf::CreateSchema(fbb, endianness(), fb_offsets,
+ SerializeCustomMetadata(fbb, schema.metadata()));
+ return Status::OK();
+}
+
+Result<std::shared_ptr<Buffer>> WriteFBMessage(
+ FBB& fbb, flatbuf::MessageHeader header_type, flatbuffers::Offset<void> header,
+ int64_t body_length, MetadataVersion version,
const std::shared_ptr<const KeyValueMetadata>& custom_metadata, MemoryPool* pool) {
- auto message = flatbuf::CreateMessage(fbb, MetadataVersionToFlatbuffer(version),
- header_type, header, body_length,
- SerializeCustomMetadata(fbb, custom_metadata));
- fbb.Finish(message);
+ auto message = flatbuf::CreateMessage(fbb, MetadataVersionToFlatbuffer(version),
+ header_type, header, body_length,
+ SerializeCustomMetadata(fbb, custom_metadata));
+ fbb.Finish(message);
return WriteFlatbufferBuilder(fbb, pool);
-}
-
-using FieldNodeVector =
- flatbuffers::Offset<flatbuffers::Vector<const flatbuf::FieldNode*>>;
-using BufferVector = flatbuffers::Offset<flatbuffers::Vector<const flatbuf::Buffer*>>;
-using BodyCompressionOffset = flatbuffers::Offset<flatbuf::BodyCompression>;
-
-static Status WriteFieldNodes(FBB& fbb, const std::vector<FieldMetadata>& nodes,
- FieldNodeVector* out) {
- std::vector<flatbuf::FieldNode> fb_nodes;
- fb_nodes.reserve(nodes.size());
-
- for (size_t i = 0; i < nodes.size(); ++i) {
- const FieldMetadata& node = nodes[i];
- if (node.offset != 0) {
- return Status::Invalid("Field metadata for IPC must have offset 0");
- }
- fb_nodes.emplace_back(node.length, node.null_count);
- }
- *out = fbb.CreateVectorOfStructs(fb_nodes.data(), fb_nodes.size());
- return Status::OK();
-}
-
-static Status WriteBuffers(FBB& fbb, const std::vector<BufferMetadata>& buffers,
- BufferVector* out) {
- std::vector<flatbuf::Buffer> fb_buffers;
- fb_buffers.reserve(buffers.size());
-
- for (size_t i = 0; i < buffers.size(); ++i) {
- const BufferMetadata& buffer = buffers[i];
- fb_buffers.emplace_back(buffer.offset, buffer.length);
- }
- *out = fbb.CreateVectorOfStructs(fb_buffers.data(), fb_buffers.size());
-
- return Status::OK();
-}
-
-static Status GetBodyCompression(FBB& fbb, const IpcWriteOptions& options,
- BodyCompressionOffset* out) {
- if (options.codec != nullptr) {
- flatbuf::CompressionType codec;
- if (options.codec->compression_type() == Compression::LZ4_FRAME) {
- codec = flatbuf::CompressionType::LZ4_FRAME;
- } else if (options.codec->compression_type() == Compression::ZSTD) {
- codec = flatbuf::CompressionType::ZSTD;
- } else {
- return Status::Invalid("Unsupported IPC compression codec: ",
- options.codec->name());
- }
- *out = flatbuf::CreateBodyCompression(fbb, codec,
- flatbuf::BodyCompressionMethod::BUFFER);
- }
- return Status::OK();
-}
-
-static Status MakeRecordBatch(FBB& fbb, int64_t length, int64_t body_length,
- const std::vector<FieldMetadata>& nodes,
- const std::vector<BufferMetadata>& buffers,
- const IpcWriteOptions& options, RecordBatchOffset* offset) {
- FieldNodeVector fb_nodes;
- RETURN_NOT_OK(WriteFieldNodes(fbb, nodes, &fb_nodes));
-
- BufferVector fb_buffers;
- RETURN_NOT_OK(WriteBuffers(fbb, buffers, &fb_buffers));
-
- BodyCompressionOffset fb_compression;
- RETURN_NOT_OK(GetBodyCompression(fbb, options, &fb_compression));
-
- *offset = flatbuf::CreateRecordBatch(fbb, length, fb_nodes, fb_buffers, fb_compression);
- return Status::OK();
-}
-
-Status MakeSparseTensorIndexCOO(FBB& fbb, const SparseCOOIndex& sparse_index,
- const std::vector<BufferMetadata>& buffers,
- flatbuf::SparseTensorIndex* fb_sparse_index_type,
- Offset* fb_sparse_index, size_t* num_buffers) {
- *fb_sparse_index_type = flatbuf::SparseTensorIndex::SparseTensorIndexCOO;
-
- // We assume that the value type of indices tensor is an integer.
- const auto& index_value_type =
- checked_cast<const IntegerType&>(*sparse_index.indices()->type());
- auto indices_type_offset =
- flatbuf::CreateInt(fbb, index_value_type.bit_width(), index_value_type.is_signed());
-
- auto fb_strides = fbb.CreateVector(sparse_index.indices()->strides().data(),
- sparse_index.indices()->strides().size());
-
- const BufferMetadata& indices_metadata = buffers[0];
- flatbuf::Buffer indices(indices_metadata.offset, indices_metadata.length);
-
- *fb_sparse_index =
- flatbuf::CreateSparseTensorIndexCOO(fbb, indices_type_offset, fb_strides, &indices,
- sparse_index.is_canonical())
- .Union();
- *num_buffers = 1;
- return Status::OK();
-}
-
-template <typename SparseIndexType>
-struct SparseMatrixCompressedAxis {};
-
-template <>
-struct SparseMatrixCompressedAxis<SparseCSRIndex> {
- constexpr static const auto value = flatbuf::SparseMatrixCompressedAxis::Row;
-};
-
-template <>
-struct SparseMatrixCompressedAxis<SparseCSCIndex> {
- constexpr static const auto value = flatbuf::SparseMatrixCompressedAxis::Column;
-};
-
-template <typename SparseIndexType>
-Status MakeSparseMatrixIndexCSX(FBB& fbb, const SparseIndexType& sparse_index,
- const std::vector<BufferMetadata>& buffers,
- flatbuf::SparseTensorIndex* fb_sparse_index_type,
- Offset* fb_sparse_index, size_t* num_buffers) {
- *fb_sparse_index_type = flatbuf::SparseTensorIndex::SparseMatrixIndexCSX;
-
- // We assume that the value type of indptr tensor is an integer.
- const auto& indptr_value_type =
- checked_cast<const IntegerType&>(*sparse_index.indptr()->type());
- auto indptr_type_offset = flatbuf::CreateInt(fbb, indptr_value_type.bit_width(),
- indptr_value_type.is_signed());
-
- const BufferMetadata& indptr_metadata = buffers[0];
- flatbuf::Buffer indptr(indptr_metadata.offset, indptr_metadata.length);
-
- // We assume that the value type of indices tensor is an integer.
- const auto& indices_value_type =
- checked_cast<const IntegerType&>(*sparse_index.indices()->type());
- auto indices_type_offset = flatbuf::CreateInt(fbb, indices_value_type.bit_width(),
- indices_value_type.is_signed());
-
- const BufferMetadata& indices_metadata = buffers[1];
- flatbuf::Buffer indices(indices_metadata.offset, indices_metadata.length);
-
- auto compressedAxis = SparseMatrixCompressedAxis<SparseIndexType>::value;
- *fb_sparse_index =
- flatbuf::CreateSparseMatrixIndexCSX(fbb, compressedAxis, indptr_type_offset,
- &indptr, indices_type_offset, &indices)
- .Union();
- *num_buffers = 2;
- return Status::OK();
-}
-
-Status MakeSparseTensorIndexCSF(FBB& fbb, const SparseCSFIndex& sparse_index,
- const std::vector<BufferMetadata>& buffers,
- flatbuf::SparseTensorIndex* fb_sparse_index_type,
- Offset* fb_sparse_index, size_t* num_buffers) {
- *fb_sparse_index_type = flatbuf::SparseTensorIndex::SparseTensorIndexCSF;
- const int ndim = static_cast<int>(sparse_index.axis_order().size());
-
- // We assume that the value type of indptr tensor is an integer.
- const auto& indptr_value_type =
- checked_cast<const IntegerType&>(*sparse_index.indptr()[0]->type());
- auto indptr_type_offset = flatbuf::CreateInt(fbb, indptr_value_type.bit_width(),
- indptr_value_type.is_signed());
-
- // We assume that the value type of indices tensor is an integer.
- const auto& indices_value_type =
- checked_cast<const IntegerType&>(*sparse_index.indices()[0]->type());
- auto indices_type_offset = flatbuf::CreateInt(fbb, indices_value_type.bit_width(),
- indices_value_type.is_signed());
-
- const int64_t indptr_elem_size = GetByteWidth(indptr_value_type);
- const int64_t indices_elem_size = GetByteWidth(indices_value_type);
-
- int64_t offset = 0;
- std::vector<flatbuf::Buffer> indptr, indices;
-
- for (const std::shared_ptr<arrow::Tensor>& tensor : sparse_index.indptr()) {
- const int64_t size = tensor->data()->size() / indptr_elem_size;
- const int64_t padded_size = PaddedLength(tensor->data()->size(), kArrowIpcAlignment);
-
- indptr.push_back({offset, size});
- offset += padded_size;
- }
- for (const std::shared_ptr<arrow::Tensor>& tensor : sparse_index.indices()) {
- const int64_t size = tensor->data()->size() / indices_elem_size;
- const int64_t padded_size = PaddedLength(tensor->data()->size(), kArrowIpcAlignment);
-
- indices.push_back({offset, size});
- offset += padded_size;
- }
-
- auto fb_indices = fbb.CreateVectorOfStructs(indices);
- auto fb_indptr = fbb.CreateVectorOfStructs(indptr);
-
- std::vector<int> axis_order;
- for (int i = 0; i < ndim; ++i) {
- axis_order.emplace_back(static_cast<int>(sparse_index.axis_order()[i]));
- }
- auto fb_axis_order =
- fbb.CreateVector(arrow::util::MakeNonNull(axis_order.data()), axis_order.size());
-
- *fb_sparse_index =
- flatbuf::CreateSparseTensorIndexCSF(fbb, indptr_type_offset, fb_indptr,
- indices_type_offset, fb_indices, fb_axis_order)
- .Union();
- *num_buffers = 2 * ndim - 1;
- return Status::OK();
-}
-
-Status MakeSparseTensorIndex(FBB& fbb, const SparseIndex& sparse_index,
- const std::vector<BufferMetadata>& buffers,
- flatbuf::SparseTensorIndex* fb_sparse_index_type,
- Offset* fb_sparse_index, size_t* num_buffers) {
- switch (sparse_index.format_id()) {
- case SparseTensorFormat::COO:
- RETURN_NOT_OK(MakeSparseTensorIndexCOO(
- fbb, checked_cast<const SparseCOOIndex&>(sparse_index), buffers,
- fb_sparse_index_type, fb_sparse_index, num_buffers));
- break;
-
- case SparseTensorFormat::CSR:
- RETURN_NOT_OK(MakeSparseMatrixIndexCSX(
- fbb, checked_cast<const SparseCSRIndex&>(sparse_index), buffers,
- fb_sparse_index_type, fb_sparse_index, num_buffers));
- break;
-
- case SparseTensorFormat::CSC:
- RETURN_NOT_OK(MakeSparseMatrixIndexCSX(
- fbb, checked_cast<const SparseCSCIndex&>(sparse_index), buffers,
- fb_sparse_index_type, fb_sparse_index, num_buffers));
- break;
-
- case SparseTensorFormat::CSF:
- RETURN_NOT_OK(MakeSparseTensorIndexCSF(
- fbb, checked_cast<const SparseCSFIndex&>(sparse_index), buffers,
- fb_sparse_index_type, fb_sparse_index, num_buffers));
- break;
-
- default:
- *fb_sparse_index_type = flatbuf::SparseTensorIndex::NONE; // Silence warnings
- std::stringstream ss;
- ss << "Unsupported sparse tensor format:: " << sparse_index.ToString() << std::endl;
- return Status::NotImplemented(ss.str());
- }
-
- return Status::OK();
-}
-
-Status MakeSparseTensor(FBB& fbb, const SparseTensor& sparse_tensor, int64_t body_length,
- const std::vector<BufferMetadata>& buffers,
- SparseTensorOffset* offset) {
- flatbuf::Type fb_type_type;
- Offset fb_type;
- RETURN_NOT_OK(
- TensorTypeToFlatbuffer(fbb, *sparse_tensor.type(), &fb_type_type, &fb_type));
-
- using TensorDimOffset = flatbuffers::Offset<flatbuf::TensorDim>;
- std::vector<TensorDimOffset> dims;
- for (int i = 0; i < sparse_tensor.ndim(); ++i) {
- FBString name = fbb.CreateString(sparse_tensor.dim_name(i));
- dims.push_back(flatbuf::CreateTensorDim(fbb, sparse_tensor.shape()[i], name));
- }
-
- auto fb_shape = fbb.CreateVector(dims);
-
- flatbuf::SparseTensorIndex fb_sparse_index_type;
- Offset fb_sparse_index;
- size_t num_index_buffers = 0;
- RETURN_NOT_OK(MakeSparseTensorIndex(fbb, *sparse_tensor.sparse_index(), buffers,
- &fb_sparse_index_type, &fb_sparse_index,
- &num_index_buffers));
-
- const BufferMetadata& data_metadata = buffers[num_index_buffers];
- flatbuf::Buffer data(data_metadata.offset, data_metadata.length);
-
- const int64_t non_zero_length = sparse_tensor.non_zero_length();
-
- *offset =
- flatbuf::CreateSparseTensor(fbb, fb_type_type, fb_type, fb_shape, non_zero_length,
- fb_sparse_index_type, fb_sparse_index, &data);
-
- return Status::OK();
-}
-
-} // namespace
-
-Status GetKeyValueMetadata(const KVVector* fb_metadata,
- std::shared_ptr<KeyValueMetadata>* out) {
- if (fb_metadata == nullptr) {
- *out = nullptr;
- return Status::OK();
- }
-
- auto metadata = std::make_shared<KeyValueMetadata>();
-
- metadata->reserve(fb_metadata->size());
- for (const auto pair : *fb_metadata) {
- CHECK_FLATBUFFERS_NOT_NULL(pair->key(), "custom_metadata.key");
- CHECK_FLATBUFFERS_NOT_NULL(pair->value(), "custom_metadata.value");
- metadata->Append(pair->key()->str(), pair->value()->str());
- }
-
- *out = std::move(metadata);
- return Status::OK();
-}
-
-Status WriteSchemaMessage(const Schema& schema, const DictionaryFieldMapper& mapper,
- const IpcWriteOptions& options, std::shared_ptr<Buffer>* out) {
- FBB fbb;
- flatbuffers::Offset<flatbuf::Schema> fb_schema;
- RETURN_NOT_OK(SchemaToFlatbuffer(fbb, schema, mapper, &fb_schema));
- return WriteFBMessage(fbb, flatbuf::MessageHeader::Schema, fb_schema.Union(),
+}
+
+using FieldNodeVector =
+ flatbuffers::Offset<flatbuffers::Vector<const flatbuf::FieldNode*>>;
+using BufferVector = flatbuffers::Offset<flatbuffers::Vector<const flatbuf::Buffer*>>;
+using BodyCompressionOffset = flatbuffers::Offset<flatbuf::BodyCompression>;
+
+// Converts per-field metadata into a flatbuffer vector of FieldNode structs and
+// stores the vector offset in `*out`.
+// Returns Status::Invalid as soon as a node with a non-zero offset is seen.
+static Status WriteFieldNodes(FBB& fbb, const std::vector<FieldMetadata>& nodes,
+                              FieldNodeVector* out) {
+  std::vector<flatbuf::FieldNode> fb_nodes;
+  fb_nodes.reserve(nodes.size());
+
+  for (const FieldMetadata& field_node : nodes) {
+    if (field_node.offset != 0) {
+      return Status::Invalid("Field metadata for IPC must have offset 0");
+    }
+    fb_nodes.emplace_back(field_node.length, field_node.null_count);
+  }
+
+  *out = fbb.CreateVectorOfStructs(fb_nodes.data(), fb_nodes.size());
+  return Status::OK();
+}
+
+// Converts buffer metadata (offset, length) into a flatbuffer vector of Buffer
+// structs and stores the vector offset in `*out`. Always returns Status::OK().
+static Status WriteBuffers(FBB& fbb, const std::vector<BufferMetadata>& buffers,
+                           BufferVector* out) {
+  std::vector<flatbuf::Buffer> fb_buffers;
+  fb_buffers.reserve(buffers.size());
+
+  for (const BufferMetadata& buffer_meta : buffers) {
+    fb_buffers.emplace_back(buffer_meta.offset, buffer_meta.length);
+  }
+
+  *out = fbb.CreateVectorOfStructs(fb_buffers.data(), fb_buffers.size());
+  return Status::OK();
+}
+
+// Writes a BodyCompression table describing `options.codec` into `fbb`.
+// When no codec is configured, `*out` is left untouched and OK is returned.
+// Only LZ4_FRAME and ZSTD are accepted; any other codec yields Status::Invalid.
+static Status GetBodyCompression(FBB& fbb, const IpcWriteOptions& options,
+                                 BodyCompressionOffset* out) {
+  if (options.codec == nullptr) {
+    return Status::OK();
+  }
+
+  flatbuf::CompressionType fb_codec;
+  switch (options.codec->compression_type()) {
+    case Compression::LZ4_FRAME:
+      fb_codec = flatbuf::CompressionType::LZ4_FRAME;
+      break;
+    case Compression::ZSTD:
+      fb_codec = flatbuf::CompressionType::ZSTD;
+      break;
+    default:
+      return Status::Invalid("Unsupported IPC compression codec: ",
+                             options.codec->name());
+  }
+
+  *out = flatbuf::CreateBodyCompression(fbb, fb_codec,
+                                        flatbuf::BodyCompressionMethod::BUFFER);
+  return Status::OK();
+}
+
+// Assembles a flatbuf::RecordBatch table from field nodes, buffer descriptors and
+// the optional body compression settings, storing its offset in `*offset`.
+// `body_length` is accepted for signature compatibility but is not read here.
+static Status MakeRecordBatch(FBB& fbb, int64_t length, int64_t body_length,
+                              const std::vector<FieldMetadata>& nodes,
+                              const std::vector<BufferMetadata>& buffers,
+                              const IpcWriteOptions& options, RecordBatchOffset* offset) {
+  FieldNodeVector node_vector;
+  RETURN_NOT_OK(WriteFieldNodes(fbb, nodes, &node_vector));
+
+  BufferVector buffer_vector;
+  RETURN_NOT_OK(WriteBuffers(fbb, buffers, &buffer_vector));
+
+  // Stays a default-constructed offset when no codec is configured.
+  BodyCompressionOffset compression;
+  RETURN_NOT_OK(GetBodyCompression(fbb, options, &compression));
+
+  *offset =
+      flatbuf::CreateRecordBatch(fbb, length, node_vector, buffer_vector, compression);
+  return Status::OK();
+}
+
+// Builds a SparseTensorIndexCOO flatbuffer table for `sparse_index`.
+// Reads buffers[0] as the indices buffer and reports one index buffer through
+// `*num_buffers`. Always returns Status::OK().
+Status MakeSparseTensorIndexCOO(FBB& fbb, const SparseCOOIndex& sparse_index,
+                                const std::vector<BufferMetadata>& buffers,
+                                flatbuf::SparseTensorIndex* fb_sparse_index_type,
+                                Offset* fb_sparse_index, size_t* num_buffers) {
+  *fb_sparse_index_type = flatbuf::SparseTensorIndex::SparseTensorIndexCOO;
+
+  const auto& coords = sparse_index.indices();
+
+  // We assume that the value type of indices tensor is an integer.
+  const auto& coord_type = checked_cast<const IntegerType&>(*coords->type());
+  auto fb_coord_type =
+      flatbuf::CreateInt(fbb, coord_type.bit_width(), coord_type.is_signed());
+
+  const auto& coord_strides = coords->strides();
+  auto fb_coord_strides = fbb.CreateVector(coord_strides.data(), coord_strides.size());
+
+  flatbuf::Buffer fb_coord_buffer(buffers[0].offset, buffers[0].length);
+
+  *fb_sparse_index =
+      flatbuf::CreateSparseTensorIndexCOO(fbb, fb_coord_type, fb_coord_strides,
+                                          &fb_coord_buffer, sparse_index.is_canonical())
+          .Union();
+  *num_buffers = 1;
+  return Status::OK();
+}
+
+// Compile-time mapping from a sparse matrix index class to the flatbuffer
+// SparseMatrixCompressedAxis value used when serializing it. The primary template
+// is empty, so only the specializations below provide `value`.
+template <typename SparseIndexType>
+struct SparseMatrixCompressedAxis {};
+
+// SparseCSRIndex maps to the Row axis.
+template <>
+struct SparseMatrixCompressedAxis<SparseCSRIndex> {
+ constexpr static const auto value = flatbuf::SparseMatrixCompressedAxis::Row;
+};
+
+// SparseCSCIndex maps to the Column axis.
+template <>
+struct SparseMatrixCompressedAxis<SparseCSCIndex> {
+ constexpr static const auto value = flatbuf::SparseMatrixCompressedAxis::Column;
+};
+
+// Builds a SparseMatrixIndexCSX flatbuffer table for a CSR or CSC index.
+// The compressed axis comes from SparseMatrixCompressedAxis<SparseIndexType>.
+// Reads buffers[0] as the indptr buffer and buffers[1] as the indices buffer, and
+// reports two index buffers through `*num_buffers`. Always returns Status::OK().
+template <typename SparseIndexType>
+Status MakeSparseMatrixIndexCSX(FBB& fbb, const SparseIndexType& sparse_index,
+                                const std::vector<BufferMetadata>& buffers,
+                                flatbuf::SparseTensorIndex* fb_sparse_index_type,
+                                Offset* fb_sparse_index, size_t* num_buffers) {
+  *fb_sparse_index_type = flatbuf::SparseTensorIndex::SparseMatrixIndexCSX;
+
+  // We assume that the value type of indptr tensor is an integer.
+  const auto& indptr_type =
+      checked_cast<const IntegerType&>(*sparse_index.indptr()->type());
+  auto fb_indptr_type =
+      flatbuf::CreateInt(fbb, indptr_type.bit_width(), indptr_type.is_signed());
+
+  // We assume that the value type of indices tensor is an integer.
+  const auto& indices_type =
+      checked_cast<const IntegerType&>(*sparse_index.indices()->type());
+  auto fb_indices_type =
+      flatbuf::CreateInt(fbb, indices_type.bit_width(), indices_type.is_signed());
+
+  flatbuf::Buffer fb_indptr_buffer(buffers[0].offset, buffers[0].length);
+  flatbuf::Buffer fb_indices_buffer(buffers[1].offset, buffers[1].length);
+
+  const auto axis = SparseMatrixCompressedAxis<SparseIndexType>::value;
+  *fb_sparse_index =
+      flatbuf::CreateSparseMatrixIndexCSX(fbb, axis, fb_indptr_type, &fb_indptr_buffer,
+                                          fb_indices_type, &fb_indices_buffer)
+          .Union();
+  *num_buffers = 2;
+  return Status::OK();
+}
+
+// Builds the SparseTensorIndexCSF flatbuffer table for `sparse_index`.
+// Writes the index kind to `*fb_sparse_index_type`, the table offset to
+// `*fb_sparse_index`, and 2 * ndim - 1 to `*num_buffers`, where ndim is the length
+// of the index's axis order. Always returns Status::OK().
+// The `buffers` argument is not read here: the Buffer entries are derived from the
+// data sizes of the indptr and indices tensors instead.
+Status MakeSparseTensorIndexCSF(FBB& fbb, const SparseCSFIndex& sparse_index,
+ const std::vector<BufferMetadata>& buffers,
+ flatbuf::SparseTensorIndex* fb_sparse_index_type,
+ Offset* fb_sparse_index, size_t* num_buffers) {
+ *fb_sparse_index_type = flatbuf::SparseTensorIndex::SparseTensorIndexCSF;
+ const int ndim = static_cast<int>(sparse_index.axis_order().size());
+
+ // NOTE(review): element [0] of indptr() and indices() is read without an
+ // emptiness check -- confirm callers never pass an index with no such tensors.
+ // We assume that the value type of indptr tensor is an integer.
+ const auto& indptr_value_type =
+ checked_cast<const IntegerType&>(*sparse_index.indptr()[0]->type());
+ auto indptr_type_offset = flatbuf::CreateInt(fbb, indptr_value_type.bit_width(),
+ indptr_value_type.is_signed());
+
+ // We assume that the value type of indices tensor is an integer.
+ const auto& indices_value_type =
+ checked_cast<const IntegerType&>(*sparse_index.indices()[0]->type());
+ auto indices_type_offset = flatbuf::CreateInt(fbb, indices_value_type.bit_width(),
+ indices_value_type.is_signed());
+
+ const int64_t indptr_elem_size = GetByteWidth(indptr_value_type);
+ const int64_t indices_elem_size = GetByteWidth(indices_value_type);
+
+ // Running buffer position shared by both loops: all indptr tensors come first,
+ // followed by all indices tensors.
+ int64_t offset = 0;
+ std::vector<flatbuf::Buffer> indptr, indices;
+
+ for (const std::shared_ptr<arrow::Tensor>& tensor : sparse_index.indptr()) {
+ // The recorded length is data()->size() divided by the element width, while
+ // the running offset advances by data()->size() padded to kArrowIpcAlignment.
+ const int64_t size = tensor->data()->size() / indptr_elem_size;
+ const int64_t padded_size = PaddedLength(tensor->data()->size(), kArrowIpcAlignment);
+
+ indptr.push_back({offset, size});
+ offset += padded_size;
+ }
+ for (const std::shared_ptr<arrow::Tensor>& tensor : sparse_index.indices()) {
+ // Same bookkeeping as above, using the indices element width.
+ const int64_t size = tensor->data()->size() / indices_elem_size;
+ const int64_t padded_size = PaddedLength(tensor->data()->size(), kArrowIpcAlignment);
+
+ indices.push_back({offset, size});
+ offset += padded_size;
+ }
+
+ auto fb_indices = fbb.CreateVectorOfStructs(indices);
+ auto fb_indptr = fbb.CreateVectorOfStructs(indptr);
+
+ // Copy the axis order into a vector of int for serialization.
+ std::vector<int> axis_order;
+ for (int i = 0; i < ndim; ++i) {
+ axis_order.emplace_back(static_cast<int>(sparse_index.axis_order()[i]));
+ }
+ auto fb_axis_order =
+ fbb.CreateVector(arrow::util::MakeNonNull(axis_order.data()), axis_order.size());
+
+ *fb_sparse_index =
+ flatbuf::CreateSparseTensorIndexCSF(fbb, indptr_type_offset, fb_indptr,
+ indices_type_offset, fb_indices, fb_axis_order)
+ .Union();
+ *num_buffers = 2 * ndim - 1;
+ return Status::OK();
+}
+
+// Dispatches on the sparse index format and delegates to the matching builder
+// (COO, CSR/CSC via the CSX builder, or CSF). Formats outside that set set the
+// index type to NONE and yield Status::NotImplemented.
+Status MakeSparseTensorIndex(FBB& fbb, const SparseIndex& sparse_index,
+                             const std::vector<BufferMetadata>& buffers,
+                             flatbuf::SparseTensorIndex* fb_sparse_index_type,
+                             Offset* fb_sparse_index, size_t* num_buffers) {
+  switch (sparse_index.format_id()) {
+    case SparseTensorFormat::COO: {
+      const auto& coo_index = checked_cast<const SparseCOOIndex&>(sparse_index);
+      RETURN_NOT_OK(MakeSparseTensorIndexCOO(fbb, coo_index, buffers,
+                                             fb_sparse_index_type, fb_sparse_index,
+                                             num_buffers));
+      break;
+    }
+
+    case SparseTensorFormat::CSR: {
+      const auto& csr_index = checked_cast<const SparseCSRIndex&>(sparse_index);
+      RETURN_NOT_OK(MakeSparseMatrixIndexCSX(fbb, csr_index, buffers,
+                                             fb_sparse_index_type, fb_sparse_index,
+                                             num_buffers));
+      break;
+    }
+
+    case SparseTensorFormat::CSC: {
+      const auto& csc_index = checked_cast<const SparseCSCIndex&>(sparse_index);
+      RETURN_NOT_OK(MakeSparseMatrixIndexCSX(fbb, csc_index, buffers,
+                                             fb_sparse_index_type, fb_sparse_index,
+                                             num_buffers));
+      break;
+    }
+
+    case SparseTensorFormat::CSF: {
+      const auto& csf_index = checked_cast<const SparseCSFIndex&>(sparse_index);
+      RETURN_NOT_OK(MakeSparseTensorIndexCSF(fbb, csf_index, buffers,
+                                             fb_sparse_index_type, fb_sparse_index,
+                                             num_buffers));
+      break;
+    }
+
+    default: {
+      *fb_sparse_index_type = flatbuf::SparseTensorIndex::NONE;  // Silence warnings
+      // Same text as before: prefix, index description, trailing newline.
+      return Status::NotImplemented("Unsupported sparse tensor format:: ",
+                                    sparse_index.ToString(), "\n");
+    }
+  }
+
+  return Status::OK();
+}
+
+// Builds a flatbuf::SparseTensor table (value type, shape with dimension names,
+// sparse index and data buffer descriptor) and stores its offset in `*offset`.
+// The data buffer descriptor is taken from `buffers` at the position right after
+// the index buffers. `body_length` is not read here.
+Status MakeSparseTensor(FBB& fbb, const SparseTensor& sparse_tensor, int64_t body_length,
+                        const std::vector<BufferMetadata>& buffers,
+                        SparseTensorOffset* offset) {
+  flatbuf::Type fb_value_kind;
+  Offset fb_value_type;
+  RETURN_NOT_OK(TensorTypeToFlatbuffer(fbb, *sparse_tensor.type(), &fb_value_kind,
+                                       &fb_value_type));
+
+  std::vector<flatbuffers::Offset<flatbuf::TensorDim>> fb_dims;
+  for (int axis = 0; axis < sparse_tensor.ndim(); ++axis) {
+    FBString fb_dim_name = fbb.CreateString(sparse_tensor.dim_name(axis));
+    fb_dims.push_back(
+        flatbuf::CreateTensorDim(fbb, sparse_tensor.shape()[axis], fb_dim_name));
+  }
+  auto fb_shape = fbb.CreateVector(fb_dims);
+
+  flatbuf::SparseTensorIndex fb_index_kind;
+  Offset fb_index;
+  size_t index_buffer_count = 0;
+  RETURN_NOT_OK(MakeSparseTensorIndex(fbb, *sparse_tensor.sparse_index(), buffers,
+                                      &fb_index_kind, &fb_index, &index_buffer_count));
+
+  // The values buffer follows the index buffers in `buffers`.
+  const BufferMetadata& values_meta = buffers[index_buffer_count];
+  flatbuf::Buffer fb_values(values_meta.offset, values_meta.length);
+
+  *offset = flatbuf::CreateSparseTensor(fbb, fb_value_kind, fb_value_type, fb_shape,
+                                        sparse_tensor.non_zero_length(), fb_index_kind,
+                                        fb_index, &fb_values);
+  return Status::OK();
+}
+
+} // namespace
+
+// Converts a flatbuffer key-value vector into a KeyValueMetadata object.
+// A null input produces a null `*out`. Each entry's key and value are checked
+// with CHECK_FLATBUFFERS_NOT_NULL before being appended.
+Status GetKeyValueMetadata(const KVVector* fb_metadata,
+                           std::shared_ptr<KeyValueMetadata>* out) {
+  if (fb_metadata == nullptr) {
+    *out = nullptr;
+    return Status::OK();
+  }
+
+  auto result = std::make_shared<KeyValueMetadata>();
+  result->reserve(fb_metadata->size());
+
+  for (const auto pair : *fb_metadata) {
+    CHECK_FLATBUFFERS_NOT_NULL(pair->key(), "custom_metadata.key");
+    CHECK_FLATBUFFERS_NOT_NULL(pair->value(), "custom_metadata.value");
+    result->Append(pair->key()->str(), pair->value()->str());
+  }
+
+  *out = std::move(result);
+  return Status::OK();
+}
+
+Status WriteSchemaMessage(const Schema& schema, const DictionaryFieldMapper& mapper,
+ const IpcWriteOptions& options, std::shared_ptr<Buffer>* out) {
+ FBB fbb;
+ flatbuffers::Offset<flatbuf::Schema> fb_schema;
+ RETURN_NOT_OK(SchemaToFlatbuffer(fbb, schema, mapper, &fb_schema));
+ return WriteFBMessage(fbb, flatbuf::MessageHeader::Schema, fb_schema.Union(),
/*body_length=*/0, options.metadata_version,
/*custom_metadata=*/nullptr, options.memory_pool)
- .Value(out);
-}
-
-Status WriteRecordBatchMessage(
- int64_t length, int64_t body_length,
- const std::shared_ptr<const KeyValueMetadata>& custom_metadata,
- const std::vector<FieldMetadata>& nodes, const std::vector<BufferMetadata>& buffers,
- const IpcWriteOptions& options, std::shared_ptr<Buffer>* out) {
- FBB fbb;
- RecordBatchOffset record_batch;
- RETURN_NOT_OK(
- MakeRecordBatch(fbb, length, body_length, nodes, buffers, options, &record_batch));
- return WriteFBMessage(fbb, flatbuf::MessageHeader::RecordBatch, record_batch.Union(),
+ .Value(out);
+}
+
+Status WriteRecordBatchMessage(
+ int64_t length, int64_t body_length,
+ const std::shared_ptr<const KeyValueMetadata>& custom_metadata,
+ const std::vector<FieldMetadata>& nodes, const std::vector<BufferMetadata>& buffers,
+ const IpcWriteOptions& options, std::shared_ptr<Buffer>* out) {
+ FBB fbb;
+ RecordBatchOffset record_batch;
+ RETURN_NOT_OK(
+ MakeRecordBatch(fbb, length, body_length, nodes, buffers, options, &record_batch));
+ return WriteFBMessage(fbb, flatbuf::MessageHeader::RecordBatch, record_batch.Union(),
body_length, options.metadata_version, custom_metadata,
options.memory_pool)
- .Value(out);
-}
-
-Result<std::shared_ptr<Buffer>> WriteTensorMessage(const Tensor& tensor,
- int64_t buffer_start_offset,
- const IpcWriteOptions& options) {
- using TensorDimOffset = flatbuffers::Offset<flatbuf::TensorDim>;
- using TensorOffset = flatbuffers::Offset<flatbuf::Tensor>;
-
- FBB fbb;
- const int elem_size = GetByteWidth(*tensor.type());
-
- flatbuf::Type fb_type_type;
- Offset fb_type;
- RETURN_NOT_OK(TensorTypeToFlatbuffer(fbb, *tensor.type(), &fb_type_type, &fb_type));
-
- std::vector<TensorDimOffset> dims;
- for (int i = 0; i < tensor.ndim(); ++i) {
- FBString name = fbb.CreateString(tensor.dim_name(i));
- dims.push_back(flatbuf::CreateTensorDim(fbb, tensor.shape()[i], name));
- }
-
- auto fb_shape = fbb.CreateVector(dims.data(), dims.size());
-
- flatbuffers::Offset<flatbuffers::Vector<int64_t>> fb_strides;
- fb_strides = fbb.CreateVector(tensor.strides().data(), tensor.strides().size());
- int64_t body_length = tensor.size() * elem_size;
- flatbuf::Buffer buffer(buffer_start_offset, body_length);
-
- TensorOffset fb_tensor =
- flatbuf::CreateTensor(fbb, fb_type_type, fb_type, fb_shape, fb_strides, &buffer);
-
- return WriteFBMessage(fbb, flatbuf::MessageHeader::Tensor, fb_tensor.Union(),
+ .Value(out);
+}
+
+Result<std::shared_ptr<Buffer>> WriteTensorMessage(const Tensor& tensor,
+ int64_t buffer_start_offset,
+ const IpcWriteOptions& options) {
+ using TensorDimOffset = flatbuffers::Offset<flatbuf::TensorDim>;
+ using TensorOffset = flatbuffers::Offset<flatbuf::Tensor>;
+
+ FBB fbb;
+ const int elem_size = GetByteWidth(*tensor.type());
+
+ flatbuf::Type fb_type_type;
+ Offset fb_type;
+ RETURN_NOT_OK(TensorTypeToFlatbuffer(fbb, *tensor.type(), &fb_type_type, &fb_type));
+
+ std::vector<TensorDimOffset> dims;
+ for (int i = 0; i < tensor.ndim(); ++i) {
+ FBString name = fbb.CreateString(tensor.dim_name(i));
+ dims.push_back(flatbuf::CreateTensorDim(fbb, tensor.shape()[i], name));
+ }
+
+ auto fb_shape = fbb.CreateVector(dims.data(), dims.size());
+
+ flatbuffers::Offset<flatbuffers::Vector<int64_t>> fb_strides;
+ fb_strides = fbb.CreateVector(tensor.strides().data(), tensor.strides().size());
+ int64_t body_length = tensor.size() * elem_size;
+ flatbuf::Buffer buffer(buffer_start_offset, body_length);
+
+ TensorOffset fb_tensor =
+ flatbuf::CreateTensor(fbb, fb_type_type, fb_type, fb_shape, fb_strides, &buffer);
+
+ return WriteFBMessage(fbb, flatbuf::MessageHeader::Tensor, fb_tensor.Union(),
body_length, options.metadata_version,
/*custom_metadata=*/nullptr, options.memory_pool);
-}
-
-Result<std::shared_ptr<Buffer>> WriteSparseTensorMessage(
- const SparseTensor& sparse_tensor, int64_t body_length,
- const std::vector<BufferMetadata>& buffers, const IpcWriteOptions& options) {
- FBB fbb;
- SparseTensorOffset fb_sparse_tensor;
- RETURN_NOT_OK(
- MakeSparseTensor(fbb, sparse_tensor, body_length, buffers, &fb_sparse_tensor));
- return WriteFBMessage(fbb, flatbuf::MessageHeader::SparseTensor,
+}
+
+Result<std::shared_ptr<Buffer>> WriteSparseTensorMessage(
+ const SparseTensor& sparse_tensor, int64_t body_length,
+ const std::vector<BufferMetadata>& buffers, const IpcWriteOptions& options) {
+ FBB fbb;
+ SparseTensorOffset fb_sparse_tensor;
+ RETURN_NOT_OK(
+ MakeSparseTensor(fbb, sparse_tensor, body_length, buffers, &fb_sparse_tensor));
+ return WriteFBMessage(fbb, flatbuf::MessageHeader::SparseTensor,
fb_sparse_tensor.Union(), body_length, options.metadata_version,
/*custom_metadata=*/nullptr, options.memory_pool);
-}
-
-Status WriteDictionaryMessage(
- int64_t id, bool is_delta, int64_t length, int64_t body_length,
- const std::shared_ptr<const KeyValueMetadata>& custom_metadata,
- const std::vector<FieldMetadata>& nodes, const std::vector<BufferMetadata>& buffers,
- const IpcWriteOptions& options, std::shared_ptr<Buffer>* out) {
- FBB fbb;
- RecordBatchOffset record_batch;
- RETURN_NOT_OK(
- MakeRecordBatch(fbb, length, body_length, nodes, buffers, options, &record_batch));
- auto dictionary_batch =
- flatbuf::CreateDictionaryBatch(fbb, id, record_batch, is_delta).Union();
- return WriteFBMessage(fbb, flatbuf::MessageHeader::DictionaryBatch, dictionary_batch,
+}
+
+Status WriteDictionaryMessage(
+ int64_t id, bool is_delta, int64_t length, int64_t body_length,
+ const std::shared_ptr<const KeyValueMetadata>& custom_metadata,
+ const std::vector<FieldMetadata>& nodes, const std::vector<BufferMetadata>& buffers,
+ const IpcWriteOptions& options, std::shared_ptr<Buffer>* out) {
+ FBB fbb;
+ RecordBatchOffset record_batch;
+ RETURN_NOT_OK(
+ MakeRecordBatch(fbb, length, body_length, nodes, buffers, options, &record_batch));
+ auto dictionary_batch =
+ flatbuf::CreateDictionaryBatch(fbb, id, record_batch, is_delta).Union();
+ return WriteFBMessage(fbb, flatbuf::MessageHeader::DictionaryBatch, dictionary_batch,
body_length, options.metadata_version, custom_metadata,
options.memory_pool)
- .Value(out);
-}
-
-static flatbuffers::Offset<flatbuffers::Vector<const flatbuf::Block*>>
-FileBlocksToFlatbuffer(FBB& fbb, const std::vector<FileBlock>& blocks) {
- std::vector<flatbuf::Block> fb_blocks;
-
- for (const FileBlock& block : blocks) {
- fb_blocks.emplace_back(block.offset, block.metadata_length, block.body_length);
- }
-
- return fbb.CreateVectorOfStructs(fb_blocks.data(), fb_blocks.size());
-}
-
-Status WriteFileFooter(const Schema& schema, const std::vector<FileBlock>& dictionaries,
- const std::vector<FileBlock>& record_batches,
- const std::shared_ptr<const KeyValueMetadata>& metadata,
- io::OutputStream* out) {
- FBB fbb;
-
- flatbuffers::Offset<flatbuf::Schema> fb_schema;
- DictionaryFieldMapper mapper(schema);
- RETURN_NOT_OK(SchemaToFlatbuffer(fbb, schema, mapper, &fb_schema));
-
-#ifndef NDEBUG
- for (size_t i = 0; i < dictionaries.size(); ++i) {
- DCHECK(BitUtil::IsMultipleOf8(dictionaries[i].offset)) << i;
- DCHECK(BitUtil::IsMultipleOf8(dictionaries[i].metadata_length)) << i;
- DCHECK(BitUtil::IsMultipleOf8(dictionaries[i].body_length)) << i;
- }
-
- for (size_t i = 0; i < record_batches.size(); ++i) {
- DCHECK(BitUtil::IsMultipleOf8(record_batches[i].offset)) << i;
- DCHECK(BitUtil::IsMultipleOf8(record_batches[i].metadata_length)) << i;
- DCHECK(BitUtil::IsMultipleOf8(record_batches[i].body_length)) << i;
- }
-#endif
-
- auto fb_dictionaries = FileBlocksToFlatbuffer(fbb, dictionaries);
- auto fb_record_batches = FileBlocksToFlatbuffer(fbb, record_batches);
-
- auto fb_custom_metadata = SerializeCustomMetadata(fbb, metadata);
-
- auto footer =
- flatbuf::CreateFooter(fbb, kCurrentMetadataVersion, fb_schema, fb_dictionaries,
- fb_record_batches, fb_custom_metadata);
- fbb.Finish(footer);
-
- int32_t size = fbb.GetSize();
-
- return out->Write(fbb.GetBufferPointer(), size);
-}
-
-// ----------------------------------------------------------------------
-
-Status GetSchema(const void* opaque_schema, DictionaryMemo* dictionary_memo,
- std::shared_ptr<Schema>* out) {
- auto schema = static_cast<const flatbuf::Schema*>(opaque_schema);
- CHECK_FLATBUFFERS_NOT_NULL(schema, "schema");
- CHECK_FLATBUFFERS_NOT_NULL(schema->fields(), "Schema.fields");
- int num_fields = static_cast<int>(schema->fields()->size());
-
- FieldPosition field_pos;
-
- std::vector<std::shared_ptr<Field>> fields(num_fields);
- for (int i = 0; i < num_fields; ++i) {
- const flatbuf::Field* field = schema->fields()->Get(i);
- // XXX I don't think this check is necessary (AP)
- CHECK_FLATBUFFERS_NOT_NULL(field, "DictionaryEncoding.indexType");
- RETURN_NOT_OK(
- FieldFromFlatbuffer(field, field_pos.child(i), dictionary_memo, &fields[i]));
- }
-
- std::shared_ptr<KeyValueMetadata> metadata;
- RETURN_NOT_OK(internal::GetKeyValueMetadata(schema->custom_metadata(), &metadata));
+ .Value(out);
+}
+
+// Converts file block descriptors (offset, metadata length, body length) into a
+// flatbuffer vector of Block structs and returns the vector offset.
+static flatbuffers::Offset<flatbuffers::Vector<const flatbuf::Block*>>
+FileBlocksToFlatbuffer(FBB& fbb, const std::vector<FileBlock>& blocks) {
+  std::vector<flatbuf::Block> fb_blocks;
+  // Pre-size the vector, as WriteFieldNodes and WriteBuffers do, to avoid repeated
+  // reallocation while the blocks are appended.
+  fb_blocks.reserve(blocks.size());
+
+  for (const FileBlock& block : blocks) {
+    fb_blocks.emplace_back(block.offset, block.metadata_length, block.body_length);
+  }
+
+  return fbb.CreateVectorOfStructs(fb_blocks.data(), fb_blocks.size());
+}
+
+// Serializes a file footer -- schema, dictionary blocks, record batch blocks and
+// optional custom metadata -- with a local FlatBufferBuilder and writes the
+// finished buffer to `out`.
+// Returns the failing Status from schema conversion, otherwise the Status of
+// out->Write().
+Status WriteFileFooter(const Schema& schema, const std::vector<FileBlock>& dictionaries,
+ const std::vector<FileBlock>& record_batches,
+ const std::shared_ptr<const KeyValueMetadata>& metadata,
+ io::OutputStream* out) {
+ FBB fbb;
+
+ flatbuffers::Offset<flatbuf::Schema> fb_schema;
+ // The dictionary field mapper is built from the schema being written.
+ DictionaryFieldMapper mapper(schema);
+ RETURN_NOT_OK(SchemaToFlatbuffer(fbb, schema, mapper, &fb_schema));
+
+ // Debug builds only: every block's offset, metadata length and body length must
+ // be a multiple of 8. The failing index is streamed into the check message.
+#ifndef NDEBUG
+ for (size_t i = 0; i < dictionaries.size(); ++i) {
+ DCHECK(BitUtil::IsMultipleOf8(dictionaries[i].offset)) << i;
+ DCHECK(BitUtil::IsMultipleOf8(dictionaries[i].metadata_length)) << i;
+ DCHECK(BitUtil::IsMultipleOf8(dictionaries[i].body_length)) << i;
+ }
+
+ for (size_t i = 0; i < record_batches.size(); ++i) {
+ DCHECK(BitUtil::IsMultipleOf8(record_batches[i].offset)) << i;
+ DCHECK(BitUtil::IsMultipleOf8(record_batches[i].metadata_length)) << i;
+ DCHECK(BitUtil::IsMultipleOf8(record_batches[i].body_length)) << i;
+ }
+#endif
+
+ auto fb_dictionaries = FileBlocksToFlatbuffer(fbb, dictionaries);
+ auto fb_record_batches = FileBlocksToFlatbuffer(fbb, record_batches);
+
+ // Yields a zero offset when `metadata` is null.
+ auto fb_custom_metadata = SerializeCustomMetadata(fbb, metadata);
+
+ auto footer =
+ flatbuf::CreateFooter(fbb, kCurrentMetadataVersion, fb_schema, fb_dictionaries,
+ fb_record_batches, fb_custom_metadata);
+ fbb.Finish(footer);
+
+ // Size of the finished flatbuffer, in bytes.
+ int32_t size = fbb.GetSize();
+
+ return out->Write(fbb.GetBufferPointer(), size);
+}
+
+// ----------------------------------------------------------------------
+
+Status GetSchema(const void* opaque_schema, DictionaryMemo* dictionary_memo,
+ std::shared_ptr<Schema>* out) {
+ auto schema = static_cast<const flatbuf::Schema*>(opaque_schema);
+ CHECK_FLATBUFFERS_NOT_NULL(schema, "schema");
+ CHECK_FLATBUFFERS_NOT_NULL(schema->fields(), "Schema.fields");
+ int num_fields = static_cast<int>(schema->fields()->size());
+
+ FieldPosition field_pos;
+
+ std::vector<std::shared_ptr<Field>> fields(num_fields);
+ for (int i = 0; i < num_fields; ++i) {
+ const flatbuf::Field* field = schema->fields()->Get(i);
+ // XXX I don't think this check is necessary (AP)
+ CHECK_FLATBUFFERS_NOT_NULL(field, "DictionaryEncoding.indexType");
+ RETURN_NOT_OK(
+ FieldFromFlatbuffer(field, field_pos.child(i), dictionary_memo, &fields[i]));
+ }
+
+ std::shared_ptr<KeyValueMetadata> metadata;
+ RETURN_NOT_OK(internal::GetKeyValueMetadata(schema->custom_metadata(), &metadata));
// set endianess using the value in flatbuf schema
auto endianness = schema->endianness() == flatbuf::Endianness::Little
? Endianness::Little
: Endianness::Big;
*out = ::arrow::schema(std::move(fields), endianness, metadata);
- return Status::OK();
-}
-
-Status GetTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type,
- std::vector<int64_t>* shape, std::vector<int64_t>* strides,
- std::vector<std::string>* dim_names) {
- const flatbuf::Message* message = nullptr;
- RETURN_NOT_OK(internal::VerifyMessage(metadata.data(), metadata.size(), &message));
- auto tensor = message->header_as_Tensor();
- if (tensor == nullptr) {
- return Status::IOError("Header-type of flatbuffer-encoded Message is not Tensor.");
- }
-
+ return Status::OK();
+}
+
+Status GetTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type,
+ std::vector<int64_t>* shape, std::vector<int64_t>* strides,
+ std::vector<std::string>* dim_names) {
+ const flatbuf::Message* message = nullptr;
+ RETURN_NOT_OK(internal::VerifyMessage(metadata.data(), metadata.size(), &message));
+ auto tensor = message->header_as_Tensor();
+ if (tensor == nullptr) {
+ return Status::IOError("Header-type of flatbuffer-encoded Message is not Tensor.");
+ }
+
flatbuffers::uoffset_t ndim = tensor->shape()->size();
-
+
for (flatbuffers::uoffset_t i = 0; i < ndim; ++i) {
- auto dim = tensor->shape()->Get(i);
-
- shape->push_back(dim->size());
- dim_names->push_back(StringFromFlatbuffers(dim->name()));
- }
-
- if (tensor->strides() && tensor->strides()->size() > 0) {
+ auto dim = tensor->shape()->Get(i);
+
+ shape->push_back(dim->size());
+ dim_names->push_back(StringFromFlatbuffers(dim->name()));
+ }
+
+ if (tensor->strides() && tensor->strides()->size() > 0) {
if (tensor->strides()->size() != ndim) {
return Status::IOError(
"The sizes of shape and strides in a tensor are mismatched.");
}
for (decltype(ndim) i = 0; i < ndim; ++i) {
- strides->push_back(tensor->strides()->Get(i));
- }
- }
-
- auto type_data = tensor->type(); // Required
- return ConcreteTypeFromFlatbuffer(tensor->type_type(), type_data, {}, type);
-}
-
-Status GetSparseCOOIndexMetadata(const flatbuf::SparseTensorIndexCOO* sparse_index,
- std::shared_ptr<DataType>* indices_type) {
- return IntFromFlatbuffer(sparse_index->indicesType(), indices_type);
-}
-
-Status GetSparseCSXIndexMetadata(const flatbuf::SparseMatrixIndexCSX* sparse_index,
- std::shared_ptr<DataType>* indptr_type,
- std::shared_ptr<DataType>* indices_type) {
- RETURN_NOT_OK(IntFromFlatbuffer(sparse_index->indptrType(), indptr_type));
- RETURN_NOT_OK(IntFromFlatbuffer(sparse_index->indicesType(), indices_type));
- return Status::OK();
-}
-
-Status GetSparseCSFIndexMetadata(const flatbuf::SparseTensorIndexCSF* sparse_index,
- std::vector<int64_t>* axis_order,
- std::vector<int64_t>* indices_size,
- std::shared_ptr<DataType>* indptr_type,
- std::shared_ptr<DataType>* indices_type) {
- RETURN_NOT_OK(IntFromFlatbuffer(sparse_index->indptrType(), indptr_type));
- RETURN_NOT_OK(IntFromFlatbuffer(sparse_index->indicesType(), indices_type));
-
- const int ndim = static_cast<int>(sparse_index->axisOrder()->size());
- for (int i = 0; i < ndim; ++i) {
- axis_order->push_back(sparse_index->axisOrder()->Get(i));
- indices_size->push_back(sparse_index->indicesBuffers()->Get(i)->length());
- }
-
- return Status::OK();
-}
-
-Status GetSparseTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type,
- std::vector<int64_t>* shape,
- std::vector<std::string>* dim_names,
- int64_t* non_zero_length,
- SparseTensorFormat::type* sparse_tensor_format_id) {
- const flatbuf::Message* message = nullptr;
- RETURN_NOT_OK(internal::VerifyMessage(metadata.data(), metadata.size(), &message));
- auto sparse_tensor = message->header_as_SparseTensor();
- if (sparse_tensor == nullptr) {
- return Status::IOError(
- "Header-type of flatbuffer-encoded Message is not SparseTensor.");
- }
- int ndim = static_cast<int>(sparse_tensor->shape()->size());
-
- if (shape || dim_names) {
- for (int i = 0; i < ndim; ++i) {
- auto dim = sparse_tensor->shape()->Get(i);
-
- if (shape) {
- shape->push_back(dim->size());
- }
-
- if (dim_names) {
- dim_names->push_back(StringFromFlatbuffers(dim->name()));
- }
- }
- }
-
- if (non_zero_length) {
- *non_zero_length = sparse_tensor->non_zero_length();
- }
-
- if (sparse_tensor_format_id) {
- switch (sparse_tensor->sparseIndex_type()) {
- case flatbuf::SparseTensorIndex::SparseTensorIndexCOO:
- *sparse_tensor_format_id = SparseTensorFormat::COO;
- break;
-
- case flatbuf::SparseTensorIndex::SparseMatrixIndexCSX: {
- auto cs = sparse_tensor->sparseIndex_as_SparseMatrixIndexCSX();
- switch (cs->compressedAxis()) {
- case flatbuf::SparseMatrixCompressedAxis::Row:
- *sparse_tensor_format_id = SparseTensorFormat::CSR;
- break;
-
- case flatbuf::SparseMatrixCompressedAxis::Column:
- *sparse_tensor_format_id = SparseTensorFormat::CSC;
- break;
-
- default:
- return Status::Invalid("Invalid value of SparseMatrixCompressedAxis");
- }
- } break;
-
- case flatbuf::SparseTensorIndex::SparseTensorIndexCSF:
- *sparse_tensor_format_id = SparseTensorFormat::CSF;
- break;
-
- default:
- return Status::Invalid("Unrecognized sparse index type");
- }
- }
-
- auto type_data = sparse_tensor->type(); // Required
- if (type) {
- return ConcreteTypeFromFlatbuffer(sparse_tensor->type_type(), type_data, {}, type);
- } else {
- return Status::OK();
- }
-}
-
-} // namespace internal
-} // namespace ipc
-} // namespace arrow
+ strides->push_back(tensor->strides()->Get(i));
+ }
+ }
+
+ auto type_data = tensor->type(); // Required
+ return ConcreteTypeFromFlatbuffer(tensor->type_type(), type_data, {}, type);
+}
+
+Status GetSparseCOOIndexMetadata(const flatbuf::SparseTensorIndexCOO* sparse_index,
+ std::shared_ptr<DataType>* indices_type) {
+ return IntFromFlatbuffer(sparse_index->indicesType(), indices_type);
+}
+
+Status GetSparseCSXIndexMetadata(const flatbuf::SparseMatrixIndexCSX* sparse_index,
+ std::shared_ptr<DataType>* indptr_type,
+ std::shared_ptr<DataType>* indices_type) {
+ RETURN_NOT_OK(IntFromFlatbuffer(sparse_index->indptrType(), indptr_type));
+ RETURN_NOT_OK(IntFromFlatbuffer(sparse_index->indicesType(), indices_type));
+ return Status::OK();
+}
+
+Status GetSparseCSFIndexMetadata(const flatbuf::SparseTensorIndexCSF* sparse_index,
+ std::vector<int64_t>* axis_order,
+ std::vector<int64_t>* indices_size,
+ std::shared_ptr<DataType>* indptr_type,
+ std::shared_ptr<DataType>* indices_type) {
+ RETURN_NOT_OK(IntFromFlatbuffer(sparse_index->indptrType(), indptr_type));
+ RETURN_NOT_OK(IntFromFlatbuffer(sparse_index->indicesType(), indices_type));
+
+ const int ndim = static_cast<int>(sparse_index->axisOrder()->size());
+ for (int i = 0; i < ndim; ++i) {
+ axis_order->push_back(sparse_index->axisOrder()->Get(i));
+ indices_size->push_back(sparse_index->indicesBuffers()->Get(i)->length());
+ }
+
+ return Status::OK();
+}
+
+Status GetSparseTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type,
+ std::vector<int64_t>* shape,
+ std::vector<std::string>* dim_names,
+ int64_t* non_zero_length,
+ SparseTensorFormat::type* sparse_tensor_format_id) {
+ const flatbuf::Message* message = nullptr;
+ RETURN_NOT_OK(internal::VerifyMessage(metadata.data(), metadata.size(), &message));
+ auto sparse_tensor = message->header_as_SparseTensor();
+ if (sparse_tensor == nullptr) {
+ return Status::IOError(
+ "Header-type of flatbuffer-encoded Message is not SparseTensor.");
+ }
+ int ndim = static_cast<int>(sparse_tensor->shape()->size());
+
+ if (shape || dim_names) {
+ for (int i = 0; i < ndim; ++i) {
+ auto dim = sparse_tensor->shape()->Get(i);
+
+ if (shape) {
+ shape->push_back(dim->size());
+ }
+
+ if (dim_names) {
+ dim_names->push_back(StringFromFlatbuffers(dim->name()));
+ }
+ }
+ }
+
+ if (non_zero_length) {
+ *non_zero_length = sparse_tensor->non_zero_length();
+ }
+
+ if (sparse_tensor_format_id) {
+ switch (sparse_tensor->sparseIndex_type()) {
+ case flatbuf::SparseTensorIndex::SparseTensorIndexCOO:
+ *sparse_tensor_format_id = SparseTensorFormat::COO;
+ break;
+
+ case flatbuf::SparseTensorIndex::SparseMatrixIndexCSX: {
+ auto cs = sparse_tensor->sparseIndex_as_SparseMatrixIndexCSX();
+ switch (cs->compressedAxis()) {
+ case flatbuf::SparseMatrixCompressedAxis::Row:
+ *sparse_tensor_format_id = SparseTensorFormat::CSR;
+ break;
+
+ case flatbuf::SparseMatrixCompressedAxis::Column:
+ *sparse_tensor_format_id = SparseTensorFormat::CSC;
+ break;
+
+ default:
+ return Status::Invalid("Invalid value of SparseMatrixCompressedAxis");
+ }
+ } break;
+
+ case flatbuf::SparseTensorIndex::SparseTensorIndexCSF:
+ *sparse_tensor_format_id = SparseTensorFormat::CSF;
+ break;
+
+ default:
+ return Status::Invalid("Unrecognized sparse index type");
+ }
+ }
+
+ auto type_data = sparse_tensor->type(); // Required
+ if (type) {
+ return ConcreteTypeFromFlatbuffer(sparse_tensor->type_type(), type_data, {}, type);
+ } else {
+ return Status::OK();
+ }
+}
+
+} // namespace internal
+} // namespace ipc
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/metadata_internal.h b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/metadata_internal.h
index 9cf489dd668..759a17945c6 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/metadata_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/metadata_internal.h
@@ -1,161 +1,161 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Internal metadata serialization matters
-
-#pragma once
-
-#include <cstdint>
-#include <cstring>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include <flatbuffers/flatbuffers.h>
-
-#include "arrow/buffer.h"
-#include "arrow/io/type_fwd.h"
-#include "arrow/ipc/message.h"
-#include "arrow/result.h"
-#include "arrow/sparse_tensor.h"
-#include "arrow/status.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-#include "generated/Message_generated.h"
-#include "generated/Schema_generated.h"
-#include "generated/SparseTensor_generated.h" // IWYU pragma: keep
-
-namespace arrow {
-
-namespace flatbuf = org::apache::arrow::flatbuf;
-
-namespace ipc {
-
-class DictionaryFieldMapper;
-class DictionaryMemo;
-
-namespace internal {
-
-using KeyValueOffset = flatbuffers::Offset<flatbuf::KeyValue>;
-using KVVector = flatbuffers::Vector<KeyValueOffset>;
-
-// This 0xFFFFFFFF value is the first 4 bytes of a valid IPC message
-constexpr int32_t kIpcContinuationToken = -1;
-
-static constexpr flatbuf::MetadataVersion kCurrentMetadataVersion =
- flatbuf::MetadataVersion::V5;
-
-static constexpr flatbuf::MetadataVersion kLatestMetadataVersion =
- flatbuf::MetadataVersion::V5;
-
-static constexpr flatbuf::MetadataVersion kMinMetadataVersion =
- flatbuf::MetadataVersion::V4;
-
-MetadataVersion GetMetadataVersion(flatbuf::MetadataVersion version);
-
-// This function is used in a unit test
-ARROW_EXPORT
-flatbuf::MetadataVersion MetadataVersionToFlatbuffer(MetadataVersion version);
-
-// Whether the type has a validity bitmap in the given IPC version
-bool HasValidityBitmap(Type::type type_id, MetadataVersion version);
-
-static constexpr const char* kArrowMagicBytes = "ARROW1";
-
-struct FieldMetadata {
- int64_t length;
- int64_t null_count;
- int64_t offset;
-};
-
-struct BufferMetadata {
- /// The relative offset into the memory page to the starting byte of the buffer
- int64_t offset;
-
- /// Absolute length in bytes of the buffer
- int64_t length;
-};
-
-struct FileBlock {
- int64_t offset;
- int32_t metadata_length;
- int64_t body_length;
-};
-
-// Low-level utilities to help with reading Flatbuffers data.
-
-#define CHECK_FLATBUFFERS_NOT_NULL(fb_value, name) \
- if ((fb_value) == NULLPTR) { \
- return Status::IOError("Unexpected null field ", name, \
- " in flatbuffer-encoded metadata"); \
- }
-
-template <typename T>
-inline uint32_t FlatBuffersVectorSize(const flatbuffers::Vector<T>* vec) {
- return (vec == NULLPTR) ? 0 : vec->size();
-}
-
-inline std::string StringFromFlatbuffers(const flatbuffers::String* s) {
- return (s == NULLPTR) ? "" : s->str();
-}
-
-// Read interface classes. We do not fully deserialize the flatbuffers so that
-// individual fields metadata can be retrieved from very large schema without
-//
-
-// Construct a complete Schema from the message and add
-// dictionary-encoded fields to a DictionaryMemo instance. May be
-// expensive for very large schemas if you are only interested in a
-// few fields
-Status GetSchema(const void* opaque_schema, DictionaryMemo* dictionary_memo,
- std::shared_ptr<Schema>* out);
-
-Status GetTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type,
- std::vector<int64_t>* shape, std::vector<int64_t>* strides,
- std::vector<std::string>* dim_names);
-
-// EXPERIMENTAL: Extracting metadata of a SparseCOOIndex from the message
-Status GetSparseCOOIndexMetadata(const flatbuf::SparseTensorIndexCOO* sparse_index,
- std::shared_ptr<DataType>* indices_type);
-
-// EXPERIMENTAL: Extracting metadata of a SparseCSXIndex from the message
-Status GetSparseCSXIndexMetadata(const flatbuf::SparseMatrixIndexCSX* sparse_index,
- std::shared_ptr<DataType>* indptr_type,
- std::shared_ptr<DataType>* indices_type);
-
-// EXPERIMENTAL: Extracting metadata of a SparseCSFIndex from the message
-Status GetSparseCSFIndexMetadata(const flatbuf::SparseTensorIndexCSF* sparse_index,
- std::vector<int64_t>* axis_order,
- std::vector<int64_t>* indices_size,
- std::shared_ptr<DataType>* indptr_type,
- std::shared_ptr<DataType>* indices_type);
-
-// EXPERIMENTAL: Extracting metadata of a sparse tensor from the message
-Status GetSparseTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type,
- std::vector<int64_t>* shape,
- std::vector<std::string>* dim_names, int64_t* length,
- SparseTensorFormat::type* sparse_tensor_format_id);
-
-Status GetKeyValueMetadata(const KVVector* fb_metadata,
- std::shared_ptr<KeyValueMetadata>* out);
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Internal metadata serialization matters
+
+#pragma once
+
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <flatbuffers/flatbuffers.h>
+
+#include "arrow/buffer.h"
+#include "arrow/io/type_fwd.h"
+#include "arrow/ipc/message.h"
+#include "arrow/result.h"
+#include "arrow/sparse_tensor.h"
+#include "arrow/status.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+#include "generated/Message_generated.h"
+#include "generated/Schema_generated.h"
+#include "generated/SparseTensor_generated.h" // IWYU pragma: keep
+
+namespace arrow {
+
+namespace flatbuf = org::apache::arrow::flatbuf;
+
+namespace ipc {
+
+class DictionaryFieldMapper;
+class DictionaryMemo;
+
+namespace internal {
+
+using KeyValueOffset = flatbuffers::Offset<flatbuf::KeyValue>;
+using KVVector = flatbuffers::Vector<KeyValueOffset>;
+
+// This 0xFFFFFFFF value is the first 4 bytes of a valid IPC message
+constexpr int32_t kIpcContinuationToken = -1;
+
+static constexpr flatbuf::MetadataVersion kCurrentMetadataVersion =
+ flatbuf::MetadataVersion::V5;
+
+static constexpr flatbuf::MetadataVersion kLatestMetadataVersion =
+ flatbuf::MetadataVersion::V5;
+
+static constexpr flatbuf::MetadataVersion kMinMetadataVersion =
+ flatbuf::MetadataVersion::V4;
+
+MetadataVersion GetMetadataVersion(flatbuf::MetadataVersion version);
+
+// This function is used in a unit test
+ARROW_EXPORT
+flatbuf::MetadataVersion MetadataVersionToFlatbuffer(MetadataVersion version);
+
+// Whether the type has a validity bitmap in the given IPC version
+bool HasValidityBitmap(Type::type type_id, MetadataVersion version);
+
+static constexpr const char* kArrowMagicBytes = "ARROW1";
+
+struct FieldMetadata {
+ int64_t length;
+ int64_t null_count;
+ int64_t offset;
+};
+
+struct BufferMetadata {
+ /// The relative offset into the memory page to the starting byte of the buffer
+ int64_t offset;
+
+ /// Absolute length in bytes of the buffer
+ int64_t length;
+};
+
+struct FileBlock {
+ int64_t offset;
+ int32_t metadata_length;
+ int64_t body_length;
+};
+
+// Low-level utilities to help with reading Flatbuffers data.
+
+#define CHECK_FLATBUFFERS_NOT_NULL(fb_value, name) \
+ if ((fb_value) == NULLPTR) { \
+ return Status::IOError("Unexpected null field ", name, \
+ " in flatbuffer-encoded metadata"); \
+ }
+
+template <typename T>
+inline uint32_t FlatBuffersVectorSize(const flatbuffers::Vector<T>* vec) {
+ return (vec == NULLPTR) ? 0 : vec->size();
+}
+
+inline std::string StringFromFlatbuffers(const flatbuffers::String* s) {
+ return (s == NULLPTR) ? "" : s->str();
+}
+
+// Read interface classes. We do not fully deserialize the flatbuffers so that
+// individual fields metadata can be retrieved from very large schema without
+//
+
+// Construct a complete Schema from the message and add
+// dictionary-encoded fields to a DictionaryMemo instance. May be
+// expensive for very large schemas if you are only interested in a
+// few fields
+Status GetSchema(const void* opaque_schema, DictionaryMemo* dictionary_memo,
+ std::shared_ptr<Schema>* out);
+
+Status GetTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type,
+ std::vector<int64_t>* shape, std::vector<int64_t>* strides,
+ std::vector<std::string>* dim_names);
+
+// EXPERIMENTAL: Extracting metadata of a SparseCOOIndex from the message
+Status GetSparseCOOIndexMetadata(const flatbuf::SparseTensorIndexCOO* sparse_index,
+ std::shared_ptr<DataType>* indices_type);
+
+// EXPERIMENTAL: Extracting metadata of a SparseCSXIndex from the message
+Status GetSparseCSXIndexMetadata(const flatbuf::SparseMatrixIndexCSX* sparse_index,
+ std::shared_ptr<DataType>* indptr_type,
+ std::shared_ptr<DataType>* indices_type);
+
+// EXPERIMENTAL: Extracting metadata of a SparseCSFIndex from the message
+Status GetSparseCSFIndexMetadata(const flatbuf::SparseTensorIndexCSF* sparse_index,
+ std::vector<int64_t>* axis_order,
+ std::vector<int64_t>* indices_size,
+ std::shared_ptr<DataType>* indptr_type,
+ std::shared_ptr<DataType>* indices_type);
+
+// EXPERIMENTAL: Extracting metadata of a sparse tensor from the message
+Status GetSparseTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type,
+ std::vector<int64_t>* shape,
+ std::vector<std::string>* dim_names, int64_t* length,
+ SparseTensorFormat::type* sparse_tensor_format_id);
+
+Status GetKeyValueMetadata(const KVVector* fb_metadata,
+ std::shared_ptr<KeyValueMetadata>* out);
+
template <typename RootType>
bool VerifyFlatbuffers(const uint8_t* data, int64_t size) {
// Heuristic: tables in a Arrow flatbuffers buffer must take at least 1 bit
@@ -169,59 +169,59 @@ bool VerifyFlatbuffers(const uint8_t* data, int64_t size) {
return verifier.VerifyBuffer<RootType>(nullptr);
}
-static inline Status VerifyMessage(const uint8_t* data, int64_t size,
- const flatbuf::Message** out) {
+static inline Status VerifyMessage(const uint8_t* data, int64_t size,
+ const flatbuf::Message** out) {
if (!VerifyFlatbuffers<flatbuf::Message>(data, size)) {
- return Status::IOError("Invalid flatbuffers message.");
- }
- *out = flatbuf::GetMessage(data);
- return Status::OK();
-}
-
-// Serialize arrow::Schema as a Flatbuffer
-Status WriteSchemaMessage(const Schema& schema, const DictionaryFieldMapper& mapper,
- const IpcWriteOptions& options, std::shared_ptr<Buffer>* out);
-
-// This function is used in a unit test
-ARROW_EXPORT
-Status WriteRecordBatchMessage(
- const int64_t length, const int64_t body_length,
- const std::shared_ptr<const KeyValueMetadata>& custom_metadata,
- const std::vector<FieldMetadata>& nodes, const std::vector<BufferMetadata>& buffers,
- const IpcWriteOptions& options, std::shared_ptr<Buffer>* out);
-
-Result<std::shared_ptr<Buffer>> WriteTensorMessage(const Tensor& tensor,
- const int64_t buffer_start_offset,
- const IpcWriteOptions& options);
-
-Result<std::shared_ptr<Buffer>> WriteSparseTensorMessage(
- const SparseTensor& sparse_tensor, int64_t body_length,
- const std::vector<BufferMetadata>& buffers, const IpcWriteOptions& options);
-
-Status WriteFileFooter(const Schema& schema, const std::vector<FileBlock>& dictionaries,
- const std::vector<FileBlock>& record_batches,
- const std::shared_ptr<const KeyValueMetadata>& metadata,
- io::OutputStream* out);
-
-Status WriteDictionaryMessage(
- const int64_t id, const bool is_delta, const int64_t length,
- const int64_t body_length,
- const std::shared_ptr<const KeyValueMetadata>& custom_metadata,
- const std::vector<FieldMetadata>& nodes, const std::vector<BufferMetadata>& buffers,
- const IpcWriteOptions& options, std::shared_ptr<Buffer>* out);
-
-static inline Result<std::shared_ptr<Buffer>> WriteFlatbufferBuilder(
+ return Status::IOError("Invalid flatbuffers message.");
+ }
+ *out = flatbuf::GetMessage(data);
+ return Status::OK();
+}
+
+// Serialize arrow::Schema as a Flatbuffer
+Status WriteSchemaMessage(const Schema& schema, const DictionaryFieldMapper& mapper,
+ const IpcWriteOptions& options, std::shared_ptr<Buffer>* out);
+
+// This function is used in a unit test
+ARROW_EXPORT
+Status WriteRecordBatchMessage(
+ const int64_t length, const int64_t body_length,
+ const std::shared_ptr<const KeyValueMetadata>& custom_metadata,
+ const std::vector<FieldMetadata>& nodes, const std::vector<BufferMetadata>& buffers,
+ const IpcWriteOptions& options, std::shared_ptr<Buffer>* out);
+
+Result<std::shared_ptr<Buffer>> WriteTensorMessage(const Tensor& tensor,
+ const int64_t buffer_start_offset,
+ const IpcWriteOptions& options);
+
+Result<std::shared_ptr<Buffer>> WriteSparseTensorMessage(
+ const SparseTensor& sparse_tensor, int64_t body_length,
+ const std::vector<BufferMetadata>& buffers, const IpcWriteOptions& options);
+
+Status WriteFileFooter(const Schema& schema, const std::vector<FileBlock>& dictionaries,
+ const std::vector<FileBlock>& record_batches,
+ const std::shared_ptr<const KeyValueMetadata>& metadata,
+ io::OutputStream* out);
+
+Status WriteDictionaryMessage(
+ const int64_t id, const bool is_delta, const int64_t length,
+ const int64_t body_length,
+ const std::shared_ptr<const KeyValueMetadata>& custom_metadata,
+ const std::vector<FieldMetadata>& nodes, const std::vector<BufferMetadata>& buffers,
+ const IpcWriteOptions& options, std::shared_ptr<Buffer>* out);
+
+static inline Result<std::shared_ptr<Buffer>> WriteFlatbufferBuilder(
flatbuffers::FlatBufferBuilder& fbb, // NOLINT non-const reference
MemoryPool* pool = default_memory_pool()) {
- int32_t size = fbb.GetSize();
-
+ int32_t size = fbb.GetSize();
+
ARROW_ASSIGN_OR_RAISE(auto result, AllocateBuffer(size, pool));
-
- uint8_t* dst = result->mutable_data();
- memcpy(dst, fbb.GetBufferPointer(), size);
- return std::move(result);
-}
-
-} // namespace internal
-} // namespace ipc
-} // namespace arrow
+
+ uint8_t* dst = result->mutable_data();
+ memcpy(dst, fbb.GetBufferPointer(), size);
+ return std::move(result);
+}
+
+} // namespace internal
+} // namespace ipc
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/options.cc b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/options.cc
index e5b14a47fac..ecc7c8e6bf2 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/options.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/options.cc
@@ -1,41 +1,41 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/ipc/options.h"
-
-#include "arrow/status.h"
-
-namespace arrow {
-namespace ipc {
-
-IpcWriteOptions IpcWriteOptions::Defaults() { return IpcWriteOptions(); }
-
-IpcReadOptions IpcReadOptions::Defaults() { return IpcReadOptions(); }
-
-namespace internal {
-
-Status CheckCompressionSupported(Compression::type codec) {
- if (!(codec == Compression::LZ4_FRAME || codec == Compression::ZSTD)) {
- return Status::Invalid("Only LZ4_FRAME and ZSTD compression allowed");
- }
- return Status::OK();
-}
-
-} // namespace internal
-
-} // namespace ipc
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/ipc/options.h"
+
+#include "arrow/status.h"
+
+namespace arrow {
+namespace ipc {
+
+IpcWriteOptions IpcWriteOptions::Defaults() { return IpcWriteOptions(); }
+
+IpcReadOptions IpcReadOptions::Defaults() { return IpcReadOptions(); }
+
+namespace internal {
+
+Status CheckCompressionSupported(Compression::type codec) {
+ if (!(codec == Compression::LZ4_FRAME || codec == Compression::ZSTD)) {
+ return Status::Invalid("Only LZ4_FRAME and ZSTD compression allowed");
+ }
+ return Status::OK();
+}
+
+} // namespace internal
+
+} // namespace ipc
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/options.h b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/options.h
index 2e0f800b5ad..6bc2e8a2f3c 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/options.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/options.h
@@ -1,75 +1,75 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <vector>
-
-#include "arrow/ipc/type_fwd.h"
-#include "arrow/status.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/compression.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class MemoryPool;
-
-namespace ipc {
-
-// ARROW-109: We set this number arbitrarily to help catch user mistakes. For
-// deeply nested schemas, it is expected the user will indicate explicitly the
-// maximum allowed recursion depth
-constexpr int kMaxNestingDepth = 64;
-
-/// \brief Options for writing Arrow IPC messages
-struct ARROW_EXPORT IpcWriteOptions {
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <vector>
+
+#include "arrow/ipc/type_fwd.h"
+#include "arrow/status.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/compression.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class MemoryPool;
+
+namespace ipc {
+
+// ARROW-109: We set this number arbitrarily to help catch user mistakes. For
+// deeply nested schemas, it is expected the user will indicate explicitly the
+// maximum allowed recursion depth
+constexpr int kMaxNestingDepth = 64;
+
+/// \brief Options for writing Arrow IPC messages
+struct ARROW_EXPORT IpcWriteOptions {
/// \brief If true, allow field lengths that don't fit in a signed 32-bit int.
///
/// Some implementations may not be able to parse streams created with this option.
- bool allow_64bit = false;
+ bool allow_64bit = false;
/// \brief The maximum permitted schema nesting depth.
- int max_recursion_depth = kMaxNestingDepth;
-
+ int max_recursion_depth = kMaxNestingDepth;
+
/// \brief Write padding after memory buffers up to this multiple of bytes.
- int32_t alignment = 8;
-
+ int32_t alignment = 8;
+
/// \brief Write the pre-0.15.0 IPC message format
///
/// This legacy format consists of a 4-byte prefix instead of 8-byte.
- bool write_legacy_ipc_format = false;
-
- /// \brief The memory pool to use for allocations made during IPC writing
+ bool write_legacy_ipc_format = false;
+
+ /// \brief The memory pool to use for allocations made during IPC writing
///
/// While Arrow IPC is predominantly zero-copy, it may have to allocate
/// memory in some cases (for example if compression is enabled).
- MemoryPool* memory_pool = default_memory_pool();
-
- /// \brief Compression codec to use for record batch body buffers
- ///
- /// May only be UNCOMPRESSED, LZ4_FRAME and ZSTD.
- std::shared_ptr<util::Codec> codec;
-
- /// \brief Use global CPU thread pool to parallelize any computational tasks
- /// like compression
- bool use_threads = true;
-
+ MemoryPool* memory_pool = default_memory_pool();
+
+ /// \brief Compression codec to use for record batch body buffers
+ ///
+ /// May only be UNCOMPRESSED, LZ4_FRAME and ZSTD.
+ std::shared_ptr<util::Codec> codec;
+
+ /// \brief Use global CPU thread pool to parallelize any computational tasks
+ /// like compression
+ bool use_threads = true;
+
/// \brief Whether to emit dictionary deltas
///
/// If false, a changed dictionary for a given field will emit a full
@@ -102,41 +102,41 @@ struct ARROW_EXPORT IpcWriteOptions {
/// and deltas.
bool unify_dictionaries = false;
- /// \brief Format version to use for IPC messages and their metadata.
- ///
- /// Presently using V5 version (readable by 1.0.0 and later).
- /// V4 is also available (readable by 0.8.0 and later).
- MetadataVersion metadata_version = MetadataVersion::V5;
-
- static IpcWriteOptions Defaults();
-};
-
-#ifndef ARROW_NO_DEPRECATED_API
-using IpcOptions = IpcWriteOptions;
-#endif
-
+ /// \brief Format version to use for IPC messages and their metadata.
+ ///
+ /// Presently using V5 version (readable by 1.0.0 and later).
+ /// V4 is also available (readable by 0.8.0 and later).
+ MetadataVersion metadata_version = MetadataVersion::V5;
+
+ static IpcWriteOptions Defaults();
+};
+
+#ifndef ARROW_NO_DEPRECATED_API
+using IpcOptions = IpcWriteOptions;
+#endif
+
/// \brief Options for reading Arrow IPC messages
-struct ARROW_EXPORT IpcReadOptions {
+struct ARROW_EXPORT IpcReadOptions {
/// \brief The maximum permitted schema nesting depth.
- int max_recursion_depth = kMaxNestingDepth;
-
+ int max_recursion_depth = kMaxNestingDepth;
+
/// \brief The memory pool to use for allocations made during IPC reading
///
/// While Arrow IPC is predominantly zero-copy, it may have to allocate
/// memory in some cases (for example if compression is enabled).
- MemoryPool* memory_pool = default_memory_pool();
-
- /// \brief EXPERIMENTAL: Top-level schema fields to include when
+ MemoryPool* memory_pool = default_memory_pool();
+
+ /// \brief EXPERIMENTAL: Top-level schema fields to include when
/// deserializing RecordBatch.
///
/// If empty (the default), return all deserialized fields.
/// If non-empty, the values are the indices of fields in the top-level schema.
- std::vector<int> included_fields;
-
- /// \brief Use global CPU thread pool to parallelize any computational tasks
- /// like decompression
- bool use_threads = true;
-
+ std::vector<int> included_fields;
+
+ /// \brief Use global CPU thread pool to parallelize any computational tasks
+ /// like decompression
+ bool use_threads = true;
+
/// \brief EXPERIMENTAL: Convert incoming data to platform-native endianness
///
/// If the endianness of the received schema is not equal to platform-native
@@ -149,13 +149,13 @@ struct ARROW_EXPORT IpcReadOptions {
/// RecordBatchStreamReader and StreamDecoder classes.
bool ensure_native_endian = true;
- static IpcReadOptions Defaults();
-};
-
-namespace internal {
-
-Status CheckCompressionSupported(Compression::type codec);
-
-} // namespace internal
-} // namespace ipc
-} // namespace arrow
+ static IpcReadOptions Defaults();
+};
+
+namespace internal {
+
+Status CheckCompressionSupported(Compression::type codec);
+
+} // namespace internal
+} // namespace ipc
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/reader.cc b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/reader.cc
index a3c345cc440..3e3036b331d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/reader.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/reader.cc
@@ -1,117 +1,117 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/ipc/reader.h"
-
-#include <algorithm>
-#include <climits>
-#include <cstdint>
-#include <cstring>
-#include <string>
-#include <type_traits>
-#include <utility>
-#include <vector>
-
-#include <flatbuffers/flatbuffers.h> // IWYU pragma: export
-
-#include "arrow/array.h"
-#include "arrow/buffer.h"
-#include "arrow/extension_type.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/ipc/reader.h"
+
+#include <algorithm>
+#include <climits>
+#include <cstdint>
+#include <cstring>
+#include <string>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include <flatbuffers/flatbuffers.h> // IWYU pragma: export
+
+#include "arrow/array.h"
+#include "arrow/buffer.h"
+#include "arrow/extension_type.h"
#include "arrow/io/caching.h"
-#include "arrow/io/interfaces.h"
-#include "arrow/io/memory.h"
-#include "arrow/ipc/message.h"
-#include "arrow/ipc/metadata_internal.h"
-#include "arrow/ipc/util.h"
-#include "arrow/ipc/writer.h"
-#include "arrow/record_batch.h"
-#include "arrow/sparse_tensor.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/bitmap_ops.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/compression.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/io/memory.h"
+#include "arrow/ipc/message.h"
+#include "arrow/ipc/metadata_internal.h"
+#include "arrow/ipc/util.h"
+#include "arrow/ipc/writer.h"
+#include "arrow/record_batch.h"
+#include "arrow/sparse_tensor.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/compression.h"
#include "arrow/util/endian.h"
-#include "arrow/util/key_value_metadata.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/parallel.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/parallel.h"
#include "arrow/util/string.h"
#include "arrow/util/thread_pool.h"
-#include "arrow/util/ubsan.h"
+#include "arrow/util/ubsan.h"
#include "arrow/util/vector.h"
-#include "arrow/visitor_inline.h"
-
-#include "generated/File_generated.h" // IWYU pragma: export
-#include "generated/Message_generated.h"
-#include "generated/Schema_generated.h"
-#include "generated/SparseTensor_generated.h"
-
-namespace arrow {
-
-namespace flatbuf = org::apache::arrow::flatbuf;
-
-using internal::checked_cast;
-using internal::checked_pointer_cast;
-using internal::GetByteWidth;
-
-namespace ipc {
-
-using internal::FileBlock;
-using internal::kArrowMagicBytes;
-
-namespace {
-
-enum class DictionaryKind { New, Delta, Replacement };
-
-Status InvalidMessageType(MessageType expected, MessageType actual) {
- return Status::IOError("Expected IPC message of type ", FormatMessageType(expected),
- " but got ", FormatMessageType(actual));
-}
-
-#define CHECK_MESSAGE_TYPE(expected, actual) \
- do { \
- if ((actual) != (expected)) { \
- return InvalidMessageType((expected), (actual)); \
- } \
- } while (0)
-
-#define CHECK_HAS_BODY(message) \
- do { \
- if ((message).body() == nullptr) { \
- return Status::IOError("Expected body in IPC message of type ", \
- FormatMessageType((message).type())); \
- } \
- } while (0)
-
-#define CHECK_HAS_NO_BODY(message) \
- do { \
- if ((message).body_length() != 0) { \
- return Status::IOError("Unexpected body in IPC message of type ", \
- FormatMessageType((message).type())); \
- } \
- } while (0)
-
-} // namespace
-
-// ----------------------------------------------------------------------
-// Record batch read path
-
+#include "arrow/visitor_inline.h"
+
+#include "generated/File_generated.h" // IWYU pragma: export
+#include "generated/Message_generated.h"
+#include "generated/Schema_generated.h"
+#include "generated/SparseTensor_generated.h"
+
+namespace arrow {
+
+namespace flatbuf = org::apache::arrow::flatbuf;
+
+using internal::checked_cast;
+using internal::checked_pointer_cast;
+using internal::GetByteWidth;
+
+namespace ipc {
+
+using internal::FileBlock;
+using internal::kArrowMagicBytes;
+
+namespace {
+
+enum class DictionaryKind { New, Delta, Replacement };
+
+Status InvalidMessageType(MessageType expected, MessageType actual) {
+ return Status::IOError("Expected IPC message of type ", FormatMessageType(expected),
+ " but got ", FormatMessageType(actual));
+}
+
+#define CHECK_MESSAGE_TYPE(expected, actual) \
+ do { \
+ if ((actual) != (expected)) { \
+ return InvalidMessageType((expected), (actual)); \
+ } \
+ } while (0)
+
+#define CHECK_HAS_BODY(message) \
+ do { \
+ if ((message).body() == nullptr) { \
+ return Status::IOError("Expected body in IPC message of type ", \
+ FormatMessageType((message).type())); \
+ } \
+ } while (0)
+
+#define CHECK_HAS_NO_BODY(message) \
+ do { \
+ if ((message).body_length() != 0) { \
+ return Status::IOError("Unexpected body in IPC message of type ", \
+ FormatMessageType((message).type())); \
+ } \
+ } while (0)
+
+} // namespace
+
+// ----------------------------------------------------------------------
+// Record batch read path
+
/// \brief Structure to keep common arguments to be passed
struct IpcReadContext {
IpcReadContext(DictionaryMemo* memo, const IpcReadOptions& option, bool swap,
@@ -136,384 +136,384 @@ struct IpcReadContext {
const bool swap_endian;
};
-/// The field_index and buffer_index are incremented based on how much of the
-/// batch is "consumed" (through nested data reconstruction, for example)
-class ArrayLoader {
- public:
- explicit ArrayLoader(const flatbuf::RecordBatch* metadata,
- MetadataVersion metadata_version, const IpcReadOptions& options,
- io::RandomAccessFile* file)
- : metadata_(metadata),
- metadata_version_(metadata_version),
- file_(file),
- max_recursion_depth_(options.max_recursion_depth) {}
-
- Status ReadBuffer(int64_t offset, int64_t length, std::shared_ptr<Buffer>* out) {
- if (skip_io_) {
- return Status::OK();
- }
- if (offset < 0) {
- return Status::Invalid("Negative offset for reading buffer ", buffer_index_);
- }
- if (length < 0) {
- return Status::Invalid("Negative length for reading buffer ", buffer_index_);
- }
- // This construct permits overriding GetBuffer at compile time
- if (!BitUtil::IsMultipleOf8(offset)) {
- return Status::Invalid("Buffer ", buffer_index_,
- " did not start on 8-byte aligned offset: ", offset);
- }
- return file_->ReadAt(offset, length).Value(out);
- }
-
- Status LoadType(const DataType& type) { return VisitTypeInline(type, this); }
-
- Status Load(const Field* field, ArrayData* out) {
- if (max_recursion_depth_ <= 0) {
- return Status::Invalid("Max recursion depth reached");
- }
-
- field_ = field;
- out_ = out;
- out_->type = field_->type();
- return LoadType(*field_->type());
- }
-
- Status SkipField(const Field* field) {
- ArrayData dummy;
- skip_io_ = true;
- Status status = Load(field, &dummy);
- skip_io_ = false;
- return status;
- }
-
- Status GetBuffer(int buffer_index, std::shared_ptr<Buffer>* out) {
- auto buffers = metadata_->buffers();
- CHECK_FLATBUFFERS_NOT_NULL(buffers, "RecordBatch.buffers");
- if (buffer_index >= static_cast<int>(buffers->size())) {
- return Status::IOError("buffer_index out of range.");
- }
- const flatbuf::Buffer* buffer = buffers->Get(buffer_index);
- if (buffer->length() == 0) {
- // Should never return a null buffer here.
- // (zero-sized buffer allocations are cheap)
- return AllocateBuffer(0).Value(out);
- } else {
- return ReadBuffer(buffer->offset(), buffer->length(), out);
- }
- }
-
- Status GetFieldMetadata(int field_index, ArrayData* out) {
- auto nodes = metadata_->nodes();
- CHECK_FLATBUFFERS_NOT_NULL(nodes, "Table.nodes");
- // pop off a field
- if (field_index >= static_cast<int>(nodes->size())) {
- return Status::Invalid("Ran out of field metadata, likely malformed");
- }
- const flatbuf::FieldNode* node = nodes->Get(field_index);
-
- out->length = node->length();
- out->null_count = node->null_count();
- out->offset = 0;
- return Status::OK();
- }
-
- Status LoadCommon(Type::type type_id) {
- // This only contains the length and null count, which we need to figure
- // out what to do with the buffers. For example, if null_count == 0, then
- // we can skip that buffer without reading from shared memory
- RETURN_NOT_OK(GetFieldMetadata(field_index_++, out_));
-
- if (internal::HasValidityBitmap(type_id, metadata_version_)) {
- // Extract null_bitmap which is common to all arrays except for unions
- // and nulls.
- if (out_->null_count != 0) {
- RETURN_NOT_OK(GetBuffer(buffer_index_, &out_->buffers[0]));
- }
- buffer_index_++;
- }
- return Status::OK();
- }
-
- template <typename TYPE>
- Status LoadPrimitive(Type::type type_id) {
- out_->buffers.resize(2);
-
- RETURN_NOT_OK(LoadCommon(type_id));
- if (out_->length > 0) {
- RETURN_NOT_OK(GetBuffer(buffer_index_++, &out_->buffers[1]));
- } else {
- buffer_index_++;
- out_->buffers[1].reset(new Buffer(nullptr, 0));
- }
- return Status::OK();
- }
-
- template <typename TYPE>
- Status LoadBinary(Type::type type_id) {
- out_->buffers.resize(3);
-
- RETURN_NOT_OK(LoadCommon(type_id));
- RETURN_NOT_OK(GetBuffer(buffer_index_++, &out_->buffers[1]));
- return GetBuffer(buffer_index_++, &out_->buffers[2]);
- }
-
- template <typename TYPE>
- Status LoadList(const TYPE& type) {
- out_->buffers.resize(2);
-
- RETURN_NOT_OK(LoadCommon(type.id()));
- RETURN_NOT_OK(GetBuffer(buffer_index_++, &out_->buffers[1]));
-
- const int num_children = type.num_fields();
- if (num_children != 1) {
- return Status::Invalid("Wrong number of children: ", num_children);
- }
-
- return LoadChildren(type.fields());
- }
-
- Status LoadChildren(const std::vector<std::shared_ptr<Field>>& child_fields) {
- ArrayData* parent = out_;
-
- parent->child_data.resize(child_fields.size());
- for (int i = 0; i < static_cast<int>(child_fields.size()); ++i) {
- parent->child_data[i] = std::make_shared<ArrayData>();
- --max_recursion_depth_;
- RETURN_NOT_OK(Load(child_fields[i].get(), parent->child_data[i].get()));
- ++max_recursion_depth_;
- }
- out_ = parent;
- return Status::OK();
- }
-
- Status Visit(const NullType& type) {
- out_->buffers.resize(1);
-
- // ARROW-6379: NullType has no buffers in the IPC payload
- return GetFieldMetadata(field_index_++, out_);
- }
-
- template <typename T>
- enable_if_t<std::is_base_of<FixedWidthType, T>::value &&
- !std::is_base_of<FixedSizeBinaryType, T>::value &&
- !std::is_base_of<DictionaryType, T>::value,
- Status>
- Visit(const T& type) {
- return LoadPrimitive<T>(type.id());
- }
-
- template <typename T>
- enable_if_base_binary<T, Status> Visit(const T& type) {
- return LoadBinary<T>(type.id());
- }
-
- Status Visit(const FixedSizeBinaryType& type) {
- out_->buffers.resize(2);
- RETURN_NOT_OK(LoadCommon(type.id()));
- return GetBuffer(buffer_index_++, &out_->buffers[1]);
- }
-
- template <typename T>
- enable_if_var_size_list<T, Status> Visit(const T& type) {
- return LoadList(type);
- }
-
- Status Visit(const MapType& type) {
- RETURN_NOT_OK(LoadList(type));
- return MapArray::ValidateChildData(out_->child_data);
- }
-
- Status Visit(const FixedSizeListType& type) {
- out_->buffers.resize(1);
-
- RETURN_NOT_OK(LoadCommon(type.id()));
-
- const int num_children = type.num_fields();
- if (num_children != 1) {
- return Status::Invalid("Wrong number of children: ", num_children);
- }
-
- return LoadChildren(type.fields());
- }
-
- Status Visit(const StructType& type) {
- out_->buffers.resize(1);
- RETURN_NOT_OK(LoadCommon(type.id()));
- return LoadChildren(type.fields());
- }
-
- Status Visit(const UnionType& type) {
- int n_buffers = type.mode() == UnionMode::SPARSE ? 2 : 3;
- out_->buffers.resize(n_buffers);
-
- RETURN_NOT_OK(LoadCommon(type.id()));
-
- // With metadata V4, we can get a validity bitmap.
- // Trying to fix up union data to do without the top-level validity bitmap
- // is hairy:
- // - type ids must be rewritten to all have valid values (even for former
- // null slots)
- // - sparse union children must have their validity bitmaps rewritten
- // by ANDing the top-level validity bitmap
- // - dense union children must be rewritten (at least one of them)
- // to insert the required null slots that were formerly omitted
- // So instead we bail out.
- if (out_->null_count != 0 && out_->buffers[0] != nullptr) {
- return Status::Invalid(
- "Cannot read pre-1.0.0 Union array with top-level validity bitmap");
- }
- out_->buffers[0] = nullptr;
- out_->null_count = 0;
-
- if (out_->length > 0) {
- RETURN_NOT_OK(GetBuffer(buffer_index_, &out_->buffers[1]));
- if (type.mode() == UnionMode::DENSE) {
- RETURN_NOT_OK(GetBuffer(buffer_index_ + 1, &out_->buffers[2]));
- }
- }
- buffer_index_ += n_buffers - 1;
- return LoadChildren(type.fields());
- }
-
- Status Visit(const DictionaryType& type) {
- // out_->dictionary will be filled later in ResolveDictionaries()
- return LoadType(*type.index_type());
- }
-
- Status Visit(const ExtensionType& type) { return LoadType(*type.storage_type()); }
-
- private:
- const flatbuf::RecordBatch* metadata_;
- const MetadataVersion metadata_version_;
- io::RandomAccessFile* file_;
- int max_recursion_depth_;
- int buffer_index_ = 0;
- int field_index_ = 0;
- bool skip_io_ = false;
-
- const Field* field_;
- ArrayData* out_;
-};
-
-Result<std::shared_ptr<Buffer>> DecompressBuffer(const std::shared_ptr<Buffer>& buf,
- const IpcReadOptions& options,
- util::Codec* codec) {
- if (buf == nullptr || buf->size() == 0) {
- return buf;
- }
-
- if (buf->size() < 8) {
- return Status::Invalid(
- "Likely corrupted message, compressed buffers "
- "are larger than 8 bytes by construction");
- }
-
- const uint8_t* data = buf->data();
- int64_t compressed_size = buf->size() - sizeof(int64_t);
- int64_t uncompressed_size = BitUtil::FromLittleEndian(util::SafeLoadAs<int64_t>(data));
-
- ARROW_ASSIGN_OR_RAISE(auto uncompressed,
- AllocateBuffer(uncompressed_size, options.memory_pool));
-
- ARROW_ASSIGN_OR_RAISE(
- int64_t actual_decompressed,
- codec->Decompress(compressed_size, data + sizeof(int64_t), uncompressed_size,
- uncompressed->mutable_data()));
- if (actual_decompressed != uncompressed_size) {
- return Status::Invalid("Failed to fully decompress buffer, expected ",
- uncompressed_size, " bytes but decompressed ",
- actual_decompressed);
- }
-
- return std::move(uncompressed);
-}
-
-Status DecompressBuffers(Compression::type compression, const IpcReadOptions& options,
- ArrayDataVector* fields) {
- struct BufferAccumulator {
- using BufferPtrVector = std::vector<std::shared_ptr<Buffer>*>;
-
- void AppendFrom(const ArrayDataVector& fields) {
- for (const auto& field : fields) {
- for (auto& buffer : field->buffers) {
- buffers_.push_back(&buffer);
- }
- AppendFrom(field->child_data);
- }
- }
-
- BufferPtrVector Get(const ArrayDataVector& fields) && {
- AppendFrom(fields);
- return std::move(buffers_);
- }
-
- BufferPtrVector buffers_;
- };
-
- // Flatten all buffers
- auto buffers = BufferAccumulator{}.Get(*fields);
-
- std::unique_ptr<util::Codec> codec;
- ARROW_ASSIGN_OR_RAISE(codec, util::Codec::Create(compression));
-
- return ::arrow::internal::OptionalParallelFor(
- options.use_threads, static_cast<int>(buffers.size()), [&](int i) {
- ARROW_ASSIGN_OR_RAISE(*buffers[i],
- DecompressBuffer(*buffers[i], options, codec.get()));
- return Status::OK();
- });
-}
-
-Result<std::shared_ptr<RecordBatch>> LoadRecordBatchSubset(
- const flatbuf::RecordBatch* metadata, const std::shared_ptr<Schema>& schema,
+/// The field_index and buffer_index are incremented based on how much of the
+/// batch is "consumed" (through nested data reconstruction, for example)
+class ArrayLoader {
+ public:
+ explicit ArrayLoader(const flatbuf::RecordBatch* metadata,
+ MetadataVersion metadata_version, const IpcReadOptions& options,
+ io::RandomAccessFile* file)
+ : metadata_(metadata),
+ metadata_version_(metadata_version),
+ file_(file),
+ max_recursion_depth_(options.max_recursion_depth) {}
+
+ Status ReadBuffer(int64_t offset, int64_t length, std::shared_ptr<Buffer>* out) {
+ if (skip_io_) {
+ return Status::OK();
+ }
+ if (offset < 0) {
+ return Status::Invalid("Negative offset for reading buffer ", buffer_index_);
+ }
+ if (length < 0) {
+ return Status::Invalid("Negative length for reading buffer ", buffer_index_);
+ }
+ // This construct permits overriding GetBuffer at compile time
+ if (!BitUtil::IsMultipleOf8(offset)) {
+ return Status::Invalid("Buffer ", buffer_index_,
+ " did not start on 8-byte aligned offset: ", offset);
+ }
+ return file_->ReadAt(offset, length).Value(out);
+ }
+
+ Status LoadType(const DataType& type) { return VisitTypeInline(type, this); }
+
+ Status Load(const Field* field, ArrayData* out) {
+ if (max_recursion_depth_ <= 0) {
+ return Status::Invalid("Max recursion depth reached");
+ }
+
+ field_ = field;
+ out_ = out;
+ out_->type = field_->type();
+ return LoadType(*field_->type());
+ }
+
+ Status SkipField(const Field* field) {
+ ArrayData dummy;
+ skip_io_ = true;
+ Status status = Load(field, &dummy);
+ skip_io_ = false;
+ return status;
+ }
+
+ Status GetBuffer(int buffer_index, std::shared_ptr<Buffer>* out) {
+ auto buffers = metadata_->buffers();
+ CHECK_FLATBUFFERS_NOT_NULL(buffers, "RecordBatch.buffers");
+ if (buffer_index >= static_cast<int>(buffers->size())) {
+ return Status::IOError("buffer_index out of range.");
+ }
+ const flatbuf::Buffer* buffer = buffers->Get(buffer_index);
+ if (buffer->length() == 0) {
+ // Should never return a null buffer here.
+ // (zero-sized buffer allocations are cheap)
+ return AllocateBuffer(0).Value(out);
+ } else {
+ return ReadBuffer(buffer->offset(), buffer->length(), out);
+ }
+ }
+
+ Status GetFieldMetadata(int field_index, ArrayData* out) {
+ auto nodes = metadata_->nodes();
+ CHECK_FLATBUFFERS_NOT_NULL(nodes, "Table.nodes");
+ // pop off a field
+ if (field_index >= static_cast<int>(nodes->size())) {
+ return Status::Invalid("Ran out of field metadata, likely malformed");
+ }
+ const flatbuf::FieldNode* node = nodes->Get(field_index);
+
+ out->length = node->length();
+ out->null_count = node->null_count();
+ out->offset = 0;
+ return Status::OK();
+ }
+
+ Status LoadCommon(Type::type type_id) {
+ // This only contains the length and null count, which we need to figure
+ // out what to do with the buffers. For example, if null_count == 0, then
+ // we can skip that buffer without reading from shared memory
+ RETURN_NOT_OK(GetFieldMetadata(field_index_++, out_));
+
+ if (internal::HasValidityBitmap(type_id, metadata_version_)) {
+ // Extract null_bitmap which is common to all arrays except for unions
+ // and nulls.
+ if (out_->null_count != 0) {
+ RETURN_NOT_OK(GetBuffer(buffer_index_, &out_->buffers[0]));
+ }
+ buffer_index_++;
+ }
+ return Status::OK();
+ }
+
+ template <typename TYPE>
+ Status LoadPrimitive(Type::type type_id) {
+ out_->buffers.resize(2);
+
+ RETURN_NOT_OK(LoadCommon(type_id));
+ if (out_->length > 0) {
+ RETURN_NOT_OK(GetBuffer(buffer_index_++, &out_->buffers[1]));
+ } else {
+ buffer_index_++;
+ out_->buffers[1].reset(new Buffer(nullptr, 0));
+ }
+ return Status::OK();
+ }
+
+ template <typename TYPE>
+ Status LoadBinary(Type::type type_id) {
+ out_->buffers.resize(3);
+
+ RETURN_NOT_OK(LoadCommon(type_id));
+ RETURN_NOT_OK(GetBuffer(buffer_index_++, &out_->buffers[1]));
+ return GetBuffer(buffer_index_++, &out_->buffers[2]);
+ }
+
+ template <typename TYPE>
+ Status LoadList(const TYPE& type) {
+ out_->buffers.resize(2);
+
+ RETURN_NOT_OK(LoadCommon(type.id()));
+ RETURN_NOT_OK(GetBuffer(buffer_index_++, &out_->buffers[1]));
+
+ const int num_children = type.num_fields();
+ if (num_children != 1) {
+ return Status::Invalid("Wrong number of children: ", num_children);
+ }
+
+ return LoadChildren(type.fields());
+ }
+
+ Status LoadChildren(const std::vector<std::shared_ptr<Field>>& child_fields) {
+ ArrayData* parent = out_;
+
+ parent->child_data.resize(child_fields.size());
+ for (int i = 0; i < static_cast<int>(child_fields.size()); ++i) {
+ parent->child_data[i] = std::make_shared<ArrayData>();
+ --max_recursion_depth_;
+ RETURN_NOT_OK(Load(child_fields[i].get(), parent->child_data[i].get()));
+ ++max_recursion_depth_;
+ }
+ out_ = parent;
+ return Status::OK();
+ }
+
+ Status Visit(const NullType& type) {
+ out_->buffers.resize(1);
+
+ // ARROW-6379: NullType has no buffers in the IPC payload
+ return GetFieldMetadata(field_index_++, out_);
+ }
+
+ template <typename T>
+ enable_if_t<std::is_base_of<FixedWidthType, T>::value &&
+ !std::is_base_of<FixedSizeBinaryType, T>::value &&
+ !std::is_base_of<DictionaryType, T>::value,
+ Status>
+ Visit(const T& type) {
+ return LoadPrimitive<T>(type.id());
+ }
+
+ template <typename T>
+ enable_if_base_binary<T, Status> Visit(const T& type) {
+ return LoadBinary<T>(type.id());
+ }
+
+ Status Visit(const FixedSizeBinaryType& type) {
+ out_->buffers.resize(2);
+ RETURN_NOT_OK(LoadCommon(type.id()));
+ return GetBuffer(buffer_index_++, &out_->buffers[1]);
+ }
+
+ template <typename T>
+ enable_if_var_size_list<T, Status> Visit(const T& type) {
+ return LoadList(type);
+ }
+
+ Status Visit(const MapType& type) {
+ RETURN_NOT_OK(LoadList(type));
+ return MapArray::ValidateChildData(out_->child_data);
+ }
+
+ Status Visit(const FixedSizeListType& type) {
+ out_->buffers.resize(1);
+
+ RETURN_NOT_OK(LoadCommon(type.id()));
+
+ const int num_children = type.num_fields();
+ if (num_children != 1) {
+ return Status::Invalid("Wrong number of children: ", num_children);
+ }
+
+ return LoadChildren(type.fields());
+ }
+
+ Status Visit(const StructType& type) {
+ out_->buffers.resize(1);
+ RETURN_NOT_OK(LoadCommon(type.id()));
+ return LoadChildren(type.fields());
+ }
+
+ Status Visit(const UnionType& type) {
+ int n_buffers = type.mode() == UnionMode::SPARSE ? 2 : 3;
+ out_->buffers.resize(n_buffers);
+
+ RETURN_NOT_OK(LoadCommon(type.id()));
+
+ // With metadata V4, we can get a validity bitmap.
+ // Trying to fix up union data to do without the top-level validity bitmap
+ // is hairy:
+ // - type ids must be rewritten to all have valid values (even for former
+ // null slots)
+ // - sparse union children must have their validity bitmaps rewritten
+ // by ANDing the top-level validity bitmap
+ // - dense union children must be rewritten (at least one of them)
+ // to insert the required null slots that were formerly omitted
+ // So instead we bail out.
+ if (out_->null_count != 0 && out_->buffers[0] != nullptr) {
+ return Status::Invalid(
+ "Cannot read pre-1.0.0 Union array with top-level validity bitmap");
+ }
+ out_->buffers[0] = nullptr;
+ out_->null_count = 0;
+
+ if (out_->length > 0) {
+ RETURN_NOT_OK(GetBuffer(buffer_index_, &out_->buffers[1]));
+ if (type.mode() == UnionMode::DENSE) {
+ RETURN_NOT_OK(GetBuffer(buffer_index_ + 1, &out_->buffers[2]));
+ }
+ }
+ buffer_index_ += n_buffers - 1;
+ return LoadChildren(type.fields());
+ }
+
+ Status Visit(const DictionaryType& type) {
+ // out_->dictionary will be filled later in ResolveDictionaries()
+ return LoadType(*type.index_type());
+ }
+
+ Status Visit(const ExtensionType& type) { return LoadType(*type.storage_type()); }
+
+ private:
+ const flatbuf::RecordBatch* metadata_;
+ const MetadataVersion metadata_version_;
+ io::RandomAccessFile* file_;
+ int max_recursion_depth_;
+ int buffer_index_ = 0;
+ int field_index_ = 0;
+ bool skip_io_ = false;
+
+ const Field* field_;
+ ArrayData* out_;
+};
+
+Result<std::shared_ptr<Buffer>> DecompressBuffer(const std::shared_ptr<Buffer>& buf,
+ const IpcReadOptions& options,
+ util::Codec* codec) {
+ if (buf == nullptr || buf->size() == 0) {
+ return buf;
+ }
+
+ if (buf->size() < 8) {
+ return Status::Invalid(
+ "Likely corrupted message, compressed buffers "
+ "are larger than 8 bytes by construction");
+ }
+
+ const uint8_t* data = buf->data();
+ int64_t compressed_size = buf->size() - sizeof(int64_t);
+ int64_t uncompressed_size = BitUtil::FromLittleEndian(util::SafeLoadAs<int64_t>(data));
+
+ ARROW_ASSIGN_OR_RAISE(auto uncompressed,
+ AllocateBuffer(uncompressed_size, options.memory_pool));
+
+ ARROW_ASSIGN_OR_RAISE(
+ int64_t actual_decompressed,
+ codec->Decompress(compressed_size, data + sizeof(int64_t), uncompressed_size,
+ uncompressed->mutable_data()));
+ if (actual_decompressed != uncompressed_size) {
+ return Status::Invalid("Failed to fully decompress buffer, expected ",
+ uncompressed_size, " bytes but decompressed ",
+ actual_decompressed);
+ }
+
+ return std::move(uncompressed);
+}
+
+Status DecompressBuffers(Compression::type compression, const IpcReadOptions& options,
+ ArrayDataVector* fields) {
+ struct BufferAccumulator {
+ using BufferPtrVector = std::vector<std::shared_ptr<Buffer>*>;
+
+ void AppendFrom(const ArrayDataVector& fields) {
+ for (const auto& field : fields) {
+ for (auto& buffer : field->buffers) {
+ buffers_.push_back(&buffer);
+ }
+ AppendFrom(field->child_data);
+ }
+ }
+
+ BufferPtrVector Get(const ArrayDataVector& fields) && {
+ AppendFrom(fields);
+ return std::move(buffers_);
+ }
+
+ BufferPtrVector buffers_;
+ };
+
+ // Flatten all buffers
+ auto buffers = BufferAccumulator{}.Get(*fields);
+
+ std::unique_ptr<util::Codec> codec;
+ ARROW_ASSIGN_OR_RAISE(codec, util::Codec::Create(compression));
+
+ return ::arrow::internal::OptionalParallelFor(
+ options.use_threads, static_cast<int>(buffers.size()), [&](int i) {
+ ARROW_ASSIGN_OR_RAISE(*buffers[i],
+ DecompressBuffer(*buffers[i], options, codec.get()));
+ return Status::OK();
+ });
+}
+
+Result<std::shared_ptr<RecordBatch>> LoadRecordBatchSubset(
+ const flatbuf::RecordBatch* metadata, const std::shared_ptr<Schema>& schema,
const std::vector<bool>* inclusion_mask, const IpcReadContext& context,
io::RandomAccessFile* file) {
ArrayLoader loader(metadata, context.metadata_version, context.options, file);
-
- ArrayDataVector columns(schema->num_fields());
- ArrayDataVector filtered_columns;
- FieldVector filtered_fields;
- std::shared_ptr<Schema> filtered_schema;
-
- for (int i = 0; i < schema->num_fields(); ++i) {
- const Field& field = *schema->field(i);
- if (!inclusion_mask || (*inclusion_mask)[i]) {
- // Read field
- auto column = std::make_shared<ArrayData>();
- RETURN_NOT_OK(loader.Load(&field, column.get()));
- if (metadata->length() != column->length) {
- return Status::IOError("Array length did not match record batch length");
- }
- columns[i] = std::move(column);
- if (inclusion_mask) {
- filtered_columns.push_back(columns[i]);
- filtered_fields.push_back(schema->field(i));
- }
- } else {
- // Skip field. This logic must be executed to advance the state of the
- // loader to the next field
- RETURN_NOT_OK(loader.SkipField(&field));
- }
- }
-
- // Dictionary resolution needs to happen on the unfiltered columns,
- // because fields are mapped structurally (by path in the original schema).
+
+ ArrayDataVector columns(schema->num_fields());
+ ArrayDataVector filtered_columns;
+ FieldVector filtered_fields;
+ std::shared_ptr<Schema> filtered_schema;
+
+ for (int i = 0; i < schema->num_fields(); ++i) {
+ const Field& field = *schema->field(i);
+ if (!inclusion_mask || (*inclusion_mask)[i]) {
+ // Read field
+ auto column = std::make_shared<ArrayData>();
+ RETURN_NOT_OK(loader.Load(&field, column.get()));
+ if (metadata->length() != column->length) {
+ return Status::IOError("Array length did not match record batch length");
+ }
+ columns[i] = std::move(column);
+ if (inclusion_mask) {
+ filtered_columns.push_back(columns[i]);
+ filtered_fields.push_back(schema->field(i));
+ }
+ } else {
+ // Skip field. This logic must be executed to advance the state of the
+ // loader to the next field
+ RETURN_NOT_OK(loader.SkipField(&field));
+ }
+ }
+
+ // Dictionary resolution needs to happen on the unfiltered columns,
+ // because fields are mapped structurally (by path in the original schema).
RETURN_NOT_OK(ResolveDictionaries(columns, *context.dictionary_memo,
context.options.memory_pool));
-
- if (inclusion_mask) {
- filtered_schema = ::arrow::schema(std::move(filtered_fields), schema->metadata());
- columns.clear();
- } else {
- filtered_schema = schema;
- filtered_columns = std::move(columns);
- }
+
+ if (inclusion_mask) {
+ filtered_schema = ::arrow::schema(std::move(filtered_fields), schema->metadata());
+ columns.clear();
+ } else {
+ filtered_schema = schema;
+ filtered_columns = std::move(columns);
+ }
if (context.compression != Compression::UNCOMPRESSED) {
RETURN_NOT_OK(
DecompressBuffers(context.compression, context.options, &filtered_columns));
- }
-
+ }
+
// swap endian in a set of ArrayData if necessary (swap_endian == true)
if (context.swap_endian) {
for (int i = 0; i < static_cast<int>(filtered_columns.size()); ++i) {
@@ -522,161 +522,161 @@ Result<std::shared_ptr<RecordBatch>> LoadRecordBatchSubset(
}
}
return RecordBatch::Make(std::move(filtered_schema), metadata->length(),
- std::move(filtered_columns));
-}
-
-Result<std::shared_ptr<RecordBatch>> LoadRecordBatch(
- const flatbuf::RecordBatch* metadata, const std::shared_ptr<Schema>& schema,
+ std::move(filtered_columns));
+}
+
+Result<std::shared_ptr<RecordBatch>> LoadRecordBatch(
+ const flatbuf::RecordBatch* metadata, const std::shared_ptr<Schema>& schema,
const std::vector<bool>& inclusion_mask, const IpcReadContext& context,
io::RandomAccessFile* file) {
- if (inclusion_mask.size() > 0) {
+ if (inclusion_mask.size() > 0) {
return LoadRecordBatchSubset(metadata, schema, &inclusion_mask, context, file);
- } else {
+ } else {
return LoadRecordBatchSubset(metadata, schema, /*param_name=*/nullptr, context, file);
- }
-}
-
-// ----------------------------------------------------------------------
-// Array loading
-
-Status GetCompression(const flatbuf::RecordBatch* batch, Compression::type* out) {
- *out = Compression::UNCOMPRESSED;
- const flatbuf::BodyCompression* compression = batch->compression();
- if (compression != nullptr) {
- if (compression->method() != flatbuf::BodyCompressionMethod::BUFFER) {
- // Forward compatibility
- return Status::Invalid("This library only supports BUFFER compression method");
- }
-
- if (compression->codec() == flatbuf::CompressionType::LZ4_FRAME) {
- *out = Compression::LZ4_FRAME;
- } else if (compression->codec() == flatbuf::CompressionType::ZSTD) {
- *out = Compression::ZSTD;
- } else {
- return Status::Invalid("Unsupported codec in RecordBatch::compression metadata");
- }
- return Status::OK();
- }
- return Status::OK();
-}
-
-Status GetCompressionExperimental(const flatbuf::Message* message,
- Compression::type* out) {
- *out = Compression::UNCOMPRESSED;
- if (message->custom_metadata() != nullptr) {
- // TODO: Ensure this deserialization only ever happens once
- std::shared_ptr<KeyValueMetadata> metadata;
- RETURN_NOT_OK(internal::GetKeyValueMetadata(message->custom_metadata(), &metadata));
- int index = metadata->FindKey("ARROW:experimental_compression");
- if (index != -1) {
+ }
+}
+
+// ----------------------------------------------------------------------
+// Array loading
+
+Status GetCompression(const flatbuf::RecordBatch* batch, Compression::type* out) {
+ *out = Compression::UNCOMPRESSED;
+ const flatbuf::BodyCompression* compression = batch->compression();
+ if (compression != nullptr) {
+ if (compression->method() != flatbuf::BodyCompressionMethod::BUFFER) {
+ // Forward compatibility
+ return Status::Invalid("This library only supports BUFFER compression method");
+ }
+
+ if (compression->codec() == flatbuf::CompressionType::LZ4_FRAME) {
+ *out = Compression::LZ4_FRAME;
+ } else if (compression->codec() == flatbuf::CompressionType::ZSTD) {
+ *out = Compression::ZSTD;
+ } else {
+ return Status::Invalid("Unsupported codec in RecordBatch::compression metadata");
+ }
+ return Status::OK();
+ }
+ return Status::OK();
+}
+
+Status GetCompressionExperimental(const flatbuf::Message* message,
+ Compression::type* out) {
+ *out = Compression::UNCOMPRESSED;
+ if (message->custom_metadata() != nullptr) {
+ // TODO: Ensure this deserialization only ever happens once
+ std::shared_ptr<KeyValueMetadata> metadata;
+ RETURN_NOT_OK(internal::GetKeyValueMetadata(message->custom_metadata(), &metadata));
+ int index = metadata->FindKey("ARROW:experimental_compression");
+ if (index != -1) {
// Arrow 0.17 stored string in upper case, internal utils now require lower case
auto name = arrow::internal::AsciiToLower(metadata->value(index));
ARROW_ASSIGN_OR_RAISE(*out, util::Codec::GetCompressionType(name));
- }
- return internal::CheckCompressionSupported(*out);
- }
- return Status::OK();
-}
-
-static Status ReadContiguousPayload(io::InputStream* file,
- std::unique_ptr<Message>* message) {
- ARROW_ASSIGN_OR_RAISE(*message, ReadMessage(file));
- if (*message == nullptr) {
- return Status::Invalid("Unable to read metadata at offset");
- }
- return Status::OK();
-}
-
-Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(
- const std::shared_ptr<Schema>& schema, const DictionaryMemo* dictionary_memo,
- const IpcReadOptions& options, io::InputStream* file) {
- std::unique_ptr<Message> message;
- RETURN_NOT_OK(ReadContiguousPayload(file, &message));
- CHECK_HAS_BODY(*message);
- ARROW_ASSIGN_OR_RAISE(auto reader, Buffer::GetReader(message->body()));
- return ReadRecordBatch(*message->metadata(), schema, dictionary_memo, options,
- reader.get());
-}
-
-Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(
- const Message& message, const std::shared_ptr<Schema>& schema,
- const DictionaryMemo* dictionary_memo, const IpcReadOptions& options) {
- CHECK_MESSAGE_TYPE(MessageType::RECORD_BATCH, message.type());
- CHECK_HAS_BODY(message);
- ARROW_ASSIGN_OR_RAISE(auto reader, Buffer::GetReader(message.body()));
- return ReadRecordBatch(*message.metadata(), schema, dictionary_memo, options,
- reader.get());
-}
-
-Result<std::shared_ptr<RecordBatch>> ReadRecordBatchInternal(
- const Buffer& metadata, const std::shared_ptr<Schema>& schema,
+ }
+ return internal::CheckCompressionSupported(*out);
+ }
+ return Status::OK();
+}
+
+static Status ReadContiguousPayload(io::InputStream* file,
+ std::unique_ptr<Message>* message) {
+ ARROW_ASSIGN_OR_RAISE(*message, ReadMessage(file));
+ if (*message == nullptr) {
+ return Status::Invalid("Unable to read metadata at offset");
+ }
+ return Status::OK();
+}
+
+Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(
+ const std::shared_ptr<Schema>& schema, const DictionaryMemo* dictionary_memo,
+ const IpcReadOptions& options, io::InputStream* file) {
+ std::unique_ptr<Message> message;
+ RETURN_NOT_OK(ReadContiguousPayload(file, &message));
+ CHECK_HAS_BODY(*message);
+ ARROW_ASSIGN_OR_RAISE(auto reader, Buffer::GetReader(message->body()));
+ return ReadRecordBatch(*message->metadata(), schema, dictionary_memo, options,
+ reader.get());
+}
+
+Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(
+ const Message& message, const std::shared_ptr<Schema>& schema,
+ const DictionaryMemo* dictionary_memo, const IpcReadOptions& options) {
+ CHECK_MESSAGE_TYPE(MessageType::RECORD_BATCH, message.type());
+ CHECK_HAS_BODY(message);
+ ARROW_ASSIGN_OR_RAISE(auto reader, Buffer::GetReader(message.body()));
+ return ReadRecordBatch(*message.metadata(), schema, dictionary_memo, options,
+ reader.get());
+}
+
+Result<std::shared_ptr<RecordBatch>> ReadRecordBatchInternal(
+ const Buffer& metadata, const std::shared_ptr<Schema>& schema,
const std::vector<bool>& inclusion_mask, IpcReadContext& context,
io::RandomAccessFile* file) {
- const flatbuf::Message* message = nullptr;
- RETURN_NOT_OK(internal::VerifyMessage(metadata.data(), metadata.size(), &message));
- auto batch = message->header_as_RecordBatch();
- if (batch == nullptr) {
- return Status::IOError(
- "Header-type of flatbuffer-encoded Message is not RecordBatch.");
- }
-
- Compression::type compression;
- RETURN_NOT_OK(GetCompression(batch, &compression));
+ const flatbuf::Message* message = nullptr;
+ RETURN_NOT_OK(internal::VerifyMessage(metadata.data(), metadata.size(), &message));
+ auto batch = message->header_as_RecordBatch();
+ if (batch == nullptr) {
+ return Status::IOError(
+ "Header-type of flatbuffer-encoded Message is not RecordBatch.");
+ }
+
+ Compression::type compression;
+ RETURN_NOT_OK(GetCompression(batch, &compression));
if (context.compression == Compression::UNCOMPRESSED &&
- message->version() == flatbuf::MetadataVersion::V4) {
- // Possibly obtain codec information from experimental serialization format
- // in 0.17.x
- RETURN_NOT_OK(GetCompressionExperimental(message, &compression));
- }
+ message->version() == flatbuf::MetadataVersion::V4) {
+ // Possibly obtain codec information from experimental serialization format
+ // in 0.17.x
+ RETURN_NOT_OK(GetCompressionExperimental(message, &compression));
+ }
context.compression = compression;
context.metadata_version = internal::GetMetadataVersion(message->version());
return LoadRecordBatch(batch, schema, inclusion_mask, context, file);
-}
-
-// If we are selecting only certain fields, populate an inclusion mask for fast lookups.
-// Additionally, drop deselected fields from the reader's schema.
-Status GetInclusionMaskAndOutSchema(const std::shared_ptr<Schema>& full_schema,
- const std::vector<int>& included_indices,
- std::vector<bool>* inclusion_mask,
- std::shared_ptr<Schema>* out_schema) {
- inclusion_mask->clear();
- if (included_indices.empty()) {
- *out_schema = full_schema;
- return Status::OK();
- }
-
- inclusion_mask->resize(full_schema->num_fields(), false);
-
- auto included_indices_sorted = included_indices;
- std::sort(included_indices_sorted.begin(), included_indices_sorted.end());
-
- FieldVector included_fields;
- for (int i : included_indices_sorted) {
- // Ignore out of bounds indices
- if (i < 0 || i >= full_schema->num_fields()) {
- return Status::Invalid("Out of bounds field index: ", i);
- }
-
- if (inclusion_mask->at(i)) continue;
-
- inclusion_mask->at(i) = true;
- included_fields.push_back(full_schema->field(i));
- }
-
+}
+
+// If we are selecting only certain fields, populate an inclusion mask for fast lookups.
+// Additionally, drop deselected fields from the reader's schema.
+Status GetInclusionMaskAndOutSchema(const std::shared_ptr<Schema>& full_schema,
+ const std::vector<int>& included_indices,
+ std::vector<bool>* inclusion_mask,
+ std::shared_ptr<Schema>* out_schema) {
+ inclusion_mask->clear();
+ if (included_indices.empty()) {
+ *out_schema = full_schema;
+ return Status::OK();
+ }
+
+ inclusion_mask->resize(full_schema->num_fields(), false);
+
+ auto included_indices_sorted = included_indices;
+ std::sort(included_indices_sorted.begin(), included_indices_sorted.end());
+
+ FieldVector included_fields;
+ for (int i : included_indices_sorted) {
+ // Ignore out of bounds indices
+ if (i < 0 || i >= full_schema->num_fields()) {
+ return Status::Invalid("Out of bounds field index: ", i);
+ }
+
+ if (inclusion_mask->at(i)) continue;
+
+ inclusion_mask->at(i) = true;
+ included_fields.push_back(full_schema->field(i));
+ }
+
*out_schema = schema(std::move(included_fields), full_schema->endianness(),
full_schema->metadata());
- return Status::OK();
-}
-
-Status UnpackSchemaMessage(const void* opaque_schema, const IpcReadOptions& options,
- DictionaryMemo* dictionary_memo,
- std::shared_ptr<Schema>* schema,
- std::shared_ptr<Schema>* out_schema,
+ return Status::OK();
+}
+
+Status UnpackSchemaMessage(const void* opaque_schema, const IpcReadOptions& options,
+ DictionaryMemo* dictionary_memo,
+ std::shared_ptr<Schema>* schema,
+ std::shared_ptr<Schema>* out_schema,
std::vector<bool>* field_inclusion_mask, bool* swap_endian) {
- RETURN_NOT_OK(internal::GetSchema(opaque_schema, dictionary_memo, schema));
-
- // If we are selecting only certain fields, populate the inclusion mask now
- // for fast lookups
+ RETURN_NOT_OK(internal::GetSchema(opaque_schema, dictionary_memo, schema));
+
+ // If we are selecting only certain fields, populate the inclusion mask now
+ // for fast lookups
RETURN_NOT_OK(GetInclusionMaskAndOutSchema(*schema, options.included_fields,
field_inclusion_mask, out_schema));
*swap_endian = options.ensure_native_endian && !out_schema->get()->is_native_endian();
@@ -686,287 +686,287 @@ Status UnpackSchemaMessage(const void* opaque_schema, const IpcReadOptions& opti
*out_schema = out_schema->get()->WithEndianness(Endianness::Native);
}
return Status::OK();
-}
-
-Status UnpackSchemaMessage(const Message& message, const IpcReadOptions& options,
- DictionaryMemo* dictionary_memo,
- std::shared_ptr<Schema>* schema,
- std::shared_ptr<Schema>* out_schema,
+}
+
+Status UnpackSchemaMessage(const Message& message, const IpcReadOptions& options,
+ DictionaryMemo* dictionary_memo,
+ std::shared_ptr<Schema>* schema,
+ std::shared_ptr<Schema>* out_schema,
std::vector<bool>* field_inclusion_mask, bool* swap_endian) {
- CHECK_MESSAGE_TYPE(MessageType::SCHEMA, message.type());
- CHECK_HAS_NO_BODY(message);
-
- return UnpackSchemaMessage(message.header(), options, dictionary_memo, schema,
+ CHECK_MESSAGE_TYPE(MessageType::SCHEMA, message.type());
+ CHECK_HAS_NO_BODY(message);
+
+ return UnpackSchemaMessage(message.header(), options, dictionary_memo, schema,
out_schema, field_inclusion_mask, swap_endian);
-}
-
-Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(
- const Buffer& metadata, const std::shared_ptr<Schema>& schema,
- const DictionaryMemo* dictionary_memo, const IpcReadOptions& options,
- io::RandomAccessFile* file) {
- std::shared_ptr<Schema> out_schema;
- // Empty means do not use
- std::vector<bool> inclusion_mask;
+}
+
+Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(
+ const Buffer& metadata, const std::shared_ptr<Schema>& schema,
+ const DictionaryMemo* dictionary_memo, const IpcReadOptions& options,
+ io::RandomAccessFile* file) {
+ std::shared_ptr<Schema> out_schema;
+ // Empty means do not use
+ std::vector<bool> inclusion_mask;
IpcReadContext context(const_cast<DictionaryMemo*>(dictionary_memo), options, false);
RETURN_NOT_OK(GetInclusionMaskAndOutSchema(schema, context.options.included_fields,
- &inclusion_mask, &out_schema));
+ &inclusion_mask, &out_schema));
return ReadRecordBatchInternal(metadata, schema, inclusion_mask, context, file);
-}
-
+}
+
Status ReadDictionary(const Buffer& metadata, const IpcReadContext& context,
DictionaryKind* kind, io::RandomAccessFile* file) {
- const flatbuf::Message* message = nullptr;
- RETURN_NOT_OK(internal::VerifyMessage(metadata.data(), metadata.size(), &message));
- const auto dictionary_batch = message->header_as_DictionaryBatch();
- if (dictionary_batch == nullptr) {
- return Status::IOError(
- "Header-type of flatbuffer-encoded Message is not DictionaryBatch.");
- }
-
- // The dictionary is embedded in a record batch with a single column
- const auto batch_meta = dictionary_batch->data();
-
- CHECK_FLATBUFFERS_NOT_NULL(batch_meta, "DictionaryBatch.data");
-
- Compression::type compression;
- RETURN_NOT_OK(GetCompression(batch_meta, &compression));
- if (compression == Compression::UNCOMPRESSED &&
- message->version() == flatbuf::MetadataVersion::V4) {
- // Possibly obtain codec information from experimental serialization format
- // in 0.17.x
- RETURN_NOT_OK(GetCompressionExperimental(message, &compression));
- }
-
- const int64_t id = dictionary_batch->id();
-
- // Look up the dictionary value type, which must have been added to the
- // DictionaryMemo already prior to invoking this function
+ const flatbuf::Message* message = nullptr;
+ RETURN_NOT_OK(internal::VerifyMessage(metadata.data(), metadata.size(), &message));
+ const auto dictionary_batch = message->header_as_DictionaryBatch();
+ if (dictionary_batch == nullptr) {
+ return Status::IOError(
+ "Header-type of flatbuffer-encoded Message is not DictionaryBatch.");
+ }
+
+ // The dictionary is embedded in a record batch with a single column
+ const auto batch_meta = dictionary_batch->data();
+
+ CHECK_FLATBUFFERS_NOT_NULL(batch_meta, "DictionaryBatch.data");
+
+ Compression::type compression;
+ RETURN_NOT_OK(GetCompression(batch_meta, &compression));
+ if (compression == Compression::UNCOMPRESSED &&
+ message->version() == flatbuf::MetadataVersion::V4) {
+ // Possibly obtain codec information from experimental serialization format
+ // in 0.17.x
+ RETURN_NOT_OK(GetCompressionExperimental(message, &compression));
+ }
+
+ const int64_t id = dictionary_batch->id();
+
+ // Look up the dictionary value type, which must have been added to the
+ // DictionaryMemo already prior to invoking this function
ARROW_ASSIGN_OR_RAISE(auto value_type, context.dictionary_memo->GetDictionaryType(id));
-
- // Load the dictionary data from the dictionary batch
- ArrayLoader loader(batch_meta, internal::GetMetadataVersion(message->version()),
+
+ // Load the dictionary data from the dictionary batch
+ ArrayLoader loader(batch_meta, internal::GetMetadataVersion(message->version()),
context.options, file);
auto dict_data = std::make_shared<ArrayData>();
- const Field dummy_field("", value_type);
- RETURN_NOT_OK(loader.Load(&dummy_field, dict_data.get()));
-
- if (compression != Compression::UNCOMPRESSED) {
- ArrayDataVector dict_fields{dict_data};
+ const Field dummy_field("", value_type);
+ RETURN_NOT_OK(loader.Load(&dummy_field, dict_data.get()));
+
+ if (compression != Compression::UNCOMPRESSED) {
+ ArrayDataVector dict_fields{dict_data};
RETURN_NOT_OK(DecompressBuffers(compression, context.options, &dict_fields));
- }
-
+ }
+
// swap endian in dict_data if necessary (swap_endian == true)
if (context.swap_endian) {
ARROW_ASSIGN_OR_RAISE(dict_data, ::arrow::internal::SwapEndianArrayData(dict_data));
}
- if (dictionary_batch->isDelta()) {
- if (kind != nullptr) {
- *kind = DictionaryKind::Delta;
- }
+ if (dictionary_batch->isDelta()) {
+ if (kind != nullptr) {
+ *kind = DictionaryKind::Delta;
+ }
return context.dictionary_memo->AddDictionaryDelta(id, dict_data);
- }
- ARROW_ASSIGN_OR_RAISE(bool inserted,
+ }
+ ARROW_ASSIGN_OR_RAISE(bool inserted,
context.dictionary_memo->AddOrReplaceDictionary(id, dict_data));
- if (kind != nullptr) {
- *kind = inserted ? DictionaryKind::New : DictionaryKind::Replacement;
- }
- return Status::OK();
-}
-
+ if (kind != nullptr) {
+ *kind = inserted ? DictionaryKind::New : DictionaryKind::Replacement;
+ }
+ return Status::OK();
+}
+
Status ReadDictionary(const Message& message, const IpcReadContext& context,
DictionaryKind* kind) {
- // Only invoke this method if we already know we have a dictionary message
- DCHECK_EQ(message.type(), MessageType::DICTIONARY_BATCH);
- CHECK_HAS_BODY(message);
- ARROW_ASSIGN_OR_RAISE(auto reader, Buffer::GetReader(message.body()));
+ // Only invoke this method if we already know we have a dictionary message
+ DCHECK_EQ(message.type(), MessageType::DICTIONARY_BATCH);
+ CHECK_HAS_BODY(message);
+ ARROW_ASSIGN_OR_RAISE(auto reader, Buffer::GetReader(message.body()));
return ReadDictionary(*message.metadata(), context, kind, reader.get());
-}
-
-// ----------------------------------------------------------------------
-// RecordBatchStreamReader implementation
-
-class RecordBatchStreamReaderImpl : public RecordBatchStreamReader {
- public:
- Status Open(std::unique_ptr<MessageReader> message_reader,
- const IpcReadOptions& options) {
- message_reader_ = std::move(message_reader);
- options_ = options;
-
- // Read schema
- ARROW_ASSIGN_OR_RAISE(std::unique_ptr<Message> message, ReadNextMessage());
- if (!message) {
- return Status::Invalid("Tried reading schema message, was null or length 0");
- }
-
+}
+
+// ----------------------------------------------------------------------
+// RecordBatchStreamReader implementation
+
+class RecordBatchStreamReaderImpl : public RecordBatchStreamReader {
+ public:
+ Status Open(std::unique_ptr<MessageReader> message_reader,
+ const IpcReadOptions& options) {
+ message_reader_ = std::move(message_reader);
+ options_ = options;
+
+ // Read schema
+ ARROW_ASSIGN_OR_RAISE(std::unique_ptr<Message> message, ReadNextMessage());
+ if (!message) {
+ return Status::Invalid("Tried reading schema message, was null or length 0");
+ }
+
RETURN_NOT_OK(UnpackSchemaMessage(*message, options, &dictionary_memo_, &schema_,
&out_schema_, &field_inclusion_mask_,
&swap_endian_));
return Status::OK();
- }
-
- Status ReadNext(std::shared_ptr<RecordBatch>* batch) override {
- if (!have_read_initial_dictionaries_) {
- RETURN_NOT_OK(ReadInitialDictionaries());
- }
-
- if (empty_stream_) {
- // ARROW-6006: Degenerate case where stream contains no data, we do not
- // bother trying to read a RecordBatch message from the stream
- *batch = nullptr;
- return Status::OK();
- }
-
- // Continue to read other dictionaries, if any
- std::unique_ptr<Message> message;
- ARROW_ASSIGN_OR_RAISE(message, ReadNextMessage());
-
- while (message != nullptr && message->type() == MessageType::DICTIONARY_BATCH) {
- RETURN_NOT_OK(ReadDictionary(*message));
- ARROW_ASSIGN_OR_RAISE(message, ReadNextMessage());
- }
-
- if (message == nullptr) {
- // End of stream
- *batch = nullptr;
- return Status::OK();
- }
-
- CHECK_HAS_BODY(*message);
- ARROW_ASSIGN_OR_RAISE(auto reader, Buffer::GetReader(message->body()));
+ }
+
+ Status ReadNext(std::shared_ptr<RecordBatch>* batch) override {
+ if (!have_read_initial_dictionaries_) {
+ RETURN_NOT_OK(ReadInitialDictionaries());
+ }
+
+ if (empty_stream_) {
+ // ARROW-6006: Degenerate case where stream contains no data, we do not
+ // bother trying to read a RecordBatch message from the stream
+ *batch = nullptr;
+ return Status::OK();
+ }
+
+ // Continue to read other dictionaries, if any
+ std::unique_ptr<Message> message;
+ ARROW_ASSIGN_OR_RAISE(message, ReadNextMessage());
+
+ while (message != nullptr && message->type() == MessageType::DICTIONARY_BATCH) {
+ RETURN_NOT_OK(ReadDictionary(*message));
+ ARROW_ASSIGN_OR_RAISE(message, ReadNextMessage());
+ }
+
+ if (message == nullptr) {
+ // End of stream
+ *batch = nullptr;
+ return Status::OK();
+ }
+
+ CHECK_HAS_BODY(*message);
+ ARROW_ASSIGN_OR_RAISE(auto reader, Buffer::GetReader(message->body()));
IpcReadContext context(&dictionary_memo_, options_, swap_endian_);
- return ReadRecordBatchInternal(*message->metadata(), schema_, field_inclusion_mask_,
+ return ReadRecordBatchInternal(*message->metadata(), schema_, field_inclusion_mask_,
context, reader.get())
- .Value(batch);
- }
-
- std::shared_ptr<Schema> schema() const override { return out_schema_; }
-
- ReadStats stats() const override { return stats_; }
-
- private:
- Result<std::unique_ptr<Message>> ReadNextMessage() {
- ARROW_ASSIGN_OR_RAISE(auto message, message_reader_->ReadNextMessage());
- if (message) {
- ++stats_.num_messages;
- switch (message->type()) {
- case MessageType::RECORD_BATCH:
- ++stats_.num_record_batches;
- break;
- case MessageType::DICTIONARY_BATCH:
- ++stats_.num_dictionary_batches;
- break;
- default:
- break;
- }
- }
- return std::move(message);
- }
-
- // Read dictionary from dictionary batch
- Status ReadDictionary(const Message& message) {
- DictionaryKind kind;
+ .Value(batch);
+ }
+
+ std::shared_ptr<Schema> schema() const override { return out_schema_; }
+
+ ReadStats stats() const override { return stats_; }
+
+ private:
+ Result<std::unique_ptr<Message>> ReadNextMessage() {
+ ARROW_ASSIGN_OR_RAISE(auto message, message_reader_->ReadNextMessage());
+ if (message) {
+ ++stats_.num_messages;
+ switch (message->type()) {
+ case MessageType::RECORD_BATCH:
+ ++stats_.num_record_batches;
+ break;
+ case MessageType::DICTIONARY_BATCH:
+ ++stats_.num_dictionary_batches;
+ break;
+ default:
+ break;
+ }
+ }
+ return std::move(message);
+ }
+
+ // Read dictionary from dictionary batch
+ Status ReadDictionary(const Message& message) {
+ DictionaryKind kind;
IpcReadContext context(&dictionary_memo_, options_, swap_endian_);
RETURN_NOT_OK(::arrow::ipc::ReadDictionary(message, context, &kind));
- switch (kind) {
- case DictionaryKind::New:
- break;
- case DictionaryKind::Delta:
- ++stats_.num_dictionary_deltas;
- break;
- case DictionaryKind::Replacement:
- ++stats_.num_replaced_dictionaries;
- break;
- }
- return Status::OK();
- }
-
- Status ReadInitialDictionaries() {
- // We must receive all dictionaries before reconstructing the
- // first record batch. Subsequent dictionary deltas modify the memo
- std::unique_ptr<Message> message;
-
- // TODO(wesm): In future, we may want to reconcile the ids in the stream with
- // those found in the schema
+ switch (kind) {
+ case DictionaryKind::New:
+ break;
+ case DictionaryKind::Delta:
+ ++stats_.num_dictionary_deltas;
+ break;
+ case DictionaryKind::Replacement:
+ ++stats_.num_replaced_dictionaries;
+ break;
+ }
+ return Status::OK();
+ }
+
+ Status ReadInitialDictionaries() {
+ // We must receive all dictionaries before reconstructing the
+ // first record batch. Subsequent dictionary deltas modify the memo
+ std::unique_ptr<Message> message;
+
+ // TODO(wesm): In future, we may want to reconcile the ids in the stream with
+ // those found in the schema
const auto num_dicts = dictionary_memo_.fields().num_dicts();
- for (int i = 0; i < num_dicts; ++i) {
- ARROW_ASSIGN_OR_RAISE(message, ReadNextMessage());
- if (!message) {
- if (i == 0) {
- /// ARROW-6006: If we fail to find any dictionaries in the stream, then
- /// it may be that the stream has a schema but no actual data. In such
- /// case we communicate that we were unable to find the dictionaries
- /// (but there was no failure otherwise), so the caller can decide what
- /// to do
- empty_stream_ = true;
- break;
- } else {
- // ARROW-6126, the stream terminated before receiving the expected
- // number of dictionaries
- return Status::Invalid("IPC stream ended without reading the expected number (",
- num_dicts, ") of dictionaries");
- }
- }
-
- if (message->type() != MessageType::DICTIONARY_BATCH) {
- return Status::Invalid("IPC stream did not have the expected number (", num_dicts,
- ") of dictionaries at the start of the stream");
- }
- RETURN_NOT_OK(ReadDictionary(*message));
- }
-
- have_read_initial_dictionaries_ = true;
- return Status::OK();
- }
-
- std::unique_ptr<MessageReader> message_reader_;
- IpcReadOptions options_;
- std::vector<bool> field_inclusion_mask_;
-
- bool have_read_initial_dictionaries_ = false;
-
- // Flag to set in case where we fail to observe all dictionaries in a stream,
- // and so the reader should not attempt to parse any messages
- bool empty_stream_ = false;
-
- ReadStats stats_;
-
- DictionaryMemo dictionary_memo_;
- std::shared_ptr<Schema> schema_, out_schema_;
+ for (int i = 0; i < num_dicts; ++i) {
+ ARROW_ASSIGN_OR_RAISE(message, ReadNextMessage());
+ if (!message) {
+ if (i == 0) {
+ /// ARROW-6006: If we fail to find any dictionaries in the stream, then
+ /// it may be that the stream has a schema but no actual data. In such
+ /// case we communicate that we were unable to find the dictionaries
+ /// (but there was no failure otherwise), so the caller can decide what
+ /// to do
+ empty_stream_ = true;
+ break;
+ } else {
+ // ARROW-6126, the stream terminated before receiving the expected
+ // number of dictionaries
+ return Status::Invalid("IPC stream ended without reading the expected number (",
+ num_dicts, ") of dictionaries");
+ }
+ }
+
+ if (message->type() != MessageType::DICTIONARY_BATCH) {
+ return Status::Invalid("IPC stream did not have the expected number (", num_dicts,
+ ") of dictionaries at the start of the stream");
+ }
+ RETURN_NOT_OK(ReadDictionary(*message));
+ }
+
+ have_read_initial_dictionaries_ = true;
+ return Status::OK();
+ }
+
+ std::unique_ptr<MessageReader> message_reader_;
+ IpcReadOptions options_;
+ std::vector<bool> field_inclusion_mask_;
+
+ bool have_read_initial_dictionaries_ = false;
+
+ // Flag to set in case where we fail to observe all dictionaries in a stream,
+ // and so the reader should not attempt to parse any messages
+ bool empty_stream_ = false;
+
+ ReadStats stats_;
+
+ DictionaryMemo dictionary_memo_;
+ std::shared_ptr<Schema> schema_, out_schema_;
bool swap_endian_;
-};
-
-// ----------------------------------------------------------------------
-// Stream reader constructors
-
-Result<std::shared_ptr<RecordBatchStreamReader>> RecordBatchStreamReader::Open(
- std::unique_ptr<MessageReader> message_reader, const IpcReadOptions& options) {
- // Private ctor
- auto result = std::make_shared<RecordBatchStreamReaderImpl>();
- RETURN_NOT_OK(result->Open(std::move(message_reader), options));
- return result;
-}
-
-Result<std::shared_ptr<RecordBatchStreamReader>> RecordBatchStreamReader::Open(
- io::InputStream* stream, const IpcReadOptions& options) {
- return Open(MessageReader::Open(stream), options);
-}
-
-Result<std::shared_ptr<RecordBatchStreamReader>> RecordBatchStreamReader::Open(
- const std::shared_ptr<io::InputStream>& stream, const IpcReadOptions& options) {
- return Open(MessageReader::Open(stream), options);
-}
-
-// ----------------------------------------------------------------------
-// Reader implementation
-
+};
+
+// ----------------------------------------------------------------------
+// Stream reader constructors
+
+Result<std::shared_ptr<RecordBatchStreamReader>> RecordBatchStreamReader::Open(
+ std::unique_ptr<MessageReader> message_reader, const IpcReadOptions& options) {
+ // Private ctor
+ auto result = std::make_shared<RecordBatchStreamReaderImpl>();
+ RETURN_NOT_OK(result->Open(std::move(message_reader), options));
+ return result;
+}
+
+Result<std::shared_ptr<RecordBatchStreamReader>> RecordBatchStreamReader::Open(
+ io::InputStream* stream, const IpcReadOptions& options) {
+ return Open(MessageReader::Open(stream), options);
+}
+
+Result<std::shared_ptr<RecordBatchStreamReader>> RecordBatchStreamReader::Open(
+ const std::shared_ptr<io::InputStream>& stream, const IpcReadOptions& options) {
+ return Open(MessageReader::Open(stream), options);
+}
+
+// ----------------------------------------------------------------------
+// Reader implementation
+
// Common functions used in both the random-access file reader and the
// asynchronous generator
-static inline FileBlock FileBlockFromFlatbuffer(const flatbuf::Block* block) {
- return FileBlock{block->offset(), block->metaDataLength(), block->bodyLength()};
-}
-
+static inline FileBlock FileBlockFromFlatbuffer(const flatbuf::Block* block) {
+ return FileBlock{block->offset(), block->metaDataLength(), block->bodyLength()};
+}
+
static Result<std::unique_ptr<Message>> ReadMessageFromBlock(const FileBlock& block,
io::RandomAccessFile* file) {
if (!BitUtil::IsMultipleOf8(block.offset) ||
@@ -1049,39 +1049,39 @@ class ARROW_EXPORT IpcFileRecordBatchGenerator {
Future<> read_dictionaries_;
};
-class RecordBatchFileReaderImpl : public RecordBatchFileReader {
- public:
- RecordBatchFileReaderImpl() : file_(NULLPTR), footer_offset_(0), footer_(NULLPTR) {}
-
- int num_record_batches() const override {
- return static_cast<int>(internal::FlatBuffersVectorSize(footer_->recordBatches()));
- }
-
- MetadataVersion version() const override {
- return internal::GetMetadataVersion(footer_->version());
- }
-
- Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(int i) override {
- DCHECK_GE(i, 0);
- DCHECK_LT(i, num_record_batches());
-
- if (!read_dictionaries_) {
- RETURN_NOT_OK(ReadDictionaries());
- read_dictionaries_ = true;
- }
-
- ARROW_ASSIGN_OR_RAISE(auto message, ReadMessageFromBlock(GetRecordBatchBlock(i)));
-
- CHECK_HAS_BODY(*message);
- ARROW_ASSIGN_OR_RAISE(auto reader, Buffer::GetReader(message->body()));
+class RecordBatchFileReaderImpl : public RecordBatchFileReader {
+ public:
+ RecordBatchFileReaderImpl() : file_(NULLPTR), footer_offset_(0), footer_(NULLPTR) {}
+
+ int num_record_batches() const override {
+ return static_cast<int>(internal::FlatBuffersVectorSize(footer_->recordBatches()));
+ }
+
+ MetadataVersion version() const override {
+ return internal::GetMetadataVersion(footer_->version());
+ }
+
+ Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(int i) override {
+ DCHECK_GE(i, 0);
+ DCHECK_LT(i, num_record_batches());
+
+ if (!read_dictionaries_) {
+ RETURN_NOT_OK(ReadDictionaries());
+ read_dictionaries_ = true;
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto message, ReadMessageFromBlock(GetRecordBatchBlock(i)));
+
+ CHECK_HAS_BODY(*message);
+ ARROW_ASSIGN_OR_RAISE(auto reader, Buffer::GetReader(message->body()));
IpcReadContext context(&dictionary_memo_, options_, swap_endian_);
ARROW_ASSIGN_OR_RAISE(auto batch, ReadRecordBatchInternal(
*message->metadata(), schema_,
field_inclusion_mask_, context, reader.get()));
- ++stats_.num_record_batches;
- return batch;
- }
-
+ ++stats_.num_record_batches;
+ return batch;
+ }
+
Result<int64_t> CountRows() override {
int64_t total = 0;
for (int i = 0; i < num_record_batches(); i++) {
@@ -1101,27 +1101,27 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader {
return total;
}
- Status Open(const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset,
- const IpcReadOptions& options) {
- owned_file_ = file;
- return Open(file.get(), footer_offset, options);
- }
-
- Status Open(io::RandomAccessFile* file, int64_t footer_offset,
- const IpcReadOptions& options) {
- file_ = file;
- options_ = options;
- footer_offset_ = footer_offset;
- RETURN_NOT_OK(ReadFooter());
-
- // Get the schema and record any observed dictionaries
- RETURN_NOT_OK(UnpackSchemaMessage(footer_->schema(), options, &dictionary_memo_,
+ Status Open(const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset,
+ const IpcReadOptions& options) {
+ owned_file_ = file;
+ return Open(file.get(), footer_offset, options);
+ }
+
+ Status Open(io::RandomAccessFile* file, int64_t footer_offset,
+ const IpcReadOptions& options) {
+ file_ = file;
+ options_ = options;
+ footer_offset_ = footer_offset;
+ RETURN_NOT_OK(ReadFooter());
+
+ // Get the schema and record any observed dictionaries
+ RETURN_NOT_OK(UnpackSchemaMessage(footer_->schema(), options, &dictionary_memo_,
&schema_, &out_schema_, &field_inclusion_mask_,
&swap_endian_));
- ++stats_.num_messages;
- return Status::OK();
- }
-
+ ++stats_.num_messages;
+ return Status::OK();
+ }
+
Future<> OpenAsync(const std::shared_ptr<io::RandomAccessFile>& file,
int64_t footer_offset, const IpcReadOptions& options) {
owned_file_ = file;
@@ -1145,12 +1145,12 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader {
});
}
- std::shared_ptr<Schema> schema() const override { return out_schema_; }
-
- std::shared_ptr<const KeyValueMetadata> metadata() const override { return metadata_; }
-
- ReadStats stats() const override { return stats_; }
-
+ std::shared_ptr<Schema> schema() const override { return out_schema_; }
+
+ std::shared_ptr<const KeyValueMetadata> metadata() const override { return metadata_; }
+
+ ReadStats stats() const override { return stats_; }
+
Result<AsyncGenerator<std::shared_ptr<RecordBatch>>> GetRecordBatchGenerator(
const bool coalesce, const io::IOContext& io_context,
const io::CacheOptions cache_options,
@@ -1180,49 +1180,49 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader {
io_context, executor);
}
- private:
+ private:
friend AsyncGenerator<std::shared_ptr<Message>> MakeMessageGenerator(
std::shared_ptr<RecordBatchFileReaderImpl>, const io::IOContext&);
friend class IpcFileRecordBatchGenerator;
- FileBlock GetRecordBatchBlock(int i) const {
- return FileBlockFromFlatbuffer(footer_->recordBatches()->Get(i));
- }
-
- FileBlock GetDictionaryBlock(int i) const {
- return FileBlockFromFlatbuffer(footer_->dictionaries()->Get(i));
- }
-
- Result<std::unique_ptr<Message>> ReadMessageFromBlock(const FileBlock& block) {
+ FileBlock GetRecordBatchBlock(int i) const {
+ return FileBlockFromFlatbuffer(footer_->recordBatches()->Get(i));
+ }
+
+ FileBlock GetDictionaryBlock(int i) const {
+ return FileBlockFromFlatbuffer(footer_->dictionaries()->Get(i));
+ }
+
+ Result<std::unique_ptr<Message>> ReadMessageFromBlock(const FileBlock& block) {
ARROW_ASSIGN_OR_RAISE(auto message, arrow::ipc::ReadMessageFromBlock(block, file_));
- ++stats_.num_messages;
- return std::move(message);
- }
-
- Status ReadDictionaries() {
- // Read all the dictionaries
+ ++stats_.num_messages;
+ return std::move(message);
+ }
+
+ Status ReadDictionaries() {
+ // Read all the dictionaries
IpcReadContext context(&dictionary_memo_, options_, swap_endian_);
- for (int i = 0; i < num_dictionaries(); ++i) {
- ARROW_ASSIGN_OR_RAISE(auto message, ReadMessageFromBlock(GetDictionaryBlock(i)));
+ for (int i = 0; i < num_dictionaries(); ++i) {
+ ARROW_ASSIGN_OR_RAISE(auto message, ReadMessageFromBlock(GetDictionaryBlock(i)));
RETURN_NOT_OK(ReadOneDictionary(message.get(), context));
- ++stats_.num_dictionary_batches;
- }
- return Status::OK();
- }
-
- Status ReadFooter() {
+ ++stats_.num_dictionary_batches;
+ }
+ return Status::OK();
+ }
+
+ Status ReadFooter() {
auto fut = ReadFooterAsync(/*executor=*/nullptr);
return fut.status();
}
Future<> ReadFooterAsync(arrow::internal::Executor* executor) {
- const int32_t magic_size = static_cast<int>(strlen(kArrowMagicBytes));
-
- if (footer_offset_ <= magic_size * 2 + 4) {
- return Status::Invalid("File is too small: ", footer_offset_);
- }
-
- int file_end_size = static_cast<int>(magic_size + sizeof(int32_t));
+ const int32_t magic_size = static_cast<int>(strlen(kArrowMagicBytes));
+
+ if (footer_offset_ <= magic_size * 2 + 4) {
+ return Status::Invalid("File is too small: ", footer_offset_);
+ }
+
+ int file_end_size = static_cast<int>(magic_size + sizeof(int32_t));
auto self = std::dynamic_pointer_cast<RecordBatchFileReaderImpl>(shared_from_this());
auto read_magic = file_->ReadAsync(footer_offset_ - file_end_size, file_end_size);
if (executor) read_magic = executor->Transfer(std::move(read_magic));
@@ -1234,19 +1234,19 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader {
return Status::Invalid("Unable to read ", expected_footer_size,
"from end of file");
}
-
+
if (memcmp(buffer->data() + sizeof(int32_t), kArrowMagicBytes, magic_size)) {
return Status::Invalid("Not an Arrow file");
}
-
+
int32_t footer_length = BitUtil::FromLittleEndian(
*reinterpret_cast<const int32_t*>(buffer->data()));
-
+
if (footer_length <= 0 ||
footer_length > self->footer_offset_ - magic_size * 2 - 4) {
return Status::Invalid("File is smaller than indicated metadata size");
}
-
+
// Now read the footer
auto read_footer = self->file_->ReadAsync(
self->footer_offset_ - footer_length - file_end_size, footer_length);
@@ -1261,7 +1261,7 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader {
return Status::IOError("Verification of flatbuffer-encoded Footer failed.");
}
self->footer_ = flatbuf::GetFooter(data);
-
+
auto fb_metadata = self->footer_->custom_metadata();
if (fb_metadata != nullptr) {
std::shared_ptr<KeyValueMetadata> md;
@@ -1270,67 +1270,67 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader {
}
return Status::OK();
});
- }
-
- int num_dictionaries() const {
- return static_cast<int>(internal::FlatBuffersVectorSize(footer_->dictionaries()));
- }
-
- io::RandomAccessFile* file_;
- IpcReadOptions options_;
- std::vector<bool> field_inclusion_mask_;
-
- std::shared_ptr<io::RandomAccessFile> owned_file_;
-
- // The location where the Arrow file layout ends. May be the end of the file
- // or some other location if embedded in a larger file.
- int64_t footer_offset_;
-
- // Footer metadata
- std::shared_ptr<Buffer> footer_buffer_;
- const flatbuf::Footer* footer_;
- std::shared_ptr<const KeyValueMetadata> metadata_;
-
- bool read_dictionaries_ = false;
- DictionaryMemo dictionary_memo_;
-
- // Reconstructed schema, including any read dictionaries
- std::shared_ptr<Schema> schema_;
- // Schema with deselected fields dropped
- std::shared_ptr<Schema> out_schema_;
-
- ReadStats stats_;
+ }
+
+ int num_dictionaries() const {
+ return static_cast<int>(internal::FlatBuffersVectorSize(footer_->dictionaries()));
+ }
+
+ io::RandomAccessFile* file_;
+ IpcReadOptions options_;
+ std::vector<bool> field_inclusion_mask_;
+
+ std::shared_ptr<io::RandomAccessFile> owned_file_;
+
+ // The location where the Arrow file layout ends. May be the end of the file
+ // or some other location if embedded in a larger file.
+ int64_t footer_offset_;
+
+ // Footer metadata
+ std::shared_ptr<Buffer> footer_buffer_;
+ const flatbuf::Footer* footer_;
+ std::shared_ptr<const KeyValueMetadata> metadata_;
+
+ bool read_dictionaries_ = false;
+ DictionaryMemo dictionary_memo_;
+
+ // Reconstructed schema, including any read dictionaries
+ std::shared_ptr<Schema> schema_;
+ // Schema with deselected fields dropped
+ std::shared_ptr<Schema> out_schema_;
+
+ ReadStats stats_;
bool swap_endian_;
-};
-
-Result<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::Open(
- io::RandomAccessFile* file, const IpcReadOptions& options) {
- ARROW_ASSIGN_OR_RAISE(int64_t footer_offset, file->GetSize());
- return Open(file, footer_offset, options);
-}
-
-Result<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::Open(
- io::RandomAccessFile* file, int64_t footer_offset, const IpcReadOptions& options) {
- auto result = std::make_shared<RecordBatchFileReaderImpl>();
- RETURN_NOT_OK(result->Open(file, footer_offset, options));
- return result;
-}
-
-Result<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::Open(
- const std::shared_ptr<io::RandomAccessFile>& file, const IpcReadOptions& options) {
- ARROW_ASSIGN_OR_RAISE(int64_t footer_offset, file->GetSize());
- return Open(file, footer_offset, options);
-}
-
-Result<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::Open(
- const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset,
- const IpcReadOptions& options) {
- auto result = std::make_shared<RecordBatchFileReaderImpl>();
- RETURN_NOT_OK(result->Open(file, footer_offset, options));
- return result;
-}
-
+};
+
+Result<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::Open(
+ io::RandomAccessFile* file, const IpcReadOptions& options) {
+ ARROW_ASSIGN_OR_RAISE(int64_t footer_offset, file->GetSize());
+ return Open(file, footer_offset, options);
+}
+
+Result<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::Open(
+ io::RandomAccessFile* file, int64_t footer_offset, const IpcReadOptions& options) {
+ auto result = std::make_shared<RecordBatchFileReaderImpl>();
+ RETURN_NOT_OK(result->Open(file, footer_offset, options));
+ return result;
+}
+
+Result<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::Open(
+ const std::shared_ptr<io::RandomAccessFile>& file, const IpcReadOptions& options) {
+ ARROW_ASSIGN_OR_RAISE(int64_t footer_offset, file->GetSize());
+ return Open(file, footer_offset, options);
+}
+
+Result<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::Open(
+ const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset,
+ const IpcReadOptions& options) {
+ auto result = std::make_shared<RecordBatchFileReaderImpl>();
+ RETURN_NOT_OK(result->Open(file, footer_offset, options));
+ return result;
+}
+
Future<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::OpenAsync(
const std::shared_ptr<io::RandomAccessFile>& file, const IpcReadOptions& options) {
ARROW_ASSIGN_OR_RAISE(int64_t footer_offset, file->GetSize());
@@ -1434,119 +1434,119 @@ Result<std::shared_ptr<RecordBatch>> IpcFileRecordBatchGenerator::ReadRecordBatc
state->field_inclusion_mask_, context, reader.get());
}
-Status Listener::OnEOS() { return Status::OK(); }
-
-Status Listener::OnSchemaDecoded(std::shared_ptr<Schema> schema) { return Status::OK(); }
-
-Status Listener::OnRecordBatchDecoded(std::shared_ptr<RecordBatch> record_batch) {
- return Status::NotImplemented("OnRecordBatchDecoded() callback isn't implemented");
-}
-
-class StreamDecoder::StreamDecoderImpl : public MessageDecoderListener {
- private:
- enum State {
- SCHEMA,
- INITIAL_DICTIONARIES,
- RECORD_BATCHES,
- EOS,
- };
-
- public:
+Status Listener::OnEOS() { return Status::OK(); }
+
+Status Listener::OnSchemaDecoded(std::shared_ptr<Schema> schema) { return Status::OK(); }
+
+Status Listener::OnRecordBatchDecoded(std::shared_ptr<RecordBatch> record_batch) {
+ return Status::NotImplemented("OnRecordBatchDecoded() callback isn't implemented");
+}
+
+class StreamDecoder::StreamDecoderImpl : public MessageDecoderListener {
+ private:
+ enum State {
+ SCHEMA,
+ INITIAL_DICTIONARIES,
+ RECORD_BATCHES,
+ EOS,
+ };
+
+ public:
explicit StreamDecoderImpl(std::shared_ptr<Listener> listener, IpcReadOptions options)
: listener_(std::move(listener)),
options_(std::move(options)),
- state_(State::SCHEMA),
- message_decoder_(std::shared_ptr<StreamDecoderImpl>(this, [](void*) {}),
- options_.memory_pool),
+ state_(State::SCHEMA),
+ message_decoder_(std::shared_ptr<StreamDecoderImpl>(this, [](void*) {}),
+ options_.memory_pool),
n_required_dictionaries_(0) {}
-
- Status OnMessageDecoded(std::unique_ptr<Message> message) override {
+
+ Status OnMessageDecoded(std::unique_ptr<Message> message) override {
++stats_.num_messages;
- switch (state_) {
- case State::SCHEMA:
- ARROW_RETURN_NOT_OK(OnSchemaMessageDecoded(std::move(message)));
- break;
- case State::INITIAL_DICTIONARIES:
- ARROW_RETURN_NOT_OK(OnInitialDictionaryMessageDecoded(std::move(message)));
- break;
- case State::RECORD_BATCHES:
- ARROW_RETURN_NOT_OK(OnRecordBatchMessageDecoded(std::move(message)));
- break;
- case State::EOS:
- break;
- }
- return Status::OK();
- }
-
- Status OnEOS() override {
- state_ = State::EOS;
- return listener_->OnEOS();
- }
-
- Status Consume(const uint8_t* data, int64_t size) {
- return message_decoder_.Consume(data, size);
- }
-
- Status Consume(std::shared_ptr<Buffer> buffer) {
- return message_decoder_.Consume(std::move(buffer));
- }
-
- std::shared_ptr<Schema> schema() const { return out_schema_; }
-
- int64_t next_required_size() const { return message_decoder_.next_required_size(); }
-
+ switch (state_) {
+ case State::SCHEMA:
+ ARROW_RETURN_NOT_OK(OnSchemaMessageDecoded(std::move(message)));
+ break;
+ case State::INITIAL_DICTIONARIES:
+ ARROW_RETURN_NOT_OK(OnInitialDictionaryMessageDecoded(std::move(message)));
+ break;
+ case State::RECORD_BATCHES:
+ ARROW_RETURN_NOT_OK(OnRecordBatchMessageDecoded(std::move(message)));
+ break;
+ case State::EOS:
+ break;
+ }
+ return Status::OK();
+ }
+
+ Status OnEOS() override {
+ state_ = State::EOS;
+ return listener_->OnEOS();
+ }
+
+ Status Consume(const uint8_t* data, int64_t size) {
+ return message_decoder_.Consume(data, size);
+ }
+
+ Status Consume(std::shared_ptr<Buffer> buffer) {
+ return message_decoder_.Consume(std::move(buffer));
+ }
+
+ std::shared_ptr<Schema> schema() const { return out_schema_; }
+
+ int64_t next_required_size() const { return message_decoder_.next_required_size(); }
+
ReadStats stats() const { return stats_; }
- private:
- Status OnSchemaMessageDecoded(std::unique_ptr<Message> message) {
- RETURN_NOT_OK(UnpackSchemaMessage(*message, options_, &dictionary_memo_, &schema_,
+ private:
+ Status OnSchemaMessageDecoded(std::unique_ptr<Message> message) {
+ RETURN_NOT_OK(UnpackSchemaMessage(*message, options_, &dictionary_memo_, &schema_,
&out_schema_, &field_inclusion_mask_,
&swap_endian_));
-
- n_required_dictionaries_ = dictionary_memo_.fields().num_fields();
- if (n_required_dictionaries_ == 0) {
- state_ = State::RECORD_BATCHES;
- RETURN_NOT_OK(listener_->OnSchemaDecoded(schema_));
- } else {
- state_ = State::INITIAL_DICTIONARIES;
- }
- return Status::OK();
- }
-
- Status OnInitialDictionaryMessageDecoded(std::unique_ptr<Message> message) {
- if (message->type() != MessageType::DICTIONARY_BATCH) {
- return Status::Invalid("IPC stream did not have the expected number (",
- dictionary_memo_.fields().num_fields(),
- ") of dictionaries at the start of the stream");
- }
- RETURN_NOT_OK(ReadDictionary(*message));
- n_required_dictionaries_--;
- if (n_required_dictionaries_ == 0) {
- state_ = State::RECORD_BATCHES;
- ARROW_RETURN_NOT_OK(listener_->OnSchemaDecoded(schema_));
- }
- return Status::OK();
- }
-
- Status OnRecordBatchMessageDecoded(std::unique_ptr<Message> message) {
+
+ n_required_dictionaries_ = dictionary_memo_.fields().num_fields();
+ if (n_required_dictionaries_ == 0) {
+ state_ = State::RECORD_BATCHES;
+ RETURN_NOT_OK(listener_->OnSchemaDecoded(schema_));
+ } else {
+ state_ = State::INITIAL_DICTIONARIES;
+ }
+ return Status::OK();
+ }
+
+ Status OnInitialDictionaryMessageDecoded(std::unique_ptr<Message> message) {
+ if (message->type() != MessageType::DICTIONARY_BATCH) {
+ return Status::Invalid("IPC stream did not have the expected number (",
+ dictionary_memo_.fields().num_fields(),
+ ") of dictionaries at the start of the stream");
+ }
+ RETURN_NOT_OK(ReadDictionary(*message));
+ n_required_dictionaries_--;
+ if (n_required_dictionaries_ == 0) {
+ state_ = State::RECORD_BATCHES;
+ ARROW_RETURN_NOT_OK(listener_->OnSchemaDecoded(schema_));
+ }
+ return Status::OK();
+ }
+
+ Status OnRecordBatchMessageDecoded(std::unique_ptr<Message> message) {
IpcReadContext context(&dictionary_memo_, options_, swap_endian_);
- if (message->type() == MessageType::DICTIONARY_BATCH) {
- return ReadDictionary(*message);
- } else {
- CHECK_HAS_BODY(*message);
- ARROW_ASSIGN_OR_RAISE(auto reader, Buffer::GetReader(message->body()));
+ if (message->type() == MessageType::DICTIONARY_BATCH) {
+ return ReadDictionary(*message);
+ } else {
+ CHECK_HAS_BODY(*message);
+ ARROW_ASSIGN_OR_RAISE(auto reader, Buffer::GetReader(message->body()));
IpcReadContext context(&dictionary_memo_, options_, swap_endian_);
- ARROW_ASSIGN_OR_RAISE(
- auto batch,
- ReadRecordBatchInternal(*message->metadata(), schema_, field_inclusion_mask_,
+ ARROW_ASSIGN_OR_RAISE(
+ auto batch,
+ ReadRecordBatchInternal(*message->metadata(), schema_, field_inclusion_mask_,
context, reader.get()));
++stats_.num_record_batches;
- return listener_->OnRecordBatchDecoded(std::move(batch));
- }
- }
-
- // Read dictionary from dictionary batch
- Status ReadDictionary(const Message& message) {
+ return listener_->OnRecordBatchDecoded(std::move(batch));
+ }
+ }
+
+ // Read dictionary from dictionary batch
+ Status ReadDictionary(const Message& message) {
DictionaryKind kind;
IpcReadContext context(&dictionary_memo_, options_, swap_endian_);
RETURN_NOT_OK(::arrow::ipc::ReadDictionary(message, context, &kind));
@@ -1562,503 +1562,503 @@ class StreamDecoder::StreamDecoderImpl : public MessageDecoderListener {
break;
}
return Status::OK();
- }
-
- std::shared_ptr<Listener> listener_;
+ }
+
+ std::shared_ptr<Listener> listener_;
const IpcReadOptions options_;
- State state_;
- MessageDecoder message_decoder_;
- std::vector<bool> field_inclusion_mask_;
- int n_required_dictionaries_;
- DictionaryMemo dictionary_memo_;
- std::shared_ptr<Schema> schema_, out_schema_;
+ State state_;
+ MessageDecoder message_decoder_;
+ std::vector<bool> field_inclusion_mask_;
+ int n_required_dictionaries_;
+ DictionaryMemo dictionary_memo_;
+ std::shared_ptr<Schema> schema_, out_schema_;
ReadStats stats_;
bool swap_endian_;
-};
-
+};
+
StreamDecoder::StreamDecoder(std::shared_ptr<Listener> listener, IpcReadOptions options) {
- impl_.reset(new StreamDecoderImpl(std::move(listener), options));
-}
-
-StreamDecoder::~StreamDecoder() {}
-
-Status StreamDecoder::Consume(const uint8_t* data, int64_t size) {
- return impl_->Consume(data, size);
-}
-Status StreamDecoder::Consume(std::shared_ptr<Buffer> buffer) {
- return impl_->Consume(std::move(buffer));
-}
-
-std::shared_ptr<Schema> StreamDecoder::schema() const { return impl_->schema(); }
-
-int64_t StreamDecoder::next_required_size() const { return impl_->next_required_size(); }
-
+ impl_.reset(new StreamDecoderImpl(std::move(listener), options));
+}
+
+StreamDecoder::~StreamDecoder() {}
+
+Status StreamDecoder::Consume(const uint8_t* data, int64_t size) {
+ return impl_->Consume(data, size);
+}
+Status StreamDecoder::Consume(std::shared_ptr<Buffer> buffer) {
+ return impl_->Consume(std::move(buffer));
+}
+
+std::shared_ptr<Schema> StreamDecoder::schema() const { return impl_->schema(); }
+
+int64_t StreamDecoder::next_required_size() const { return impl_->next_required_size(); }
+
ReadStats StreamDecoder::stats() const { return impl_->stats(); }
-Result<std::shared_ptr<Schema>> ReadSchema(io::InputStream* stream,
- DictionaryMemo* dictionary_memo) {
- std::unique_ptr<MessageReader> reader = MessageReader::Open(stream);
- ARROW_ASSIGN_OR_RAISE(std::unique_ptr<Message> message, reader->ReadNextMessage());
- if (!message) {
- return Status::Invalid("Tried reading schema message, was null or length 0");
- }
- CHECK_MESSAGE_TYPE(MessageType::SCHEMA, message->type());
- return ReadSchema(*message, dictionary_memo);
-}
-
-Result<std::shared_ptr<Schema>> ReadSchema(const Message& message,
- DictionaryMemo* dictionary_memo) {
- std::shared_ptr<Schema> result;
- RETURN_NOT_OK(internal::GetSchema(message.header(), dictionary_memo, &result));
- return result;
-}
-
-Result<std::shared_ptr<Tensor>> ReadTensor(io::InputStream* file) {
- std::unique_ptr<Message> message;
- RETURN_NOT_OK(ReadContiguousPayload(file, &message));
- return ReadTensor(*message);
-}
-
-Result<std::shared_ptr<Tensor>> ReadTensor(const Message& message) {
- std::shared_ptr<DataType> type;
- std::vector<int64_t> shape;
- std::vector<int64_t> strides;
- std::vector<std::string> dim_names;
- CHECK_HAS_BODY(message);
- RETURN_NOT_OK(internal::GetTensorMetadata(*message.metadata(), &type, &shape, &strides,
- &dim_names));
- return Tensor::Make(type, message.body(), shape, strides, dim_names);
-}
-
-namespace {
-
-Result<std::shared_ptr<SparseIndex>> ReadSparseCOOIndex(
- const flatbuf::SparseTensor* sparse_tensor, const std::vector<int64_t>& shape,
- int64_t non_zero_length, io::RandomAccessFile* file) {
- auto* sparse_index = sparse_tensor->sparseIndex_as_SparseTensorIndexCOO();
- const auto ndim = static_cast<int64_t>(shape.size());
-
- std::shared_ptr<DataType> indices_type;
- RETURN_NOT_OK(internal::GetSparseCOOIndexMetadata(sparse_index, &indices_type));
- const int64_t indices_elsize = GetByteWidth(*indices_type);
-
- auto* indices_buffer = sparse_index->indicesBuffer();
- ARROW_ASSIGN_OR_RAISE(auto indices_data,
- file->ReadAt(indices_buffer->offset(), indices_buffer->length()));
- std::vector<int64_t> indices_shape({non_zero_length, ndim});
- auto* indices_strides = sparse_index->indicesStrides();
- std::vector<int64_t> strides(2);
- if (indices_strides && indices_strides->size() > 0) {
- if (indices_strides->size() != 2) {
- return Status::Invalid("Wrong size for indicesStrides in SparseCOOIndex");
- }
- strides[0] = indices_strides->Get(0);
- strides[1] = indices_strides->Get(1);
- } else {
- // Row-major by default
- strides[0] = indices_elsize * ndim;
- strides[1] = indices_elsize;
- }
- return SparseCOOIndex::Make(
- std::make_shared<Tensor>(indices_type, indices_data, indices_shape, strides),
- sparse_index->isCanonical());
-}
-
-Result<std::shared_ptr<SparseIndex>> ReadSparseCSXIndex(
- const flatbuf::SparseTensor* sparse_tensor, const std::vector<int64_t>& shape,
- int64_t non_zero_length, io::RandomAccessFile* file) {
- if (shape.size() != 2) {
- return Status::Invalid("Invalid shape length for a sparse matrix");
- }
-
- auto* sparse_index = sparse_tensor->sparseIndex_as_SparseMatrixIndexCSX();
-
- std::shared_ptr<DataType> indptr_type, indices_type;
- RETURN_NOT_OK(
- internal::GetSparseCSXIndexMetadata(sparse_index, &indptr_type, &indices_type));
- const int indptr_byte_width = GetByteWidth(*indptr_type);
-
- auto* indptr_buffer = sparse_index->indptrBuffer();
- ARROW_ASSIGN_OR_RAISE(auto indptr_data,
- file->ReadAt(indptr_buffer->offset(), indptr_buffer->length()));
-
- auto* indices_buffer = sparse_index->indicesBuffer();
- ARROW_ASSIGN_OR_RAISE(auto indices_data,
- file->ReadAt(indices_buffer->offset(), indices_buffer->length()));
-
- std::vector<int64_t> indices_shape({non_zero_length});
- const auto indices_minimum_bytes = indices_shape[0] * GetByteWidth(*indices_type);
- if (indices_minimum_bytes > indices_buffer->length()) {
- return Status::Invalid("shape is inconsistent to the size of indices buffer");
- }
-
- switch (sparse_index->compressedAxis()) {
- case flatbuf::SparseMatrixCompressedAxis::Row: {
- std::vector<int64_t> indptr_shape({shape[0] + 1});
- const int64_t indptr_minimum_bytes = indptr_shape[0] * indptr_byte_width;
- if (indptr_minimum_bytes > indptr_buffer->length()) {
- return Status::Invalid("shape is inconsistent to the size of indptr buffer");
- }
- return std::make_shared<SparseCSRIndex>(
- std::make_shared<Tensor>(indptr_type, indptr_data, indptr_shape),
- std::make_shared<Tensor>(indices_type, indices_data, indices_shape));
- }
- case flatbuf::SparseMatrixCompressedAxis::Column: {
- std::vector<int64_t> indptr_shape({shape[1] + 1});
- const int64_t indptr_minimum_bytes = indptr_shape[0] * indptr_byte_width;
- if (indptr_minimum_bytes > indptr_buffer->length()) {
- return Status::Invalid("shape is inconsistent to the size of indptr buffer");
- }
- return std::make_shared<SparseCSCIndex>(
- std::make_shared<Tensor>(indptr_type, indptr_data, indptr_shape),
- std::make_shared<Tensor>(indices_type, indices_data, indices_shape));
- }
- default:
- return Status::Invalid("Invalid value of SparseMatrixCompressedAxis");
- }
-}
-
-Result<std::shared_ptr<SparseIndex>> ReadSparseCSFIndex(
- const flatbuf::SparseTensor* sparse_tensor, const std::vector<int64_t>& shape,
- io::RandomAccessFile* file) {
- auto* sparse_index = sparse_tensor->sparseIndex_as_SparseTensorIndexCSF();
- const auto ndim = static_cast<int64_t>(shape.size());
- auto* indptr_buffers = sparse_index->indptrBuffers();
- auto* indices_buffers = sparse_index->indicesBuffers();
- std::vector<std::shared_ptr<Buffer>> indptr_data(ndim - 1);
- std::vector<std::shared_ptr<Buffer>> indices_data(ndim);
-
- std::shared_ptr<DataType> indptr_type, indices_type;
- std::vector<int64_t> axis_order, indices_size;
-
- RETURN_NOT_OK(internal::GetSparseCSFIndexMetadata(
- sparse_index, &axis_order, &indices_size, &indptr_type, &indices_type));
- for (int i = 0; i < static_cast<int>(indptr_buffers->size()); ++i) {
- ARROW_ASSIGN_OR_RAISE(indptr_data[i], file->ReadAt(indptr_buffers->Get(i)->offset(),
- indptr_buffers->Get(i)->length()));
- }
- for (int i = 0; i < static_cast<int>(indices_buffers->size()); ++i) {
- ARROW_ASSIGN_OR_RAISE(indices_data[i],
- file->ReadAt(indices_buffers->Get(i)->offset(),
- indices_buffers->Get(i)->length()));
- }
-
- return SparseCSFIndex::Make(indptr_type, indices_type, indices_size, axis_order,
- indptr_data, indices_data);
-}
-
-Result<std::shared_ptr<SparseTensor>> MakeSparseTensorWithSparseCOOIndex(
- const std::shared_ptr<DataType>& type, const std::vector<int64_t>& shape,
- const std::vector<std::string>& dim_names,
- const std::shared_ptr<SparseCOOIndex>& sparse_index, int64_t non_zero_length,
- const std::shared_ptr<Buffer>& data) {
- return SparseCOOTensor::Make(sparse_index, type, data, shape, dim_names);
-}
-
-Result<std::shared_ptr<SparseTensor>> MakeSparseTensorWithSparseCSRIndex(
- const std::shared_ptr<DataType>& type, const std::vector<int64_t>& shape,
- const std::vector<std::string>& dim_names,
- const std::shared_ptr<SparseCSRIndex>& sparse_index, int64_t non_zero_length,
- const std::shared_ptr<Buffer>& data) {
- return SparseCSRMatrix::Make(sparse_index, type, data, shape, dim_names);
-}
-
-Result<std::shared_ptr<SparseTensor>> MakeSparseTensorWithSparseCSCIndex(
- const std::shared_ptr<DataType>& type, const std::vector<int64_t>& shape,
- const std::vector<std::string>& dim_names,
- const std::shared_ptr<SparseCSCIndex>& sparse_index, int64_t non_zero_length,
- const std::shared_ptr<Buffer>& data) {
- return SparseCSCMatrix::Make(sparse_index, type, data, shape, dim_names);
-}
-
-Result<std::shared_ptr<SparseTensor>> MakeSparseTensorWithSparseCSFIndex(
- const std::shared_ptr<DataType>& type, const std::vector<int64_t>& shape,
- const std::vector<std::string>& dim_names,
- const std::shared_ptr<SparseCSFIndex>& sparse_index,
- const std::shared_ptr<Buffer>& data) {
- return SparseCSFTensor::Make(sparse_index, type, data, shape, dim_names);
-}
-
-Status ReadSparseTensorMetadata(const Buffer& metadata,
- std::shared_ptr<DataType>* out_type,
- std::vector<int64_t>* out_shape,
- std::vector<std::string>* out_dim_names,
- int64_t* out_non_zero_length,
- SparseTensorFormat::type* out_format_id,
- const flatbuf::SparseTensor** out_fb_sparse_tensor,
- const flatbuf::Buffer** out_buffer) {
- RETURN_NOT_OK(internal::GetSparseTensorMetadata(
- metadata, out_type, out_shape, out_dim_names, out_non_zero_length, out_format_id));
-
- const flatbuf::Message* message = nullptr;
- RETURN_NOT_OK(internal::VerifyMessage(metadata.data(), metadata.size(), &message));
-
- auto sparse_tensor = message->header_as_SparseTensor();
- if (sparse_tensor == nullptr) {
- return Status::IOError(
- "Header-type of flatbuffer-encoded Message is not SparseTensor.");
- }
- *out_fb_sparse_tensor = sparse_tensor;
-
- auto buffer = sparse_tensor->data();
- if (!BitUtil::IsMultipleOf8(buffer->offset())) {
- return Status::Invalid(
- "Buffer of sparse index data did not start on 8-byte aligned offset: ",
- buffer->offset());
- }
- *out_buffer = buffer;
-
- return Status::OK();
-}
-
-} // namespace
-
-namespace internal {
-
-namespace {
-
-Result<size_t> GetSparseTensorBodyBufferCount(SparseTensorFormat::type format_id,
- const size_t ndim) {
- switch (format_id) {
- case SparseTensorFormat::COO:
- return 2;
-
- case SparseTensorFormat::CSR:
- return 3;
-
- case SparseTensorFormat::CSC:
- return 3;
-
- case SparseTensorFormat::CSF:
- return 2 * ndim;
-
- default:
- return Status::Invalid("Unrecognized sparse tensor format");
- }
-}
-
-Status CheckSparseTensorBodyBufferCount(const IpcPayload& payload,
- SparseTensorFormat::type sparse_tensor_format_id,
- const size_t ndim) {
- size_t expected_body_buffer_count = 0;
- ARROW_ASSIGN_OR_RAISE(expected_body_buffer_count,
- GetSparseTensorBodyBufferCount(sparse_tensor_format_id, ndim));
- if (payload.body_buffers.size() != expected_body_buffer_count) {
- return Status::Invalid("Invalid body buffer count for a sparse tensor");
- }
-
- return Status::OK();
-}
-
-} // namespace
-
-Result<size_t> ReadSparseTensorBodyBufferCount(const Buffer& metadata) {
- SparseTensorFormat::type format_id;
- std::vector<int64_t> shape;
-
- RETURN_NOT_OK(internal::GetSparseTensorMetadata(metadata, nullptr, &shape, nullptr,
- nullptr, &format_id));
-
- return GetSparseTensorBodyBufferCount(format_id, static_cast<size_t>(shape.size()));
-}
-
-Result<std::shared_ptr<SparseTensor>> ReadSparseTensorPayload(const IpcPayload& payload) {
- std::shared_ptr<DataType> type;
- std::vector<int64_t> shape;
- std::vector<std::string> dim_names;
- int64_t non_zero_length;
- SparseTensorFormat::type sparse_tensor_format_id;
- const flatbuf::SparseTensor* sparse_tensor;
- const flatbuf::Buffer* buffer;
-
- RETURN_NOT_OK(ReadSparseTensorMetadata(*payload.metadata, &type, &shape, &dim_names,
- &non_zero_length, &sparse_tensor_format_id,
- &sparse_tensor, &buffer));
-
- RETURN_NOT_OK(CheckSparseTensorBodyBufferCount(payload, sparse_tensor_format_id,
- static_cast<size_t>(shape.size())));
-
- switch (sparse_tensor_format_id) {
- case SparseTensorFormat::COO: {
- std::shared_ptr<SparseCOOIndex> sparse_index;
- std::shared_ptr<DataType> indices_type;
- RETURN_NOT_OK(internal::GetSparseCOOIndexMetadata(
- sparse_tensor->sparseIndex_as_SparseTensorIndexCOO(), &indices_type));
- ARROW_ASSIGN_OR_RAISE(sparse_index,
- SparseCOOIndex::Make(indices_type, shape, non_zero_length,
- payload.body_buffers[0]));
- return MakeSparseTensorWithSparseCOOIndex(type, shape, dim_names, sparse_index,
- non_zero_length, payload.body_buffers[1]);
- }
- case SparseTensorFormat::CSR: {
- std::shared_ptr<SparseCSRIndex> sparse_index;
- std::shared_ptr<DataType> indptr_type;
- std::shared_ptr<DataType> indices_type;
- RETURN_NOT_OK(internal::GetSparseCSXIndexMetadata(
- sparse_tensor->sparseIndex_as_SparseMatrixIndexCSX(), &indptr_type,
- &indices_type));
- ARROW_CHECK_EQ(indptr_type, indices_type);
- ARROW_ASSIGN_OR_RAISE(
- sparse_index,
- SparseCSRIndex::Make(indices_type, shape, non_zero_length,
- payload.body_buffers[0], payload.body_buffers[1]));
- return MakeSparseTensorWithSparseCSRIndex(type, shape, dim_names, sparse_index,
- non_zero_length, payload.body_buffers[2]);
- }
- case SparseTensorFormat::CSC: {
- std::shared_ptr<SparseCSCIndex> sparse_index;
- std::shared_ptr<DataType> indptr_type;
- std::shared_ptr<DataType> indices_type;
- RETURN_NOT_OK(internal::GetSparseCSXIndexMetadata(
- sparse_tensor->sparseIndex_as_SparseMatrixIndexCSX(), &indptr_type,
- &indices_type));
- ARROW_CHECK_EQ(indptr_type, indices_type);
- ARROW_ASSIGN_OR_RAISE(
- sparse_index,
- SparseCSCIndex::Make(indices_type, shape, non_zero_length,
- payload.body_buffers[0], payload.body_buffers[1]));
- return MakeSparseTensorWithSparseCSCIndex(type, shape, dim_names, sparse_index,
- non_zero_length, payload.body_buffers[2]);
- }
- case SparseTensorFormat::CSF: {
- std::shared_ptr<SparseCSFIndex> sparse_index;
- std::shared_ptr<DataType> indptr_type, indices_type;
- std::vector<int64_t> axis_order, indices_size;
-
- RETURN_NOT_OK(internal::GetSparseCSFIndexMetadata(
- sparse_tensor->sparseIndex_as_SparseTensorIndexCSF(), &axis_order,
- &indices_size, &indptr_type, &indices_type));
- ARROW_CHECK_EQ(indptr_type, indices_type);
-
- const int64_t ndim = shape.size();
- std::vector<std::shared_ptr<Buffer>> indptr_data(ndim - 1);
- std::vector<std::shared_ptr<Buffer>> indices_data(ndim);
-
- for (int64_t i = 0; i < ndim - 1; ++i) {
- indptr_data[i] = payload.body_buffers[i];
- }
- for (int64_t i = 0; i < ndim; ++i) {
- indices_data[i] = payload.body_buffers[i + ndim - 1];
- }
-
- ARROW_ASSIGN_OR_RAISE(sparse_index,
- SparseCSFIndex::Make(indptr_type, indices_type, indices_size,
- axis_order, indptr_data, indices_data));
- return MakeSparseTensorWithSparseCSFIndex(type, shape, dim_names, sparse_index,
- payload.body_buffers[2 * ndim - 1]);
- }
- default:
- return Status::Invalid("Unsupported sparse index format");
- }
-}
-
-} // namespace internal
-
-Result<std::shared_ptr<SparseTensor>> ReadSparseTensor(const Buffer& metadata,
- io::RandomAccessFile* file) {
- std::shared_ptr<DataType> type;
- std::vector<int64_t> shape;
- std::vector<std::string> dim_names;
- int64_t non_zero_length;
- SparseTensorFormat::type sparse_tensor_format_id;
- const flatbuf::SparseTensor* sparse_tensor;
- const flatbuf::Buffer* buffer;
-
- RETURN_NOT_OK(ReadSparseTensorMetadata(metadata, &type, &shape, &dim_names,
- &non_zero_length, &sparse_tensor_format_id,
- &sparse_tensor, &buffer));
-
- ARROW_ASSIGN_OR_RAISE(auto data, file->ReadAt(buffer->offset(), buffer->length()));
-
- std::shared_ptr<SparseIndex> sparse_index;
- switch (sparse_tensor_format_id) {
- case SparseTensorFormat::COO: {
- ARROW_ASSIGN_OR_RAISE(
- sparse_index, ReadSparseCOOIndex(sparse_tensor, shape, non_zero_length, file));
- return MakeSparseTensorWithSparseCOOIndex(
- type, shape, dim_names, checked_pointer_cast<SparseCOOIndex>(sparse_index),
- non_zero_length, data);
- }
- case SparseTensorFormat::CSR: {
- ARROW_ASSIGN_OR_RAISE(
- sparse_index, ReadSparseCSXIndex(sparse_tensor, shape, non_zero_length, file));
- return MakeSparseTensorWithSparseCSRIndex(
- type, shape, dim_names, checked_pointer_cast<SparseCSRIndex>(sparse_index),
- non_zero_length, data);
- }
- case SparseTensorFormat::CSC: {
- ARROW_ASSIGN_OR_RAISE(
- sparse_index, ReadSparseCSXIndex(sparse_tensor, shape, non_zero_length, file));
- return MakeSparseTensorWithSparseCSCIndex(
- type, shape, dim_names, checked_pointer_cast<SparseCSCIndex>(sparse_index),
- non_zero_length, data);
- }
- case SparseTensorFormat::CSF: {
- ARROW_ASSIGN_OR_RAISE(sparse_index, ReadSparseCSFIndex(sparse_tensor, shape, file));
- return MakeSparseTensorWithSparseCSFIndex(
- type, shape, dim_names, checked_pointer_cast<SparseCSFIndex>(sparse_index),
- data);
- }
- default:
- return Status::Invalid("Unsupported sparse index format");
- }
-}
-
-Result<std::shared_ptr<SparseTensor>> ReadSparseTensor(const Message& message) {
- CHECK_HAS_BODY(message);
- ARROW_ASSIGN_OR_RAISE(auto reader, Buffer::GetReader(message.body()));
- return ReadSparseTensor(*message.metadata(), reader.get());
-}
-
-Result<std::shared_ptr<SparseTensor>> ReadSparseTensor(io::InputStream* file) {
- std::unique_ptr<Message> message;
- RETURN_NOT_OK(ReadContiguousPayload(file, &message));
- CHECK_MESSAGE_TYPE(MessageType::SPARSE_TENSOR, message->type());
- CHECK_HAS_BODY(*message);
- ARROW_ASSIGN_OR_RAISE(auto reader, Buffer::GetReader(message->body()));
- return ReadSparseTensor(*message->metadata(), reader.get());
-}
-
-///////////////////////////////////////////////////////////////////////////
-// Helpers for fuzzing
-
-namespace internal {
-
-Status FuzzIpcStream(const uint8_t* data, int64_t size) {
- auto buffer = std::make_shared<Buffer>(data, size);
- io::BufferReader buffer_reader(buffer);
-
- std::shared_ptr<RecordBatchReader> batch_reader;
- ARROW_ASSIGN_OR_RAISE(batch_reader, RecordBatchStreamReader::Open(&buffer_reader));
-
- while (true) {
- std::shared_ptr<arrow::RecordBatch> batch;
- RETURN_NOT_OK(batch_reader->ReadNext(&batch));
- if (batch == nullptr) {
- break;
- }
- RETURN_NOT_OK(batch->ValidateFull());
- }
-
- return Status::OK();
-}
-
-Status FuzzIpcFile(const uint8_t* data, int64_t size) {
- auto buffer = std::make_shared<Buffer>(data, size);
- io::BufferReader buffer_reader(buffer);
-
- std::shared_ptr<RecordBatchFileReader> batch_reader;
- ARROW_ASSIGN_OR_RAISE(batch_reader, RecordBatchFileReader::Open(&buffer_reader));
-
- const int n_batches = batch_reader->num_record_batches();
- for (int i = 0; i < n_batches; ++i) {
- ARROW_ASSIGN_OR_RAISE(auto batch, batch_reader->ReadRecordBatch(i));
- RETURN_NOT_OK(batch->ValidateFull());
- }
-
- return Status::OK();
-}
-
+/// \brief Read the next message from `stream` and decode it as a Schema.
+///
+/// Returns Status::Invalid when the message reader yields no message. The
+/// message type is checked with CHECK_MESSAGE_TYPE before decoding.
+Result<std::shared_ptr<Schema>> ReadSchema(io::InputStream* stream,
+                                           DictionaryMemo* dictionary_memo) {
+  auto message_reader = MessageReader::Open(stream);
+  ARROW_ASSIGN_OR_RAISE(auto schema_message, message_reader->ReadNextMessage());
+  if (schema_message == nullptr) {
+    return Status::Invalid("Tried reading schema message, was null or length 0");
+  }
+  CHECK_MESSAGE_TYPE(MessageType::SCHEMA, schema_message->type());
+  return ReadSchema(*schema_message, dictionary_memo);
+}
+
+/// \brief Decode the header of an already-read `message` as a Schema.
+///
+/// `dictionary_memo` is handed through to internal::GetSchema unchanged.
+Result<std::shared_ptr<Schema>> ReadSchema(const Message& message,
+                                           DictionaryMemo* dictionary_memo) {
+  std::shared_ptr<Schema> schema;
+  RETURN_NOT_OK(internal::GetSchema(message.header(), dictionary_memo, &schema));
+  return schema;
+}
+
+/// \brief Read one contiguous message from `file` and decode it as a Tensor.
+Result<std::shared_ptr<Tensor>> ReadTensor(io::InputStream* file) {
+  std::unique_ptr<Message> tensor_message;
+  RETURN_NOT_OK(ReadContiguousPayload(file, &tensor_message));
+  return ReadTensor(*tensor_message);
+}
+
+/// \brief Build a Tensor from an IPC message.
+///
+/// The message must carry a body (CHECK_HAS_BODY). Type, shape, strides and
+/// dimension names come from the message metadata; the message body is passed
+/// to Tensor::Make as the tensor's data buffer.
+Result<std::shared_ptr<Tensor>> ReadTensor(const Message& message) {
+  CHECK_HAS_BODY(message);
+
+  std::shared_ptr<DataType> value_type;
+  std::vector<int64_t> tensor_shape;
+  std::vector<int64_t> tensor_strides;
+  std::vector<std::string> names;
+  RETURN_NOT_OK(internal::GetTensorMetadata(*message.metadata(), &value_type,
+                                            &tensor_shape, &tensor_strides, &names));
+  return Tensor::Make(value_type, message.body(), tensor_shape, tensor_strides, names);
+}
+
+namespace {
+
+/// \brief Read the indices tensor of a COO sparse index from `file`.
+///
+/// The indices tensor has shape {non_zero_length, ndim}. Strides are taken
+/// from the flatbuffer metadata when present (exactly two entries are
+/// required); otherwise row-major strides are derived from the byte width of
+/// the indices type.
+Result<std::shared_ptr<SparseIndex>> ReadSparseCOOIndex(
+    const flatbuf::SparseTensor* sparse_tensor, const std::vector<int64_t>& shape,
+    int64_t non_zero_length, io::RandomAccessFile* file) {
+  const auto* coo_index = sparse_tensor->sparseIndex_as_SparseTensorIndexCOO();
+  const int64_t rank = static_cast<int64_t>(shape.size());
+
+  std::shared_ptr<DataType> indices_type;
+  RETURN_NOT_OK(internal::GetSparseCOOIndexMetadata(coo_index, &indices_type));
+  const int64_t elem_width = GetByteWidth(*indices_type);
+
+  const auto* fb_indices = coo_index->indicesBuffer();
+  ARROW_ASSIGN_OR_RAISE(auto indices_data,
+                        file->ReadAt(fb_indices->offset(), fb_indices->length()));
+
+  // Start from the row-major default and override it only when the metadata
+  // carries explicit strides.
+  std::vector<int64_t> strides = {elem_width * rank, elem_width};
+  const auto* fb_strides = coo_index->indicesStrides();
+  if (fb_strides != nullptr && fb_strides->size() > 0) {
+    if (fb_strides->size() != 2) {
+      return Status::Invalid("Wrong size for indicesStrides in SparseCOOIndex");
+    }
+    strides[0] = fb_strides->Get(0);
+    strides[1] = fb_strides->Get(1);
+  }
+
+  const std::vector<int64_t> indices_shape = {non_zero_length, rank};
+  auto indices_tensor =
+      std::make_shared<Tensor>(indices_type, indices_data, indices_shape, strides);
+  return SparseCOOIndex::Make(indices_tensor, coo_index->isCanonical());
+}
+
+/// \brief Read the indptr and indices tensors of a CSR/CSC sparse index.
+///
+/// `shape` must have exactly two entries. Both buffers are read from `file`
+/// at the positions recorded in the flatbuffer metadata and then checked to
+/// be large enough: `non_zero_length` entries for indices, and one entry more
+/// than the compressed dimension for indptr. Row compression yields a
+/// SparseCSRIndex, column compression a SparseCSCIndex.
+Result<std::shared_ptr<SparseIndex>> ReadSparseCSXIndex(
+    const flatbuf::SparseTensor* sparse_tensor, const std::vector<int64_t>& shape,
+    int64_t non_zero_length, io::RandomAccessFile* file) {
+  if (shape.size() != 2) {
+    return Status::Invalid("Invalid shape length for a sparse matrix");
+  }
+
+  const auto* csx_index = sparse_tensor->sparseIndex_as_SparseMatrixIndexCSX();
+
+  std::shared_ptr<DataType> indptr_type, indices_type;
+  RETURN_NOT_OK(
+      internal::GetSparseCSXIndexMetadata(csx_index, &indptr_type, &indices_type));
+  const int indptr_width = GetByteWidth(*indptr_type);
+
+  const auto* fb_indptr = csx_index->indptrBuffer();
+  ARROW_ASSIGN_OR_RAISE(auto indptr_data,
+                        file->ReadAt(fb_indptr->offset(), fb_indptr->length()));
+
+  const auto* fb_indices = csx_index->indicesBuffer();
+  ARROW_ASSIGN_OR_RAISE(auto indices_data,
+                        file->ReadAt(fb_indices->offset(), fb_indices->length()));
+
+  const std::vector<int64_t> indices_shape = {non_zero_length};
+  if (indices_shape[0] * GetByteWidth(*indices_type) > fb_indices->length()) {
+    return Status::Invalid("shape is inconsistent to the size of indices buffer");
+  }
+
+  // Only Row and Column are meaningful; anything else is rejected before the
+  // indptr size check, as in the per-case layout this replaces.
+  const auto axis = csx_index->compressedAxis();
+  const bool by_row = (axis == flatbuf::SparseMatrixCompressedAxis::Row);
+  if (!by_row && axis != flatbuf::SparseMatrixCompressedAxis::Column) {
+    return Status::Invalid("Invalid value of SparseMatrixCompressedAxis");
+  }
+
+  const std::vector<int64_t> indptr_shape = {(by_row ? shape[0] : shape[1]) + 1};
+  if (indptr_shape[0] * indptr_width > fb_indptr->length()) {
+    return Status::Invalid("shape is inconsistent to the size of indptr buffer");
+  }
+
+  auto indptr_tensor = std::make_shared<Tensor>(indptr_type, indptr_data, indptr_shape);
+  auto indices_tensor =
+      std::make_shared<Tensor>(indices_type, indices_data, indices_shape);
+  if (by_row) {
+    return std::make_shared<SparseCSRIndex>(indptr_tensor, indices_tensor);
+  }
+  return std::make_shared<SparseCSCIndex>(indptr_tensor, indices_tensor);
+}
+
+/// \brief Read the indptr and indices buffers of a CSF sparse index.
+///
+/// A CSF index over `ndim` dimensions consists of `ndim - 1` indptr buffers
+/// and `ndim` indices buffers. The buffer lists come from the flatbuffer
+/// metadata while `ndim` comes from `shape`, so the two are cross-checked
+/// before any buffer is stored: the destination vectors are sized from `ndim`
+/// and would otherwise be indexed past their end by inconsistent metadata.
+///
+/// \return the SparseCSFIndex, or Status::Invalid when `shape` is empty or the
+/// buffer counts do not match the number of dimensions
+Result<std::shared_ptr<SparseIndex>> ReadSparseCSFIndex(
+    const flatbuf::SparseTensor* sparse_tensor, const std::vector<int64_t>& shape,
+    io::RandomAccessFile* file) {
+  auto* sparse_index = sparse_tensor->sparseIndex_as_SparseTensorIndexCSF();
+  const auto ndim = static_cast<int64_t>(shape.size());
+  // With an empty shape, `ndim - 1` below would become a negative vector size.
+  if (ndim < 1) {
+    return Status::Invalid("Invalid shape length for a sparse CSF tensor");
+  }
+  auto* indptr_buffers = sparse_index->indptrBuffers();
+  auto* indices_buffers = sparse_index->indicesBuffers();
+
+  std::shared_ptr<DataType> indptr_type, indices_type;
+  std::vector<int64_t> axis_order, indices_size;
+
+  RETURN_NOT_OK(internal::GetSparseCSFIndexMetadata(
+      sparse_index, &axis_order, &indices_size, &indptr_type, &indices_type));
+
+  if (static_cast<int64_t>(indptr_buffers->size()) != ndim - 1) {
+    return Status::Invalid(
+        "Number of indptr buffers is inconsistent with the sparse CSF tensor shape");
+  }
+  if (static_cast<int64_t>(indices_buffers->size()) != ndim) {
+    return Status::Invalid(
+        "Number of indices buffers is inconsistent with the sparse CSF tensor shape");
+  }
+
+  std::vector<std::shared_ptr<Buffer>> indptr_data(ndim - 1);
+  std::vector<std::shared_ptr<Buffer>> indices_data(ndim);
+  for (int i = 0; i < static_cast<int>(indptr_buffers->size()); ++i) {
+    ARROW_ASSIGN_OR_RAISE(indptr_data[i], file->ReadAt(indptr_buffers->Get(i)->offset(),
+                                                       indptr_buffers->Get(i)->length()));
+  }
+  for (int i = 0; i < static_cast<int>(indices_buffers->size()); ++i) {
+    ARROW_ASSIGN_OR_RAISE(indices_data[i],
+                          file->ReadAt(indices_buffers->Get(i)->offset(),
+                                       indices_buffers->Get(i)->length()));
+  }
+
+  return SparseCSFIndex::Make(indptr_type, indices_type, indices_size, axis_order,
+                              indptr_data, indices_data);
+}
+
+// Builds a COO sparse tensor by forwarding to SparseCOOTensor::Make, reordering
+// the arguments to (sparse_index, type, data, shape, dim_names).
+// `non_zero_length` is accepted but not used in the body.
+Result<std::shared_ptr<SparseTensor>> MakeSparseTensorWithSparseCOOIndex(
+ const std::shared_ptr<DataType>& type, const std::vector<int64_t>& shape,
+ const std::vector<std::string>& dim_names,
+ const std::shared_ptr<SparseCOOIndex>& sparse_index, int64_t non_zero_length,
+ const std::shared_ptr<Buffer>& data) {
+ return SparseCOOTensor::Make(sparse_index, type, data, shape, dim_names);
+}
+
+// Builds a CSR sparse matrix by forwarding to SparseCSRMatrix::Make, reordering
+// the arguments to (sparse_index, type, data, shape, dim_names).
+// `non_zero_length` is accepted but not used in the body.
+Result<std::shared_ptr<SparseTensor>> MakeSparseTensorWithSparseCSRIndex(
+ const std::shared_ptr<DataType>& type, const std::vector<int64_t>& shape,
+ const std::vector<std::string>& dim_names,
+ const std::shared_ptr<SparseCSRIndex>& sparse_index, int64_t non_zero_length,
+ const std::shared_ptr<Buffer>& data) {
+ return SparseCSRMatrix::Make(sparse_index, type, data, shape, dim_names);
+}
+
+// Builds a CSC sparse matrix by forwarding to SparseCSCMatrix::Make, reordering
+// the arguments to (sparse_index, type, data, shape, dim_names).
+// `non_zero_length` is accepted but not used in the body.
+Result<std::shared_ptr<SparseTensor>> MakeSparseTensorWithSparseCSCIndex(
+ const std::shared_ptr<DataType>& type, const std::vector<int64_t>& shape,
+ const std::vector<std::string>& dim_names,
+ const std::shared_ptr<SparseCSCIndex>& sparse_index, int64_t non_zero_length,
+ const std::shared_ptr<Buffer>& data) {
+ return SparseCSCMatrix::Make(sparse_index, type, data, shape, dim_names);
+}
+
+// Builds a CSF sparse tensor by forwarding to SparseCSFTensor::Make, reordering
+// the arguments to (sparse_index, type, data, shape, dim_names). Unlike the
+// COO/CSR/CSC helpers in this file, this one takes no non_zero_length parameter.
+Result<std::shared_ptr<SparseTensor>> MakeSparseTensorWithSparseCSFIndex(
+ const std::shared_ptr<DataType>& type, const std::vector<int64_t>& shape,
+ const std::vector<std::string>& dim_names,
+ const std::shared_ptr<SparseCSFIndex>& sparse_index,
+ const std::shared_ptr<Buffer>& data) {
+ return SparseCSFTensor::Make(sparse_index, type, data, shape, dim_names);
+}
+
+/// \brief Decode sparse tensor metadata and expose the underlying flatbuffers.
+///
+/// Type, shape, dimension names, non-zero count and format id are filled in by
+/// internal::GetSparseTensorMetadata. The message is then verified so that the
+/// flatbuffer SparseTensor table and its data Buffer descriptor can be
+/// returned through `out_fb_sparse_tensor` and `out_buffer`.
+/// NOTE(review): those two pointers presumably point into the bytes of
+/// `metadata`, so `metadata` should outlive them -- confirm against
+/// internal::VerifyMessage.
+///
+/// Returns IOError when the message header is not a SparseTensor, and Invalid
+/// when the data buffer offset is not a multiple of 8.
+Status ReadSparseTensorMetadata(const Buffer& metadata,
+                                std::shared_ptr<DataType>* out_type,
+                                std::vector<int64_t>* out_shape,
+                                std::vector<std::string>* out_dim_names,
+                                int64_t* out_non_zero_length,
+                                SparseTensorFormat::type* out_format_id,
+                                const flatbuf::SparseTensor** out_fb_sparse_tensor,
+                                const flatbuf::Buffer** out_buffer) {
+  RETURN_NOT_OK(internal::GetSparseTensorMetadata(
+      metadata, out_type, out_shape, out_dim_names, out_non_zero_length, out_format_id));
+
+  const flatbuf::Message* fb_message = nullptr;
+  RETURN_NOT_OK(
+      internal::VerifyMessage(metadata.data(), metadata.size(), &fb_message));
+
+  const auto* fb_sparse_tensor = fb_message->header_as_SparseTensor();
+  if (!fb_sparse_tensor) {
+    return Status::IOError(
+        "Header-type of flatbuffer-encoded Message is not SparseTensor.");
+  }
+  *out_fb_sparse_tensor = fb_sparse_tensor;
+
+  const auto* fb_data = fb_sparse_tensor->data();
+  const auto data_offset = fb_data->offset();
+  if (!BitUtil::IsMultipleOf8(data_offset)) {
+    return Status::Invalid(
+        "Buffer of sparse index data did not start on 8-byte aligned offset: ",
+        data_offset);
+  }
+  *out_buffer = fb_data;
+
+  return Status::OK();
+}
+
+} // namespace
+
+namespace internal {
+
+namespace {
+
+/// \brief Number of body buffers an IPC payload carries for a sparse tensor.
+///
+/// COO uses 2 buffers, CSR and CSC use 3, and CSF uses 2 * ndim. Any other
+/// format id yields Status::Invalid.
+Result<size_t> GetSparseTensorBodyBufferCount(SparseTensorFormat::type format_id,
+                                              const size_t ndim) {
+  switch (format_id) {
+    case SparseTensorFormat::COO:
+      return 2;
+
+    // Both compressed-matrix layouts carry the same number of buffers.
+    case SparseTensorFormat::CSR:
+    case SparseTensorFormat::CSC:
+      return 3;
+
+    case SparseTensorFormat::CSF:
+      return 2 * ndim;
+
+    default:
+      return Status::Invalid("Unrecognized sparse tensor format");
+  }
+}
+
+/// \brief Verify that `payload` holds exactly the number of body buffers that
+/// GetSparseTensorBodyBufferCount reports for the given format and `ndim`.
+Status CheckSparseTensorBodyBufferCount(const IpcPayload& payload,
+                                        SparseTensorFormat::type sparse_tensor_format_id,
+                                        const size_t ndim) {
+  ARROW_ASSIGN_OR_RAISE(const size_t expected_count,
+                        GetSparseTensorBodyBufferCount(sparse_tensor_format_id, ndim));
+  if (payload.body_buffers.size() == expected_count) {
+    return Status::OK();
+  }
+  return Status::Invalid("Invalid body buffer count for a sparse tensor");
+}
+
+} // namespace
+
+/// \brief Report how many body buffers the sparse tensor described by
+/// `metadata` is expected to have.
+///
+/// Only the shape and the format id are requested from the metadata; the
+/// other outputs of GetSparseTensorMetadata are passed as nullptr.
+Result<size_t> ReadSparseTensorBodyBufferCount(const Buffer& metadata) {
+  std::vector<int64_t> tensor_shape;
+  SparseTensorFormat::type tensor_format;
+
+  RETURN_NOT_OK(internal::GetSparseTensorMetadata(metadata, nullptr, &tensor_shape,
+                                                  nullptr, nullptr, &tensor_format));
+
+  const size_t ndim = static_cast<size_t>(tensor_shape.size());
+  return GetSparseTensorBodyBufferCount(tensor_format, ndim);
+}
+
+/// \brief Reassemble a SparseTensor from an in-memory IPC payload.
+///
+/// The payload metadata is decoded first, then the number of body buffers is
+/// checked against the count expected for the sparse format. The body buffers
+/// are consumed positionally:
+///   - COO: [indices, data]
+///   - CSR/CSC: [indptr, indices, data]
+///   - CSF: ndim - 1 indptr buffers, then ndim indices buffers, then data
+///
+/// \return the sparse tensor, or Status::Invalid for an unsupported format,
+/// a wrong body buffer count, or a CSF tensor with an empty shape
+Result<std::shared_ptr<SparseTensor>> ReadSparseTensorPayload(const IpcPayload& payload) {
+  std::shared_ptr<DataType> type;
+  std::vector<int64_t> shape;
+  std::vector<std::string> dim_names;
+  int64_t non_zero_length;
+  SparseTensorFormat::type sparse_tensor_format_id;
+  const flatbuf::SparseTensor* sparse_tensor;
+  // Only needed as an output slot of ReadSparseTensorMetadata; the data buffer
+  // itself is taken from payload.body_buffers below.
+  const flatbuf::Buffer* buffer;
+
+  RETURN_NOT_OK(ReadSparseTensorMetadata(*payload.metadata, &type, &shape, &dim_names,
+                                         &non_zero_length, &sparse_tensor_format_id,
+                                         &sparse_tensor, &buffer));
+
+  RETURN_NOT_OK(CheckSparseTensorBodyBufferCount(payload, sparse_tensor_format_id,
+                                                 static_cast<size_t>(shape.size())));
+
+  switch (sparse_tensor_format_id) {
+    case SparseTensorFormat::COO: {
+      std::shared_ptr<SparseCOOIndex> sparse_index;
+      std::shared_ptr<DataType> indices_type;
+      RETURN_NOT_OK(internal::GetSparseCOOIndexMetadata(
+          sparse_tensor->sparseIndex_as_SparseTensorIndexCOO(), &indices_type));
+      ARROW_ASSIGN_OR_RAISE(sparse_index,
+                            SparseCOOIndex::Make(indices_type, shape, non_zero_length,
+                                                 payload.body_buffers[0]));
+      return MakeSparseTensorWithSparseCOOIndex(type, shape, dim_names, sparse_index,
+                                                non_zero_length, payload.body_buffers[1]);
+    }
+    case SparseTensorFormat::CSR: {
+      std::shared_ptr<SparseCSRIndex> sparse_index;
+      std::shared_ptr<DataType> indptr_type;
+      std::shared_ptr<DataType> indices_type;
+      RETURN_NOT_OK(internal::GetSparseCSXIndexMetadata(
+          sparse_tensor->sparseIndex_as_SparseMatrixIndexCSX(), &indptr_type,
+          &indices_type));
+      ARROW_CHECK_EQ(indptr_type, indices_type);
+      ARROW_ASSIGN_OR_RAISE(
+          sparse_index,
+          SparseCSRIndex::Make(indices_type, shape, non_zero_length,
+                               payload.body_buffers[0], payload.body_buffers[1]));
+      return MakeSparseTensorWithSparseCSRIndex(type, shape, dim_names, sparse_index,
+                                                non_zero_length, payload.body_buffers[2]);
+    }
+    case SparseTensorFormat::CSC: {
+      std::shared_ptr<SparseCSCIndex> sparse_index;
+      std::shared_ptr<DataType> indptr_type;
+      std::shared_ptr<DataType> indices_type;
+      RETURN_NOT_OK(internal::GetSparseCSXIndexMetadata(
+          sparse_tensor->sparseIndex_as_SparseMatrixIndexCSX(), &indptr_type,
+          &indices_type));
+      ARROW_CHECK_EQ(indptr_type, indices_type);
+      ARROW_ASSIGN_OR_RAISE(
+          sparse_index,
+          SparseCSCIndex::Make(indices_type, shape, non_zero_length,
+                               payload.body_buffers[0], payload.body_buffers[1]));
+      return MakeSparseTensorWithSparseCSCIndex(type, shape, dim_names, sparse_index,
+                                                non_zero_length, payload.body_buffers[2]);
+    }
+    case SparseTensorFormat::CSF: {
+      std::shared_ptr<SparseCSFIndex> sparse_index;
+      std::shared_ptr<DataType> indptr_type, indices_type;
+      std::vector<int64_t> axis_order, indices_size;
+
+      RETURN_NOT_OK(internal::GetSparseCSFIndexMetadata(
+          sparse_tensor->sparseIndex_as_SparseTensorIndexCSF(), &axis_order,
+          &indices_size, &indptr_type, &indices_type));
+      ARROW_CHECK_EQ(indptr_type, indices_type);
+
+      const int64_t ndim = shape.size();
+      // An empty shape passes the buffer-count check (0 == 2 * 0) but would
+      // make `ndim - 1` a negative vector size and `2 * ndim - 1` a negative
+      // index, so reject it explicitly.
+      if (ndim < 1) {
+        return Status::Invalid("Invalid shape length for a sparse CSF tensor");
+      }
+      std::vector<std::shared_ptr<Buffer>> indptr_data(ndim - 1);
+      std::vector<std::shared_ptr<Buffer>> indices_data(ndim);
+
+      for (int64_t i = 0; i < ndim - 1; ++i) {
+        indptr_data[i] = payload.body_buffers[i];
+      }
+      // The indices buffers follow directly after the ndim - 1 indptr buffers.
+      for (int64_t i = 0; i < ndim; ++i) {
+        indices_data[i] = payload.body_buffers[i + ndim - 1];
+      }
+
+      ARROW_ASSIGN_OR_RAISE(sparse_index,
+                            SparseCSFIndex::Make(indptr_type, indices_type, indices_size,
+                                                 axis_order, indptr_data, indices_data));
+      // The last of the 2 * ndim body buffers holds the tensor values.
+      return MakeSparseTensorWithSparseCSFIndex(type, shape, dim_names, sparse_index,
+                                                payload.body_buffers[2 * ndim - 1]);
+    }
+    default:
+      return Status::Invalid("Unsupported sparse index format");
+  }
+}
+
+} // namespace internal
+
+/// \brief Read a sparse tensor whose buffers live in `file`.
+///
+/// `metadata` is decoded first; the values buffer is then read at the offset
+/// and length recorded in the metadata, and finally the format-specific sparse
+/// index is read from the same file. Unsupported formats give Status::Invalid.
+Result<std::shared_ptr<SparseTensor>> ReadSparseTensor(const Buffer& metadata,
+                                                       io::RandomAccessFile* file) {
+  std::shared_ptr<DataType> value_type;
+  std::vector<int64_t> shape;
+  std::vector<std::string> dim_names;
+  int64_t nnz;
+  SparseTensorFormat::type format_id;
+  const flatbuf::SparseTensor* fb_tensor;
+  const flatbuf::Buffer* fb_data;
+
+  RETURN_NOT_OK(ReadSparseTensorMetadata(metadata, &value_type, &shape, &dim_names, &nnz,
+                                         &format_id, &fb_tensor, &fb_data));
+
+  // The values are read before the index, so a failing values read is
+  // reported first.
+  ARROW_ASSIGN_OR_RAISE(auto values, file->ReadAt(fb_data->offset(), fb_data->length()));
+
+  switch (format_id) {
+    case SparseTensorFormat::COO: {
+      ARROW_ASSIGN_OR_RAISE(auto index,
+                            ReadSparseCOOIndex(fb_tensor, shape, nnz, file));
+      return MakeSparseTensorWithSparseCOOIndex(
+          value_type, shape, dim_names, checked_pointer_cast<SparseCOOIndex>(index), nnz,
+          values);
+    }
+    case SparseTensorFormat::CSR: {
+      ARROW_ASSIGN_OR_RAISE(auto index,
+                            ReadSparseCSXIndex(fb_tensor, shape, nnz, file));
+      return MakeSparseTensorWithSparseCSRIndex(
+          value_type, shape, dim_names, checked_pointer_cast<SparseCSRIndex>(index), nnz,
+          values);
+    }
+    case SparseTensorFormat::CSC: {
+      ARROW_ASSIGN_OR_RAISE(auto index,
+                            ReadSparseCSXIndex(fb_tensor, shape, nnz, file));
+      return MakeSparseTensorWithSparseCSCIndex(
+          value_type, shape, dim_names, checked_pointer_cast<SparseCSCIndex>(index), nnz,
+          values);
+    }
+    case SparseTensorFormat::CSF: {
+      ARROW_ASSIGN_OR_RAISE(auto index, ReadSparseCSFIndex(fb_tensor, shape, file));
+      return MakeSparseTensorWithSparseCSFIndex(
+          value_type, shape, dim_names, checked_pointer_cast<SparseCSFIndex>(index),
+          values);
+    }
+    default:
+      return Status::Invalid("Unsupported sparse index format");
+  }
+}
+
+/// \brief Read a sparse tensor from an IPC message that carries a body.
+///
+/// The message body is wrapped in a reader obtained from Buffer::GetReader and
+/// handed to the (metadata, file) overload.
+Result<std::shared_ptr<SparseTensor>> ReadSparseTensor(const Message& message) {
+  CHECK_HAS_BODY(message);
+  ARROW_ASSIGN_OR_RAISE(auto body_reader, Buffer::GetReader(message.body()));
+  io::RandomAccessFile* body_file = body_reader.get();
+  return ReadSparseTensor(*message.metadata(), body_file);
+}
+
+/// \brief Read one contiguous message from `file` and decode it as a sparse
+/// tensor.
+///
+/// The message must be of type SPARSE_TENSOR and must have a body; both
+/// conditions are enforced by the CHECK_* macros before any buffer is read.
+Result<std::shared_ptr<SparseTensor>> ReadSparseTensor(io::InputStream* file) {
+  std::unique_ptr<Message> tensor_message;
+  RETURN_NOT_OK(ReadContiguousPayload(file, &tensor_message));
+  CHECK_MESSAGE_TYPE(MessageType::SPARSE_TENSOR, tensor_message->type());
+  CHECK_HAS_BODY(*tensor_message);
+  ARROW_ASSIGN_OR_RAISE(auto body_reader, Buffer::GetReader(tensor_message->body()));
+  io::RandomAccessFile* body_file = body_reader.get();
+  return ReadSparseTensor(*tensor_message->metadata(), body_file);
+}
+
+///////////////////////////////////////////////////////////////////////////
+// Helpers for fuzzing
+
+namespace internal {
+
+/// \brief Fuzzing entry point for the IPC stream format.
+///
+/// Wraps the `size` bytes at `data` in a Buffer, opens them as a record batch
+/// stream and fully validates every batch until the reader returns a null
+/// batch. The first failing Status is returned as-is.
+Status FuzzIpcStream(const uint8_t* data, int64_t size) {
+  auto input = std::make_shared<Buffer>(data, size);
+  io::BufferReader input_reader(input);
+
+  std::shared_ptr<RecordBatchReader> stream_reader;
+  ARROW_ASSIGN_OR_RAISE(stream_reader, RecordBatchStreamReader::Open(&input_reader));
+
+  for (;;) {
+    std::shared_ptr<arrow::RecordBatch> next_batch;
+    RETURN_NOT_OK(stream_reader->ReadNext(&next_batch));
+    if (!next_batch) {
+      return Status::OK();
+    }
+    RETURN_NOT_OK(next_batch->ValidateFull());
+  }
+}
+
+/// \brief Fuzzing entry point for the IPC file format.
+///
+/// Wraps the `size` bytes at `data` in a Buffer, opens them as a record batch
+/// file and fully validates each record batch in index order. The first
+/// failing Status is returned as-is.
+Status FuzzIpcFile(const uint8_t* data, int64_t size) {
+  auto input = std::make_shared<Buffer>(data, size);
+  io::BufferReader input_reader(input);
+
+  std::shared_ptr<RecordBatchFileReader> file_reader;
+  ARROW_ASSIGN_OR_RAISE(file_reader, RecordBatchFileReader::Open(&input_reader));
+
+  const int batch_count = file_reader->num_record_batches();
+  int index = 0;
+  while (index < batch_count) {
+    ARROW_ASSIGN_OR_RAISE(auto batch, file_reader->ReadRecordBatch(index));
+    RETURN_NOT_OK(batch->ValidateFull());
+    ++index;
+  }
+
+  return Status::OK();
+}
+
Status FuzzIpcTensorStream(const uint8_t* data, int64_t size) {
auto buffer = std::make_shared<Buffer>(data, size);
io::BufferReader buffer_reader(buffer);
@@ -2076,6 +2076,6 @@ Status FuzzIpcTensorStream(const uint8_t* data, int64_t size) {
return Status::OK();
}
-} // namespace internal
-} // namespace ipc
-} // namespace arrow
+} // namespace internal
+} // namespace ipc
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/reader.h b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/reader.h
index 6f2157557f3..4b8bc10d612 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/reader.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/reader.h
@@ -1,155 +1,155 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Read Arrow files and streams
-
-#pragma once
-
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-#include <utility>
-#include <vector>
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Read Arrow files and streams
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <utility>
+#include <vector>
+
#include "arrow/io/caching.h"
#include "arrow/io/type_fwd.h"
-#include "arrow/ipc/message.h"
-#include "arrow/ipc/options.h"
-#include "arrow/record_batch.h"
-#include "arrow/result.h"
+#include "arrow/ipc/message.h"
+#include "arrow/ipc/options.h"
+#include "arrow/record_batch.h"
+#include "arrow/result.h"
#include "arrow/type_fwd.h"
#include "arrow/util/async_generator.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace ipc {
-
-class DictionaryMemo;
-struct IpcPayload;
-
-using RecordBatchReader = ::arrow::RecordBatchReader;
-
-struct ReadStats {
- /// Number of IPC messages read.
- int64_t num_messages = 0;
- /// Number of record batches read.
- int64_t num_record_batches = 0;
- /// Number of dictionary batches read.
- ///
- /// Note: num_dictionary_batches >= num_dictionary_deltas + num_replaced_dictionaries
- int64_t num_dictionary_batches = 0;
-
- /// Number of dictionary deltas read.
- int64_t num_dictionary_deltas = 0;
- /// Number of replaced dictionaries (i.e. where a dictionary batch replaces
- /// an existing dictionary with an unrelated new dictionary).
- int64_t num_replaced_dictionaries = 0;
-};
-
-/// \brief Synchronous batch stream reader that reads from io::InputStream
-///
-/// This class reads the schema (plus any dictionaries) as the first messages
-/// in the stream, followed by record batches. For more granular zero-copy
-/// reads see the ReadRecordBatch functions
-class ARROW_EXPORT RecordBatchStreamReader : public RecordBatchReader {
- public:
- /// Create batch reader from generic MessageReader.
- /// This will take ownership of the given MessageReader.
- ///
- /// \param[in] message_reader a MessageReader implementation
- /// \param[in] options any IPC reading options (optional)
- /// \return the created batch reader
- static Result<std::shared_ptr<RecordBatchStreamReader>> Open(
- std::unique_ptr<MessageReader> message_reader,
- const IpcReadOptions& options = IpcReadOptions::Defaults());
-
- /// \brief Record batch stream reader from InputStream
- ///
- /// \param[in] stream an input stream instance. Must stay alive throughout
- /// lifetime of stream reader
- /// \param[in] options any IPC reading options (optional)
- /// \return the created batch reader
- static Result<std::shared_ptr<RecordBatchStreamReader>> Open(
- io::InputStream* stream,
- const IpcReadOptions& options = IpcReadOptions::Defaults());
-
- /// \brief Open stream and retain ownership of stream object
- /// \param[in] stream the input stream
- /// \param[in] options any IPC reading options (optional)
- /// \return the created batch reader
- static Result<std::shared_ptr<RecordBatchStreamReader>> Open(
- const std::shared_ptr<io::InputStream>& stream,
- const IpcReadOptions& options = IpcReadOptions::Defaults());
-
- /// \brief Return current read statistics
- virtual ReadStats stats() const = 0;
-};
-
-/// \brief Reads the record batch file format
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace ipc {
+
+class DictionaryMemo;
+struct IpcPayload;
+
+using RecordBatchReader = ::arrow::RecordBatchReader;
+
+/// \brief Counters reported by an IPC reader; every counter starts at zero.
+struct ReadStats {
+  /// Count of IPC messages read.
+  int64_t num_messages{0};
+  /// Count of record batches read.
+  int64_t num_record_batches{0};
+  /// Count of dictionary batches read.
+  ///
+  /// Note: num_dictionary_batches >= num_dictionary_deltas + num_replaced_dictionaries
+  int64_t num_dictionary_batches{0};
+
+  /// Count of dictionary deltas read.
+  int64_t num_dictionary_deltas{0};
+  /// Count of replaced dictionaries, i.e. dictionary batches that replaced an
+  /// existing dictionary with an unrelated new dictionary.
+  int64_t num_replaced_dictionaries{0};
+};
+
+/// \brief Synchronous batch stream reader that reads from io::InputStream
+///
+/// This class reads the schema (plus any dictionaries) as the first messages
+/// in the stream, followed by record batches. For more granular zero-copy
+/// reads see the ReadRecordBatch functions
+class ARROW_EXPORT RecordBatchStreamReader : public RecordBatchReader {
+ public:
+ /// \brief Create batch reader from generic MessageReader.
+ ///
+ /// This will take ownership of the given MessageReader.
+ ///
+ /// \param[in] message_reader a MessageReader implementation
+ /// \param[in] options any IPC reading options (optional)
+ /// \return the created batch reader
+ static Result<std::shared_ptr<RecordBatchStreamReader>> Open(
+ std::unique_ptr<MessageReader> message_reader,
+ const IpcReadOptions& options = IpcReadOptions::Defaults());
+
+ /// \brief Record batch stream reader from InputStream
+ ///
+ /// The stream is passed as a raw pointer, so this overload does not take
+ /// ownership of it.
+ ///
+ /// \param[in] stream an input stream instance. Must stay alive throughout
+ /// lifetime of stream reader
+ /// \param[in] options any IPC reading options (optional)
+ /// \return the created batch reader
+ static Result<std::shared_ptr<RecordBatchStreamReader>> Open(
+ io::InputStream* stream,
+ const IpcReadOptions& options = IpcReadOptions::Defaults());
+
+ /// \brief Open stream and retain ownership of stream object
+ /// \param[in] stream the input stream
+ /// \param[in] options any IPC reading options (optional)
+ /// \return the created batch reader
+ static Result<std::shared_ptr<RecordBatchStreamReader>> Open(
+ const std::shared_ptr<io::InputStream>& stream,
+ const IpcReadOptions& options = IpcReadOptions::Defaults());
+
+ /// \brief Return current read statistics
+ ///
+ /// Pure virtual: concrete readers supply the ReadStats value.
+ virtual ReadStats stats() const = 0;
+};
+
+/// \brief Reads the record batch file format
class ARROW_EXPORT RecordBatchFileReader
: public std::enable_shared_from_this<RecordBatchFileReader> {
- public:
- virtual ~RecordBatchFileReader() = default;
-
- /// \brief Open a RecordBatchFileReader
- ///
- /// Open a file-like object that is assumed to be self-contained; i.e., the
- /// end of the file interface is the end of the Arrow file. Note that there
- /// can be any amount of data preceding the Arrow-formatted data, because we
- /// need only locate the end of the Arrow file stream to discover the metadata
- /// and then proceed to read the data into memory.
- static Result<std::shared_ptr<RecordBatchFileReader>> Open(
- io::RandomAccessFile* file,
- const IpcReadOptions& options = IpcReadOptions::Defaults());
-
- /// \brief Open a RecordBatchFileReader
- /// If the file is embedded within some larger file or memory region, you can
- /// pass the absolute memory offset to the end of the file (which contains the
- /// metadata footer). The metadata must have been written with memory offsets
- /// relative to the start of the containing file
- ///
- /// \param[in] file the data source
- /// \param[in] footer_offset the position of the end of the Arrow file
- /// \param[in] options options for IPC reading
- /// \return the returned reader
- static Result<std::shared_ptr<RecordBatchFileReader>> Open(
- io::RandomAccessFile* file, int64_t footer_offset,
- const IpcReadOptions& options = IpcReadOptions::Defaults());
-
- /// \brief Version of Open that retains ownership of file
- ///
- /// \param[in] file the data source
- /// \param[in] options options for IPC reading
- /// \return the returned reader
- static Result<std::shared_ptr<RecordBatchFileReader>> Open(
- const std::shared_ptr<io::RandomAccessFile>& file,
- const IpcReadOptions& options = IpcReadOptions::Defaults());
-
- /// \brief Version of Open that retains ownership of file
- ///
- /// \param[in] file the data source
- /// \param[in] footer_offset the position of the end of the Arrow file
- /// \param[in] options options for IPC reading
- /// \return the returned reader
- static Result<std::shared_ptr<RecordBatchFileReader>> Open(
- const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset,
- const IpcReadOptions& options = IpcReadOptions::Defaults());
-
+ public:
+ virtual ~RecordBatchFileReader() = default;
+
+ /// \brief Open a RecordBatchFileReader
+ ///
+ /// Open a file-like object that is assumed to be self-contained; i.e., the
+ /// end of the file interface is the end of the Arrow file. Note that there
+ /// can be any amount of data preceding the Arrow-formatted data, because we
+ /// need only locate the end of the Arrow file stream to discover the metadata
+ /// and then proceed to read the data into memory.
+ static Result<std::shared_ptr<RecordBatchFileReader>> Open(
+ io::RandomAccessFile* file,
+ const IpcReadOptions& options = IpcReadOptions::Defaults());
+
+ /// \brief Open a RecordBatchFileReader
+ /// If the file is embedded within some larger file or memory region, you can
+ /// pass the absolute memory offset to the end of the file (which contains the
+ /// metadata footer). The metadata must have been written with memory offsets
+ /// relative to the start of the containing file
+ ///
+ /// \param[in] file the data source
+ /// \param[in] footer_offset the position of the end of the Arrow file
+ /// \param[in] options options for IPC reading
+ /// \return the returned reader
+ static Result<std::shared_ptr<RecordBatchFileReader>> Open(
+ io::RandomAccessFile* file, int64_t footer_offset,
+ const IpcReadOptions& options = IpcReadOptions::Defaults());
+
+ /// \brief Version of Open that retains ownership of file
+ ///
+ /// \param[in] file the data source
+ /// \param[in] options options for IPC reading
+ /// \return the returned reader
+ static Result<std::shared_ptr<RecordBatchFileReader>> Open(
+ const std::shared_ptr<io::RandomAccessFile>& file,
+ const IpcReadOptions& options = IpcReadOptions::Defaults());
+
+ /// \brief Version of Open that retains ownership of file
+ ///
+ /// \param[in] file the data source
+ /// \param[in] footer_offset the position of the end of the Arrow file
+ /// \param[in] options options for IPC reading
+ /// \return the returned reader
+ static Result<std::shared_ptr<RecordBatchFileReader>> Open(
+ const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset,
+ const IpcReadOptions& options = IpcReadOptions::Defaults());
+
/// \brief Open a file asynchronously (owns the file).
static Future<std::shared_ptr<RecordBatchFileReader>> OpenAsync(
const std::shared_ptr<io::RandomAccessFile>& file,
@@ -170,28 +170,28 @@ class ARROW_EXPORT RecordBatchFileReader
io::RandomAccessFile* file, int64_t footer_offset,
const IpcReadOptions& options = IpcReadOptions::Defaults());
- /// \brief The schema read from the file
- virtual std::shared_ptr<Schema> schema() const = 0;
-
- /// \brief Returns the number of record batches in the file
- virtual int num_record_batches() const = 0;
-
- /// \brief Return the metadata version from the file metadata
- virtual MetadataVersion version() const = 0;
-
- /// \brief Return the contents of the custom_metadata field from the file's
- /// Footer
- virtual std::shared_ptr<const KeyValueMetadata> metadata() const = 0;
-
- /// \brief Read a particular record batch from the file. Does not copy memory
- /// if the input source supports zero-copy.
- ///
- /// \param[in] i the index of the record batch to return
- /// \return the read batch
- virtual Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(int i) = 0;
-
- /// \brief Return current read statistics
- virtual ReadStats stats() const = 0;
+ /// \brief The schema read from the file
+ virtual std::shared_ptr<Schema> schema() const = 0;
+
+ /// \brief Returns the number of record batches in the file
+ virtual int num_record_batches() const = 0;
+
+ /// \brief Return the metadata version from the file metadata
+ virtual MetadataVersion version() const = 0;
+
+ /// \brief Return the contents of the custom_metadata field from the file's
+ /// Footer
+ virtual std::shared_ptr<const KeyValueMetadata> metadata() const = 0;
+
+ /// \brief Read a particular record batch from the file. Does not copy memory
+ /// if the input source supports zero-copy.
+ ///
+ /// \param[in] i the index of the record batch to return
+ /// \return the read batch
+ virtual Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(int i) = 0;
+
+ /// \brief Return current read statistics
+ virtual ReadStats stats() const = 0;
/// \brief Computes the total number of rows in the file.
virtual Result<int64_t> CountRows() = 0;
@@ -210,327 +210,327 @@ class ARROW_EXPORT RecordBatchFileReader
const io::IOContext& io_context = io::default_io_context(),
const io::CacheOptions cache_options = io::CacheOptions::LazyDefaults(),
arrow::internal::Executor* executor = NULLPTR) = 0;
-};
-
-/// \brief A general listener class to receive events.
-///
-/// You must implement callback methods for interested events.
-///
-/// This API is EXPERIMENTAL.
-///
-/// \since 0.17.0
-class ARROW_EXPORT Listener {
- public:
- virtual ~Listener() = default;
-
- /// \brief Called when end-of-stream is received.
- ///
- /// The default implementation just returns arrow::Status::OK().
- ///
- /// \return Status
- ///
- /// \see StreamDecoder
- virtual Status OnEOS();
-
- /// \brief Called when a record batch is decoded.
- ///
- /// The default implementation just returns
- /// arrow::Status::NotImplemented().
- ///
- /// \param[in] record_batch a record batch decoded
- /// \return Status
- ///
- /// \see StreamDecoder
- virtual Status OnRecordBatchDecoded(std::shared_ptr<RecordBatch> record_batch);
-
- /// \brief Called when a schema is decoded.
- ///
- /// The default implementation just returns arrow::Status::OK().
- ///
- /// \param[in] schema a schema decoded
- /// \return Status
- ///
- /// \see StreamDecoder
- virtual Status OnSchemaDecoded(std::shared_ptr<Schema> schema);
-};
-
-/// \brief Collect schema and record batches decoded by StreamDecoder.
-///
-/// This API is EXPERIMENTAL.
-///
-/// \since 0.17.0
-class ARROW_EXPORT CollectListener : public Listener {
- public:
- CollectListener() : schema_(), record_batches_() {}
- virtual ~CollectListener() = default;
-
- Status OnSchemaDecoded(std::shared_ptr<Schema> schema) override {
- schema_ = std::move(schema);
- return Status::OK();
- }
-
- Status OnRecordBatchDecoded(std::shared_ptr<RecordBatch> record_batch) override {
- record_batches_.push_back(std::move(record_batch));
- return Status::OK();
- }
-
- /// \return the decoded schema
- std::shared_ptr<Schema> schema() const { return schema_; }
-
- /// \return the all decoded record batches
- std::vector<std::shared_ptr<RecordBatch>> record_batches() const {
- return record_batches_;
- }
-
- private:
- std::shared_ptr<Schema> schema_;
- std::vector<std::shared_ptr<RecordBatch>> record_batches_;
-};
-
-/// \brief Push style stream decoder that receives data from user.
-///
-/// This class decodes the Apache Arrow IPC streaming format data.
-///
-/// This API is EXPERIMENTAL.
-///
-/// \see https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format
-///
-/// \since 0.17.0
-class ARROW_EXPORT StreamDecoder {
- public:
- /// \brief Construct a stream decoder.
- ///
- /// \param[in] listener a Listener that must implement
- /// Listener::OnRecordBatchDecoded() to receive decoded record batches
- /// \param[in] options any IPC reading options (optional)
- StreamDecoder(std::shared_ptr<Listener> listener,
+};
+
+/// \brief A general listener class to receive events.
+///
+/// You must implement callback methods for the events you are interested in.
+///
+/// This API is EXPERIMENTAL.
+///
+/// \since 0.17.0
+class ARROW_EXPORT Listener {
+ public:
+ virtual ~Listener() = default;
+
+ /// \brief Called when end-of-stream is received.
+ ///
+ /// The default implementation just returns arrow::Status::OK().
+ ///
+ /// \return Status
+ ///
+ /// \see StreamDecoder
+ virtual Status OnEOS();
+
+ /// \brief Called when a record batch is decoded.
+ ///
+ /// The default implementation just returns
+ /// arrow::Status::NotImplemented().
+ ///
+ /// \param[in] record_batch a record batch decoded
+ /// \return Status
+ ///
+ /// \see StreamDecoder
+ virtual Status OnRecordBatchDecoded(std::shared_ptr<RecordBatch> record_batch);
+
+ /// \brief Called when a schema is decoded.
+ ///
+ /// The default implementation just returns arrow::Status::OK().
+ ///
+ /// \param[in] schema a schema decoded
+ /// \return Status
+ ///
+ /// \see StreamDecoder
+ virtual Status OnSchemaDecoded(std::shared_ptr<Schema> schema);
+};
+
+/// \brief Collect schema and record batches decoded by StreamDecoder.
+///
+/// This API is EXPERIMENTAL.
+///
+/// \since 0.17.0
+class ARROW_EXPORT CollectListener : public Listener {
+ public:
+ CollectListener() : schema_(), record_batches_() {}
+ virtual ~CollectListener() = default;
+
+ Status OnSchemaDecoded(std::shared_ptr<Schema> schema) override {
+ schema_ = std::move(schema);
+ return Status::OK();
+ }
+
+ Status OnRecordBatchDecoded(std::shared_ptr<RecordBatch> record_batch) override {
+ record_batches_.push_back(std::move(record_batch));
+ return Status::OK();
+ }
+
+ /// \return the decoded schema
+ std::shared_ptr<Schema> schema() const { return schema_; }
+
+ /// \return all the decoded record batches
+ std::vector<std::shared_ptr<RecordBatch>> record_batches() const {
+ return record_batches_;
+ }
+
+ private:
+ std::shared_ptr<Schema> schema_;
+ std::vector<std::shared_ptr<RecordBatch>> record_batches_;
+};
+
+/// \brief Push style stream decoder that receives data from user.
+///
+/// This class decodes the Apache Arrow IPC streaming format data.
+///
+/// This API is EXPERIMENTAL.
+///
+/// \see https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format
+///
+/// \since 0.17.0
+class ARROW_EXPORT StreamDecoder {
+ public:
+ /// \brief Construct a stream decoder.
+ ///
+ /// \param[in] listener a Listener that must implement
+ /// Listener::OnRecordBatchDecoded() to receive decoded record batches
+ /// \param[in] options any IPC reading options (optional)
+ StreamDecoder(std::shared_ptr<Listener> listener,
IpcReadOptions options = IpcReadOptions::Defaults());
-
- virtual ~StreamDecoder();
-
- /// \brief Feed data to the decoder as a raw data.
- ///
- /// If the decoder can read one or more record batches by the data,
- /// the decoder calls listener->OnRecordBatchDecoded() with a
- /// decoded record batch multiple times.
- ///
- /// \param[in] data a raw data to be processed. This data isn't
- /// copied. The passed memory must be kept alive through record
- /// batch processing.
- /// \param[in] size raw data size.
- /// \return Status
- Status Consume(const uint8_t* data, int64_t size);
-
- /// \brief Feed data to the decoder as a Buffer.
- ///
- /// If the decoder can read one or more record batches by the
- /// Buffer, the decoder calls listener->RecordBatchReceived() with a
- /// decoded record batch multiple times.
- ///
- /// \param[in] buffer a Buffer to be processed.
- /// \return Status
- Status Consume(std::shared_ptr<Buffer> buffer);
-
- /// \return the shared schema of the record batches in the stream
- std::shared_ptr<Schema> schema() const;
-
- /// \brief Return the number of bytes needed to advance the state of
- /// the decoder.
- ///
- /// This method is provided for users who want to optimize performance.
- /// Normal users don't need to use this method.
- ///
- /// Here is an example usage for normal users:
- ///
- /// ~~~{.cpp}
- /// decoder.Consume(buffer1);
- /// decoder.Consume(buffer2);
- /// decoder.Consume(buffer3);
- /// ~~~
- ///
- /// Decoder has internal buffer. If consumed data isn't enough to
- /// advance the state of the decoder, consumed data is buffered to
- /// the internal buffer. It causes performance overhead.
- ///
- /// If you pass next_required_size() size data to each Consume()
- /// call, the decoder doesn't use its internal buffer. It improves
- /// performance.
- ///
- /// Here is an example usage to avoid using internal buffer:
- ///
- /// ~~~{.cpp}
- /// buffer1 = get_data(decoder.next_required_size());
- /// decoder.Consume(buffer1);
- /// buffer2 = get_data(decoder.next_required_size());
- /// decoder.Consume(buffer2);
- /// ~~~
- ///
- /// Users can use this method to avoid creating small chunks. Record
- /// batch data must be contiguous data. If users pass small chunks
- /// to the decoder, the decoder needs concatenate small chunks
- /// internally. It causes performance overhead.
- ///
- /// Here is an example usage to reduce small chunks:
- ///
- /// ~~~{.cpp}
- /// buffer = AllocateResizableBuffer();
- /// while ((small_chunk = get_data(&small_chunk_size))) {
- /// auto current_buffer_size = buffer->size();
- /// buffer->Resize(current_buffer_size + small_chunk_size);
- /// memcpy(buffer->mutable_data() + current_buffer_size,
- /// small_chunk,
- /// small_chunk_size);
+
+ virtual ~StreamDecoder();
+
+ /// \brief Feed data to the decoder as raw data.
+ ///
+ /// If the decoder can read one or more record batches by the data,
+ /// the decoder calls listener->OnRecordBatchDecoded() with a
+ /// decoded record batch multiple times.
+ ///
+ /// \param[in] data raw data to be processed. This data isn't
+ /// copied. The passed memory must be kept alive through record
+ /// batch processing.
+ /// \param[in] size raw data size.
+ /// \return Status
+ Status Consume(const uint8_t* data, int64_t size);
+
+ /// \brief Feed data to the decoder as a Buffer.
+ ///
+ /// If the decoder can read one or more record batches by the
+ /// Buffer, the decoder calls listener->OnRecordBatchDecoded() with a
+ /// decoded record batch multiple times.
+ ///
+ /// \param[in] buffer a Buffer to be processed.
+ /// \return Status
+ Status Consume(std::shared_ptr<Buffer> buffer);
+
+ /// \return the shared schema of the record batches in the stream
+ std::shared_ptr<Schema> schema() const;
+
+ /// \brief Return the number of bytes needed to advance the state of
+ /// the decoder.
+ ///
+ /// This method is provided for users who want to optimize performance.
+ /// Normal users don't need to use this method.
+ ///
+ /// Here is an example usage for normal users:
+ ///
+ /// ~~~{.cpp}
+ /// decoder.Consume(buffer1);
+ /// decoder.Consume(buffer2);
+ /// decoder.Consume(buffer3);
+ /// ~~~
+ ///
+ /// Decoder has internal buffer. If consumed data isn't enough to
+ /// advance the state of the decoder, consumed data is buffered to
+ /// the internal buffer. It causes performance overhead.
+ ///
+ /// If you pass next_required_size() size data to each Consume()
+ /// call, the decoder doesn't use its internal buffer. It improves
+ /// performance.
+ ///
+ /// Here is an example usage to avoid using internal buffer:
+ ///
+ /// ~~~{.cpp}
+ /// buffer1 = get_data(decoder.next_required_size());
+ /// decoder.Consume(buffer1);
+ /// buffer2 = get_data(decoder.next_required_size());
+ /// decoder.Consume(buffer2);
+ /// ~~~
+ ///
+ /// Users can use this method to avoid creating small chunks. Record
+ /// batch data must be contiguous data. If users pass small chunks
+ /// to the decoder, the decoder needs to concatenate small chunks
+ /// internally. It causes performance overhead.
+ ///
+ /// Here is an example usage to reduce small chunks:
+ ///
+ /// ~~~{.cpp}
+ /// buffer = AllocateResizableBuffer();
+ /// while ((small_chunk = get_data(&small_chunk_size))) {
+ /// auto current_buffer_size = buffer->size();
+ /// buffer->Resize(current_buffer_size + small_chunk_size);
+ /// memcpy(buffer->mutable_data() + current_buffer_size,
+ /// small_chunk,
+ /// small_chunk_size);
/// if (buffer->size() < decoder.next_required_size()) {
- /// continue;
- /// }
- /// std::shared_ptr<arrow::Buffer> chunk(buffer.release());
- /// decoder.Consume(chunk);
- /// buffer = AllocateResizableBuffer();
- /// }
- /// if (buffer->size() > 0) {
- /// std::shared_ptr<arrow::Buffer> chunk(buffer.release());
- /// decoder.Consume(chunk);
- /// }
- /// ~~~
- ///
- /// \return the number of bytes needed to advance the state of the
- /// decoder
- int64_t next_required_size() const;
-
+ /// continue;
+ /// }
+ /// std::shared_ptr<arrow::Buffer> chunk(buffer.release());
+ /// decoder.Consume(chunk);
+ /// buffer = AllocateResizableBuffer();
+ /// }
+ /// if (buffer->size() > 0) {
+ /// std::shared_ptr<arrow::Buffer> chunk(buffer.release());
+ /// decoder.Consume(chunk);
+ /// }
+ /// ~~~
+ ///
+ /// \return the number of bytes needed to advance the state of the
+ /// decoder
+ int64_t next_required_size() const;
+
/// \brief Return current read statistics
ReadStats stats() const;
- private:
- class StreamDecoderImpl;
- std::unique_ptr<StreamDecoderImpl> impl_;
-
- ARROW_DISALLOW_COPY_AND_ASSIGN(StreamDecoder);
-};
-
-// Generic read functions; does not copy data if the input supports zero copy reads
-
-/// \brief Read Schema from stream serialized as a single IPC message
-/// and populate any dictionary-encoded fields into a DictionaryMemo
-///
-/// \param[in] stream an InputStream
-/// \param[in] dictionary_memo for recording dictionary-encoded fields
-/// \return the output Schema
-///
-/// If record batches follow the schema, it is better to use
-/// RecordBatchStreamReader
-ARROW_EXPORT
-Result<std::shared_ptr<Schema>> ReadSchema(io::InputStream* stream,
- DictionaryMemo* dictionary_memo);
-
-/// \brief Read Schema from encapsulated Message
-///
-/// \param[in] message the message containing the Schema IPC metadata
-/// \param[in] dictionary_memo DictionaryMemo for recording dictionary-encoded
-/// fields. Can be nullptr if you are sure there are no
-/// dictionary-encoded fields
-/// \return the resulting Schema
-ARROW_EXPORT
-Result<std::shared_ptr<Schema>> ReadSchema(const Message& message,
- DictionaryMemo* dictionary_memo);
-
-/// Read record batch as encapsulated IPC message with metadata size prefix and
-/// header
-///
-/// \param[in] schema the record batch schema
-/// \param[in] dictionary_memo DictionaryMemo which has any
-/// dictionaries. Can be nullptr if you are sure there are no
-/// dictionary-encoded fields
-/// \param[in] options IPC options for reading
-/// \param[in] stream the file where the batch is located
-/// \return the read record batch
-ARROW_EXPORT
-Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(
- const std::shared_ptr<Schema>& schema, const DictionaryMemo* dictionary_memo,
- const IpcReadOptions& options, io::InputStream* stream);
-
-/// \brief Read record batch from message
-///
-/// \param[in] message a Message containing the record batch metadata
-/// \param[in] schema the record batch schema
-/// \param[in] dictionary_memo DictionaryMemo which has any
-/// dictionaries. Can be nullptr if you are sure there are no
-/// dictionary-encoded fields
-/// \param[in] options IPC options for reading
-/// \return the read record batch
-ARROW_EXPORT
-Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(
- const Message& message, const std::shared_ptr<Schema>& schema,
- const DictionaryMemo* dictionary_memo, const IpcReadOptions& options);
-
-/// Read record batch from file given metadata and schema
-///
-/// \param[in] metadata a Message containing the record batch metadata
-/// \param[in] schema the record batch schema
-/// \param[in] dictionary_memo DictionaryMemo which has any
-/// dictionaries. Can be nullptr if you are sure there are no
-/// dictionary-encoded fields
-/// \param[in] file a random access file
-/// \param[in] options options for deserialization
-/// \return the read record batch
-ARROW_EXPORT
-Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(
- const Buffer& metadata, const std::shared_ptr<Schema>& schema,
- const DictionaryMemo* dictionary_memo, const IpcReadOptions& options,
- io::RandomAccessFile* file);
-
-/// \brief Read arrow::Tensor as encapsulated IPC message in file
-///
-/// \param[in] file an InputStream pointed at the start of the message
-/// \return the read tensor
-ARROW_EXPORT
-Result<std::shared_ptr<Tensor>> ReadTensor(io::InputStream* file);
-
-/// \brief EXPERIMENTAL: Read arrow::Tensor from IPC message
-///
-/// \param[in] message a Message containing the tensor metadata and body
-/// \return the read tensor
-ARROW_EXPORT
-Result<std::shared_ptr<Tensor>> ReadTensor(const Message& message);
-
-/// \brief EXPERIMENTAL: Read arrow::SparseTensor as encapsulated IPC message in file
-///
-/// \param[in] file an InputStream pointed at the start of the message
-/// \return the read sparse tensor
-ARROW_EXPORT
-Result<std::shared_ptr<SparseTensor>> ReadSparseTensor(io::InputStream* file);
-
-/// \brief EXPERIMENTAL: Read arrow::SparseTensor from IPC message
-///
-/// \param[in] message a Message containing the tensor metadata and body
-/// \return the read sparse tensor
-ARROW_EXPORT
-Result<std::shared_ptr<SparseTensor>> ReadSparseTensor(const Message& message);
-
-namespace internal {
-
-// These internal APIs may change without warning or deprecation
-
-/// \brief EXPERIMENTAL: Read arrow::SparseTensorFormat::type from a metadata
-/// \param[in] metadata a Buffer containing the sparse tensor metadata
-/// \return the count of the body buffers
-ARROW_EXPORT
-Result<size_t> ReadSparseTensorBodyBufferCount(const Buffer& metadata);
-
-/// \brief EXPERIMENTAL: Read arrow::SparseTensor from an IpcPayload
-/// \param[in] payload a IpcPayload contains a serialized SparseTensor
-/// \return the read sparse tensor
-ARROW_EXPORT
-Result<std::shared_ptr<SparseTensor>> ReadSparseTensorPayload(const IpcPayload& payload);
-
-// For fuzzing targets
-ARROW_EXPORT
-Status FuzzIpcStream(const uint8_t* data, int64_t size);
-ARROW_EXPORT
+ private:
+ class StreamDecoderImpl;
+ std::unique_ptr<StreamDecoderImpl> impl_;
+
+ ARROW_DISALLOW_COPY_AND_ASSIGN(StreamDecoder);
+};
+
+// Generic read functions; they do not copy data if the input supports zero-copy reads
+
+/// \brief Read Schema from stream serialized as a single IPC message
+/// and populate any dictionary-encoded fields into a DictionaryMemo
+///
+/// \param[in] stream an InputStream
+/// \param[in] dictionary_memo for recording dictionary-encoded fields
+/// \return the output Schema
+///
+/// If record batches follow the schema, it is better to use
+/// RecordBatchStreamReader
+ARROW_EXPORT
+Result<std::shared_ptr<Schema>> ReadSchema(io::InputStream* stream,
+ DictionaryMemo* dictionary_memo);
+
+/// \brief Read Schema from encapsulated Message
+///
+/// \param[in] message the message containing the Schema IPC metadata
+/// \param[in] dictionary_memo DictionaryMemo for recording dictionary-encoded
+/// fields. Can be nullptr if you are sure there are no
+/// dictionary-encoded fields
+/// \return the resulting Schema
+ARROW_EXPORT
+Result<std::shared_ptr<Schema>> ReadSchema(const Message& message,
+ DictionaryMemo* dictionary_memo);
+
+/// Read record batch as encapsulated IPC message with metadata size prefix and
+/// header
+///
+/// \param[in] schema the record batch schema
+/// \param[in] dictionary_memo DictionaryMemo which has any
+/// dictionaries. Can be nullptr if you are sure there are no
+/// dictionary-encoded fields
+/// \param[in] options IPC options for reading
+/// \param[in] stream the file where the batch is located
+/// \return the read record batch
+ARROW_EXPORT
+Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(
+ const std::shared_ptr<Schema>& schema, const DictionaryMemo* dictionary_memo,
+ const IpcReadOptions& options, io::InputStream* stream);
+
+/// \brief Read record batch from message
+///
+/// \param[in] message a Message containing the record batch metadata
+/// \param[in] schema the record batch schema
+/// \param[in] dictionary_memo DictionaryMemo which has any
+/// dictionaries. Can be nullptr if you are sure there are no
+/// dictionary-encoded fields
+/// \param[in] options IPC options for reading
+/// \return the read record batch
+ARROW_EXPORT
+Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(
+ const Message& message, const std::shared_ptr<Schema>& schema,
+ const DictionaryMemo* dictionary_memo, const IpcReadOptions& options);
+
+/// Read record batch from file given metadata and schema
+///
+/// \param[in] metadata a Message containing the record batch metadata
+/// \param[in] schema the record batch schema
+/// \param[in] dictionary_memo DictionaryMemo which has any
+/// dictionaries. Can be nullptr if you are sure there are no
+/// dictionary-encoded fields
+/// \param[in] file a random access file
+/// \param[in] options options for deserialization
+/// \return the read record batch
+ARROW_EXPORT
+Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(
+ const Buffer& metadata, const std::shared_ptr<Schema>& schema,
+ const DictionaryMemo* dictionary_memo, const IpcReadOptions& options,
+ io::RandomAccessFile* file);
+
+/// \brief Read arrow::Tensor as encapsulated IPC message in file
+///
+/// \param[in] file an InputStream pointed at the start of the message
+/// \return the read tensor
+ARROW_EXPORT
+Result<std::shared_ptr<Tensor>> ReadTensor(io::InputStream* file);
+
+/// \brief EXPERIMENTAL: Read arrow::Tensor from IPC message
+///
+/// \param[in] message a Message containing the tensor metadata and body
+/// \return the read tensor
+ARROW_EXPORT
+Result<std::shared_ptr<Tensor>> ReadTensor(const Message& message);
+
+/// \brief EXPERIMENTAL: Read arrow::SparseTensor as encapsulated IPC message in file
+///
+/// \param[in] file an InputStream pointed at the start of the message
+/// \return the read sparse tensor
+ARROW_EXPORT
+Result<std::shared_ptr<SparseTensor>> ReadSparseTensor(io::InputStream* file);
+
+/// \brief EXPERIMENTAL: Read arrow::SparseTensor from IPC message
+///
+/// \param[in] message a Message containing the tensor metadata and body
+/// \return the read sparse tensor
+ARROW_EXPORT
+Result<std::shared_ptr<SparseTensor>> ReadSparseTensor(const Message& message);
+
+namespace internal {
+
+// These internal APIs may change without warning or deprecation
+
+/// \brief EXPERIMENTAL: Read the body buffer count of a SparseTensor from metadata
+/// \param[in] metadata a Buffer containing the sparse tensor metadata
+/// \return the count of the body buffers
+ARROW_EXPORT
+Result<size_t> ReadSparseTensorBodyBufferCount(const Buffer& metadata);
+
+/// \brief EXPERIMENTAL: Read arrow::SparseTensor from an IpcPayload
+/// \param[in] payload an IpcPayload containing a serialized SparseTensor
+/// \return the read sparse tensor
+ARROW_EXPORT
+Result<std::shared_ptr<SparseTensor>> ReadSparseTensorPayload(const IpcPayload& payload);
+
+// For fuzzing targets
+ARROW_EXPORT
+Status FuzzIpcStream(const uint8_t* data, int64_t size);
+ARROW_EXPORT
Status FuzzIpcTensorStream(const uint8_t* data, int64_t size);
ARROW_EXPORT
-Status FuzzIpcFile(const uint8_t* data, int64_t size);
-
-} // namespace internal
-
-} // namespace ipc
-} // namespace arrow
+Status FuzzIpcFile(const uint8_t* data, int64_t size);
+
+} // namespace internal
+
+} // namespace ipc
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/type_fwd.h b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/type_fwd.h
index 3493c4f1409..39f53a0864d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/type_fwd.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/type_fwd.h
@@ -1,65 +1,65 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-namespace arrow {
-namespace ipc {
-
-enum class MetadataVersion : char {
- /// 0.1.0
- V1,
-
- /// 0.2.0
- V2,
-
- /// 0.3.0 to 0.7.1
- V3,
-
- /// 0.8.0 to 0.17.0
- V4,
-
- /// >= 1.0.0
- V5
-};
-
-class Message;
-enum class MessageType {
- NONE,
- SCHEMA,
- DICTIONARY_BATCH,
- RECORD_BATCH,
- TENSOR,
- SPARSE_TENSOR
-};
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+namespace arrow {
+namespace ipc {
+
+enum class MetadataVersion : char {
+ /// 0.1.0
+ V1,
+
+ /// 0.2.0
+ V2,
+
+ /// 0.3.0 to 0.7.1
+ V3,
+
+ /// 0.8.0 to 0.17.0
+ V4,
+
+ /// >= 1.0.0
+ V5
+};
+
+class Message;
+enum class MessageType {
+ NONE,
+ SCHEMA,
+ DICTIONARY_BATCH,
+ RECORD_BATCH,
+ TENSOR,
+ SPARSE_TENSOR
+};
+
struct IpcReadOptions;
struct IpcWriteOptions;
-class MessageReader;
-
-class RecordBatchStreamReader;
-class RecordBatchFileReader;
-class RecordBatchWriter;
-
-namespace feather {
-
-class Reader;
-
-} // namespace feather
-} // namespace ipc
-} // namespace arrow
+class MessageReader;
+
+class RecordBatchStreamReader;
+class RecordBatchFileReader;
+class RecordBatchWriter;
+
+namespace feather {
+
+class Reader;
+
+} // namespace feather
+} // namespace ipc
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/util.h b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/util.h
index 709fedbf31b..f0d75620a2a 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/util.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/util.h
@@ -1,41 +1,41 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-
-namespace arrow {
-namespace ipc {
-
-// Buffers are padded to 64-byte boundaries (for SIMD)
-static constexpr int32_t kArrowAlignment = 64;
-
-// Tensors are padded to 64-byte boundaries
-static constexpr int32_t kTensorAlignment = 64;
-
-// Align on 8-byte boundaries in IPC
-static constexpr int32_t kArrowIpcAlignment = 8;
-
-static constexpr uint8_t kPaddingBytes[kArrowAlignment] = {0};
-
-static inline int64_t PaddedLength(int64_t nbytes, int32_t alignment = kArrowAlignment) {
- return ((nbytes + alignment - 1) / alignment) * alignment;
-}
-
-} // namespace ipc
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+
+namespace arrow {
+namespace ipc {
+
+// Buffers are padded to 64-byte boundaries (for SIMD)
+static constexpr int32_t kArrowAlignment = 64;
+
+// Tensors are padded to 64-byte boundaries
+static constexpr int32_t kTensorAlignment = 64;
+
+// Align on 8-byte boundaries in IPC
+static constexpr int32_t kArrowIpcAlignment = 8;
+
+static constexpr uint8_t kPaddingBytes[kArrowAlignment] = {0};
+
+static inline int64_t PaddedLength(int64_t nbytes, int32_t alignment = kArrowAlignment) {
+ return ((nbytes + alignment - 1) / alignment) * alignment;
+}
+
+} // namespace ipc
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/writer.cc b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/writer.cc
index 7b9254b7e59..c9c1cad77f7 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/writer.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/writer.cc
@@ -1,75 +1,75 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/ipc/writer.h"
-
-#include <algorithm>
-#include <cstdint>
-#include <cstring>
-#include <limits>
-#include <sstream>
-#include <string>
-#include <type_traits>
-#include <unordered_map>
-#include <utility>
-#include <vector>
-
-#include "arrow/array.h"
-#include "arrow/buffer.h"
-#include "arrow/device.h"
-#include "arrow/extension_type.h"
-#include "arrow/io/interfaces.h"
-#include "arrow/io/memory.h"
-#include "arrow/ipc/dictionary.h"
-#include "arrow/ipc/message.h"
-#include "arrow/ipc/metadata_internal.h"
-#include "arrow/ipc/util.h"
-#include "arrow/record_batch.h"
-#include "arrow/result_internal.h"
-#include "arrow/sparse_tensor.h"
-#include "arrow/status.h"
-#include "arrow/table.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/bitmap_ops.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/compression.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/ipc/writer.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <limits>
+#include <sstream>
+#include <string>
+#include <type_traits>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/buffer.h"
+#include "arrow/device.h"
+#include "arrow/extension_type.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/io/memory.h"
+#include "arrow/ipc/dictionary.h"
+#include "arrow/ipc/message.h"
+#include "arrow/ipc/metadata_internal.h"
+#include "arrow/ipc/util.h"
+#include "arrow/record_batch.h"
+#include "arrow/result_internal.h"
+#include "arrow/sparse_tensor.h"
+#include "arrow/status.h"
+#include "arrow/table.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/compression.h"
#include "arrow/util/endian.h"
-#include "arrow/util/key_value_metadata.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/make_unique.h"
-#include "arrow/util/parallel.h"
-#include "arrow/visitor_inline.h"
-
-namespace arrow {
-
-using internal::checked_cast;
-using internal::checked_pointer_cast;
-using internal::CopyBitmap;
-using internal::GetByteWidth;
-
-namespace ipc {
-
-using internal::FileBlock;
-using internal::kArrowMagicBytes;
-
-namespace {
-
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/make_unique.h"
+#include "arrow/util/parallel.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+using internal::checked_pointer_cast;
+using internal::CopyBitmap;
+using internal::GetByteWidth;
+
+namespace ipc {
+
+using internal::FileBlock;
+using internal::kArrowMagicBytes;
+
+namespace {
+
bool HasNestedDict(const ArrayData& data) {
if (data.type->id() == Type::DICTIONARY) {
return true;
@@ -82,926 +82,926 @@ bool HasNestedDict(const ArrayData& data) {
return false;
}
-Status GetTruncatedBitmap(int64_t offset, int64_t length,
- const std::shared_ptr<Buffer> input, MemoryPool* pool,
- std::shared_ptr<Buffer>* buffer) {
- if (!input) {
- *buffer = input;
- return Status::OK();
- }
- int64_t min_length = PaddedLength(BitUtil::BytesForBits(length));
- if (offset != 0 || min_length < input->size()) {
- // With a sliced array / non-zero offset, we must copy the bitmap
- ARROW_ASSIGN_OR_RAISE(*buffer, CopyBitmap(pool, input->data(), offset, length));
- } else {
- *buffer = input;
- }
- return Status::OK();
-}
-
-Status GetTruncatedBuffer(int64_t offset, int64_t length, int32_t byte_width,
- const std::shared_ptr<Buffer> input, MemoryPool* pool,
- std::shared_ptr<Buffer>* buffer) {
- if (!input) {
- *buffer = input;
- return Status::OK();
- }
- int64_t padded_length = PaddedLength(length * byte_width);
- if (offset != 0 || padded_length < input->size()) {
- *buffer =
- SliceBuffer(input, offset * byte_width, std::min(padded_length, input->size()));
- } else {
- *buffer = input;
- }
- return Status::OK();
-}
-
-static inline bool NeedTruncate(int64_t offset, const Buffer* buffer,
- int64_t min_length) {
- // buffer can be NULL
- if (buffer == nullptr) {
- return false;
- }
- return offset != 0 || min_length < buffer->size();
-}
-
-class RecordBatchSerializer {
- public:
- RecordBatchSerializer(int64_t buffer_start_offset, const IpcWriteOptions& options,
- IpcPayload* out)
- : out_(out),
- options_(options),
- max_recursion_depth_(options.max_recursion_depth),
- buffer_start_offset_(buffer_start_offset) {
- DCHECK_GT(max_recursion_depth_, 0);
- }
-
- virtual ~RecordBatchSerializer() = default;
-
- Status VisitArray(const Array& arr) {
- static std::shared_ptr<Buffer> kNullBuffer = std::make_shared<Buffer>(nullptr, 0);
-
- if (max_recursion_depth_ <= 0) {
- return Status::Invalid("Max recursion depth reached");
- }
-
- if (!options_.allow_64bit && arr.length() > std::numeric_limits<int32_t>::max()) {
- return Status::CapacityError("Cannot write arrays larger than 2^31 - 1 in length");
- }
-
- // push back all common elements
- field_nodes_.push_back({arr.length(), arr.null_count(), 0});
-
- // In V4, null types have no validity bitmap
- // In V5 and later, null and union types have no validity bitmap
- if (internal::HasValidityBitmap(arr.type_id(), options_.metadata_version)) {
- if (arr.null_count() > 0) {
- std::shared_ptr<Buffer> bitmap;
- RETURN_NOT_OK(GetTruncatedBitmap(arr.offset(), arr.length(), arr.null_bitmap(),
- options_.memory_pool, &bitmap));
- out_->body_buffers.emplace_back(bitmap);
- } else {
- // Push a dummy zero-length buffer, not to be copied
- out_->body_buffers.emplace_back(kNullBuffer);
- }
- }
- return VisitType(arr);
- }
-
- // Override this for writing dictionary metadata
- virtual Status SerializeMetadata(int64_t num_rows) {
- return WriteRecordBatchMessage(num_rows, out_->body_length, custom_metadata_,
- field_nodes_, buffer_meta_, options_, &out_->metadata);
- }
-
- void AppendCustomMetadata(const std::string& key, const std::string& value) {
- if (!custom_metadata_) {
- custom_metadata_ = std::make_shared<KeyValueMetadata>();
- }
- custom_metadata_->Append(key, value);
- }
-
- Status CompressBuffer(const Buffer& buffer, util::Codec* codec,
- std::shared_ptr<Buffer>* out) {
- // Convert buffer to uncompressed-length-prefixed compressed buffer
- int64_t maximum_length = codec->MaxCompressedLen(buffer.size(), buffer.data());
- ARROW_ASSIGN_OR_RAISE(auto result, AllocateBuffer(maximum_length + sizeof(int64_t)));
-
- int64_t actual_length;
- ARROW_ASSIGN_OR_RAISE(actual_length,
- codec->Compress(buffer.size(), buffer.data(), maximum_length,
- result->mutable_data() + sizeof(int64_t)));
- *reinterpret_cast<int64_t*>(result->mutable_data()) =
- BitUtil::ToLittleEndian(buffer.size());
- *out = SliceBuffer(std::move(result), /*offset=*/0, actual_length + sizeof(int64_t));
- return Status::OK();
- }
-
- Status CompressBodyBuffers() {
- RETURN_NOT_OK(
- internal::CheckCompressionSupported(options_.codec->compression_type()));
-
- auto CompressOne = [&](size_t i) {
- if (out_->body_buffers[i]->size() > 0) {
- RETURN_NOT_OK(CompressBuffer(*out_->body_buffers[i], options_.codec.get(),
- &out_->body_buffers[i]));
- }
- return Status::OK();
- };
-
- return ::arrow::internal::OptionalParallelFor(
- options_.use_threads, static_cast<int>(out_->body_buffers.size()), CompressOne);
- }
-
- Status Assemble(const RecordBatch& batch) {
- if (field_nodes_.size() > 0) {
- field_nodes_.clear();
- buffer_meta_.clear();
- out_->body_buffers.clear();
- }
-
- // Perform depth-first traversal of the row-batch
- for (int i = 0; i < batch.num_columns(); ++i) {
- RETURN_NOT_OK(VisitArray(*batch.column(i)));
- }
-
- if (options_.codec != nullptr) {
- RETURN_NOT_OK(CompressBodyBuffers());
- }
-
- // The position for the start of a buffer relative to the passed frame of
- // reference. May be 0 or some other position in an address space
- int64_t offset = buffer_start_offset_;
-
- buffer_meta_.reserve(out_->body_buffers.size());
-
- // Construct the buffer metadata for the record batch header
- for (const auto& buffer : out_->body_buffers) {
- int64_t size = 0;
- int64_t padding = 0;
-
- // The buffer might be null if we are handling zero row lengths.
- if (buffer) {
- size = buffer->size();
- padding = BitUtil::RoundUpToMultipleOf8(size) - size;
- }
-
- buffer_meta_.push_back({offset, size});
- offset += size + padding;
- }
-
- out_->body_length = offset - buffer_start_offset_;
- DCHECK(BitUtil::IsMultipleOf8(out_->body_length));
-
- // Now that we have computed the locations of all of the buffers in shared
- // memory, the data header can be converted to a flatbuffer and written out
- //
- // Note: The memory written here is prefixed by the size of the flatbuffer
- // itself as an int32_t.
- return SerializeMetadata(batch.num_rows());
- }
-
- template <typename ArrayType>
- Status GetZeroBasedValueOffsets(const ArrayType& array,
- std::shared_ptr<Buffer>* value_offsets) {
- // Share slicing logic between ListArray, BinaryArray and LargeBinaryArray
- using offset_type = typename ArrayType::offset_type;
-
- auto offsets = array.value_offsets();
-
- int64_t required_bytes = sizeof(offset_type) * (array.length() + 1);
- if (array.offset() != 0) {
- // If we have a non-zero offset, then the value offsets do not start at
- // zero. We must a) create a new offsets array with shifted offsets and
- // b) slice the values array accordingly
-
- ARROW_ASSIGN_OR_RAISE(auto shifted_offsets,
- AllocateBuffer(required_bytes, options_.memory_pool));
-
- offset_type* dest_offsets =
- reinterpret_cast<offset_type*>(shifted_offsets->mutable_data());
- const offset_type start_offset = array.value_offset(0);
-
- for (int i = 0; i < array.length(); ++i) {
- dest_offsets[i] = array.value_offset(i) - start_offset;
- }
- // Final offset
- dest_offsets[array.length()] = array.value_offset(array.length()) - start_offset;
- offsets = std::move(shifted_offsets);
- } else {
- // ARROW-6046: Slice offsets to used extent, in case we have a truncated
- // slice
- if (offsets != nullptr && offsets->size() > required_bytes) {
- offsets = SliceBuffer(offsets, 0, required_bytes);
- }
- }
- *value_offsets = std::move(offsets);
- return Status::OK();
- }
-
- Status Visit(const BooleanArray& array) {
- std::shared_ptr<Buffer> data;
- RETURN_NOT_OK(GetTruncatedBitmap(array.offset(), array.length(), array.values(),
- options_.memory_pool, &data));
- out_->body_buffers.emplace_back(data);
- return Status::OK();
- }
-
- Status Visit(const NullArray& array) { return Status::OK(); }
-
- template <typename T>
- typename std::enable_if<is_number_type<typename T::TypeClass>::value ||
- is_temporal_type<typename T::TypeClass>::value ||
- is_fixed_size_binary_type<typename T::TypeClass>::value,
- Status>::type
- Visit(const T& array) {
- std::shared_ptr<Buffer> data = array.values();
-
- const int64_t type_width = GetByteWidth(*array.type());
- int64_t min_length = PaddedLength(array.length() * type_width);
-
- if (NeedTruncate(array.offset(), data.get(), min_length)) {
- // Non-zero offset, slice the buffer
- const int64_t byte_offset = array.offset() * type_width;
-
- // Send padding if it's available
- const int64_t buffer_length =
- std::min(BitUtil::RoundUpToMultipleOf8(array.length() * type_width),
- data->size() - byte_offset);
- data = SliceBuffer(data, byte_offset, buffer_length);
- }
- out_->body_buffers.emplace_back(data);
- return Status::OK();
- }
-
- template <typename T>
- enable_if_base_binary<typename T::TypeClass, Status> Visit(const T& array) {
- std::shared_ptr<Buffer> value_offsets;
- RETURN_NOT_OK(GetZeroBasedValueOffsets<T>(array, &value_offsets));
- auto data = array.value_data();
-
- int64_t total_data_bytes = 0;
- if (value_offsets) {
- total_data_bytes = array.value_offset(array.length()) - array.value_offset(0);
- }
- if (NeedTruncate(array.offset(), data.get(), total_data_bytes)) {
- // Slice the data buffer to include only the range we need now
- const int64_t start_offset = array.value_offset(0);
- const int64_t slice_length =
- std::min(PaddedLength(total_data_bytes), data->size() - start_offset);
- data = SliceBuffer(data, start_offset, slice_length);
- }
-
- out_->body_buffers.emplace_back(value_offsets);
- out_->body_buffers.emplace_back(data);
- return Status::OK();
- }
-
- template <typename T>
- enable_if_base_list<typename T::TypeClass, Status> Visit(const T& array) {
- using offset_type = typename T::offset_type;
-
- std::shared_ptr<Buffer> value_offsets;
- RETURN_NOT_OK(GetZeroBasedValueOffsets<T>(array, &value_offsets));
- out_->body_buffers.emplace_back(value_offsets);
-
- --max_recursion_depth_;
- std::shared_ptr<Array> values = array.values();
-
- offset_type values_offset = 0;
- offset_type values_length = 0;
- if (value_offsets) {
- values_offset = array.value_offset(0);
- values_length = array.value_offset(array.length()) - values_offset;
- }
-
- if (array.offset() != 0 || values_length < values->length()) {
- // Must also slice the values
- values = values->Slice(values_offset, values_length);
- }
- RETURN_NOT_OK(VisitArray(*values));
- ++max_recursion_depth_;
- return Status::OK();
- }
-
- Status Visit(const FixedSizeListArray& array) {
- --max_recursion_depth_;
- auto size = array.list_type()->list_size();
- auto values = array.values()->Slice(array.offset() * size, array.length() * size);
-
- RETURN_NOT_OK(VisitArray(*values));
- ++max_recursion_depth_;
- return Status::OK();
- }
-
- Status Visit(const StructArray& array) {
- --max_recursion_depth_;
- for (int i = 0; i < array.num_fields(); ++i) {
- std::shared_ptr<Array> field = array.field(i);
- RETURN_NOT_OK(VisitArray(*field));
- }
- ++max_recursion_depth_;
- return Status::OK();
- }
-
- Status Visit(const SparseUnionArray& array) {
- const int64_t offset = array.offset();
- const int64_t length = array.length();
-
- std::shared_ptr<Buffer> type_codes;
- RETURN_NOT_OK(GetTruncatedBuffer(
- offset, length, static_cast<int32_t>(sizeof(UnionArray::type_code_t)),
- array.type_codes(), options_.memory_pool, &type_codes));
- out_->body_buffers.emplace_back(type_codes);
-
- --max_recursion_depth_;
- for (int i = 0; i < array.num_fields(); ++i) {
- // Sparse union, slicing is done for us by field()
- RETURN_NOT_OK(VisitArray(*array.field(i)));
- }
- ++max_recursion_depth_;
- return Status::OK();
- }
-
- Status Visit(const DenseUnionArray& array) {
- const int64_t offset = array.offset();
- const int64_t length = array.length();
-
- std::shared_ptr<Buffer> type_codes;
- RETURN_NOT_OK(GetTruncatedBuffer(
- offset, length, static_cast<int32_t>(sizeof(UnionArray::type_code_t)),
- array.type_codes(), options_.memory_pool, &type_codes));
- out_->body_buffers.emplace_back(type_codes);
-
- --max_recursion_depth_;
- const auto& type = checked_cast<const UnionType&>(*array.type());
-
- std::shared_ptr<Buffer> value_offsets;
- RETURN_NOT_OK(
- GetTruncatedBuffer(offset, length, static_cast<int32_t>(sizeof(int32_t)),
- array.value_offsets(), options_.memory_pool, &value_offsets));
-
- // The Union type codes are not necessary 0-indexed
- int8_t max_code = 0;
- for (int8_t code : type.type_codes()) {
- if (code > max_code) {
- max_code = code;
- }
- }
-
- // Allocate an array of child offsets. Set all to -1 to indicate that we
- // haven't observed a first occurrence of a particular child yet
- std::vector<int32_t> child_offsets(max_code + 1, -1);
- std::vector<int32_t> child_lengths(max_code + 1, 0);
-
- if (offset != 0) {
- // This is an unpleasant case. Because the offsets are different for
- // each child array, when we have a sliced array, we need to "rebase"
- // the value_offsets for each array
-
- const int32_t* unshifted_offsets = array.raw_value_offsets();
- const int8_t* type_codes = array.raw_type_codes();
-
- // Allocate the shifted offsets
- ARROW_ASSIGN_OR_RAISE(
- auto shifted_offsets_buffer,
- AllocateBuffer(length * sizeof(int32_t), options_.memory_pool));
- int32_t* shifted_offsets =
- reinterpret_cast<int32_t*>(shifted_offsets_buffer->mutable_data());
-
- // Offsets may not be ascending, so we need to find out the start offset
- // for each child
- for (int64_t i = 0; i < length; ++i) {
- const uint8_t code = type_codes[i];
- if (child_offsets[code] == -1) {
- child_offsets[code] = unshifted_offsets[i];
- } else {
- child_offsets[code] = std::min(child_offsets[code], unshifted_offsets[i]);
- }
- }
-
- // Now compute shifted offsets by subtracting child offset
- for (int64_t i = 0; i < length; ++i) {
- const int8_t code = type_codes[i];
- shifted_offsets[i] = unshifted_offsets[i] - child_offsets[code];
- // Update the child length to account for observed value
- child_lengths[code] = std::max(child_lengths[code], shifted_offsets[i] + 1);
- }
-
- value_offsets = std::move(shifted_offsets_buffer);
- }
- out_->body_buffers.emplace_back(value_offsets);
-
- // Visit children and slice accordingly
- for (int i = 0; i < type.num_fields(); ++i) {
- std::shared_ptr<Array> child = array.field(i);
-
- // TODO: ARROW-809, for sliced unions, tricky to know how much to
- // truncate the children. For now, we are truncating the children to be
- // no longer than the parent union.
- if (offset != 0) {
- const int8_t code = type.type_codes()[i];
- const int64_t child_offset = child_offsets[code];
- const int64_t child_length = child_lengths[code];
-
- if (child_offset > 0) {
- child = child->Slice(child_offset, child_length);
- } else if (child_length < child->length()) {
- // This case includes when child is not encountered at all
- child = child->Slice(0, child_length);
- }
- }
- RETURN_NOT_OK(VisitArray(*child));
- }
- ++max_recursion_depth_;
- return Status::OK();
- }
-
- Status Visit(const DictionaryArray& array) {
- // Dictionary written out separately. Slice offset contained in the indices
- return VisitType(*array.indices());
- }
-
- Status Visit(const ExtensionArray& array) { return VisitType(*array.storage()); }
-
- Status VisitType(const Array& values) { return VisitArrayInline(values, this); }
-
- protected:
- // Destination for output buffers
- IpcPayload* out_;
-
- std::shared_ptr<KeyValueMetadata> custom_metadata_;
-
- std::vector<internal::FieldMetadata> field_nodes_;
- std::vector<internal::BufferMetadata> buffer_meta_;
-
- const IpcWriteOptions& options_;
- int64_t max_recursion_depth_;
- int64_t buffer_start_offset_;
-};
-
-class DictionarySerializer : public RecordBatchSerializer {
- public:
- DictionarySerializer(int64_t dictionary_id, bool is_delta, int64_t buffer_start_offset,
- const IpcWriteOptions& options, IpcPayload* out)
- : RecordBatchSerializer(buffer_start_offset, options, out),
- dictionary_id_(dictionary_id),
- is_delta_(is_delta) {}
-
- Status SerializeMetadata(int64_t num_rows) override {
- return WriteDictionaryMessage(dictionary_id_, is_delta_, num_rows, out_->body_length,
- custom_metadata_, field_nodes_, buffer_meta_, options_,
- &out_->metadata);
- }
-
- Status Assemble(const std::shared_ptr<Array>& dictionary) {
- // Make a dummy record batch. A bit tedious as we have to make a schema
- auto schema = arrow::schema({arrow::field("dictionary", dictionary->type())});
+Status GetTruncatedBitmap(int64_t offset, int64_t length,
+ const std::shared_ptr<Buffer> input, MemoryPool* pool,
+ std::shared_ptr<Buffer>* buffer) {
+ if (!input) {
+ *buffer = input;
+ return Status::OK();
+ }
+ int64_t min_length = PaddedLength(BitUtil::BytesForBits(length));
+ if (offset != 0 || min_length < input->size()) {
+ // With a sliced array / non-zero offset, we must copy the bitmap
+ ARROW_ASSIGN_OR_RAISE(*buffer, CopyBitmap(pool, input->data(), offset, length));
+ } else {
+ *buffer = input;
+ }
+ return Status::OK();
+}
+
+Status GetTruncatedBuffer(int64_t offset, int64_t length, int32_t byte_width,
+ const std::shared_ptr<Buffer> input, MemoryPool* pool,
+ std::shared_ptr<Buffer>* buffer) {
+ if (!input) {
+ *buffer = input;
+ return Status::OK();
+ }
+ int64_t padded_length = PaddedLength(length * byte_width);
+ if (offset != 0 || padded_length < input->size()) {
+ *buffer =
+ SliceBuffer(input, offset * byte_width, std::min(padded_length, input->size()));
+ } else {
+ *buffer = input;
+ }
+ return Status::OK();
+}
+
+static inline bool NeedTruncate(int64_t offset, const Buffer* buffer,
+ int64_t min_length) {
+ // buffer can be NULL
+ if (buffer == nullptr) {
+ return false;
+ }
+ return offset != 0 || min_length < buffer->size();
+}
+
+class RecordBatchSerializer {
+ public:
+ RecordBatchSerializer(int64_t buffer_start_offset, const IpcWriteOptions& options,
+ IpcPayload* out)
+ : out_(out),
+ options_(options),
+ max_recursion_depth_(options.max_recursion_depth),
+ buffer_start_offset_(buffer_start_offset) {
+ DCHECK_GT(max_recursion_depth_, 0);
+ }
+
+ virtual ~RecordBatchSerializer() = default;
+
+ Status VisitArray(const Array& arr) {
+ static std::shared_ptr<Buffer> kNullBuffer = std::make_shared<Buffer>(nullptr, 0);
+
+ if (max_recursion_depth_ <= 0) {
+ return Status::Invalid("Max recursion depth reached");
+ }
+
+ if (!options_.allow_64bit && arr.length() > std::numeric_limits<int32_t>::max()) {
+ return Status::CapacityError("Cannot write arrays larger than 2^31 - 1 in length");
+ }
+
+ // push back all common elements
+ field_nodes_.push_back({arr.length(), arr.null_count(), 0});
+
+ // In V4, null types have no validity bitmap
+ // In V5 and later, null and union types have no validity bitmap
+ if (internal::HasValidityBitmap(arr.type_id(), options_.metadata_version)) {
+ if (arr.null_count() > 0) {
+ std::shared_ptr<Buffer> bitmap;
+ RETURN_NOT_OK(GetTruncatedBitmap(arr.offset(), arr.length(), arr.null_bitmap(),
+ options_.memory_pool, &bitmap));
+ out_->body_buffers.emplace_back(bitmap);
+ } else {
+ // Push a dummy zero-length buffer, not to be copied
+ out_->body_buffers.emplace_back(kNullBuffer);
+ }
+ }
+ return VisitType(arr);
+ }
+
+ // Override this for writing dictionary metadata
+ virtual Status SerializeMetadata(int64_t num_rows) {
+ return WriteRecordBatchMessage(num_rows, out_->body_length, custom_metadata_,
+ field_nodes_, buffer_meta_, options_, &out_->metadata);
+ }
+
+ void AppendCustomMetadata(const std::string& key, const std::string& value) {
+ if (!custom_metadata_) {
+ custom_metadata_ = std::make_shared<KeyValueMetadata>();
+ }
+ custom_metadata_->Append(key, value);
+ }
+
+ Status CompressBuffer(const Buffer& buffer, util::Codec* codec,
+ std::shared_ptr<Buffer>* out) {
+ // Convert buffer to uncompressed-length-prefixed compressed buffer
+ int64_t maximum_length = codec->MaxCompressedLen(buffer.size(), buffer.data());
+ ARROW_ASSIGN_OR_RAISE(auto result, AllocateBuffer(maximum_length + sizeof(int64_t)));
+
+ int64_t actual_length;
+ ARROW_ASSIGN_OR_RAISE(actual_length,
+ codec->Compress(buffer.size(), buffer.data(), maximum_length,
+ result->mutable_data() + sizeof(int64_t)));
+ *reinterpret_cast<int64_t*>(result->mutable_data()) =
+ BitUtil::ToLittleEndian(buffer.size());
+ *out = SliceBuffer(std::move(result), /*offset=*/0, actual_length + sizeof(int64_t));
+ return Status::OK();
+ }
+
+ Status CompressBodyBuffers() {
+ RETURN_NOT_OK(
+ internal::CheckCompressionSupported(options_.codec->compression_type()));
+
+ auto CompressOne = [&](size_t i) {
+ if (out_->body_buffers[i]->size() > 0) {
+ RETURN_NOT_OK(CompressBuffer(*out_->body_buffers[i], options_.codec.get(),
+ &out_->body_buffers[i]));
+ }
+ return Status::OK();
+ };
+
+ return ::arrow::internal::OptionalParallelFor(
+ options_.use_threads, static_cast<int>(out_->body_buffers.size()), CompressOne);
+ }
+
+ Status Assemble(const RecordBatch& batch) {
+ if (field_nodes_.size() > 0) {
+ field_nodes_.clear();
+ buffer_meta_.clear();
+ out_->body_buffers.clear();
+ }
+
+ // Perform depth-first traversal of the row-batch
+ for (int i = 0; i < batch.num_columns(); ++i) {
+ RETURN_NOT_OK(VisitArray(*batch.column(i)));
+ }
+
+ if (options_.codec != nullptr) {
+ RETURN_NOT_OK(CompressBodyBuffers());
+ }
+
+ // The position for the start of a buffer relative to the passed frame of
+ // reference. May be 0 or some other position in an address space
+ int64_t offset = buffer_start_offset_;
+
+ buffer_meta_.reserve(out_->body_buffers.size());
+
+ // Construct the buffer metadata for the record batch header
+ for (const auto& buffer : out_->body_buffers) {
+ int64_t size = 0;
+ int64_t padding = 0;
+
+ // The buffer might be null if we are handling zero row lengths.
+ if (buffer) {
+ size = buffer->size();
+ padding = BitUtil::RoundUpToMultipleOf8(size) - size;
+ }
+
+ buffer_meta_.push_back({offset, size});
+ offset += size + padding;
+ }
+
+ out_->body_length = offset - buffer_start_offset_;
+ DCHECK(BitUtil::IsMultipleOf8(out_->body_length));
+
+ // Now that we have computed the locations of all of the buffers in shared
+ // memory, the data header can be converted to a flatbuffer and written out
+ //
+ // Note: The memory written here is prefixed by the size of the flatbuffer
+ // itself as an int32_t.
+ return SerializeMetadata(batch.num_rows());
+ }
+
+ template <typename ArrayType>
+ Status GetZeroBasedValueOffsets(const ArrayType& array,
+ std::shared_ptr<Buffer>* value_offsets) {
+ // Share slicing logic between ListArray, BinaryArray and LargeBinaryArray
+ using offset_type = typename ArrayType::offset_type;
+
+ auto offsets = array.value_offsets();
+
+ int64_t required_bytes = sizeof(offset_type) * (array.length() + 1);
+ if (array.offset() != 0) {
+ // If we have a non-zero offset, then the value offsets do not start at
+ // zero. We must a) create a new offsets array with shifted offsets and
+ // b) slice the values array accordingly
+
+ ARROW_ASSIGN_OR_RAISE(auto shifted_offsets,
+ AllocateBuffer(required_bytes, options_.memory_pool));
+
+ offset_type* dest_offsets =
+ reinterpret_cast<offset_type*>(shifted_offsets->mutable_data());
+ const offset_type start_offset = array.value_offset(0);
+
+ for (int i = 0; i < array.length(); ++i) {
+ dest_offsets[i] = array.value_offset(i) - start_offset;
+ }
+ // Final offset
+ dest_offsets[array.length()] = array.value_offset(array.length()) - start_offset;
+ offsets = std::move(shifted_offsets);
+ } else {
+ // ARROW-6046: Slice offsets to used extent, in case we have a truncated
+ // slice
+ if (offsets != nullptr && offsets->size() > required_bytes) {
+ offsets = SliceBuffer(offsets, 0, required_bytes);
+ }
+ }
+ *value_offsets = std::move(offsets);
+ return Status::OK();
+ }
+
+ Status Visit(const BooleanArray& array) {
+ std::shared_ptr<Buffer> data;
+ RETURN_NOT_OK(GetTruncatedBitmap(array.offset(), array.length(), array.values(),
+ options_.memory_pool, &data));
+ out_->body_buffers.emplace_back(data);
+ return Status::OK();
+ }
+
+ Status Visit(const NullArray& array) { return Status::OK(); }
+
+ // Append the values buffer of a fixed-width array (number, temporal or
+ // fixed-size binary type). When NeedTruncate says so, the buffer is sliced
+ // to start at the array's first value and to span the array's bytes rounded
+ // up to a multiple of 8, capped by what the buffer actually holds.
+ template <typename T>
+ typename std::enable_if<is_number_type<typename T::TypeClass>::value ||
+                             is_temporal_type<typename T::TypeClass>::value ||
+                             is_fixed_size_binary_type<typename T::TypeClass>::value,
+                         Status>::type
+ Visit(const T& array) {
+   const int64_t type_width = GetByteWidth(*array.type());
+   const int64_t values_bytes = array.length() * type_width;
+
+   std::shared_ptr<Buffer> values = array.values();
+   if (NeedTruncate(array.offset(), values.get(), PaddedLength(values_bytes))) {
+     // Skip the bytes that precede the array's logical start
+     const int64_t byte_offset = array.offset() * type_width;
+
+     // Send padding if it's available
+     const int64_t wanted = BitUtil::RoundUpToMultipleOf8(values_bytes);
+     const int64_t available = values->size() - byte_offset;
+     values = SliceBuffer(values, byte_offset, std::min(wanted, available));
+   }
+   out_->body_buffers.emplace_back(std::move(values));
+   return Status::OK();
+ }
+
+ // Append two body buffers for a variable-length binary-like array: first
+ // the offsets produced by GetZeroBasedValueOffsets, then the value bytes.
+ // The value buffer is sliced to the referenced range when NeedTruncate
+ // requests it.
+ template <typename T>
+ enable_if_base_binary<typename T::TypeClass, Status> Visit(const T& array) {
+   std::shared_ptr<Buffer> zero_based_offsets;
+   RETURN_NOT_OK(GetZeroBasedValueOffsets<T>(array, &zero_based_offsets));
+
+   // Number of value bytes spanned by this array; zero when there is no
+   // offsets buffer at all
+   int64_t referenced_bytes = 0;
+   if (zero_based_offsets) {
+     referenced_bytes = array.value_offset(array.length()) - array.value_offset(0);
+   }
+
+   std::shared_ptr<Buffer> values = array.value_data();
+   if (NeedTruncate(array.offset(), values.get(), referenced_bytes)) {
+     // Slice the data buffer to include only the range we need now
+     const int64_t first_byte = array.value_offset(0);
+     const int64_t padded = PaddedLength(referenced_bytes);
+     const int64_t remaining = values->size() - first_byte;
+     values = SliceBuffer(values, first_byte, std::min(padded, remaining));
+   }
+
+   out_->body_buffers.emplace_back(std::move(zero_based_offsets));
+   out_->body_buffers.emplace_back(std::move(values));
+   return Status::OK();
+ }
+
+ // Append the offsets buffer of a list-like array and then recurse into its
+ // child values, sliced to the range the offsets refer to when the array is
+ // itself sliced or covers only part of the child.
+ template <typename T>
+ enable_if_base_list<typename T::TypeClass, Status> Visit(const T& array) {
+   using offset_type = typename T::offset_type;
+
+   std::shared_ptr<Buffer> zero_based_offsets;
+   RETURN_NOT_OK(GetZeroBasedValueOffsets<T>(array, &zero_based_offsets));
+   const bool has_offsets = zero_based_offsets != nullptr;
+   out_->body_buffers.emplace_back(std::move(zero_based_offsets));
+
+   --max_recursion_depth_;
+
+   // Range of child values referenced by this array
+   offset_type first_value = 0;
+   offset_type num_values = 0;
+   if (has_offsets) {
+     first_value = array.value_offset(0);
+     num_values = array.value_offset(array.length()) - first_value;
+   }
+
+   std::shared_ptr<Array> child = array.values();
+   const bool needs_slice = array.offset() != 0 || num_values < child->length();
+   if (needs_slice) {
+     // Must also slice the values
+     child = child->Slice(first_value, num_values);
+   }
+   RETURN_NOT_OK(VisitArray(*child));
+   ++max_recursion_depth_;
+   return Status::OK();
+ }
+
+ // Recurse into the child of a fixed-size list. The child is sliced to
+ // [offset * list_size, offset * list_size + length * list_size) so that it
+ // matches the parent's offset and length.
+ Status Visit(const FixedSizeListArray& array) {
+   --max_recursion_depth_;
+   const auto list_size = array.list_type()->list_size();
+   const auto first_value = array.offset() * list_size;
+   const auto num_values = array.length() * list_size;
+   auto child = array.values()->Slice(first_value, num_values);
+
+   RETURN_NOT_OK(VisitArray(*child));
+   ++max_recursion_depth_;
+   return Status::OK();
+ }
+
+ // Recurse into every field of a struct array, in field order.
+ Status Visit(const StructArray& array) {
+   --max_recursion_depth_;
+   const int num_children = array.num_fields();
+   for (int child_index = 0; child_index < num_children; ++child_index) {
+     RETURN_NOT_OK(VisitArray(*array.field(child_index)));
+   }
+   ++max_recursion_depth_;
+   return Status::OK();
+ }
+
+ // Append the (truncated) type codes buffer of a sparse union, then recurse
+ // into each child in field order.
+ Status Visit(const SparseUnionArray& array) {
+   const int64_t slice_offset = array.offset();
+   const int64_t slice_length = array.length();
+   const int32_t code_width = static_cast<int32_t>(sizeof(UnionArray::type_code_t));
+
+   std::shared_ptr<Buffer> codes_buffer;
+   RETURN_NOT_OK(GetTruncatedBuffer(slice_offset, slice_length, code_width,
+                                    array.type_codes(), options_.memory_pool,
+                                    &codes_buffer));
+   out_->body_buffers.emplace_back(std::move(codes_buffer));
+
+   --max_recursion_depth_;
+   const int num_children = array.num_fields();
+   for (int child_index = 0; child_index < num_children; ++child_index) {
+     // Sparse union, slicing is done for us by field()
+     RETURN_NOT_OK(VisitArray(*array.field(child_index)));
+   }
+   ++max_recursion_depth_;
+   return Status::OK();
+ }
+
+ // Serialize a dense union: append the type codes buffer, then the value
+ // offsets buffer, then recurse into every child. When the union has a
+ // non-zero offset, the value offsets are rebased per child and each child
+ // is sliced to the range that the rebased offsets refer to.
+ Status Visit(const DenseUnionArray& array) {
+   const int64_t slice_offset = array.offset();
+   const int64_t slice_length = array.length();
+
+   std::shared_ptr<Buffer> codes_buffer;
+   RETURN_NOT_OK(GetTruncatedBuffer(
+       slice_offset, slice_length,
+       static_cast<int32_t>(sizeof(UnionArray::type_code_t)), array.type_codes(),
+       options_.memory_pool, &codes_buffer));
+   out_->body_buffers.emplace_back(codes_buffer);
+
+   --max_recursion_depth_;
+   const auto& union_type = checked_cast<const UnionType&>(*array.type());
+
+   std::shared_ptr<Buffer> offsets_buffer;
+   RETURN_NOT_OK(GetTruncatedBuffer(
+       slice_offset, slice_length, static_cast<int32_t>(sizeof(int32_t)),
+       array.value_offsets(), options_.memory_pool, &offsets_buffer));
+
+   // The Union type codes are not necessarily 0-indexed
+   int8_t max_code = 0;
+   for (const int8_t code : union_type.type_codes()) {
+     max_code = std::max(max_code, code);
+   }
+
+   // Per-type-code bookkeeping, indexed by type code. An offset of -1 means
+   // that no occurrence of that child has been observed yet
+   std::vector<int32_t> child_offsets(max_code + 1, -1);
+   std::vector<int32_t> child_lengths(max_code + 1, 0);
+
+   const bool is_sliced = slice_offset != 0;
+   if (is_sliced) {
+     // This is an unpleasant case. Because the offsets are different for
+     // each child array, when we have a sliced array, we need to "rebase"
+     // the value_offsets for each array
+     const int32_t* raw_offsets = array.raw_value_offsets();
+     const int8_t* raw_codes = array.raw_type_codes();
+
+     // Allocate the rebased offsets
+     ARROW_ASSIGN_OR_RAISE(
+         auto rebased_buffer,
+         AllocateBuffer(slice_length * sizeof(int32_t), options_.memory_pool));
+     int32_t* rebased = reinterpret_cast<int32_t*>(rebased_buffer->mutable_data());
+
+     // Offsets may not be ascending, so the start offset of each child is
+     // the minimum offset observed for its type code
+     for (int64_t i = 0; i < slice_length; ++i) {
+       const uint8_t code = raw_codes[i];
+       int32_t& start = child_offsets[code];
+       start = (start == -1) ? raw_offsets[i] : std::min(start, raw_offsets[i]);
+     }
+
+     // Now rebase every offset against its child's start offset
+     for (int64_t i = 0; i < slice_length; ++i) {
+       const int8_t code = raw_codes[i];
+       const int32_t rebased_offset = raw_offsets[i] - child_offsets[code];
+       rebased[i] = rebased_offset;
+       // Update the child length to account for observed value
+       child_lengths[code] = std::max(child_lengths[code], rebased_offset + 1);
+     }
+
+     offsets_buffer = std::move(rebased_buffer);
+   }
+   out_->body_buffers.emplace_back(offsets_buffer);
+
+   // Visit children and slice accordingly
+   for (int i = 0; i < union_type.num_fields(); ++i) {
+     std::shared_ptr<Array> child = array.field(i);
+
+     // TODO: ARROW-809, for sliced unions, tricky to know how much to
+     // truncate the children. For now, we are truncating the children to be
+     // no longer than the parent union.
+     if (is_sliced) {
+       const int8_t code = union_type.type_codes()[i];
+       const int64_t child_offset = child_offsets[code];
+       const int64_t child_length = child_lengths[code];
+
+       // A non-positive offset (including -1 for a child that was never
+       // encountered) slices from 0, and only when the child is too long
+       if (child_offset > 0 || child_length < child->length()) {
+         child = child->Slice(std::max<int64_t>(child_offset, 0), child_length);
+       }
+     }
+     RETURN_NOT_OK(VisitArray(*child));
+   }
+   ++max_recursion_depth_;
+   return Status::OK();
+ }
+
+ // Only the indices are serialized here: the dictionary is written out
+ // separately, and the slice offset is contained in the indices.
+ Status Visit(const DictionaryArray& array) {
+   const auto indices = array.indices();
+   return VisitType(*indices);
+ }
+
+ // An extension array is serialized as its storage array.
+ Status Visit(const ExtensionArray& array) {
+   const auto storage = array.storage();
+   return VisitType(*storage);
+ }
+
+ // Dispatch on the array type of `values` to the matching Visit overload of
+ // this serializer.
+ Status VisitType(const Array& values) {
+   return VisitArrayInline(values, this);
+ }
+
+ protected:
+ // Destination for output buffers
+ IpcPayload* out_;
+
+ std::shared_ptr<KeyValueMetadata> custom_metadata_;
+
+ std::vector<internal::FieldMetadata> field_nodes_;
+ std::vector<internal::BufferMetadata> buffer_meta_;
+
+ const IpcWriteOptions& options_;
+ int64_t max_recursion_depth_;
+ int64_t buffer_start_offset_;
+};
+
+class DictionarySerializer : public RecordBatchSerializer {
+ public:
+ // Construct a serializer for one dictionary batch. `dictionary_id` and
+ // `is_delta` are stored for use by SerializeMetadata; the remaining
+ // arguments are forwarded unchanged to the RecordBatchSerializer base.
+ DictionarySerializer(int64_t dictionary_id, bool is_delta, int64_t buffer_start_offset,
+                      const IpcWriteOptions& options, IpcPayload* out)
+     : RecordBatchSerializer(buffer_start_offset, options, out),
+       dictionary_id_(dictionary_id),
+       is_delta_(is_delta) {}
+
+ // Write the dictionary message for the assembled payload into
+ // out_->metadata, tagging it with this serializer's dictionary id and
+ // delta flag.
+ Status SerializeMetadata(int64_t num_rows) override {
+   const int64_t body_length = out_->body_length;
+   return WriteDictionaryMessage(dictionary_id_, is_delta_, num_rows, body_length,
+                                 custom_metadata_, field_nodes_, buffer_meta_,
+                                 options_, &out_->metadata);
+ }
+
+ Status Assemble(const std::shared_ptr<Array>& dictionary) {
+ // Make a dummy record batch. A bit tedious as we have to make a schema
+ auto schema = arrow::schema({arrow::field("dictionary", dictionary->type())});
auto batch = RecordBatch::Make(std::move(schema), dictionary->length(), {dictionary});
- return RecordBatchSerializer::Assemble(*batch);
- }
-
- private:
- int64_t dictionary_id_;
- bool is_delta_;
-};
-
-} // namespace
-
-Status WriteIpcPayload(const IpcPayload& payload, const IpcWriteOptions& options,
- io::OutputStream* dst, int32_t* metadata_length) {
- RETURN_NOT_OK(WriteMessage(*payload.metadata, options, dst, metadata_length));
-
-#ifndef NDEBUG
- RETURN_NOT_OK(CheckAligned(dst));
-#endif
-
- // Now write the buffers
- for (size_t i = 0; i < payload.body_buffers.size(); ++i) {
- const std::shared_ptr<Buffer>& buffer = payload.body_buffers[i];
- int64_t size = 0;
- int64_t padding = 0;
-
- // The buffer might be null if we are handling zero row lengths.
- if (buffer) {
- size = buffer->size();
- padding = BitUtil::RoundUpToMultipleOf8(size) - size;
- }
-
- if (size > 0) {
- RETURN_NOT_OK(dst->Write(buffer));
- }
-
- if (padding > 0) {
- RETURN_NOT_OK(dst->Write(kPaddingBytes, padding));
- }
- }
-
-#ifndef NDEBUG
- RETURN_NOT_OK(CheckAligned(dst));
-#endif
-
- return Status::OK();
-}
-
-Status GetSchemaPayload(const Schema& schema, const IpcWriteOptions& options,
- const DictionaryFieldMapper& mapper, IpcPayload* out) {
- out->type = MessageType::SCHEMA;
- return internal::WriteSchemaMessage(schema, mapper, options, &out->metadata);
-}
-
-Status GetDictionaryPayload(int64_t id, const std::shared_ptr<Array>& dictionary,
- const IpcWriteOptions& options, IpcPayload* out) {
- return GetDictionaryPayload(id, false, dictionary, options, out);
-}
-
-Status GetDictionaryPayload(int64_t id, bool is_delta,
- const std::shared_ptr<Array>& dictionary,
- const IpcWriteOptions& options, IpcPayload* out) {
- out->type = MessageType::DICTIONARY_BATCH;
- // Frame of reference is 0, see ARROW-384
- DictionarySerializer assembler(id, is_delta, /*buffer_start_offset=*/0, options, out);
- return assembler.Assemble(dictionary);
-}
-
-Status GetRecordBatchPayload(const RecordBatch& batch, const IpcWriteOptions& options,
- IpcPayload* out) {
- out->type = MessageType::RECORD_BATCH;
- RecordBatchSerializer assembler(/*buffer_start_offset=*/0, options, out);
- return assembler.Assemble(batch);
-}
-
-Status WriteRecordBatch(const RecordBatch& batch, int64_t buffer_start_offset,
- io::OutputStream* dst, int32_t* metadata_length,
- int64_t* body_length, const IpcWriteOptions& options) {
- IpcPayload payload;
- RecordBatchSerializer assembler(buffer_start_offset, options, &payload);
- RETURN_NOT_OK(assembler.Assemble(batch));
-
- // TODO: it's a rough edge that the metadata and body length here are
- // computed separately
-
- // The body size is computed in the payload
- *body_length = payload.body_length;
-
- return WriteIpcPayload(payload, options, dst, metadata_length);
-}
-
-Status WriteRecordBatchStream(const std::vector<std::shared_ptr<RecordBatch>>& batches,
- const IpcWriteOptions& options, io::OutputStream* dst) {
- ASSIGN_OR_RAISE(std::shared_ptr<RecordBatchWriter> writer,
- MakeStreamWriter(dst, batches[0]->schema(), options));
- for (const auto& batch : batches) {
- DCHECK(batch->schema()->Equals(*batches[0]->schema())) << "Schemas unequal";
- RETURN_NOT_OK(writer->WriteRecordBatch(*batch));
- }
- RETURN_NOT_OK(writer->Close());
- return Status::OK();
-}
-
-namespace {
-
-Status WriteTensorHeader(const Tensor& tensor, io::OutputStream* dst,
- int32_t* metadata_length) {
- IpcWriteOptions options;
- options.alignment = kTensorAlignment;
- std::shared_ptr<Buffer> metadata;
- ARROW_ASSIGN_OR_RAISE(metadata, internal::WriteTensorMessage(tensor, 0, options));
- return WriteMessage(*metadata, options, dst, metadata_length);
-}
-
-Status WriteStridedTensorData(int dim_index, int64_t offset, int elem_size,
- const Tensor& tensor, uint8_t* scratch_space,
- io::OutputStream* dst) {
- if (dim_index == tensor.ndim() - 1) {
- const uint8_t* data_ptr = tensor.raw_data() + offset;
- const int64_t stride = tensor.strides()[dim_index];
- for (int64_t i = 0; i < tensor.shape()[dim_index]; ++i) {
- memcpy(scratch_space + i * elem_size, data_ptr, elem_size);
- data_ptr += stride;
- }
- return dst->Write(scratch_space, elem_size * tensor.shape()[dim_index]);
- }
- for (int64_t i = 0; i < tensor.shape()[dim_index]; ++i) {
- RETURN_NOT_OK(WriteStridedTensorData(dim_index + 1, offset, elem_size, tensor,
- scratch_space, dst));
- offset += tensor.strides()[dim_index];
- }
- return Status::OK();
-}
-
-Status GetContiguousTensor(const Tensor& tensor, MemoryPool* pool,
- std::unique_ptr<Tensor>* out) {
- const int elem_size = GetByteWidth(*tensor.type());
-
- ARROW_ASSIGN_OR_RAISE(
- auto scratch_space,
- AllocateBuffer(tensor.shape()[tensor.ndim() - 1] * elem_size, pool));
-
- ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ResizableBuffer> contiguous_data,
- AllocateResizableBuffer(tensor.size() * elem_size, pool));
-
- io::BufferOutputStream stream(contiguous_data);
- RETURN_NOT_OK(WriteStridedTensorData(0, 0, elem_size, tensor,
- scratch_space->mutable_data(), &stream));
-
- out->reset(new Tensor(tensor.type(), contiguous_data, tensor.shape()));
-
- return Status::OK();
-}
-
-} // namespace
-
-Status WriteTensor(const Tensor& tensor, io::OutputStream* dst, int32_t* metadata_length,
- int64_t* body_length) {
- const int elem_size = GetByteWidth(*tensor.type());
-
- *body_length = tensor.size() * elem_size;
-
- // Tensor metadata accounts for padding
- if (tensor.is_contiguous()) {
- RETURN_NOT_OK(WriteTensorHeader(tensor, dst, metadata_length));
- auto data = tensor.data();
- if (data && data->data()) {
- RETURN_NOT_OK(dst->Write(data->data(), *body_length));
- } else {
- *body_length = 0;
- }
- } else {
- // The tensor written is made contiguous
- Tensor dummy(tensor.type(), nullptr, tensor.shape());
- RETURN_NOT_OK(WriteTensorHeader(dummy, dst, metadata_length));
-
- // TODO: Do we care enough about this temporary allocation to pass in a
- // MemoryPool to this function?
- ARROW_ASSIGN_OR_RAISE(auto scratch_space,
- AllocateBuffer(tensor.shape()[tensor.ndim() - 1] * elem_size));
-
- RETURN_NOT_OK(WriteStridedTensorData(0, 0, elem_size, tensor,
- scratch_space->mutable_data(), dst));
- }
-
- return Status::OK();
-}
-
-Result<std::unique_ptr<Message>> GetTensorMessage(const Tensor& tensor,
- MemoryPool* pool) {
- const Tensor* tensor_to_write = &tensor;
- std::unique_ptr<Tensor> temp_tensor;
-
- if (!tensor.is_contiguous()) {
- RETURN_NOT_OK(GetContiguousTensor(tensor, pool, &temp_tensor));
- tensor_to_write = temp_tensor.get();
- }
-
- IpcWriteOptions options;
- options.alignment = kTensorAlignment;
- std::shared_ptr<Buffer> metadata;
- ARROW_ASSIGN_OR_RAISE(metadata,
- internal::WriteTensorMessage(*tensor_to_write, 0, options));
- return std::unique_ptr<Message>(new Message(metadata, tensor_to_write->data()));
-}
-
-namespace internal {
-
-class SparseTensorSerializer {
- public:
- SparseTensorSerializer(int64_t buffer_start_offset, IpcPayload* out)
- : out_(out),
- buffer_start_offset_(buffer_start_offset),
- options_(IpcWriteOptions::Defaults()) {}
-
- ~SparseTensorSerializer() = default;
-
- Status VisitSparseIndex(const SparseIndex& sparse_index) {
- switch (sparse_index.format_id()) {
- case SparseTensorFormat::COO:
- RETURN_NOT_OK(
- VisitSparseCOOIndex(checked_cast<const SparseCOOIndex&>(sparse_index)));
- break;
-
- case SparseTensorFormat::CSR:
- RETURN_NOT_OK(
- VisitSparseCSRIndex(checked_cast<const SparseCSRIndex&>(sparse_index)));
- break;
-
- case SparseTensorFormat::CSC:
- RETURN_NOT_OK(
- VisitSparseCSCIndex(checked_cast<const SparseCSCIndex&>(sparse_index)));
- break;
-
- case SparseTensorFormat::CSF:
- RETURN_NOT_OK(
- VisitSparseCSFIndex(checked_cast<const SparseCSFIndex&>(sparse_index)));
- break;
-
- default:
- std::stringstream ss;
- ss << "Unable to convert type: " << sparse_index.ToString() << std::endl;
- return Status::NotImplemented(ss.str());
- }
-
- return Status::OK();
- }
-
- Status SerializeMetadata(const SparseTensor& sparse_tensor) {
- return WriteSparseTensorMessage(sparse_tensor, out_->body_length, buffer_meta_,
- options_)
- .Value(&out_->metadata);
- }
-
- Status Assemble(const SparseTensor& sparse_tensor) {
- if (buffer_meta_.size() > 0) {
- buffer_meta_.clear();
- out_->body_buffers.clear();
- }
-
- RETURN_NOT_OK(VisitSparseIndex(*sparse_tensor.sparse_index()));
- out_->body_buffers.emplace_back(sparse_tensor.data());
-
- int64_t offset = buffer_start_offset_;
- buffer_meta_.reserve(out_->body_buffers.size());
-
- for (size_t i = 0; i < out_->body_buffers.size(); ++i) {
- const Buffer* buffer = out_->body_buffers[i].get();
- int64_t size = buffer->size();
- int64_t padding = BitUtil::RoundUpToMultipleOf8(size) - size;
- buffer_meta_.push_back({offset, size + padding});
- offset += size + padding;
- }
-
- out_->body_length = offset - buffer_start_offset_;
- DCHECK(BitUtil::IsMultipleOf8(out_->body_length));
-
- return SerializeMetadata(sparse_tensor);
- }
-
- private:
- Status VisitSparseCOOIndex(const SparseCOOIndex& sparse_index) {
- out_->body_buffers.emplace_back(sparse_index.indices()->data());
- return Status::OK();
- }
-
- Status VisitSparseCSRIndex(const SparseCSRIndex& sparse_index) {
- out_->body_buffers.emplace_back(sparse_index.indptr()->data());
- out_->body_buffers.emplace_back(sparse_index.indices()->data());
- return Status::OK();
- }
-
- Status VisitSparseCSCIndex(const SparseCSCIndex& sparse_index) {
- out_->body_buffers.emplace_back(sparse_index.indptr()->data());
- out_->body_buffers.emplace_back(sparse_index.indices()->data());
- return Status::OK();
- }
-
- Status VisitSparseCSFIndex(const SparseCSFIndex& sparse_index) {
- for (const std::shared_ptr<arrow::Tensor>& indptr : sparse_index.indptr()) {
- out_->body_buffers.emplace_back(indptr->data());
- }
- for (const std::shared_ptr<arrow::Tensor>& indices : sparse_index.indices()) {
- out_->body_buffers.emplace_back(indices->data());
- }
- return Status::OK();
- }
-
- IpcPayload* out_;
-
- std::vector<internal::BufferMetadata> buffer_meta_;
- int64_t buffer_start_offset_;
- IpcWriteOptions options_;
-};
-
-} // namespace internal
-
-Status WriteSparseTensor(const SparseTensor& sparse_tensor, io::OutputStream* dst,
- int32_t* metadata_length, int64_t* body_length) {
- IpcPayload payload;
- internal::SparseTensorSerializer writer(0, &payload);
- RETURN_NOT_OK(writer.Assemble(sparse_tensor));
-
- *body_length = payload.body_length;
- return WriteIpcPayload(payload, IpcWriteOptions::Defaults(), dst, metadata_length);
-}
-
-Status GetSparseTensorPayload(const SparseTensor& sparse_tensor, MemoryPool* pool,
- IpcPayload* out) {
- internal::SparseTensorSerializer writer(0, out);
- return writer.Assemble(sparse_tensor);
-}
-
-Result<std::unique_ptr<Message>> GetSparseTensorMessage(const SparseTensor& sparse_tensor,
- MemoryPool* pool) {
- IpcPayload payload;
- RETURN_NOT_OK(GetSparseTensorPayload(sparse_tensor, pool, &payload));
- return std::unique_ptr<Message>(
- new Message(std::move(payload.metadata), std::move(payload.body_buffers[0])));
-}
-
-int64_t GetPayloadSize(const IpcPayload& payload, const IpcWriteOptions& options) {
- const int32_t prefix_size = options.write_legacy_ipc_format ? 4 : 8;
- const int32_t flatbuffer_size = static_cast<int32_t>(payload.metadata->size());
- const int32_t padded_message_length = static_cast<int32_t>(
- PaddedLength(flatbuffer_size + prefix_size, options.alignment));
- // body_length already accounts for padding
- return payload.body_length + padded_message_length;
-}
-
-Status GetRecordBatchSize(const RecordBatch& batch, int64_t* size) {
- return GetRecordBatchSize(batch, IpcWriteOptions::Defaults(), size);
-}
-
-Status GetRecordBatchSize(const RecordBatch& batch, const IpcWriteOptions& options,
- int64_t* size) {
- // emulates the behavior of Write without actually writing
- int32_t metadata_length = 0;
- int64_t body_length = 0;
- io::MockOutputStream dst;
- RETURN_NOT_OK(
- WriteRecordBatch(batch, 0, &dst, &metadata_length, &body_length, options));
- *size = dst.GetExtentBytesWritten();
- return Status::OK();
-}
-
-Status GetTensorSize(const Tensor& tensor, int64_t* size) {
- // emulates the behavior of Write without actually writing
- int32_t metadata_length = 0;
- int64_t body_length = 0;
- io::MockOutputStream dst;
- RETURN_NOT_OK(WriteTensor(tensor, &dst, &metadata_length, &body_length));
- *size = dst.GetExtentBytesWritten();
- return Status::OK();
-}
-
-// ----------------------------------------------------------------------
-
-RecordBatchWriter::~RecordBatchWriter() {}
-
-Status RecordBatchWriter::WriteTable(const Table& table, int64_t max_chunksize) {
- TableBatchReader reader(table);
-
- if (max_chunksize > 0) {
- reader.set_chunksize(max_chunksize);
- }
-
- std::shared_ptr<RecordBatch> batch;
- while (true) {
- RETURN_NOT_OK(reader.ReadNext(&batch));
- if (batch == nullptr) {
- break;
- }
- RETURN_NOT_OK(WriteRecordBatch(*batch));
- }
-
- return Status::OK();
-}
-
-Status RecordBatchWriter::WriteTable(const Table& table) { return WriteTable(table, -1); }
-
-// ----------------------------------------------------------------------
-// Payload writer implementation
-
-namespace internal {
-
-IpcPayloadWriter::~IpcPayloadWriter() {}
-
-Status IpcPayloadWriter::Start() { return Status::OK(); }
-
-class ARROW_EXPORT IpcFormatWriter : public RecordBatchWriter {
- public:
- // A RecordBatchWriter implementation that writes to a IpcPayloadWriter.
- IpcFormatWriter(std::unique_ptr<internal::IpcPayloadWriter> payload_writer,
- const Schema& schema, const IpcWriteOptions& options,
- bool is_file_format)
- : payload_writer_(std::move(payload_writer)),
- schema_(schema),
- mapper_(schema),
- is_file_format_(is_file_format),
- options_(options) {}
-
- // A Schema-owning constructor variant
- IpcFormatWriter(std::unique_ptr<internal::IpcPayloadWriter> payload_writer,
- const std::shared_ptr<Schema>& schema, const IpcWriteOptions& options,
- bool is_file_format)
- : IpcFormatWriter(std::move(payload_writer), *schema, options, is_file_format) {
- shared_schema_ = schema;
- }
-
- Status WriteRecordBatch(const RecordBatch& batch) override {
- if (!batch.schema()->Equals(schema_, false /* check_metadata */)) {
- return Status::Invalid("Tried to write record batch with different schema");
- }
-
- RETURN_NOT_OK(CheckStarted());
-
- RETURN_NOT_OK(WriteDictionaries(batch));
-
- IpcPayload payload;
- RETURN_NOT_OK(GetRecordBatchPayload(batch, options_, &payload));
+ return RecordBatchSerializer::Assemble(*batch);
+ }
+
+ private:
+ int64_t dictionary_id_;
+ bool is_delta_;
+};
+
+} // namespace
+
+// Write an assembled payload to `dst`: first the metadata message (its
+// written length is reported through `metadata_length`), then every body
+// buffer in order, each followed by zero padding up to a multiple of 8 bytes.
+Status WriteIpcPayload(const IpcPayload& payload, const IpcWriteOptions& options,
+                       io::OutputStream* dst, int32_t* metadata_length) {
+  RETURN_NOT_OK(WriteMessage(*payload.metadata, options, dst, metadata_length));
+
+#ifndef NDEBUG
+  RETURN_NOT_OK(CheckAligned(dst));
+#endif
+
+  // Now write the buffers
+  for (const std::shared_ptr<Buffer>& buffer : payload.body_buffers) {
+    // The buffer might be null if we are handling zero row lengths; a null
+    // buffer contributes neither data nor padding.
+    if (!buffer) {
+      continue;
+    }
+
+    const int64_t size = buffer->size();
+    const int64_t padding = BitUtil::RoundUpToMultipleOf8(size) - size;
+
+    if (size > 0) {
+      RETURN_NOT_OK(dst->Write(buffer));
+    }
+    if (padding > 0) {
+      RETURN_NOT_OK(dst->Write(kPaddingBytes, padding));
+    }
+  }
+
+#ifndef NDEBUG
+  RETURN_NOT_OK(CheckAligned(dst));
+#endif
+
+  return Status::OK();
+}
+
+// Fill `out` with a SCHEMA payload: set the message type and write the
+// schema message into out->metadata.
+Status GetSchemaPayload(const Schema& schema, const IpcWriteOptions& options,
+                        const DictionaryFieldMapper& mapper, IpcPayload* out) {
+  out->type = MessageType::SCHEMA;
+  auto* metadata_out = &out->metadata;
+  return internal::WriteSchemaMessage(schema, mapper, options, metadata_out);
+}
+
+// Convenience overload: builds a non-delta dictionary payload.
+Status GetDictionaryPayload(int64_t id, const std::shared_ptr<Array>& dictionary,
+                            const IpcWriteOptions& options, IpcPayload* out) {
+  const bool is_delta = false;
+  return GetDictionaryPayload(id, is_delta, dictionary, options, out);
+}
+
+// Fill `out` with a DICTIONARY_BATCH payload for `dictionary`, tagged with
+// the given dictionary id and delta flag.
+Status GetDictionaryPayload(int64_t id, bool is_delta,
+                            const std::shared_ptr<Array>& dictionary,
+                            const IpcWriteOptions& options, IpcPayload* out) {
+  out->type = MessageType::DICTIONARY_BATCH;
+
+  // Frame of reference is 0, see ARROW-384
+  const int64_t buffer_start_offset = 0;
+  DictionarySerializer serializer(id, is_delta, buffer_start_offset, options, out);
+  return serializer.Assemble(dictionary);
+}
+
+// Fill `out` with a RECORD_BATCH payload assembled from `batch`, using a
+// buffer start offset of 0.
+Status GetRecordBatchPayload(const RecordBatch& batch, const IpcWriteOptions& options,
+                             IpcPayload* out) {
+  out->type = MessageType::RECORD_BATCH;
+
+  const int64_t buffer_start_offset = 0;
+  RecordBatchSerializer serializer(buffer_start_offset, options, out);
+  return serializer.Assemble(batch);
+}
+
+// Assemble `batch` into a temporary payload and write it to `dst`.
+// `*body_length` receives the payload's body length (set before anything is
+// written); `*metadata_length` is filled in by WriteIpcPayload.
+Status WriteRecordBatch(const RecordBatch& batch, int64_t buffer_start_offset,
+                        io::OutputStream* dst, int32_t* metadata_length,
+                        int64_t* body_length, const IpcWriteOptions& options) {
+  IpcPayload assembled;
+  RecordBatchSerializer serializer(buffer_start_offset, options, &assembled);
+  RETURN_NOT_OK(serializer.Assemble(batch));
+
+  // TODO: it's a rough edge that the metadata and body length here are
+  // computed separately
+
+  // The body size is computed in the payload
+  *body_length = assembled.body_length;
+
+  return WriteIpcPayload(assembled, options, dst, metadata_length);
+}
+
+// Write `batches` to `dst` as a single IPC stream, using the schema of the
+// first batch for the stream writer.
+//
+// Returns Status::Invalid when `batches` is empty: there is then no batch to
+// take the schema from (the previous code indexed batches[0]
+// unconditionally). In debug builds every batch is checked against the first
+// batch's schema.
+Status WriteRecordBatchStream(const std::vector<std::shared_ptr<RecordBatch>>& batches,
+                              const IpcWriteOptions& options, io::OutputStream* dst) {
+  if (batches.empty()) {
+    return Status::Invalid(
+        "Cannot write a record batch stream from an empty list of batches");
+  }
+
+  const auto schema = batches[0]->schema();
+  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<RecordBatchWriter> writer,
+                        MakeStreamWriter(dst, schema, options));
+  for (const auto& batch : batches) {
+    DCHECK(batch->schema()->Equals(*schema)) << "Schemas unequal";
+    RETURN_NOT_OK(writer->WriteRecordBatch(*batch));
+  }
+  RETURN_NOT_OK(writer->Close());
+  return Status::OK();
+}
+
+namespace {
+
+// Build the tensor metadata message for `tensor` and write it to `dst`
+// using kTensorAlignment; the written length is reported through
+// `metadata_length`.
+Status WriteTensorHeader(const Tensor& tensor, io::OutputStream* dst,
+                         int32_t* metadata_length) {
+  IpcWriteOptions tensor_options;
+  tensor_options.alignment = kTensorAlignment;
+
+  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> header,
+                        internal::WriteTensorMessage(tensor, 0, tensor_options));
+  return WriteMessage(*header, tensor_options, dst, metadata_length);
+}
+
+// Recursively write the elements of a strided tensor to `dst`.
+//
+// `dim_index` is the dimension being iterated and `offset` the byte offset
+// (from tensor.raw_data()) of the current sub-tensor. For the last dimension
+// the elements are gathered into `scratch_space` and written with a single
+// Write call; for any other dimension the function recurses once per index,
+// advancing `offset` by that dimension's stride.
+Status WriteStridedTensorData(int dim_index, int64_t offset, int elem_size,
+                              const Tensor& tensor, uint8_t* scratch_space,
+                              io::OutputStream* dst) {
+  const int64_t extent = tensor.shape()[dim_index];
+  const int64_t stride = tensor.strides()[dim_index];
+
+  if (dim_index != tensor.ndim() - 1) {
+    // Outer dimension: descend into each sub-tensor in turn
+    for (int64_t i = 0; i < extent; ++i, offset += stride) {
+      RETURN_NOT_OK(WriteStridedTensorData(dim_index + 1, offset, elem_size, tensor,
+                                           scratch_space, dst));
+    }
+    return Status::OK();
+  }
+
+  // Innermost dimension: pack the strided elements contiguously
+  const uint8_t* src = tensor.raw_data() + offset;
+  uint8_t* packed = scratch_space;
+  for (int64_t i = 0; i < extent; ++i) {
+    memcpy(packed, src, elem_size);
+    packed += elem_size;
+    src += stride;
+  }
+  return dst->Write(scratch_space, elem_size * extent);
+}
+
+// Produce in `*out` a new Tensor with the same type and shape as `tensor`
+// whose data was written element by element through WriteStridedTensorData
+// into a buffer allocated from `pool`.
+Status GetContiguousTensor(const Tensor& tensor, MemoryPool* pool,
+                           std::unique_ptr<Tensor>* out) {
+  const int elem_size = GetByteWidth(*tensor.type());
+
+  // Scratch space holds one row of the innermost dimension
+  const int64_t innermost_extent = tensor.shape()[tensor.ndim() - 1];
+  ARROW_ASSIGN_OR_RAISE(auto row_scratch,
+                        AllocateBuffer(innermost_extent * elem_size, pool));
+
+  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ResizableBuffer> packed_data,
+                        AllocateResizableBuffer(tensor.size() * elem_size, pool));
+
+  io::BufferOutputStream packed_stream(packed_data);
+  RETURN_NOT_OK(WriteStridedTensorData(0, 0, elem_size, tensor,
+                                       row_scratch->mutable_data(), &packed_stream));
+
+  out->reset(new Tensor(tensor.type(), packed_data, tensor.shape()));
+
+  return Status::OK();
+}
+
+} // namespace
+
+// Write `tensor` to `dst` as a tensor header followed by its data.
+//
+// `*body_length` is set to size * element width, except that it is reset to
+// 0 when a contiguous tensor has no data to write. A non-contiguous tensor
+// is written as if it were contiguous, one innermost row at a time.
+Status WriteTensor(const Tensor& tensor, io::OutputStream* dst, int32_t* metadata_length,
+                   int64_t* body_length) {
+  const int elem_size = GetByteWidth(*tensor.type());
+
+  *body_length = tensor.size() * elem_size;
+
+  // Tensor metadata accounts for padding
+  if (!tensor.is_contiguous()) {
+    // The tensor written is made contiguous
+    Tensor dummy(tensor.type(), nullptr, tensor.shape());
+    RETURN_NOT_OK(WriteTensorHeader(dummy, dst, metadata_length));
+
+    // TODO: Do we care enough about this temporary allocation to pass in a
+    // MemoryPool to this function?
+    ARROW_ASSIGN_OR_RAISE(auto row_scratch,
+                          AllocateBuffer(tensor.shape()[tensor.ndim() - 1] * elem_size));
+
+    RETURN_NOT_OK(WriteStridedTensorData(0, 0, elem_size, tensor,
+                                         row_scratch->mutable_data(), dst));
+    return Status::OK();
+  }
+
+  RETURN_NOT_OK(WriteTensorHeader(tensor, dst, metadata_length));
+  auto contents = tensor.data();
+  const bool has_data = contents && contents->data();
+  if (!has_data) {
+    *body_length = 0;
+    return Status::OK();
+  }
+  RETURN_NOT_OK(dst->Write(contents->data(), *body_length));
+  return Status::OK();
+}
+
+// Build a Message holding the tensor metadata and the tensor's data buffer.
+// A non-contiguous tensor is first copied into a contiguous one allocated
+// from `pool`.
+Result<std::unique_ptr<Message>> GetTensorMessage(const Tensor& tensor,
+                                                  MemoryPool* pool) {
+  std::unique_ptr<Tensor> contiguous_copy;
+  if (!tensor.is_contiguous()) {
+    RETURN_NOT_OK(GetContiguousTensor(tensor, pool, &contiguous_copy));
+  }
+  // Use the copy when one was made, otherwise the caller's tensor
+  const Tensor* source = contiguous_copy ? contiguous_copy.get() : &tensor;
+
+  IpcWriteOptions tensor_options;
+  tensor_options.alignment = kTensorAlignment;
+
+  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> header,
+                        internal::WriteTensorMessage(*source, 0, tensor_options));
+  return std::unique_ptr<Message>(new Message(header, source->data()));
+}
+
+namespace internal {
+
+// Assembles an IpcPayload for a SparseTensor: the sparse index buffers come
+// first (in a format-specific order), followed by the tensor's data buffer.
+// Buffer offsets recorded in the metadata start at `buffer_start_offset`.
+class SparseTensorSerializer {
+ public:
+  SparseTensorSerializer(int64_t buffer_start_offset, IpcPayload* out)
+      : out_(out),
+        buffer_start_offset_(buffer_start_offset),
+        options_(IpcWriteOptions::Defaults()) {}
+
+  ~SparseTensorSerializer() = default;
+
+  // Append the buffers of `sparse_index` to the payload body, dispatching on
+  // the index format. Returns NotImplemented for an unknown format.
+  Status VisitSparseIndex(const SparseIndex& sparse_index) {
+    switch (sparse_index.format_id()) {
+      case SparseTensorFormat::COO:
+        RETURN_NOT_OK(
+            VisitSparseCOOIndex(checked_cast<const SparseCOOIndex&>(sparse_index)));
+        break;
+
+      case SparseTensorFormat::CSR:
+        RETURN_NOT_OK(
+            VisitSparseCSRIndex(checked_cast<const SparseCSRIndex&>(sparse_index)));
+        break;
+
+      case SparseTensorFormat::CSC:
+        RETURN_NOT_OK(
+            VisitSparseCSCIndex(checked_cast<const SparseCSCIndex&>(sparse_index)));
+        break;
+
+      case SparseTensorFormat::CSF:
+        RETURN_NOT_OK(
+            VisitSparseCSFIndex(checked_cast<const SparseCSFIndex&>(sparse_index)));
+        break;
+
+      default:
+        std::stringstream ss;
+        ss << "Unable to convert type: " << sparse_index.ToString() << std::endl;
+        return Status::NotImplemented(ss.str());
+    }
+
+    return Status::OK();
+  }
+
+  // Write the sparse tensor message for the assembled body into
+  // out_->metadata.
+  Status SerializeMetadata(const SparseTensor& sparse_tensor) {
+    return WriteSparseTensorMessage(sparse_tensor, out_->body_length, buffer_meta_,
+                                    options_)
+        .Value(&out_->metadata);
+  }
+
+  // Collect the body buffers of `sparse_tensor`, compute the padded offset
+  // and length of each one, and serialize the metadata.
+  Status Assemble(const SparseTensor& sparse_tensor) {
+    if (buffer_meta_.size() > 0) {
+      buffer_meta_.clear();
+      out_->body_buffers.clear();
+    }
+
+    RETURN_NOT_OK(VisitSparseIndex(*sparse_tensor.sparse_index()));
+    out_->body_buffers.emplace_back(sparse_tensor.data());
+
+    int64_t offset = buffer_start_offset_;
+    buffer_meta_.reserve(out_->body_buffers.size());
+
+    for (const std::shared_ptr<Buffer>& buffer : out_->body_buffers) {
+      // A null buffer occupies no space, matching how WriteIpcPayload skips
+      // null body buffers instead of dereferencing them.
+      const int64_t size = buffer ? buffer->size() : 0;
+      const int64_t padded_size = BitUtil::RoundUpToMultipleOf8(size);
+      buffer_meta_.push_back({offset, padded_size});
+      offset += padded_size;
+    }
+
+    out_->body_length = offset - buffer_start_offset_;
+    DCHECK(BitUtil::IsMultipleOf8(out_->body_length));
+
+    return SerializeMetadata(sparse_tensor);
+  }
+
+ private:
+  // COO: a single indices buffer
+  Status VisitSparseCOOIndex(const SparseCOOIndex& sparse_index) {
+    out_->body_buffers.emplace_back(sparse_index.indices()->data());
+    return Status::OK();
+  }
+
+  // CSR: indptr buffer followed by indices buffer
+  Status VisitSparseCSRIndex(const SparseCSRIndex& sparse_index) {
+    out_->body_buffers.emplace_back(sparse_index.indptr()->data());
+    out_->body_buffers.emplace_back(sparse_index.indices()->data());
+    return Status::OK();
+  }
+
+  // CSC: indptr buffer followed by indices buffer
+  Status VisitSparseCSCIndex(const SparseCSCIndex& sparse_index) {
+    out_->body_buffers.emplace_back(sparse_index.indptr()->data());
+    out_->body_buffers.emplace_back(sparse_index.indices()->data());
+    return Status::OK();
+  }
+
+  // CSF: all indptr buffers, then all indices buffers
+  Status VisitSparseCSFIndex(const SparseCSFIndex& sparse_index) {
+    for (const std::shared_ptr<arrow::Tensor>& indptr : sparse_index.indptr()) {
+      out_->body_buffers.emplace_back(indptr->data());
+    }
+    for (const std::shared_ptr<arrow::Tensor>& indices : sparse_index.indices()) {
+      out_->body_buffers.emplace_back(indices->data());
+    }
+    return Status::OK();
+  }
+
+  // Destination for output buffers and metadata
+  IpcPayload* out_;
+
+  std::vector<internal::BufferMetadata> buffer_meta_;
+  int64_t buffer_start_offset_;
+  IpcWriteOptions options_;
+};
+
+} // namespace internal
+
+Status WriteSparseTensor(const SparseTensor& sparse_tensor, io::OutputStream* dst,
+ int32_t* metadata_length, int64_t* body_length) {
+ IpcPayload payload;
+ internal::SparseTensorSerializer writer(0, &payload);
+ RETURN_NOT_OK(writer.Assemble(sparse_tensor));
+
+ *body_length = payload.body_length;
+ return WriteIpcPayload(payload, IpcWriteOptions::Defaults(), dst, metadata_length);
+}
+
+Status GetSparseTensorPayload(const SparseTensor& sparse_tensor, MemoryPool* pool,
+ IpcPayload* out) {
+ internal::SparseTensorSerializer writer(0, out);
+ return writer.Assemble(sparse_tensor);
+}
+
+Result<std::unique_ptr<Message>> GetSparseTensorMessage(const SparseTensor& sparse_tensor,
+ MemoryPool* pool) {
+ IpcPayload payload;
+ RETURN_NOT_OK(GetSparseTensorPayload(sparse_tensor, pool, &payload));
+ return std::unique_ptr<Message>(
+ new Message(std::move(payload.metadata), std::move(payload.body_buffers[0])));
+}
+
+int64_t GetPayloadSize(const IpcPayload& payload, const IpcWriteOptions& options) {
+ const int32_t prefix_size = options.write_legacy_ipc_format ? 4 : 8;
+ const int32_t flatbuffer_size = static_cast<int32_t>(payload.metadata->size());
+ const int32_t padded_message_length = static_cast<int32_t>(
+ PaddedLength(flatbuffer_size + prefix_size, options.alignment));
+ // body_length already accounts for padding
+ return payload.body_length + padded_message_length;
+}
+
+Status GetRecordBatchSize(const RecordBatch& batch, int64_t* size) {
+ return GetRecordBatchSize(batch, IpcWriteOptions::Defaults(), size);
+}
+
+Status GetRecordBatchSize(const RecordBatch& batch, const IpcWriteOptions& options,
+ int64_t* size) {
+ // emulates the behavior of Write without actually writing
+ int32_t metadata_length = 0;
+ int64_t body_length = 0;
+ io::MockOutputStream dst;
+ RETURN_NOT_OK(
+ WriteRecordBatch(batch, 0, &dst, &metadata_length, &body_length, options));
+ *size = dst.GetExtentBytesWritten();
+ return Status::OK();
+}
+
+Status GetTensorSize(const Tensor& tensor, int64_t* size) {
+ // emulates the behavior of Write without actually writing
+ int32_t metadata_length = 0;
+ int64_t body_length = 0;
+ io::MockOutputStream dst;
+ RETURN_NOT_OK(WriteTensor(tensor, &dst, &metadata_length, &body_length));
+ *size = dst.GetExtentBytesWritten();
+ return Status::OK();
+}
+
+// ----------------------------------------------------------------------
+
+RecordBatchWriter::~RecordBatchWriter() {}
+
+Status RecordBatchWriter::WriteTable(const Table& table, int64_t max_chunksize) {
+ TableBatchReader reader(table);
+
+ if (max_chunksize > 0) {
+ reader.set_chunksize(max_chunksize);
+ }
+
+ std::shared_ptr<RecordBatch> batch;
+ while (true) {
+ RETURN_NOT_OK(reader.ReadNext(&batch));
+ if (batch == nullptr) {
+ break;
+ }
+ RETURN_NOT_OK(WriteRecordBatch(*batch));
+ }
+
+ return Status::OK();
+}
+
+Status RecordBatchWriter::WriteTable(const Table& table) { return WriteTable(table, -1); }
+
+// ----------------------------------------------------------------------
+// Payload writer implementation
+
+namespace internal {
+
+IpcPayloadWriter::~IpcPayloadWriter() {}
+
+Status IpcPayloadWriter::Start() { return Status::OK(); }
+
+class ARROW_EXPORT IpcFormatWriter : public RecordBatchWriter {
+ public:
+ // A RecordBatchWriter implementation that writes to a IpcPayloadWriter.
+ IpcFormatWriter(std::unique_ptr<internal::IpcPayloadWriter> payload_writer,
+ const Schema& schema, const IpcWriteOptions& options,
+ bool is_file_format)
+ : payload_writer_(std::move(payload_writer)),
+ schema_(schema),
+ mapper_(schema),
+ is_file_format_(is_file_format),
+ options_(options) {}
+
+ // A Schema-owning constructor variant
+ IpcFormatWriter(std::unique_ptr<internal::IpcPayloadWriter> payload_writer,
+ const std::shared_ptr<Schema>& schema, const IpcWriteOptions& options,
+ bool is_file_format)
+ : IpcFormatWriter(std::move(payload_writer), *schema, options, is_file_format) {
+ shared_schema_ = schema;
+ }
+
+ Status WriteRecordBatch(const RecordBatch& batch) override {
+ if (!batch.schema()->Equals(schema_, false /* check_metadata */)) {
+ return Status::Invalid("Tried to write record batch with different schema");
+ }
+
+ RETURN_NOT_OK(CheckStarted());
+
+ RETURN_NOT_OK(WriteDictionaries(batch));
+
+ IpcPayload payload;
+ RETURN_NOT_OK(GetRecordBatchPayload(batch, options_, &payload));
RETURN_NOT_OK(WritePayload(payload));
++stats_.num_record_batches;
return Status::OK();
- }
-
+ }
+
Status WriteTable(const Table& table, int64_t max_chunksize) override {
if (is_file_format_ && options_.unify_dictionaries) {
ARROW_ASSIGN_OR_RAISE(auto unified_table,
@@ -1012,57 +1012,57 @@ class ARROW_EXPORT IpcFormatWriter : public RecordBatchWriter {
}
}
- Status Close() override {
- RETURN_NOT_OK(CheckStarted());
- return payload_writer_->Close();
- }
-
- Status Start() {
- started_ = true;
- RETURN_NOT_OK(payload_writer_->Start());
-
- IpcPayload payload;
- RETURN_NOT_OK(GetSchemaPayload(schema_, options_, mapper_, &payload));
+ Status Close() override {
+ RETURN_NOT_OK(CheckStarted());
+ return payload_writer_->Close();
+ }
+
+ Status Start() {
+ started_ = true;
+ RETURN_NOT_OK(payload_writer_->Start());
+
+ IpcPayload payload;
+ RETURN_NOT_OK(GetSchemaPayload(schema_, options_, mapper_, &payload));
return WritePayload(payload);
- }
-
+ }
+
WriteStats stats() const override { return stats_; }
- protected:
- Status CheckStarted() {
- if (!started_) {
- return Start();
- }
- return Status::OK();
- }
-
- Status WriteDictionaries(const RecordBatch& batch) {
- ARROW_ASSIGN_OR_RAISE(const auto dictionaries, CollectDictionaries(batch, mapper_));
+ protected:
+ Status CheckStarted() {
+ if (!started_) {
+ return Start();
+ }
+ return Status::OK();
+ }
+
+ Status WriteDictionaries(const RecordBatch& batch) {
+ ARROW_ASSIGN_OR_RAISE(const auto dictionaries, CollectDictionaries(batch, mapper_));
const auto equal_options = EqualOptions().nans_equal(true);
-
- for (const auto& pair : dictionaries) {
- int64_t dictionary_id = pair.first;
- const auto& dictionary = pair.second;
-
- // If a dictionary with this id was already emitted, check if it was the same.
- auto* last_dictionary = &last_dictionaries_[dictionary_id];
- const bool dictionary_exists = (*last_dictionary != nullptr);
+
+ for (const auto& pair : dictionaries) {
+ int64_t dictionary_id = pair.first;
+ const auto& dictionary = pair.second;
+
+ // If a dictionary with this id was already emitted, check if it was the same.
+ auto* last_dictionary = &last_dictionaries_[dictionary_id];
+ const bool dictionary_exists = (*last_dictionary != nullptr);
int64_t delta_start = 0;
- if (dictionary_exists) {
- if ((*last_dictionary)->data() == dictionary->data()) {
- // Fast shortcut for a common case.
- // Same dictionary data by pointer => no need to emit it again
- continue;
- }
+ if (dictionary_exists) {
+ if ((*last_dictionary)->data() == dictionary->data()) {
+ // Fast shortcut for a common case.
+ // Same dictionary data by pointer => no need to emit it again
+ continue;
+ }
const int64_t last_length = (*last_dictionary)->length();
const int64_t new_length = dictionary->length();
if (new_length == last_length &&
((*last_dictionary)->Equals(dictionary, equal_options))) {
- // Same dictionary by value => no need to emit it again
- // (while this can have a CPU cost, this code path is required
- // for the IPC file format)
- continue;
- }
+ // Same dictionary by value => no need to emit it again
+ // (while this can have a CPU cost, this code path is required
+ // for the IPC file format)
+ continue;
+ }
if (is_file_format_) {
return Status::Invalid(
"Dictionary replacement detected when writing IPC file format. "
@@ -1078,8 +1078,8 @@ class ARROW_EXPORT IpcFormatWriter : public RecordBatchWriter {
// New dictionary starts with the current dictionary
delta_start = last_length;
}
- }
-
+ }
+
IpcPayload payload;
if (delta_start) {
RETURN_NOT_OK(GetDictionaryPayload(dictionary_id, /*is_delta=*/true,
@@ -1088,7 +1088,7 @@ class ARROW_EXPORT IpcFormatWriter : public RecordBatchWriter {
} else {
RETURN_NOT_OK(
GetDictionaryPayload(dictionary_id, dictionary, options_, &payload));
- }
+ }
RETURN_NOT_OK(WritePayload(payload));
++stats_.num_dictionary_batches;
if (dictionary_exists) {
@@ -1098,332 +1098,332 @@ class ARROW_EXPORT IpcFormatWriter : public RecordBatchWriter {
++stats_.num_replaced_dictionaries;
}
}
-
- // Remember dictionary for next batches
- *last_dictionary = dictionary;
- }
- return Status::OK();
- }
-
+
+ // Remember dictionary for next batches
+ *last_dictionary = dictionary;
+ }
+ return Status::OK();
+ }
+
Status WritePayload(const IpcPayload& payload) {
RETURN_NOT_OK(payload_writer_->WritePayload(payload));
++stats_.num_messages;
return Status::OK();
}
- std::unique_ptr<IpcPayloadWriter> payload_writer_;
- std::shared_ptr<Schema> shared_schema_;
- const Schema& schema_;
- const DictionaryFieldMapper mapper_;
- const bool is_file_format_;
-
- // A map of last-written dictionaries by id.
- // This is required to avoid the same dictionary again and again,
- // and also for correctness when writing the IPC file format
- // (where replacements and deltas are unsupported).
- // The latter is also why we can't use weak_ptr.
- std::unordered_map<int64_t, std::shared_ptr<Array>> last_dictionaries_;
-
- bool started_ = false;
- IpcWriteOptions options_;
+ std::unique_ptr<IpcPayloadWriter> payload_writer_;
+ std::shared_ptr<Schema> shared_schema_;
+ const Schema& schema_;
+ const DictionaryFieldMapper mapper_;
+ const bool is_file_format_;
+
+ // A map of last-written dictionaries by id.
+ // This is required to avoid the same dictionary again and again,
+ // and also for correctness when writing the IPC file format
+ // (where replacements and deltas are unsupported).
+ // The latter is also why we can't use weak_ptr.
+ std::unordered_map<int64_t, std::shared_ptr<Array>> last_dictionaries_;
+
+ bool started_ = false;
+ IpcWriteOptions options_;
WriteStats stats_;
-};
-
-class StreamBookKeeper {
- public:
- StreamBookKeeper(const IpcWriteOptions& options, io::OutputStream* sink)
- : options_(options), sink_(sink), position_(-1) {}
- StreamBookKeeper(const IpcWriteOptions& options, std::shared_ptr<io::OutputStream> sink)
- : options_(options),
- sink_(sink.get()),
- owned_sink_(std::move(sink)),
- position_(-1) {}
-
- Status UpdatePosition() { return sink_->Tell().Value(&position_); }
-
- Status UpdatePositionCheckAligned() {
- RETURN_NOT_OK(UpdatePosition());
- DCHECK_EQ(0, position_ % 8) << "Stream is not aligned";
- return Status::OK();
- }
-
- Status Align(int32_t alignment = kArrowIpcAlignment) {
- // Adds padding bytes if necessary to ensure all memory blocks are written on
- // 8-byte (or other alignment) boundaries.
- int64_t remainder = PaddedLength(position_, alignment) - position_;
- if (remainder > 0) {
- return Write(kPaddingBytes, remainder);
- }
- return Status::OK();
- }
-
- // Write data and update position
- Status Write(const void* data, int64_t nbytes) {
- RETURN_NOT_OK(sink_->Write(data, nbytes));
- position_ += nbytes;
- return Status::OK();
- }
-
- Status WriteEOS() {
- // End of stream marker
- constexpr int32_t kZeroLength = 0;
- if (!options_.write_legacy_ipc_format) {
- RETURN_NOT_OK(Write(&kIpcContinuationToken, sizeof(int32_t)));
- }
- return Write(&kZeroLength, sizeof(int32_t));
- }
-
- protected:
- IpcWriteOptions options_;
- io::OutputStream* sink_;
- std::shared_ptr<io::OutputStream> owned_sink_;
- int64_t position_;
-};
-
-/// A IpcPayloadWriter implementation that writes to an IPC stream
-/// (with an end-of-stream marker)
-class PayloadStreamWriter : public IpcPayloadWriter, protected StreamBookKeeper {
- public:
- PayloadStreamWriter(io::OutputStream* sink,
- const IpcWriteOptions& options = IpcWriteOptions::Defaults())
- : StreamBookKeeper(options, sink) {}
- PayloadStreamWriter(std::shared_ptr<io::OutputStream> sink,
- const IpcWriteOptions& options = IpcWriteOptions::Defaults())
- : StreamBookKeeper(options, std::move(sink)) {}
-
- ~PayloadStreamWriter() override = default;
-
- Status WritePayload(const IpcPayload& payload) override {
-#ifndef NDEBUG
- // Catch bug fixed in ARROW-3236
- RETURN_NOT_OK(UpdatePositionCheckAligned());
-#endif
-
- int32_t metadata_length = 0; // unused
- RETURN_NOT_OK(WriteIpcPayload(payload, options_, sink_, &metadata_length));
- RETURN_NOT_OK(UpdatePositionCheckAligned());
- return Status::OK();
- }
-
- Status Close() override { return WriteEOS(); }
-};
-
-/// A IpcPayloadWriter implementation that writes to a IPC file
-/// (with a footer as defined in File.fbs)
-class PayloadFileWriter : public internal::IpcPayloadWriter, protected StreamBookKeeper {
- public:
- PayloadFileWriter(const IpcWriteOptions& options, const std::shared_ptr<Schema>& schema,
- const std::shared_ptr<const KeyValueMetadata>& metadata,
- io::OutputStream* sink)
- : StreamBookKeeper(options, sink), schema_(schema), metadata_(metadata) {}
- PayloadFileWriter(const IpcWriteOptions& options, const std::shared_ptr<Schema>& schema,
- const std::shared_ptr<const KeyValueMetadata>& metadata,
- std::shared_ptr<io::OutputStream> sink)
- : StreamBookKeeper(options, std::move(sink)),
- schema_(schema),
- metadata_(metadata) {}
-
- ~PayloadFileWriter() override = default;
-
- Status WritePayload(const IpcPayload& payload) override {
-#ifndef NDEBUG
- // Catch bug fixed in ARROW-3236
- RETURN_NOT_OK(UpdatePositionCheckAligned());
-#endif
-
- // Metadata length must include padding, it's computed by WriteIpcPayload()
- FileBlock block = {position_, 0, payload.body_length};
- RETURN_NOT_OK(WriteIpcPayload(payload, options_, sink_, &block.metadata_length));
- RETURN_NOT_OK(UpdatePositionCheckAligned());
-
- // Record position and size of some message types, to list them in the footer
- switch (payload.type) {
- case MessageType::DICTIONARY_BATCH:
- dictionaries_.push_back(block);
- break;
- case MessageType::RECORD_BATCH:
- record_batches_.push_back(block);
- break;
- default:
- break;
- }
-
- return Status::OK();
- }
-
- Status Start() override {
- // ARROW-3236: The initial position -1 needs to be updated to the stream's
- // current position otherwise an incorrect amount of padding will be
- // written to new files.
- RETURN_NOT_OK(UpdatePosition());
-
- // It is only necessary to align to 8-byte boundary at the start of the file
- RETURN_NOT_OK(Write(kArrowMagicBytes, strlen(kArrowMagicBytes)));
- RETURN_NOT_OK(Align());
-
- return Status::OK();
- }
-
- Status Close() override {
- // Write 0 EOS message for compatibility with sequential readers
- RETURN_NOT_OK(WriteEOS());
-
- // Write file footer
- RETURN_NOT_OK(UpdatePosition());
- int64_t initial_position = position_;
- RETURN_NOT_OK(
- WriteFileFooter(*schema_, dictionaries_, record_batches_, metadata_, sink_));
-
- // Write footer length
- RETURN_NOT_OK(UpdatePosition());
- int32_t footer_length = static_cast<int32_t>(position_ - initial_position);
- if (footer_length <= 0) {
- return Status::Invalid("Invalid file footer");
- }
-
- // write footer length in little endian
- footer_length = BitUtil::ToLittleEndian(footer_length);
- RETURN_NOT_OK(Write(&footer_length, sizeof(int32_t)));
-
- // Write magic bytes to end file
- return Write(kArrowMagicBytes, strlen(kArrowMagicBytes));
- }
-
- protected:
- std::shared_ptr<Schema> schema_;
- std::shared_ptr<const KeyValueMetadata> metadata_;
- std::vector<FileBlock> dictionaries_;
- std::vector<FileBlock> record_batches_;
-};
-
-} // namespace internal
-
-Result<std::shared_ptr<RecordBatchWriter>> MakeStreamWriter(
- io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
- const IpcWriteOptions& options) {
- return std::make_shared<internal::IpcFormatWriter>(
- ::arrow::internal::make_unique<internal::PayloadStreamWriter>(sink, options),
- schema, options, /*is_file_format=*/false);
-}
-
-Result<std::shared_ptr<RecordBatchWriter>> MakeStreamWriter(
- std::shared_ptr<io::OutputStream> sink, const std::shared_ptr<Schema>& schema,
- const IpcWriteOptions& options) {
- return std::make_shared<internal::IpcFormatWriter>(
- ::arrow::internal::make_unique<internal::PayloadStreamWriter>(std::move(sink),
- options),
- schema, options, /*is_file_format=*/false);
-}
-
-Result<std::shared_ptr<RecordBatchWriter>> NewStreamWriter(
- io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
- const IpcWriteOptions& options) {
- return MakeStreamWriter(sink, schema, options);
-}
-
-Result<std::shared_ptr<RecordBatchWriter>> MakeFileWriter(
- io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
- const IpcWriteOptions& options,
- const std::shared_ptr<const KeyValueMetadata>& metadata) {
- return std::make_shared<internal::IpcFormatWriter>(
- ::arrow::internal::make_unique<internal::PayloadFileWriter>(options, schema,
- metadata, sink),
- schema, options, /*is_file_format=*/true);
-}
-
-Result<std::shared_ptr<RecordBatchWriter>> MakeFileWriter(
- std::shared_ptr<io::OutputStream> sink, const std::shared_ptr<Schema>& schema,
- const IpcWriteOptions& options,
- const std::shared_ptr<const KeyValueMetadata>& metadata) {
- return std::make_shared<internal::IpcFormatWriter>(
- ::arrow::internal::make_unique<internal::PayloadFileWriter>(
- options, schema, metadata, std::move(sink)),
- schema, options, /*is_file_format=*/true);
-}
-
-Result<std::shared_ptr<RecordBatchWriter>> NewFileWriter(
- io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
- const IpcWriteOptions& options,
- const std::shared_ptr<const KeyValueMetadata>& metadata) {
- return MakeFileWriter(sink, schema, options, metadata);
-}
-
-namespace internal {
-
-Result<std::unique_ptr<RecordBatchWriter>> OpenRecordBatchWriter(
- std::unique_ptr<IpcPayloadWriter> sink, const std::shared_ptr<Schema>& schema,
- const IpcWriteOptions& options) {
- // XXX should we call Start()?
- return ::arrow::internal::make_unique<internal::IpcFormatWriter>(
- std::move(sink), schema, options, /*is_file_format=*/false);
-}
-
-Result<std::unique_ptr<IpcPayloadWriter>> MakePayloadStreamWriter(
- io::OutputStream* sink, const IpcWriteOptions& options) {
- return ::arrow::internal::make_unique<internal::PayloadStreamWriter>(sink, options);
-}
-
-Result<std::unique_ptr<IpcPayloadWriter>> MakePayloadFileWriter(
- io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
- const IpcWriteOptions& options,
- const std::shared_ptr<const KeyValueMetadata>& metadata) {
- return ::arrow::internal::make_unique<internal::PayloadFileWriter>(options, schema,
- metadata, sink);
-}
-
-} // namespace internal
-
-// ----------------------------------------------------------------------
-// Serialization public APIs
-
-Result<std::shared_ptr<Buffer>> SerializeRecordBatch(const RecordBatch& batch,
- std::shared_ptr<MemoryManager> mm) {
- auto options = IpcWriteOptions::Defaults();
- int64_t size = 0;
- RETURN_NOT_OK(GetRecordBatchSize(batch, options, &size));
- ARROW_ASSIGN_OR_RAISE(auto buffer, mm->AllocateBuffer(size));
- ARROW_ASSIGN_OR_RAISE(auto writer, Buffer::GetWriter(buffer));
-
- // XXX Should we have a helper function for getting a MemoryPool
- // for any MemoryManager (not only CPU)?
- if (mm->is_cpu()) {
- options.memory_pool = checked_pointer_cast<CPUMemoryManager>(mm)->pool();
- }
- RETURN_NOT_OK(SerializeRecordBatch(batch, options, writer.get()));
- RETURN_NOT_OK(writer->Close());
- return buffer;
-}
-
-Result<std::shared_ptr<Buffer>> SerializeRecordBatch(const RecordBatch& batch,
- const IpcWriteOptions& options) {
- int64_t size = 0;
- RETURN_NOT_OK(GetRecordBatchSize(batch, options, &size));
- ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> buffer,
- AllocateBuffer(size, options.memory_pool));
-
- io::FixedSizeBufferWriter stream(buffer);
- RETURN_NOT_OK(SerializeRecordBatch(batch, options, &stream));
- return buffer;
-}
-
-Status SerializeRecordBatch(const RecordBatch& batch, const IpcWriteOptions& options,
- io::OutputStream* out) {
- int32_t metadata_length = 0;
- int64_t body_length = 0;
- return WriteRecordBatch(batch, 0, out, &metadata_length, &body_length, options);
-}
-
-Result<std::shared_ptr<Buffer>> SerializeSchema(const Schema& schema, MemoryPool* pool) {
- ARROW_ASSIGN_OR_RAISE(auto stream, io::BufferOutputStream::Create(1024, pool));
-
- auto options = IpcWriteOptions::Defaults();
- const bool is_file_format = false; // indifferent as we don't write dictionaries
- internal::IpcFormatWriter writer(
- ::arrow::internal::make_unique<internal::PayloadStreamWriter>(stream.get()), schema,
- options, is_file_format);
- RETURN_NOT_OK(writer.Start());
- return stream->Finish();
-}
-
-} // namespace ipc
-} // namespace arrow
+};
+
+class StreamBookKeeper {
+ public:
+ StreamBookKeeper(const IpcWriteOptions& options, io::OutputStream* sink)
+ : options_(options), sink_(sink), position_(-1) {}
+ StreamBookKeeper(const IpcWriteOptions& options, std::shared_ptr<io::OutputStream> sink)
+ : options_(options),
+ sink_(sink.get()),
+ owned_sink_(std::move(sink)),
+ position_(-1) {}
+
+ Status UpdatePosition() { return sink_->Tell().Value(&position_); }
+
+ Status UpdatePositionCheckAligned() {
+ RETURN_NOT_OK(UpdatePosition());
+ DCHECK_EQ(0, position_ % 8) << "Stream is not aligned";
+ return Status::OK();
+ }
+
+ Status Align(int32_t alignment = kArrowIpcAlignment) {
+ // Adds padding bytes if necessary to ensure all memory blocks are written on
+ // 8-byte (or other alignment) boundaries.
+ int64_t remainder = PaddedLength(position_, alignment) - position_;
+ if (remainder > 0) {
+ return Write(kPaddingBytes, remainder);
+ }
+ return Status::OK();
+ }
+
+ // Write data and update position
+ Status Write(const void* data, int64_t nbytes) {
+ RETURN_NOT_OK(sink_->Write(data, nbytes));
+ position_ += nbytes;
+ return Status::OK();
+ }
+
+ Status WriteEOS() {
+ // End of stream marker
+ constexpr int32_t kZeroLength = 0;
+ if (!options_.write_legacy_ipc_format) {
+ RETURN_NOT_OK(Write(&kIpcContinuationToken, sizeof(int32_t)));
+ }
+ return Write(&kZeroLength, sizeof(int32_t));
+ }
+
+ protected:
+ IpcWriteOptions options_;
+ io::OutputStream* sink_;
+ std::shared_ptr<io::OutputStream> owned_sink_;
+ int64_t position_;
+};
+
+/// A IpcPayloadWriter implementation that writes to an IPC stream
+/// (with an end-of-stream marker)
+class PayloadStreamWriter : public IpcPayloadWriter, protected StreamBookKeeper {
+ public:
+ PayloadStreamWriter(io::OutputStream* sink,
+ const IpcWriteOptions& options = IpcWriteOptions::Defaults())
+ : StreamBookKeeper(options, sink) {}
+ PayloadStreamWriter(std::shared_ptr<io::OutputStream> sink,
+ const IpcWriteOptions& options = IpcWriteOptions::Defaults())
+ : StreamBookKeeper(options, std::move(sink)) {}
+
+ ~PayloadStreamWriter() override = default;
+
+ Status WritePayload(const IpcPayload& payload) override {
+#ifndef NDEBUG
+ // Catch bug fixed in ARROW-3236
+ RETURN_NOT_OK(UpdatePositionCheckAligned());
+#endif
+
+ int32_t metadata_length = 0; // unused
+ RETURN_NOT_OK(WriteIpcPayload(payload, options_, sink_, &metadata_length));
+ RETURN_NOT_OK(UpdatePositionCheckAligned());
+ return Status::OK();
+ }
+
+ Status Close() override { return WriteEOS(); }
+};
+
+/// A IpcPayloadWriter implementation that writes to a IPC file
+/// (with a footer as defined in File.fbs)
+class PayloadFileWriter : public internal::IpcPayloadWriter, protected StreamBookKeeper {
+ public:
+ PayloadFileWriter(const IpcWriteOptions& options, const std::shared_ptr<Schema>& schema,
+ const std::shared_ptr<const KeyValueMetadata>& metadata,
+ io::OutputStream* sink)
+ : StreamBookKeeper(options, sink), schema_(schema), metadata_(metadata) {}
+ PayloadFileWriter(const IpcWriteOptions& options, const std::shared_ptr<Schema>& schema,
+ const std::shared_ptr<const KeyValueMetadata>& metadata,
+ std::shared_ptr<io::OutputStream> sink)
+ : StreamBookKeeper(options, std::move(sink)),
+ schema_(schema),
+ metadata_(metadata) {}
+
+ ~PayloadFileWriter() override = default;
+
+ Status WritePayload(const IpcPayload& payload) override {
+#ifndef NDEBUG
+ // Catch bug fixed in ARROW-3236
+ RETURN_NOT_OK(UpdatePositionCheckAligned());
+#endif
+
+ // Metadata length must include padding, it's computed by WriteIpcPayload()
+ FileBlock block = {position_, 0, payload.body_length};
+ RETURN_NOT_OK(WriteIpcPayload(payload, options_, sink_, &block.metadata_length));
+ RETURN_NOT_OK(UpdatePositionCheckAligned());
+
+ // Record position and size of some message types, to list them in the footer
+ switch (payload.type) {
+ case MessageType::DICTIONARY_BATCH:
+ dictionaries_.push_back(block);
+ break;
+ case MessageType::RECORD_BATCH:
+ record_batches_.push_back(block);
+ break;
+ default:
+ break;
+ }
+
+ return Status::OK();
+ }
+
+ Status Start() override {
+ // ARROW-3236: The initial position -1 needs to be updated to the stream's
+ // current position otherwise an incorrect amount of padding will be
+ // written to new files.
+ RETURN_NOT_OK(UpdatePosition());
+
+ // It is only necessary to align to 8-byte boundary at the start of the file
+ RETURN_NOT_OK(Write(kArrowMagicBytes, strlen(kArrowMagicBytes)));
+ RETURN_NOT_OK(Align());
+
+ return Status::OK();
+ }
+
+ Status Close() override {
+ // Write 0 EOS message for compatibility with sequential readers
+ RETURN_NOT_OK(WriteEOS());
+
+ // Write file footer
+ RETURN_NOT_OK(UpdatePosition());
+ int64_t initial_position = position_;
+ RETURN_NOT_OK(
+ WriteFileFooter(*schema_, dictionaries_, record_batches_, metadata_, sink_));
+
+ // Write footer length
+ RETURN_NOT_OK(UpdatePosition());
+ int32_t footer_length = static_cast<int32_t>(position_ - initial_position);
+ if (footer_length <= 0) {
+ return Status::Invalid("Invalid file footer");
+ }
+
+ // write footer length in little endian
+ footer_length = BitUtil::ToLittleEndian(footer_length);
+ RETURN_NOT_OK(Write(&footer_length, sizeof(int32_t)));
+
+ // Write magic bytes to end file
+ return Write(kArrowMagicBytes, strlen(kArrowMagicBytes));
+ }
+
+ protected:
+ std::shared_ptr<Schema> schema_;
+ std::shared_ptr<const KeyValueMetadata> metadata_;
+ std::vector<FileBlock> dictionaries_;
+ std::vector<FileBlock> record_batches_;
+};
+
+} // namespace internal
+
+Result<std::shared_ptr<RecordBatchWriter>> MakeStreamWriter(
+ io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
+ const IpcWriteOptions& options) {
+ return std::make_shared<internal::IpcFormatWriter>(
+ ::arrow::internal::make_unique<internal::PayloadStreamWriter>(sink, options),
+ schema, options, /*is_file_format=*/false);
+}
+
+Result<std::shared_ptr<RecordBatchWriter>> MakeStreamWriter(
+ std::shared_ptr<io::OutputStream> sink, const std::shared_ptr<Schema>& schema,
+ const IpcWriteOptions& options) {
+ return std::make_shared<internal::IpcFormatWriter>(
+ ::arrow::internal::make_unique<internal::PayloadStreamWriter>(std::move(sink),
+ options),
+ schema, options, /*is_file_format=*/false);
+}
+
+Result<std::shared_ptr<RecordBatchWriter>> NewStreamWriter(
+ io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
+ const IpcWriteOptions& options) {
+ return MakeStreamWriter(sink, schema, options);
+}
+
+Result<std::shared_ptr<RecordBatchWriter>> MakeFileWriter(
+ io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
+ const IpcWriteOptions& options,
+ const std::shared_ptr<const KeyValueMetadata>& metadata) {
+ return std::make_shared<internal::IpcFormatWriter>(
+ ::arrow::internal::make_unique<internal::PayloadFileWriter>(options, schema,
+ metadata, sink),
+ schema, options, /*is_file_format=*/true);
+}
+
+Result<std::shared_ptr<RecordBatchWriter>> MakeFileWriter(
+ std::shared_ptr<io::OutputStream> sink, const std::shared_ptr<Schema>& schema,
+ const IpcWriteOptions& options,
+ const std::shared_ptr<const KeyValueMetadata>& metadata) {
+ return std::make_shared<internal::IpcFormatWriter>(
+ ::arrow::internal::make_unique<internal::PayloadFileWriter>(
+ options, schema, metadata, std::move(sink)),
+ schema, options, /*is_file_format=*/true);
+}
+
+Result<std::shared_ptr<RecordBatchWriter>> NewFileWriter(
+ io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
+ const IpcWriteOptions& options,
+ const std::shared_ptr<const KeyValueMetadata>& metadata) {
+ return MakeFileWriter(sink, schema, options, metadata);
+}
+
+namespace internal {
+
+Result<std::unique_ptr<RecordBatchWriter>> OpenRecordBatchWriter(
+ std::unique_ptr<IpcPayloadWriter> sink, const std::shared_ptr<Schema>& schema,
+ const IpcWriteOptions& options) {
+ // XXX should we call Start()?
+ return ::arrow::internal::make_unique<internal::IpcFormatWriter>(
+ std::move(sink), schema, options, /*is_file_format=*/false);
+}
+
+Result<std::unique_ptr<IpcPayloadWriter>> MakePayloadStreamWriter(
+ io::OutputStream* sink, const IpcWriteOptions& options) {
+ return ::arrow::internal::make_unique<internal::PayloadStreamWriter>(sink, options);
+}
+
+Result<std::unique_ptr<IpcPayloadWriter>> MakePayloadFileWriter(
+ io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
+ const IpcWriteOptions& options,
+ const std::shared_ptr<const KeyValueMetadata>& metadata) {
+ return ::arrow::internal::make_unique<internal::PayloadFileWriter>(options, schema,
+ metadata, sink);
+}
+
+} // namespace internal
+
+// ----------------------------------------------------------------------
+// Serialization public APIs
+
+Result<std::shared_ptr<Buffer>> SerializeRecordBatch(const RecordBatch& batch,
+ std::shared_ptr<MemoryManager> mm) {
+ auto options = IpcWriteOptions::Defaults();
+ int64_t size = 0;
+ RETURN_NOT_OK(GetRecordBatchSize(batch, options, &size));
+ ARROW_ASSIGN_OR_RAISE(auto buffer, mm->AllocateBuffer(size));
+ ARROW_ASSIGN_OR_RAISE(auto writer, Buffer::GetWriter(buffer));
+
+ // XXX Should we have a helper function for getting a MemoryPool
+ // for any MemoryManager (not only CPU)?
+ if (mm->is_cpu()) {
+ options.memory_pool = checked_pointer_cast<CPUMemoryManager>(mm)->pool();
+ }
+ RETURN_NOT_OK(SerializeRecordBatch(batch, options, writer.get()));
+ RETURN_NOT_OK(writer->Close());
+ return buffer;
+}
+
+Result<std::shared_ptr<Buffer>> SerializeRecordBatch(const RecordBatch& batch,
+ const IpcWriteOptions& options) {
+ int64_t size = 0;
+ RETURN_NOT_OK(GetRecordBatchSize(batch, options, &size));
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> buffer,
+ AllocateBuffer(size, options.memory_pool));
+
+ io::FixedSizeBufferWriter stream(buffer);
+ RETURN_NOT_OK(SerializeRecordBatch(batch, options, &stream));
+ return buffer;
+}
+
+Status SerializeRecordBatch(const RecordBatch& batch, const IpcWriteOptions& options,
+ io::OutputStream* out) {
+ int32_t metadata_length = 0;
+ int64_t body_length = 0;
+ return WriteRecordBatch(batch, 0, out, &metadata_length, &body_length, options);
+}
+
+Result<std::shared_ptr<Buffer>> SerializeSchema(const Schema& schema, MemoryPool* pool) {
+ ARROW_ASSIGN_OR_RAISE(auto stream, io::BufferOutputStream::Create(1024, pool));
+
+ auto options = IpcWriteOptions::Defaults();
+ const bool is_file_format = false; // indifferent as we don't write dictionaries
+ internal::IpcFormatWriter writer(
+ ::arrow::internal::make_unique<internal::PayloadStreamWriter>(stream.get()), schema,
+ options, is_file_format);
+ RETURN_NOT_OK(writer.Start());
+ return stream->Finish();
+}
+
+} // namespace ipc
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/writer.h b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/writer.h
index 0ea83d7630a..a2fd2fa72d5 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/writer.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/writer.h
@@ -1,65 +1,65 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Implement Arrow streaming binary format
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <vector>
-
-#include "arrow/ipc/dictionary.h" // IWYU pragma: export
-#include "arrow/ipc/message.h"
-#include "arrow/ipc/options.h"
-#include "arrow/result.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class Array;
-class Buffer;
-class MemoryManager;
-class MemoryPool;
-class RecordBatch;
-class Schema;
-class Status;
-class Table;
-class Tensor;
-class SparseTensor;
-
-namespace io {
-
-class OutputStream;
-
-} // namespace io
-
-namespace ipc {
-
-/// \brief Intermediate data structure with metadata header, and zero
-/// or more buffers for the message body.
-struct IpcPayload {
- MessageType type = MessageType::NONE;
- std::shared_ptr<Buffer> metadata;
- std::vector<std::shared_ptr<Buffer>> body_buffers;
- int64_t body_length = 0;
-};
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Implement Arrow streaming binary format
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "arrow/ipc/dictionary.h" // IWYU pragma: export
+#include "arrow/ipc/message.h"
+#include "arrow/ipc/options.h"
+#include "arrow/result.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Array;
+class Buffer;
+class MemoryManager;
+class MemoryPool;
+class RecordBatch;
+class Schema;
+class Status;
+class Table;
+class Tensor;
+class SparseTensor;
+
+namespace io {
+
+class OutputStream;
+
+} // namespace io
+
+namespace ipc {
+
+/// \brief Intermediate data structure with metadata header, and zero
+/// or more buffers for the message body.
+struct IpcPayload {
+ MessageType type = MessageType::NONE;
+ std::shared_ptr<Buffer> metadata;
+ std::vector<std::shared_ptr<Buffer>> body_buffers;
+ int64_t body_length = 0;
+};
+
struct WriteStats {
/// Number of IPC messages written.
int64_t num_messages = 0;
@@ -77,94 +77,94 @@ struct WriteStats {
int64_t num_replaced_dictionaries = 0;
};
-/// \class RecordBatchWriter
-/// \brief Abstract interface for writing a stream of record batches
-class ARROW_EXPORT RecordBatchWriter {
- public:
- virtual ~RecordBatchWriter();
-
- /// \brief Write a record batch to the stream
- ///
- /// \param[in] batch the record batch to write to the stream
- /// \return Status
- virtual Status WriteRecordBatch(const RecordBatch& batch) = 0;
-
- /// \brief Write possibly-chunked table by creating sequence of record batches
- /// \param[in] table table to write
- /// \return Status
- Status WriteTable(const Table& table);
-
- /// \brief Write Table with a particular chunksize
- /// \param[in] table table to write
+/// \class RecordBatchWriter
+/// \brief Abstract interface for writing a stream of record batches
+class ARROW_EXPORT RecordBatchWriter {
+ public:
+ virtual ~RecordBatchWriter();
+
+ /// \brief Write a record batch to the stream
+ ///
+ /// \param[in] batch the record batch to write to the stream
+ /// \return Status
+ virtual Status WriteRecordBatch(const RecordBatch& batch) = 0;
+
+ /// \brief Write possibly-chunked table by creating sequence of record batches
+ /// \param[in] table table to write
+ /// \return Status
+ Status WriteTable(const Table& table);
+
+ /// \brief Write Table with a particular chunksize
+ /// \param[in] table table to write
/// \param[in] max_chunksize maximum length of table chunks. To indicate
/// that no maximum should be enforced, pass -1.
- /// \return Status
+ /// \return Status
virtual Status WriteTable(const Table& table, int64_t max_chunksize);
-
- /// \brief Perform any logic necessary to finish the stream
- ///
- /// \return Status
- virtual Status Close() = 0;
+
+ /// \brief Perform any logic necessary to finish the stream
+ ///
+ /// \return Status
+ virtual Status Close() = 0;
/// \brief Return current write statistics
virtual WriteStats stats() const = 0;
-};
-
+};
+
/// \defgroup record-batch-writer-factories Functions for creating RecordBatchWriter
/// instances
///
/// @{
-/// Create a new IPC stream writer from stream sink and schema. User is
-/// responsible for closing the actual OutputStream.
-///
-/// \param[in] sink output stream to write to
-/// \param[in] schema the schema of the record batches to be written
-/// \param[in] options options for serialization
-/// \return Result<std::shared_ptr<RecordBatchWriter>>
-ARROW_EXPORT
-Result<std::shared_ptr<RecordBatchWriter>> MakeStreamWriter(
- io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
- const IpcWriteOptions& options = IpcWriteOptions::Defaults());
-
-/// Create a new IPC stream writer from stream sink and schema. User is
-/// responsible for closing the actual OutputStream.
-///
-/// \param[in] sink output stream to write to
-/// \param[in] schema the schema of the record batches to be written
-/// \param[in] options options for serialization
-/// \return Result<std::shared_ptr<RecordBatchWriter>>
-ARROW_EXPORT
-Result<std::shared_ptr<RecordBatchWriter>> MakeStreamWriter(
- std::shared_ptr<io::OutputStream> sink, const std::shared_ptr<Schema>& schema,
- const IpcWriteOptions& options = IpcWriteOptions::Defaults());
-
-/// Create a new IPC file writer from stream sink and schema
-///
-/// \param[in] sink output stream to write to
-/// \param[in] schema the schema of the record batches to be written
-/// \param[in] options options for serialization, optional
-/// \param[in] metadata custom metadata for File Footer, optional
-/// \return Result<std::shared_ptr<RecordBatchWriter>>
-ARROW_EXPORT
-Result<std::shared_ptr<RecordBatchWriter>> MakeFileWriter(
- io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
- const IpcWriteOptions& options = IpcWriteOptions::Defaults(),
- const std::shared_ptr<const KeyValueMetadata>& metadata = NULLPTR);
-
-/// Create a new IPC file writer from stream sink and schema
-///
-/// \param[in] sink output stream to write to
-/// \param[in] schema the schema of the record batches to be written
-/// \param[in] options options for serialization, optional
-/// \param[in] metadata custom metadata for File Footer, optional
-/// \return Result<std::shared_ptr<RecordBatchWriter>>
-ARROW_EXPORT
-Result<std::shared_ptr<RecordBatchWriter>> MakeFileWriter(
- std::shared_ptr<io::OutputStream> sink, const std::shared_ptr<Schema>& schema,
- const IpcWriteOptions& options = IpcWriteOptions::Defaults(),
- const std::shared_ptr<const KeyValueMetadata>& metadata = NULLPTR);
-
+/// Create a new IPC stream writer from stream sink and schema. User is
+/// responsible for closing the actual OutputStream.
+///
+/// \param[in] sink output stream to write to
+/// \param[in] schema the schema of the record batches to be written
+/// \param[in] options options for serialization
+/// \return Result<std::shared_ptr<RecordBatchWriter>>
+ARROW_EXPORT
+Result<std::shared_ptr<RecordBatchWriter>> MakeStreamWriter(
+ io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
+ const IpcWriteOptions& options = IpcWriteOptions::Defaults());
+
+/// Create a new IPC stream writer from stream sink and schema. User is
+/// responsible for closing the actual OutputStream.
+///
+/// \param[in] sink output stream to write to
+/// \param[in] schema the schema of the record batches to be written
+/// \param[in] options options for serialization
+/// \return Result<std::shared_ptr<RecordBatchWriter>>
+ARROW_EXPORT
+Result<std::shared_ptr<RecordBatchWriter>> MakeStreamWriter(
+ std::shared_ptr<io::OutputStream> sink, const std::shared_ptr<Schema>& schema,
+ const IpcWriteOptions& options = IpcWriteOptions::Defaults());
+
+/// Create a new IPC file writer from stream sink and schema
+///
+/// \param[in] sink output stream to write to
+/// \param[in] schema the schema of the record batches to be written
+/// \param[in] options options for serialization, optional
+/// \param[in] metadata custom metadata for File Footer, optional
+/// \return Result<std::shared_ptr<RecordBatchWriter>>
+ARROW_EXPORT
+Result<std::shared_ptr<RecordBatchWriter>> MakeFileWriter(
+ io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
+ const IpcWriteOptions& options = IpcWriteOptions::Defaults(),
+ const std::shared_ptr<const KeyValueMetadata>& metadata = NULLPTR);
+
+/// Create a new IPC file writer from stream sink and schema
+///
+/// \param[in] sink output stream to write to
+/// \param[in] schema the schema of the record batches to be written
+/// \param[in] options options for serialization, optional
+/// \param[in] metadata custom metadata for File Footer, optional
+/// \return Result<std::shared_ptr<RecordBatchWriter>>
+ARROW_EXPORT
+Result<std::shared_ptr<RecordBatchWriter>> MakeFileWriter(
+ std::shared_ptr<io::OutputStream> sink, const std::shared_ptr<Schema>& schema,
+ const IpcWriteOptions& options = IpcWriteOptions::Defaults(),
+ const std::shared_ptr<const KeyValueMetadata>& metadata = NULLPTR);
+
/// @}
ARROW_DEPRECATED("Use MakeStreamWriter")
@@ -173,287 +173,287 @@ Result<std::shared_ptr<RecordBatchWriter>> NewStreamWriter(
io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
const IpcWriteOptions& options = IpcWriteOptions::Defaults());
-ARROW_DEPRECATED("Use MakeFileWriter")
-ARROW_EXPORT
-Result<std::shared_ptr<RecordBatchWriter>> NewFileWriter(
- io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
- const IpcWriteOptions& options = IpcWriteOptions::Defaults(),
- const std::shared_ptr<const KeyValueMetadata>& metadata = NULLPTR);
-
-/// \brief Low-level API for writing a record batch (without schema)
-/// to an OutputStream as encapsulated IPC message. See Arrow format
-/// documentation for more detail.
-///
-/// \param[in] batch the record batch to write
-/// \param[in] buffer_start_offset the start offset to use in the buffer metadata,
-/// generally should be 0
-/// \param[in] dst an OutputStream
-/// \param[out] metadata_length the size of the length-prefixed flatbuffer
-/// including padding to a 64-byte boundary
-/// \param[out] body_length the size of the contiguous buffer block plus
-/// \param[in] options options for serialization
-/// \return Status
-ARROW_EXPORT
-Status WriteRecordBatch(const RecordBatch& batch, int64_t buffer_start_offset,
- io::OutputStream* dst, int32_t* metadata_length,
- int64_t* body_length, const IpcWriteOptions& options);
-
-/// \brief Serialize record batch as encapsulated IPC message in a new buffer
-///
-/// \param[in] batch the record batch
-/// \param[in] options the IpcWriteOptions to use for serialization
-/// \return the serialized message
-ARROW_EXPORT
-Result<std::shared_ptr<Buffer>> SerializeRecordBatch(const RecordBatch& batch,
- const IpcWriteOptions& options);
-
-/// \brief Serialize record batch as encapsulated IPC message in a new buffer
-///
-/// \param[in] batch the record batch
-/// \param[in] mm a MemoryManager to allocate memory from
-/// \return the serialized message
-ARROW_EXPORT
-Result<std::shared_ptr<Buffer>> SerializeRecordBatch(const RecordBatch& batch,
- std::shared_ptr<MemoryManager> mm);
-
-/// \brief Write record batch to OutputStream
-///
-/// \param[in] batch the record batch to write
-/// \param[in] options the IpcWriteOptions to use for serialization
-/// \param[in] out the OutputStream to write the output to
-/// \return Status
-///
-/// If writing to pre-allocated memory, you can use
-/// arrow::ipc::GetRecordBatchSize to compute how much space is required
-ARROW_EXPORT
-Status SerializeRecordBatch(const RecordBatch& batch, const IpcWriteOptions& options,
- io::OutputStream* out);
-
-/// \brief Serialize schema as encapsulated IPC message
-///
-/// \param[in] schema the schema to write
-/// \param[in] pool a MemoryPool to allocate memory from
-/// \return the serialized schema
-ARROW_EXPORT
-Result<std::shared_ptr<Buffer>> SerializeSchema(const Schema& schema,
- MemoryPool* pool = default_memory_pool());
-
-/// \brief Write multiple record batches to OutputStream, including schema
-/// \param[in] batches a vector of batches. Must all have same schema
-/// \param[in] options options for serialization
-/// \param[out] dst an OutputStream
-/// \return Status
-ARROW_EXPORT
-Status WriteRecordBatchStream(const std::vector<std::shared_ptr<RecordBatch>>& batches,
- const IpcWriteOptions& options, io::OutputStream* dst);
-
-/// \brief Compute the number of bytes needed to write an IPC payload
-/// including metadata
-///
-/// \param[in] payload the IPC payload to write
-/// \param[in] options write options
-/// \return the size of the complete encapsulated message
-ARROW_EXPORT
-int64_t GetPayloadSize(const IpcPayload& payload,
- const IpcWriteOptions& options = IpcWriteOptions::Defaults());
-
-/// \brief Compute the number of bytes needed to write a record batch including metadata
-///
-/// \param[in] batch the record batch to write
-/// \param[out] size the size of the complete encapsulated message
-/// \return Status
-ARROW_EXPORT
-Status GetRecordBatchSize(const RecordBatch& batch, int64_t* size);
-
-/// \brief Compute the number of bytes needed to write a record batch including metadata
-///
-/// \param[in] batch the record batch to write
-/// \param[in] options options for serialization
-/// \param[out] size the size of the complete encapsulated message
-/// \return Status
-ARROW_EXPORT
-Status GetRecordBatchSize(const RecordBatch& batch, const IpcWriteOptions& options,
- int64_t* size);
-
-/// \brief Compute the number of bytes needed to write a tensor including metadata
-///
-/// \param[in] tensor the tensor to write
-/// \param[out] size the size of the complete encapsulated message
-/// \return Status
-ARROW_EXPORT
-Status GetTensorSize(const Tensor& tensor, int64_t* size);
-
-/// \brief EXPERIMENTAL: Convert arrow::Tensor to a Message with minimal memory
-/// allocation
-///
-/// \param[in] tensor the Tensor to write
-/// \param[in] pool MemoryPool to allocate space for metadata
-/// \return the resulting Message
-ARROW_EXPORT
-Result<std::unique_ptr<Message>> GetTensorMessage(const Tensor& tensor, MemoryPool* pool);
-
-/// \brief Write arrow::Tensor as a contiguous message.
-///
-/// The metadata and body are written assuming 64-byte alignment. It is the
-/// user's responsibility to ensure that the OutputStream has been aligned
-/// to a 64-byte multiple before writing the message.
-///
-/// The message is written out as followed:
-/// \code
-/// <metadata size> <metadata> <tensor data>
-/// \endcode
-///
-/// \param[in] tensor the Tensor to write
-/// \param[in] dst the OutputStream to write to
-/// \param[out] metadata_length the actual metadata length, including padding
-/// \param[out] body_length the actual message body length
-/// \return Status
-ARROW_EXPORT
-Status WriteTensor(const Tensor& tensor, io::OutputStream* dst, int32_t* metadata_length,
- int64_t* body_length);
-
-/// \brief EXPERIMENTAL: Convert arrow::SparseTensor to a Message with minimal memory
-/// allocation
-///
-/// The message is written out as followed:
-/// \code
-/// <metadata size> <metadata> <sparse index> <sparse tensor body>
-/// \endcode
-///
-/// \param[in] sparse_tensor the SparseTensor to write
-/// \param[in] pool MemoryPool to allocate space for metadata
-/// \return the resulting Message
-ARROW_EXPORT
-Result<std::unique_ptr<Message>> GetSparseTensorMessage(const SparseTensor& sparse_tensor,
- MemoryPool* pool);
-
-/// \brief EXPERIMENTAL: Write arrow::SparseTensor as a contiguous message. The metadata,
-/// sparse index, and body are written assuming 64-byte alignment. It is the
-/// user's responsibility to ensure that the OutputStream has been aligned
-/// to a 64-byte multiple before writing the message.
-///
-/// \param[in] sparse_tensor the SparseTensor to write
-/// \param[in] dst the OutputStream to write to
-/// \param[out] metadata_length the actual metadata length, including padding
-/// \param[out] body_length the actual message body length
-/// \return Status
-ARROW_EXPORT
-Status WriteSparseTensor(const SparseTensor& sparse_tensor, io::OutputStream* dst,
- int32_t* metadata_length, int64_t* body_length);
-
-/// \brief Compute IpcPayload for the given schema
-/// \param[in] schema the Schema that is being serialized
-/// \param[in] options options for serialization
-/// \param[in] mapper object mapping dictionary fields to dictionary ids
-/// \param[out] out the returned vector of IpcPayloads
-/// \return Status
-ARROW_EXPORT
-Status GetSchemaPayload(const Schema& schema, const IpcWriteOptions& options,
- const DictionaryFieldMapper& mapper, IpcPayload* out);
-
-/// \brief Compute IpcPayload for a dictionary
-/// \param[in] id the dictionary id
-/// \param[in] dictionary the dictionary values
-/// \param[in] options options for serialization
-/// \param[out] payload the output IpcPayload
-/// \return Status
-ARROW_EXPORT
-Status GetDictionaryPayload(int64_t id, const std::shared_ptr<Array>& dictionary,
- const IpcWriteOptions& options, IpcPayload* payload);
-
-/// \brief Compute IpcPayload for a dictionary
-/// \param[in] id the dictionary id
-/// \param[in] is_delta whether the dictionary is a delta dictionary
-/// \param[in] dictionary the dictionary values
-/// \param[in] options options for serialization
-/// \param[out] payload the output IpcPayload
-/// \return Status
-ARROW_EXPORT
-Status GetDictionaryPayload(int64_t id, bool is_delta,
- const std::shared_ptr<Array>& dictionary,
- const IpcWriteOptions& options, IpcPayload* payload);
-
-/// \brief Compute IpcPayload for the given record batch
-/// \param[in] batch the RecordBatch that is being serialized
-/// \param[in] options options for serialization
-/// \param[out] out the returned IpcPayload
-/// \return Status
-ARROW_EXPORT
-Status GetRecordBatchPayload(const RecordBatch& batch, const IpcWriteOptions& options,
- IpcPayload* out);
-
-/// \brief Write an IPC payload to the given stream.
-/// \param[in] payload the payload to write
-/// \param[in] options options for serialization
-/// \param[in] dst The stream to write the payload to.
-/// \param[out] metadata_length the length of the serialized metadata
-/// \return Status
-ARROW_EXPORT
-Status WriteIpcPayload(const IpcPayload& payload, const IpcWriteOptions& options,
- io::OutputStream* dst, int32_t* metadata_length);
-
-/// \brief Compute IpcPayload for the given sparse tensor
-/// \param[in] sparse_tensor the SparseTensor that is being serialized
-/// \param[in,out] pool for any required temporary memory allocations
-/// \param[out] out the returned IpcPayload
-/// \return Status
-ARROW_EXPORT
-Status GetSparseTensorPayload(const SparseTensor& sparse_tensor, MemoryPool* pool,
- IpcPayload* out);
-
-namespace internal {
-
-// These internal APIs may change without warning or deprecation
-
-class ARROW_EXPORT IpcPayloadWriter {
- public:
- virtual ~IpcPayloadWriter();
-
- // Default implementation is a no-op
- virtual Status Start();
-
- virtual Status WritePayload(const IpcPayload& payload) = 0;
-
- virtual Status Close() = 0;
-};
-
-/// Create a new IPC payload stream writer from stream sink. User is
-/// responsible for closing the actual OutputStream.
-///
-/// \param[in] sink output stream to write to
-/// \param[in] options options for serialization
-/// \return Result<std::shared_ptr<IpcPayloadWriter>>
-ARROW_EXPORT
-Result<std::unique_ptr<IpcPayloadWriter>> MakePayloadStreamWriter(
- io::OutputStream* sink, const IpcWriteOptions& options = IpcWriteOptions::Defaults());
-
-/// Create a new IPC payload file writer from stream sink.
-///
-/// \param[in] sink output stream to write to
-/// \param[in] schema the schema of the record batches to be written
-/// \param[in] options options for serialization, optional
-/// \param[in] metadata custom metadata for File Footer, optional
-/// \return Status
-ARROW_EXPORT
-Result<std::unique_ptr<IpcPayloadWriter>> MakePayloadFileWriter(
- io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
- const IpcWriteOptions& options = IpcWriteOptions::Defaults(),
- const std::shared_ptr<const KeyValueMetadata>& metadata = NULLPTR);
-
-/// Create a new RecordBatchWriter from IpcPayloadWriter and schema.
-///
-/// The format is implicitly the IPC stream format (allowing dictionary
-/// replacement and deltas).
-///
-/// \param[in] sink the IpcPayloadWriter to write to
-/// \param[in] schema the schema of the record batches to be written
-/// \param[in] options options for serialization
-/// \return Result<std::unique_ptr<RecordBatchWriter>>
-ARROW_EXPORT
-Result<std::unique_ptr<RecordBatchWriter>> OpenRecordBatchWriter(
- std::unique_ptr<IpcPayloadWriter> sink, const std::shared_ptr<Schema>& schema,
- const IpcWriteOptions& options = IpcWriteOptions::Defaults());
-
-} // namespace internal
-} // namespace ipc
-} // namespace arrow
+ARROW_DEPRECATED("Use MakeFileWriter")
+ARROW_EXPORT
+Result<std::shared_ptr<RecordBatchWriter>> NewFileWriter(
+ io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
+ const IpcWriteOptions& options = IpcWriteOptions::Defaults(),
+ const std::shared_ptr<const KeyValueMetadata>& metadata = NULLPTR);
+
+/// \brief Low-level API for writing a record batch (without schema)
+/// to an OutputStream as encapsulated IPC message. See Arrow format
+/// documentation for more detail.
+///
+/// \param[in] batch the record batch to write
+/// \param[in] buffer_start_offset the start offset to use in the buffer metadata,
+/// generally should be 0
+/// \param[in] dst an OutputStream
+/// \param[out] metadata_length the size of the length-prefixed flatbuffer
+/// including padding to a 64-byte boundary
+/// \param[out] body_length the size of the contiguous buffer block plus padding bytes
+/// \param[in] options options for serialization
+/// \return Status
+ARROW_EXPORT
+Status WriteRecordBatch(const RecordBatch& batch, int64_t buffer_start_offset,
+ io::OutputStream* dst, int32_t* metadata_length,
+ int64_t* body_length, const IpcWriteOptions& options);
+
+/// \brief Serialize record batch as encapsulated IPC message in a new buffer
+///
+/// \param[in] batch the record batch
+/// \param[in] options the IpcWriteOptions to use for serialization
+/// \return the serialized message
+ARROW_EXPORT
+Result<std::shared_ptr<Buffer>> SerializeRecordBatch(const RecordBatch& batch,
+ const IpcWriteOptions& options);
+
+/// \brief Serialize record batch as encapsulated IPC message in a new buffer
+///
+/// \param[in] batch the record batch
+/// \param[in] mm a MemoryManager to allocate memory from
+/// \return the serialized message
+ARROW_EXPORT
+Result<std::shared_ptr<Buffer>> SerializeRecordBatch(const RecordBatch& batch,
+ std::shared_ptr<MemoryManager> mm);
+
+/// \brief Write record batch to OutputStream
+///
+/// \param[in] batch the record batch to write
+/// \param[in] options the IpcWriteOptions to use for serialization
+/// \param[in] out the OutputStream to write the output to
+/// \return Status
+///
+/// If writing to pre-allocated memory, you can use
+/// arrow::ipc::GetRecordBatchSize to compute how much space is required
+ARROW_EXPORT
+Status SerializeRecordBatch(const RecordBatch& batch, const IpcWriteOptions& options,
+ io::OutputStream* out);
+
+/// \brief Serialize schema as encapsulated IPC message
+///
+/// \param[in] schema the schema to write
+/// \param[in] pool a MemoryPool to allocate memory from
+/// \return the serialized schema
+ARROW_EXPORT
+Result<std::shared_ptr<Buffer>> SerializeSchema(const Schema& schema,
+ MemoryPool* pool = default_memory_pool());
+
+/// \brief Write multiple record batches to OutputStream, including schema
+/// \param[in] batches a vector of batches. Must all have same schema
+/// \param[in] options options for serialization
+/// \param[out] dst an OutputStream
+/// \return Status
+ARROW_EXPORT
+Status WriteRecordBatchStream(const std::vector<std::shared_ptr<RecordBatch>>& batches,
+ const IpcWriteOptions& options, io::OutputStream* dst);
+
+/// \brief Compute the number of bytes needed to write an IPC payload
+/// including metadata
+///
+/// \param[in] payload the IPC payload to write
+/// \param[in] options write options
+/// \return the size of the complete encapsulated message
+ARROW_EXPORT
+int64_t GetPayloadSize(const IpcPayload& payload,
+ const IpcWriteOptions& options = IpcWriteOptions::Defaults());
+
+/// \brief Compute the number of bytes needed to write a record batch including metadata
+///
+/// \param[in] batch the record batch to write
+/// \param[out] size the size of the complete encapsulated message
+/// \return Status
+ARROW_EXPORT
+Status GetRecordBatchSize(const RecordBatch& batch, int64_t* size);
+
+/// \brief Compute the number of bytes needed to write a record batch including metadata
+///
+/// \param[in] batch the record batch to write
+/// \param[in] options options for serialization
+/// \param[out] size the size of the complete encapsulated message
+/// \return Status
+ARROW_EXPORT
+Status GetRecordBatchSize(const RecordBatch& batch, const IpcWriteOptions& options,
+ int64_t* size);
+
+/// \brief Compute the number of bytes needed to write a tensor including metadata
+///
+/// \param[in] tensor the tensor to write
+/// \param[out] size the size of the complete encapsulated message
+/// \return Status
+ARROW_EXPORT
+Status GetTensorSize(const Tensor& tensor, int64_t* size);
+
+/// \brief EXPERIMENTAL: Convert arrow::Tensor to a Message with minimal memory
+/// allocation
+///
+/// \param[in] tensor the Tensor to write
+/// \param[in] pool MemoryPool to allocate space for metadata
+/// \return the resulting Message
+ARROW_EXPORT
+Result<std::unique_ptr<Message>> GetTensorMessage(const Tensor& tensor, MemoryPool* pool);
+
+/// \brief Write arrow::Tensor as a contiguous message.
+///
+/// The metadata and body are written assuming 64-byte alignment. It is the
+/// user's responsibility to ensure that the OutputStream has been aligned
+/// to a 64-byte multiple before writing the message.
+///
+/// The message is written out as follows:
+/// \code
+/// <metadata size> <metadata> <tensor data>
+/// \endcode
+///
+/// \param[in] tensor the Tensor to write
+/// \param[in] dst the OutputStream to write to
+/// \param[out] metadata_length the actual metadata length, including padding
+/// \param[out] body_length the actual message body length
+/// \return Status
+ARROW_EXPORT
+Status WriteTensor(const Tensor& tensor, io::OutputStream* dst, int32_t* metadata_length,
+ int64_t* body_length);
+
+/// \brief EXPERIMENTAL: Convert arrow::SparseTensor to a Message with minimal memory
+/// allocation
+///
+/// The message is written out as follows:
+/// \code
+/// <metadata size> <metadata> <sparse index> <sparse tensor body>
+/// \endcode
+///
+/// \param[in] sparse_tensor the SparseTensor to write
+/// \param[in] pool MemoryPool to allocate space for metadata
+/// \return the resulting Message
+ARROW_EXPORT
+Result<std::unique_ptr<Message>> GetSparseTensorMessage(const SparseTensor& sparse_tensor,
+ MemoryPool* pool);
+
+/// \brief EXPERIMENTAL: Write arrow::SparseTensor as a contiguous message. The metadata,
+/// sparse index, and body are written assuming 64-byte alignment. It is the
+/// user's responsibility to ensure that the OutputStream has been aligned
+/// to a 64-byte multiple before writing the message.
+///
+/// \param[in] sparse_tensor the SparseTensor to write
+/// \param[in] dst the OutputStream to write to
+/// \param[out] metadata_length the actual metadata length, including padding
+/// \param[out] body_length the actual message body length
+/// \return Status
+ARROW_EXPORT
+Status WriteSparseTensor(const SparseTensor& sparse_tensor, io::OutputStream* dst,
+ int32_t* metadata_length, int64_t* body_length);
+
+/// \brief Compute IpcPayload for the given schema
+/// \param[in] schema the Schema that is being serialized
+/// \param[in] options options for serialization
+/// \param[in] mapper object mapping dictionary fields to dictionary ids
+/// \param[out] out the returned IpcPayload
+/// \return Status
+ARROW_EXPORT
+Status GetSchemaPayload(const Schema& schema, const IpcWriteOptions& options,
+ const DictionaryFieldMapper& mapper, IpcPayload* out);
+
+/// \brief Compute IpcPayload for a dictionary
+/// \param[in] id the dictionary id
+/// \param[in] dictionary the dictionary values
+/// \param[in] options options for serialization
+/// \param[out] payload the output IpcPayload
+/// \return Status
+ARROW_EXPORT
+Status GetDictionaryPayload(int64_t id, const std::shared_ptr<Array>& dictionary,
+ const IpcWriteOptions& options, IpcPayload* payload);
+
+/// \brief Compute IpcPayload for a dictionary
+/// \param[in] id the dictionary id
+/// \param[in] is_delta whether the dictionary is a delta dictionary
+/// \param[in] dictionary the dictionary values
+/// \param[in] options options for serialization
+/// \param[out] payload the output IpcPayload
+/// \return Status
+ARROW_EXPORT
+Status GetDictionaryPayload(int64_t id, bool is_delta,
+ const std::shared_ptr<Array>& dictionary,
+ const IpcWriteOptions& options, IpcPayload* payload);
+
+/// \brief Compute IpcPayload for the given record batch
+/// \param[in] batch the RecordBatch that is being serialized
+/// \param[in] options options for serialization
+/// \param[out] out the returned IpcPayload
+/// \return Status
+ARROW_EXPORT
+Status GetRecordBatchPayload(const RecordBatch& batch, const IpcWriteOptions& options,
+ IpcPayload* out);
+
+/// \brief Write an IPC payload to the given stream.
+/// \param[in] payload the payload to write
+/// \param[in] options options for serialization
+/// \param[in] dst The stream to write the payload to.
+/// \param[out] metadata_length the length of the serialized metadata
+/// \return Status
+ARROW_EXPORT
+Status WriteIpcPayload(const IpcPayload& payload, const IpcWriteOptions& options,
+ io::OutputStream* dst, int32_t* metadata_length);
+
+/// \brief Compute IpcPayload for the given sparse tensor
+/// \param[in] sparse_tensor the SparseTensor that is being serialized
+/// \param[in,out] pool for any required temporary memory allocations
+/// \param[out] out the returned IpcPayload
+/// \return Status
+ARROW_EXPORT
+Status GetSparseTensorPayload(const SparseTensor& sparse_tensor, MemoryPool* pool,
+ IpcPayload* out);
+
+namespace internal {
+
+// These internal APIs may change without warning or deprecation
+
+class ARROW_EXPORT IpcPayloadWriter {
+ public:
+ virtual ~IpcPayloadWriter();
+
+ // Default implementation is a no-op
+ virtual Status Start();
+
+ virtual Status WritePayload(const IpcPayload& payload) = 0;
+
+ virtual Status Close() = 0;
+};
+
+/// Create a new IPC payload stream writer from stream sink. User is
+/// responsible for closing the actual OutputStream.
+///
+/// \param[in] sink output stream to write to
+/// \param[in] options options for serialization
+/// \return Result<std::unique_ptr<IpcPayloadWriter>>
+ARROW_EXPORT
+Result<std::unique_ptr<IpcPayloadWriter>> MakePayloadStreamWriter(
+ io::OutputStream* sink, const IpcWriteOptions& options = IpcWriteOptions::Defaults());
+
+/// Create a new IPC payload file writer from stream sink.
+///
+/// \param[in] sink output stream to write to
+/// \param[in] schema the schema of the record batches to be written
+/// \param[in] options options for serialization, optional
+/// \param[in] metadata custom metadata for File Footer, optional
+/// \return Result<std::unique_ptr<IpcPayloadWriter>>
+ARROW_EXPORT
+Result<std::unique_ptr<IpcPayloadWriter>> MakePayloadFileWriter(
+ io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
+ const IpcWriteOptions& options = IpcWriteOptions::Defaults(),
+ const std::shared_ptr<const KeyValueMetadata>& metadata = NULLPTR);
+
+/// Create a new RecordBatchWriter from IpcPayloadWriter and schema.
+///
+/// The format is implicitly the IPC stream format (allowing dictionary
+/// replacement and deltas).
+///
+/// \param[in] sink the IpcPayloadWriter to write to
+/// \param[in] schema the schema of the record batches to be written
+/// \param[in] options options for serialization
+/// \return Result<std::unique_ptr<RecordBatchWriter>>
+ARROW_EXPORT
+Result<std::unique_ptr<RecordBatchWriter>> OpenRecordBatchWriter(
+ std::unique_ptr<IpcPayloadWriter> sink, const std::shared_ptr<Schema>& schema,
+ const IpcWriteOptions& options = IpcWriteOptions::Defaults());
+
+} // namespace internal
+} // namespace ipc
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/memory_pool.cc b/contrib/libs/apache/arrow/cpp/src/arrow/memory_pool.cc
index 2d6f3176224..b0cc69e5518 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/memory_pool.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/memory_pool.cc
@@ -1,30 +1,30 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/memory_pool.h"
-
-#include <algorithm> // IWYU pragma: keep
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/memory_pool.h"
+
+#include <algorithm> // IWYU pragma: keep
#include <atomic>
#include <cstdlib> // IWYU pragma: keep
#include <cstring> // IWYU pragma: keep
#include <iostream> // IWYU pragma: keep
-#include <limits>
-#include <memory>
-
+#include <limits>
+#include <memory>
+
#if defined(sun) || defined(__sun)
#include <stdlib.h>
#endif
@@ -32,81 +32,81 @@
#include "arrow/buffer.h"
#include "arrow/io/util_internal.h"
#include "arrow/result.h"
-#include "arrow/status.h"
+#include "arrow/status.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/io_util.h"
-#include "arrow/util/logging.h" // IWYU pragma: keep
+#include "arrow/util/logging.h" // IWYU pragma: keep
#include "arrow/util/optional.h"
#include "arrow/util/string.h"
#include "arrow/util/thread_pool.h"
-
+
#ifdef __GLIBC__
#include <malloc.h>
#endif
-#ifdef ARROW_JEMALLOC
-// Needed to support jemalloc 3 and 4
-#define JEMALLOC_MANGLE
-// Explicitly link to our version of jemalloc
-#error #include "jemalloc_ep/dist/include/jemalloc/jemalloc.h"
-#endif
-
-#ifdef ARROW_MIMALLOC
-#error #include <mimalloc.h>
-#endif
-
-#ifdef ARROW_JEMALLOC
-
-// Compile-time configuration for jemalloc options.
-// Note the prefix ("je_arrow_") must match the symbol prefix given when
-// building jemalloc.
-// See discussion in https://github.com/jemalloc/jemalloc/issues/1621
-
-// ARROW-6910(wesm): we found that jemalloc's default behavior with respect to
-// dirty / muzzy pages (see definitions of these in the jemalloc documentation)
-// conflicted with user expectations, and would even cause memory use problems
-// in some cases. By enabling the background_thread option and reducing the
-// decay time from 10 seconds to 1 seconds, memory is released more
-// aggressively (and in the background) to the OS. This can be configured
-// further by using the arrow::jemalloc_set_decay_ms API
-
-#undef USE_JEMALLOC_BACKGROUND_THREAD
-#ifndef __APPLE__
-// ARROW-6977: jemalloc's background_thread isn't always enabled on macOS
-#define USE_JEMALLOC_BACKGROUND_THREAD
-#endif
-
-// In debug mode, add memory poisoning on alloc / free
-#ifdef NDEBUG
-#define JEMALLOC_DEBUG_OPTIONS ""
-#else
-#define JEMALLOC_DEBUG_OPTIONS ",junk:true"
-#endif
-
-const char* je_arrow_malloc_conf =
- ("oversize_threshold:0"
-#ifdef USE_JEMALLOC_BACKGROUND_THREAD
- ",dirty_decay_ms:1000"
- ",muzzy_decay_ms:1000"
- ",background_thread:true"
-#else
- // ARROW-6994: return memory immediately to the OS if the
- // background_thread option isn't available
- ",dirty_decay_ms:0"
- ",muzzy_decay_ms:0"
-#endif
- JEMALLOC_DEBUG_OPTIONS); // NOLINT: whitespace/parens
-
-#endif // ARROW_JEMALLOC
-
-namespace arrow {
-
+#ifdef ARROW_JEMALLOC
+// Needed to support jemalloc 3 and 4
+#define JEMALLOC_MANGLE
+// Explicitly link to our version of jemalloc
+#error #include "jemalloc_ep/dist/include/jemalloc/jemalloc.h"
+#endif
+
+#ifdef ARROW_MIMALLOC
+#error #include <mimalloc.h>
+#endif
+
+#ifdef ARROW_JEMALLOC
+
+// Compile-time configuration for jemalloc options.
+// Note the prefix ("je_arrow_") must match the symbol prefix given when
+// building jemalloc.
+// See discussion in https://github.com/jemalloc/jemalloc/issues/1621
+
+// ARROW-6910(wesm): we found that jemalloc's default behavior with respect to
+// dirty / muzzy pages (see definitions of these in the jemalloc documentation)
+// conflicted with user expectations, and would even cause memory use problems
+// in some cases. By enabling the background_thread option and reducing the
+// decay time from 10 seconds to 1 second, memory is released more
+// aggressively (and in the background) to the OS. This can be configured
+// further by using the arrow::jemalloc_set_decay_ms API
+
+#undef USE_JEMALLOC_BACKGROUND_THREAD
+#ifndef __APPLE__
+// ARROW-6977: jemalloc's background_thread isn't always enabled on macOS
+#define USE_JEMALLOC_BACKGROUND_THREAD
+#endif
+
+// In debug mode, add memory poisoning on alloc / free
+#ifdef NDEBUG
+#define JEMALLOC_DEBUG_OPTIONS ""
+#else
+#define JEMALLOC_DEBUG_OPTIONS ",junk:true"
+#endif
+
+const char* je_arrow_malloc_conf =
+ ("oversize_threshold:0"
+#ifdef USE_JEMALLOC_BACKGROUND_THREAD
+ ",dirty_decay_ms:1000"
+ ",muzzy_decay_ms:1000"
+ ",background_thread:true"
+#else
+ // ARROW-6994: return memory immediately to the OS if the
+ // background_thread option isn't available
+ ",dirty_decay_ms:0"
+ ",muzzy_decay_ms:0"
+#endif
+ JEMALLOC_DEBUG_OPTIONS); // NOLINT: whitespace/parens
+
+#endif // ARROW_JEMALLOC
+
+namespace arrow {
+
namespace {
-constexpr size_t kAlignment = 64;
-
+constexpr size_t kAlignment = 64;
+
constexpr char kDefaultBackendEnvVar[] = "ARROW_DEFAULT_MEMORY_POOL";
-
+
enum class MemoryPoolBackend : uint8_t { System, Jemalloc, Mimalloc };
struct SupportedBackend {
@@ -183,85 +183,85 @@ MemoryPoolBackend DefaultBackend() {
return default_backend.backend;
}
-// A static piece of memory for 0-size allocations, so as to return
-// an aligned non-null pointer.
-alignas(kAlignment) static uint8_t zero_size_area[1];
-
-// Helper class directing allocations to the standard system allocator.
-class SystemAllocator {
- public:
- // Allocate memory according to the alignment requirements for Arrow
- // (as of May 2016 64 bytes)
- static Status AllocateAligned(int64_t size, uint8_t** out) {
- if (size == 0) {
- *out = zero_size_area;
- return Status::OK();
- }
-#ifdef _WIN32
- // Special code path for Windows
- *out = reinterpret_cast<uint8_t*>(
- _aligned_malloc(static_cast<size_t>(size), kAlignment));
- if (!*out) {
- return Status::OutOfMemory("malloc of size ", size, " failed");
- }
+// A static piece of memory for 0-size allocations, so as to return
+// an aligned non-null pointer.
+alignas(kAlignment) static uint8_t zero_size_area[1];
+
+// Helper class directing allocations to the standard system allocator.
+class SystemAllocator {
+ public:
+ // Allocate memory according to the alignment requirements for Arrow
+ // (as of May 2016 64 bytes)
+ static Status AllocateAligned(int64_t size, uint8_t** out) {
+ if (size == 0) {
+ *out = zero_size_area;
+ return Status::OK();
+ }
+#ifdef _WIN32
+ // Special code path for Windows
+ *out = reinterpret_cast<uint8_t*>(
+ _aligned_malloc(static_cast<size_t>(size), kAlignment));
+ if (!*out) {
+ return Status::OutOfMemory("malloc of size ", size, " failed");
+ }
#elif defined(sun) || defined(__sun)
*out = reinterpret_cast<uint8_t*>(memalign(kAlignment, static_cast<size_t>(size)));
if (!*out) {
return Status::OutOfMemory("malloc of size ", size, " failed");
}
-#else
- const int result = posix_memalign(reinterpret_cast<void**>(out), kAlignment,
- static_cast<size_t>(size));
- if (result == ENOMEM) {
- return Status::OutOfMemory("malloc of size ", size, " failed");
- }
-
- if (result == EINVAL) {
- return Status::Invalid("invalid alignment parameter: ", kAlignment);
- }
-#endif
- return Status::OK();
- }
-
- static Status ReallocateAligned(int64_t old_size, int64_t new_size, uint8_t** ptr) {
- uint8_t* previous_ptr = *ptr;
- if (previous_ptr == zero_size_area) {
- DCHECK_EQ(old_size, 0);
- return AllocateAligned(new_size, ptr);
- }
- if (new_size == 0) {
- DeallocateAligned(previous_ptr, old_size);
- *ptr = zero_size_area;
- return Status::OK();
- }
- // Note: We cannot use realloc() here as it doesn't guarantee alignment.
-
- // Allocate new chunk
- uint8_t* out = nullptr;
- RETURN_NOT_OK(AllocateAligned(new_size, &out));
- DCHECK(out);
- // Copy contents and release old memory chunk
- memcpy(out, *ptr, static_cast<size_t>(std::min(new_size, old_size)));
-#ifdef _WIN32
- _aligned_free(*ptr);
-#else
- free(*ptr);
-#endif // defined(_WIN32)
- *ptr = out;
- return Status::OK();
- }
-
- static void DeallocateAligned(uint8_t* ptr, int64_t size) {
- if (ptr == zero_size_area) {
- DCHECK_EQ(size, 0);
- } else {
-#ifdef _WIN32
- _aligned_free(ptr);
-#else
- free(ptr);
-#endif
- }
- }
+#else
+ const int result = posix_memalign(reinterpret_cast<void**>(out), kAlignment,
+ static_cast<size_t>(size));
+ if (result == ENOMEM) {
+ return Status::OutOfMemory("malloc of size ", size, " failed");
+ }
+
+ if (result == EINVAL) {
+ return Status::Invalid("invalid alignment parameter: ", kAlignment);
+ }
+#endif
+ return Status::OK();
+ }
+
+ static Status ReallocateAligned(int64_t old_size, int64_t new_size, uint8_t** ptr) {
+ uint8_t* previous_ptr = *ptr;
+ if (previous_ptr == zero_size_area) {
+ DCHECK_EQ(old_size, 0);
+ return AllocateAligned(new_size, ptr);
+ }
+ if (new_size == 0) {
+ DeallocateAligned(previous_ptr, old_size);
+ *ptr = zero_size_area;
+ return Status::OK();
+ }
+ // Note: We cannot use realloc() here as it doesn't guarantee alignment.
+
+ // Allocate new chunk
+ uint8_t* out = nullptr;
+ RETURN_NOT_OK(AllocateAligned(new_size, &out));
+ DCHECK(out);
+ // Copy contents and release old memory chunk
+ memcpy(out, *ptr, static_cast<size_t>(std::min(new_size, old_size)));
+#ifdef _WIN32
+ _aligned_free(*ptr);
+#else
+ free(*ptr);
+#endif // defined(_WIN32)
+ *ptr = out;
+ return Status::OK();
+ }
+
+ static void DeallocateAligned(uint8_t* ptr, int64_t size) {
+ if (ptr == zero_size_area) {
+ DCHECK_EQ(size, 0);
+ } else {
+#ifdef _WIN32
+ _aligned_free(ptr);
+#else
+ free(ptr);
+#endif
+ }
+ }
static void ReleaseUnused() {
#ifdef __GLIBC__
@@ -270,225 +270,225 @@ class SystemAllocator {
ARROW_UNUSED(malloc_trim(0));
#endif
}
-};
-
-#ifdef ARROW_JEMALLOC
-
-// Helper class directing allocations to the jemalloc allocator.
-class JemallocAllocator {
- public:
- static Status AllocateAligned(int64_t size, uint8_t** out) {
- if (size == 0) {
- *out = zero_size_area;
- return Status::OK();
- }
- *out = reinterpret_cast<uint8_t*>(
- mallocx(static_cast<size_t>(size), MALLOCX_ALIGN(kAlignment)));
- if (*out == NULL) {
- return Status::OutOfMemory("malloc of size ", size, " failed");
- }
- return Status::OK();
- }
-
- static Status ReallocateAligned(int64_t old_size, int64_t new_size, uint8_t** ptr) {
- uint8_t* previous_ptr = *ptr;
- if (previous_ptr == zero_size_area) {
- DCHECK_EQ(old_size, 0);
- return AllocateAligned(new_size, ptr);
- }
- if (new_size == 0) {
- DeallocateAligned(previous_ptr, old_size);
- *ptr = zero_size_area;
- return Status::OK();
- }
- *ptr = reinterpret_cast<uint8_t*>(
- rallocx(*ptr, static_cast<size_t>(new_size), MALLOCX_ALIGN(kAlignment)));
- if (*ptr == NULL) {
- *ptr = previous_ptr;
- return Status::OutOfMemory("realloc of size ", new_size, " failed");
- }
- return Status::OK();
- }
-
- static void DeallocateAligned(uint8_t* ptr, int64_t size) {
- if (ptr == zero_size_area) {
- DCHECK_EQ(size, 0);
- } else {
- dallocx(ptr, MALLOCX_ALIGN(kAlignment));
- }
- }
+};
+
+#ifdef ARROW_JEMALLOC
+
+// Helper class directing allocations to the jemalloc allocator.
+class JemallocAllocator {
+ public:
+ static Status AllocateAligned(int64_t size, uint8_t** out) {
+ if (size == 0) {
+ *out = zero_size_area;
+ return Status::OK();
+ }
+ *out = reinterpret_cast<uint8_t*>(
+ mallocx(static_cast<size_t>(size), MALLOCX_ALIGN(kAlignment)));
+ if (*out == NULL) {
+ return Status::OutOfMemory("malloc of size ", size, " failed");
+ }
+ return Status::OK();
+ }
+
+ static Status ReallocateAligned(int64_t old_size, int64_t new_size, uint8_t** ptr) {
+ uint8_t* previous_ptr = *ptr;
+ if (previous_ptr == zero_size_area) {
+ DCHECK_EQ(old_size, 0);
+ return AllocateAligned(new_size, ptr);
+ }
+ if (new_size == 0) {
+ DeallocateAligned(previous_ptr, old_size);
+ *ptr = zero_size_area;
+ return Status::OK();
+ }
+ *ptr = reinterpret_cast<uint8_t*>(
+ rallocx(*ptr, static_cast<size_t>(new_size), MALLOCX_ALIGN(kAlignment)));
+ if (*ptr == NULL) {
+ *ptr = previous_ptr;
+ return Status::OutOfMemory("realloc of size ", new_size, " failed");
+ }
+ return Status::OK();
+ }
+
+ static void DeallocateAligned(uint8_t* ptr, int64_t size) {
+ if (ptr == zero_size_area) {
+ DCHECK_EQ(size, 0);
+ } else {
+ dallocx(ptr, MALLOCX_ALIGN(kAlignment));
+ }
+ }
static void ReleaseUnused() {
mallctl("arena." ARROW_STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", NULL, NULL, NULL, 0);
}
-};
-
-#endif // defined(ARROW_JEMALLOC)
-
-#ifdef ARROW_MIMALLOC
-
-// Helper class directing allocations to the mimalloc allocator.
-class MimallocAllocator {
- public:
- static Status AllocateAligned(int64_t size, uint8_t** out) {
- if (size == 0) {
- *out = zero_size_area;
- return Status::OK();
- }
- *out = reinterpret_cast<uint8_t*>(
- mi_malloc_aligned(static_cast<size_t>(size), kAlignment));
- if (*out == NULL) {
- return Status::OutOfMemory("malloc of size ", size, " failed");
- }
- return Status::OK();
- }
-
+};
+
+#endif // defined(ARROW_JEMALLOC)
+
+#ifdef ARROW_MIMALLOC
+
+// Helper class directing allocations to the mimalloc allocator.
+class MimallocAllocator {
+ public:
+ static Status AllocateAligned(int64_t size, uint8_t** out) {
+ if (size == 0) {
+ *out = zero_size_area;
+ return Status::OK();
+ }
+ *out = reinterpret_cast<uint8_t*>(
+ mi_malloc_aligned(static_cast<size_t>(size), kAlignment));
+ if (*out == NULL) {
+ return Status::OutOfMemory("malloc of size ", size, " failed");
+ }
+ return Status::OK();
+ }
+
static void ReleaseUnused() { mi_collect(true); }
- static Status ReallocateAligned(int64_t old_size, int64_t new_size, uint8_t** ptr) {
- uint8_t* previous_ptr = *ptr;
- if (previous_ptr == zero_size_area) {
- DCHECK_EQ(old_size, 0);
- return AllocateAligned(new_size, ptr);
- }
- if (new_size == 0) {
- DeallocateAligned(previous_ptr, old_size);
- *ptr = zero_size_area;
- return Status::OK();
- }
- *ptr = reinterpret_cast<uint8_t*>(
- mi_realloc_aligned(previous_ptr, static_cast<size_t>(new_size), kAlignment));
- if (*ptr == NULL) {
- *ptr = previous_ptr;
- return Status::OutOfMemory("realloc of size ", new_size, " failed");
- }
- return Status::OK();
- }
-
- static void DeallocateAligned(uint8_t* ptr, int64_t size) {
- if (ptr == zero_size_area) {
- DCHECK_EQ(size, 0);
- } else {
- mi_free(ptr);
- }
- }
-};
-
-#endif // defined(ARROW_MIMALLOC)
-
-} // namespace
-
-int64_t MemoryPool::max_memory() const { return -1; }
-
-///////////////////////////////////////////////////////////////////////
-// MemoryPool implementation that delegates its core duty
-// to an Allocator class.
-
-#ifndef NDEBUG
-static constexpr uint8_t kAllocPoison = 0xBC;
-static constexpr uint8_t kReallocPoison = 0xBD;
-static constexpr uint8_t kDeallocPoison = 0xBE;
-#endif
-
-template <typename Allocator>
-class BaseMemoryPoolImpl : public MemoryPool {
- public:
- ~BaseMemoryPoolImpl() override {}
-
- Status Allocate(int64_t size, uint8_t** out) override {
- if (size < 0) {
- return Status::Invalid("negative malloc size");
- }
- if (static_cast<uint64_t>(size) >= std::numeric_limits<size_t>::max()) {
- return Status::CapacityError("malloc size overflows size_t");
- }
- RETURN_NOT_OK(Allocator::AllocateAligned(size, out));
-#ifndef NDEBUG
- // Poison data
- if (size > 0) {
- DCHECK_NE(*out, nullptr);
- (*out)[0] = kAllocPoison;
- (*out)[size - 1] = kAllocPoison;
- }
-#endif
-
- stats_.UpdateAllocatedBytes(size);
- return Status::OK();
- }
-
- Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) override {
- if (new_size < 0) {
- return Status::Invalid("negative realloc size");
- }
- if (static_cast<uint64_t>(new_size) >= std::numeric_limits<size_t>::max()) {
- return Status::CapacityError("realloc overflows size_t");
- }
- RETURN_NOT_OK(Allocator::ReallocateAligned(old_size, new_size, ptr));
-#ifndef NDEBUG
- // Poison data
- if (new_size > old_size) {
- DCHECK_NE(*ptr, nullptr);
- (*ptr)[old_size] = kReallocPoison;
- (*ptr)[new_size - 1] = kReallocPoison;
- }
-#endif
-
- stats_.UpdateAllocatedBytes(new_size - old_size);
- return Status::OK();
- }
-
- void Free(uint8_t* buffer, int64_t size) override {
-#ifndef NDEBUG
- // Poison data
- if (size > 0) {
- DCHECK_NE(buffer, nullptr);
- buffer[0] = kDeallocPoison;
- buffer[size - 1] = kDeallocPoison;
- }
-#endif
- Allocator::DeallocateAligned(buffer, size);
-
- stats_.UpdateAllocatedBytes(-size);
- }
-
+ static Status ReallocateAligned(int64_t old_size, int64_t new_size, uint8_t** ptr) {
+ uint8_t* previous_ptr = *ptr;
+ if (previous_ptr == zero_size_area) {
+ DCHECK_EQ(old_size, 0);
+ return AllocateAligned(new_size, ptr);
+ }
+ if (new_size == 0) {
+ DeallocateAligned(previous_ptr, old_size);
+ *ptr = zero_size_area;
+ return Status::OK();
+ }
+ *ptr = reinterpret_cast<uint8_t*>(
+ mi_realloc_aligned(previous_ptr, static_cast<size_t>(new_size), kAlignment));
+ if (*ptr == NULL) {
+ *ptr = previous_ptr;
+ return Status::OutOfMemory("realloc of size ", new_size, " failed");
+ }
+ return Status::OK();
+ }
+
+ static void DeallocateAligned(uint8_t* ptr, int64_t size) {
+ if (ptr == zero_size_area) {
+ DCHECK_EQ(size, 0);
+ } else {
+ mi_free(ptr);
+ }
+ }
+};
+
+#endif // defined(ARROW_MIMALLOC)
+
+} // namespace
+
+int64_t MemoryPool::max_memory() const { return -1; }
+
+///////////////////////////////////////////////////////////////////////
+// MemoryPool implementation that delegates its core duty
+// to an Allocator class.
+
+#ifndef NDEBUG
+static constexpr uint8_t kAllocPoison = 0xBC;
+static constexpr uint8_t kReallocPoison = 0xBD;
+static constexpr uint8_t kDeallocPoison = 0xBE;
+#endif
+
+template <typename Allocator>
+class BaseMemoryPoolImpl : public MemoryPool {
+ public:
+ ~BaseMemoryPoolImpl() override {}
+
+ Status Allocate(int64_t size, uint8_t** out) override {
+ if (size < 0) {
+ return Status::Invalid("negative malloc size");
+ }
+ if (static_cast<uint64_t>(size) >= std::numeric_limits<size_t>::max()) {
+ return Status::CapacityError("malloc size overflows size_t");
+ }
+ RETURN_NOT_OK(Allocator::AllocateAligned(size, out));
+#ifndef NDEBUG
+ // Poison data
+ if (size > 0) {
+ DCHECK_NE(*out, nullptr);
+ (*out)[0] = kAllocPoison;
+ (*out)[size - 1] = kAllocPoison;
+ }
+#endif
+
+ stats_.UpdateAllocatedBytes(size);
+ return Status::OK();
+ }
+
+ Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) override {
+ if (new_size < 0) {
+ return Status::Invalid("negative realloc size");
+ }
+ if (static_cast<uint64_t>(new_size) >= std::numeric_limits<size_t>::max()) {
+ return Status::CapacityError("realloc overflows size_t");
+ }
+ RETURN_NOT_OK(Allocator::ReallocateAligned(old_size, new_size, ptr));
+#ifndef NDEBUG
+ // Poison data
+ if (new_size > old_size) {
+ DCHECK_NE(*ptr, nullptr);
+ (*ptr)[old_size] = kReallocPoison;
+ (*ptr)[new_size - 1] = kReallocPoison;
+ }
+#endif
+
+ stats_.UpdateAllocatedBytes(new_size - old_size);
+ return Status::OK();
+ }
+
+ void Free(uint8_t* buffer, int64_t size) override {
+#ifndef NDEBUG
+ // Poison data
+ if (size > 0) {
+ DCHECK_NE(buffer, nullptr);
+ buffer[0] = kDeallocPoison;
+ buffer[size - 1] = kDeallocPoison;
+ }
+#endif
+ Allocator::DeallocateAligned(buffer, size);
+
+ stats_.UpdateAllocatedBytes(-size);
+ }
+
void ReleaseUnused() override { Allocator::ReleaseUnused(); }
- int64_t bytes_allocated() const override { return stats_.bytes_allocated(); }
-
- int64_t max_memory() const override { return stats_.max_memory(); }
-
- protected:
- internal::MemoryPoolStats stats_;
-};
-
-class SystemMemoryPool : public BaseMemoryPoolImpl<SystemAllocator> {
- public:
- std::string backend_name() const override { return "system"; }
-};
-
-#ifdef ARROW_JEMALLOC
-class JemallocMemoryPool : public BaseMemoryPoolImpl<JemallocAllocator> {
- public:
- std::string backend_name() const override { return "jemalloc"; }
-};
-#endif
-
-#ifdef ARROW_MIMALLOC
-class MimallocMemoryPool : public BaseMemoryPoolImpl<MimallocAllocator> {
- public:
- std::string backend_name() const override { return "mimalloc"; }
-};
-#endif
-
+ int64_t bytes_allocated() const override { return stats_.bytes_allocated(); }
+
+ int64_t max_memory() const override { return stats_.max_memory(); }
+
+ protected:
+ internal::MemoryPoolStats stats_;
+};
+
+class SystemMemoryPool : public BaseMemoryPoolImpl<SystemAllocator> {
+ public:
+ std::string backend_name() const override { return "system"; }
+};
+
+#ifdef ARROW_JEMALLOC
+class JemallocMemoryPool : public BaseMemoryPoolImpl<JemallocAllocator> {
+ public:
+ std::string backend_name() const override { return "jemalloc"; }
+};
+#endif
+
+#ifdef ARROW_MIMALLOC
+class MimallocMemoryPool : public BaseMemoryPoolImpl<MimallocAllocator> {
+ public:
+ std::string backend_name() const override { return "mimalloc"; }
+};
+#endif
+
std::unique_ptr<MemoryPool> MemoryPool::CreateDefault() {
auto backend = DefaultBackend();
switch (backend) {
case MemoryPoolBackend::System:
return std::unique_ptr<MemoryPool>(new SystemMemoryPool);
-#ifdef ARROW_JEMALLOC
+#ifdef ARROW_JEMALLOC
case MemoryPoolBackend::Jemalloc:
return std::unique_ptr<MemoryPool>(new JemallocMemoryPool);
-#endif
+#endif
#ifdef ARROW_MIMALLOC
case MemoryPoolBackend::Mimalloc:
return std::unique_ptr<MemoryPool>(new MimallocMemoryPool);
@@ -497,8 +497,8 @@ std::unique_ptr<MemoryPool> MemoryPool::CreateDefault() {
ARROW_LOG(FATAL) << "Internal error: cannot create default memory pool";
return nullptr;
}
-}
-
+}
+
static struct GlobalState {
~GlobalState() { finalizing.store(true, std::memory_order_relaxed); }
@@ -507,43 +507,43 @@ static struct GlobalState {
std::atomic<bool> finalizing{false}; // constructed first, destroyed last
SystemMemoryPool system_pool;
-#ifdef ARROW_JEMALLOC
+#ifdef ARROW_JEMALLOC
JemallocMemoryPool jemalloc_pool;
-#endif
-#ifdef ARROW_MIMALLOC
+#endif
+#ifdef ARROW_MIMALLOC
MimallocMemoryPool mimalloc_pool;
-#endif
+#endif
} global_state;
-
+
MemoryPool* system_memory_pool() { return &global_state.system_pool; }
-
-Status jemalloc_memory_pool(MemoryPool** out) {
-#ifdef ARROW_JEMALLOC
+
+Status jemalloc_memory_pool(MemoryPool** out) {
+#ifdef ARROW_JEMALLOC
*out = &global_state.jemalloc_pool;
- return Status::OK();
-#else
- return Status::NotImplemented("This Arrow build does not enable jemalloc");
-#endif
-}
-
-Status mimalloc_memory_pool(MemoryPool** out) {
-#ifdef ARROW_MIMALLOC
+ return Status::OK();
+#else
+ return Status::NotImplemented("This Arrow build does not enable jemalloc");
+#endif
+}
+
+Status mimalloc_memory_pool(MemoryPool** out) {
+#ifdef ARROW_MIMALLOC
*out = &global_state.mimalloc_pool;
- return Status::OK();
-#else
- return Status::NotImplemented("This Arrow build does not enable mimalloc");
-#endif
-}
-
-MemoryPool* default_memory_pool() {
+ return Status::OK();
+#else
+ return Status::NotImplemented("This Arrow build does not enable mimalloc");
+#endif
+}
+
+MemoryPool* default_memory_pool() {
auto backend = DefaultBackend();
switch (backend) {
case MemoryPoolBackend::System:
return &global_state.system_pool;
-#ifdef ARROW_JEMALLOC
+#ifdef ARROW_JEMALLOC
case MemoryPoolBackend::Jemalloc:
return &global_state.jemalloc_pool;
-#endif
+#endif
#ifdef ARROW_MIMALLOC
case MemoryPoolBackend::Mimalloc:
return &global_state.mimalloc_pool;
@@ -552,128 +552,128 @@ MemoryPool* default_memory_pool() {
ARROW_LOG(FATAL) << "Internal error: cannot create default memory pool";
return nullptr;
}
-}
-
-#define RETURN_IF_JEMALLOC_ERROR(ERR) \
- do { \
- if (err != 0) { \
- return Status::UnknownError(std::strerror(ERR)); \
- } \
- } while (0)
-
-Status jemalloc_set_decay_ms(int ms) {
-#ifdef ARROW_JEMALLOC
- ssize_t decay_time_ms = static_cast<ssize_t>(ms);
-
- int err = mallctl("arenas.dirty_decay_ms", nullptr, nullptr, &decay_time_ms,
- sizeof(decay_time_ms));
- RETURN_IF_JEMALLOC_ERROR(err);
- err = mallctl("arenas.muzzy_decay_ms", nullptr, nullptr, &decay_time_ms,
- sizeof(decay_time_ms));
- RETURN_IF_JEMALLOC_ERROR(err);
-
- return Status::OK();
-#else
- return Status::Invalid("jemalloc support is not built");
-#endif
-}
-
-///////////////////////////////////////////////////////////////////////
-// LoggingMemoryPool implementation
-
-LoggingMemoryPool::LoggingMemoryPool(MemoryPool* pool) : pool_(pool) {}
-
-Status LoggingMemoryPool::Allocate(int64_t size, uint8_t** out) {
- Status s = pool_->Allocate(size, out);
- std::cout << "Allocate: size = " << size << std::endl;
- return s;
-}
-
-Status LoggingMemoryPool::Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) {
- Status s = pool_->Reallocate(old_size, new_size, ptr);
- std::cout << "Reallocate: old_size = " << old_size << " - new_size = " << new_size
- << std::endl;
- return s;
-}
-
-void LoggingMemoryPool::Free(uint8_t* buffer, int64_t size) {
- pool_->Free(buffer, size);
- std::cout << "Free: size = " << size << std::endl;
-}
-
-int64_t LoggingMemoryPool::bytes_allocated() const {
- int64_t nb_bytes = pool_->bytes_allocated();
- std::cout << "bytes_allocated: " << nb_bytes << std::endl;
- return nb_bytes;
-}
-
-int64_t LoggingMemoryPool::max_memory() const {
- int64_t mem = pool_->max_memory();
- std::cout << "max_memory: " << mem << std::endl;
- return mem;
-}
-
-std::string LoggingMemoryPool::backend_name() const { return pool_->backend_name(); }
-
-///////////////////////////////////////////////////////////////////////
-// ProxyMemoryPool implementation
-
-class ProxyMemoryPool::ProxyMemoryPoolImpl {
- public:
- explicit ProxyMemoryPoolImpl(MemoryPool* pool) : pool_(pool) {}
-
- Status Allocate(int64_t size, uint8_t** out) {
- RETURN_NOT_OK(pool_->Allocate(size, out));
- stats_.UpdateAllocatedBytes(size);
- return Status::OK();
- }
-
- Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) {
- RETURN_NOT_OK(pool_->Reallocate(old_size, new_size, ptr));
- stats_.UpdateAllocatedBytes(new_size - old_size);
- return Status::OK();
- }
-
- void Free(uint8_t* buffer, int64_t size) {
- pool_->Free(buffer, size);
- stats_.UpdateAllocatedBytes(-size);
- }
-
- int64_t bytes_allocated() const { return stats_.bytes_allocated(); }
-
- int64_t max_memory() const { return stats_.max_memory(); }
-
- std::string backend_name() const { return pool_->backend_name(); }
-
- private:
- MemoryPool* pool_;
- internal::MemoryPoolStats stats_;
-};
-
-ProxyMemoryPool::ProxyMemoryPool(MemoryPool* pool) {
- impl_.reset(new ProxyMemoryPoolImpl(pool));
-}
-
-ProxyMemoryPool::~ProxyMemoryPool() {}
-
-Status ProxyMemoryPool::Allocate(int64_t size, uint8_t** out) {
- return impl_->Allocate(size, out);
-}
-
-Status ProxyMemoryPool::Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) {
- return impl_->Reallocate(old_size, new_size, ptr);
-}
-
-void ProxyMemoryPool::Free(uint8_t* buffer, int64_t size) {
- return impl_->Free(buffer, size);
-}
-
-int64_t ProxyMemoryPool::bytes_allocated() const { return impl_->bytes_allocated(); }
-
-int64_t ProxyMemoryPool::max_memory() const { return impl_->max_memory(); }
-
-std::string ProxyMemoryPool::backend_name() const { return impl_->backend_name(); }
-
+}
+
+#define RETURN_IF_JEMALLOC_ERROR(ERR) \
+ do { /* test the macro argument itself (evaluated twice), not a caller local named `err` */ \
+ if ((ERR) != 0) { \
+ return Status::UnknownError(std::strerror(ERR)); \
+ } \
+ } while (0)
+
+Status jemalloc_set_decay_ms(int ms) {
+#ifdef ARROW_JEMALLOC
+ ssize_t decay_time_ms = static_cast<ssize_t>(ms);
+
+ int err = mallctl("arenas.dirty_decay_ms", nullptr, nullptr, &decay_time_ms,
+ sizeof(decay_time_ms));
+ RETURN_IF_JEMALLOC_ERROR(err);
+ err = mallctl("arenas.muzzy_decay_ms", nullptr, nullptr, &decay_time_ms,
+ sizeof(decay_time_ms));
+ RETURN_IF_JEMALLOC_ERROR(err);
+
+ return Status::OK();
+#else
+ return Status::Invalid("jemalloc support is not built");
+#endif
+}
+
+///////////////////////////////////////////////////////////////////////
+// LoggingMemoryPool implementation
+
+LoggingMemoryPool::LoggingMemoryPool(MemoryPool* pool) : pool_(pool) {}
+
+Status LoggingMemoryPool::Allocate(int64_t size, uint8_t** out) {
+ Status s = pool_->Allocate(size, out);
+ std::cout << "Allocate: size = " << size << std::endl;
+ return s;
+}
+
+Status LoggingMemoryPool::Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) {
+ Status s = pool_->Reallocate(old_size, new_size, ptr);
+ std::cout << "Reallocate: old_size = " << old_size << " - new_size = " << new_size
+ << std::endl;
+ return s;
+}
+
+void LoggingMemoryPool::Free(uint8_t* buffer, int64_t size) {
+ pool_->Free(buffer, size);
+ std::cout << "Free: size = " << size << std::endl;
+}
+
+int64_t LoggingMemoryPool::bytes_allocated() const {
+ int64_t nb_bytes = pool_->bytes_allocated();
+ std::cout << "bytes_allocated: " << nb_bytes << std::endl;
+ return nb_bytes;
+}
+
+int64_t LoggingMemoryPool::max_memory() const {
+ int64_t mem = pool_->max_memory();
+ std::cout << "max_memory: " << mem << std::endl;
+ return mem;
+}
+
+std::string LoggingMemoryPool::backend_name() const { return pool_->backend_name(); }
+
+///////////////////////////////////////////////////////////////////////
+// ProxyMemoryPool implementation
+
+class ProxyMemoryPool::ProxyMemoryPoolImpl {
+ public:
+ explicit ProxyMemoryPoolImpl(MemoryPool* pool) : pool_(pool) {}
+
+ Status Allocate(int64_t size, uint8_t** out) {
+ RETURN_NOT_OK(pool_->Allocate(size, out));
+ stats_.UpdateAllocatedBytes(size);
+ return Status::OK();
+ }
+
+ Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) {
+ RETURN_NOT_OK(pool_->Reallocate(old_size, new_size, ptr));
+ stats_.UpdateAllocatedBytes(new_size - old_size);
+ return Status::OK();
+ }
+
+ void Free(uint8_t* buffer, int64_t size) {
+ pool_->Free(buffer, size);
+ stats_.UpdateAllocatedBytes(-size);
+ }
+
+ int64_t bytes_allocated() const { return stats_.bytes_allocated(); }
+
+ int64_t max_memory() const { return stats_.max_memory(); }
+
+ std::string backend_name() const { return pool_->backend_name(); }
+
+ private:
+ MemoryPool* pool_;
+ internal::MemoryPoolStats stats_;
+};
+
+ProxyMemoryPool::ProxyMemoryPool(MemoryPool* pool) {
+ impl_.reset(new ProxyMemoryPoolImpl(pool));
+}
+
+ProxyMemoryPool::~ProxyMemoryPool() {}
+
+Status ProxyMemoryPool::Allocate(int64_t size, uint8_t** out) {
+ return impl_->Allocate(size, out);
+}
+
+Status ProxyMemoryPool::Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) {
+ return impl_->Reallocate(old_size, new_size, ptr);
+}
+
+void ProxyMemoryPool::Free(uint8_t* buffer, int64_t size) {
+ return impl_->Free(buffer, size);
+}
+
+int64_t ProxyMemoryPool::bytes_allocated() const { return impl_->bytes_allocated(); }
+
+int64_t ProxyMemoryPool::max_memory() const { return impl_->max_memory(); }
+
+std::string ProxyMemoryPool::backend_name() const { return impl_->backend_name(); }
+
std::vector<std::string> SupportedMemoryBackendNames() {
std::vector<std::string> supported;
for (const auto backend : SupportedBackends()) {
@@ -794,4 +794,4 @@ Result<std::unique_ptr<ResizableBuffer>> AllocateResizableBuffer(const int64_t s
size);
}
-} // namespace arrow
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/memory_pool.h b/contrib/libs/apache/arrow/cpp/src/arrow/memory_pool.h
index 81b1b112dc7..12a0395183a 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/memory_pool.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/memory_pool.h
@@ -1,92 +1,92 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <atomic>
-#include <cstdint>
-#include <memory>
-#include <string>
-
-#include "arrow/status.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-namespace internal {
-
-///////////////////////////////////////////////////////////////////////
-// Helper tracking memory statistics
-
-class MemoryPoolStats {
- public:
- MemoryPoolStats() : bytes_allocated_(0), max_memory_(0) {}
-
- int64_t max_memory() const { return max_memory_.load(); }
-
- int64_t bytes_allocated() const { return bytes_allocated_.load(); }
-
- inline void UpdateAllocatedBytes(int64_t diff) {
- auto allocated = bytes_allocated_.fetch_add(diff) + diff;
- // "maximum" allocated memory is ill-defined in multi-threaded code,
- // so don't try to be too rigorous here
- if (diff > 0 && allocated > max_memory_) {
- max_memory_ = allocated;
- }
- }
-
- protected:
- std::atomic<int64_t> bytes_allocated_;
- std::atomic<int64_t> max_memory_;
-};
-
-} // namespace internal
-
-/// Base class for memory allocation on the CPU.
-///
-/// Besides tracking the number of allocated bytes, the allocator also should
-/// take care of the required 64-byte alignment.
-class ARROW_EXPORT MemoryPool {
- public:
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <atomic>
+#include <cstdint>
+#include <memory>
+#include <string>
+
+#include "arrow/status.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+namespace internal {
+
+///////////////////////////////////////////////////////////////////////
+// Helper tracking memory statistics
+
+class MemoryPoolStats {
+ public:
+ MemoryPoolStats() : bytes_allocated_(0), max_memory_(0) {}
+
+ int64_t max_memory() const { return max_memory_.load(); }
+
+ int64_t bytes_allocated() const { return bytes_allocated_.load(); }
+
+ inline void UpdateAllocatedBytes(int64_t diff) {
+ auto allocated = bytes_allocated_.fetch_add(diff) + diff;
+ // "maximum" allocated memory is ill-defined in multi-threaded code,
+ // so don't try to be too rigorous here
+ if (diff > 0 && allocated > max_memory_) {
+ max_memory_ = allocated;
+ }
+ }
+
+ protected:
+ std::atomic<int64_t> bytes_allocated_;
+ std::atomic<int64_t> max_memory_;
+};
+
+} // namespace internal
+
+/// Base class for memory allocation on the CPU.
+///
+/// Besides tracking the number of allocated bytes, the allocator also should
+/// take care of the required 64-byte alignment.
+class ARROW_EXPORT MemoryPool {
+ public:
virtual ~MemoryPool() = default;
-
- /// \brief EXPERIMENTAL. Create a new instance of the default MemoryPool
- static std::unique_ptr<MemoryPool> CreateDefault();
-
- /// Allocate a new memory region of at least size bytes.
- ///
- /// The allocated region shall be 64-byte aligned.
- virtual Status Allocate(int64_t size, uint8_t** out) = 0;
-
- /// Resize an already allocated memory section.
- ///
- /// As by default most default allocators on a platform don't support aligned
- /// reallocation, this function can involve a copy of the underlying data.
- virtual Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) = 0;
-
- /// Free an allocated region.
- ///
- /// @param buffer Pointer to the start of the allocated memory region
- /// @param size Allocated size located at buffer. An allocator implementation
- /// may use this for tracking the amount of allocated bytes as well as for
- /// faster deallocation if supported by its backend.
- virtual void Free(uint8_t* buffer, int64_t size) = 0;
-
+
+ /// \brief EXPERIMENTAL. Create a new instance of the default MemoryPool
+ static std::unique_ptr<MemoryPool> CreateDefault();
+
+ /// Allocate a new memory region of at least size bytes.
+ ///
+ /// The allocated region shall be 64-byte aligned.
+ virtual Status Allocate(int64_t size, uint8_t** out) = 0;
+
+ /// Resize an already allocated memory section.
+ ///
+ /// As by default most default allocators on a platform don't support aligned
+ /// reallocation, this function can involve a copy of the underlying data.
+ virtual Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) = 0;
+
+ /// Free an allocated region.
+ ///
+ /// @param buffer Pointer to the start of the allocated memory region
+ /// @param size Allocated size located at buffer. An allocator implementation
+ /// may use this for tracking the amount of allocated bytes as well as for
+ /// faster deallocation if supported by its backend.
+ virtual void Free(uint8_t* buffer, int64_t size) = 0;
+
/// Return unused memory to the OS
///
/// Only applies to allocators that hold onto unused memory. This will be
@@ -94,92 +94,92 @@ class ARROW_EXPORT MemoryPool {
/// unable to fulfill the request due to fragmentation.
virtual void ReleaseUnused() {}
- /// The number of bytes that were allocated and not yet free'd through
- /// this allocator.
- virtual int64_t bytes_allocated() const = 0;
-
- /// Return peak memory allocation in this memory pool
- ///
- /// \return Maximum bytes allocated. If not known (or not implemented),
- /// returns -1
- virtual int64_t max_memory() const;
-
+ /// The number of bytes that were allocated and not yet free'd through
+ /// this allocator.
+ virtual int64_t bytes_allocated() const = 0;
+
+ /// Return peak memory allocation in this memory pool
+ ///
+ /// \return Maximum bytes allocated. If not known (or not implemented),
+ /// returns -1
+ virtual int64_t max_memory() const;
+
/// The name of the backend used by this MemoryPool (e.g. "system" or "jemalloc").
- virtual std::string backend_name() const = 0;
-
- protected:
+ virtual std::string backend_name() const = 0;
+
+ protected:
MemoryPool() = default;
-};
-
-class ARROW_EXPORT LoggingMemoryPool : public MemoryPool {
- public:
- explicit LoggingMemoryPool(MemoryPool* pool);
- ~LoggingMemoryPool() override = default;
-
- Status Allocate(int64_t size, uint8_t** out) override;
- Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) override;
-
- void Free(uint8_t* buffer, int64_t size) override;
-
- int64_t bytes_allocated() const override;
-
- int64_t max_memory() const override;
-
- std::string backend_name() const override;
-
- private:
- MemoryPool* pool_;
-};
-
-/// Derived class for memory allocation.
-///
-/// Tracks the number of bytes and maximum memory allocated through its direct
-/// calls. Actual allocation is delegated to MemoryPool class.
-class ARROW_EXPORT ProxyMemoryPool : public MemoryPool {
- public:
- explicit ProxyMemoryPool(MemoryPool* pool);
- ~ProxyMemoryPool() override;
-
- Status Allocate(int64_t size, uint8_t** out) override;
- Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) override;
-
- void Free(uint8_t* buffer, int64_t size) override;
-
- int64_t bytes_allocated() const override;
-
- int64_t max_memory() const override;
-
- std::string backend_name() const override;
-
- private:
- class ProxyMemoryPoolImpl;
- std::unique_ptr<ProxyMemoryPoolImpl> impl_;
-};
-
+};
+
+class ARROW_EXPORT LoggingMemoryPool : public MemoryPool {
+ public:
+ explicit LoggingMemoryPool(MemoryPool* pool);
+ ~LoggingMemoryPool() override = default;
+
+ Status Allocate(int64_t size, uint8_t** out) override;
+ Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) override;
+
+ void Free(uint8_t* buffer, int64_t size) override;
+
+ int64_t bytes_allocated() const override;
+
+ int64_t max_memory() const override;
+
+ std::string backend_name() const override;
+
+ private:
+ MemoryPool* pool_;
+};
+
+/// Derived class for memory allocation.
+///
+/// Tracks the number of bytes and maximum memory allocated through its direct
+/// calls. Actual allocation is delegated to MemoryPool class.
+class ARROW_EXPORT ProxyMemoryPool : public MemoryPool {
+ public:
+ explicit ProxyMemoryPool(MemoryPool* pool);
+ ~ProxyMemoryPool() override;
+
+ Status Allocate(int64_t size, uint8_t** out) override;
+ Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) override;
+
+ void Free(uint8_t* buffer, int64_t size) override;
+
+ int64_t bytes_allocated() const override;
+
+ int64_t max_memory() const override;
+
+ std::string backend_name() const override;
+
+ private:
+ class ProxyMemoryPoolImpl;
+ std::unique_ptr<ProxyMemoryPoolImpl> impl_;
+};
+
/// \brief Return a process-wide memory pool based on the system allocator.
-ARROW_EXPORT MemoryPool* system_memory_pool();
-
+ARROW_EXPORT MemoryPool* system_memory_pool();
+
/// \brief Return a process-wide memory pool based on jemalloc.
-///
-/// May return NotImplemented if jemalloc is not available.
-ARROW_EXPORT Status jemalloc_memory_pool(MemoryPool** out);
-
-/// \brief Set jemalloc memory page purging behavior for future-created arenas
-/// to the indicated number of milliseconds. See dirty_decay_ms and
-/// muzzy_decay_ms options in jemalloc for a description of what these do. The
-/// default is configured to 1000 (1 second) which releases memory more
-/// aggressively to the operating system than the jemalloc default of 10
-/// seconds. If you set the value to 0, dirty / muzzy pages will be released
-/// immediately rather than with a time decay, but this may reduce application
-/// performance.
-ARROW_EXPORT
-Status jemalloc_set_decay_ms(int ms);
-
+///
+/// May return NotImplemented if jemalloc is not available.
+ARROW_EXPORT Status jemalloc_memory_pool(MemoryPool** out);
+
+/// \brief Set jemalloc memory page purging behavior for future-created arenas
+/// to the indicated number of milliseconds. See dirty_decay_ms and
+/// muzzy_decay_ms options in jemalloc for a description of what these do. The
+/// default is configured to 1000 (1 second) which releases memory more
+/// aggressively to the operating system than the jemalloc default of 10
+/// seconds. If you set the value to 0, dirty / muzzy pages will be released
+/// immediately rather than with a time decay, but this may reduce application
+/// performance.
+ARROW_EXPORT
+Status jemalloc_set_decay_ms(int ms);
+
/// \brief Return a process-wide memory pool based on mimalloc.
-///
-/// May return NotImplemented if mimalloc is not available.
-ARROW_EXPORT Status mimalloc_memory_pool(MemoryPool** out);
-
+///
+/// May return NotImplemented if mimalloc is not available.
+ARROW_EXPORT Status mimalloc_memory_pool(MemoryPool** out);
+
ARROW_EXPORT std::vector<std::string> SupportedMemoryBackendNames();
-} // namespace arrow
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/pretty_print.cc b/contrib/libs/apache/arrow/cpp/src/arrow/pretty_print.cc
index 8d1c16e0ed6..34f4b0a9f67 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/pretty_print.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/pretty_print.cc
@@ -1,711 +1,711 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
#include "arrow/pretty_print.h"
-#include <algorithm>
-#include <chrono>
-#include <cstddef>
-#include <cstdint>
-#include <iostream>
-#include <memory>
-#include <sstream> // IWYU pragma: keep
-#include <string>
-#include <type_traits>
-#include <vector>
-
-#include "arrow/array.h"
-#include "arrow/chunked_array.h"
-#include "arrow/record_batch.h"
-#include "arrow/status.h"
-#include "arrow/table.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/int_util_internal.h"
-#include "arrow/util/key_value_metadata.h"
-#include "arrow/util/string.h"
-#include "arrow/vendored/datetime.h"
-#include "arrow/visitor_inline.h"
-
-namespace arrow {
-
-using internal::checked_cast;
-
-class PrettyPrinter {
- public:
- PrettyPrinter(const PrettyPrintOptions& options, std::ostream* sink)
- : options_(options), indent_(options.indent), sink_(sink) {}
-
- void Write(const char* data);
- void Write(const std::string& data);
- void WriteIndented(const char* data);
- void WriteIndented(const std::string& data);
- void Newline();
- void Indent();
- void OpenArray(const Array& array);
- void CloseArray(const Array& array);
-
- void Flush() { (*sink_) << std::flush; }
-
- protected:
- const PrettyPrintOptions& options_;
- int indent_;
- std::ostream* sink_;
-};
-
-void PrettyPrinter::OpenArray(const Array& array) {
+#include <algorithm>
+#include <chrono>
+#include <cstddef>
+#include <cstdint>
+#include <iostream>
+#include <memory>
+#include <sstream> // IWYU pragma: keep
+#include <string>
+#include <type_traits>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/chunked_array.h"
+#include "arrow/record_batch.h"
+#include "arrow/status.h"
+#include "arrow/table.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/int_util_internal.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/string.h"
+#include "arrow/vendored/datetime.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+
+class PrettyPrinter {
+ public:
+ PrettyPrinter(const PrettyPrintOptions& options, std::ostream* sink)
+ : options_(options), indent_(options.indent), sink_(sink) {}
+
+ void Write(const char* data);
+ void Write(const std::string& data);
+ void WriteIndented(const char* data);
+ void WriteIndented(const std::string& data);
+ void Newline();
+ void Indent();
+ void OpenArray(const Array& array);
+ void CloseArray(const Array& array);
+
+ void Flush() { (*sink_) << std::flush; }
+
+ protected:
+ const PrettyPrintOptions& options_;
+ int indent_;
+ std::ostream* sink_;
+};
+
+void PrettyPrinter::OpenArray(const Array& array) {
if (!options_.skip_new_lines) {
Indent();
}
- (*sink_) << "[";
- if (array.length() > 0) {
+ (*sink_) << "[";
+ if (array.length() > 0) {
Newline();
- indent_ += options_.indent_size;
- }
-}
-
-void PrettyPrinter::CloseArray(const Array& array) {
- if (array.length() > 0) {
- indent_ -= options_.indent_size;
- Indent();
- }
- (*sink_) << "]";
-}
-
-void PrettyPrinter::Write(const char* data) { (*sink_) << data; }
-void PrettyPrinter::Write(const std::string& data) { (*sink_) << data; }
-
-void PrettyPrinter::WriteIndented(const char* data) {
- Indent();
- Write(data);
-}
-
-void PrettyPrinter::WriteIndented(const std::string& data) {
- Indent();
- Write(data);
-}
-
-void PrettyPrinter::Newline() {
- if (options_.skip_new_lines) {
- return;
- }
- (*sink_) << "\n";
-}
-
-void PrettyPrinter::Indent() {
- for (int i = 0; i < indent_; ++i) {
- (*sink_) << " ";
- }
-}
-
-class ArrayPrinter : public PrettyPrinter {
- public:
- ArrayPrinter(const PrettyPrintOptions& options, std::ostream* sink)
- : PrettyPrinter(options, sink) {}
-
- template <typename FormatFunction>
- void WriteValues(const Array& array, FormatFunction&& func) {
- bool skip_comma = true;
- for (int64_t i = 0; i < array.length(); ++i) {
- if (skip_comma) {
- skip_comma = false;
- } else {
+ indent_ += options_.indent_size;
+ }
+}
+
+void PrettyPrinter::CloseArray(const Array& array) {
+ if (array.length() > 0) {
+ indent_ -= options_.indent_size;
+ Indent();
+ }
+ (*sink_) << "]";
+}
+
+void PrettyPrinter::Write(const char* data) { (*sink_) << data; }
+void PrettyPrinter::Write(const std::string& data) { (*sink_) << data; }
+
+void PrettyPrinter::WriteIndented(const char* data) {
+ Indent();
+ Write(data);
+}
+
+void PrettyPrinter::WriteIndented(const std::string& data) {
+ Indent();
+ Write(data);
+}
+
+void PrettyPrinter::Newline() {
+ if (options_.skip_new_lines) {
+ return;
+ }
+ (*sink_) << "\n";
+}
+
+void PrettyPrinter::Indent() {
+ for (int i = 0; i < indent_; ++i) {
+ (*sink_) << " ";
+ }
+}
+
+class ArrayPrinter : public PrettyPrinter {
+ public:
+ ArrayPrinter(const PrettyPrintOptions& options, std::ostream* sink)
+ : PrettyPrinter(options, sink) {}
+
+ template <typename FormatFunction>
+ void WriteValues(const Array& array, FormatFunction&& func) {
+ bool skip_comma = true;
+ for (int64_t i = 0; i < array.length(); ++i) {
+ if (skip_comma) {
+ skip_comma = false;
+ } else {
(*sink_) << ",";
Newline();
- }
+ }
if (!options_.skip_new_lines) {
Indent();
}
- if ((i >= options_.window) && (i < (array.length() - options_.window))) {
+ if ((i >= options_.window) && (i < (array.length() - options_.window))) {
(*sink_) << "...";
Newline();
- i = array.length() - options_.window - 1;
- skip_comma = true;
- } else if (array.IsNull(i)) {
- (*sink_) << options_.null_rep;
- } else {
- func(i);
- }
- }
+ i = array.length() - options_.window - 1;
+ skip_comma = true;
+ } else if (array.IsNull(i)) {
+ (*sink_) << options_.null_rep;
+ } else {
+ func(i);
+ }
+ }
Newline();
- }
-
- Status WriteDataValues(const BooleanArray& array) {
- WriteValues(array, [&](int64_t i) { Write(array.Value(i) ? "true" : "false"); });
- return Status::OK();
- }
-
- template <typename T>
- enable_if_integer<typename T::TypeClass, Status> WriteDataValues(const T& array) {
- const auto data = array.raw_values();
- // Need to upcast integers to avoid selecting operator<<(char)
- WriteValues(array, [&](int64_t i) { (*sink_) << internal::UpcastInt(data[i]); });
- return Status::OK();
- }
-
- template <typename T>
- enable_if_floating_point<typename T::TypeClass, Status> WriteDataValues(
- const T& array) {
- const auto data = array.raw_values();
- WriteValues(array, [&](int64_t i) { (*sink_) << data[i]; });
- return Status::OK();
- }
-
- template <typename T>
- enable_if_date<typename T::TypeClass, Status> WriteDataValues(const T& array) {
- const auto data = array.raw_values();
- using unit = typename std::conditional<std::is_same<T, Date32Array>::value,
- arrow_vendored::date::days,
- std::chrono::milliseconds>::type;
- WriteValues(array, [&](int64_t i) { FormatDateTime<unit>("%F", data[i], true); });
- return Status::OK();
- }
-
- template <typename T>
- enable_if_time<typename T::TypeClass, Status> WriteDataValues(const T& array) {
- const auto data = array.raw_values();
- const auto type = static_cast<const TimeType*>(array.type().get());
- WriteValues(array,
- [&](int64_t i) { FormatDateTime(type->unit(), "%T", data[i], false); });
- return Status::OK();
- }
-
- Status WriteDataValues(const TimestampArray& array) {
- const int64_t* data = array.raw_values();
- const auto type = static_cast<const TimestampType*>(array.type().get());
- WriteValues(array,
- [&](int64_t i) { FormatDateTime(type->unit(), "%F %T", data[i], true); });
- return Status::OK();
- }
-
- template <typename T>
- enable_if_duration<typename T::TypeClass, Status> WriteDataValues(const T& array) {
- const auto data = array.raw_values();
- WriteValues(array, [&](int64_t i) { (*sink_) << data[i]; });
- return Status::OK();
- }
-
- Status WriteDataValues(const DayTimeIntervalArray& array) {
- WriteValues(array, [&](int64_t i) {
- auto day_millis = array.GetValue(i);
- (*sink_) << day_millis.days << "d" << day_millis.milliseconds << "ms";
- });
- return Status::OK();
- }
-
- Status WriteDataValues(const MonthIntervalArray& array) {
- const auto data = array.raw_values();
- WriteValues(array, [&](int64_t i) { (*sink_) << data[i]; });
- return Status::OK();
- }
-
- template <typename T>
- enable_if_string_like<typename T::TypeClass, Status> WriteDataValues(const T& array) {
- WriteValues(array, [&](int64_t i) { (*sink_) << "\"" << array.GetView(i) << "\""; });
- return Status::OK();
- }
-
- // Binary
- template <typename T>
- enable_if_binary_like<typename T::TypeClass, Status> WriteDataValues(const T& array) {
- WriteValues(array, [&](int64_t i) { (*sink_) << HexEncode(array.GetView(i)); });
- return Status::OK();
- }
-
- Status WriteDataValues(const Decimal128Array& array) {
- WriteValues(array, [&](int64_t i) { (*sink_) << array.FormatValue(i); });
- return Status::OK();
- }
-
+ }
+
+ Status WriteDataValues(const BooleanArray& array) {
+ WriteValues(array, [&](int64_t i) { Write(array.Value(i) ? "true" : "false"); });
+ return Status::OK();
+ }
+
+ template <typename T>
+ enable_if_integer<typename T::TypeClass, Status> WriteDataValues(const T& array) {
+ const auto data = array.raw_values();
+ // Need to upcast integers to avoid selecting operator<<(char)
+ WriteValues(array, [&](int64_t i) { (*sink_) << internal::UpcastInt(data[i]); });
+ return Status::OK();
+ }
+
+ template <typename T>
+ enable_if_floating_point<typename T::TypeClass, Status> WriteDataValues(
+ const T& array) {
+ const auto data = array.raw_values();
+ WriteValues(array, [&](int64_t i) { (*sink_) << data[i]; });
+ return Status::OK();
+ }
+
+ template <typename T>
+ enable_if_date<typename T::TypeClass, Status> WriteDataValues(const T& array) {
+ const auto data = array.raw_values();
+ using unit = typename std::conditional<std::is_same<T, Date32Array>::value,
+ arrow_vendored::date::days,
+ std::chrono::milliseconds>::type;
+ WriteValues(array, [&](int64_t i) { FormatDateTime<unit>("%F", data[i], true); });
+ return Status::OK();
+ }
+
+ template <typename T>
+ enable_if_time<typename T::TypeClass, Status> WriteDataValues(const T& array) {
+ const auto data = array.raw_values();
+ const auto type = static_cast<const TimeType*>(array.type().get());
+ WriteValues(array,
+ [&](int64_t i) { FormatDateTime(type->unit(), "%T", data[i], false); });
+ return Status::OK();
+ }
+
+ Status WriteDataValues(const TimestampArray& array) {
+ const int64_t* data = array.raw_values();
+ const auto type = static_cast<const TimestampType*>(array.type().get());
+ WriteValues(array,
+ [&](int64_t i) { FormatDateTime(type->unit(), "%F %T", data[i], true); });
+ return Status::OK();
+ }
+
+ template <typename T>
+ enable_if_duration<typename T::TypeClass, Status> WriteDataValues(const T& array) {
+ const auto data = array.raw_values();
+ WriteValues(array, [&](int64_t i) { (*sink_) << data[i]; });
+ return Status::OK();
+ }
+
+ Status WriteDataValues(const DayTimeIntervalArray& array) {
+ WriteValues(array, [&](int64_t i) {
+ auto day_millis = array.GetValue(i);
+ (*sink_) << day_millis.days << "d" << day_millis.milliseconds << "ms";
+ });
+ return Status::OK();
+ }
+
+ Status WriteDataValues(const MonthIntervalArray& array) {
+ const auto data = array.raw_values();
+ WriteValues(array, [&](int64_t i) { (*sink_) << data[i]; });
+ return Status::OK();
+ }
+
+ template <typename T>
+ enable_if_string_like<typename T::TypeClass, Status> WriteDataValues(const T& array) {
+ WriteValues(array, [&](int64_t i) { (*sink_) << "\"" << array.GetView(i) << "\""; });
+ return Status::OK();
+ }
+
+ // Binary
+ template <typename T>
+ enable_if_binary_like<typename T::TypeClass, Status> WriteDataValues(const T& array) {
+ WriteValues(array, [&](int64_t i) { (*sink_) << HexEncode(array.GetView(i)); });
+ return Status::OK();
+ }
+
+ Status WriteDataValues(const Decimal128Array& array) {
+ WriteValues(array, [&](int64_t i) { (*sink_) << array.FormatValue(i); });
+ return Status::OK();
+ }
+
Status WriteDataValues(const Decimal256Array& array) {
WriteValues(array, [&](int64_t i) { (*sink_) << array.FormatValue(i); });
return Status::OK();
}
- template <typename T>
- enable_if_list_like<typename T::TypeClass, Status> WriteDataValues(const T& array) {
- bool skip_comma = true;
- for (int64_t i = 0; i < array.length(); ++i) {
- if (skip_comma) {
- skip_comma = false;
- } else {
+ template <typename T>
+ enable_if_list_like<typename T::TypeClass, Status> WriteDataValues(const T& array) {
+ bool skip_comma = true;
+ for (int64_t i = 0; i < array.length(); ++i) {
+ if (skip_comma) {
+ skip_comma = false;
+ } else {
(*sink_) << ",";
Newline();
- }
- if ((i >= options_.window) && (i < (array.length() - options_.window))) {
- Indent();
+ }
+ if ((i >= options_.window) && (i < (array.length() - options_.window))) {
+ Indent();
(*sink_) << "...";
Newline();
- i = array.length() - options_.window - 1;
- skip_comma = true;
- } else if (array.IsNull(i)) {
- Indent();
- (*sink_) << options_.null_rep;
- } else {
- std::shared_ptr<Array> slice =
- array.values()->Slice(array.value_offset(i), array.value_length(i));
+ i = array.length() - options_.window - 1;
+ skip_comma = true;
+ } else if (array.IsNull(i)) {
+ Indent();
+ (*sink_) << options_.null_rep;
+ } else {
+ std::shared_ptr<Array> slice =
+ array.values()->Slice(array.value_offset(i), array.value_length(i));
RETURN_NOT_OK(
PrettyPrint(*slice, PrettyPrintOptions{indent_, options_.window}, sink_));
- }
- }
+ }
+ }
Newline();
- return Status::OK();
- }
-
- Status WriteDataValues(const MapArray& array) {
- bool skip_comma = true;
- for (int64_t i = 0; i < array.length(); ++i) {
- if (skip_comma) {
- skip_comma = false;
- } else {
+ return Status::OK();
+ }
+
+ Status WriteDataValues(const MapArray& array) {
+ bool skip_comma = true;
+ for (int64_t i = 0; i < array.length(); ++i) {
+ if (skip_comma) {
+ skip_comma = false;
+ } else {
(*sink_) << ",";
Newline();
- }
+ }
if (!options_.skip_new_lines) {
Indent();
}
- if ((i >= options_.window) && (i < (array.length() - options_.window))) {
+ if ((i >= options_.window) && (i < (array.length() - options_.window))) {
(*sink_) << "...";
Newline();
- i = array.length() - options_.window - 1;
- skip_comma = true;
- } else if (array.IsNull(i)) {
- (*sink_) << options_.null_rep;
- } else {
+ i = array.length() - options_.window - 1;
+ skip_comma = true;
+ } else if (array.IsNull(i)) {
+ (*sink_) << options_.null_rep;
+ } else {
(*sink_) << "keys:";
Newline();
- auto keys_slice =
- array.keys()->Slice(array.value_offset(i), array.value_length(i));
+ auto keys_slice =
+ array.keys()->Slice(array.value_offset(i), array.value_length(i));
RETURN_NOT_OK(PrettyPrint(*keys_slice,
PrettyPrintOptions{indent_, options_.window}, sink_));
Newline();
- Indent();
+ Indent();
(*sink_) << "values:";
Newline();
- auto values_slice =
- array.items()->Slice(array.value_offset(i), array.value_length(i));
+ auto values_slice =
+ array.items()->Slice(array.value_offset(i), array.value_length(i));
RETURN_NOT_OK(PrettyPrint(*values_slice,
PrettyPrintOptions{indent_, options_.window}, sink_));
- }
- }
- (*sink_) << "\n";
- return Status::OK();
- }
-
- Status Visit(const NullArray& array) {
- (*sink_) << array.length() << " nulls";
- return Status::OK();
- }
-
- template <typename T>
- enable_if_t<std::is_base_of<PrimitiveArray, T>::value ||
- std::is_base_of<FixedSizeBinaryArray, T>::value ||
- std::is_base_of<BinaryArray, T>::value ||
- std::is_base_of<LargeBinaryArray, T>::value ||
- std::is_base_of<ListArray, T>::value ||
- std::is_base_of<LargeListArray, T>::value ||
- std::is_base_of<MapArray, T>::value ||
- std::is_base_of<FixedSizeListArray, T>::value,
- Status>
- Visit(const T& array) {
- OpenArray(array);
- if (array.length() > 0) {
- RETURN_NOT_OK(WriteDataValues(array));
- }
- CloseArray(array);
- return Status::OK();
- }
-
- Status Visit(const ExtensionArray& array) { return Print(*array.storage()); }
-
- Status WriteValidityBitmap(const Array& array);
-
- Status PrintChildren(const std::vector<std::shared_ptr<Array>>& fields, int64_t offset,
- int64_t length) {
- for (size_t i = 0; i < fields.size(); ++i) {
- Newline();
+ }
+ }
+ (*sink_) << "\n";
+ return Status::OK();
+ }
+
+ Status Visit(const NullArray& array) {
+ (*sink_) << array.length() << " nulls";
+ return Status::OK();
+ }
+
+ template <typename T>
+ enable_if_t<std::is_base_of<PrimitiveArray, T>::value ||
+ std::is_base_of<FixedSizeBinaryArray, T>::value ||
+ std::is_base_of<BinaryArray, T>::value ||
+ std::is_base_of<LargeBinaryArray, T>::value ||
+ std::is_base_of<ListArray, T>::value ||
+ std::is_base_of<LargeListArray, T>::value ||
+ std::is_base_of<MapArray, T>::value ||
+ std::is_base_of<FixedSizeListArray, T>::value,
+ Status>
+ Visit(const T& array) {
+ OpenArray(array);
+ if (array.length() > 0) {
+ RETURN_NOT_OK(WriteDataValues(array));
+ }
+ CloseArray(array);
+ return Status::OK();
+ }
+
+ Status Visit(const ExtensionArray& array) { return Print(*array.storage()); }
+
+ Status WriteValidityBitmap(const Array& array);
+
+ Status PrintChildren(const std::vector<std::shared_ptr<Array>>& fields, int64_t offset,
+ int64_t length) {
+ for (size_t i = 0; i < fields.size(); ++i) {
+ Newline();
Indent();
- std::stringstream ss;
- ss << "-- child " << i << " type: " << fields[i]->type()->ToString() << "\n";
- Write(ss.str());
-
- std::shared_ptr<Array> field = fields[i];
- if (offset != 0) {
- field = field->Slice(offset, length);
- }
- RETURN_NOT_OK(PrettyPrint(*field, indent_ + options_.indent_size, sink_));
- }
- return Status::OK();
- }
-
- Status Visit(const StructArray& array) {
- RETURN_NOT_OK(WriteValidityBitmap(array));
- std::vector<std::shared_ptr<Array>> children;
- children.reserve(array.num_fields());
- for (int i = 0; i < array.num_fields(); ++i) {
- children.emplace_back(array.field(i));
- }
- return PrintChildren(children, 0, array.length());
- }
-
- Status Visit(const UnionArray& array) {
- RETURN_NOT_OK(WriteValidityBitmap(array));
-
- Newline();
+ std::stringstream ss;
+ ss << "-- child " << i << " type: " << fields[i]->type()->ToString() << "\n";
+ Write(ss.str());
+
+ std::shared_ptr<Array> field = fields[i];
+ if (offset != 0) {
+ field = field->Slice(offset, length);
+ }
+ RETURN_NOT_OK(PrettyPrint(*field, indent_ + options_.indent_size, sink_));
+ }
+ return Status::OK();
+ }
+
+ Status Visit(const StructArray& array) {
+ RETURN_NOT_OK(WriteValidityBitmap(array));
+ std::vector<std::shared_ptr<Array>> children;
+ children.reserve(array.num_fields());
+ for (int i = 0; i < array.num_fields(); ++i) {
+ children.emplace_back(array.field(i));
+ }
+ return PrintChildren(children, 0, array.length());
+ }
+
+ Status Visit(const UnionArray& array) {
+ RETURN_NOT_OK(WriteValidityBitmap(array));
+
+ Newline();
Indent();
- Write("-- type_ids: ");
- UInt8Array type_codes(array.length(), array.type_codes(), nullptr, 0, array.offset());
- RETURN_NOT_OK(PrettyPrint(type_codes, indent_ + options_.indent_size, sink_));
-
- if (array.mode() == UnionMode::DENSE) {
- Newline();
+ Write("-- type_ids: ");
+ UInt8Array type_codes(array.length(), array.type_codes(), nullptr, 0, array.offset());
+ RETURN_NOT_OK(PrettyPrint(type_codes, indent_ + options_.indent_size, sink_));
+
+ if (array.mode() == UnionMode::DENSE) {
+ Newline();
Indent();
- Write("-- value_offsets: ");
- Int32Array value_offsets(
- array.length(), checked_cast<const DenseUnionArray&>(array).value_offsets(),
- nullptr, 0, array.offset());
- RETURN_NOT_OK(PrettyPrint(value_offsets, indent_ + options_.indent_size, sink_));
- }
-
- // Print the children without any offset, because the type ids are absolute
- std::vector<std::shared_ptr<Array>> children;
- children.reserve(array.num_fields());
- for (int i = 0; i < array.num_fields(); ++i) {
- children.emplace_back(array.field(i));
- }
- return PrintChildren(children, 0, array.length() + array.offset());
- }
-
- Status Visit(const DictionaryArray& array) {
- Newline();
+ Write("-- value_offsets: ");
+ Int32Array value_offsets(
+ array.length(), checked_cast<const DenseUnionArray&>(array).value_offsets(),
+ nullptr, 0, array.offset());
+ RETURN_NOT_OK(PrettyPrint(value_offsets, indent_ + options_.indent_size, sink_));
+ }
+
+ // Print the children without any offset, because the type ids are absolute
+ std::vector<std::shared_ptr<Array>> children;
+ children.reserve(array.num_fields());
+ for (int i = 0; i < array.num_fields(); ++i) {
+ children.emplace_back(array.field(i));
+ }
+ return PrintChildren(children, 0, array.length() + array.offset());
+ }
+
+ Status Visit(const DictionaryArray& array) {
+ Newline();
Indent();
- Write("-- dictionary:\n");
- RETURN_NOT_OK(
- PrettyPrint(*array.dictionary(), indent_ + options_.indent_size, sink_));
-
- Newline();
+ Write("-- dictionary:\n");
+ RETURN_NOT_OK(
+ PrettyPrint(*array.dictionary(), indent_ + options_.indent_size, sink_));
+
+ Newline();
Indent();
- Write("-- indices:\n");
- return PrettyPrint(*array.indices(), indent_ + options_.indent_size, sink_);
- }
-
- Status Print(const Array& array) {
- RETURN_NOT_OK(VisitArrayInline(array, this));
- Flush();
- return Status::OK();
- }
-
- private:
- template <typename Unit>
- void FormatDateTime(const char* fmt, int64_t value, bool add_epoch) {
- if (add_epoch) {
- (*sink_) << arrow_vendored::date::format(fmt, epoch_ + Unit{value});
- } else {
- (*sink_) << arrow_vendored::date::format(fmt, Unit{value});
- }
- }
-
- void FormatDateTime(TimeUnit::type unit, const char* fmt, int64_t value,
- bool add_epoch) {
- switch (unit) {
- case TimeUnit::NANO:
- FormatDateTime<std::chrono::nanoseconds>(fmt, value, add_epoch);
- break;
- case TimeUnit::MICRO:
- FormatDateTime<std::chrono::microseconds>(fmt, value, add_epoch);
- break;
- case TimeUnit::MILLI:
- FormatDateTime<std::chrono::milliseconds>(fmt, value, add_epoch);
- break;
- case TimeUnit::SECOND:
- FormatDateTime<std::chrono::seconds>(fmt, value, add_epoch);
- break;
- }
- }
-
- static arrow_vendored::date::sys_days epoch_;
-};
-
-arrow_vendored::date::sys_days ArrayPrinter::epoch_ =
- arrow_vendored::date::sys_days{arrow_vendored::date::jan / 1 / 1970};
-
-Status ArrayPrinter::WriteValidityBitmap(const Array& array) {
- Indent();
- Write("-- is_valid:");
-
- if (array.null_count() > 0) {
- Newline();
+ Write("-- indices:\n");
+ return PrettyPrint(*array.indices(), indent_ + options_.indent_size, sink_);
+ }
+
+ Status Print(const Array& array) {
+ RETURN_NOT_OK(VisitArrayInline(array, this));
+ Flush();
+ return Status::OK();
+ }
+
+ private:
+ template <typename Unit>
+ void FormatDateTime(const char* fmt, int64_t value, bool add_epoch) {
+ if (add_epoch) {
+ (*sink_) << arrow_vendored::date::format(fmt, epoch_ + Unit{value});
+ } else {
+ (*sink_) << arrow_vendored::date::format(fmt, Unit{value});
+ }
+ }
+
+ void FormatDateTime(TimeUnit::type unit, const char* fmt, int64_t value,
+ bool add_epoch) {
+ switch (unit) {
+ case TimeUnit::NANO:
+ FormatDateTime<std::chrono::nanoseconds>(fmt, value, add_epoch);
+ break;
+ case TimeUnit::MICRO:
+ FormatDateTime<std::chrono::microseconds>(fmt, value, add_epoch);
+ break;
+ case TimeUnit::MILLI:
+ FormatDateTime<std::chrono::milliseconds>(fmt, value, add_epoch);
+ break;
+ case TimeUnit::SECOND:
+ FormatDateTime<std::chrono::seconds>(fmt, value, add_epoch);
+ break;
+ }
+ }
+
+ static arrow_vendored::date::sys_days epoch_;
+};
+
+arrow_vendored::date::sys_days ArrayPrinter::epoch_ =
+ arrow_vendored::date::sys_days{arrow_vendored::date::jan / 1 / 1970};
+
+Status ArrayPrinter::WriteValidityBitmap(const Array& array) {
+ Indent();
+ Write("-- is_valid:");
+
+ if (array.null_count() > 0) {
+ Newline();
Indent();
- BooleanArray is_valid(array.length(), array.null_bitmap(), nullptr, 0,
- array.offset());
- return PrettyPrint(is_valid, indent_ + options_.indent_size, sink_);
- } else {
- Write(" all not null");
- return Status::OK();
- }
-}
-
-Status PrettyPrint(const Array& arr, int indent, std::ostream* sink) {
- PrettyPrintOptions options;
- options.indent = indent;
- ArrayPrinter printer(options, sink);
- return printer.Print(arr);
-}
-
-Status PrettyPrint(const Array& arr, const PrettyPrintOptions& options,
- std::ostream* sink) {
- ArrayPrinter printer(options, sink);
- return printer.Print(arr);
-}
-
-Status PrettyPrint(const Array& arr, const PrettyPrintOptions& options,
- std::string* result) {
- std::ostringstream sink;
- RETURN_NOT_OK(PrettyPrint(arr, options, &sink));
- *result = sink.str();
- return Status::OK();
-}
-
-Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& options,
- std::ostream* sink) {
- int num_chunks = chunked_arr.num_chunks();
- int indent = options.indent;
- int window = options.window;
-
- for (int i = 0; i < indent; ++i) {
- (*sink) << " ";
- }
+ BooleanArray is_valid(array.length(), array.null_bitmap(), nullptr, 0,
+ array.offset());
+ return PrettyPrint(is_valid, indent_ + options_.indent_size, sink_);
+ } else {
+ Write(" all not null");
+ return Status::OK();
+ }
+}
+
+Status PrettyPrint(const Array& arr, int indent, std::ostream* sink) {
+ PrettyPrintOptions options;
+ options.indent = indent;
+ ArrayPrinter printer(options, sink);
+ return printer.Print(arr);
+}
+
+Status PrettyPrint(const Array& arr, const PrettyPrintOptions& options,
+ std::ostream* sink) {
+ ArrayPrinter printer(options, sink);
+ return printer.Print(arr);
+}
+
+Status PrettyPrint(const Array& arr, const PrettyPrintOptions& options,
+ std::string* result) {
+ std::ostringstream sink;
+ RETURN_NOT_OK(PrettyPrint(arr, options, &sink));
+ *result = sink.str();
+ return Status::OK();
+}
+
+Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& options,
+ std::ostream* sink) {
+ int num_chunks = chunked_arr.num_chunks();
+ int indent = options.indent;
+ int window = options.window;
+
+ for (int i = 0; i < indent; ++i) {
+ (*sink) << " ";
+ }
(*sink) << "[";
if (!options.skip_new_lines) {
*sink << "\n";
}
- bool skip_comma = true;
- for (int i = 0; i < num_chunks; ++i) {
- if (skip_comma) {
- skip_comma = false;
- } else {
+ bool skip_comma = true;
+ for (int i = 0; i < num_chunks; ++i) {
+ if (skip_comma) {
+ skip_comma = false;
+ } else {
(*sink) << ",";
if (!options.skip_new_lines) {
*sink << "\n";
}
- }
- if ((i >= window) && (i < (num_chunks - window))) {
- for (int i = 0; i < indent; ++i) {
- (*sink) << " ";
- }
+ }
+ if ((i >= window) && (i < (num_chunks - window))) {
+ for (int i = 0; i < indent; ++i) {
+ (*sink) << " ";
+ }
(*sink) << "...";
if (!options.skip_new_lines) {
*sink << "\n";
}
- i = num_chunks - window - 1;
- skip_comma = true;
- } else {
- PrettyPrintOptions chunk_options = options;
- chunk_options.indent += options.indent_size;
- ArrayPrinter printer(chunk_options, sink);
- RETURN_NOT_OK(printer.Print(*chunked_arr.chunk(i)));
- }
- }
+ i = num_chunks - window - 1;
+ skip_comma = true;
+ } else {
+ PrettyPrintOptions chunk_options = options;
+ chunk_options.indent += options.indent_size;
+ ArrayPrinter printer(chunk_options, sink);
+ RETURN_NOT_OK(printer.Print(*chunked_arr.chunk(i)));
+ }
+ }
if (!options.skip_new_lines) {
*sink << "\n";
}
-
- for (int i = 0; i < indent; ++i) {
- (*sink) << " ";
- }
- (*sink) << "]";
-
- return Status::OK();
-}
-
-Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& options,
- std::string* result) {
- std::ostringstream sink;
- RETURN_NOT_OK(PrettyPrint(chunked_arr, options, &sink));
- *result = sink.str();
- return Status::OK();
-}
-
-Status PrettyPrint(const RecordBatch& batch, int indent, std::ostream* sink) {
- for (int i = 0; i < batch.num_columns(); ++i) {
- const std::string& name = batch.column_name(i);
- (*sink) << name << ": ";
- RETURN_NOT_OK(PrettyPrint(*batch.column(i), indent + 2, sink));
- (*sink) << "\n";
- }
- (*sink) << std::flush;
- return Status::OK();
-}
-
-Status PrettyPrint(const RecordBatch& batch, const PrettyPrintOptions& options,
- std::ostream* sink) {
- for (int i = 0; i < batch.num_columns(); ++i) {
- const std::string& name = batch.column_name(i);
- PrettyPrintOptions column_options = options;
- column_options.indent += 2;
-
- (*sink) << name << ": ";
- RETURN_NOT_OK(PrettyPrint(*batch.column(i), column_options, sink));
- (*sink) << "\n";
- }
- (*sink) << std::flush;
- return Status::OK();
-}
-
-Status PrettyPrint(const Table& table, const PrettyPrintOptions& options,
- std::ostream* sink) {
- RETURN_NOT_OK(PrettyPrint(*table.schema(), options, sink));
- (*sink) << "\n";
- (*sink) << "----\n";
-
- PrettyPrintOptions column_options = options;
- column_options.indent += 2;
- for (int i = 0; i < table.num_columns(); ++i) {
- for (int j = 0; j < options.indent; ++j) {
- (*sink) << " ";
- }
- (*sink) << table.schema()->field(i)->name() << ":\n";
- RETURN_NOT_OK(PrettyPrint(*table.column(i), column_options, sink));
- (*sink) << "\n";
- }
- (*sink) << std::flush;
- return Status::OK();
-}
-
-Status DebugPrint(const Array& arr, int indent) {
- return PrettyPrint(arr, indent, &std::cerr);
-}
-
-class SchemaPrinter : public PrettyPrinter {
- public:
- SchemaPrinter(const Schema& schema, const PrettyPrintOptions& options,
- std::ostream* sink)
- : PrettyPrinter(options, sink), schema_(schema) {}
-
- Status PrintType(const DataType& type, bool nullable);
- Status PrintField(const Field& field);
-
- void PrintVerboseMetadata(const KeyValueMetadata& metadata) {
- for (int64_t i = 0; i < metadata.size(); ++i) {
- Newline();
+
+ for (int i = 0; i < indent; ++i) {
+ (*sink) << " ";
+ }
+ (*sink) << "]";
+
+ return Status::OK();
+}
+
+Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& options,
+ std::string* result) {
+ std::ostringstream sink;
+ RETURN_NOT_OK(PrettyPrint(chunked_arr, options, &sink));
+ *result = sink.str();
+ return Status::OK();
+}
+
+Status PrettyPrint(const RecordBatch& batch, int indent, std::ostream* sink) {
+ for (int i = 0; i < batch.num_columns(); ++i) {
+ const std::string& name = batch.column_name(i);
+ (*sink) << name << ": ";
+ RETURN_NOT_OK(PrettyPrint(*batch.column(i), indent + 2, sink));
+ (*sink) << "\n";
+ }
+ (*sink) << std::flush;
+ return Status::OK();
+}
+
+Status PrettyPrint(const RecordBatch& batch, const PrettyPrintOptions& options,
+ std::ostream* sink) {
+ for (int i = 0; i < batch.num_columns(); ++i) {
+ const std::string& name = batch.column_name(i);
+ PrettyPrintOptions column_options = options;
+ column_options.indent += 2;
+
+ (*sink) << name << ": ";
+ RETURN_NOT_OK(PrettyPrint(*batch.column(i), column_options, sink));
+ (*sink) << "\n";
+ }
+ (*sink) << std::flush;
+ return Status::OK();
+}
+
+Status PrettyPrint(const Table& table, const PrettyPrintOptions& options,
+ std::ostream* sink) {
+ RETURN_NOT_OK(PrettyPrint(*table.schema(), options, sink));
+ (*sink) << "\n";
+ (*sink) << "----\n";
+
+ PrettyPrintOptions column_options = options;
+ column_options.indent += 2;
+ for (int i = 0; i < table.num_columns(); ++i) {
+ for (int j = 0; j < options.indent; ++j) {
+ (*sink) << " ";
+ }
+ (*sink) << table.schema()->field(i)->name() << ":\n";
+ RETURN_NOT_OK(PrettyPrint(*table.column(i), column_options, sink));
+ (*sink) << "\n";
+ }
+ (*sink) << std::flush;
+ return Status::OK();
+}
+
+Status DebugPrint(const Array& arr, int indent) {
+ return PrettyPrint(arr, indent, &std::cerr);
+}
+
+class SchemaPrinter : public PrettyPrinter {
+ public:
+ SchemaPrinter(const Schema& schema, const PrettyPrintOptions& options,
+ std::ostream* sink)
+ : PrettyPrinter(options, sink), schema_(schema) {}
+
+ Status PrintType(const DataType& type, bool nullable);
+ Status PrintField(const Field& field);
+
+ void PrintVerboseMetadata(const KeyValueMetadata& metadata) {
+ for (int64_t i = 0; i < metadata.size(); ++i) {
+ Newline();
Indent();
- Write(metadata.key(i) + ": '" + metadata.value(i) + "'");
- }
- }
-
- void PrintTruncatedMetadata(const KeyValueMetadata& metadata) {
- for (int64_t i = 0; i < metadata.size(); ++i) {
- Newline();
+ Write(metadata.key(i) + ": '" + metadata.value(i) + "'");
+ }
+ }
+
+ void PrintTruncatedMetadata(const KeyValueMetadata& metadata) {
+ for (int64_t i = 0; i < metadata.size(); ++i) {
+ Newline();
Indent();
- size_t size = metadata.value(i).size();
- size_t truncated_size = std::max<size_t>(10, 70 - metadata.key(i).size() - indent_);
- if (size <= truncated_size) {
- Write(metadata.key(i) + ": '" + metadata.value(i) + "'");
- continue;
- }
-
- Write(metadata.key(i) + ": '" + metadata.value(i).substr(0, truncated_size) +
- "' + " + std::to_string(size - truncated_size));
- }
- }
-
- void PrintMetadata(const std::string& metadata_type, const KeyValueMetadata& metadata) {
- if (metadata.size() > 0) {
- Newline();
+ size_t size = metadata.value(i).size();
+ size_t truncated_size = std::max<size_t>(10, 70 - metadata.key(i).size() - indent_);
+ if (size <= truncated_size) {
+ Write(metadata.key(i) + ": '" + metadata.value(i) + "'");
+ continue;
+ }
+
+ Write(metadata.key(i) + ": '" + metadata.value(i).substr(0, truncated_size) +
+ "' + " + std::to_string(size - truncated_size));
+ }
+ }
+
+ void PrintMetadata(const std::string& metadata_type, const KeyValueMetadata& metadata) {
+ if (metadata.size() > 0) {
+ Newline();
Indent();
- Write(metadata_type);
- if (options_.truncate_metadata) {
- PrintTruncatedMetadata(metadata);
- } else {
- PrintVerboseMetadata(metadata);
- }
- }
- }
-
- Status Print() {
- for (int i = 0; i < schema_.num_fields(); ++i) {
- if (i > 0) {
- Newline();
- Indent();
- } else {
+ Write(metadata_type);
+ if (options_.truncate_metadata) {
+ PrintTruncatedMetadata(metadata);
+ } else {
+ PrintVerboseMetadata(metadata);
+ }
+ }
+ }
+
+ Status Print() {
+ for (int i = 0; i < schema_.num_fields(); ++i) {
+ if (i > 0) {
+ Newline();
Indent();
- }
- RETURN_NOT_OK(PrintField(*schema_.field(i)));
- }
-
- if (options_.show_schema_metadata && schema_.metadata() != nullptr) {
- PrintMetadata("-- schema metadata --", *schema_.metadata());
- }
- Flush();
- return Status::OK();
- }
-
- private:
- const Schema& schema_;
-};
-
-Status SchemaPrinter::PrintType(const DataType& type, bool nullable) {
- Write(type.ToString());
- if (!nullable) {
- Write(" not null");
- }
- for (int i = 0; i < type.num_fields(); ++i) {
- Newline();
+ } else {
+ Indent();
+ }
+ RETURN_NOT_OK(PrintField(*schema_.field(i)));
+ }
+
+ if (options_.show_schema_metadata && schema_.metadata() != nullptr) {
+ PrintMetadata("-- schema metadata --", *schema_.metadata());
+ }
+ Flush();
+ return Status::OK();
+ }
+
+ private:
+ const Schema& schema_;
+};
+
+Status SchemaPrinter::PrintType(const DataType& type, bool nullable) {
+ Write(type.ToString());
+ if (!nullable) {
+ Write(" not null");
+ }
+ for (int i = 0; i < type.num_fields(); ++i) {
+ Newline();
Indent();
-
- std::stringstream ss;
- ss << "child " << i << ", ";
-
- indent_ += options_.indent_size;
- WriteIndented(ss.str());
- RETURN_NOT_OK(PrintField(*type.field(i)));
- indent_ -= options_.indent_size;
- }
- return Status::OK();
-}
-
-Status SchemaPrinter::PrintField(const Field& field) {
- Write(field.name());
- Write(": ");
- RETURN_NOT_OK(PrintType(*field.type(), field.nullable()));
-
- if (options_.show_field_metadata && field.metadata() != nullptr) {
- indent_ += options_.indent_size;
- PrintMetadata("-- field metadata --", *field.metadata());
- indent_ -= options_.indent_size;
- }
- return Status::OK();
-}
-
-Status PrettyPrint(const Schema& schema, const PrettyPrintOptions& options,
- std::ostream* sink) {
- SchemaPrinter printer(schema, options, sink);
- return printer.Print();
-}
-
-Status PrettyPrint(const Schema& schema, const PrettyPrintOptions& options,
- std::string* result) {
- std::ostringstream sink;
- RETURN_NOT_OK(PrettyPrint(schema, options, &sink));
- *result = sink.str();
- return Status::OK();
-}
-
-} // namespace arrow
+
+ std::stringstream ss;
+ ss << "child " << i << ", ";
+
+ indent_ += options_.indent_size;
+ WriteIndented(ss.str());
+ RETURN_NOT_OK(PrintField(*type.field(i)));
+ indent_ -= options_.indent_size;
+ }
+ return Status::OK();
+}
+
+Status SchemaPrinter::PrintField(const Field& field) {
+ Write(field.name());
+ Write(": ");
+ RETURN_NOT_OK(PrintType(*field.type(), field.nullable()));
+
+ if (options_.show_field_metadata && field.metadata() != nullptr) {
+ indent_ += options_.indent_size;
+ PrintMetadata("-- field metadata --", *field.metadata());
+ indent_ -= options_.indent_size;
+ }
+ return Status::OK();
+}
+
+Status PrettyPrint(const Schema& schema, const PrettyPrintOptions& options,
+ std::ostream* sink) {
+ SchemaPrinter printer(schema, options, sink);
+ return printer.Print();
+}
+
+Status PrettyPrint(const Schema& schema, const PrettyPrintOptions& options,
+ std::string* result) {
+ std::ostringstream sink;
+ RETURN_NOT_OK(PrettyPrint(schema, options, &sink));
+ *result = sink.str();
+ return Status::OK();
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/pretty_print.h b/contrib/libs/apache/arrow/cpp/src/arrow/pretty_print.h
index 1bc086a6889..7b070e797f7 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/pretty_print.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/pretty_print.h
@@ -1,125 +1,125 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <iosfwd>
-#include <string>
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <iosfwd>
+#include <string>
#include <utility>
-
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class Array;
-class ChunkedArray;
-class RecordBatch;
-class Schema;
-class Status;
-class Table;
-
-struct PrettyPrintOptions {
- PrettyPrintOptions() = default;
-
+
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Array;
+class ChunkedArray;
+class RecordBatch;
+class Schema;
+class Status;
+class Table;
+
+struct PrettyPrintOptions {
+ PrettyPrintOptions() = default;
+
PrettyPrintOptions(int indent_arg, // NOLINT runtime/explicit
int window_arg = 10, int indent_size_arg = 2,
- std::string null_rep_arg = "null", bool skip_new_lines_arg = false,
- bool truncate_metadata_arg = true)
- : indent(indent_arg),
- indent_size(indent_size_arg),
- window(window_arg),
+ std::string null_rep_arg = "null", bool skip_new_lines_arg = false,
+ bool truncate_metadata_arg = true)
+ : indent(indent_arg),
+ indent_size(indent_size_arg),
+ window(window_arg),
null_rep(std::move(null_rep_arg)),
- skip_new_lines(skip_new_lines_arg),
- truncate_metadata(truncate_metadata_arg) {}
-
- static PrettyPrintOptions Defaults() { return PrettyPrintOptions(); }
-
- /// Number of spaces to shift entire formatted object to the right
- int indent = 0;
-
- /// Size of internal indents
- int indent_size = 2;
-
- /// Maximum number of elements to show at the beginning and at the end.
- int window = 10;
-
- /// String to use for representing a null value, defaults to "null"
- std::string null_rep = "null";
-
- /// Skip new lines between elements, defaults to false
- bool skip_new_lines = false;
-
- /// Limit display of each KeyValueMetadata key/value pair to a single line at
- /// 80 character width
- bool truncate_metadata = true;
-
- /// If true, display field metadata when pretty-printing a Schema
- bool show_field_metadata = true;
-
- /// If true, display schema metadata when pretty-printing a Schema
- bool show_schema_metadata = true;
-};
-
-/// \brief Print human-readable representation of RecordBatch
-ARROW_EXPORT
-Status PrettyPrint(const RecordBatch& batch, int indent, std::ostream* sink);
-
-ARROW_EXPORT
-Status PrettyPrint(const RecordBatch& batch, const PrettyPrintOptions& options,
- std::ostream* sink);
-
-/// \brief Print human-readable representation of Table
-ARROW_EXPORT
-Status PrettyPrint(const Table& table, const PrettyPrintOptions& options,
- std::ostream* sink);
-
-/// \brief Print human-readable representation of Array
-ARROW_EXPORT
-Status PrettyPrint(const Array& arr, int indent, std::ostream* sink);
-
-/// \brief Print human-readable representation of Array
-ARROW_EXPORT
-Status PrettyPrint(const Array& arr, const PrettyPrintOptions& options,
- std::ostream* sink);
-
-/// \brief Print human-readable representation of Array
-ARROW_EXPORT
-Status PrettyPrint(const Array& arr, const PrettyPrintOptions& options,
- std::string* result);
-
-/// \brief Print human-readable representation of ChunkedArray
-ARROW_EXPORT
-Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& options,
- std::ostream* sink);
-
-/// \brief Print human-readable representation of ChunkedArray
-ARROW_EXPORT
-Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& options,
- std::string* result);
-
-ARROW_EXPORT
-Status PrettyPrint(const Schema& schema, const PrettyPrintOptions& options,
- std::ostream* sink);
-
-ARROW_EXPORT
-Status PrettyPrint(const Schema& schema, const PrettyPrintOptions& options,
- std::string* result);
-
-ARROW_EXPORT
-Status DebugPrint(const Array& arr, int indent);
-
-} // namespace arrow
+ skip_new_lines(skip_new_lines_arg),
+ truncate_metadata(truncate_metadata_arg) {}
+
+ static PrettyPrintOptions Defaults() { return PrettyPrintOptions(); }
+
+ /// Number of spaces to shift entire formatted object to the right
+ int indent = 0;
+
+ /// Size of internal indents
+ int indent_size = 2;
+
+ /// Maximum number of elements to show at the beginning and at the end.
+ int window = 10;
+
+ /// String to use for representing a null value, defaults to "null"
+ std::string null_rep = "null";
+
+ /// Skip new lines between elements, defaults to false
+ bool skip_new_lines = false;
+
+ /// Limit display of each KeyValueMetadata key/value pair to a single line at
+ /// 80 character width
+ bool truncate_metadata = true;
+
+ /// If true, display field metadata when pretty-printing a Schema
+ bool show_field_metadata = true;
+
+ /// If true, display schema metadata when pretty-printing a Schema
+ bool show_schema_metadata = true;
+};
+
+/// \brief Print human-readable representation of RecordBatch
+ARROW_EXPORT
+Status PrettyPrint(const RecordBatch& batch, int indent, std::ostream* sink);
+
+ARROW_EXPORT
+Status PrettyPrint(const RecordBatch& batch, const PrettyPrintOptions& options,
+ std::ostream* sink);
+
+/// \brief Print human-readable representation of Table
+ARROW_EXPORT
+Status PrettyPrint(const Table& table, const PrettyPrintOptions& options,
+ std::ostream* sink);
+
+/// \brief Print human-readable representation of Array
+ARROW_EXPORT
+Status PrettyPrint(const Array& arr, int indent, std::ostream* sink);
+
+/// \brief Print human-readable representation of Array
+ARROW_EXPORT
+Status PrettyPrint(const Array& arr, const PrettyPrintOptions& options,
+ std::ostream* sink);
+
+/// \brief Print human-readable representation of Array
+ARROW_EXPORT
+Status PrettyPrint(const Array& arr, const PrettyPrintOptions& options,
+ std::string* result);
+
+/// \brief Print human-readable representation of ChunkedArray
+ARROW_EXPORT
+Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& options,
+ std::ostream* sink);
+
+/// \brief Print human-readable representation of ChunkedArray
+ARROW_EXPORT
+Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& options,
+ std::string* result);
+
+ARROW_EXPORT
+Status PrettyPrint(const Schema& schema, const PrettyPrintOptions& options,
+ std::ostream* sink);
+
+ARROW_EXPORT
+Status PrettyPrint(const Schema& schema, const PrettyPrintOptions& options,
+ std::string* result);
+
+ARROW_EXPORT
+Status DebugPrint(const Array& arr, int indent);
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/record_batch.cc b/contrib/libs/apache/arrow/cpp/src/arrow/record_batch.cc
index 66f9e932b58..4a7a8beb4ff 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/record_batch.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/record_batch.cc
@@ -1,74 +1,74 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/record_batch.h"
-
-#include <algorithm>
-#include <cstdlib>
-#include <memory>
-#include <sstream>
-#include <string>
-#include <utility>
-
-#include "arrow/array.h"
-#include "arrow/array/validate.h"
-#include "arrow/pretty_print.h"
-#include "arrow/status.h"
-#include "arrow/table.h"
-#include "arrow/type.h"
-#include "arrow/util/atomic_shared_ptr.h"
-#include "arrow/util/iterator.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/vector.h"
-
-namespace arrow {
-
-Result<std::shared_ptr<RecordBatch>> RecordBatch::AddColumn(
- int i, std::string field_name, const std::shared_ptr<Array>& column) const {
- auto field = ::arrow::field(std::move(field_name), column->type());
- return AddColumn(i, field, column);
-}
-
-std::shared_ptr<Array> RecordBatch::GetColumnByName(const std::string& name) const {
- auto i = schema_->GetFieldIndex(name);
- return i == -1 ? NULLPTR : column(i);
-}
-
-int RecordBatch::num_columns() const { return schema_->num_fields(); }
-
-/// \class SimpleRecordBatch
-/// \brief A basic, non-lazy in-memory record batch
-class SimpleRecordBatch : public RecordBatch {
- public:
- SimpleRecordBatch(std::shared_ptr<Schema> schema, int64_t num_rows,
- std::vector<std::shared_ptr<Array>> columns)
- : RecordBatch(std::move(schema), num_rows), boxed_columns_(std::move(columns)) {
- columns_.resize(boxed_columns_.size());
- for (size_t i = 0; i < columns_.size(); ++i) {
- columns_[i] = boxed_columns_[i]->data();
- }
- }
-
- SimpleRecordBatch(const std::shared_ptr<Schema>& schema, int64_t num_rows,
- std::vector<std::shared_ptr<ArrayData>> columns)
- : RecordBatch(std::move(schema), num_rows), columns_(std::move(columns)) {
- boxed_columns_.resize(schema_->num_fields());
- }
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/record_batch.h"
+
+#include <algorithm>
+#include <cstdlib>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <utility>
+
+#include "arrow/array.h"
+#include "arrow/array/validate.h"
+#include "arrow/pretty_print.h"
+#include "arrow/status.h"
+#include "arrow/table.h"
+#include "arrow/type.h"
+#include "arrow/util/atomic_shared_ptr.h"
+#include "arrow/util/iterator.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/vector.h"
+
+namespace arrow {
+
+Result<std::shared_ptr<RecordBatch>> RecordBatch::AddColumn(
+ int i, std::string field_name, const std::shared_ptr<Array>& column) const {
+ auto field = ::arrow::field(std::move(field_name), column->type());
+ return AddColumn(i, field, column);
+}
+
+std::shared_ptr<Array> RecordBatch::GetColumnByName(const std::string& name) const {
+ auto i = schema_->GetFieldIndex(name);
+ return i == -1 ? NULLPTR : column(i);
+}
+
+int RecordBatch::num_columns() const { return schema_->num_fields(); }
+
+/// \class SimpleRecordBatch
+/// \brief A basic, non-lazy in-memory record batch
+class SimpleRecordBatch : public RecordBatch {
+ public:
+ SimpleRecordBatch(std::shared_ptr<Schema> schema, int64_t num_rows,
+ std::vector<std::shared_ptr<Array>> columns)
+ : RecordBatch(std::move(schema), num_rows), boxed_columns_(std::move(columns)) {
+ columns_.resize(boxed_columns_.size());
+ for (size_t i = 0; i < columns_.size(); ++i) {
+ columns_[i] = boxed_columns_[i]->data();
+ }
+ }
+
+ SimpleRecordBatch(const std::shared_ptr<Schema>& schema, int64_t num_rows,
+ std::vector<std::shared_ptr<ArrayData>> columns)
+ : RecordBatch(std::move(schema), num_rows), columns_(std::move(columns)) {
+ boxed_columns_.resize(schema_->num_fields());
+ }
+
const std::vector<std::shared_ptr<Array>>& columns() const override {
for (int i = 0; i < num_columns(); ++i) {
// Force all columns to be boxed
@@ -77,41 +77,41 @@ class SimpleRecordBatch : public RecordBatch {
return boxed_columns_;
}
- std::shared_ptr<Array> column(int i) const override {
- std::shared_ptr<Array> result = internal::atomic_load(&boxed_columns_[i]);
- if (!result) {
- result = MakeArray(columns_[i]);
- internal::atomic_store(&boxed_columns_[i], result);
- }
- return result;
- }
-
- std::shared_ptr<ArrayData> column_data(int i) const override { return columns_[i]; }
-
+ std::shared_ptr<Array> column(int i) const override {
+ std::shared_ptr<Array> result = internal::atomic_load(&boxed_columns_[i]);
+ if (!result) {
+ result = MakeArray(columns_[i]);
+ internal::atomic_store(&boxed_columns_[i], result);
+ }
+ return result;
+ }
+
+ std::shared_ptr<ArrayData> column_data(int i) const override { return columns_[i]; }
+
const ArrayDataVector& column_data() const override { return columns_; }
-
- Result<std::shared_ptr<RecordBatch>> AddColumn(
- int i, const std::shared_ptr<Field>& field,
- const std::shared_ptr<Array>& column) const override {
- ARROW_CHECK(field != nullptr);
- ARROW_CHECK(column != nullptr);
-
- if (!field->type()->Equals(column->type())) {
+
+ Result<std::shared_ptr<RecordBatch>> AddColumn(
+ int i, const std::shared_ptr<Field>& field,
+ const std::shared_ptr<Array>& column) const override {
+ ARROW_CHECK(field != nullptr);
+ ARROW_CHECK(column != nullptr);
+
+ if (!field->type()->Equals(column->type())) {
return Status::TypeError("Column data type ", field->type()->name(),
" does not match field data type ",
column->type()->name());
- }
- if (column->length() != num_rows_) {
- return Status::Invalid(
- "Added column's length must match record batch's length. Expected length ",
- num_rows_, " but got length ", column->length());
- }
-
- ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->AddField(i, field));
+ }
+ if (column->length() != num_rows_) {
+ return Status::Invalid(
+ "Added column's length must match record batch's length. Expected length ",
+ num_rows_, " but got length ", column->length());
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->AddField(i, field));
return RecordBatch::Make(std::move(new_schema), num_rows_,
- internal::AddVectorElement(columns_, i, column->data()));
- }
-
+ internal::AddVectorElement(columns_, i, column->data()));
+ }
+
Result<std::shared_ptr<RecordBatch>> SetColumn(
int i, const std::shared_ptr<Field>& field,
const std::shared_ptr<Array>& column) const override {
@@ -134,123 +134,123 @@ class SimpleRecordBatch : public RecordBatch {
internal::ReplaceVectorElement(columns_, i, column->data()));
}
- Result<std::shared_ptr<RecordBatch>> RemoveColumn(int i) const override {
- ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->RemoveField(i));
+ Result<std::shared_ptr<RecordBatch>> RemoveColumn(int i) const override {
+ ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->RemoveField(i));
return RecordBatch::Make(std::move(new_schema), num_rows_,
- internal::DeleteVectorElement(columns_, i));
- }
-
- std::shared_ptr<RecordBatch> ReplaceSchemaMetadata(
- const std::shared_ptr<const KeyValueMetadata>& metadata) const override {
- auto new_schema = schema_->WithMetadata(metadata);
+ internal::DeleteVectorElement(columns_, i));
+ }
+
+ std::shared_ptr<RecordBatch> ReplaceSchemaMetadata(
+ const std::shared_ptr<const KeyValueMetadata>& metadata) const override {
+ auto new_schema = schema_->WithMetadata(metadata);
return RecordBatch::Make(std::move(new_schema), num_rows_, columns_);
- }
-
- std::shared_ptr<RecordBatch> Slice(int64_t offset, int64_t length) const override {
- std::vector<std::shared_ptr<ArrayData>> arrays;
- arrays.reserve(num_columns());
- for (const auto& field : columns_) {
- arrays.emplace_back(field->Slice(offset, length));
- }
- int64_t num_rows = std::min(num_rows_ - offset, length);
- return std::make_shared<SimpleRecordBatch>(schema_, num_rows, std::move(arrays));
- }
-
- Status Validate() const override {
- if (static_cast<int>(columns_.size()) != schema_->num_fields()) {
- return Status::Invalid("Number of columns did not match schema");
- }
- return RecordBatch::Validate();
- }
-
- private:
- std::vector<std::shared_ptr<ArrayData>> columns_;
-
- // Caching boxed array data
- mutable std::vector<std::shared_ptr<Array>> boxed_columns_;
-};
-
-RecordBatch::RecordBatch(const std::shared_ptr<Schema>& schema, int64_t num_rows)
- : schema_(schema), num_rows_(num_rows) {}
-
-std::shared_ptr<RecordBatch> RecordBatch::Make(
- std::shared_ptr<Schema> schema, int64_t num_rows,
- std::vector<std::shared_ptr<Array>> columns) {
- DCHECK_EQ(schema->num_fields(), static_cast<int>(columns.size()));
- return std::make_shared<SimpleRecordBatch>(std::move(schema), num_rows, columns);
-}
-
-std::shared_ptr<RecordBatch> RecordBatch::Make(
- std::shared_ptr<Schema> schema, int64_t num_rows,
- std::vector<std::shared_ptr<ArrayData>> columns) {
- DCHECK_EQ(schema->num_fields(), static_cast<int>(columns.size()));
- return std::make_shared<SimpleRecordBatch>(std::move(schema), num_rows,
- std::move(columns));
-}
-
-Result<std::shared_ptr<RecordBatch>> RecordBatch::FromStructArray(
- const std::shared_ptr<Array>& array) {
- if (array->type_id() != Type::STRUCT) {
+ }
+
+ std::shared_ptr<RecordBatch> Slice(int64_t offset, int64_t length) const override {
+ std::vector<std::shared_ptr<ArrayData>> arrays;
+ arrays.reserve(num_columns());
+ for (const auto& field : columns_) {
+ arrays.emplace_back(field->Slice(offset, length));
+ }
+ int64_t num_rows = std::min(num_rows_ - offset, length);
+ return std::make_shared<SimpleRecordBatch>(schema_, num_rows, std::move(arrays));
+ }
+
+ Status Validate() const override {
+ if (static_cast<int>(columns_.size()) != schema_->num_fields()) {
+ return Status::Invalid("Number of columns did not match schema");
+ }
+ return RecordBatch::Validate();
+ }
+
+ private:
+ std::vector<std::shared_ptr<ArrayData>> columns_;
+
+ // Caching boxed array data
+ mutable std::vector<std::shared_ptr<Array>> boxed_columns_;
+};
+
+RecordBatch::RecordBatch(const std::shared_ptr<Schema>& schema, int64_t num_rows)
+ : schema_(schema), num_rows_(num_rows) {}
+
+std::shared_ptr<RecordBatch> RecordBatch::Make(
+ std::shared_ptr<Schema> schema, int64_t num_rows,
+ std::vector<std::shared_ptr<Array>> columns) {
+ DCHECK_EQ(schema->num_fields(), static_cast<int>(columns.size()));
+ return std::make_shared<SimpleRecordBatch>(std::move(schema), num_rows, columns);
+}
+
+std::shared_ptr<RecordBatch> RecordBatch::Make(
+ std::shared_ptr<Schema> schema, int64_t num_rows,
+ std::vector<std::shared_ptr<ArrayData>> columns) {
+ DCHECK_EQ(schema->num_fields(), static_cast<int>(columns.size()));
+ return std::make_shared<SimpleRecordBatch>(std::move(schema), num_rows,
+ std::move(columns));
+}
+
+Result<std::shared_ptr<RecordBatch>> RecordBatch::FromStructArray(
+ const std::shared_ptr<Array>& array) {
+ if (array->type_id() != Type::STRUCT) {
return Status::TypeError("Cannot construct record batch from array of type ",
*array->type());
- }
- if (array->null_count() != 0) {
- return Status::Invalid(
- "Unable to construct record batch from a StructArray with non-zero nulls.");
- }
- return Make(arrow::schema(array->type()->fields()), array->length(),
- array->data()->child_data);
-}
-
-Result<std::shared_ptr<StructArray>> RecordBatch::ToStructArray() const {
- if (num_columns() != 0) {
- return StructArray::Make(columns(), schema()->fields());
- }
- return std::make_shared<StructArray>(arrow::struct_({}), num_rows_,
- std::vector<std::shared_ptr<Array>>{},
- /*null_bitmap=*/nullptr,
- /*null_count=*/0,
- /*offset=*/0);
-}
-
-const std::string& RecordBatch::column_name(int i) const {
- return schema_->field(i)->name();
-}
-
-bool RecordBatch::Equals(const RecordBatch& other, bool check_metadata) const {
- if (num_columns() != other.num_columns() || num_rows_ != other.num_rows()) {
- return false;
- }
-
- if (check_metadata) {
- if (!schema_->Equals(*other.schema(), /*check_metadata=*/true)) {
- return false;
- }
- }
-
- for (int i = 0; i < num_columns(); ++i) {
- if (!column(i)->Equals(other.column(i))) {
- return false;
- }
- }
-
- return true;
-}
-
-bool RecordBatch::ApproxEquals(const RecordBatch& other) const {
- if (num_columns() != other.num_columns() || num_rows_ != other.num_rows()) {
- return false;
- }
-
- for (int i = 0; i < num_columns(); ++i) {
- if (!column(i)->ApproxEquals(other.column(i))) {
- return false;
- }
- }
-
- return true;
-}
-
+ }
+ if (array->null_count() != 0) {
+ return Status::Invalid(
+ "Unable to construct record batch from a StructArray with non-zero nulls.");
+ }
+ return Make(arrow::schema(array->type()->fields()), array->length(),
+ array->data()->child_data);
+}
+
+Result<std::shared_ptr<StructArray>> RecordBatch::ToStructArray() const {
+ if (num_columns() != 0) {
+ return StructArray::Make(columns(), schema()->fields());
+ }
+ return std::make_shared<StructArray>(arrow::struct_({}), num_rows_,
+ std::vector<std::shared_ptr<Array>>{},
+ /*null_bitmap=*/nullptr,
+ /*null_count=*/0,
+ /*offset=*/0);
+}
+
+const std::string& RecordBatch::column_name(int i) const {
+ return schema_->field(i)->name();
+}
+
+bool RecordBatch::Equals(const RecordBatch& other, bool check_metadata) const {
+ if (num_columns() != other.num_columns() || num_rows_ != other.num_rows()) {
+ return false;
+ }
+
+ if (check_metadata) {
+ if (!schema_->Equals(*other.schema(), /*check_metadata=*/true)) {
+ return false;
+ }
+ }
+
+ for (int i = 0; i < num_columns(); ++i) {
+ if (!column(i)->Equals(other.column(i))) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool RecordBatch::ApproxEquals(const RecordBatch& other) const {
+ if (num_columns() != other.num_columns() || num_rows_ != other.num_rows()) {
+ return false;
+ }
+
+ for (int i = 0; i < num_columns(); ++i) {
+ if (!column(i)->ApproxEquals(other.column(i))) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
Result<std::shared_ptr<RecordBatch>> RecordBatch::SelectColumns(
const std::vector<int>& indices) const {
int n = static_cast<int>(indices.size());
@@ -272,96 +272,96 @@ Result<std::shared_ptr<RecordBatch>> RecordBatch::SelectColumns(
return RecordBatch::Make(std::move(new_schema), num_rows(), std::move(columns));
}
-std::shared_ptr<RecordBatch> RecordBatch::Slice(int64_t offset) const {
- return Slice(offset, this->num_rows() - offset);
-}
-
-std::string RecordBatch::ToString() const {
- std::stringstream ss;
- ARROW_CHECK_OK(PrettyPrint(*this, 0, &ss));
- return ss.str();
-}
-
-Status RecordBatch::Validate() const {
- for (int i = 0; i < num_columns(); ++i) {
- const auto& array = *this->column(i);
- if (array.length() != num_rows_) {
- return Status::Invalid("Number of rows in column ", i,
- " did not match batch: ", array.length(), " vs ", num_rows_);
- }
- const auto& schema_type = *schema_->field(i)->type();
- if (!array.type()->Equals(schema_type)) {
- return Status::Invalid("Column ", i,
- " type not match schema: ", array.type()->ToString(), " vs ",
- schema_type.ToString());
- }
- RETURN_NOT_OK(internal::ValidateArray(array));
- }
- return Status::OK();
-}
-
-Status RecordBatch::ValidateFull() const {
- RETURN_NOT_OK(Validate());
- for (int i = 0; i < num_columns(); ++i) {
- const auto& array = *this->column(i);
+std::shared_ptr<RecordBatch> RecordBatch::Slice(int64_t offset) const {
+ return Slice(offset, this->num_rows() - offset);
+}
+
+std::string RecordBatch::ToString() const {
+ std::stringstream ss;
+ ARROW_CHECK_OK(PrettyPrint(*this, 0, &ss));
+ return ss.str();
+}
+
+Status RecordBatch::Validate() const {
+ for (int i = 0; i < num_columns(); ++i) {
+ const auto& array = *this->column(i);
+ if (array.length() != num_rows_) {
+ return Status::Invalid("Number of rows in column ", i,
+ " did not match batch: ", array.length(), " vs ", num_rows_);
+ }
+ const auto& schema_type = *schema_->field(i)->type();
+ if (!array.type()->Equals(schema_type)) {
+ return Status::Invalid("Column ", i,
+ " type not match schema: ", array.type()->ToString(), " vs ",
+ schema_type.ToString());
+ }
+ RETURN_NOT_OK(internal::ValidateArray(array));
+ }
+ return Status::OK();
+}
+
+Status RecordBatch::ValidateFull() const {
+ RETURN_NOT_OK(Validate());
+ for (int i = 0; i < num_columns(); ++i) {
+ const auto& array = *this->column(i);
RETURN_NOT_OK(internal::ValidateArrayFull(array));
- }
- return Status::OK();
-}
-
-// ----------------------------------------------------------------------
-// Base record batch reader
-
-Status RecordBatchReader::ReadAll(std::vector<std::shared_ptr<RecordBatch>>* batches) {
- while (true) {
- std::shared_ptr<RecordBatch> batch;
- RETURN_NOT_OK(ReadNext(&batch));
- if (!batch) {
- break;
- }
- batches->emplace_back(std::move(batch));
- }
- return Status::OK();
-}
-
-Status RecordBatchReader::ReadAll(std::shared_ptr<Table>* table) {
- std::vector<std::shared_ptr<RecordBatch>> batches;
- RETURN_NOT_OK(ReadAll(&batches));
- return Table::FromRecordBatches(schema(), std::move(batches)).Value(table);
-}
-
-class SimpleRecordBatchReader : public RecordBatchReader {
- public:
- SimpleRecordBatchReader(Iterator<std::shared_ptr<RecordBatch>> it,
- std::shared_ptr<Schema> schema)
- : schema_(std::move(schema)), it_(std::move(it)) {}
-
- SimpleRecordBatchReader(std::vector<std::shared_ptr<RecordBatch>> batches,
- std::shared_ptr<Schema> schema)
- : schema_(std::move(schema)), it_(MakeVectorIterator(std::move(batches))) {}
-
- Status ReadNext(std::shared_ptr<RecordBatch>* batch) override {
- return it_.Next().Value(batch);
- }
-
- std::shared_ptr<Schema> schema() const override { return schema_; }
-
- protected:
- std::shared_ptr<Schema> schema_;
- Iterator<std::shared_ptr<RecordBatch>> it_;
-};
-
-Result<std::shared_ptr<RecordBatchReader>> RecordBatchReader::Make(
- std::vector<std::shared_ptr<RecordBatch>> batches, std::shared_ptr<Schema> schema) {
- if (schema == nullptr) {
- if (batches.size() == 0 || batches[0] == nullptr) {
- return Status::Invalid("Cannot infer schema from empty vector or nullptr");
- }
-
- schema = batches[0]->schema();
- }
-
- return std::make_shared<SimpleRecordBatchReader>(std::move(batches), schema);
-}
-
-} // namespace arrow
+ }
+ return Status::OK();
+}
+
+// ----------------------------------------------------------------------
+// Base record batch reader
+
+Status RecordBatchReader::ReadAll(std::vector<std::shared_ptr<RecordBatch>>* batches) {
+ while (true) {
+ std::shared_ptr<RecordBatch> batch;
+ RETURN_NOT_OK(ReadNext(&batch));
+ if (!batch) {
+ break;
+ }
+ batches->emplace_back(std::move(batch));
+ }
+ return Status::OK();
+}
+
+Status RecordBatchReader::ReadAll(std::shared_ptr<Table>* table) {
+ std::vector<std::shared_ptr<RecordBatch>> batches;
+ RETURN_NOT_OK(ReadAll(&batches));
+ return Table::FromRecordBatches(schema(), std::move(batches)).Value(table);
+}
+
+class SimpleRecordBatchReader : public RecordBatchReader {
+ public:
+ SimpleRecordBatchReader(Iterator<std::shared_ptr<RecordBatch>> it,
+ std::shared_ptr<Schema> schema)
+ : schema_(std::move(schema)), it_(std::move(it)) {}
+
+ SimpleRecordBatchReader(std::vector<std::shared_ptr<RecordBatch>> batches,
+ std::shared_ptr<Schema> schema)
+ : schema_(std::move(schema)), it_(MakeVectorIterator(std::move(batches))) {}
+
+ Status ReadNext(std::shared_ptr<RecordBatch>* batch) override {
+ return it_.Next().Value(batch);
+ }
+
+ std::shared_ptr<Schema> schema() const override { return schema_; }
+
+ protected:
+ std::shared_ptr<Schema> schema_;
+ Iterator<std::shared_ptr<RecordBatch>> it_;
+};
+
+Result<std::shared_ptr<RecordBatchReader>> RecordBatchReader::Make(
+ std::vector<std::shared_ptr<RecordBatch>> batches, std::shared_ptr<Schema> schema) {
+ if (schema == nullptr) {
+ if (batches.size() == 0 || batches[0] == nullptr) {
+ return Status::Invalid("Cannot infer schema from empty vector or nullptr");
+ }
+
+ schema = batches[0]->schema();
+ }
+
+ return std::make_shared<SimpleRecordBatchReader>(std::move(batches), schema);
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/record_batch.h b/contrib/libs/apache/arrow/cpp/src/arrow/record_batch.h
index 3dc1f54a083..02ab6fecbeb 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/record_batch.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/record_batch.h
@@ -1,238 +1,238 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-/// \class RecordBatch
-/// \brief Collection of equal-length arrays matching a particular Schema
-///
-/// A record batch is table-like data structure that is semantically a sequence
-/// of fields, each a contiguous Arrow array
-class ARROW_EXPORT RecordBatch {
- public:
- virtual ~RecordBatch() = default;
-
- /// \param[in] schema The record batch schema
- /// \param[in] num_rows length of fields in the record batch. Each array
- /// should have the same length as num_rows
- /// \param[in] columns the record batch fields as vector of arrays
- static std::shared_ptr<RecordBatch> Make(std::shared_ptr<Schema> schema,
- int64_t num_rows,
- std::vector<std::shared_ptr<Array>> columns);
-
- /// \brief Construct record batch from vector of internal data structures
- /// \since 0.5.0
- ///
- /// This class is intended for internal use, or advanced users.
- ///
- /// \param schema the record batch schema
- /// \param num_rows the number of semantic rows in the record batch. This
- /// should be equal to the length of each field
- /// \param columns the data for the batch's columns
- static std::shared_ptr<RecordBatch> Make(
- std::shared_ptr<Schema> schema, int64_t num_rows,
- std::vector<std::shared_ptr<ArrayData>> columns);
-
- /// \brief Convert record batch to struct array
- ///
- /// Create a struct array whose child arrays are the record batch's columns.
- /// Note that the record batch's top-level field metadata cannot be reflected
- /// in the resulting struct array.
- Result<std::shared_ptr<StructArray>> ToStructArray() const;
-
- /// \brief Construct record batch from struct array
- ///
- /// This constructs a record batch using the child arrays of the given
- /// array, which must be a struct array. Note that the struct array's own
- /// null bitmap is not reflected in the resulting record batch.
- static Result<std::shared_ptr<RecordBatch>> FromStructArray(
- const std::shared_ptr<Array>& array);
-
- /// \brief Determine if two record batches are exactly equal
- ///
- /// \param[in] other the RecordBatch to compare with
- /// \param[in] check_metadata if true, check that Schema metadata is the same
- /// \return true if batches are equal
- bool Equals(const RecordBatch& other, bool check_metadata = false) const;
-
- /// \brief Determine if two record batches are approximately equal
- bool ApproxEquals(const RecordBatch& other) const;
-
- // \return the table's schema
- /// \return true if batches are equal
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+/// \class RecordBatch
+/// \brief Collection of equal-length arrays matching a particular Schema
+///
+/// A record batch is table-like data structure that is semantically a sequence
+/// of fields, each a contiguous Arrow array
+class ARROW_EXPORT RecordBatch {
+ public:
+ virtual ~RecordBatch() = default;
+
+ /// \param[in] schema The record batch schema
+ /// \param[in] num_rows length of fields in the record batch. Each array
+ /// should have the same length as num_rows
+ /// \param[in] columns the record batch fields as vector of arrays
+ static std::shared_ptr<RecordBatch> Make(std::shared_ptr<Schema> schema,
+ int64_t num_rows,
+ std::vector<std::shared_ptr<Array>> columns);
+
+ /// \brief Construct record batch from vector of internal data structures
+ /// \since 0.5.0
+ ///
+ /// This class is intended for internal use, or advanced users.
+ ///
+ /// \param schema the record batch schema
+ /// \param num_rows the number of semantic rows in the record batch. This
+ /// should be equal to the length of each field
+ /// \param columns the data for the batch's columns
+ static std::shared_ptr<RecordBatch> Make(
+ std::shared_ptr<Schema> schema, int64_t num_rows,
+ std::vector<std::shared_ptr<ArrayData>> columns);
+
+ /// \brief Convert record batch to struct array
+ ///
+ /// Create a struct array whose child arrays are the record batch's columns.
+ /// Note that the record batch's top-level field metadata cannot be reflected
+ /// in the resulting struct array.
+ Result<std::shared_ptr<StructArray>> ToStructArray() const;
+
+ /// \brief Construct record batch from struct array
+ ///
+ /// This constructs a record batch using the child arrays of the given
+ /// array, which must be a struct array. Note that the struct array's own
+ /// null bitmap is not reflected in the resulting record batch.
+ static Result<std::shared_ptr<RecordBatch>> FromStructArray(
+ const std::shared_ptr<Array>& array);
+
+ /// \brief Determine if two record batches are exactly equal
+ ///
+ /// \param[in] other the RecordBatch to compare with
+ /// \param[in] check_metadata if true, check that Schema metadata is the same
+ /// \return true if batches are equal
+ bool Equals(const RecordBatch& other, bool check_metadata = false) const;
+
+ /// \brief Determine if two record batches are approximately equal
+ bool ApproxEquals(const RecordBatch& other) const;
+
+ // \return the table's schema
+ /// \return true if batches are equal
const std::shared_ptr<Schema>& schema() const { return schema_; }
-
- /// \brief Retrieve all columns at once
+
+ /// \brief Retrieve all columns at once
virtual const std::vector<std::shared_ptr<Array>>& columns() const = 0;
-
- /// \brief Retrieve an array from the record batch
- /// \param[in] i field index, does not boundscheck
- /// \return an Array object
- virtual std::shared_ptr<Array> column(int i) const = 0;
-
- /// \brief Retrieve an array from the record batch
- /// \param[in] name field name
- /// \return an Array or null if no field was found
- std::shared_ptr<Array> GetColumnByName(const std::string& name) const;
-
- /// \brief Retrieve an array's internal data from the record batch
- /// \param[in] i field index, does not boundscheck
- /// \return an internal ArrayData object
- virtual std::shared_ptr<ArrayData> column_data(int i) const = 0;
-
- /// \brief Retrieve all arrays' internal data from the record batch.
+
+ /// \brief Retrieve an array from the record batch
+ /// \param[in] i field index, does not boundscheck
+ /// \return an Array object
+ virtual std::shared_ptr<Array> column(int i) const = 0;
+
+ /// \brief Retrieve an array from the record batch
+ /// \param[in] name field name
+ /// \return an Array or null if no field was found
+ std::shared_ptr<Array> GetColumnByName(const std::string& name) const;
+
+ /// \brief Retrieve an array's internal data from the record batch
+ /// \param[in] i field index, does not boundscheck
+ /// \return an internal ArrayData object
+ virtual std::shared_ptr<ArrayData> column_data(int i) const = 0;
+
+ /// \brief Retrieve all arrays' internal data from the record batch.
virtual const ArrayDataVector& column_data() const = 0;
-
- /// \brief Add column to the record batch, producing a new RecordBatch
- ///
- /// \param[in] i field index, which will be boundschecked
- /// \param[in] field field to be added
- /// \param[in] column column to be added
- virtual Result<std::shared_ptr<RecordBatch>> AddColumn(
- int i, const std::shared_ptr<Field>& field,
- const std::shared_ptr<Array>& column) const = 0;
-
- /// \brief Add new nullable column to the record batch, producing a new
- /// RecordBatch.
- ///
- /// For non-nullable columns, use the Field-based version of this method.
- ///
- /// \param[in] i field index, which will be boundschecked
- /// \param[in] field_name name of field to be added
- /// \param[in] column column to be added
- virtual Result<std::shared_ptr<RecordBatch>> AddColumn(
- int i, std::string field_name, const std::shared_ptr<Array>& column) const;
-
+
+ /// \brief Add column to the record batch, producing a new RecordBatch
+ ///
+ /// \param[in] i field index, which will be boundschecked
+ /// \param[in] field field to be added
+ /// \param[in] column column to be added
+ virtual Result<std::shared_ptr<RecordBatch>> AddColumn(
+ int i, const std::shared_ptr<Field>& field,
+ const std::shared_ptr<Array>& column) const = 0;
+
+ /// \brief Add new nullable column to the record batch, producing a new
+ /// RecordBatch.
+ ///
+ /// For non-nullable columns, use the Field-based version of this method.
+ ///
+ /// \param[in] i field index, which will be boundschecked
+ /// \param[in] field_name name of field to be added
+ /// \param[in] column column to be added
+ virtual Result<std::shared_ptr<RecordBatch>> AddColumn(
+ int i, std::string field_name, const std::shared_ptr<Array>& column) const;
+
/// \brief Replace a column in the table, producing a new Table
virtual Result<std::shared_ptr<RecordBatch>> SetColumn(
int i, const std::shared_ptr<Field>& field,
const std::shared_ptr<Array>& column) const = 0;
- /// \brief Remove column from the record batch, producing a new RecordBatch
- ///
- /// \param[in] i field index, does boundscheck
- virtual Result<std::shared_ptr<RecordBatch>> RemoveColumn(int i) const = 0;
-
- virtual std::shared_ptr<RecordBatch> ReplaceSchemaMetadata(
- const std::shared_ptr<const KeyValueMetadata>& metadata) const = 0;
-
- /// \brief Name in i-th column
- const std::string& column_name(int i) const;
-
- /// \return the number of columns in the table
- int num_columns() const;
-
- /// \return the number of rows (the corresponding length of each column)
- int64_t num_rows() const { return num_rows_; }
-
- /// \brief Slice each of the arrays in the record batch
- /// \param[in] offset the starting offset to slice, through end of batch
- /// \return new record batch
- virtual std::shared_ptr<RecordBatch> Slice(int64_t offset) const;
-
- /// \brief Slice each of the arrays in the record batch
- /// \param[in] offset the starting offset to slice
- /// \param[in] length the number of elements to slice from offset
- /// \return new record batch
- virtual std::shared_ptr<RecordBatch> Slice(int64_t offset, int64_t length) const = 0;
-
- /// \return PrettyPrint representation suitable for debugging
- std::string ToString() const;
-
+ /// \brief Remove column from the record batch, producing a new RecordBatch
+ ///
+ /// \param[in] i field index, does boundscheck
+ virtual Result<std::shared_ptr<RecordBatch>> RemoveColumn(int i) const = 0;
+
+ virtual std::shared_ptr<RecordBatch> ReplaceSchemaMetadata(
+ const std::shared_ptr<const KeyValueMetadata>& metadata) const = 0;
+
+ /// \brief Name in i-th column
+ const std::string& column_name(int i) const;
+
+ /// \return the number of columns in the table
+ int num_columns() const;
+
+ /// \return the number of rows (the corresponding length of each column)
+ int64_t num_rows() const { return num_rows_; }
+
+ /// \brief Slice each of the arrays in the record batch
+ /// \param[in] offset the starting offset to slice, through end of batch
+ /// \return new record batch
+ virtual std::shared_ptr<RecordBatch> Slice(int64_t offset) const;
+
+ /// \brief Slice each of the arrays in the record batch
+ /// \param[in] offset the starting offset to slice
+ /// \param[in] length the number of elements to slice from offset
+ /// \return new record batch
+ virtual std::shared_ptr<RecordBatch> Slice(int64_t offset, int64_t length) const = 0;
+
+ /// \return PrettyPrint representation suitable for debugging
+ std::string ToString() const;
+
/// \brief Return new record batch with specified columns
Result<std::shared_ptr<RecordBatch>> SelectColumns(
const std::vector<int>& indices) const;
- /// \brief Perform cheap validation checks to determine obvious inconsistencies
- /// within the record batch's schema and internal data.
- ///
- /// This is O(k) where k is the total number of fields and array descendents.
- ///
- /// \return Status
- virtual Status Validate() const;
-
- /// \brief Perform extensive validation checks to determine inconsistencies
- /// within the record batch's schema and internal data.
- ///
- /// This is potentially O(k*n) where n is the number of rows.
- ///
- /// \return Status
- virtual Status ValidateFull() const;
-
- protected:
- RecordBatch(const std::shared_ptr<Schema>& schema, int64_t num_rows);
-
- std::shared_ptr<Schema> schema_;
- int64_t num_rows_;
-
- private:
- ARROW_DISALLOW_COPY_AND_ASSIGN(RecordBatch);
-};
-
-/// \brief Abstract interface for reading stream of record batches
-class ARROW_EXPORT RecordBatchReader {
- public:
+ /// \brief Perform cheap validation checks to determine obvious inconsistencies
+ /// within the record batch's schema and internal data.
+ ///
+ /// This is O(k) where k is the total number of fields and array descendents.
+ ///
+ /// \return Status
+ virtual Status Validate() const;
+
+ /// \brief Perform extensive validation checks to determine inconsistencies
+ /// within the record batch's schema and internal data.
+ ///
+ /// This is potentially O(k*n) where n is the number of rows.
+ ///
+ /// \return Status
+ virtual Status ValidateFull() const;
+
+ protected:
+ RecordBatch(const std::shared_ptr<Schema>& schema, int64_t num_rows);
+
+ std::shared_ptr<Schema> schema_;
+ int64_t num_rows_;
+
+ private:
+ ARROW_DISALLOW_COPY_AND_ASSIGN(RecordBatch);
+};
+
+/// \brief Abstract interface for reading stream of record batches
+class ARROW_EXPORT RecordBatchReader {
+ public:
using ValueType = std::shared_ptr<RecordBatch>;
- virtual ~RecordBatchReader() = default;
-
- /// \return the shared schema of the record batches in the stream
- virtual std::shared_ptr<Schema> schema() const = 0;
-
- /// \brief Read the next record batch in the stream. Return null for batch
- /// when reaching end of stream
- ///
- /// \param[out] batch the next loaded batch, null at end of stream
- /// \return Status
- virtual Status ReadNext(std::shared_ptr<RecordBatch>* batch) = 0;
-
- /// \brief Iterator interface
- Result<std::shared_ptr<RecordBatch>> Next() {
- std::shared_ptr<RecordBatch> batch;
- ARROW_RETURN_NOT_OK(ReadNext(&batch));
- return batch;
- }
-
- /// \brief Consume entire stream as a vector of record batches
- Status ReadAll(RecordBatchVector* batches);
-
- /// \brief Read all batches and concatenate as arrow::Table
- Status ReadAll(std::shared_ptr<Table>* table);
-
- /// \brief Create a RecordBatchReader from a vector of RecordBatch.
- ///
- /// \param[in] batches the vector of RecordBatch to read from
- /// \param[in] schema schema to conform to. Will be inferred from the first
- /// element if not provided.
- static Result<std::shared_ptr<RecordBatchReader>> Make(
- RecordBatchVector batches, std::shared_ptr<Schema> schema = NULLPTR);
-};
-
-} // namespace arrow
+ virtual ~RecordBatchReader() = default;
+
+ /// \return the shared schema of the record batches in the stream
+ virtual std::shared_ptr<Schema> schema() const = 0;
+
+ /// \brief Read the next record batch in the stream. Return null for batch
+ /// when reaching end of stream
+ ///
+ /// \param[out] batch the next loaded batch, null at end of stream
+ /// \return Status
+ virtual Status ReadNext(std::shared_ptr<RecordBatch>* batch) = 0;
+
+ /// \brief Iterator interface
+ Result<std::shared_ptr<RecordBatch>> Next() {
+ std::shared_ptr<RecordBatch> batch;
+ ARROW_RETURN_NOT_OK(ReadNext(&batch));
+ return batch;
+ }
+
+ /// \brief Consume entire stream as a vector of record batches
+ Status ReadAll(RecordBatchVector* batches);
+
+ /// \brief Read all batches and concatenate as arrow::Table
+ Status ReadAll(std::shared_ptr<Table>* table);
+
+ /// \brief Create a RecordBatchReader from a vector of RecordBatch.
+ ///
+ /// \param[in] batches the vector of RecordBatch to read from
+ /// \param[in] schema schema to conform to. Will be inferred from the first
+ /// element if not provided.
+ static Result<std::shared_ptr<RecordBatchReader>> Make(
+ RecordBatchVector batches, std::shared_ptr<Schema> schema = NULLPTR);
+};
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/result.cc b/contrib/libs/apache/arrow/cpp/src/arrow/result.cc
index 0bb65acb831..df2adf7c801 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/result.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/result.cc
@@ -1,36 +1,36 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/result.h"
-
-#include <string>
-
-#include "arrow/util/logging.h"
-
-namespace arrow {
-
-namespace internal {
-
-void DieWithMessage(const std::string& msg) { ARROW_LOG(FATAL) << msg; }
-
-void InvalidValueOrDie(const Status& st) {
- DieWithMessage(std::string("ValueOrDie called on an error: ") + st.ToString());
-}
-
-} // namespace internal
-
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/result.h"
+
+#include <string>
+
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+namespace internal {
+
+void DieWithMessage(const std::string& msg) { ARROW_LOG(FATAL) << msg; }
+
+void InvalidValueOrDie(const Status& st) {
+ DieWithMessage(std::string("ValueOrDie called on an error: ") + st.ToString());
+}
+
+} // namespace internal
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/result.h b/contrib/libs/apache/arrow/cpp/src/arrow/result.h
index cb7437cd242..6112cc1bd8e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/result.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/result.h
@@ -1,408 +1,408 @@
-//
-// Copyright 2017 Asylo authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-// Adapted from Asylo
-
-#pragma once
-
+//
+// Copyright 2017 Asylo authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+// Adapted from Asylo
+
+#pragma once
+
#include <cstddef>
-#include <new>
-#include <string>
-#include <type_traits>
-#include <utility>
-
-#include "arrow/status.h"
-#include "arrow/util/compare.h"
-
-namespace arrow {
-
+#include <new>
+#include <string>
+#include <type_traits>
+#include <utility>
+
+#include "arrow/status.h"
+#include "arrow/util/compare.h"
+
+namespace arrow {
+
template <typename>
struct EnsureResult;
-namespace internal {
-
-#if __cplusplus >= 201703L
-using std::launder;
-#else
-template <class T>
-constexpr T* launder(T* p) noexcept {
- return p;
-}
-#endif
-
-ARROW_EXPORT void DieWithMessage(const std::string& msg);
-
-ARROW_EXPORT void InvalidValueOrDie(const Status& st);
-
-} // namespace internal
-
-/// A class for representing either a usable value, or an error.
-///
-/// A Result object either contains a value of type `T` or a Status object
-/// explaining why such a value is not present. The type `T` must be
-/// copy-constructible and/or move-constructible.
-///
-/// The state of a Result object may be determined by calling ok() or
-/// status(). The ok() method returns true if the object contains a valid value.
-/// The status() method returns the internal Status object. A Result object
-/// that contains a valid value will return an OK Status for a call to status().
-///
-/// A value of type `T` may be extracted from a Result object through a call
-/// to ValueOrDie(). This function should only be called if a call to ok()
-/// returns true. Sample usage:
-///
-/// ```
-/// arrow::Result<Foo> result = CalculateFoo();
-/// if (result.ok()) {
-/// Foo foo = result.ValueOrDie();
-/// foo.DoSomethingCool();
-/// } else {
-/// ARROW_LOG(ERROR) << result.status();
-/// }
-/// ```
-///
-/// If `T` is a move-only type, like `std::unique_ptr<>`, then the value should
-/// only be extracted after invoking `std::move()` on the Result object.
-/// Sample usage:
-///
-/// ```
-/// arrow::Result<std::unique_ptr<Foo>> result = CalculateFoo();
-/// if (result.ok()) {
-/// std::unique_ptr<Foo> foo = std::move(result).ValueOrDie();
-/// foo->DoSomethingCool();
-/// } else {
-/// ARROW_LOG(ERROR) << result.status();
-/// }
-/// ```
-///
-/// Result is provided for the convenience of implementing functions that
-/// return some value but may fail during execution. For instance, consider a
-/// function with the following signature:
-///
-/// ```
-/// arrow::Status CalculateFoo(int *output);
-/// ```
-///
-/// This function may instead be written as:
-///
-/// ```
-/// arrow::Result<int> CalculateFoo();
-/// ```
-template <class T>
-class ARROW_MUST_USE_TYPE Result : public util::EqualityComparable<Result<T>> {
- template <typename U>
- friend class Result;
-
- static_assert(!std::is_same<T, Status>::value,
- "this assert indicates you have probably made a metaprogramming error");
-
- public:
- using ValueType = T;
-
- /// Constructs a Result object that contains a non-OK status.
- ///
- /// This constructor is marked `explicit` to prevent attempts to `return {}`
- /// from a function with a return type of, for example,
- /// `Result<std::vector<int>>`. While `return {}` seems like it would return
- /// an empty vector, it will actually invoke the default constructor of
- /// Result.
- explicit Result() // NOLINT(runtime/explicit)
- : status_(Status::UnknownError("Uninitialized Result<T>")) {}
-
- ~Result() noexcept { Destroy(); }
-
- /// Constructs a Result object with the given non-OK Status object. All
- /// calls to ValueOrDie() on this object will abort. The given `status` must
- /// not be an OK status, otherwise this constructor will abort.
- ///
- /// This constructor is not declared explicit so that a function with a return
- /// type of `Result<T>` can return a Status object, and the status will be
- /// implicitly converted to the appropriate return type as a matter of
- /// convenience.
- ///
- /// \param status The non-OK Status object to initialize to.
- Result(const Status& status) // NOLINT(runtime/explicit)
- : status_(status) {
- if (ARROW_PREDICT_FALSE(status.ok())) {
- internal::DieWithMessage(std::string("Constructed with a non-error status: ") +
- status.ToString());
- }
- }
-
- /// Constructs a Result object that contains `value`. The resulting object
- /// is considered to have an OK status. The wrapped element can be accessed
- /// with ValueOrDie().
- ///
- /// This constructor is made implicit so that a function with a return type of
- /// `Result<T>` can return an object of type `U &&`, implicitly converting
- /// it to a `Result<T>` object.
- ///
- /// Note that `T` must be implicitly constructible from `U`, and `U` must not
- /// be a (cv-qualified) Status or Status-reference type. Due to C++
- /// reference-collapsing rules and perfect-forwarding semantics, this
- /// constructor matches invocations that pass `value` either as a const
- /// reference or as an rvalue reference. Since Result needs to work for both
- /// reference and rvalue-reference types, the constructor uses perfect
- /// forwarding to avoid invalidating arguments that were passed by reference.
- /// See http://thbecker.net/articles/rvalue_references/section_08.html for
- /// additional details.
- ///
- /// \param value The value to initialize to.
- template <typename U,
- typename E = typename std::enable_if<
- std::is_constructible<T, U>::value && std::is_convertible<U, T>::value &&
- !std::is_same<typename std::remove_reference<
- typename std::remove_cv<U>::type>::type,
- Status>::value>::type>
- Result(U&& value) noexcept { // NOLINT(runtime/explicit)
- ConstructValue(std::forward<U>(value));
- }
-
- /// Constructs a Result object that contains `value`. The resulting object
- /// is considered to have an OK status. The wrapped element can be accessed
- /// with ValueOrDie().
- ///
- /// This constructor is made implicit so that a function with a return type of
- /// `Result<T>` can return an object of type `T`, implicitly converting
- /// it to a `Result<T>` object.
- ///
- /// \param value The value to initialize to.
- // NOTE `Result(U&& value)` above should be sufficient, but some compilers
- // fail matching it.
- Result(T&& value) noexcept { // NOLINT(runtime/explicit)
- ConstructValue(std::move(value));
- }
-
- /// Copy constructor.
- ///
- /// This constructor needs to be explicitly defined because the presence of
- /// the move-assignment operator deletes the default copy constructor. In such
- /// a scenario, since the deleted copy constructor has stricter binding rules
- /// than the templated copy constructor, the templated constructor cannot act
- /// as a copy constructor, and any attempt to copy-construct a `Result`
- /// object results in a compilation error.
- ///
- /// \param other The value to copy from.
- Result(const Result& other) : status_(other.status_) {
- if (ARROW_PREDICT_TRUE(status_.ok())) {
- ConstructValue(other.ValueUnsafe());
- }
- }
-
- /// Templatized constructor that constructs a `Result<T>` from a const
- /// reference to a `Result<U>`.
- ///
- /// `T` must be implicitly constructible from `const U &`.
- ///
- /// \param other The value to copy from.
- template <typename U, typename E = typename std::enable_if<
- std::is_constructible<T, const U&>::value &&
- std::is_convertible<U, T>::value>::type>
- Result(const Result<U>& other) : status_(other.status_) {
- if (ARROW_PREDICT_TRUE(status_.ok())) {
- ConstructValue(other.ValueUnsafe());
- }
- }
-
- /// Copy-assignment operator.
- ///
- /// \param other The Result object to copy.
- Result& operator=(const Result& other) {
- // Check for self-assignment.
- if (this == &other) {
- return *this;
- }
- Destroy();
- status_ = other.status_;
- if (ARROW_PREDICT_TRUE(status_.ok())) {
- ConstructValue(other.ValueUnsafe());
- }
- return *this;
- }
-
- /// Templatized constructor which constructs a `Result<T>` by moving the
- /// contents of a `Result<U>`. `T` must be implicitly constructible from `U
- /// &&`.
- ///
- /// Sets `other` to contain a non-OK status with a`StatusError::Invalid`
- /// error code.
- ///
- /// \param other The Result object to move from and set to a non-OK status.
- template <typename U,
- typename E = typename std::enable_if<std::is_constructible<T, U&&>::value &&
- std::is_convertible<U, T>::value>::type>
- Result(Result<U>&& other) noexcept {
- if (ARROW_PREDICT_TRUE(other.status_.ok())) {
- status_ = std::move(other.status_);
- ConstructValue(other.MoveValueUnsafe());
- } else {
- // If we moved the status, the other status may become ok but the other
- // value hasn't been constructed => crash on other destructor.
- status_ = other.status_;
- }
- }
-
- /// Move-assignment operator.
- ///
- /// Sets `other` to an invalid state..
- ///
- /// \param other The Result object to assign from and set to a non-OK
- /// status.
- Result& operator=(Result&& other) noexcept {
- // Check for self-assignment.
- if (this == &other) {
- return *this;
- }
- Destroy();
- if (ARROW_PREDICT_TRUE(other.status_.ok())) {
- status_ = std::move(other.status_);
- ConstructValue(other.MoveValueUnsafe());
- } else {
- // If we moved the status, the other status may become ok but the other
- // value hasn't been constructed => crash on other destructor.
- status_ = other.status_;
- }
- return *this;
- }
-
- /// Compare to another Result.
- bool Equals(const Result& other) const {
- if (ARROW_PREDICT_TRUE(status_.ok())) {
- return other.status_.ok() && ValueUnsafe() == other.ValueUnsafe();
- }
- return status_ == other.status_;
- }
-
- /// Indicates whether the object contains a `T` value. Generally instead
- /// of accessing this directly you will want to use ASSIGN_OR_RAISE defined
- /// below.
- ///
- /// \return True if this Result object's status is OK (i.e. a call to ok()
- /// returns true). If this function returns true, then it is safe to access
- /// the wrapped element through a call to ValueOrDie().
- bool ok() const { return status_.ok(); }
-
- /// \brief Equivalent to ok().
- // operator bool() const { return ok(); }
-
- /// Gets the stored status object, or an OK status if a `T` value is stored.
- ///
- /// \return The stored non-OK status object, or an OK status if this object
- /// has a value.
- const Status& status() const { return status_; }
-
- /// Gets the stored `T` value.
- ///
- /// This method should only be called if this Result object's status is OK
- /// (i.e. a call to ok() returns true), otherwise this call will abort.
- ///
- /// \return The stored `T` value.
- const T& ValueOrDie() const& {
- if (ARROW_PREDICT_FALSE(!ok())) {
- internal::InvalidValueOrDie(status_);
- }
- return ValueUnsafe();
- }
- const T& operator*() const& { return ValueOrDie(); }
+namespace internal {
+
+#if __cplusplus >= 201703L
+using std::launder;
+#else
+template <class T>
+constexpr T* launder(T* p) noexcept {
+ return p;
+}
+#endif
+
+ARROW_EXPORT void DieWithMessage(const std::string& msg);
+
+ARROW_EXPORT void InvalidValueOrDie(const Status& st);
+
+} // namespace internal
+
+/// A class for representing either a usable value, or an error.
+///
+/// A Result object either contains a value of type `T` or a Status object
+/// explaining why such a value is not present. The type `T` must be
+/// copy-constructible and/or move-constructible.
+///
+/// The state of a Result object may be determined by calling ok() or
+/// status(). The ok() method returns true if the object contains a valid value.
+/// The status() method returns the internal Status object. A Result object
+/// that contains a valid value will return an OK Status for a call to status().
+///
+/// A value of type `T` may be extracted from a Result object through a call
+/// to ValueOrDie(). This function should only be called if a call to ok()
+/// returns true. Sample usage:
+///
+/// ```
+/// arrow::Result<Foo> result = CalculateFoo();
+/// if (result.ok()) {
+/// Foo foo = result.ValueOrDie();
+/// foo.DoSomethingCool();
+/// } else {
+/// ARROW_LOG(ERROR) << result.status();
+/// }
+/// ```
+///
+/// If `T` is a move-only type, like `std::unique_ptr<>`, then the value should
+/// only be extracted after invoking `std::move()` on the Result object.
+/// Sample usage:
+///
+/// ```
+/// arrow::Result<std::unique_ptr<Foo>> result = CalculateFoo();
+/// if (result.ok()) {
+/// std::unique_ptr<Foo> foo = std::move(result).ValueOrDie();
+/// foo->DoSomethingCool();
+/// } else {
+/// ARROW_LOG(ERROR) << result.status();
+/// }
+/// ```
+///
+/// Result is provided for the convenience of implementing functions that
+/// return some value but may fail during execution. For instance, consider a
+/// function with the following signature:
+///
+/// ```
+/// arrow::Status CalculateFoo(int *output);
+/// ```
+///
+/// This function may instead be written as:
+///
+/// ```
+/// arrow::Result<int> CalculateFoo();
+/// ```
+template <class T>
+class ARROW_MUST_USE_TYPE Result : public util::EqualityComparable<Result<T>> {
+ template <typename U>
+ friend class Result;
+
+ static_assert(!std::is_same<T, Status>::value,
+ "this assert indicates you have probably made a metaprogramming error");
+
+ public:
+ using ValueType = T;
+
+ /// Constructs a Result object that contains a non-OK status.
+ ///
+ /// This constructor is marked `explicit` to prevent attempts to `return {}`
+ /// from a function with a return type of, for example,
+ /// `Result<std::vector<int>>`. While `return {}` seems like it would return
+ /// an empty vector, it will actually invoke the default constructor of
+ /// Result.
+ explicit Result() // NOLINT(runtime/explicit)
+ : status_(Status::UnknownError("Uninitialized Result<T>")) {}
+
+ ~Result() noexcept { Destroy(); }
+
+ /// Constructs a Result object with the given non-OK Status object. All
+ /// calls to ValueOrDie() on this object will abort. The given `status` must
+ /// not be an OK status, otherwise this constructor will abort.
+ ///
+ /// This constructor is not declared explicit so that a function with a return
+ /// type of `Result<T>` can return a Status object, and the status will be
+ /// implicitly converted to the appropriate return type as a matter of
+ /// convenience.
+ ///
+ /// \param status The non-OK Status object to initialize to.
+ Result(const Status& status) // NOLINT(runtime/explicit)
+ : status_(status) {
+ if (ARROW_PREDICT_FALSE(status.ok())) {
+ internal::DieWithMessage(std::string("Constructed with a non-error status: ") +
+ status.ToString());
+ }
+ }
+
+ /// Constructs a Result object that contains `value`. The resulting object
+ /// is considered to have an OK status. The wrapped element can be accessed
+ /// with ValueOrDie().
+ ///
+ /// This constructor is made implicit so that a function with a return type of
+ /// `Result<T>` can return an object of type `U &&`, implicitly converting
+ /// it to a `Result<T>` object.
+ ///
+ /// Note that `T` must be implicitly constructible from `U`, and `U` must not
+ /// be a (cv-qualified) Status or Status-reference type. Due to C++
+ /// reference-collapsing rules and perfect-forwarding semantics, this
+ /// constructor matches invocations that pass `value` either as a const
+ /// reference or as an rvalue reference. Since Result needs to work for both
+ /// reference and rvalue-reference types, the constructor uses perfect
+ /// forwarding to avoid invalidating arguments that were passed by reference.
+ /// See http://thbecker.net/articles/rvalue_references/section_08.html for
+ /// additional details.
+ ///
+ /// \param value The value to initialize to.
+ template <typename U,
+ typename E = typename std::enable_if<
+ std::is_constructible<T, U>::value && std::is_convertible<U, T>::value &&
+ !std::is_same<typename std::remove_reference<
+ typename std::remove_cv<U>::type>::type,
+ Status>::value>::type>
+ Result(U&& value) noexcept { // NOLINT(runtime/explicit)
+ ConstructValue(std::forward<U>(value));
+ }
+
+ /// Constructs a Result object that contains `value`. The resulting object
+ /// is considered to have an OK status. The wrapped element can be accessed
+ /// with ValueOrDie().
+ ///
+ /// This constructor is made implicit so that a function with a return type of
+ /// `Result<T>` can return an object of type `T`, implicitly converting
+ /// it to a `Result<T>` object.
+ ///
+ /// \param value The value to initialize to.
+ // NOTE `Result(U&& value)` above should be sufficient, but some compilers
+ // fail matching it.
+ Result(T&& value) noexcept { // NOLINT(runtime/explicit)
+ ConstructValue(std::move(value));
+ }
+
+ /// Copy constructor.
+ ///
+ /// This constructor needs to be explicitly defined because the presence of
+ /// the move-assignment operator deletes the default copy constructor. In such
+ /// a scenario, since the deleted copy constructor has stricter binding rules
+ /// than the templated copy constructor, the templated constructor cannot act
+ /// as a copy constructor, and any attempt to copy-construct a `Result`
+ /// object results in a compilation error.
+ ///
+ /// \param other The value to copy from.
+ Result(const Result& other) : status_(other.status_) {
+ if (ARROW_PREDICT_TRUE(status_.ok())) {
+ ConstructValue(other.ValueUnsafe());
+ }
+ }
+
+ /// Templatized constructor that constructs a `Result<T>` from a const
+ /// reference to a `Result<U>`.
+ ///
+ /// `T` must be implicitly constructible from `const U &`.
+ ///
+ /// \param other The value to copy from.
+ template <typename U, typename E = typename std::enable_if<
+ std::is_constructible<T, const U&>::value &&
+ std::is_convertible<U, T>::value>::type>
+ Result(const Result<U>& other) : status_(other.status_) {
+ if (ARROW_PREDICT_TRUE(status_.ok())) {
+ ConstructValue(other.ValueUnsafe());
+ }
+ }
+
+ /// Copy-assignment operator.
+ ///
+ /// \param other The Result object to copy.
+ Result& operator=(const Result& other) {
+ // Check for self-assignment.
+ if (this == &other) {
+ return *this;
+ }
+ Destroy();
+ status_ = other.status_;
+ if (ARROW_PREDICT_TRUE(status_.ok())) {
+ ConstructValue(other.ValueUnsafe());
+ }
+ return *this;
+ }
+
+ /// Templatized constructor which constructs a `Result<T>` by moving the
+ /// contents of a `Result<U>`. `T` must be implicitly constructible from `U
+ /// &&`.
+ ///
+ /// Sets `other` to contain a non-OK status with a`StatusError::Invalid`
+ /// error code.
+ ///
+ /// \param other The Result object to move from and set to a non-OK status.
+ template <typename U,
+ typename E = typename std::enable_if<std::is_constructible<T, U&&>::value &&
+ std::is_convertible<U, T>::value>::type>
+ Result(Result<U>&& other) noexcept {
+ if (ARROW_PREDICT_TRUE(other.status_.ok())) {
+ status_ = std::move(other.status_);
+ ConstructValue(other.MoveValueUnsafe());
+ } else {
+ // If we moved the status, the other status may become ok but the other
+ // value hasn't been constructed => crash on other destructor.
+ status_ = other.status_;
+ }
+ }
+
+ /// Move-assignment operator.
+ ///
+ /// Sets `other` to an invalid state..
+ ///
+ /// \param other The Result object to assign from and set to a non-OK
+ /// status.
+ Result& operator=(Result&& other) noexcept {
+ // Check for self-assignment.
+ if (this == &other) {
+ return *this;
+ }
+ Destroy();
+ if (ARROW_PREDICT_TRUE(other.status_.ok())) {
+ status_ = std::move(other.status_);
+ ConstructValue(other.MoveValueUnsafe());
+ } else {
+ // If we moved the status, the other status may become ok but the other
+ // value hasn't been constructed => crash on other destructor.
+ status_ = other.status_;
+ }
+ return *this;
+ }
+
+ /// Compare to another Result.
+ bool Equals(const Result& other) const {
+ if (ARROW_PREDICT_TRUE(status_.ok())) {
+ return other.status_.ok() && ValueUnsafe() == other.ValueUnsafe();
+ }
+ return status_ == other.status_;
+ }
+
+ /// Indicates whether the object contains a `T` value. Generally instead
+ /// of accessing this directly you will want to use ASSIGN_OR_RAISE defined
+ /// below.
+ ///
+ /// \return True if this Result object's status is OK (i.e. a call to ok()
+ /// returns true). If this function returns true, then it is safe to access
+ /// the wrapped element through a call to ValueOrDie().
+ bool ok() const { return status_.ok(); }
+
+ /// \brief Equivalent to ok().
+ // operator bool() const { return ok(); }
+
+ /// Gets the stored status object, or an OK status if a `T` value is stored.
+ ///
+ /// \return The stored non-OK status object, or an OK status if this object
+ /// has a value.
+ const Status& status() const { return status_; }
+
+ /// Gets the stored `T` value.
+ ///
+ /// This method should only be called if this Result object's status is OK
+ /// (i.e. a call to ok() returns true), otherwise this call will abort.
+ ///
+ /// \return The stored `T` value.
+ const T& ValueOrDie() const& {
+ if (ARROW_PREDICT_FALSE(!ok())) {
+ internal::InvalidValueOrDie(status_);
+ }
+ return ValueUnsafe();
+ }
+ const T& operator*() const& { return ValueOrDie(); }
const T* operator->() const { return &ValueOrDie(); }
-
- /// Gets a mutable reference to the stored `T` value.
- ///
- /// This method should only be called if this Result object's status is OK
- /// (i.e. a call to ok() returns true), otherwise this call will abort.
- ///
- /// \return The stored `T` value.
- T& ValueOrDie() & {
- if (ARROW_PREDICT_FALSE(!ok())) {
- internal::InvalidValueOrDie(status_);
- }
- return ValueUnsafe();
- }
- T& operator*() & { return ValueOrDie(); }
+
+ /// Gets a mutable reference to the stored `T` value.
+ ///
+ /// This method should only be called if this Result object's status is OK
+ /// (i.e. a call to ok() returns true), otherwise this call will abort.
+ ///
+ /// \return The stored `T` value.
+ T& ValueOrDie() & {
+ if (ARROW_PREDICT_FALSE(!ok())) {
+ internal::InvalidValueOrDie(status_);
+ }
+ return ValueUnsafe();
+ }
+ T& operator*() & { return ValueOrDie(); }
T* operator->() { return &ValueOrDie(); }
-
- /// Moves and returns the internally-stored `T` value.
- ///
- /// This method should only be called if this Result object's status is OK
- /// (i.e. a call to ok() returns true), otherwise this call will abort. The
- /// Result object is invalidated after this call and will be updated to
- /// contain a non-OK status.
- ///
- /// \return The stored `T` value.
- T ValueOrDie() && {
- if (ARROW_PREDICT_FALSE(!ok())) {
- internal::InvalidValueOrDie(status_);
- }
- return MoveValueUnsafe();
- }
- T operator*() && { return std::move(*this).ValueOrDie(); }
-
- /// Helper method for implementing Status returning functions in terms of semantically
- /// equivalent Result returning functions. For example:
- ///
- /// Status GetInt(int *out) { return GetInt().Value(out); }
- template <typename U, typename E = typename std::enable_if<
- std::is_constructible<U, T>::value>::type>
- Status Value(U* out) && {
- if (!ok()) {
- return status();
- }
- *out = U(MoveValueUnsafe());
- return Status::OK();
- }
-
- /// Move and return the internally stored value or alternative if an error is stored.
- T ValueOr(T alternative) && {
- if (!ok()) {
- return alternative;
- }
- return MoveValueUnsafe();
- }
-
- /// Retrieve the value if ok(), falling back to an alternative generated by the provided
- /// factory
- template <typename G>
- T ValueOrElse(G&& generate_alternative) && {
- if (ok()) {
- return MoveValueUnsafe();
- }
- return generate_alternative();
- }
-
- /// Apply a function to the internally stored value to produce a new result or propagate
- /// the stored error.
- template <typename M>
+
+ /// Moves and returns the internally-stored `T` value.
+ ///
+ /// This method should only be called if this Result object's status is OK
+ /// (i.e. a call to ok() returns true), otherwise this call will abort. The
+ /// Result object is invalidated after this call and will be updated to
+ /// contain a non-OK status.
+ ///
+ /// \return The stored `T` value.
+ T ValueOrDie() && {
+ if (ARROW_PREDICT_FALSE(!ok())) {
+ internal::InvalidValueOrDie(status_);
+ }
+ return MoveValueUnsafe();
+ }
+ T operator*() && { return std::move(*this).ValueOrDie(); }
+
+ /// Helper method for implementing Status returning functions in terms of semantically
+ /// equivalent Result returning functions. For example:
+ ///
+ /// Status GetInt(int *out) { return GetInt().Value(out); }
+ template <typename U, typename E = typename std::enable_if<
+ std::is_constructible<U, T>::value>::type>
+ Status Value(U* out) && {
+ if (!ok()) {
+ return status();
+ }
+ *out = U(MoveValueUnsafe());
+ return Status::OK();
+ }
+
+ /// Move and return the internally stored value or alternative if an error is stored.
+ T ValueOr(T alternative) && {
+ if (!ok()) {
+ return alternative;
+ }
+ return MoveValueUnsafe();
+ }
+
+ /// Retrieve the value if ok(), falling back to an alternative generated by the provided
+ /// factory
+ template <typename G>
+ T ValueOrElse(G&& generate_alternative) && {
+ if (ok()) {
+ return MoveValueUnsafe();
+ }
+ return generate_alternative();
+ }
+
+ /// Apply a function to the internally stored value to produce a new result or propagate
+ /// the stored error.
+ template <typename M>
typename EnsureResult<typename std::result_of<M && (T)>::type>::type Map(M&& m) && {
- if (!ok()) {
- return status();
- }
- return std::forward<M>(m)(MoveValueUnsafe());
- }
-
- /// Apply a function to the internally stored value to produce a new result or propagate
- /// the stored error.
- template <typename M>
+ if (!ok()) {
+ return status();
+ }
+ return std::forward<M>(m)(MoveValueUnsafe());
+ }
+
+ /// Apply a function to the internally stored value to produce a new result or propagate
+ /// the stored error.
+ template <typename M>
typename EnsureResult<typename std::result_of<M && (const T&)>::type>::type Map(
M&& m) const& {
- if (!ok()) {
- return status();
- }
- return std::forward<M>(m)(ValueUnsafe());
- }
-
+ if (!ok()) {
+ return status();
+ }
+ return std::forward<M>(m)(ValueUnsafe());
+ }
+
/// Cast the internally stored value to produce a new result or propagate the stored
/// error.
template <typename U, typename E = typename std::enable_if<
@@ -425,56 +425,56 @@ class ARROW_MUST_USE_TYPE Result : public util::EqualityComparable<Result<T>> {
return U(ValueUnsafe());
}
- const T& ValueUnsafe() const& {
- return *internal::launder(reinterpret_cast<const T*>(&data_));
- }
-
- T& ValueUnsafe() & { return *internal::launder(reinterpret_cast<T*>(&data_)); }
-
- T ValueUnsafe() && { return MoveValueUnsafe(); }
-
- T MoveValueUnsafe() {
- return std::move(*internal::launder(reinterpret_cast<T*>(&data_)));
- }
-
- private:
- Status status_; // pointer-sized
- typename std::aligned_storage<sizeof(T), alignof(T)>::type data_;
-
- template <typename U>
- void ConstructValue(U&& u) {
- new (&data_) T(std::forward<U>(u));
- }
-
- void Destroy() {
- if (ARROW_PREDICT_TRUE(status_.ok())) {
+ const T& ValueUnsafe() const& {
+ return *internal::launder(reinterpret_cast<const T*>(&data_));
+ }
+
+ T& ValueUnsafe() & { return *internal::launder(reinterpret_cast<T*>(&data_)); }
+
+ T ValueUnsafe() && { return MoveValueUnsafe(); }
+
+ T MoveValueUnsafe() {
+ return std::move(*internal::launder(reinterpret_cast<T*>(&data_)));
+ }
+
+ private:
+ Status status_; // pointer-sized
+ typename std::aligned_storage<sizeof(T), alignof(T)>::type data_;
+
+ template <typename U>
+ void ConstructValue(U&& u) {
+ new (&data_) T(std::forward<U>(u));
+ }
+
+ void Destroy() {
+ if (ARROW_PREDICT_TRUE(status_.ok())) {
static_assert(offsetof(Result<T>, status_) == 0,
"Status is guaranteed to be at the start of Result<>");
- internal::launder(reinterpret_cast<const T*>(&data_))->~T();
- }
- }
-};
-
+ internal::launder(reinterpret_cast<const T*>(&data_))->~T();
+ }
+ }
+};
+
#define ARROW_ASSIGN_OR_RAISE_IMPL(result_name, lhs, rexpr) \
auto&& result_name = (rexpr); \
ARROW_RETURN_IF_(!(result_name).ok(), (result_name).status(), ARROW_STRINGIFY(rexpr)); \
- lhs = std::move(result_name).ValueUnsafe();
-
-#define ARROW_ASSIGN_OR_RAISE_NAME(x, y) ARROW_CONCAT(x, y)
-
-/// \brief Execute an expression that returns a Result, extracting its value
-/// into the variable defined by `lhs` (or returning a Status on error).
-///
-/// Example: Assigning to a new value:
-/// ARROW_ASSIGN_OR_RAISE(auto value, MaybeGetValue(arg));
-///
-/// Example: Assigning to an existing value:
-/// ValueType value;
-/// ARROW_ASSIGN_OR_RAISE(value, MaybeGetValue(arg));
-///
-/// WARNING: ARROW_ASSIGN_OR_RAISE expands into multiple statements;
-/// it cannot be used in a single statement (e.g. as the body of an if
-/// statement without {})!
+ lhs = std::move(result_name).ValueUnsafe();
+
+#define ARROW_ASSIGN_OR_RAISE_NAME(x, y) ARROW_CONCAT(x, y)
+
+/// \brief Execute an expression that returns a Result, extracting its value
+/// into the variable defined by `lhs` (or returning a Status on error).
+///
+/// Example: Assigning to a new value:
+/// ARROW_ASSIGN_OR_RAISE(auto value, MaybeGetValue(arg));
+///
+/// Example: Assigning to an existing value:
+/// ValueType value;
+/// ARROW_ASSIGN_OR_RAISE(value, MaybeGetValue(arg));
+///
+/// WARNING: ARROW_ASSIGN_OR_RAISE expands into multiple statements;
+/// it cannot be used in a single statement (e.g. as the body of an if
+/// statement without {})!
///
/// WARNING: ARROW_ASSIGN_OR_RAISE `std::move`s its right operand. If you have
/// an lvalue Result which you *don't* want to move out of cast appropriately.
@@ -483,29 +483,29 @@ class ARROW_MUST_USE_TYPE Result : public util::EqualityComparable<Result<T>> {
/// maintain lifetimes of all temporaries in `rexpr` (e.g.
/// `ARROW_ASSIGN_OR_RAISE(auto x, MakeTemp().GetResultRef());`
/// will most likely segfault)!
-#define ARROW_ASSIGN_OR_RAISE(lhs, rexpr) \
- ARROW_ASSIGN_OR_RAISE_IMPL(ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), \
- lhs, rexpr);
-
-namespace internal {
-
-template <typename T>
+#define ARROW_ASSIGN_OR_RAISE(lhs, rexpr) \
+ ARROW_ASSIGN_OR_RAISE_IMPL(ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), \
+ lhs, rexpr);
+
+namespace internal {
+
+template <typename T>
inline const Status& GenericToStatus(const Result<T>& res) {
- return res.status();
-}
-
-template <typename T>
-inline Status GenericToStatus(Result<T>&& res) {
- return std::move(res).status();
-}
-
-} // namespace internal
-
+ return res.status();
+}
+
+template <typename T>
+inline Status GenericToStatus(Result<T>&& res) {
+ return std::move(res).status();
+}
+
+} // namespace internal
+
template <typename T, typename R = typename EnsureResult<T>::type>
R ToResult(T t) {
return R(std::move(t));
-}
-
+}
+
template <typename T>
struct EnsureResult {
using type = Result<T>;
@@ -516,4 +516,4 @@ struct EnsureResult<Result<T>> {
using type = Result<T>;
};
-} // namespace arrow
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/result_internal.h b/contrib/libs/apache/arrow/cpp/src/arrow/result_internal.h
index 7550f945d85..6ae7c0cf69b 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/result_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/result_internal.h
@@ -1,22 +1,22 @@
-//
-// Copyright 2017 Asylo authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#pragma once
-
-#include "arrow/result.h"
-
-#ifndef ASSIGN_OR_RAISE
-#define ASSIGN_OR_RAISE(lhs, rhs) ARROW_ASSIGN_OR_RAISE(lhs, rhs)
-#endif
+//
+// Copyright 2017 Asylo authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#pragma once
+
+#include "arrow/result.h"
+
+#ifndef ASSIGN_OR_RAISE
+#define ASSIGN_OR_RAISE(lhs, rhs) ARROW_ASSIGN_OR_RAISE(lhs, rhs)
+#endif
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/scalar.cc b/contrib/libs/apache/arrow/cpp/src/arrow/scalar.cc
index cb7755ba3f1..de10d72a3f4 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/scalar.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/scalar.cc
@@ -1,79 +1,79 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/scalar.h"
-
-#include <memory>
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/scalar.h"
+
+#include <memory>
#include <sstream>
-#include <string>
-#include <utility>
-
-#include "arrow/array.h"
-#include "arrow/array/util.h"
-#include "arrow/buffer.h"
-#include "arrow/compare.h"
-#include "arrow/type.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/decimal.h"
-#include "arrow/util/formatting.h"
-#include "arrow/util/hashing.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/time.h"
-#include "arrow/util/value_parsing.h"
-#include "arrow/visitor_inline.h"
-
-namespace arrow {
-
-using internal::checked_cast;
-using internal::checked_pointer_cast;
-
-bool Scalar::Equals(const Scalar& other, const EqualOptions& options) const {
- return ScalarEquals(*this, other, options);
-}
-
+#include <string>
+#include <utility>
+
+#include "arrow/array.h"
+#include "arrow/array/util.h"
+#include "arrow/buffer.h"
+#include "arrow/compare.h"
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/decimal.h"
+#include "arrow/util/formatting.h"
+#include "arrow/util/hashing.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/time.h"
+#include "arrow/util/value_parsing.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+using internal::checked_pointer_cast;
+
+bool Scalar::Equals(const Scalar& other, const EqualOptions& options) const {
+ return ScalarEquals(*this, other, options);
+}
+
bool Scalar::ApproxEquals(const Scalar& other, const EqualOptions& options) const {
return ScalarApproxEquals(*this, other, options);
}
-struct ScalarHashImpl {
- static std::hash<std::string> string_hash;
-
- Status Visit(const NullScalar& s) { return Status::OK(); }
-
- template <typename T>
- Status Visit(const internal::PrimitiveScalar<T>& s) {
- return ValueHash(s);
- }
-
- Status Visit(const BaseBinaryScalar& s) { return BufferHash(*s.value); }
-
- template <typename T>
- Status Visit(const TemporalScalar<T>& s) {
- return ValueHash(s);
- }
-
- Status Visit(const DayTimeIntervalScalar& s) {
- return StdHash(s.value.days) & StdHash(s.value.days);
- }
-
- Status Visit(const Decimal128Scalar& s) {
- return StdHash(s.value.low_bits()) & StdHash(s.value.high_bits());
- }
-
+struct ScalarHashImpl {
+ static std::hash<std::string> string_hash;
+
+ Status Visit(const NullScalar& s) { return Status::OK(); }
+
+ template <typename T>
+ Status Visit(const internal::PrimitiveScalar<T>& s) {
+ return ValueHash(s);
+ }
+
+ Status Visit(const BaseBinaryScalar& s) { return BufferHash(*s.value); }
+
+ template <typename T>
+ Status Visit(const TemporalScalar<T>& s) {
+ return ValueHash(s);
+ }
+
+ Status Visit(const DayTimeIntervalScalar& s) {
+ return StdHash(s.value.days) & StdHash(s.value.days);
+ }
+
+ Status Visit(const Decimal128Scalar& s) {
+ return StdHash(s.value.low_bits()) & StdHash(s.value.high_bits());
+ }
+
Status Visit(const Decimal256Scalar& s) {
Status status = Status::OK();
for (uint64_t elem : s.value.little_endian_array()) {
@@ -82,117 +82,117 @@ struct ScalarHashImpl {
return status;
}
- Status Visit(const BaseListScalar& s) { return ArrayHash(*s.value); }
-
- Status Visit(const StructScalar& s) {
- for (const auto& child : s.value) {
- AccumulateHashFrom(*child);
- }
- return Status::OK();
- }
-
+ Status Visit(const BaseListScalar& s) { return ArrayHash(*s.value); }
+
+ Status Visit(const StructScalar& s) {
+ for (const auto& child : s.value) {
+ AccumulateHashFrom(*child);
+ }
+ return Status::OK();
+ }
+
Status Visit(const DictionaryScalar& s) {
AccumulateHashFrom(*s.value.index);
return Status::OK();
}
- // TODO(bkietz) implement less wimpy hashing when these have ValueType
- Status Visit(const UnionScalar& s) { return Status::OK(); }
- Status Visit(const ExtensionScalar& s) { return Status::OK(); }
-
- template <typename T>
- Status StdHash(const T& t) {
- static std::hash<T> hash;
- hash_ ^= hash(t);
- return Status::OK();
- }
-
- template <typename S>
- Status ValueHash(const S& s) {
- return StdHash(s.value);
- }
-
- Status BufferHash(const Buffer& b) {
- hash_ ^= internal::ComputeStringHash<1>(b.data(), b.size());
- return Status::OK();
- }
-
- Status ArrayHash(const Array& a) { return ArrayHash(*a.data()); }
-
- Status ArrayHash(const ArrayData& a) {
- RETURN_NOT_OK(StdHash(a.length) & StdHash(a.GetNullCount()));
- if (a.buffers[0] != nullptr) {
- // We can't visit values without unboxing the whole array, so only hash
- // the null bitmap for now.
- RETURN_NOT_OK(BufferHash(*a.buffers[0]));
- }
- for (const auto& child : a.child_data) {
- RETURN_NOT_OK(ArrayHash(*child));
- }
- return Status::OK();
- }
-
+ // TODO(bkietz) implement less wimpy hashing when these have ValueType
+ Status Visit(const UnionScalar& s) { return Status::OK(); }
+ Status Visit(const ExtensionScalar& s) { return Status::OK(); }
+
+ template <typename T>
+ Status StdHash(const T& t) {
+ static std::hash<T> hash;
+ hash_ ^= hash(t);
+ return Status::OK();
+ }
+
+ template <typename S>
+ Status ValueHash(const S& s) {
+ return StdHash(s.value);
+ }
+
+ Status BufferHash(const Buffer& b) {
+ hash_ ^= internal::ComputeStringHash<1>(b.data(), b.size());
+ return Status::OK();
+ }
+
+ Status ArrayHash(const Array& a) { return ArrayHash(*a.data()); }
+
+ Status ArrayHash(const ArrayData& a) {
+ RETURN_NOT_OK(StdHash(a.length) & StdHash(a.GetNullCount()));
+ if (a.buffers[0] != nullptr) {
+ // We can't visit values without unboxing the whole array, so only hash
+ // the null bitmap for now.
+ RETURN_NOT_OK(BufferHash(*a.buffers[0]));
+ }
+ for (const auto& child : a.child_data) {
+ RETURN_NOT_OK(ArrayHash(*child));
+ }
+ return Status::OK();
+ }
+
explicit ScalarHashImpl(const Scalar& scalar) : hash_(scalar.type->Hash()) {
if (scalar.is_valid) {
AccumulateHashFrom(scalar);
}
}
-
- void AccumulateHashFrom(const Scalar& scalar) {
- DCHECK_OK(StdHash(scalar.type->fingerprint()));
- DCHECK_OK(VisitScalarInline(scalar, this));
- }
-
+
+ void AccumulateHashFrom(const Scalar& scalar) {
+ DCHECK_OK(StdHash(scalar.type->fingerprint()));
+ DCHECK_OK(VisitScalarInline(scalar, this));
+ }
+
size_t hash_;
-};
-
+};
+
size_t Scalar::hash() const { return ScalarHashImpl(*this).hash_; }
-
-StringScalar::StringScalar(std::string s)
- : StringScalar(Buffer::FromString(std::move(s))) {}
-
-LargeStringScalar::LargeStringScalar(std::string s)
- : LargeStringScalar(Buffer::FromString(std::move(s))) {}
-
-FixedSizeBinaryScalar::FixedSizeBinaryScalar(std::shared_ptr<Buffer> value,
- std::shared_ptr<DataType> type)
- : BinaryScalar(std::move(value), std::move(type)) {
- ARROW_CHECK_EQ(checked_cast<const FixedSizeBinaryType&>(*this->type).byte_width(),
- this->value->size());
-}
-
-BaseListScalar::BaseListScalar(std::shared_ptr<Array> value,
- std::shared_ptr<DataType> type)
- : Scalar{std::move(type), true}, value(std::move(value)) {
- ARROW_CHECK(this->type->field(0)->type()->Equals(this->value->type()));
-}
-
-ListScalar::ListScalar(std::shared_ptr<Array> value)
- : BaseListScalar(value, list(value->type())) {}
-
-LargeListScalar::LargeListScalar(std::shared_ptr<Array> value)
- : BaseListScalar(value, large_list(value->type())) {}
-
-inline std::shared_ptr<DataType> MakeMapType(const std::shared_ptr<DataType>& pair_type) {
- ARROW_CHECK_EQ(pair_type->id(), Type::STRUCT);
- ARROW_CHECK_EQ(pair_type->num_fields(), 2);
- return map(pair_type->field(0)->type(), pair_type->field(1)->type());
-}
-
-MapScalar::MapScalar(std::shared_ptr<Array> value)
- : BaseListScalar(value, MakeMapType(value->type())) {}
-
-FixedSizeListScalar::FixedSizeListScalar(std::shared_ptr<Array> value,
- std::shared_ptr<DataType> type)
- : BaseListScalar(value, std::move(type)) {
- ARROW_CHECK_EQ(this->value->length(),
- checked_cast<const FixedSizeListType&>(*this->type).list_size());
-}
-
-FixedSizeListScalar::FixedSizeListScalar(std::shared_ptr<Array> value)
- : BaseListScalar(
- value, fixed_size_list(value->type(), static_cast<int32_t>(value->length()))) {}
-
+
+StringScalar::StringScalar(std::string s)
+ : StringScalar(Buffer::FromString(std::move(s))) {}
+
+LargeStringScalar::LargeStringScalar(std::string s)
+ : LargeStringScalar(Buffer::FromString(std::move(s))) {}
+
+FixedSizeBinaryScalar::FixedSizeBinaryScalar(std::shared_ptr<Buffer> value,
+ std::shared_ptr<DataType> type)
+ : BinaryScalar(std::move(value), std::move(type)) {
+ ARROW_CHECK_EQ(checked_cast<const FixedSizeBinaryType&>(*this->type).byte_width(),
+ this->value->size());
+}
+
+BaseListScalar::BaseListScalar(std::shared_ptr<Array> value,
+ std::shared_ptr<DataType> type)
+ : Scalar{std::move(type), true}, value(std::move(value)) {
+ ARROW_CHECK(this->type->field(0)->type()->Equals(this->value->type()));
+}
+
+ListScalar::ListScalar(std::shared_ptr<Array> value)
+ : BaseListScalar(value, list(value->type())) {}
+
+LargeListScalar::LargeListScalar(std::shared_ptr<Array> value)
+ : BaseListScalar(value, large_list(value->type())) {}
+
+inline std::shared_ptr<DataType> MakeMapType(const std::shared_ptr<DataType>& pair_type) {
+ ARROW_CHECK_EQ(pair_type->id(), Type::STRUCT);
+ ARROW_CHECK_EQ(pair_type->num_fields(), 2);
+ return map(pair_type->field(0)->type(), pair_type->field(1)->type());
+}
+
+MapScalar::MapScalar(std::shared_ptr<Array> value)
+ : BaseListScalar(value, MakeMapType(value->type())) {}
+
+FixedSizeListScalar::FixedSizeListScalar(std::shared_ptr<Array> value,
+ std::shared_ptr<DataType> type)
+ : BaseListScalar(value, std::move(type)) {
+ ARROW_CHECK_EQ(this->value->length(),
+ checked_cast<const FixedSizeListType&>(*this->type).list_size());
+}
+
+FixedSizeListScalar::FixedSizeListScalar(std::shared_ptr<Array> value)
+ : BaseListScalar(
+ value, fixed_size_list(value->type(), static_cast<int32_t>(value->length()))) {}
+
Result<std::shared_ptr<StructScalar>> StructScalar::Make(
ScalarVector values, std::vector<std::string> field_names) {
if (values.size() != field_names.size()) {
@@ -207,76 +207,76 @@ Result<std::shared_ptr<StructScalar>> StructScalar::Make(
return std::make_shared<StructScalar>(std::move(values), struct_(std::move(fields)));
}
-Result<std::shared_ptr<Scalar>> StructScalar::field(FieldRef ref) const {
- ARROW_ASSIGN_OR_RAISE(auto path, ref.FindOne(*type));
- if (path.indices().size() != 1) {
- return Status::NotImplemented("retrieval of nested fields from StructScalar");
- }
- auto index = path.indices()[0];
- if (is_valid) {
- return value[index];
- } else {
- const auto& struct_type = checked_cast<const StructType&>(*this->type);
- const auto& field_type = struct_type.field(index)->type();
- return MakeNullScalar(field_type);
- }
-}
-
-DictionaryScalar::DictionaryScalar(std::shared_ptr<DataType> type)
- : Scalar(std::move(type)),
- value{MakeNullScalar(checked_cast<const DictionaryType&>(*this->type).index_type()),
- MakeArrayOfNull(checked_cast<const DictionaryType&>(*this->type).value_type(),
- 0)
- .ValueOrDie()} {}
-
-Result<std::shared_ptr<Scalar>> DictionaryScalar::GetEncodedValue() const {
- const auto& dict_type = checked_cast<DictionaryType&>(*type);
-
- if (!is_valid) {
- return MakeNullScalar(dict_type.value_type());
- }
-
- int64_t index_value = 0;
- switch (dict_type.index_type()->id()) {
- case Type::UINT8:
- index_value =
- static_cast<int64_t>(checked_cast<const UInt8Scalar&>(*value.index).value);
- break;
- case Type::INT8:
- index_value =
- static_cast<int64_t>(checked_cast<const Int8Scalar&>(*value.index).value);
- break;
- case Type::UINT16:
- index_value =
- static_cast<int64_t>(checked_cast<const UInt16Scalar&>(*value.index).value);
- break;
- case Type::INT16:
- index_value =
- static_cast<int64_t>(checked_cast<const Int16Scalar&>(*value.index).value);
- break;
- case Type::UINT32:
- index_value =
- static_cast<int64_t>(checked_cast<const UInt32Scalar&>(*value.index).value);
- break;
- case Type::INT32:
- index_value =
- static_cast<int64_t>(checked_cast<const Int32Scalar&>(*value.index).value);
- break;
- case Type::UINT64:
- index_value =
- static_cast<int64_t>(checked_cast<const UInt64Scalar&>(*value.index).value);
- break;
- case Type::INT64:
- index_value =
- static_cast<int64_t>(checked_cast<const Int64Scalar&>(*value.index).value);
- break;
- default:
- return Status::TypeError("Not implemented dictionary index type");
- break;
- }
- return value.dictionary->GetScalar(index_value);
-}
-
+Result<std::shared_ptr<Scalar>> StructScalar::field(FieldRef ref) const {
+ ARROW_ASSIGN_OR_RAISE(auto path, ref.FindOne(*type));
+ if (path.indices().size() != 1) {
+ return Status::NotImplemented("retrieval of nested fields from StructScalar");
+ }
+ auto index = path.indices()[0];
+ if (is_valid) {
+ return value[index];
+ } else {
+ const auto& struct_type = checked_cast<const StructType&>(*this->type);
+ const auto& field_type = struct_type.field(index)->type();
+ return MakeNullScalar(field_type);
+ }
+}
+
+DictionaryScalar::DictionaryScalar(std::shared_ptr<DataType> type)
+ : Scalar(std::move(type)),
+ value{MakeNullScalar(checked_cast<const DictionaryType&>(*this->type).index_type()),
+ MakeArrayOfNull(checked_cast<const DictionaryType&>(*this->type).value_type(),
+ 0)
+ .ValueOrDie()} {}
+
+Result<std::shared_ptr<Scalar>> DictionaryScalar::GetEncodedValue() const {
+ const auto& dict_type = checked_cast<DictionaryType&>(*type);
+
+ if (!is_valid) {
+ return MakeNullScalar(dict_type.value_type());
+ }
+
+ int64_t index_value = 0;
+ switch (dict_type.index_type()->id()) {
+ case Type::UINT8:
+ index_value =
+ static_cast<int64_t>(checked_cast<const UInt8Scalar&>(*value.index).value);
+ break;
+ case Type::INT8:
+ index_value =
+ static_cast<int64_t>(checked_cast<const Int8Scalar&>(*value.index).value);
+ break;
+ case Type::UINT16:
+ index_value =
+ static_cast<int64_t>(checked_cast<const UInt16Scalar&>(*value.index).value);
+ break;
+ case Type::INT16:
+ index_value =
+ static_cast<int64_t>(checked_cast<const Int16Scalar&>(*value.index).value);
+ break;
+ case Type::UINT32:
+ index_value =
+ static_cast<int64_t>(checked_cast<const UInt32Scalar&>(*value.index).value);
+ break;
+ case Type::INT32:
+ index_value =
+ static_cast<int64_t>(checked_cast<const Int32Scalar&>(*value.index).value);
+ break;
+ case Type::UINT64:
+ index_value =
+ static_cast<int64_t>(checked_cast<const UInt64Scalar&>(*value.index).value);
+ break;
+ case Type::INT64:
+ index_value =
+ static_cast<int64_t>(checked_cast<const Int64Scalar&>(*value.index).value);
+ break;
+ default:
+ return Status::TypeError("Not implemented dictionary index type");
+ break;
+ }
+ return value.dictionary->GetScalar(index_value);
+}
+
std::shared_ptr<DictionaryScalar> DictionaryScalar::Make(std::shared_ptr<Scalar> index,
std::shared_ptr<Array> dict) {
auto type = dictionary(index->type, dict->type());
@@ -284,273 +284,273 @@ std::shared_ptr<DictionaryScalar> DictionaryScalar::Make(std::shared_ptr<Scalar>
std::move(type));
}
-template <typename T>
-using scalar_constructor_has_arrow_type =
- std::is_constructible<typename TypeTraits<T>::ScalarType, std::shared_ptr<DataType>>;
-
-template <typename T, typename R = void>
-using enable_if_scalar_constructor_has_arrow_type =
- typename std::enable_if<scalar_constructor_has_arrow_type<T>::value, R>::type;
-
-template <typename T, typename R = void>
-using enable_if_scalar_constructor_has_no_arrow_type =
- typename std::enable_if<!scalar_constructor_has_arrow_type<T>::value, R>::type;
-
-struct MakeNullImpl {
- template <typename T, typename ScalarType = typename TypeTraits<T>::ScalarType>
- enable_if_scalar_constructor_has_arrow_type<T, Status> Visit(const T&) {
- out_ = std::make_shared<ScalarType>(type_);
- return Status::OK();
- }
-
- template <typename T, typename ScalarType = typename TypeTraits<T>::ScalarType>
- enable_if_scalar_constructor_has_no_arrow_type<T, Status> Visit(const T&) {
- out_ = std::make_shared<ScalarType>();
- return Status::OK();
- }
-
- std::shared_ptr<Scalar> Finish() && {
- // Should not fail.
- DCHECK_OK(VisitTypeInline(*type_, this));
- return std::move(out_);
- }
-
- std::shared_ptr<DataType> type_;
- std::shared_ptr<Scalar> out_;
-};
-
-std::shared_ptr<Scalar> MakeNullScalar(std::shared_ptr<DataType> type) {
- return MakeNullImpl{std::move(type), nullptr}.Finish();
-}
-
-std::string Scalar::ToString() const {
- if (!this->is_valid) {
- return "null";
- }
- if (type->id() == Type::DICTIONARY) {
- auto dict_scalar = checked_cast<const DictionaryScalar*>(this);
- return dict_scalar->value.dictionary->ToString() + "[" +
- dict_scalar->value.index->ToString() + "]";
- }
- auto maybe_repr = CastTo(utf8());
- if (maybe_repr.ok()) {
- return checked_cast<const StringScalar&>(*maybe_repr.ValueOrDie()).value->ToString();
- }
- return "...";
-}
-
-struct ScalarParseImpl {
- template <typename T, typename = internal::enable_if_parseable<T>>
- Status Visit(const T& t) {
- typename internal::StringConverter<T>::value_type value;
- if (!internal::ParseValue(t, s_.data(), s_.size(), &value)) {
- return Status::Invalid("error parsing '", s_, "' as scalar of type ", t);
- }
- return Finish(value);
- }
-
- Status Visit(const BinaryType&) { return FinishWithBuffer(); }
-
- Status Visit(const LargeBinaryType&) { return FinishWithBuffer(); }
-
- Status Visit(const FixedSizeBinaryType&) { return FinishWithBuffer(); }
-
- Status Visit(const DictionaryType& t) {
- ARROW_ASSIGN_OR_RAISE(auto value, Scalar::Parse(t.value_type(), s_));
- return Finish(std::move(value));
- }
-
- Status Visit(const DataType& t) {
- return Status::NotImplemented("parsing scalars of type ", t);
- }
-
- template <typename Arg>
- Status Finish(Arg&& arg) {
- return MakeScalar(std::move(type_), std::forward<Arg>(arg)).Value(&out_);
- }
-
- Status FinishWithBuffer() { return Finish(Buffer::FromString(std::string(s_))); }
-
- Result<std::shared_ptr<Scalar>> Finish() && {
- RETURN_NOT_OK(VisitTypeInline(*type_, this));
- return std::move(out_);
- }
-
- ScalarParseImpl(std::shared_ptr<DataType> type, util::string_view s)
- : type_(std::move(type)), s_(s) {}
-
- std::shared_ptr<DataType> type_;
- util::string_view s_;
- std::shared_ptr<Scalar> out_;
-};
-
-Result<std::shared_ptr<Scalar>> Scalar::Parse(const std::shared_ptr<DataType>& type,
- util::string_view s) {
- return ScalarParseImpl{type, s}.Finish();
-}
-
-namespace internal {
-Status CheckBufferLength(const FixedSizeBinaryType* t, const std::shared_ptr<Buffer>* b) {
- return t->byte_width() == (*b)->size()
- ? Status::OK()
- : Status::Invalid("buffer length ", (*b)->size(), " is not compatible with ",
- *t);
-}
-} // namespace internal
-
-namespace {
-// CastImpl(...) assumes `to` points to a non null scalar of the correct type with
-// uninitialized value
-
-// helper for StringFormatter
-template <typename Formatter, typename ScalarType>
-std::shared_ptr<Buffer> FormatToBuffer(Formatter&& formatter, const ScalarType& from) {
- if (!from.is_valid) {
- return Buffer::FromString("null");
- }
- return formatter(from.value, [&](util::string_view v) {
- return Buffer::FromString(std::string(v));
- });
-}
-
-// error fallback
-Status CastImpl(const Scalar& from, Scalar* to) {
- return Status::NotImplemented("casting scalars of type ", *from.type, " to type ",
- *to->type);
-}
-
-// numeric to numeric
-template <typename From, typename To>
-Status CastImpl(const NumericScalar<From>& from, NumericScalar<To>* to) {
- to->value = static_cast<typename To::c_type>(from.value);
- return Status::OK();
-}
-
-// numeric to boolean
-template <typename T>
-Status CastImpl(const NumericScalar<T>& from, BooleanScalar* to) {
- constexpr auto zero = static_cast<typename T::c_type>(0);
- to->value = from.value != zero;
- return Status::OK();
-}
-
-// boolean to numeric
-template <typename T>
-Status CastImpl(const BooleanScalar& from, NumericScalar<T>* to) {
- to->value = static_cast<typename T::c_type>(from.value);
- return Status::OK();
-}
-
-// numeric to temporal
-template <typename From, typename To>
-typename std::enable_if<std::is_base_of<TemporalType, To>::value &&
- !std::is_same<DayTimeIntervalType, To>::value,
- Status>::type
-CastImpl(const NumericScalar<From>& from, TemporalScalar<To>* to) {
- to->value = static_cast<typename To::c_type>(from.value);
- return Status::OK();
-}
-
-// temporal to numeric
-template <typename From, typename To>
-typename std::enable_if<std::is_base_of<TemporalType, From>::value &&
- !std::is_same<DayTimeIntervalType, From>::value,
- Status>::type
-CastImpl(const TemporalScalar<From>& from, NumericScalar<To>* to) {
- to->value = static_cast<typename To::c_type>(from.value);
- return Status::OK();
-}
-
-// timestamp to timestamp
-Status CastImpl(const TimestampScalar& from, TimestampScalar* to) {
- return util::ConvertTimestampValue(from.type, to->type, from.value).Value(&to->value);
-}
-
-template <typename TypeWithTimeUnit>
-std::shared_ptr<DataType> AsTimestampType(const std::shared_ptr<DataType>& type) {
- return timestamp(checked_cast<const TypeWithTimeUnit&>(*type).unit());
-}
-
-// duration to duration
-Status CastImpl(const DurationScalar& from, DurationScalar* to) {
- return util::ConvertTimestampValue(AsTimestampType<DurationType>(from.type),
- AsTimestampType<DurationType>(to->type), from.value)
- .Value(&to->value);
-}
-
-// time to time
-template <typename F, typename ToScalar, typename T = typename ToScalar::TypeClass>
-enable_if_time<T, Status> CastImpl(const TimeScalar<F>& from, ToScalar* to) {
- return util::ConvertTimestampValue(AsTimestampType<F>(from.type),
- AsTimestampType<T>(to->type), from.value)
- .Value(&to->value);
-}
-
-constexpr int64_t kMillisecondsInDay = 86400000;
-
-// date to date
-Status CastImpl(const Date32Scalar& from, Date64Scalar* to) {
- to->value = from.value * kMillisecondsInDay;
- return Status::OK();
-}
-Status CastImpl(const Date64Scalar& from, Date32Scalar* to) {
- to->value = static_cast<int32_t>(from.value / kMillisecondsInDay);
- return Status::OK();
-}
-
-// timestamp to date
-Status CastImpl(const TimestampScalar& from, Date64Scalar* to) {
- ARROW_ASSIGN_OR_RAISE(
- auto millis,
- util::ConvertTimestampValue(from.type, timestamp(TimeUnit::MILLI), from.value));
- to->value = millis - millis % kMillisecondsInDay;
- return Status::OK();
-}
-Status CastImpl(const TimestampScalar& from, Date32Scalar* to) {
- ARROW_ASSIGN_OR_RAISE(
- auto millis,
- util::ConvertTimestampValue(from.type, timestamp(TimeUnit::MILLI), from.value));
- to->value = static_cast<int32_t>(millis / kMillisecondsInDay);
- return Status::OK();
-}
-
-// date to timestamp
-template <typename D>
-Status CastImpl(const DateScalar<D>& from, TimestampScalar* to) {
- int64_t millis = from.value;
- if (std::is_same<D, Date32Type>::value) {
- millis *= kMillisecondsInDay;
- }
- return util::ConvertTimestampValue(timestamp(TimeUnit::MILLI), to->type, millis)
- .Value(&to->value);
-}
-
-// string to any
-template <typename ScalarType>
-Status CastImpl(const StringScalar& from, ScalarType* to) {
- ARROW_ASSIGN_OR_RAISE(auto out,
- Scalar::Parse(to->type, util::string_view(*from.value)));
- to->value = std::move(checked_cast<ScalarType&>(*out).value);
- return Status::OK();
-}
-
-// binary to string
-Status CastImpl(const BinaryScalar& from, StringScalar* to) {
- to->value = from.value;
- return Status::OK();
-}
-
-// formattable to string
-template <typename ScalarType, typename T = typename ScalarType::TypeClass,
- typename Formatter = internal::StringFormatter<T>,
- // note: Value unused but necessary to trigger SFINAE if Formatter is
- // undefined
- typename Value = typename Formatter::value_type>
-Status CastImpl(const ScalarType& from, StringScalar* to) {
- to->value = FormatToBuffer(Formatter{from.type}, from);
- return Status::OK();
-}
-
+template <typename T>
+using scalar_constructor_has_arrow_type =
+ std::is_constructible<typename TypeTraits<T>::ScalarType, std::shared_ptr<DataType>>;
+
+template <typename T, typename R = void>
+using enable_if_scalar_constructor_has_arrow_type =
+ typename std::enable_if<scalar_constructor_has_arrow_type<T>::value, R>::type;
+
+template <typename T, typename R = void>
+using enable_if_scalar_constructor_has_no_arrow_type =
+ typename std::enable_if<!scalar_constructor_has_arrow_type<T>::value, R>::type;
+
+struct MakeNullImpl {
+ template <typename T, typename ScalarType = typename TypeTraits<T>::ScalarType>
+ enable_if_scalar_constructor_has_arrow_type<T, Status> Visit(const T&) {
+ out_ = std::make_shared<ScalarType>(type_);
+ return Status::OK();
+ }
+
+ template <typename T, typename ScalarType = typename TypeTraits<T>::ScalarType>
+ enable_if_scalar_constructor_has_no_arrow_type<T, Status> Visit(const T&) {
+ out_ = std::make_shared<ScalarType>();
+ return Status::OK();
+ }
+
+ std::shared_ptr<Scalar> Finish() && {
+ // Should not fail.
+ DCHECK_OK(VisitTypeInline(*type_, this));
+ return std::move(out_);
+ }
+
+ std::shared_ptr<DataType> type_;
+ std::shared_ptr<Scalar> out_;
+};
+
+std::shared_ptr<Scalar> MakeNullScalar(std::shared_ptr<DataType> type) {
+ return MakeNullImpl{std::move(type), nullptr}.Finish();
+}
+
+std::string Scalar::ToString() const {
+ if (!this->is_valid) {
+ return "null";
+ }
+ if (type->id() == Type::DICTIONARY) {
+ auto dict_scalar = checked_cast<const DictionaryScalar*>(this);
+ return dict_scalar->value.dictionary->ToString() + "[" +
+ dict_scalar->value.index->ToString() + "]";
+ }
+ auto maybe_repr = CastTo(utf8());
+ if (maybe_repr.ok()) {
+ return checked_cast<const StringScalar&>(*maybe_repr.ValueOrDie()).value->ToString();
+ }
+ return "...";
+}
+
+struct ScalarParseImpl {
+ template <typename T, typename = internal::enable_if_parseable<T>>
+ Status Visit(const T& t) {
+ typename internal::StringConverter<T>::value_type value;
+ if (!internal::ParseValue(t, s_.data(), s_.size(), &value)) {
+ return Status::Invalid("error parsing '", s_, "' as scalar of type ", t);
+ }
+ return Finish(value);
+ }
+
+ Status Visit(const BinaryType&) { return FinishWithBuffer(); }
+
+ Status Visit(const LargeBinaryType&) { return FinishWithBuffer(); }
+
+ Status Visit(const FixedSizeBinaryType&) { return FinishWithBuffer(); }
+
+ Status Visit(const DictionaryType& t) {
+ ARROW_ASSIGN_OR_RAISE(auto value, Scalar::Parse(t.value_type(), s_));
+ return Finish(std::move(value));
+ }
+
+ Status Visit(const DataType& t) {
+ return Status::NotImplemented("parsing scalars of type ", t);
+ }
+
+ template <typename Arg>
+ Status Finish(Arg&& arg) {
+ return MakeScalar(std::move(type_), std::forward<Arg>(arg)).Value(&out_);
+ }
+
+ Status FinishWithBuffer() { return Finish(Buffer::FromString(std::string(s_))); }
+
+ Result<std::shared_ptr<Scalar>> Finish() && {
+ RETURN_NOT_OK(VisitTypeInline(*type_, this));
+ return std::move(out_);
+ }
+
+ ScalarParseImpl(std::shared_ptr<DataType> type, util::string_view s)
+ : type_(std::move(type)), s_(s) {}
+
+ std::shared_ptr<DataType> type_;
+ util::string_view s_;
+ std::shared_ptr<Scalar> out_;
+};
+
+Result<std::shared_ptr<Scalar>> Scalar::Parse(const std::shared_ptr<DataType>& type,
+ util::string_view s) {
+ return ScalarParseImpl{type, s}.Finish();
+}
+
+namespace internal {
+Status CheckBufferLength(const FixedSizeBinaryType* t, const std::shared_ptr<Buffer>* b) {
+ return t->byte_width() == (*b)->size()
+ ? Status::OK()
+ : Status::Invalid("buffer length ", (*b)->size(), " is not compatible with ",
+ *t);
+}
+} // namespace internal
+
+namespace {
+// CastImpl(...) assumes `to` points to a non null scalar of the correct type with
+// uninitialized value
+
+// helper for StringFormatter
+template <typename Formatter, typename ScalarType>
+std::shared_ptr<Buffer> FormatToBuffer(Formatter&& formatter, const ScalarType& from) {
+ if (!from.is_valid) {
+ return Buffer::FromString("null");
+ }
+ return formatter(from.value, [&](util::string_view v) {
+ return Buffer::FromString(std::string(v));
+ });
+}
+
+// error fallback
+Status CastImpl(const Scalar& from, Scalar* to) {
+ return Status::NotImplemented("casting scalars of type ", *from.type, " to type ",
+ *to->type);
+}
+
+// numeric to numeric
+template <typename From, typename To>
+Status CastImpl(const NumericScalar<From>& from, NumericScalar<To>* to) {
+ to->value = static_cast<typename To::c_type>(from.value);
+ return Status::OK();
+}
+
+// numeric to boolean
+template <typename T>
+Status CastImpl(const NumericScalar<T>& from, BooleanScalar* to) {
+ constexpr auto zero = static_cast<typename T::c_type>(0);
+ to->value = from.value != zero;
+ return Status::OK();
+}
+
+// boolean to numeric
+template <typename T>
+Status CastImpl(const BooleanScalar& from, NumericScalar<T>* to) {
+ to->value = static_cast<typename T::c_type>(from.value);
+ return Status::OK();
+}
+
+// numeric to temporal
+template <typename From, typename To>
+typename std::enable_if<std::is_base_of<TemporalType, To>::value &&
+ !std::is_same<DayTimeIntervalType, To>::value,
+ Status>::type
+CastImpl(const NumericScalar<From>& from, TemporalScalar<To>* to) {
+ to->value = static_cast<typename To::c_type>(from.value);
+ return Status::OK();
+}
+
+// temporal to numeric
+template <typename From, typename To>
+typename std::enable_if<std::is_base_of<TemporalType, From>::value &&
+ !std::is_same<DayTimeIntervalType, From>::value,
+ Status>::type
+CastImpl(const TemporalScalar<From>& from, NumericScalar<To>* to) {
+ to->value = static_cast<typename To::c_type>(from.value);
+ return Status::OK();
+}
+
+// timestamp to timestamp
+Status CastImpl(const TimestampScalar& from, TimestampScalar* to) {
+ return util::ConvertTimestampValue(from.type, to->type, from.value).Value(&to->value);
+}
+
+template <typename TypeWithTimeUnit>
+std::shared_ptr<DataType> AsTimestampType(const std::shared_ptr<DataType>& type) {
+ return timestamp(checked_cast<const TypeWithTimeUnit&>(*type).unit());
+}
+
+// duration to duration
+Status CastImpl(const DurationScalar& from, DurationScalar* to) {
+ return util::ConvertTimestampValue(AsTimestampType<DurationType>(from.type),
+ AsTimestampType<DurationType>(to->type), from.value)
+ .Value(&to->value);
+}
+
+// time to time
+template <typename F, typename ToScalar, typename T = typename ToScalar::TypeClass>
+enable_if_time<T, Status> CastImpl(const TimeScalar<F>& from, ToScalar* to) {
+ return util::ConvertTimestampValue(AsTimestampType<F>(from.type),
+ AsTimestampType<T>(to->type), from.value)
+ .Value(&to->value);
+}
+
+constexpr int64_t kMillisecondsInDay = 86400000;
+
+// date to date
+Status CastImpl(const Date32Scalar& from, Date64Scalar* to) {
+ to->value = from.value * kMillisecondsInDay;
+ return Status::OK();
+}
+Status CastImpl(const Date64Scalar& from, Date32Scalar* to) {
+ to->value = static_cast<int32_t>(from.value / kMillisecondsInDay);
+ return Status::OK();
+}
+
+// timestamp to date
+Status CastImpl(const TimestampScalar& from, Date64Scalar* to) {
+ ARROW_ASSIGN_OR_RAISE(
+ auto millis,
+ util::ConvertTimestampValue(from.type, timestamp(TimeUnit::MILLI), from.value));
+ to->value = millis - millis % kMillisecondsInDay;
+ return Status::OK();
+}
+Status CastImpl(const TimestampScalar& from, Date32Scalar* to) {
+ ARROW_ASSIGN_OR_RAISE(
+ auto millis,
+ util::ConvertTimestampValue(from.type, timestamp(TimeUnit::MILLI), from.value));
+ to->value = static_cast<int32_t>(millis / kMillisecondsInDay);
+ return Status::OK();
+}
+
+// date to timestamp
+template <typename D>
+Status CastImpl(const DateScalar<D>& from, TimestampScalar* to) {
+ int64_t millis = from.value;
+ if (std::is_same<D, Date32Type>::value) {
+ millis *= kMillisecondsInDay;
+ }
+ return util::ConvertTimestampValue(timestamp(TimeUnit::MILLI), to->type, millis)
+ .Value(&to->value);
+}
+
+// string to any
+template <typename ScalarType>
+Status CastImpl(const StringScalar& from, ScalarType* to) {
+ ARROW_ASSIGN_OR_RAISE(auto out,
+ Scalar::Parse(to->type, util::string_view(*from.value)));
+ to->value = std::move(checked_cast<ScalarType&>(*out).value);
+ return Status::OK();
+}
+
+// binary to string
+Status CastImpl(const BinaryScalar& from, StringScalar* to) {
+ to->value = from.value;
+ return Status::OK();
+}
+
+// formattable to string
+template <typename ScalarType, typename T = typename ScalarType::TypeClass,
+ typename Formatter = internal::StringFormatter<T>,
+ // note: Value unused but necessary to trigger SFINAE if Formatter is
+ // undefined
+ typename Value = typename Formatter::value_type>
+Status CastImpl(const ScalarType& from, StringScalar* to) {
+ to->value = FormatToBuffer(Formatter{from.type}, from);
+ return Status::OK();
+}
+
Status CastImpl(const Decimal128Scalar& from, StringScalar* to) {
auto from_type = checked_cast<const Decimal128Type*>(from.type.get());
to->value = Buffer::FromString(from.value.ToString(from_type->scale()));
@@ -576,84 +576,84 @@ Status CastImpl(const StructScalar& from, StringScalar* to) {
return Status::OK();
}
-struct CastImplVisitor {
- Status NotImplemented() {
- return Status::NotImplemented("cast to ", *to_type_, " from ", *from_.type);
- }
-
- const Scalar& from_;
- const std::shared_ptr<DataType>& to_type_;
- Scalar* out_;
-};
-
-template <typename ToType>
-struct FromTypeVisitor : CastImplVisitor {
- using ToScalar = typename TypeTraits<ToType>::ScalarType;
-
- FromTypeVisitor(const Scalar& from, const std::shared_ptr<DataType>& to_type,
- Scalar* out)
- : CastImplVisitor{from, to_type, out} {}
-
- template <typename FromType>
- Status Visit(const FromType&) {
- return CastImpl(checked_cast<const typename TypeTraits<FromType>::ScalarType&>(from_),
- checked_cast<ToScalar*>(out_));
- }
-
- // identity cast only for parameter free types
- template <typename T1 = ToType>
- typename std::enable_if<TypeTraits<T1>::is_parameter_free, Status>::type Visit(
- const ToType&) {
- checked_cast<ToScalar*>(out_)->value = checked_cast<const ToScalar&>(from_).value;
- return Status::OK();
- }
-
- Status Visit(const NullType&) { return NotImplemented(); }
- Status Visit(const SparseUnionType&) { return NotImplemented(); }
- Status Visit(const DenseUnionType&) { return NotImplemented(); }
- Status Visit(const DictionaryType&) { return NotImplemented(); }
- Status Visit(const ExtensionType&) { return NotImplemented(); }
-};
-
-struct ToTypeVisitor : CastImplVisitor {
- ToTypeVisitor(const Scalar& from, const std::shared_ptr<DataType>& to_type, Scalar* out)
- : CastImplVisitor{from, to_type, out} {}
-
- template <typename ToType>
- Status Visit(const ToType&) {
- FromTypeVisitor<ToType> unpack_from_type{from_, to_type_, out_};
- return VisitTypeInline(*from_.type, &unpack_from_type);
- }
-
- Status Visit(const NullType&) {
- if (from_.is_valid) {
- return Status::Invalid("attempting to cast non-null scalar to NullScalar");
- }
- return Status::OK();
- }
-
- Status Visit(const DictionaryType& dict_type) {
- auto& out = checked_cast<DictionaryScalar*>(out_)->value;
- ARROW_ASSIGN_OR_RAISE(auto cast_value, from_.CastTo(dict_type.value_type()));
- ARROW_ASSIGN_OR_RAISE(out.dictionary, MakeArrayFromScalar(*cast_value, 1));
- return Int32Scalar(0).CastTo(dict_type.index_type()).Value(&out.index);
- }
-
- Status Visit(const SparseUnionType&) { return NotImplemented(); }
- Status Visit(const DenseUnionType&) { return NotImplemented(); }
- Status Visit(const ExtensionType&) { return NotImplemented(); }
-};
-
-} // namespace
-
-Result<std::shared_ptr<Scalar>> Scalar::CastTo(std::shared_ptr<DataType> to) const {
- std::shared_ptr<Scalar> out = MakeNullScalar(to);
- if (is_valid) {
- out->is_valid = true;
- ToTypeVisitor unpack_to_type{*this, to, out.get()};
- RETURN_NOT_OK(VisitTypeInline(*to, &unpack_to_type));
- }
- return out;
-}
-
-} // namespace arrow
+struct CastImplVisitor {
+ Status NotImplemented() {
+ return Status::NotImplemented("cast to ", *to_type_, " from ", *from_.type);
+ }
+
+ const Scalar& from_;
+ const std::shared_ptr<DataType>& to_type_;
+ Scalar* out_;
+};
+
+template <typename ToType>
+struct FromTypeVisitor : CastImplVisitor {
+ using ToScalar = typename TypeTraits<ToType>::ScalarType;
+
+ FromTypeVisitor(const Scalar& from, const std::shared_ptr<DataType>& to_type,
+ Scalar* out)
+ : CastImplVisitor{from, to_type, out} {}
+
+ template <typename FromType>
+ Status Visit(const FromType&) {
+ return CastImpl(checked_cast<const typename TypeTraits<FromType>::ScalarType&>(from_),
+ checked_cast<ToScalar*>(out_));
+ }
+
+ // identity cast only for parameter free types
+ template <typename T1 = ToType>
+ typename std::enable_if<TypeTraits<T1>::is_parameter_free, Status>::type Visit(
+ const ToType&) {
+ checked_cast<ToScalar*>(out_)->value = checked_cast<const ToScalar&>(from_).value;
+ return Status::OK();
+ }
+
+ Status Visit(const NullType&) { return NotImplemented(); }
+ Status Visit(const SparseUnionType&) { return NotImplemented(); }
+ Status Visit(const DenseUnionType&) { return NotImplemented(); }
+ Status Visit(const DictionaryType&) { return NotImplemented(); }
+ Status Visit(const ExtensionType&) { return NotImplemented(); }
+};
+
+struct ToTypeVisitor : CastImplVisitor {
+ ToTypeVisitor(const Scalar& from, const std::shared_ptr<DataType>& to_type, Scalar* out)
+ : CastImplVisitor{from, to_type, out} {}
+
+ template <typename ToType>
+ Status Visit(const ToType&) {
+ FromTypeVisitor<ToType> unpack_from_type{from_, to_type_, out_};
+ return VisitTypeInline(*from_.type, &unpack_from_type);
+ }
+
+ Status Visit(const NullType&) {
+ if (from_.is_valid) {
+ return Status::Invalid("attempting to cast non-null scalar to NullScalar");
+ }
+ return Status::OK();
+ }
+
+ Status Visit(const DictionaryType& dict_type) {
+ auto& out = checked_cast<DictionaryScalar*>(out_)->value;
+ ARROW_ASSIGN_OR_RAISE(auto cast_value, from_.CastTo(dict_type.value_type()));
+ ARROW_ASSIGN_OR_RAISE(out.dictionary, MakeArrayFromScalar(*cast_value, 1));
+ return Int32Scalar(0).CastTo(dict_type.index_type()).Value(&out.index);
+ }
+
+ Status Visit(const SparseUnionType&) { return NotImplemented(); }
+ Status Visit(const DenseUnionType&) { return NotImplemented(); }
+ Status Visit(const ExtensionType&) { return NotImplemented(); }
+};
+
+} // namespace
+
+Result<std::shared_ptr<Scalar>> Scalar::CastTo(std::shared_ptr<DataType> to) const {
+ std::shared_ptr<Scalar> out = MakeNullScalar(to);
+ if (is_valid) {
+ out->is_valid = true;
+ ToTypeVisitor unpack_to_type{*this, to, out.get()};
+ RETURN_NOT_OK(VisitTypeInline(*to, &unpack_to_type));
+ }
+ return out;
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/scalar.h b/contrib/libs/apache/arrow/cpp/src/arrow/scalar.h
index 24744859686..6a707e11174 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/scalar.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/scalar.h
@@ -1,355 +1,355 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Object model for scalar (non-Array) values. Not intended for use with large
-// amounts of data
-//
-// NOTE: This API is experimental as of the 0.13 version and subject to change
-// without deprecation warnings
-
-#pragma once
-
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "arrow/compare.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/type_fwd.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/compare.h"
-#include "arrow/util/decimal.h"
-#include "arrow/util/string_view.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class Array;
-
-/// \brief Base class for scalar values
-///
-/// A Scalar represents a single value with a specific DataType.
-/// Scalars are useful for passing single value inputs to compute functions,
-/// or for representing individual array elements (with a non-trivial
-/// wrapping cost, though).
-struct ARROW_EXPORT Scalar : public util::EqualityComparable<Scalar> {
- virtual ~Scalar() = default;
-
- explicit Scalar(std::shared_ptr<DataType> type) : type(std::move(type)) {}
-
- /// \brief The type of the scalar value
- std::shared_ptr<DataType> type;
-
- /// \brief Whether the value is valid (not null) or not
- bool is_valid = false;
-
- using util::EqualityComparable<Scalar>::operator==;
- using util::EqualityComparable<Scalar>::Equals;
- bool Equals(const Scalar& other,
- const EqualOptions& options = EqualOptions::Defaults()) const;
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Object model for scalar (non-Array) values. Not intended for use with large
+// amounts of data
+//
+// NOTE: This API is experimental as of the 0.13 version and subject to change
+// without deprecation warnings
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/compare.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_fwd.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/compare.h"
+#include "arrow/util/decimal.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Array;
+
+/// \brief Base class for scalar values
+///
+/// A Scalar represents a single value with a specific DataType.
+/// Scalars are useful for passing single value inputs to compute functions,
+/// or for representing individual array elements (with a non-trivial
+/// wrapping cost, though).
+struct ARROW_EXPORT Scalar : public util::EqualityComparable<Scalar> {
+ virtual ~Scalar() = default;
+
+ explicit Scalar(std::shared_ptr<DataType> type) : type(std::move(type)) {}
+
+ /// \brief The type of the scalar value
+ std::shared_ptr<DataType> type;
+
+ /// \brief Whether the value is valid (not null) or not
+ bool is_valid = false;
+
+ using util::EqualityComparable<Scalar>::operator==;
+ using util::EqualityComparable<Scalar>::Equals;
+ bool Equals(const Scalar& other,
+ const EqualOptions& options = EqualOptions::Defaults()) const;
+
bool ApproxEquals(const Scalar& other,
const EqualOptions& options = EqualOptions::Defaults()) const;
- struct ARROW_EXPORT Hash {
+ struct ARROW_EXPORT Hash {
size_t operator()(const Scalar& scalar) const { return scalar.hash(); }
-
- size_t operator()(const std::shared_ptr<Scalar>& scalar) const {
+
+ size_t operator()(const std::shared_ptr<Scalar>& scalar) const {
return scalar->hash();
- }
- };
-
+ }
+ };
+
size_t hash() const;
- std::string ToString() const;
-
- static Result<std::shared_ptr<Scalar>> Parse(const std::shared_ptr<DataType>& type,
- util::string_view repr);
-
- // TODO(bkietz) add compute::CastOptions
- Result<std::shared_ptr<Scalar>> CastTo(std::shared_ptr<DataType> to) const;
-
- protected:
- Scalar(std::shared_ptr<DataType> type, bool is_valid)
- : type(std::move(type)), is_valid(is_valid) {}
-};
-
-/// \defgroup concrete-scalar-classes Concrete Scalar subclasses
-///
-/// @{
-
-/// \brief A scalar value for NullType. Never valid
-struct ARROW_EXPORT NullScalar : public Scalar {
- public:
- using TypeClass = NullType;
-
- NullScalar() : Scalar{null(), false} {}
-};
-
-/// @}
-
-namespace internal {
-
-struct ARROW_EXPORT PrimitiveScalarBase : public Scalar {
- using Scalar::Scalar;
- virtual void* mutable_data() = 0;
- virtual const void* data() const = 0;
-};
-
-template <typename T, typename CType = typename T::c_type>
-struct ARROW_EXPORT PrimitiveScalar : public PrimitiveScalarBase {
- using PrimitiveScalarBase::PrimitiveScalarBase;
- using TypeClass = T;
- using ValueType = CType;
-
- // Non-null constructor.
- PrimitiveScalar(ValueType value, std::shared_ptr<DataType> type)
- : PrimitiveScalarBase(std::move(type), true), value(value) {}
-
- explicit PrimitiveScalar(std::shared_ptr<DataType> type)
- : PrimitiveScalarBase(std::move(type), false) {}
-
- ValueType value{};
-
- void* mutable_data() override { return &value; }
- const void* data() const override { return &value; }
-};
-
-} // namespace internal
-
-/// \addtogroup concrete-scalar-classes Concrete Scalar subclasses
-///
-/// @{
-
-struct ARROW_EXPORT BooleanScalar : public internal::PrimitiveScalar<BooleanType, bool> {
- using Base = internal::PrimitiveScalar<BooleanType, bool>;
- using Base::Base;
-
- explicit BooleanScalar(bool value) : Base(value, boolean()) {}
-
- BooleanScalar() : Base(boolean()) {}
-};
-
-template <typename T>
-struct NumericScalar : public internal::PrimitiveScalar<T> {
- using Base = typename internal::PrimitiveScalar<T>;
- using Base::Base;
- using TypeClass = typename Base::TypeClass;
- using ValueType = typename Base::ValueType;
-
- explicit NumericScalar(ValueType value)
- : Base(value, TypeTraits<T>::type_singleton()) {}
-
- NumericScalar() : Base(TypeTraits<T>::type_singleton()) {}
-};
-
-struct ARROW_EXPORT Int8Scalar : public NumericScalar<Int8Type> {
- using NumericScalar<Int8Type>::NumericScalar;
-};
-
-struct ARROW_EXPORT Int16Scalar : public NumericScalar<Int16Type> {
- using NumericScalar<Int16Type>::NumericScalar;
-};
-
-struct ARROW_EXPORT Int32Scalar : public NumericScalar<Int32Type> {
- using NumericScalar<Int32Type>::NumericScalar;
-};
-
-struct ARROW_EXPORT Int64Scalar : public NumericScalar<Int64Type> {
- using NumericScalar<Int64Type>::NumericScalar;
-};
-
-struct ARROW_EXPORT UInt8Scalar : public NumericScalar<UInt8Type> {
- using NumericScalar<UInt8Type>::NumericScalar;
-};
-
-struct ARROW_EXPORT UInt16Scalar : public NumericScalar<UInt16Type> {
- using NumericScalar<UInt16Type>::NumericScalar;
-};
-
-struct ARROW_EXPORT UInt32Scalar : public NumericScalar<UInt32Type> {
- using NumericScalar<UInt32Type>::NumericScalar;
-};
-
-struct ARROW_EXPORT UInt64Scalar : public NumericScalar<UInt64Type> {
- using NumericScalar<UInt64Type>::NumericScalar;
-};
-
-struct ARROW_EXPORT HalfFloatScalar : public NumericScalar<HalfFloatType> {
- using NumericScalar<HalfFloatType>::NumericScalar;
-};
-
-struct ARROW_EXPORT FloatScalar : public NumericScalar<FloatType> {
- using NumericScalar<FloatType>::NumericScalar;
-};
-
-struct ARROW_EXPORT DoubleScalar : public NumericScalar<DoubleType> {
- using NumericScalar<DoubleType>::NumericScalar;
-};
-
-struct ARROW_EXPORT BaseBinaryScalar : public Scalar {
- using Scalar::Scalar;
- using ValueType = std::shared_ptr<Buffer>;
-
- std::shared_ptr<Buffer> value;
-
- protected:
- BaseBinaryScalar(std::shared_ptr<Buffer> value, std::shared_ptr<DataType> type)
- : Scalar{std::move(type), true}, value(std::move(value)) {}
-};
-
-struct ARROW_EXPORT BinaryScalar : public BaseBinaryScalar {
- using BaseBinaryScalar::BaseBinaryScalar;
- using TypeClass = BinaryType;
-
- BinaryScalar(std::shared_ptr<Buffer> value, std::shared_ptr<DataType> type)
- : BaseBinaryScalar(std::move(value), std::move(type)) {}
-
- explicit BinaryScalar(std::shared_ptr<Buffer> value)
- : BinaryScalar(std::move(value), binary()) {}
-
- BinaryScalar() : BinaryScalar(binary()) {}
-};
-
-struct ARROW_EXPORT StringScalar : public BinaryScalar {
- using BinaryScalar::BinaryScalar;
- using TypeClass = StringType;
-
- explicit StringScalar(std::shared_ptr<Buffer> value)
- : StringScalar(std::move(value), utf8()) {}
-
- explicit StringScalar(std::string s);
-
- StringScalar() : StringScalar(utf8()) {}
-};
-
-struct ARROW_EXPORT LargeBinaryScalar : public BaseBinaryScalar {
- using BaseBinaryScalar::BaseBinaryScalar;
- using TypeClass = LargeBinaryType;
-
- LargeBinaryScalar(std::shared_ptr<Buffer> value, std::shared_ptr<DataType> type)
- : BaseBinaryScalar(std::move(value), std::move(type)) {}
-
- explicit LargeBinaryScalar(std::shared_ptr<Buffer> value)
- : LargeBinaryScalar(std::move(value), large_binary()) {}
-
- LargeBinaryScalar() : LargeBinaryScalar(large_binary()) {}
-};
-
-struct ARROW_EXPORT LargeStringScalar : public LargeBinaryScalar {
- using LargeBinaryScalar::LargeBinaryScalar;
- using TypeClass = LargeStringType;
-
- explicit LargeStringScalar(std::shared_ptr<Buffer> value)
- : LargeStringScalar(std::move(value), large_utf8()) {}
-
- explicit LargeStringScalar(std::string s);
-
- LargeStringScalar() : LargeStringScalar(large_utf8()) {}
-};
-
-struct ARROW_EXPORT FixedSizeBinaryScalar : public BinaryScalar {
- using TypeClass = FixedSizeBinaryType;
-
- FixedSizeBinaryScalar(std::shared_ptr<Buffer> value, std::shared_ptr<DataType> type);
-
- explicit FixedSizeBinaryScalar(std::shared_ptr<DataType> type) : BinaryScalar(type) {}
-};
-
-template <typename T>
-struct ARROW_EXPORT TemporalScalar : internal::PrimitiveScalar<T> {
- using internal::PrimitiveScalar<T>::PrimitiveScalar;
- using ValueType = typename TemporalScalar<T>::ValueType;
-
- explicit TemporalScalar(ValueType value, std::shared_ptr<DataType> type)
- : internal::PrimitiveScalar<T>(std::move(value), type) {}
-};
-
-template <typename T>
-struct ARROW_EXPORT DateScalar : public TemporalScalar<T> {
- using TemporalScalar<T>::TemporalScalar;
- using ValueType = typename TemporalScalar<T>::ValueType;
-
- explicit DateScalar(ValueType value)
- : TemporalScalar<T>(std::move(value), TypeTraits<T>::type_singleton()) {}
- DateScalar() : TemporalScalar<T>(TypeTraits<T>::type_singleton()) {}
-};
-
-struct ARROW_EXPORT Date32Scalar : public DateScalar<Date32Type> {
- using DateScalar<Date32Type>::DateScalar;
-};
-
-struct ARROW_EXPORT Date64Scalar : public DateScalar<Date64Type> {
- using DateScalar<Date64Type>::DateScalar;
-};
-
-template <typename T>
-struct ARROW_EXPORT TimeScalar : public TemporalScalar<T> {
- using TemporalScalar<T>::TemporalScalar;
-};
-
-struct ARROW_EXPORT Time32Scalar : public TimeScalar<Time32Type> {
- using TimeScalar<Time32Type>::TimeScalar;
-};
-
-struct ARROW_EXPORT Time64Scalar : public TimeScalar<Time64Type> {
- using TimeScalar<Time64Type>::TimeScalar;
-};
-
-struct ARROW_EXPORT TimestampScalar : public TemporalScalar<TimestampType> {
- using TemporalScalar<TimestampType>::TemporalScalar;
-};
-
-template <typename T>
-struct ARROW_EXPORT IntervalScalar : public TemporalScalar<T> {
- using TemporalScalar<T>::TemporalScalar;
- using ValueType = typename TemporalScalar<T>::ValueType;
-
- explicit IntervalScalar(ValueType value)
- : TemporalScalar<T>(value, TypeTraits<T>::type_singleton()) {}
- IntervalScalar() : TemporalScalar<T>(TypeTraits<T>::type_singleton()) {}
-};
-
-struct ARROW_EXPORT MonthIntervalScalar : public IntervalScalar<MonthIntervalType> {
- using IntervalScalar<MonthIntervalType>::IntervalScalar;
-};
-
-struct ARROW_EXPORT DayTimeIntervalScalar : public IntervalScalar<DayTimeIntervalType> {
- using IntervalScalar<DayTimeIntervalType>::IntervalScalar;
-};
-
-struct ARROW_EXPORT DurationScalar : public TemporalScalar<DurationType> {
- using TemporalScalar<DurationType>::TemporalScalar;
-};
-
-struct ARROW_EXPORT Decimal128Scalar : public Scalar {
- using Scalar::Scalar;
- using TypeClass = Decimal128Type;
- using ValueType = Decimal128;
-
- Decimal128Scalar(Decimal128 value, std::shared_ptr<DataType> type)
- : Scalar(std::move(type), true), value(value) {}
-
- Decimal128 value;
-};
-
+ std::string ToString() const;
+
+ static Result<std::shared_ptr<Scalar>> Parse(const std::shared_ptr<DataType>& type,
+ util::string_view repr);
+
+ // TODO(bkietz) add compute::CastOptions
+ Result<std::shared_ptr<Scalar>> CastTo(std::shared_ptr<DataType> to) const;
+
+ protected:
+ Scalar(std::shared_ptr<DataType> type, bool is_valid)
+ : type(std::move(type)), is_valid(is_valid) {}
+};
+
+/// \defgroup concrete-scalar-classes Concrete Scalar subclasses
+///
+/// @{
+
+/// \brief A scalar value for NullType. Never valid
+struct ARROW_EXPORT NullScalar : public Scalar {
+ public:
+ using TypeClass = NullType;
+
+ NullScalar() : Scalar{null(), false} {}
+};
+
+/// @}
+
+namespace internal {
+
+struct ARROW_EXPORT PrimitiveScalarBase : public Scalar {
+ using Scalar::Scalar;
+ virtual void* mutable_data() = 0;
+ virtual const void* data() const = 0;
+};
+
+template <typename T, typename CType = typename T::c_type>
+struct ARROW_EXPORT PrimitiveScalar : public PrimitiveScalarBase {
+ using PrimitiveScalarBase::PrimitiveScalarBase;
+ using TypeClass = T;
+ using ValueType = CType;
+
+ // Non-null constructor.
+ PrimitiveScalar(ValueType value, std::shared_ptr<DataType> type)
+ : PrimitiveScalarBase(std::move(type), true), value(value) {}
+
+ explicit PrimitiveScalar(std::shared_ptr<DataType> type)
+ : PrimitiveScalarBase(std::move(type), false) {}
+
+ ValueType value{};
+
+ void* mutable_data() override { return &value; }
+ const void* data() const override { return &value; }
+};
+
+} // namespace internal
+
+/// \addtogroup concrete-scalar-classes Concrete Scalar subclasses
+///
+/// @{
+
+struct ARROW_EXPORT BooleanScalar : public internal::PrimitiveScalar<BooleanType, bool> {
+ using Base = internal::PrimitiveScalar<BooleanType, bool>;
+ using Base::Base;
+
+ explicit BooleanScalar(bool value) : Base(value, boolean()) {}
+
+ BooleanScalar() : Base(boolean()) {}
+};
+
+template <typename T>
+struct NumericScalar : public internal::PrimitiveScalar<T> {
+ using Base = typename internal::PrimitiveScalar<T>;
+ using Base::Base;
+ using TypeClass = typename Base::TypeClass;
+ using ValueType = typename Base::ValueType;
+
+ explicit NumericScalar(ValueType value)
+ : Base(value, TypeTraits<T>::type_singleton()) {}
+
+ NumericScalar() : Base(TypeTraits<T>::type_singleton()) {}
+};
+
+struct ARROW_EXPORT Int8Scalar : public NumericScalar<Int8Type> {
+ using NumericScalar<Int8Type>::NumericScalar;
+};
+
+struct ARROW_EXPORT Int16Scalar : public NumericScalar<Int16Type> {
+ using NumericScalar<Int16Type>::NumericScalar;
+};
+
+struct ARROW_EXPORT Int32Scalar : public NumericScalar<Int32Type> {
+ using NumericScalar<Int32Type>::NumericScalar;
+};
+
+struct ARROW_EXPORT Int64Scalar : public NumericScalar<Int64Type> {
+ using NumericScalar<Int64Type>::NumericScalar;
+};
+
+struct ARROW_EXPORT UInt8Scalar : public NumericScalar<UInt8Type> {
+ using NumericScalar<UInt8Type>::NumericScalar;
+};
+
+struct ARROW_EXPORT UInt16Scalar : public NumericScalar<UInt16Type> {
+ using NumericScalar<UInt16Type>::NumericScalar;
+};
+
+struct ARROW_EXPORT UInt32Scalar : public NumericScalar<UInt32Type> {
+ using NumericScalar<UInt32Type>::NumericScalar;
+};
+
+struct ARROW_EXPORT UInt64Scalar : public NumericScalar<UInt64Type> {
+ using NumericScalar<UInt64Type>::NumericScalar;
+};
+
+struct ARROW_EXPORT HalfFloatScalar : public NumericScalar<HalfFloatType> {
+ using NumericScalar<HalfFloatType>::NumericScalar;
+};
+
+struct ARROW_EXPORT FloatScalar : public NumericScalar<FloatType> {
+ using NumericScalar<FloatType>::NumericScalar;
+};
+
+struct ARROW_EXPORT DoubleScalar : public NumericScalar<DoubleType> {
+ using NumericScalar<DoubleType>::NumericScalar;
+};
+
+struct ARROW_EXPORT BaseBinaryScalar : public Scalar {
+ using Scalar::Scalar;
+ using ValueType = std::shared_ptr<Buffer>;
+
+ std::shared_ptr<Buffer> value;
+
+ protected:
+ BaseBinaryScalar(std::shared_ptr<Buffer> value, std::shared_ptr<DataType> type)
+ : Scalar{std::move(type), true}, value(std::move(value)) {}
+};
+
+struct ARROW_EXPORT BinaryScalar : public BaseBinaryScalar {
+ using BaseBinaryScalar::BaseBinaryScalar;
+ using TypeClass = BinaryType;
+
+ BinaryScalar(std::shared_ptr<Buffer> value, std::shared_ptr<DataType> type)
+ : BaseBinaryScalar(std::move(value), std::move(type)) {}
+
+ explicit BinaryScalar(std::shared_ptr<Buffer> value)
+ : BinaryScalar(std::move(value), binary()) {}
+
+ BinaryScalar() : BinaryScalar(binary()) {}
+};
+
+struct ARROW_EXPORT StringScalar : public BinaryScalar {
+ using BinaryScalar::BinaryScalar;
+ using TypeClass = StringType;
+
+ explicit StringScalar(std::shared_ptr<Buffer> value)
+ : StringScalar(std::move(value), utf8()) {}
+
+ explicit StringScalar(std::string s);
+
+ StringScalar() : StringScalar(utf8()) {}
+};
+
+struct ARROW_EXPORT LargeBinaryScalar : public BaseBinaryScalar {
+ using BaseBinaryScalar::BaseBinaryScalar;
+ using TypeClass = LargeBinaryType;
+
+ LargeBinaryScalar(std::shared_ptr<Buffer> value, std::shared_ptr<DataType> type)
+ : BaseBinaryScalar(std::move(value), std::move(type)) {}
+
+ explicit LargeBinaryScalar(std::shared_ptr<Buffer> value)
+ : LargeBinaryScalar(std::move(value), large_binary()) {}
+
+ LargeBinaryScalar() : LargeBinaryScalar(large_binary()) {}
+};
+
+struct ARROW_EXPORT LargeStringScalar : public LargeBinaryScalar {
+ using LargeBinaryScalar::LargeBinaryScalar;
+ using TypeClass = LargeStringType;
+
+ explicit LargeStringScalar(std::shared_ptr<Buffer> value)
+ : LargeStringScalar(std::move(value), large_utf8()) {}
+
+ explicit LargeStringScalar(std::string s);
+
+ LargeStringScalar() : LargeStringScalar(large_utf8()) {}
+};
+
+struct ARROW_EXPORT FixedSizeBinaryScalar : public BinaryScalar {
+ using TypeClass = FixedSizeBinaryType;
+
+ FixedSizeBinaryScalar(std::shared_ptr<Buffer> value, std::shared_ptr<DataType> type);
+
+ explicit FixedSizeBinaryScalar(std::shared_ptr<DataType> type) : BinaryScalar(type) {}
+};
+
+template <typename T>
+struct ARROW_EXPORT TemporalScalar : internal::PrimitiveScalar<T> {
+ using internal::PrimitiveScalar<T>::PrimitiveScalar;
+ using ValueType = typename TemporalScalar<T>::ValueType;
+
+ explicit TemporalScalar(ValueType value, std::shared_ptr<DataType> type)
+ : internal::PrimitiveScalar<T>(std::move(value), type) {}
+};
+
+template <typename T>
+struct ARROW_EXPORT DateScalar : public TemporalScalar<T> {
+ using TemporalScalar<T>::TemporalScalar;
+ using ValueType = typename TemporalScalar<T>::ValueType;
+
+ explicit DateScalar(ValueType value)
+ : TemporalScalar<T>(std::move(value), TypeTraits<T>::type_singleton()) {}
+ DateScalar() : TemporalScalar<T>(TypeTraits<T>::type_singleton()) {}
+};
+
+struct ARROW_EXPORT Date32Scalar : public DateScalar<Date32Type> {
+ using DateScalar<Date32Type>::DateScalar;
+};
+
+struct ARROW_EXPORT Date64Scalar : public DateScalar<Date64Type> {
+ using DateScalar<Date64Type>::DateScalar;
+};
+
+template <typename T>
+struct ARROW_EXPORT TimeScalar : public TemporalScalar<T> {
+ using TemporalScalar<T>::TemporalScalar;
+};
+
+struct ARROW_EXPORT Time32Scalar : public TimeScalar<Time32Type> {
+ using TimeScalar<Time32Type>::TimeScalar;
+};
+
+struct ARROW_EXPORT Time64Scalar : public TimeScalar<Time64Type> {
+ using TimeScalar<Time64Type>::TimeScalar;
+};
+
+struct ARROW_EXPORT TimestampScalar : public TemporalScalar<TimestampType> {
+ using TemporalScalar<TimestampType>::TemporalScalar;
+};
+
+template <typename T>
+struct ARROW_EXPORT IntervalScalar : public TemporalScalar<T> {
+ using TemporalScalar<T>::TemporalScalar;
+ using ValueType = typename TemporalScalar<T>::ValueType;
+
+ explicit IntervalScalar(ValueType value)
+ : TemporalScalar<T>(value, TypeTraits<T>::type_singleton()) {}
+ IntervalScalar() : TemporalScalar<T>(TypeTraits<T>::type_singleton()) {}
+};
+
+struct ARROW_EXPORT MonthIntervalScalar : public IntervalScalar<MonthIntervalType> {
+ using IntervalScalar<MonthIntervalType>::IntervalScalar;
+};
+
+struct ARROW_EXPORT DayTimeIntervalScalar : public IntervalScalar<DayTimeIntervalType> {
+ using IntervalScalar<DayTimeIntervalType>::IntervalScalar;
+};
+
+struct ARROW_EXPORT DurationScalar : public TemporalScalar<DurationType> {
+ using TemporalScalar<DurationType>::TemporalScalar;
+};
+
+struct ARROW_EXPORT Decimal128Scalar : public Scalar {
+ using Scalar::Scalar;
+ using TypeClass = Decimal128Type;
+ using ValueType = Decimal128;
+
+ Decimal128Scalar(Decimal128 value, std::shared_ptr<DataType> type)
+ : Scalar(std::move(type), true), value(value) {}
+
+ Decimal128 value;
+};
+
struct ARROW_EXPORT Decimal256Scalar : public Scalar {
using Scalar::Scalar;
using TypeClass = Decimal256Type;
@@ -361,177 +361,177 @@ struct ARROW_EXPORT Decimal256Scalar : public Scalar {
Decimal256 value;
};
-struct ARROW_EXPORT BaseListScalar : public Scalar {
- using Scalar::Scalar;
- using ValueType = std::shared_ptr<Array>;
-
- BaseListScalar(std::shared_ptr<Array> value, std::shared_ptr<DataType> type);
-
- std::shared_ptr<Array> value;
-};
-
-struct ARROW_EXPORT ListScalar : public BaseListScalar {
- using TypeClass = ListType;
- using BaseListScalar::BaseListScalar;
-
- explicit ListScalar(std::shared_ptr<Array> value);
-};
-
-struct ARROW_EXPORT LargeListScalar : public BaseListScalar {
- using TypeClass = LargeListType;
- using BaseListScalar::BaseListScalar;
-
- explicit LargeListScalar(std::shared_ptr<Array> value);
-};
-
-struct ARROW_EXPORT MapScalar : public BaseListScalar {
- using TypeClass = MapType;
- using BaseListScalar::BaseListScalar;
-
- explicit MapScalar(std::shared_ptr<Array> value);
-};
-
-struct ARROW_EXPORT FixedSizeListScalar : public BaseListScalar {
- using TypeClass = FixedSizeListType;
- using BaseListScalar::BaseListScalar;
-
- FixedSizeListScalar(std::shared_ptr<Array> value, std::shared_ptr<DataType> type);
-
- explicit FixedSizeListScalar(std::shared_ptr<Array> value);
-};
-
-struct ARROW_EXPORT StructScalar : public Scalar {
- using TypeClass = StructType;
- using ValueType = std::vector<std::shared_ptr<Scalar>>;
-
- ScalarVector value;
-
- Result<std::shared_ptr<Scalar>> field(FieldRef ref) const;
-
- StructScalar(ValueType value, std::shared_ptr<DataType> type)
- : Scalar(std::move(type), true), value(std::move(value)) {}
-
+struct ARROW_EXPORT BaseListScalar : public Scalar {
+ using Scalar::Scalar;
+ using ValueType = std::shared_ptr<Array>;
+
+ BaseListScalar(std::shared_ptr<Array> value, std::shared_ptr<DataType> type);
+
+ std::shared_ptr<Array> value;
+};
+
+struct ARROW_EXPORT ListScalar : public BaseListScalar {
+ using TypeClass = ListType;
+ using BaseListScalar::BaseListScalar;
+
+ explicit ListScalar(std::shared_ptr<Array> value);
+};
+
+struct ARROW_EXPORT LargeListScalar : public BaseListScalar {
+ using TypeClass = LargeListType;
+ using BaseListScalar::BaseListScalar;
+
+ explicit LargeListScalar(std::shared_ptr<Array> value);
+};
+
+struct ARROW_EXPORT MapScalar : public BaseListScalar {
+ using TypeClass = MapType;
+ using BaseListScalar::BaseListScalar;
+
+ explicit MapScalar(std::shared_ptr<Array> value);
+};
+
+struct ARROW_EXPORT FixedSizeListScalar : public BaseListScalar {
+ using TypeClass = FixedSizeListType;
+ using BaseListScalar::BaseListScalar;
+
+ FixedSizeListScalar(std::shared_ptr<Array> value, std::shared_ptr<DataType> type);
+
+ explicit FixedSizeListScalar(std::shared_ptr<Array> value);
+};
+
+struct ARROW_EXPORT StructScalar : public Scalar {
+ using TypeClass = StructType;
+ using ValueType = std::vector<std::shared_ptr<Scalar>>;
+
+ ScalarVector value;
+
+ Result<std::shared_ptr<Scalar>> field(FieldRef ref) const;
+
+ StructScalar(ValueType value, std::shared_ptr<DataType> type)
+ : Scalar(std::move(type), true), value(std::move(value)) {}
+
static Result<std::shared_ptr<StructScalar>> Make(ValueType value,
std::vector<std::string> field_names);
- explicit StructScalar(std::shared_ptr<DataType> type) : Scalar(std::move(type)) {}
-};
-
-struct ARROW_EXPORT UnionScalar : public Scalar {
- using Scalar::Scalar;
- using ValueType = std::shared_ptr<Scalar>;
- ValueType value;
-
- UnionScalar(ValueType value, std::shared_ptr<DataType> type)
- : Scalar(std::move(type), true), value(std::move(value)) {}
-};
-
-struct ARROW_EXPORT SparseUnionScalar : public UnionScalar {
- using UnionScalar::UnionScalar;
- using TypeClass = SparseUnionType;
-};
-
-struct ARROW_EXPORT DenseUnionScalar : public UnionScalar {
- using UnionScalar::UnionScalar;
- using TypeClass = DenseUnionType;
-};
-
-struct ARROW_EXPORT DictionaryScalar : public Scalar {
- using TypeClass = DictionaryType;
- struct ValueType {
- std::shared_ptr<Scalar> index;
- std::shared_ptr<Array> dictionary;
- } value;
-
- explicit DictionaryScalar(std::shared_ptr<DataType> type);
-
- DictionaryScalar(ValueType value, std::shared_ptr<DataType> type, bool is_valid = true)
- : Scalar(std::move(type), is_valid), value(std::move(value)) {}
-
+ explicit StructScalar(std::shared_ptr<DataType> type) : Scalar(std::move(type)) {}
+};
+
+struct ARROW_EXPORT UnionScalar : public Scalar {
+ using Scalar::Scalar;
+ using ValueType = std::shared_ptr<Scalar>;
+ ValueType value;
+
+ UnionScalar(ValueType value, std::shared_ptr<DataType> type)
+ : Scalar(std::move(type), true), value(std::move(value)) {}
+};
+
+struct ARROW_EXPORT SparseUnionScalar : public UnionScalar {
+ using UnionScalar::UnionScalar;
+ using TypeClass = SparseUnionType;
+};
+
+struct ARROW_EXPORT DenseUnionScalar : public UnionScalar {
+ using UnionScalar::UnionScalar;
+ using TypeClass = DenseUnionType;
+};
+
+struct ARROW_EXPORT DictionaryScalar : public Scalar {
+ using TypeClass = DictionaryType;
+ struct ValueType {
+ std::shared_ptr<Scalar> index;
+ std::shared_ptr<Array> dictionary;
+ } value;
+
+ explicit DictionaryScalar(std::shared_ptr<DataType> type);
+
+ DictionaryScalar(ValueType value, std::shared_ptr<DataType> type, bool is_valid = true)
+ : Scalar(std::move(type), is_valid), value(std::move(value)) {}
+
static std::shared_ptr<DictionaryScalar> Make(std::shared_ptr<Scalar> index,
std::shared_ptr<Array> dict);
- Result<std::shared_ptr<Scalar>> GetEncodedValue() const;
-};
-
-struct ARROW_EXPORT ExtensionScalar : public Scalar {
- using Scalar::Scalar;
- using TypeClass = ExtensionType;
-};
-
-/// @}
-
-namespace internal {
-
-inline Status CheckBufferLength(...) { return Status::OK(); }
-
-ARROW_EXPORT Status CheckBufferLength(const FixedSizeBinaryType* t,
- const std::shared_ptr<Buffer>* b);
-
-} // namespace internal
-
-template <typename ValueRef>
-struct MakeScalarImpl {
- template <typename T, typename ScalarType = typename TypeTraits<T>::ScalarType,
- typename ValueType = typename ScalarType::ValueType,
- typename Enable = typename std::enable_if<
- std::is_constructible<ScalarType, ValueType,
- std::shared_ptr<DataType>>::value &&
- std::is_convertible<ValueRef, ValueType>::value>::type>
- Status Visit(const T& t) {
- ARROW_RETURN_NOT_OK(internal::CheckBufferLength(&t, &value_));
- out_ = std::make_shared<ScalarType>(
- static_cast<ValueType>(static_cast<ValueRef>(value_)), std::move(type_));
- return Status::OK();
- }
-
- Status Visit(const DataType& t) {
- return Status::NotImplemented("constructing scalars of type ", t,
- " from unboxed values");
- }
-
- Result<std::shared_ptr<Scalar>> Finish() && {
- ARROW_RETURN_NOT_OK(VisitTypeInline(*type_, this));
- return std::move(out_);
- }
-
- std::shared_ptr<DataType> type_;
- ValueRef value_;
- std::shared_ptr<Scalar> out_;
-};
-
-/// \defgroup scalar-factories Scalar factory functions
-///
-/// @{
-
-/// \brief Scalar factory for null scalars
-ARROW_EXPORT
-std::shared_ptr<Scalar> MakeNullScalar(std::shared_ptr<DataType> type);
-
-/// \brief Scalar factory for non-null scalars
-template <typename Value>
-Result<std::shared_ptr<Scalar>> MakeScalar(std::shared_ptr<DataType> type,
- Value&& value) {
- return MakeScalarImpl<Value&&>{type, std::forward<Value>(value), NULLPTR}.Finish();
-}
-
-/// \brief Type-inferring scalar factory for non-null scalars
-///
-/// Construct a Scalar instance with a DataType determined by the input C++ type.
-/// (for example Int8Scalar for a int8_t input).
-/// Only non-parametric primitive types and String are supported.
-template <typename Value, typename Traits = CTypeTraits<typename std::decay<Value>::type>,
- typename ScalarType = typename Traits::ScalarType,
- typename Enable = decltype(ScalarType(std::declval<Value>(),
- Traits::type_singleton()))>
-std::shared_ptr<Scalar> MakeScalar(Value value) {
- return std::make_shared<ScalarType>(std::move(value), Traits::type_singleton());
-}
-
-inline std::shared_ptr<Scalar> MakeScalar(std::string value) {
- return std::make_shared<StringScalar>(std::move(value));
-}
-
-/// @}
-
-} // namespace arrow
+ Result<std::shared_ptr<Scalar>> GetEncodedValue() const;
+};
+
+struct ARROW_EXPORT ExtensionScalar : public Scalar {
+ using Scalar::Scalar;
+ using TypeClass = ExtensionType;
+};
+
+/// @}
+
+namespace internal {
+
+inline Status CheckBufferLength(...) { return Status::OK(); }
+
+ARROW_EXPORT Status CheckBufferLength(const FixedSizeBinaryType* t,
+ const std::shared_ptr<Buffer>* b);
+
+} // namespace internal
+
+template <typename ValueRef>
+struct MakeScalarImpl {
+ template <typename T, typename ScalarType = typename TypeTraits<T>::ScalarType,
+ typename ValueType = typename ScalarType::ValueType,
+ typename Enable = typename std::enable_if<
+ std::is_constructible<ScalarType, ValueType,
+ std::shared_ptr<DataType>>::value &&
+ std::is_convertible<ValueRef, ValueType>::value>::type>
+ Status Visit(const T& t) {
+ ARROW_RETURN_NOT_OK(internal::CheckBufferLength(&t, &value_));
+ out_ = std::make_shared<ScalarType>(
+ static_cast<ValueType>(static_cast<ValueRef>(value_)), std::move(type_));
+ return Status::OK();
+ }
+
+ Status Visit(const DataType& t) {
+ return Status::NotImplemented("constructing scalars of type ", t,
+ " from unboxed values");
+ }
+
+ Result<std::shared_ptr<Scalar>> Finish() && {
+ ARROW_RETURN_NOT_OK(VisitTypeInline(*type_, this));
+ return std::move(out_);
+ }
+
+ std::shared_ptr<DataType> type_;
+ ValueRef value_;
+ std::shared_ptr<Scalar> out_;
+};
+
+/// \defgroup scalar-factories Scalar factory functions
+///
+/// @{
+
+/// \brief Scalar factory for null scalars
+ARROW_EXPORT
+std::shared_ptr<Scalar> MakeNullScalar(std::shared_ptr<DataType> type);
+
+/// \brief Scalar factory for non-null scalars
+template <typename Value>
+Result<std::shared_ptr<Scalar>> MakeScalar(std::shared_ptr<DataType> type,
+ Value&& value) {
+ return MakeScalarImpl<Value&&>{type, std::forward<Value>(value), NULLPTR}.Finish();
+}
+
+/// \brief Type-inferring scalar factory for non-null scalars
+///
+/// Construct a Scalar instance with a DataType determined by the input C++ type.
+/// (for example Int8Scalar for a int8_t input).
+/// Only non-parametric primitive types and String are supported.
+template <typename Value, typename Traits = CTypeTraits<typename std::decay<Value>::type>,
+ typename ScalarType = typename Traits::ScalarType,
+ typename Enable = decltype(ScalarType(std::declval<Value>(),
+ Traits::type_singleton()))>
+std::shared_ptr<Scalar> MakeScalar(Value value) {
+ return std::make_shared<ScalarType>(std::move(value), Traits::type_singleton());
+}
+
+inline std::shared_ptr<Scalar> MakeScalar(std::string value) {
+ return std::make_shared<StringScalar>(std::move(value));
+}
+
+/// @}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/sparse_tensor.cc b/contrib/libs/apache/arrow/cpp/src/arrow/sparse_tensor.cc
index 03d59c3d793..3e20327a7da 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/sparse_tensor.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/sparse_tensor.cc
@@ -1,478 +1,478 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/sparse_tensor.h"
-#include "arrow/tensor/converter.h"
-
-#include <algorithm>
-#include <functional>
-#include <memory>
-#include <numeric>
-
-#include "arrow/compare.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/logging.h"
-#include "arrow/visitor_inline.h"
-
-namespace arrow {
-
-class MemoryPool;
-
-// ----------------------------------------------------------------------
-// SparseIndex
-
-Status SparseIndex::ValidateShape(const std::vector<int64_t>& shape) const {
- if (!std::all_of(shape.begin(), shape.end(), [](int64_t x) { return x >= 0; })) {
- return Status::Invalid("Shape elements must be positive");
- }
-
- return Status::OK();
-}
-
-namespace internal {
-namespace {
-
-template <typename IndexValueType>
-Status CheckSparseIndexMaximumValue(const std::vector<int64_t>& shape) {
- using c_index_value_type = typename IndexValueType::c_type;
- constexpr int64_t type_max =
- static_cast<int64_t>(std::numeric_limits<c_index_value_type>::max());
- auto greater_than_type_max = [&](int64_t x) { return x > type_max; };
- if (std::any_of(shape.begin(), shape.end(), greater_than_type_max)) {
- return Status::Invalid("The bit width of the index value type is too small");
- }
- return Status::OK();
-}
-
-template <>
-Status CheckSparseIndexMaximumValue<Int64Type>(const std::vector<int64_t>& shape) {
- return Status::OK();
-}
-
-template <>
-Status CheckSparseIndexMaximumValue<UInt64Type>(const std::vector<int64_t>& shape) {
- return Status::Invalid("UInt64Type cannot be used as IndexValueType of SparseIndex");
-}
-
-} // namespace
-
-#define CALL_CHECK_MAXIMUM_VALUE(TYPE_CLASS) \
- case TYPE_CLASS##Type::type_id: \
- return CheckSparseIndexMaximumValue<TYPE_CLASS##Type>(shape);
-
-Status CheckSparseIndexMaximumValue(const std::shared_ptr<DataType>& index_value_type,
- const std::vector<int64_t>& shape) {
- switch (index_value_type->id()) {
- ARROW_GENERATE_FOR_ALL_INTEGER_TYPES(CALL_CHECK_MAXIMUM_VALUE);
- default:
- return Status::TypeError("Unsupported SparseTensor index value type");
- }
-}
-
-#undef CALL_CHECK_MAXIMUM_VALUE
-
-Status MakeSparseTensorFromTensor(const Tensor& tensor,
- SparseTensorFormat::type sparse_format_id,
- const std::shared_ptr<DataType>& index_value_type,
- MemoryPool* pool,
- std::shared_ptr<SparseIndex>* out_sparse_index,
- std::shared_ptr<Buffer>* out_data) {
- switch (sparse_format_id) {
- case SparseTensorFormat::COO:
- return MakeSparseCOOTensorFromTensor(tensor, index_value_type, pool,
- out_sparse_index, out_data);
- case SparseTensorFormat::CSR:
- return MakeSparseCSXMatrixFromTensor(SparseMatrixCompressedAxis::ROW, tensor,
- index_value_type, pool, out_sparse_index,
- out_data);
- case SparseTensorFormat::CSC:
- return MakeSparseCSXMatrixFromTensor(SparseMatrixCompressedAxis::COLUMN, tensor,
- index_value_type, pool, out_sparse_index,
- out_data);
- case SparseTensorFormat::CSF:
- return MakeSparseCSFTensorFromTensor(tensor, index_value_type, pool,
- out_sparse_index, out_data);
-
- // LCOV_EXCL_START: ignore program failure
- default:
- return Status::Invalid("Invalid sparse tensor format");
- // LCOV_EXCL_STOP
- }
-}
-
-} // namespace internal
-
-// ----------------------------------------------------------------------
-// SparseCOOIndex
-
-namespace {
-
-inline Status CheckSparseCOOIndexValidity(const std::shared_ptr<DataType>& type,
- const std::vector<int64_t>& shape,
- const std::vector<int64_t>& strides) {
- if (!is_integer(type->id())) {
- return Status::TypeError("Type of SparseCOOIndex indices must be integer");
- }
- if (shape.size() != 2) {
- return Status::Invalid("SparseCOOIndex indices must be a matrix");
- }
-
- RETURN_NOT_OK(internal::CheckSparseIndexMaximumValue(type, shape));
-
- if (!internal::IsTensorStridesContiguous(type, shape, strides)) {
- return Status::Invalid("SparseCOOIndex indices must be contiguous");
- }
- return Status::OK();
-}
-
-void GetCOOIndexTensorRow(const std::shared_ptr<Tensor>& coords, const int64_t row,
- std::vector<int64_t>* out_index) {
- const auto& fw_index_value_type =
- internal::checked_cast<const FixedWidthType&>(*coords->type());
- const size_t indices_elsize = fw_index_value_type.bit_width() / CHAR_BIT;
-
- const auto& shape = coords->shape();
- const int64_t non_zero_length = shape[0];
- DCHECK(0 <= row && row < non_zero_length);
-
- const int64_t ndim = shape[1];
- out_index->resize(ndim);
-
- switch (indices_elsize) {
- case 1: // Int8, UInt8
- for (int64_t i = 0; i < ndim; ++i) {
- (*out_index)[i] = static_cast<int64_t>(coords->Value<UInt8Type>({row, i}));
- }
- break;
- case 2: // Int16, UInt16
- for (int64_t i = 0; i < ndim; ++i) {
- (*out_index)[i] = static_cast<int64_t>(coords->Value<UInt16Type>({row, i}));
- }
- break;
- case 4: // Int32, UInt32
- for (int64_t i = 0; i < ndim; ++i) {
- (*out_index)[i] = static_cast<int64_t>(coords->Value<UInt32Type>({row, i}));
- }
- break;
- case 8: // Int64
- for (int64_t i = 0; i < ndim; ++i) {
- (*out_index)[i] = coords->Value<Int64Type>({row, i});
- }
- break;
- default:
- DCHECK(false) << "Must not reach here";
- break;
- }
-}
-
-bool DetectSparseCOOIndexCanonicality(const std::shared_ptr<Tensor>& coords) {
- DCHECK_EQ(coords->ndim(), 2);
-
- const auto& shape = coords->shape();
- const int64_t non_zero_length = shape[0];
- if (non_zero_length <= 1) return true;
-
- const int64_t ndim = shape[1];
- std::vector<int64_t> last_index, index;
- GetCOOIndexTensorRow(coords, 0, &last_index);
- for (int64_t i = 1; i < non_zero_length; ++i) {
- GetCOOIndexTensorRow(coords, i, &index);
- int64_t j = 0;
- while (j < ndim) {
- if (last_index[j] > index[j]) {
- // last_index > index, so we can detect non-canonical here
- return false;
- }
- if (last_index[j] < index[j]) {
- // last_index < index, so we can skip the remaining dimensions
- break;
- }
- ++j;
- }
- if (j == ndim) {
- // last_index == index, so we can detect non-canonical here
- return false;
- }
- swap(last_index, index);
- }
-
- return true;
-}
-
-} // namespace
-
-Result<std::shared_ptr<SparseCOOIndex>> SparseCOOIndex::Make(
- const std::shared_ptr<Tensor>& coords, bool is_canonical) {
- RETURN_NOT_OK(
- CheckSparseCOOIndexValidity(coords->type(), coords->shape(), coords->strides()));
- return std::make_shared<SparseCOOIndex>(coords, is_canonical);
-}
-
-Result<std::shared_ptr<SparseCOOIndex>> SparseCOOIndex::Make(
- const std::shared_ptr<Tensor>& coords) {
- RETURN_NOT_OK(
- CheckSparseCOOIndexValidity(coords->type(), coords->shape(), coords->strides()));
- auto is_canonical = DetectSparseCOOIndexCanonicality(coords);
- return std::make_shared<SparseCOOIndex>(coords, is_canonical);
-}
-
-Result<std::shared_ptr<SparseCOOIndex>> SparseCOOIndex::Make(
- const std::shared_ptr<DataType>& indices_type,
- const std::vector<int64_t>& indices_shape,
- const std::vector<int64_t>& indices_strides, std::shared_ptr<Buffer> indices_data,
- bool is_canonical) {
- RETURN_NOT_OK(
- CheckSparseCOOIndexValidity(indices_type, indices_shape, indices_strides));
- return std::make_shared<SparseCOOIndex>(
- std::make_shared<Tensor>(indices_type, indices_data, indices_shape,
- indices_strides),
- is_canonical);
-}
-
-Result<std::shared_ptr<SparseCOOIndex>> SparseCOOIndex::Make(
- const std::shared_ptr<DataType>& indices_type,
- const std::vector<int64_t>& indices_shape,
- const std::vector<int64_t>& indices_strides, std::shared_ptr<Buffer> indices_data) {
- RETURN_NOT_OK(
- CheckSparseCOOIndexValidity(indices_type, indices_shape, indices_strides));
- auto coords = std::make_shared<Tensor>(indices_type, indices_data, indices_shape,
- indices_strides);
- auto is_canonical = DetectSparseCOOIndexCanonicality(coords);
- return std::make_shared<SparseCOOIndex>(coords, is_canonical);
-}
-
-Result<std::shared_ptr<SparseCOOIndex>> SparseCOOIndex::Make(
- const std::shared_ptr<DataType>& indices_type, const std::vector<int64_t>& shape,
- int64_t non_zero_length, std::shared_ptr<Buffer> indices_data, bool is_canonical) {
- auto ndim = static_cast<int64_t>(shape.size());
- if (!is_integer(indices_type->id())) {
- return Status::TypeError("Type of SparseCOOIndex indices must be integer");
- }
- const int64_t elsize =
- internal::checked_cast<const IntegerType&>(*indices_type).bit_width() / 8;
- std::vector<int64_t> indices_shape({non_zero_length, ndim});
- std::vector<int64_t> indices_strides({elsize * ndim, elsize});
- return Make(indices_type, indices_shape, indices_strides, indices_data, is_canonical);
-}
-
-Result<std::shared_ptr<SparseCOOIndex>> SparseCOOIndex::Make(
- const std::shared_ptr<DataType>& indices_type, const std::vector<int64_t>& shape,
- int64_t non_zero_length, std::shared_ptr<Buffer> indices_data) {
- auto ndim = static_cast<int64_t>(shape.size());
- if (!is_integer(indices_type->id())) {
- return Status::TypeError("Type of SparseCOOIndex indices must be integer");
- }
- const int64_t elsize = internal::GetByteWidth(*indices_type);
- std::vector<int64_t> indices_shape({non_zero_length, ndim});
- std::vector<int64_t> indices_strides({elsize * ndim, elsize});
- return Make(indices_type, indices_shape, indices_strides, indices_data);
-}
-
-// Constructor with a contiguous NumericTensor
-SparseCOOIndex::SparseCOOIndex(const std::shared_ptr<Tensor>& coords, bool is_canonical)
- : SparseIndexBase(), coords_(coords), is_canonical_(is_canonical) {
- ARROW_CHECK_OK(
- CheckSparseCOOIndexValidity(coords_->type(), coords_->shape(), coords_->strides()));
-}
-
-std::string SparseCOOIndex::ToString() const { return std::string("SparseCOOIndex"); }
-
-// ----------------------------------------------------------------------
-// SparseCSXIndex
-
-namespace internal {
-
-Status ValidateSparseCSXIndex(const std::shared_ptr<DataType>& indptr_type,
- const std::shared_ptr<DataType>& indices_type,
- const std::vector<int64_t>& indptr_shape,
- const std::vector<int64_t>& indices_shape,
- char const* type_name) {
- if (!is_integer(indptr_type->id())) {
- return Status::TypeError("Type of ", type_name, " indptr must be integer");
- }
- if (indptr_shape.size() != 1) {
- return Status::Invalid(type_name, " indptr must be a vector");
- }
- if (!is_integer(indices_type->id())) {
- return Status::Invalid("Type of ", type_name, " indices must be integer");
- }
- if (indices_shape.size() != 1) {
- return Status::Invalid(type_name, " indices must be a vector");
- }
-
- RETURN_NOT_OK(internal::CheckSparseIndexMaximumValue(indptr_type, indptr_shape));
- RETURN_NOT_OK(internal::CheckSparseIndexMaximumValue(indices_type, indices_shape));
-
- return Status::OK();
-}
-
-void CheckSparseCSXIndexValidity(const std::shared_ptr<DataType>& indptr_type,
- const std::shared_ptr<DataType>& indices_type,
- const std::vector<int64_t>& indptr_shape,
- const std::vector<int64_t>& indices_shape,
- char const* type_name) {
- ARROW_CHECK_OK(ValidateSparseCSXIndex(indptr_type, indices_type, indptr_shape,
- indices_shape, type_name));
-}
-
-} // namespace internal
-
-// ----------------------------------------------------------------------
-// SparseCSFIndex
-
-namespace {
-
-inline Status CheckSparseCSFIndexValidity(const std::shared_ptr<DataType>& indptr_type,
- const std::shared_ptr<DataType>& indices_type,
- const int64_t num_indptrs,
- const int64_t num_indices,
- const int64_t axis_order_size) {
- if (!is_integer(indptr_type->id())) {
- return Status::TypeError("Type of SparseCSFIndex indptr must be integer");
- }
- if (!is_integer(indices_type->id())) {
- return Status::TypeError("Type of SparseCSFIndex indices must be integer");
- }
- if (num_indptrs + 1 != num_indices) {
- return Status::Invalid(
- "Length of indices must be equal to length of indptrs + 1 for SparseCSFIndex.");
- }
- if (axis_order_size != num_indices) {
- return Status::Invalid(
- "Length of indices must be equal to number of dimensions for SparseCSFIndex.");
- }
- return Status::OK();
-}
-
-} // namespace
-
-Result<std::shared_ptr<SparseCSFIndex>> SparseCSFIndex::Make(
- const std::shared_ptr<DataType>& indptr_type,
- const std::shared_ptr<DataType>& indices_type,
- const std::vector<int64_t>& indices_shapes, const std::vector<int64_t>& axis_order,
- const std::vector<std::shared_ptr<Buffer>>& indptr_data,
- const std::vector<std::shared_ptr<Buffer>>& indices_data) {
- int64_t ndim = axis_order.size();
- std::vector<std::shared_ptr<Tensor>> indptr(ndim - 1);
- std::vector<std::shared_ptr<Tensor>> indices(ndim);
-
- for (int64_t i = 0; i < ndim - 1; ++i)
- indptr[i] = std::make_shared<Tensor>(indptr_type, indptr_data[i],
- std::vector<int64_t>({indices_shapes[i] + 1}));
- for (int64_t i = 0; i < ndim; ++i)
- indices[i] = std::make_shared<Tensor>(indices_type, indices_data[i],
- std::vector<int64_t>({indices_shapes[i]}));
-
- RETURN_NOT_OK(CheckSparseCSFIndexValidity(indptr_type, indices_type, indptr.size(),
- indices.size(), axis_order.size()));
-
- for (auto tensor : indptr) {
- RETURN_NOT_OK(internal::CheckSparseIndexMaximumValue(indptr_type, tensor->shape()));
- }
-
- for (auto tensor : indices) {
- RETURN_NOT_OK(internal::CheckSparseIndexMaximumValue(indices_type, tensor->shape()));
- }
-
- return std::make_shared<SparseCSFIndex>(indptr, indices, axis_order);
-}
-
-// Constructor with two index vectors
-SparseCSFIndex::SparseCSFIndex(const std::vector<std::shared_ptr<Tensor>>& indptr,
- const std::vector<std::shared_ptr<Tensor>>& indices,
- const std::vector<int64_t>& axis_order)
- : SparseIndexBase(), indptr_(indptr), indices_(indices), axis_order_(axis_order) {
- ARROW_CHECK_OK(CheckSparseCSFIndexValidity(indptr_.front()->type(),
- indices_.front()->type(), indptr_.size(),
- indices_.size(), axis_order_.size()));
-}
-
-std::string SparseCSFIndex::ToString() const { return std::string("SparseCSFIndex"); }
-
-bool SparseCSFIndex::Equals(const SparseCSFIndex& other) const {
- for (int64_t i = 0; i < static_cast<int64_t>(indices().size()); ++i) {
- if (!indices()[i]->Equals(*other.indices()[i])) return false;
- }
- for (int64_t i = 0; i < static_cast<int64_t>(indptr().size()); ++i) {
- if (!indptr()[i]->Equals(*other.indptr()[i])) return false;
- }
- return axis_order() == other.axis_order();
-}
-
-// ----------------------------------------------------------------------
-// SparseTensor
-
-// Constructor with all attributes
-SparseTensor::SparseTensor(const std::shared_ptr<DataType>& type,
- const std::shared_ptr<Buffer>& data,
- const std::vector<int64_t>& shape,
- const std::shared_ptr<SparseIndex>& sparse_index,
- const std::vector<std::string>& dim_names)
- : type_(type),
- data_(data),
- shape_(shape),
- sparse_index_(sparse_index),
- dim_names_(dim_names) {
- ARROW_CHECK(is_tensor_supported(type->id()));
-}
-
-const std::string& SparseTensor::dim_name(int i) const {
- static const std::string kEmpty = "";
- if (dim_names_.size() == 0) {
- return kEmpty;
- } else {
- ARROW_CHECK_LT(i, static_cast<int>(dim_names_.size()));
- return dim_names_[i];
- }
-}
-
-int64_t SparseTensor::size() const {
- return std::accumulate(shape_.begin(), shape_.end(), 1LL, std::multiplies<int64_t>());
-}
-
-bool SparseTensor::Equals(const SparseTensor& other, const EqualOptions& opts) const {
- return SparseTensorEquals(*this, other, opts);
-}
-
-Result<std::shared_ptr<Tensor>> SparseTensor::ToTensor(MemoryPool* pool) const {
- switch (format_id()) {
- case SparseTensorFormat::COO:
- return MakeTensorFromSparseCOOTensor(
- pool, internal::checked_cast<const SparseCOOTensor*>(this));
- break;
-
- case SparseTensorFormat::CSR:
- return MakeTensorFromSparseCSRMatrix(
- pool, internal::checked_cast<const SparseCSRMatrix*>(this));
- break;
-
- case SparseTensorFormat::CSC:
- return MakeTensorFromSparseCSCMatrix(
- pool, internal::checked_cast<const SparseCSCMatrix*>(this));
- break;
-
- case SparseTensorFormat::CSF:
- return MakeTensorFromSparseCSFTensor(
- pool, internal::checked_cast<const SparseCSFTensor*>(this));
-
- default:
- return Status::NotImplemented("Unsupported SparseIndex format type");
- }
-}
-
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/sparse_tensor.h"
+#include "arrow/tensor/converter.h"
+
+#include <algorithm>
+#include <functional>
+#include <memory>
+#include <numeric>
+
+#include "arrow/compare.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+class MemoryPool;
+
+// ----------------------------------------------------------------------
+// SparseIndex
+
+// Validate the extents of a sparse tensor's logical shape.
+//
+// Returns Status::Invalid if any element of `shape` is negative; zero-length
+// dimensions are accepted. Returns Status::OK otherwise.
+Status SparseIndex::ValidateShape(const std::vector<int64_t>& shape) const {
+  const bool all_non_negative =
+      std::all_of(shape.begin(), shape.end(), [](int64_t x) { return x >= 0; });
+  if (!all_non_negative) {
+    // The predicate admits zero, so the message must say "non-negative".
+    return Status::Invalid("Shape elements must be non-negative");
+  }
+
+  return Status::OK();
+}
+
+namespace internal {
+namespace {
+
+// Check that every extent in `shape` can be represented by the C type behind
+// IndexValueType.
+//
+// Returns Status::Invalid as soon as an extent exceeds that type's maximum
+// value, Status::OK when none does.
+template <typename IndexValueType>
+Status CheckSparseIndexMaximumValue(const std::vector<int64_t>& shape) {
+  using CIndex = typename IndexValueType::c_type;
+  constexpr int64_t kLargestIndex =
+      static_cast<int64_t>(std::numeric_limits<CIndex>::max());
+  for (const int64_t extent : shape) {
+    if (extent > kLargestIndex) {
+      return Status::Invalid("The bit width of the index value type is too small");
+    }
+  }
+  return Status::OK();
+}
+
+// Int64Type specialization: unconditionally succeeds. The extents in `shape`
+// are themselves int64_t values, so no comparison against a maximum is made.
+template <>
+Status CheckSparseIndexMaximumValue<Int64Type>(const std::vector<int64_t>& shape) {
+ return Status::OK();
+}
+
+// UInt64Type specialization: unconditionally rejects the type with
+// Status::Invalid, regardless of the contents of `shape`.
+template <>
+Status CheckSparseIndexMaximumValue<UInt64Type>(const std::vector<int64_t>& shape) {
+ return Status::Invalid("UInt64Type cannot be used as IndexValueType of SparseIndex");
+}
+
+} // namespace
+
+// Emits one `case` label for TYPE_CLASS##Type that forwards `shape` to the
+// matching CheckSparseIndexMaximumValue<> instantiation. Only meaningful inside
+// the switch below; it is #undef'd right after the function.
+#define CALL_CHECK_MAXIMUM_VALUE(TYPE_CLASS) \
+ case TYPE_CLASS##Type::type_id: \
+ return CheckSparseIndexMaximumValue<TYPE_CLASS##Type>(shape);
+
+// Runtime-typed entry point: dispatch on the id of `index_value_type` to the
+// compile-time check for that type.
+//
+// Returns the result of the typed check, or Status::TypeError when the type id
+// matches none of the generated cases.
+Status CheckSparseIndexMaximumValue(const std::shared_ptr<DataType>& index_value_type,
+ const std::vector<int64_t>& shape) {
+ switch (index_value_type->id()) {
+ // NOTE(review): presumably expands CALL_CHECK_MAXIMUM_VALUE once per integer
+ // type class -- confirm against the macro's definition.
+ ARROW_GENERATE_FOR_ALL_INTEGER_TYPES(CALL_CHECK_MAXIMUM_VALUE);
+ default:
+ return Status::TypeError("Unsupported SparseTensor index value type");
+ }
+}
+
+#undef CALL_CHECK_MAXIMUM_VALUE
+
+// Convert the dense `tensor` into the sparse representation selected by
+// `sparse_format_id` by delegating to the converter for that format.
+//
+// `index_value_type`, `pool`, `out_sparse_index` and `out_data` are forwarded
+// unchanged to the chosen converter. Returns the converter's Status, or
+// Status::Invalid for a format id that matches no known format.
+Status MakeSparseTensorFromTensor(const Tensor& tensor,
+                                  SparseTensorFormat::type sparse_format_id,
+                                  const std::shared_ptr<DataType>& index_value_type,
+                                  MemoryPool* pool,
+                                  std::shared_ptr<SparseIndex>* out_sparse_index,
+                                  std::shared_ptr<Buffer>* out_data) {
+  switch (sparse_format_id) {
+    case SparseTensorFormat::COO:
+      return MakeSparseCOOTensorFromTensor(tensor, index_value_type, pool,
+                                           out_sparse_index, out_data);
+    case SparseTensorFormat::CSF:
+      return MakeSparseCSFTensorFromTensor(tensor, index_value_type, pool,
+                                           out_sparse_index, out_data);
+    case SparseTensorFormat::CSR:
+    case SparseTensorFormat::CSC: {
+      // CSR and CSC share one converter and differ only in the compressed axis.
+      const SparseMatrixCompressedAxis compressed_axis =
+          (sparse_format_id == SparseTensorFormat::CSR)
+              ? SparseMatrixCompressedAxis::ROW
+              : SparseMatrixCompressedAxis::COLUMN;
+      return MakeSparseCSXMatrixFromTensor(compressed_axis, tensor, index_value_type,
+                                           pool, out_sparse_index, out_data);
+    }
+
+    // LCOV_EXCL_START: ignore program failure
+    default:
+      return Status::Invalid("Invalid sparse tensor format");
+      // LCOV_EXCL_STOP
+  }
+}
+
+} // namespace internal
+
+// ----------------------------------------------------------------------
+// SparseCOOIndex
+
+namespace {
+
+// Validate the element type, shape and strides of a COO coordinates tensor.
+//
+// The checks run in this order and the first failure is returned:
+//   1. `type` must be an integer type            -> Status::TypeError
+//   2. `shape` must have exactly two dimensions  -> Status::Invalid
+//   3. every extent must fit into `type`         -> error of the range check
+//   4. `strides` must describe contiguous data   -> Status::Invalid
+inline Status CheckSparseCOOIndexValidity(const std::shared_ptr<DataType>& type,
+                                          const std::vector<int64_t>& shape,
+                                          const std::vector<int64_t>& strides) {
+  const bool has_integer_type = is_integer(type->id());
+  if (!has_integer_type) {
+    return Status::TypeError("Type of SparseCOOIndex indices must be integer");
+  }
+
+  const bool is_matrix = (shape.size() == 2);
+  if (!is_matrix) {
+    return Status::Invalid("SparseCOOIndex indices must be a matrix");
+  }
+
+  RETURN_NOT_OK(internal::CheckSparseIndexMaximumValue(type, shape));
+
+  const bool is_contiguous = internal::IsTensorStridesContiguous(type, shape, strides);
+  if (is_contiguous) {
+    return Status::OK();
+  }
+  return Status::Invalid("SparseCOOIndex indices must be contiguous");
+}
+
+// Copy row `row` of the 2-D tensor `coords` into `out_row`, reading every
+// element as IndexType and widening it to int64_t. `out_row` must already hold
+// one slot per column of `coords`.
+template <typename IndexType>
+void CopyCOOIndexRow(const Tensor& coords, const int64_t row,
+                     std::vector<int64_t>* out_row) {
+  const int64_t num_columns = static_cast<int64_t>(out_row->size());
+  for (int64_t column = 0; column < num_columns; ++column) {
+    (*out_row)[column] =
+        static_cast<int64_t>(coords.Value<IndexType>({row, column}));
+  }
+}
+
+// Read one coordinate tuple from a COO coordinates tensor.
+//
+// `out_index` is resized to the number of columns of `coords` and filled with
+// the elements of row `row`, widened to int64_t. The element reader is chosen
+// by byte width only: 1-, 2- and 4-byte elements are read through the unsigned
+// type of that width, 8-byte elements through Int64Type.
+void GetCOOIndexTensorRow(const std::shared_ptr<Tensor>& coords, const int64_t row,
+                          std::vector<int64_t>* out_index) {
+  const auto& fw_index_value_type =
+      internal::checked_cast<const FixedWidthType&>(*coords->type());
+  const size_t indices_elsize = fw_index_value_type.bit_width() / CHAR_BIT;
+
+  const auto& shape = coords->shape();
+  const int64_t non_zero_length = shape[0];
+  DCHECK(0 <= row && row < non_zero_length);
+
+  out_index->resize(shape[1]);
+
+  const Tensor& matrix = *coords;
+  switch (indices_elsize) {
+    case 1:  // Int8, UInt8
+      CopyCOOIndexRow<UInt8Type>(matrix, row, out_index);
+      break;
+    case 2:  // Int16, UInt16
+      CopyCOOIndexRow<UInt16Type>(matrix, row, out_index);
+      break;
+    case 4:  // Int32, UInt32
+      CopyCOOIndexRow<UInt32Type>(matrix, row, out_index);
+      break;
+    case 8:  // Int64
+      CopyCOOIndexRow<Int64Type>(matrix, row, out_index);
+      break;
+    default:
+      DCHECK(false) << "Must not reach here";
+      break;
+  }
+}
+
+// Decide whether the rows of a COO coordinates matrix are canonical, i.e.
+// strictly increasing in lexicographical order from one row to the next.
+//
+// Returns true for matrices with at most one row. Returns false as soon as a
+// row is lexicographically smaller than, or equal to (a duplicate of), the row
+// before it.
+bool DetectSparseCOOIndexCanonicality(const std::shared_ptr<Tensor>& coords) {
+  DCHECK_EQ(coords->ndim(), 2);
+
+  const int64_t num_rows = coords->shape()[0];
+  if (num_rows <= 1) return true;
+
+  std::vector<int64_t> previous, current;
+  GetCOOIndexTensorRow(coords, 0, &previous);
+  for (int64_t row = 1; row < num_rows; ++row) {
+    GetCOOIndexTensorRow(coords, row, &current);
+    // Both rows have the same length, so "previous < current" is false exactly
+    // when the rows are out of order or identical.
+    const bool strictly_increasing = std::lexicographical_compare(
+        previous.begin(), previous.end(), current.begin(), current.end());
+    if (!strictly_increasing) {
+      return false;
+    }
+    previous.swap(current);
+  }
+
+  return true;
+}
+
+} // namespace
+
+// Make a SparseCOOIndex from a coordinates tensor, taking the canonicality
+// flag from the caller as-is.
+//
+// The type, shape and strides of `coords` are validated first; a validation
+// failure is returned instead of an index.
+Result<std::shared_ptr<SparseCOOIndex>> SparseCOOIndex::Make(
+ const std::shared_ptr<Tensor>& coords, bool is_canonical) {
+ RETURN_NOT_OK(
+ CheckSparseCOOIndexValidity(coords->type(), coords->shape(), coords->strides()));
+ return std::make_shared<SparseCOOIndex>(coords, is_canonical);
+}
+
+// Make a SparseCOOIndex from a coordinates tensor, detecting canonicality by
+// inspecting the rows of `coords`.
+//
+// The type, shape and strides of `coords` are validated before the rows are
+// read; a validation failure is returned instead of an index.
+Result<std::shared_ptr<SparseCOOIndex>> SparseCOOIndex::Make(
+ const std::shared_ptr<Tensor>& coords) {
+ RETURN_NOT_OK(
+ CheckSparseCOOIndexValidity(coords->type(), coords->shape(), coords->strides()));
+ auto is_canonical = DetectSparseCOOIndexCanonicality(coords);
+ return std::make_shared<SparseCOOIndex>(coords, is_canonical);
+}
+
+// Make a SparseCOOIndex from the raw properties of its coordinates tensor,
+// taking the canonicality flag from the caller as-is.
+//
+// The properties are validated before the tensor is built; a validation
+// failure is returned instead of an index.
+Result<std::shared_ptr<SparseCOOIndex>> SparseCOOIndex::Make(
+    const std::shared_ptr<DataType>& indices_type,
+    const std::vector<int64_t>& indices_shape,
+    const std::vector<int64_t>& indices_strides, std::shared_ptr<Buffer> indices_data,
+    bool is_canonical) {
+  RETURN_NOT_OK(
+      CheckSparseCOOIndexValidity(indices_type, indices_shape, indices_strides));
+
+  const std::shared_ptr<Tensor> coords = std::make_shared<Tensor>(
+      indices_type, indices_data, indices_shape, indices_strides);
+  return std::make_shared<SparseCOOIndex>(coords, is_canonical);
+}
+
+// Make a SparseCOOIndex from the raw properties of its coordinates tensor,
+// detecting canonicality from the coordinate rows.
+//
+// The properties are validated before the tensor is built; a validation
+// failure is returned instead of an index.
+Result<std::shared_ptr<SparseCOOIndex>> SparseCOOIndex::Make(
+    const std::shared_ptr<DataType>& indices_type,
+    const std::vector<int64_t>& indices_shape,
+    const std::vector<int64_t>& indices_strides, std::shared_ptr<Buffer> indices_data) {
+  RETURN_NOT_OK(
+      CheckSparseCOOIndexValidity(indices_type, indices_shape, indices_strides));
+
+  const std::shared_ptr<Tensor> coords = std::make_shared<Tensor>(
+      indices_type, indices_data, indices_shape, indices_strides);
+  const bool canonical = DetectSparseCOOIndexCanonicality(coords);
+  return std::make_shared<SparseCOOIndex>(coords, canonical);
+}
+
+// Make a SparseCOOIndex from the sparse tensor's logical `shape`, the number
+// of non-zero values and a coordinates buffer, taking the canonicality flag
+// from the caller as-is.
+//
+// The coordinates are described as a (non_zero_length x ndim) matrix with
+// row-major strides derived from the byte width of `indices_type`. Returns
+// Status::TypeError if `indices_type` is not an integer type; other validation
+// failures come from the raw-properties overload this forwards to.
+Result<std::shared_ptr<SparseCOOIndex>> SparseCOOIndex::Make(
+    const std::shared_ptr<DataType>& indices_type, const std::vector<int64_t>& shape,
+    int64_t non_zero_length, std::shared_ptr<Buffer> indices_data, bool is_canonical) {
+  auto ndim = static_cast<int64_t>(shape.size());
+  if (!is_integer(indices_type->id())) {
+    return Status::TypeError("Type of SparseCOOIndex indices must be integer");
+  }
+  // Use the same byte-width helper as the auto-detecting overload instead of a
+  // hard-coded division by 8.
+  const int64_t elsize = internal::GetByteWidth(*indices_type);
+  std::vector<int64_t> indices_shape({non_zero_length, ndim});
+  std::vector<int64_t> indices_strides({elsize * ndim, elsize});
+  return Make(indices_type, indices_shape, indices_strides, indices_data, is_canonical);
+}
+
+// Make a SparseCOOIndex from the sparse tensor's logical `shape`, the number
+// of non-zero values and a coordinates buffer, detecting canonicality from the
+// coordinate rows.
+//
+// The coordinates are described as a (non_zero_length x ndim) matrix with
+// row-major strides derived from the byte width of `indices_type`. Returns
+// Status::TypeError if `indices_type` is not an integer type; other validation
+// failures come from the raw-properties overload this forwards to.
+Result<std::shared_ptr<SparseCOOIndex>> SparseCOOIndex::Make(
+    const std::shared_ptr<DataType>& indices_type, const std::vector<int64_t>& shape,
+    int64_t non_zero_length, std::shared_ptr<Buffer> indices_data) {
+  if (!is_integer(indices_type->id())) {
+    return Status::TypeError("Type of SparseCOOIndex indices must be integer");
+  }
+
+  const int64_t num_dims = static_cast<int64_t>(shape.size());
+  const int64_t element_bytes = internal::GetByteWidth(*indices_type);
+  const std::vector<int64_t> coords_shape = {non_zero_length, num_dims};
+  const std::vector<int64_t> coords_strides = {element_bytes * num_dims, element_bytes};
+  return Make(indices_type, coords_shape, coords_strides, indices_data);
+}
+
+// Construct a SparseCOOIndex that stores `coords` and `is_canonical` as given.
+//
+// The type, shape and strides of the stored tensor are run through the same
+// validation the Make() factories use, but the result is asserted with
+// ARROW_CHECK_OK rather than returned (presumably fatal on failure -- confirm
+// against the macro's definition).
+SparseCOOIndex::SparseCOOIndex(const std::shared_ptr<Tensor>& coords, bool is_canonical)
+ : SparseIndexBase(), coords_(coords), is_canonical_(is_canonical) {
+ ARROW_CHECK_OK(
+ CheckSparseCOOIndexValidity(coords_->type(), coords_->shape(), coords_->strides()));
+}
+
+// Return the fixed name of this index kind.
+std::string SparseCOOIndex::ToString() const {
+  return "SparseCOOIndex";
+}
+
+// ----------------------------------------------------------------------
+// SparseCSXIndex
+
+namespace internal {
+
+// Validate the types and shapes of the indptr and indices tensors of a CSR/CSC
+// style index. `type_name` is the index class name used in error messages.
+//
+// The checks run in this order and the first failure is returned:
+//   1. `indptr_type` must be an integer type       -> Status::TypeError
+//   2. `indptr_shape` must be one-dimensional      -> Status::Invalid
+//   3. `indices_type` must be an integer type      -> Status::Invalid
+//   4. `indices_shape` must be one-dimensional     -> Status::Invalid
+//   5. the indptr and indices extents must fit into their respective types
+//
+// NOTE(review): check 3 reports Status::Invalid while the equivalent check 1
+// reports Status::TypeError. Left unchanged because callers may test the
+// status code.
+Status ValidateSparseCSXIndex(const std::shared_ptr<DataType>& indptr_type,
+                              const std::shared_ptr<DataType>& indices_type,
+                              const std::vector<int64_t>& indptr_shape,
+                              const std::vector<int64_t>& indices_shape,
+                              char const* type_name) {
+  const bool indptr_is_integer = is_integer(indptr_type->id());
+  if (!indptr_is_integer) {
+    return Status::TypeError("Type of ", type_name, " indptr must be integer");
+  }
+  const bool indptr_is_vector = (indptr_shape.size() == 1);
+  if (!indptr_is_vector) {
+    return Status::Invalid(type_name, " indptr must be a vector");
+  }
+
+  const bool indices_is_integer = is_integer(indices_type->id());
+  if (!indices_is_integer) {
+    return Status::Invalid("Type of ", type_name, " indices must be integer");
+  }
+  const bool indices_is_vector = (indices_shape.size() == 1);
+  if (!indices_is_vector) {
+    return Status::Invalid(type_name, " indices must be a vector");
+  }
+
+  RETURN_NOT_OK(internal::CheckSparseIndexMaximumValue(indptr_type, indptr_shape));
+  RETURN_NOT_OK(internal::CheckSparseIndexMaximumValue(indices_type, indices_shape));
+
+  return Status::OK();
+}
+
+// Asserting counterpart of ValidateSparseCSXIndex: runs the same validation
+// with the same arguments, but hands the resulting Status to ARROW_CHECK_OK
+// instead of returning it (presumably fatal on failure -- confirm against the
+// macro's definition).
+void CheckSparseCSXIndexValidity(const std::shared_ptr<DataType>& indptr_type,
+ const std::shared_ptr<DataType>& indices_type,
+ const std::vector<int64_t>& indptr_shape,
+ const std::vector<int64_t>& indices_shape,
+ char const* type_name) {
+ ARROW_CHECK_OK(ValidateSparseCSXIndex(indptr_type, indices_type, indptr_shape,
+ indices_shape, type_name));
+}
+
+} // namespace internal
+
+// ----------------------------------------------------------------------
+// SparseCSFIndex
+
+namespace {
+
+// Validate the element types and the tensor counts of a CSF index.
+//
+// The checks run in this order and the first failure is returned:
+//   1. `indptr_type` must be an integer type        -> Status::TypeError
+//   2. `indices_type` must be an integer type       -> Status::TypeError
+//   3. `num_indices` must equal `num_indptrs` + 1   -> Status::Invalid
+//   4. `num_indices` must equal `axis_order_size`   -> Status::Invalid
+inline Status CheckSparseCSFIndexValidity(const std::shared_ptr<DataType>& indptr_type,
+                                          const std::shared_ptr<DataType>& indices_type,
+                                          const int64_t num_indptrs,
+                                          const int64_t num_indices,
+                                          const int64_t axis_order_size) {
+  const bool indptr_is_integer = is_integer(indptr_type->id());
+  if (!indptr_is_integer) {
+    return Status::TypeError("Type of SparseCSFIndex indptr must be integer");
+  }
+  const bool indices_is_integer = is_integer(indices_type->id());
+  if (!indices_is_integer) {
+    return Status::TypeError("Type of SparseCSFIndex indices must be integer");
+  }
+
+  const bool counts_match = (num_indices == num_indptrs + 1);
+  if (!counts_match) {
+    return Status::Invalid(
+        "Length of indices must be equal to length of indptrs + 1 for SparseCSFIndex.");
+  }
+  const bool covers_all_axes = (num_indices == axis_order_size);
+  if (!covers_all_axes) {
+    return Status::Invalid(
+        "Length of indices must be equal to number of dimensions for SparseCSFIndex.");
+  }
+  return Status::OK();
+}
+
+} // namespace
+
+// Make a SparseCSFIndex from raw buffers.
+//
+// With N = axis_order.size(), N indices tensors are built from `indices_data`
+// (tensor i has length indices_shapes[i]) and N - 1 indptr tensors from
+// `indptr_data` (tensor i has length indices_shapes[i] + 1).
+//
+// Returns Status::Invalid if `axis_order` is empty or if `indices_shapes`,
+// `indices_data` or `indptr_data` hold fewer entries than N dimensions need;
+// Status::TypeError if either index type is not an integer type; and the error
+// of the range check if a tensor length does not fit into its index type.
+Result<std::shared_ptr<SparseCSFIndex>> SparseCSFIndex::Make(
+    const std::shared_ptr<DataType>& indptr_type,
+    const std::shared_ptr<DataType>& indices_type,
+    const std::vector<int64_t>& indices_shapes, const std::vector<int64_t>& axis_order,
+    const std::vector<std::shared_ptr<Buffer>>& indptr_data,
+    const std::vector<std::shared_ptr<Buffer>>& indices_data) {
+  // An empty axis_order would make `ndim - 1` negative, which turns into an
+  // enormous size when used to size the indptr vector below.
+  const auto num_dims = axis_order.size();
+  if (num_dims == 0) {
+    return Status::Invalid("axis_order of SparseCSFIndex must not be empty");
+  }
+  // Reject inputs that the loops below would otherwise index out of bounds.
+  if (indices_shapes.size() < num_dims || indices_data.size() < num_dims ||
+      indptr_data.size() + 1 < num_dims) {
+    return Status::Invalid(
+        "SparseCSFIndex needs one indices shape and buffer per dimension and one "
+        "indptr buffer per dimension but the last");
+  }
+
+  const int64_t ndim = static_cast<int64_t>(num_dims);
+  std::vector<std::shared_ptr<Tensor>> indptr(ndim - 1);
+  std::vector<std::shared_ptr<Tensor>> indices(ndim);
+
+  for (int64_t i = 0; i < ndim - 1; ++i) {
+    indptr[i] = std::make_shared<Tensor>(indptr_type, indptr_data[i],
+                                         std::vector<int64_t>({indices_shapes[i] + 1}));
+  }
+  for (int64_t i = 0; i < ndim; ++i) {
+    indices[i] = std::make_shared<Tensor>(indices_type, indices_data[i],
+                                          std::vector<int64_t>({indices_shapes[i]}));
+  }
+
+  RETURN_NOT_OK(CheckSparseCSFIndexValidity(indptr_type, indices_type, indptr.size(),
+                                            indices.size(), axis_order.size()));
+
+  // Iterate by reference: copying each shared_ptr only costs refcount traffic.
+  for (const auto& tensor : indptr) {
+    RETURN_NOT_OK(internal::CheckSparseIndexMaximumValue(indptr_type, tensor->shape()));
+  }
+
+  for (const auto& tensor : indices) {
+    RETURN_NOT_OK(internal::CheckSparseIndexMaximumValue(indices_type, tensor->shape()));
+  }
+
+  return std::make_shared<SparseCSFIndex>(indptr, indices, axis_order);
+}
+
+// Construct a SparseCSFIndex from its indptr tensors, indices tensors and axis
+// order, all stored as given.
+//
+// The element types and the tensor counts are asserted with ARROW_CHECK_OK.
+// `indices` must not be empty. `indptr` may be empty (a one-dimensional index
+// has no indptr tensors); in that case the indices type stands in for the
+// indptr type in the validation, because calling front() on the empty vector
+// would be undefined behavior.
+SparseCSFIndex::SparseCSFIndex(const std::vector<std::shared_ptr<Tensor>>& indptr,
+                               const std::vector<std::shared_ptr<Tensor>>& indices,
+                               const std::vector<int64_t>& axis_order)
+    : SparseIndexBase(), indptr_(indptr), indices_(indices), axis_order_(axis_order) {
+  ARROW_CHECK(!indices_.empty())
+      << "SparseCSFIndex requires at least one indices tensor";
+  const std::shared_ptr<DataType> indices_type = indices_.front()->type();
+  const std::shared_ptr<DataType> indptr_type =
+      indptr_.empty() ? indices_type : indptr_.front()->type();
+  ARROW_CHECK_OK(CheckSparseCSFIndexValidity(indptr_type, indices_type, indptr_.size(),
+                                             indices_.size(), axis_order_.size()));
+}
+
+// Return the fixed name of this index kind.
+std::string SparseCSFIndex::ToString() const {
+  return "SparseCSFIndex";
+}
+
+// Return whether two CSF indices are equal: the same number of indices and
+// indptr tensors, pairwise-equal tensors, and the same axis order.
+bool SparseCSFIndex::Equals(const SparseCSFIndex& other) const {
+  // Compare the tensor counts first; indexing `other` by this object's counts
+  // would read out of bounds when `other` holds fewer tensors.
+  if (indices().size() != other.indices().size() ||
+      indptr().size() != other.indptr().size()) {
+    return false;
+  }
+  for (int64_t i = 0; i < static_cast<int64_t>(indices().size()); ++i) {
+    if (!indices()[i]->Equals(*other.indices()[i])) return false;
+  }
+  for (int64_t i = 0; i < static_cast<int64_t>(indptr().size()); ++i) {
+    if (!indptr()[i]->Equals(*other.indptr()[i])) return false;
+  }
+  return axis_order() == other.axis_order();
+}
+
+// ----------------------------------------------------------------------
+// SparseTensor
+
+// Construct a SparseTensor from all of its attributes, stored as given: the
+// value type, the value buffer, the logical shape, the sparse index and the
+// dimension names.
+//
+// The only check made here is an ARROW_CHECK that is_tensor_supported()
+// accepts the id of `type`; shape, index and names are not validated.
+SparseTensor::SparseTensor(const std::shared_ptr<DataType>& type,
+ const std::shared_ptr<Buffer>& data,
+ const std::vector<int64_t>& shape,
+ const std::shared_ptr<SparseIndex>& sparse_index,
+ const std::vector<std::string>& dim_names)
+ : type_(type),
+ data_(data),
+ shape_(shape),
+ sparse_index_(sparse_index),
+ dim_names_(dim_names) {
+ ARROW_CHECK(is_tensor_supported(type->id()));
+}
+
+// Return the name of dimension `i`.
+//
+// When the tensor carries no dimension names, a reference to a shared empty
+// string is returned for any `i`. Otherwise `i` must lie in
+// [0, number of names); both bounds are asserted before indexing.
+const std::string& SparseTensor::dim_name(int i) const {
+  static const std::string kEmpty = "";
+  if (dim_names_.empty()) {
+    return kEmpty;
+  }
+  // A negative index would pass the upper-bound check alone and then index
+  // out of bounds.
+  ARROW_CHECK_GE(i, 0);
+  ARROW_CHECK_LT(i, static_cast<int>(dim_names_.size()));
+  return dim_names_[i];
+}
+
+// Return the number of logical elements, i.e. the product of all extents in
+// the shape. An empty shape yields 1.
+int64_t SparseTensor::size() const {
+  long long num_elements = 1LL;
+  for (const int64_t extent : shape_) {
+    num_elements = num_elements * extent;
+  }
+  return num_elements;
+}
+
+// Compare this sparse tensor with `other` by delegating to SparseTensorEquals,
+// forwarding `opts` unchanged.
+bool SparseTensor::Equals(const SparseTensor& other, const EqualOptions& opts) const {
+ return SparseTensorEquals(*this, other, opts);
+}
+
+// Convert this sparse tensor to a dense Tensor.
+//
+// The object is downcast to the concrete sparse tensor class that matches
+// format_id() and handed, together with `pool`, to the converter for that
+// format. Returns Status::NotImplemented for a format id that matches no known
+// format.
+Result<std::shared_ptr<Tensor>> SparseTensor::ToTensor(MemoryPool* pool) const {
+  const auto format = format_id();
+
+  if (format == SparseTensorFormat::COO) {
+    return MakeTensorFromSparseCOOTensor(
+        pool, internal::checked_cast<const SparseCOOTensor*>(this));
+  }
+  if (format == SparseTensorFormat::CSR) {
+    return MakeTensorFromSparseCSRMatrix(
+        pool, internal::checked_cast<const SparseCSRMatrix*>(this));
+  }
+  if (format == SparseTensorFormat::CSC) {
+    return MakeTensorFromSparseCSCMatrix(
+        pool, internal::checked_cast<const SparseCSCMatrix*>(this));
+  }
+  if (format == SparseTensorFormat::CSF) {
+    return MakeTensorFromSparseCSFTensor(
+        pool, internal::checked_cast<const SparseCSFTensor*>(this));
+  }
+
+  return Status::NotImplemented("Unsupported SparseIndex format type");
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/sparse_tensor.h b/contrib/libs/apache/arrow/cpp/src/arrow/sparse_tensor.h
index 1f2f8c0d82e..c0c0be8a224 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/sparse_tensor.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/sparse_tensor.h
@@ -1,624 +1,624 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "arrow/buffer.h"
-#include "arrow/compare.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/tensor.h" // IWYU pragma: export
-#include "arrow/type.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class MemoryPool;
-
-namespace internal {
-
-ARROW_EXPORT
-Status CheckSparseIndexMaximumValue(const std::shared_ptr<DataType>& index_value_type,
- const std::vector<int64_t>& shape);
-
-} // namespace internal
-
-// ----------------------------------------------------------------------
-// SparseIndex class
-
-struct SparseTensorFormat {
- /// EXPERIMENTAL: The index format type of SparseTensor
- enum type {
- /// Coordinate list (COO) format.
- COO,
- /// Compressed sparse row (CSR) format.
- CSR,
- /// Compressed sparse column (CSC) format.
- CSC,
- /// Compressed sparse fiber (CSF) format.
- CSF
- };
-};
-
-/// \brief EXPERIMENTAL: The base class for the index of a sparse tensor
-///
-/// SparseIndex describes where the non-zero elements are within a SparseTensor.
-///
-/// There are several ways to represent this. The format_id is used to
-/// distinguish what kind of representation is used. Each possible value of
-/// format_id must have only one corresponding concrete subclass of SparseIndex.
-class ARROW_EXPORT SparseIndex {
- public:
- explicit SparseIndex(SparseTensorFormat::type format_id) : format_id_(format_id) {}
-
- virtual ~SparseIndex() = default;
-
- /// \brief Return the identifier of the format type
- SparseTensorFormat::type format_id() const { return format_id_; }
-
- /// \brief Return the number of non zero values in the sparse tensor related
- /// to this sparse index
- virtual int64_t non_zero_length() const = 0;
-
- /// \brief Return the string representation of the sparse index
- virtual std::string ToString() const = 0;
-
- virtual Status ValidateShape(const std::vector<int64_t>& shape) const;
-
- protected:
- const SparseTensorFormat::type format_id_;
-};
-
-namespace internal {
-template <typename SparseIndexType>
-class SparseIndexBase : public SparseIndex {
- public:
- SparseIndexBase() : SparseIndex(SparseIndexType::format_id) {}
-};
-} // namespace internal
-
-// ----------------------------------------------------------------------
-// SparseCOOIndex class
-
-/// \brief EXPERIMENTAL: The index data for a COO sparse tensor
-///
-/// A COO sparse index manages the location of its non-zero values by their
-/// coordinates.
-class ARROW_EXPORT SparseCOOIndex : public internal::SparseIndexBase<SparseCOOIndex> {
- public:
- static constexpr SparseTensorFormat::type format_id = SparseTensorFormat::COO;
-
- /// \brief Make SparseCOOIndex from a coords tensor and canonicality
- static Result<std::shared_ptr<SparseCOOIndex>> Make(
- const std::shared_ptr<Tensor>& coords, bool is_canonical);
-
- /// \brief Make SparseCOOIndex from a coords tensor with canonicality auto-detection
- static Result<std::shared_ptr<SparseCOOIndex>> Make(
- const std::shared_ptr<Tensor>& coords);
-
- /// \brief Make SparseCOOIndex from raw properties with canonicality auto-detection
- static Result<std::shared_ptr<SparseCOOIndex>> Make(
- const std::shared_ptr<DataType>& indices_type,
- const std::vector<int64_t>& indices_shape,
- const std::vector<int64_t>& indices_strides, std::shared_ptr<Buffer> indices_data);
-
- /// \brief Make SparseCOOIndex from raw properties
- static Result<std::shared_ptr<SparseCOOIndex>> Make(
- const std::shared_ptr<DataType>& indices_type,
- const std::vector<int64_t>& indices_shape,
- const std::vector<int64_t>& indices_strides, std::shared_ptr<Buffer> indices_data,
- bool is_canonical);
-
- /// \brief Make SparseCOOIndex from sparse tensor's shape properties and data
- /// with canonicality auto-detection
- ///
- /// The indices_data should be in row-major (C-like) order. If not,
- /// use the raw properties constructor.
- static Result<std::shared_ptr<SparseCOOIndex>> Make(
- const std::shared_ptr<DataType>& indices_type, const std::vector<int64_t>& shape,
- int64_t non_zero_length, std::shared_ptr<Buffer> indices_data);
-
- /// \brief Make SparseCOOIndex from sparse tensor's shape properties and data
- ///
- /// The indices_data should be in row-major (C-like) order. If not,
- /// use the raw properties constructor.
- static Result<std::shared_ptr<SparseCOOIndex>> Make(
- const std::shared_ptr<DataType>& indices_type, const std::vector<int64_t>& shape,
- int64_t non_zero_length, std::shared_ptr<Buffer> indices_data, bool is_canonical);
-
- /// \brief Construct SparseCOOIndex from column-major NumericTensor
- explicit SparseCOOIndex(const std::shared_ptr<Tensor>& coords, bool is_canonical);
-
- /// \brief Return a tensor that has the coordinates of the non-zero values
- ///
- /// The returned tensor is a N x D tensor where N is the number of non-zero
- /// values and D is the number of dimensions in the logical data.
- /// The column at index `i` is a D-tuple of coordinates indicating that the
- /// logical value at those coordinates should be found at physical index `i`.
- const std::shared_ptr<Tensor>& indices() const { return coords_; }
-
- /// \brief Return the number of non zero values in the sparse tensor related
- /// to this sparse index
- int64_t non_zero_length() const override { return coords_->shape()[0]; }
-
- /// \brief Return whether a sparse tensor index is canonical, or not.
- /// If a sparse tensor index is canonical, it is sorted in the lexicographical order,
- /// and the corresponding sparse tensor doesn't have duplicated entries.
- bool is_canonical() const { return is_canonical_; }
-
- /// \brief Return a string representation of the sparse index
- std::string ToString() const override;
-
- /// \brief Return whether the COO indices are equal
- bool Equals(const SparseCOOIndex& other) const {
- return indices()->Equals(*other.indices());
- }
-
- inline Status ValidateShape(const std::vector<int64_t>& shape) const override {
- ARROW_RETURN_NOT_OK(SparseIndex::ValidateShape(shape));
-
- if (static_cast<size_t>(coords_->shape()[1]) == shape.size()) {
- return Status::OK();
- }
-
- return Status::Invalid(
- "shape length is inconsistent with the coords matrix in COO index");
- }
-
- protected:
- std::shared_ptr<Tensor> coords_;
- bool is_canonical_;
-};
-
-namespace internal {
-
-/// EXPERIMENTAL: The axis to be compressed
-enum class SparseMatrixCompressedAxis : char {
- /// The value for CSR matrix
- ROW,
- /// The value for CSC matrix
- COLUMN
-};
-
-ARROW_EXPORT
-Status ValidateSparseCSXIndex(const std::shared_ptr<DataType>& indptr_type,
- const std::shared_ptr<DataType>& indices_type,
- const std::vector<int64_t>& indptr_shape,
- const std::vector<int64_t>& indices_shape,
- char const* type_name);
-
-ARROW_EXPORT
-void CheckSparseCSXIndexValidity(const std::shared_ptr<DataType>& indptr_type,
- const std::shared_ptr<DataType>& indices_type,
- const std::vector<int64_t>& indptr_shape,
- const std::vector<int64_t>& indices_shape,
- char const* type_name);
-
-template <typename SparseIndexType, SparseMatrixCompressedAxis COMPRESSED_AXIS>
-class SparseCSXIndex : public SparseIndexBase<SparseIndexType> {
- public:
- static constexpr SparseMatrixCompressedAxis kCompressedAxis = COMPRESSED_AXIS;
-
- /// \brief Make a subclass of SparseCSXIndex from raw properties
- static Result<std::shared_ptr<SparseIndexType>> Make(
- const std::shared_ptr<DataType>& indptr_type,
- const std::shared_ptr<DataType>& indices_type,
- const std::vector<int64_t>& indptr_shape, const std::vector<int64_t>& indices_shape,
- std::shared_ptr<Buffer> indptr_data, std::shared_ptr<Buffer> indices_data) {
- ARROW_RETURN_NOT_OK(ValidateSparseCSXIndex(indptr_type, indices_type, indptr_shape,
- indices_shape,
- SparseIndexType::kTypeName));
- return std::make_shared<SparseIndexType>(
- std::make_shared<Tensor>(indptr_type, indptr_data, indptr_shape),
- std::make_shared<Tensor>(indices_type, indices_data, indices_shape));
- }
-
- /// \brief Make a subclass of SparseCSXIndex from raw properties
- static Result<std::shared_ptr<SparseIndexType>> Make(
- const std::shared_ptr<DataType>& indices_type,
- const std::vector<int64_t>& indptr_shape, const std::vector<int64_t>& indices_shape,
- std::shared_ptr<Buffer> indptr_data, std::shared_ptr<Buffer> indices_data) {
- return Make(indices_type, indices_type, indptr_shape, indices_shape, indptr_data,
- indices_data);
- }
-
- /// \brief Make a subclass of SparseCSXIndex from sparse tensor's shape properties and
- /// data
- static Result<std::shared_ptr<SparseIndexType>> Make(
- const std::shared_ptr<DataType>& indptr_type,
- const std::shared_ptr<DataType>& indices_type, const std::vector<int64_t>& shape,
- int64_t non_zero_length, std::shared_ptr<Buffer> indptr_data,
- std::shared_ptr<Buffer> indices_data) {
- std::vector<int64_t> indptr_shape({shape[0] + 1});
- std::vector<int64_t> indices_shape({non_zero_length});
- return Make(indptr_type, indices_type, indptr_shape, indices_shape, indptr_data,
- indices_data);
- }
-
- /// \brief Make a subclass of SparseCSXIndex from sparse tensor's shape properties and
- /// data
- static Result<std::shared_ptr<SparseIndexType>> Make(
- const std::shared_ptr<DataType>& indices_type, const std::vector<int64_t>& shape,
- int64_t non_zero_length, std::shared_ptr<Buffer> indptr_data,
- std::shared_ptr<Buffer> indices_data) {
- return Make(indices_type, indices_type, shape, non_zero_length, indptr_data,
- indices_data);
- }
-
- /// \brief Construct SparseCSXIndex from two index vectors
- explicit SparseCSXIndex(const std::shared_ptr<Tensor>& indptr,
- const std::shared_ptr<Tensor>& indices)
- : SparseIndexBase<SparseIndexType>(), indptr_(indptr), indices_(indices) {
- CheckSparseCSXIndexValidity(indptr_->type(), indices_->type(), indptr_->shape(),
- indices_->shape(), SparseIndexType::kTypeName);
- }
-
- /// \brief Return a 1D tensor of indptr vector
- const std::shared_ptr<Tensor>& indptr() const { return indptr_; }
-
- /// \brief Return a 1D tensor of indices vector
- const std::shared_ptr<Tensor>& indices() const { return indices_; }
-
- /// \brief Return the number of non zero values in the sparse tensor related
- /// to this sparse index
- int64_t non_zero_length() const override { return indices_->shape()[0]; }
-
- /// \brief Return a string representation of the sparse index
- std::string ToString() const override {
- return std::string(SparseIndexType::kTypeName);
- }
-
- /// \brief Return whether the CSR indices are equal
- bool Equals(const SparseIndexType& other) const {
- return indptr()->Equals(*other.indptr()) && indices()->Equals(*other.indices());
- }
-
- inline Status ValidateShape(const std::vector<int64_t>& shape) const override {
- ARROW_RETURN_NOT_OK(SparseIndex::ValidateShape(shape));
-
- if (shape.size() < 2) {
- return Status::Invalid("shape length is too short");
- }
-
- if (shape.size() > 2) {
- return Status::Invalid("shape length is too long");
- }
-
- if (indptr_->shape()[0] == shape[static_cast<int64_t>(kCompressedAxis)] + 1) {
- return Status::OK();
- }
-
- return Status::Invalid("shape length is inconsistent with the ", ToString());
- }
-
- protected:
- std::shared_ptr<Tensor> indptr_;
- std::shared_ptr<Tensor> indices_;
-};
-
-} // namespace internal
-
-// ----------------------------------------------------------------------
-// SparseCSRIndex class
-
-/// \brief EXPERIMENTAL: The index data for a CSR sparse matrix
-///
-/// A CSR sparse index manages the location of its non-zero values by two
-/// vectors.
-///
-/// The first vector, called indptr, represents the range of the rows; the i-th
-/// row spans from indptr[i] to indptr[i+1] in the corresponding value vector.
-/// So the length of an indptr vector is the number of rows + 1.
-///
-/// The other vector, called indices, represents the column indices of the
-/// corresponding non-zero values. So the length of an indices vector is same
-/// as the number of non-zero-values.
-class ARROW_EXPORT SparseCSRIndex
- : public internal::SparseCSXIndex<SparseCSRIndex,
- internal::SparseMatrixCompressedAxis::ROW> {
- public:
- using BaseClass =
- internal::SparseCSXIndex<SparseCSRIndex, internal::SparseMatrixCompressedAxis::ROW>;
-
- static constexpr SparseTensorFormat::type format_id = SparseTensorFormat::CSR;
- static constexpr char const* kTypeName = "SparseCSRIndex";
-
- using SparseCSXIndex::kCompressedAxis;
- using SparseCSXIndex::Make;
- using SparseCSXIndex::SparseCSXIndex;
-};
-
-// ----------------------------------------------------------------------
-// SparseCSCIndex class
-
-/// \brief EXPERIMENTAL: The index data for a CSC sparse matrix
-///
-/// A CSC sparse index manages the location of its non-zero values by two
-/// vectors.
-///
-/// The first vector, called indptr, represents the range of the column; the i-th
-/// column spans from indptr[i] to indptr[i+1] in the corresponding value vector.
-/// So the length of an indptr vector is the number of columns + 1.
-///
-/// The other vector, called indices, represents the row indices of the
-/// corresponding non-zero values. So the length of an indices vector is same
-/// as the number of non-zero-values.
-class ARROW_EXPORT SparseCSCIndex
- : public internal::SparseCSXIndex<SparseCSCIndex,
- internal::SparseMatrixCompressedAxis::COLUMN> {
- public:
- using BaseClass =
- internal::SparseCSXIndex<SparseCSCIndex,
- internal::SparseMatrixCompressedAxis::COLUMN>;
-
- static constexpr SparseTensorFormat::type format_id = SparseTensorFormat::CSC;
- static constexpr char const* kTypeName = "SparseCSCIndex";
-
- using SparseCSXIndex::kCompressedAxis;
- using SparseCSXIndex::Make;
- using SparseCSXIndex::SparseCSXIndex;
-};
-
-// ----------------------------------------------------------------------
-// SparseCSFIndex class
-
-/// \brief EXPERIMENTAL: The index data for a CSF sparse tensor
-///
-/// A CSF sparse index manages the location of its non-zero values by set of
-/// prefix trees. Each path from a root to leaf forms one tensor non-zero index.
-/// CSF is implemented with three vectors.
-///
-/// Vectors inptr and indices contain N-1 and N buffers respectively, where N is the
-/// number of dimensions. Axis_order is a vector of integers of length N. Indptr and
-/// indices describe the set of prefix trees. Trees traverse dimensions in order given by
-/// axis_order.
-class ARROW_EXPORT SparseCSFIndex : public internal::SparseIndexBase<SparseCSFIndex> {
- public:
- static constexpr SparseTensorFormat::type format_id = SparseTensorFormat::CSF;
- static constexpr char const* kTypeName = "SparseCSFIndex";
-
- /// \brief Make SparseCSFIndex from raw properties
- static Result<std::shared_ptr<SparseCSFIndex>> Make(
- const std::shared_ptr<DataType>& indptr_type,
- const std::shared_ptr<DataType>& indices_type,
- const std::vector<int64_t>& indices_shapes, const std::vector<int64_t>& axis_order,
- const std::vector<std::shared_ptr<Buffer>>& indptr_data,
- const std::vector<std::shared_ptr<Buffer>>& indices_data);
-
- /// \brief Make SparseCSFIndex from raw properties
- static Result<std::shared_ptr<SparseCSFIndex>> Make(
- const std::shared_ptr<DataType>& indices_type,
- const std::vector<int64_t>& indices_shapes, const std::vector<int64_t>& axis_order,
- const std::vector<std::shared_ptr<Buffer>>& indptr_data,
- const std::vector<std::shared_ptr<Buffer>>& indices_data) {
- return Make(indices_type, indices_type, indices_shapes, axis_order, indptr_data,
- indices_data);
- }
-
- /// \brief Construct SparseCSFIndex from two index vectors
- explicit SparseCSFIndex(const std::vector<std::shared_ptr<Tensor>>& indptr,
- const std::vector<std::shared_ptr<Tensor>>& indices,
- const std::vector<int64_t>& axis_order);
-
- /// \brief Return a 1D vector of indptr tensors
- const std::vector<std::shared_ptr<Tensor>>& indptr() const { return indptr_; }
-
- /// \brief Return a 1D vector of indices tensors
- const std::vector<std::shared_ptr<Tensor>>& indices() const { return indices_; }
-
- /// \brief Return a 1D vector specifying the order of axes
- const std::vector<int64_t>& axis_order() const { return axis_order_; }
-
- /// \brief Return the number of non zero values in the sparse tensor related
- /// to this sparse index
- int64_t non_zero_length() const override { return indices_.back()->shape()[0]; }
-
- /// \brief Return a string representation of the sparse index
- std::string ToString() const override;
-
- /// \brief Return whether the CSF indices are equal
- bool Equals(const SparseCSFIndex& other) const;
-
- protected:
- std::vector<std::shared_ptr<Tensor>> indptr_;
- std::vector<std::shared_ptr<Tensor>> indices_;
- std::vector<int64_t> axis_order_;
-};
-
-// ----------------------------------------------------------------------
-// SparseTensor class
-
-/// \brief EXPERIMENTAL: The base class of sparse tensor container
-class ARROW_EXPORT SparseTensor {
- public:
- virtual ~SparseTensor() = default;
-
- SparseTensorFormat::type format_id() const { return sparse_index_->format_id(); }
-
- /// \brief Return a value type of the sparse tensor
- std::shared_ptr<DataType> type() const { return type_; }
-
- /// \brief Return a buffer that contains the value vector of the sparse tensor
- std::shared_ptr<Buffer> data() const { return data_; }
-
- /// \brief Return an immutable raw data pointer
- const uint8_t* raw_data() const { return data_->data(); }
-
- /// \brief Return a mutable raw data pointer
- uint8_t* raw_mutable_data() const { return data_->mutable_data(); }
-
- /// \brief Return a shape vector of the sparse tensor
- const std::vector<int64_t>& shape() const { return shape_; }
-
- /// \brief Return a sparse index of the sparse tensor
- const std::shared_ptr<SparseIndex>& sparse_index() const { return sparse_index_; }
-
- /// \brief Return a number of dimensions of the sparse tensor
- int ndim() const { return static_cast<int>(shape_.size()); }
-
- /// \brief Return a vector of dimension names
- const std::vector<std::string>& dim_names() const { return dim_names_; }
-
- /// \brief Return the name of the i-th dimension
- const std::string& dim_name(int i) const;
-
- /// \brief Total number of value cells in the sparse tensor
- int64_t size() const;
-
- /// \brief Return true if the underlying data buffer is mutable
- bool is_mutable() const { return data_->is_mutable(); }
-
- /// \brief Total number of non-zero cells in the sparse tensor
- int64_t non_zero_length() const {
- return sparse_index_ ? sparse_index_->non_zero_length() : 0;
- }
-
- /// \brief Return whether sparse tensors are equal
- bool Equals(const SparseTensor& other,
- const EqualOptions& = EqualOptions::Defaults()) const;
-
- /// \brief Return dense representation of sparse tensor as tensor
- ///
- /// The returned Tensor has row-major order (C-like).
- Result<std::shared_ptr<Tensor>> ToTensor(MemoryPool* pool) const;
- Result<std::shared_ptr<Tensor>> ToTensor() const {
- return ToTensor(default_memory_pool());
- }
-
- /// \brief Status-return version of ToTensor().
- ARROW_DEPRECATED("Use Result-returning version")
- Status ToTensor(std::shared_ptr<Tensor>* out) const { return ToTensor().Value(out); }
- Status ToTensor(MemoryPool* pool, std::shared_ptr<Tensor>* out) const {
- return ToTensor(pool).Value(out);
- }
-
- protected:
- // Constructor with all attributes
- SparseTensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
- const std::vector<int64_t>& shape,
- const std::shared_ptr<SparseIndex>& sparse_index,
- const std::vector<std::string>& dim_names);
-
- std::shared_ptr<DataType> type_;
- std::shared_ptr<Buffer> data_;
- std::vector<int64_t> shape_;
- std::shared_ptr<SparseIndex> sparse_index_;
-
- // These names are optional
- std::vector<std::string> dim_names_;
-};
-
-// ----------------------------------------------------------------------
-// SparseTensorImpl class
-
-namespace internal {
-
-ARROW_EXPORT
-Status MakeSparseTensorFromTensor(const Tensor& tensor,
- SparseTensorFormat::type sparse_format_id,
- const std::shared_ptr<DataType>& index_value_type,
- MemoryPool* pool,
- std::shared_ptr<SparseIndex>* out_sparse_index,
- std::shared_ptr<Buffer>* out_data);
-
-} // namespace internal
-
-/// \brief EXPERIMENTAL: Concrete sparse tensor implementation classes with sparse index
-/// type
-template <typename SparseIndexType>
-class SparseTensorImpl : public SparseTensor {
- public:
- virtual ~SparseTensorImpl() = default;
-
- /// \brief Construct a sparse tensor from physical data buffer and logical index
- SparseTensorImpl(const std::shared_ptr<SparseIndexType>& sparse_index,
- const std::shared_ptr<DataType>& type,
- const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
- const std::vector<std::string>& dim_names)
- : SparseTensor(type, data, shape, sparse_index, dim_names) {}
-
- /// \brief Construct an empty sparse tensor
- SparseTensorImpl(const std::shared_ptr<DataType>& type,
- const std::vector<int64_t>& shape,
- const std::vector<std::string>& dim_names = {})
- : SparseTensorImpl(NULLPTR, type, NULLPTR, shape, dim_names) {}
-
- /// \brief Create a SparseTensor with full parameters
- static inline Result<std::shared_ptr<SparseTensorImpl<SparseIndexType>>> Make(
- const std::shared_ptr<SparseIndexType>& sparse_index,
- const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
- const std::vector<int64_t>& shape, const std::vector<std::string>& dim_names) {
- if (!is_tensor_supported(type->id())) {
- return Status::Invalid(type->ToString(),
- " is not valid data type for a sparse tensor");
- }
- ARROW_RETURN_NOT_OK(sparse_index->ValidateShape(shape));
- if (dim_names.size() > 0 && dim_names.size() != shape.size()) {
- return Status::Invalid("dim_names length is inconsistent with shape");
- }
- return std::make_shared<SparseTensorImpl<SparseIndexType>>(sparse_index, type, data,
- shape, dim_names);
- }
-
- /// \brief Create a sparse tensor from a dense tensor
- ///
- /// The dense tensor is re-encoded as a sparse index and a physical
- /// data buffer for the non-zero value.
- static inline Result<std::shared_ptr<SparseTensorImpl<SparseIndexType>>> Make(
- const Tensor& tensor, const std::shared_ptr<DataType>& index_value_type,
- MemoryPool* pool = default_memory_pool()) {
- std::shared_ptr<SparseIndex> sparse_index;
- std::shared_ptr<Buffer> data;
- ARROW_RETURN_NOT_OK(internal::MakeSparseTensorFromTensor(
- tensor, SparseIndexType::format_id, index_value_type, pool, &sparse_index,
- &data));
- return std::make_shared<SparseTensorImpl<SparseIndexType>>(
- internal::checked_pointer_cast<SparseIndexType>(sparse_index), tensor.type(),
- data, tensor.shape(), tensor.dim_names_);
- }
-
- static inline Result<std::shared_ptr<SparseTensorImpl<SparseIndexType>>> Make(
- const Tensor& tensor, MemoryPool* pool = default_memory_pool()) {
- return Make(tensor, int64(), pool);
- }
-
- private:
- ARROW_DISALLOW_COPY_AND_ASSIGN(SparseTensorImpl);
-};
-
-/// \brief EXPERIMENTAL: Type alias for COO sparse tensor
-using SparseCOOTensor = SparseTensorImpl<SparseCOOIndex>;
-
-/// \brief EXPERIMENTAL: Type alias for CSR sparse matrix
-using SparseCSRMatrix = SparseTensorImpl<SparseCSRIndex>;
-
-/// \brief EXPERIMENTAL: Type alias for CSC sparse matrix
-using SparseCSCMatrix = SparseTensorImpl<SparseCSCIndex>;
-
-/// \brief EXPERIMENTAL: Type alias for CSF sparse matrix
-using SparseCSFTensor = SparseTensorImpl<SparseCSFIndex>;
-
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/buffer.h"
+#include "arrow/compare.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/tensor.h" // IWYU pragma: export
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class MemoryPool;
+
+namespace internal {
+
+ARROW_EXPORT
+Status CheckSparseIndexMaximumValue(const std::shared_ptr<DataType>& index_value_type,
+ const std::vector<int64_t>& shape);
+
+} // namespace internal
+
+// ----------------------------------------------------------------------
+// SparseIndex class
+
+struct SparseTensorFormat {
+ /// EXPERIMENTAL: The index format type of SparseTensor
+ enum type {
+ /// Coordinate list (COO) format.
+ COO,
+ /// Compressed sparse row (CSR) format.
+ CSR,
+ /// Compressed sparse column (CSC) format.
+ CSC,
+ /// Compressed sparse fiber (CSF) format.
+ CSF
+ };
+};
+
+/// \brief EXPERIMENTAL: The base class for the index of a sparse tensor
+///
+/// SparseIndex describes where the non-zero elements are within a SparseTensor.
+///
+/// There are several ways to represent this. The format_id is used to
+/// distinguish what kind of representation is used. Each possible value of
+/// format_id must have only one corresponding concrete subclass of SparseIndex.
+class ARROW_EXPORT SparseIndex {
+ public:
+ explicit SparseIndex(SparseTensorFormat::type format_id) : format_id_(format_id) {}
+
+ virtual ~SparseIndex() = default;
+
+ /// \brief Return the identifier of the format type
+ SparseTensorFormat::type format_id() const { return format_id_; }
+
+ /// \brief Return the number of non zero values in the sparse tensor related
+ /// to this sparse index
+ virtual int64_t non_zero_length() const = 0;
+
+ /// \brief Return the string representation of the sparse index
+ virtual std::string ToString() const = 0;
+
+ virtual Status ValidateShape(const std::vector<int64_t>& shape) const;
+
+ protected:
+ const SparseTensorFormat::type format_id_;
+};
+
+namespace internal {
+template <typename SparseIndexType>
+class SparseIndexBase : public SparseIndex {
+ public:
+ SparseIndexBase() : SparseIndex(SparseIndexType::format_id) {}
+};
+} // namespace internal
+
+// ----------------------------------------------------------------------
+// SparseCOOIndex class
+
+/// \brief EXPERIMENTAL: The index data for a COO sparse tensor
+///
+/// A COO sparse index manages the location of its non-zero values by their
+/// coordinates.
+class ARROW_EXPORT SparseCOOIndex : public internal::SparseIndexBase<SparseCOOIndex> {
+ public:
+ static constexpr SparseTensorFormat::type format_id = SparseTensorFormat::COO;
+
+ /// \brief Make SparseCOOIndex from a coords tensor and canonicality
+ static Result<std::shared_ptr<SparseCOOIndex>> Make(
+ const std::shared_ptr<Tensor>& coords, bool is_canonical);
+
+ /// \brief Make SparseCOOIndex from a coords tensor with canonicality auto-detection
+ static Result<std::shared_ptr<SparseCOOIndex>> Make(
+ const std::shared_ptr<Tensor>& coords);
+
+ /// \brief Make SparseCOOIndex from raw properties with canonicality auto-detection
+ static Result<std::shared_ptr<SparseCOOIndex>> Make(
+ const std::shared_ptr<DataType>& indices_type,
+ const std::vector<int64_t>& indices_shape,
+ const std::vector<int64_t>& indices_strides, std::shared_ptr<Buffer> indices_data);
+
+ /// \brief Make SparseCOOIndex from raw properties
+ static Result<std::shared_ptr<SparseCOOIndex>> Make(
+ const std::shared_ptr<DataType>& indices_type,
+ const std::vector<int64_t>& indices_shape,
+ const std::vector<int64_t>& indices_strides, std::shared_ptr<Buffer> indices_data,
+ bool is_canonical);
+
+ /// \brief Make SparseCOOIndex from sparse tensor's shape properties and data
+ /// with canonicality auto-detection
+ ///
+ /// The indices_data should be in row-major (C-like) order. If not,
+ /// use the raw properties constructor.
+ static Result<std::shared_ptr<SparseCOOIndex>> Make(
+ const std::shared_ptr<DataType>& indices_type, const std::vector<int64_t>& shape,
+ int64_t non_zero_length, std::shared_ptr<Buffer> indices_data);
+
+ /// \brief Make SparseCOOIndex from sparse tensor's shape properties and data
+ ///
+ /// The indices_data should be in row-major (C-like) order. If not,
+ /// use the raw properties constructor.
+ static Result<std::shared_ptr<SparseCOOIndex>> Make(
+ const std::shared_ptr<DataType>& indices_type, const std::vector<int64_t>& shape,
+ int64_t non_zero_length, std::shared_ptr<Buffer> indices_data, bool is_canonical);
+
+ /// \brief Construct SparseCOOIndex from column-major NumericTensor
+ explicit SparseCOOIndex(const std::shared_ptr<Tensor>& coords, bool is_canonical);
+
+ /// \brief Return a tensor that has the coordinates of the non-zero values
+ ///
+ /// The returned tensor is a N x D tensor where N is the number of non-zero
+ /// values and D is the number of dimensions in the logical data.
+ /// The row at index `i` is a D-tuple of coordinates indicating that the
+ /// logical value at those coordinates should be found at physical index `i`.
+ const std::shared_ptr<Tensor>& indices() const { return coords_; }
+
+ /// \brief Return the number of non zero values in the sparse tensor related
+ /// to this sparse index
+ int64_t non_zero_length() const override { return coords_->shape()[0]; }
+
+ /// \brief Return whether a sparse tensor index is canonical, or not.
+ /// If a sparse tensor index is canonical, it is sorted in the lexicographical order,
+ /// and the corresponding sparse tensor doesn't have duplicated entries.
+ bool is_canonical() const { return is_canonical_; }
+
+ /// \brief Return a string representation of the sparse index
+ std::string ToString() const override;
+
+ /// \brief Return whether the COO indices are equal
+ bool Equals(const SparseCOOIndex& other) const {
+ return indices()->Equals(*other.indices());
+ }
+
+ inline Status ValidateShape(const std::vector<int64_t>& shape) const override {
+ ARROW_RETURN_NOT_OK(SparseIndex::ValidateShape(shape));
+
+ if (static_cast<size_t>(coords_->shape()[1]) == shape.size()) {
+ return Status::OK();
+ }
+
+ return Status::Invalid(
+ "shape length is inconsistent with the coords matrix in COO index");
+ }
+
+ protected:
+ std::shared_ptr<Tensor> coords_;
+ bool is_canonical_;
+};
+
+namespace internal {
+
+/// EXPERIMENTAL: The axis to be compressed
+enum class SparseMatrixCompressedAxis : char {
+ /// The value for CSR matrix
+ ROW,
+ /// The value for CSC matrix
+ COLUMN
+};
+
+ARROW_EXPORT
+Status ValidateSparseCSXIndex(const std::shared_ptr<DataType>& indptr_type,
+ const std::shared_ptr<DataType>& indices_type,
+ const std::vector<int64_t>& indptr_shape,
+ const std::vector<int64_t>& indices_shape,
+ char const* type_name);
+
+ARROW_EXPORT
+void CheckSparseCSXIndexValidity(const std::shared_ptr<DataType>& indptr_type,
+ const std::shared_ptr<DataType>& indices_type,
+ const std::vector<int64_t>& indptr_shape,
+ const std::vector<int64_t>& indices_shape,
+ char const* type_name);
+
+template <typename SparseIndexType, SparseMatrixCompressedAxis COMPRESSED_AXIS>
+class SparseCSXIndex : public SparseIndexBase<SparseIndexType> {
+ public:
+ static constexpr SparseMatrixCompressedAxis kCompressedAxis = COMPRESSED_AXIS;
+
+ /// \brief Make a subclass of SparseCSXIndex from raw properties
+ static Result<std::shared_ptr<SparseIndexType>> Make(
+ const std::shared_ptr<DataType>& indptr_type,
+ const std::shared_ptr<DataType>& indices_type,
+ const std::vector<int64_t>& indptr_shape, const std::vector<int64_t>& indices_shape,
+ std::shared_ptr<Buffer> indptr_data, std::shared_ptr<Buffer> indices_data) {
+ ARROW_RETURN_NOT_OK(ValidateSparseCSXIndex(indptr_type, indices_type, indptr_shape,
+ indices_shape,
+ SparseIndexType::kTypeName));
+ return std::make_shared<SparseIndexType>(
+ std::make_shared<Tensor>(indptr_type, indptr_data, indptr_shape),
+ std::make_shared<Tensor>(indices_type, indices_data, indices_shape));
+ }
+
+ /// \brief Make a subclass of SparseCSXIndex from raw properties
+ static Result<std::shared_ptr<SparseIndexType>> Make(
+ const std::shared_ptr<DataType>& indices_type,
+ const std::vector<int64_t>& indptr_shape, const std::vector<int64_t>& indices_shape,
+ std::shared_ptr<Buffer> indptr_data, std::shared_ptr<Buffer> indices_data) {
+ return Make(indices_type, indices_type, indptr_shape, indices_shape, indptr_data,
+ indices_data);
+ }
+
+ /// \brief Make a subclass of SparseCSXIndex from sparse tensor's shape properties and
+ /// data
+ static Result<std::shared_ptr<SparseIndexType>> Make(
+ const std::shared_ptr<DataType>& indptr_type,
+ const std::shared_ptr<DataType>& indices_type, const std::vector<int64_t>& shape,
+ int64_t non_zero_length, std::shared_ptr<Buffer> indptr_data,
+ std::shared_ptr<Buffer> indices_data) {
+ std::vector<int64_t> indptr_shape({shape[0] + 1});
+ std::vector<int64_t> indices_shape({non_zero_length});
+ return Make(indptr_type, indices_type, indptr_shape, indices_shape, indptr_data,
+ indices_data);
+ }
+
+ /// \brief Make a subclass of SparseCSXIndex from sparse tensor's shape properties and
+ /// data
+ static Result<std::shared_ptr<SparseIndexType>> Make(
+ const std::shared_ptr<DataType>& indices_type, const std::vector<int64_t>& shape,
+ int64_t non_zero_length, std::shared_ptr<Buffer> indptr_data,
+ std::shared_ptr<Buffer> indices_data) {
+ return Make(indices_type, indices_type, shape, non_zero_length, indptr_data,
+ indices_data);
+ }
+
+ /// \brief Construct SparseCSXIndex from two index vectors
+ explicit SparseCSXIndex(const std::shared_ptr<Tensor>& indptr,
+ const std::shared_ptr<Tensor>& indices)
+ : SparseIndexBase<SparseIndexType>(), indptr_(indptr), indices_(indices) {
+ CheckSparseCSXIndexValidity(indptr_->type(), indices_->type(), indptr_->shape(),
+ indices_->shape(), SparseIndexType::kTypeName);
+ }
+
+ /// \brief Return a 1D tensor of indptr vector
+ const std::shared_ptr<Tensor>& indptr() const { return indptr_; }
+
+ /// \brief Return a 1D tensor of indices vector
+ const std::shared_ptr<Tensor>& indices() const { return indices_; }
+
+ /// \brief Return the number of non zero values in the sparse tensor related
+ /// to this sparse index
+ int64_t non_zero_length() const override { return indices_->shape()[0]; }
+
+ /// \brief Return a string representation of the sparse index
+ std::string ToString() const override {
+ return std::string(SparseIndexType::kTypeName);
+ }
+
+ /// \brief Return whether the CSX (CSR or CSC) indices are equal
+ bool Equals(const SparseIndexType& other) const {
+ return indptr()->Equals(*other.indptr()) && indices()->Equals(*other.indices());
+ }
+
+ inline Status ValidateShape(const std::vector<int64_t>& shape) const override {
+ ARROW_RETURN_NOT_OK(SparseIndex::ValidateShape(shape));
+
+ if (shape.size() < 2) {
+ return Status::Invalid("shape length is too short");
+ }
+
+ if (shape.size() > 2) {
+ return Status::Invalid("shape length is too long");
+ }
+
+ if (indptr_->shape()[0] == shape[static_cast<int64_t>(kCompressedAxis)] + 1) {
+ return Status::OK();
+ }
+
+ return Status::Invalid("shape length is inconsistent with the ", ToString());
+ }
+
+ protected:
+ std::shared_ptr<Tensor> indptr_;
+ std::shared_ptr<Tensor> indices_;
+};
+
+} // namespace internal
+
+// ----------------------------------------------------------------------
+// SparseCSRIndex class
+
+/// \brief EXPERIMENTAL: The index data for a CSR sparse matrix
+///
+/// A CSR sparse index manages the location of its non-zero values by two
+/// vectors.
+///
+/// The first vector, called indptr, represents the range of the rows; the i-th
+/// row spans from indptr[i] to indptr[i+1] in the corresponding value vector.
+/// So the length of an indptr vector is the number of rows + 1.
+///
+/// The other vector, called indices, represents the column indices of the
+/// corresponding non-zero values. So the length of an indices vector is the same
+/// as the number of non-zero values.
+class ARROW_EXPORT SparseCSRIndex
+ : public internal::SparseCSXIndex<SparseCSRIndex,
+ internal::SparseMatrixCompressedAxis::ROW> {
+ public:
+ using BaseClass =
+ internal::SparseCSXIndex<SparseCSRIndex, internal::SparseMatrixCompressedAxis::ROW>;
+
+ static constexpr SparseTensorFormat::type format_id = SparseTensorFormat::CSR;
+ static constexpr char const* kTypeName = "SparseCSRIndex";
+
+ using SparseCSXIndex::kCompressedAxis;
+ using SparseCSXIndex::Make;
+ using SparseCSXIndex::SparseCSXIndex;
+};
+
+// ----------------------------------------------------------------------
+// SparseCSCIndex class
+
+/// \brief EXPERIMENTAL: The index data for a CSC sparse matrix
+///
+/// A CSC sparse index manages the location of its non-zero values by two
+/// vectors.
+///
+/// The first vector, called indptr, represents the range of the column; the i-th
+/// column spans from indptr[i] to indptr[i+1] in the corresponding value vector.
+/// So the length of an indptr vector is the number of columns + 1.
+///
+/// The other vector, called indices, represents the row indices of the
+/// corresponding non-zero values. So the length of an indices vector is the same
+/// as the number of non-zero values.
+class ARROW_EXPORT SparseCSCIndex
+ : public internal::SparseCSXIndex<SparseCSCIndex,
+ internal::SparseMatrixCompressedAxis::COLUMN> {
+ public:
+ using BaseClass =
+ internal::SparseCSXIndex<SparseCSCIndex,
+ internal::SparseMatrixCompressedAxis::COLUMN>;
+
+ static constexpr SparseTensorFormat::type format_id = SparseTensorFormat::CSC;
+ static constexpr char const* kTypeName = "SparseCSCIndex";
+
+ using SparseCSXIndex::kCompressedAxis;
+ using SparseCSXIndex::Make;
+ using SparseCSXIndex::SparseCSXIndex;
+};
+
+// ----------------------------------------------------------------------
+// SparseCSFIndex class
+
+/// \brief EXPERIMENTAL: The index data for a CSF sparse tensor
+///
+/// A CSF sparse index manages the location of its non-zero values by a set of
+/// prefix trees. Each path from a root to leaf forms one tensor non-zero index.
+/// CSF is implemented with three vectors.
+///
+/// Vectors indptr and indices contain N-1 and N buffers respectively, where N is the
+/// number of dimensions. Axis_order is a vector of integers of length N. Indptr and
+/// indices describe the set of prefix trees. Trees traverse dimensions in order given by
+/// axis_order.
+class ARROW_EXPORT SparseCSFIndex : public internal::SparseIndexBase<SparseCSFIndex> {
+ public:
+ static constexpr SparseTensorFormat::type format_id = SparseTensorFormat::CSF;
+ static constexpr char const* kTypeName = "SparseCSFIndex";
+
+ /// \brief Make SparseCSFIndex from raw properties
+ static Result<std::shared_ptr<SparseCSFIndex>> Make(
+ const std::shared_ptr<DataType>& indptr_type,
+ const std::shared_ptr<DataType>& indices_type,
+ const std::vector<int64_t>& indices_shapes, const std::vector<int64_t>& axis_order,
+ const std::vector<std::shared_ptr<Buffer>>& indptr_data,
+ const std::vector<std::shared_ptr<Buffer>>& indices_data);
+
+ /// \brief Make SparseCSFIndex from raw properties
+ static Result<std::shared_ptr<SparseCSFIndex>> Make(
+ const std::shared_ptr<DataType>& indices_type,
+ const std::vector<int64_t>& indices_shapes, const std::vector<int64_t>& axis_order,
+ const std::vector<std::shared_ptr<Buffer>>& indptr_data,
+ const std::vector<std::shared_ptr<Buffer>>& indices_data) {
+ return Make(indices_type, indices_type, indices_shapes, axis_order, indptr_data,
+ indices_data);
+ }
+
+ /// \brief Construct SparseCSFIndex from two index vectors
+ explicit SparseCSFIndex(const std::vector<std::shared_ptr<Tensor>>& indptr,
+ const std::vector<std::shared_ptr<Tensor>>& indices,
+ const std::vector<int64_t>& axis_order);
+
+ /// \brief Return a 1D vector of indptr tensors
+ const std::vector<std::shared_ptr<Tensor>>& indptr() const { return indptr_; }
+
+ /// \brief Return a 1D vector of indices tensors
+ const std::vector<std::shared_ptr<Tensor>>& indices() const { return indices_; }
+
+ /// \brief Return a 1D vector specifying the order of axes
+ const std::vector<int64_t>& axis_order() const { return axis_order_; }
+
+ /// \brief Return the number of non zero values in the sparse tensor related
+ /// to this sparse index
+ int64_t non_zero_length() const override { return indices_.back()->shape()[0]; }
+
+ /// \brief Return a string representation of the sparse index
+ std::string ToString() const override;
+
+ /// \brief Return whether the CSF indices are equal
+ bool Equals(const SparseCSFIndex& other) const;
+
+ protected:
+ std::vector<std::shared_ptr<Tensor>> indptr_;
+ std::vector<std::shared_ptr<Tensor>> indices_;
+ std::vector<int64_t> axis_order_;
+};
+
+// ----------------------------------------------------------------------
+// SparseTensor class
+
+/// \brief EXPERIMENTAL: The base class of sparse tensor container
+class ARROW_EXPORT SparseTensor {
+ public:
+ virtual ~SparseTensor() = default;
+
+ SparseTensorFormat::type format_id() const { return sparse_index_->format_id(); }
+
+ /// \brief Return a value type of the sparse tensor
+ std::shared_ptr<DataType> type() const { return type_; }
+
+ /// \brief Return a buffer that contains the value vector of the sparse tensor
+ std::shared_ptr<Buffer> data() const { return data_; }
+
+ /// \brief Return an immutable raw data pointer
+ const uint8_t* raw_data() const { return data_->data(); }
+
+ /// \brief Return a mutable raw data pointer
+ uint8_t* raw_mutable_data() const { return data_->mutable_data(); }
+
+ /// \brief Return a shape vector of the sparse tensor
+ const std::vector<int64_t>& shape() const { return shape_; }
+
+ /// \brief Return a sparse index of the sparse tensor
+ const std::shared_ptr<SparseIndex>& sparse_index() const { return sparse_index_; }
+
+ /// \brief Return the number of dimensions of the sparse tensor
+ int ndim() const { return static_cast<int>(shape_.size()); }
+
+ /// \brief Return a vector of dimension names
+ const std::vector<std::string>& dim_names() const { return dim_names_; }
+
+ /// \brief Return the name of the i-th dimension
+ const std::string& dim_name(int i) const;
+
+ /// \brief Total number of value cells in the sparse tensor
+ int64_t size() const;
+
+ /// \brief Return true if the underlying data buffer is mutable
+ bool is_mutable() const { return data_->is_mutable(); }
+
+ /// \brief Total number of non-zero cells in the sparse tensor
+ int64_t non_zero_length() const {
+ return sparse_index_ ? sparse_index_->non_zero_length() : 0;
+ }
+
+ /// \brief Return whether sparse tensors are equal
+ bool Equals(const SparseTensor& other,
+ const EqualOptions& = EqualOptions::Defaults()) const;
+
+ /// \brief Return dense representation of sparse tensor as tensor
+ ///
+ /// The returned Tensor has row-major order (C-like).
+ Result<std::shared_ptr<Tensor>> ToTensor(MemoryPool* pool) const;
+ Result<std::shared_ptr<Tensor>> ToTensor() const {
+ return ToTensor(default_memory_pool());
+ }
+
+ /// \brief Status-return version of ToTensor().
+ ARROW_DEPRECATED("Use Result-returning version")
+ Status ToTensor(std::shared_ptr<Tensor>* out) const { return ToTensor().Value(out); }
+ Status ToTensor(MemoryPool* pool, std::shared_ptr<Tensor>* out) const {
+ return ToTensor(pool).Value(out);
+ }
+
+ protected:
+ // Constructor with all attributes
+ SparseTensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
+ const std::vector<int64_t>& shape,
+ const std::shared_ptr<SparseIndex>& sparse_index,
+ const std::vector<std::string>& dim_names);
+
+ std::shared_ptr<DataType> type_;
+ std::shared_ptr<Buffer> data_;
+ std::vector<int64_t> shape_;
+ std::shared_ptr<SparseIndex> sparse_index_;
+
+ // These names are optional
+ std::vector<std::string> dim_names_;
+};
+
+// ----------------------------------------------------------------------
+// SparseTensorImpl class
+
+namespace internal {
+
+ARROW_EXPORT
+Status MakeSparseTensorFromTensor(const Tensor& tensor,
+ SparseTensorFormat::type sparse_format_id,
+ const std::shared_ptr<DataType>& index_value_type,
+ MemoryPool* pool,
+ std::shared_ptr<SparseIndex>* out_sparse_index,
+ std::shared_ptr<Buffer>* out_data);
+
+} // namespace internal
+
+/// \brief EXPERIMENTAL: Concrete sparse tensor implementation classes with sparse index
+/// type
+template <typename SparseIndexType>
+class SparseTensorImpl : public SparseTensor {
+ public:
+ virtual ~SparseTensorImpl() = default;
+
+ /// \brief Construct a sparse tensor from physical data buffer and logical index
+ SparseTensorImpl(const std::shared_ptr<SparseIndexType>& sparse_index,
+ const std::shared_ptr<DataType>& type,
+ const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
+ const std::vector<std::string>& dim_names)
+ : SparseTensor(type, data, shape, sparse_index, dim_names) {}
+
+ /// \brief Construct an empty sparse tensor
+ SparseTensorImpl(const std::shared_ptr<DataType>& type,
+ const std::vector<int64_t>& shape,
+ const std::vector<std::string>& dim_names = {})
+ : SparseTensorImpl(NULLPTR, type, NULLPTR, shape, dim_names) {}
+
+ /// \brief Create a SparseTensor with full parameters
+ static inline Result<std::shared_ptr<SparseTensorImpl<SparseIndexType>>> Make(
+ const std::shared_ptr<SparseIndexType>& sparse_index,
+ const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
+ const std::vector<int64_t>& shape, const std::vector<std::string>& dim_names) {
+ if (!is_tensor_supported(type->id())) {
+ return Status::Invalid(type->ToString(),
+ " is not valid data type for a sparse tensor");
+ }
+ ARROW_RETURN_NOT_OK(sparse_index->ValidateShape(shape));
+ if (dim_names.size() > 0 && dim_names.size() != shape.size()) {
+ return Status::Invalid("dim_names length is inconsistent with shape");
+ }
+ return std::make_shared<SparseTensorImpl<SparseIndexType>>(sparse_index, type, data,
+ shape, dim_names);
+ }
+
+ /// \brief Create a sparse tensor from a dense tensor
+ ///
+ /// The dense tensor is re-encoded as a sparse index and a physical
+ /// data buffer for the non-zero value.
+ static inline Result<std::shared_ptr<SparseTensorImpl<SparseIndexType>>> Make(
+ const Tensor& tensor, const std::shared_ptr<DataType>& index_value_type,
+ MemoryPool* pool = default_memory_pool()) {
+ std::shared_ptr<SparseIndex> sparse_index;
+ std::shared_ptr<Buffer> data;
+ ARROW_RETURN_NOT_OK(internal::MakeSparseTensorFromTensor(
+ tensor, SparseIndexType::format_id, index_value_type, pool, &sparse_index,
+ &data));
+ return std::make_shared<SparseTensorImpl<SparseIndexType>>(
+ internal::checked_pointer_cast<SparseIndexType>(sparse_index), tensor.type(),
+ data, tensor.shape(), tensor.dim_names_);
+ }
+
+ static inline Result<std::shared_ptr<SparseTensorImpl<SparseIndexType>>> Make(
+ const Tensor& tensor, MemoryPool* pool = default_memory_pool()) {
+ return Make(tensor, int64(), pool);
+ }
+
+ private:
+ ARROW_DISALLOW_COPY_AND_ASSIGN(SparseTensorImpl);
+};
+
+/// \brief EXPERIMENTAL: Type alias for COO sparse tensor
+using SparseCOOTensor = SparseTensorImpl<SparseCOOIndex>;
+
+/// \brief EXPERIMENTAL: Type alias for CSR sparse matrix
+using SparseCSRMatrix = SparseTensorImpl<SparseCSRIndex>;
+
+/// \brief EXPERIMENTAL: Type alias for CSC sparse matrix
+using SparseCSCMatrix = SparseTensorImpl<SparseCSCIndex>;
+
+/// \brief EXPERIMENTAL: Type alias for CSF sparse tensor
+using SparseCSFTensor = SparseTensorImpl<SparseCSFIndex>;
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/status.cc b/contrib/libs/apache/arrow/cpp/src/arrow/status.cc
index 0f02cb57a23..bc70272c6e3 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/status.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/status.cc
@@ -1,143 +1,143 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-//
-// A Status encapsulates the result of an operation. It may indicate success,
-// or it may indicate an error with an associated error message.
-//
-// Multiple threads can invoke const methods on a Status without
-// external synchronization, but if any of the threads may call a
-// non-const method, all threads accessing the same Status must use
-// external synchronization.
-
-#include "arrow/status.h"
-
-#include <cassert>
-#include <cstdlib>
-#include <iostream>
-#include <sstream>
-
-#include "arrow/util/logging.h"
-
-namespace arrow {
-
-Status::Status(StatusCode code, const std::string& msg)
- : Status::Status(code, msg, nullptr) {}
-
-Status::Status(StatusCode code, std::string msg, std::shared_ptr<StatusDetail> detail) {
- ARROW_CHECK_NE(code, StatusCode::OK) << "Cannot construct ok status with message";
- state_ = new State;
- state_->code = code;
- state_->msg = std::move(msg);
- if (detail != nullptr) {
- state_->detail = std::move(detail);
- }
-}
-
-void Status::CopyFrom(const Status& s) {
- delete state_;
- if (s.state_ == nullptr) {
- state_ = nullptr;
- } else {
- state_ = new State(*s.state_);
- }
-}
-
-std::string Status::CodeAsString() const {
- if (state_ == nullptr) {
- return "OK";
- }
- return CodeAsString(code());
-}
-
-std::string Status::CodeAsString(StatusCode code) {
- const char* type;
- switch (code) {
- case StatusCode::OK:
- type = "OK";
- break;
- case StatusCode::OutOfMemory:
- type = "Out of memory";
- break;
- case StatusCode::KeyError:
- type = "Key error";
- break;
- case StatusCode::TypeError:
- type = "Type error";
- break;
- case StatusCode::Invalid:
- type = "Invalid";
- break;
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// A Status encapsulates the result of an operation. It may indicate success,
+// or it may indicate an error with an associated error message.
+//
+// Multiple threads can invoke const methods on a Status without
+// external synchronization, but if any of the threads may call a
+// non-const method, all threads accessing the same Status must use
+// external synchronization.
+
+#include "arrow/status.h"
+
+#include <cassert>
+#include <cstdlib>
+#include <iostream>
+#include <sstream>
+
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+Status::Status(StatusCode code, const std::string& msg)
+ : Status::Status(code, msg, nullptr) {}
+
+Status::Status(StatusCode code, std::string msg, std::shared_ptr<StatusDetail> detail) {
+ ARROW_CHECK_NE(code, StatusCode::OK) << "Cannot construct ok status with message";
+ state_ = new State;
+ state_->code = code;
+ state_->msg = std::move(msg);
+ if (detail != nullptr) {
+ state_->detail = std::move(detail);
+ }
+}
+
+void Status::CopyFrom(const Status& s) {
+ delete state_;
+ if (s.state_ == nullptr) {
+ state_ = nullptr;
+ } else {
+ state_ = new State(*s.state_);
+ }
+}
+
+std::string Status::CodeAsString() const {
+ if (state_ == nullptr) {
+ return "OK";
+ }
+ return CodeAsString(code());
+}
+
+std::string Status::CodeAsString(StatusCode code) {
+ const char* type;
+ switch (code) {
+ case StatusCode::OK:
+ type = "OK";
+ break;
+ case StatusCode::OutOfMemory:
+ type = "Out of memory";
+ break;
+ case StatusCode::KeyError:
+ type = "Key error";
+ break;
+ case StatusCode::TypeError:
+ type = "Type error";
+ break;
+ case StatusCode::Invalid:
+ type = "Invalid";
+ break;
case StatusCode::Cancelled:
type = "Cancelled";
break;
- case StatusCode::IOError:
- type = "IOError";
- break;
- case StatusCode::CapacityError:
- type = "Capacity error";
- break;
- case StatusCode::IndexError:
- type = "Index error";
- break;
- case StatusCode::UnknownError:
- type = "Unknown error";
- break;
- case StatusCode::NotImplemented:
- type = "NotImplemented";
- break;
- case StatusCode::SerializationError:
- type = "Serialization error";
- break;
- case StatusCode::CodeGenError:
- type = "CodeGenError in Gandiva";
- break;
- case StatusCode::ExpressionValidationError:
- type = "ExpressionValidationError";
- break;
- case StatusCode::ExecutionError:
- type = "ExecutionError in Gandiva";
- break;
- default:
- type = "Unknown";
- break;
- }
- return std::string(type);
-}
-
-std::string Status::ToString() const {
- std::string result(CodeAsString());
- if (state_ == nullptr) {
- return result;
- }
- result += ": ";
- result += state_->msg;
- if (state_->detail != nullptr) {
- result += ". Detail: ";
- result += state_->detail->ToString();
- }
-
- return result;
-}
-
-void Status::Abort() const { Abort(std::string()); }
-
-void Status::Abort(const std::string& message) const {
- std::cerr << "-- Arrow Fatal Error --\n";
- if (!message.empty()) {
- std::cerr << message << "\n";
- }
- std::cerr << ToString() << std::endl;
- std::abort();
-}
-
-#ifdef ARROW_EXTRA_ERROR_CONTEXT
-void Status::AddContextLine(const char* filename, int line, const char* expr) {
- ARROW_CHECK(!ok()) << "Cannot add context line to ok status";
- std::stringstream ss;
+ case StatusCode::IOError:
+ type = "IOError";
+ break;
+ case StatusCode::CapacityError:
+ type = "Capacity error";
+ break;
+ case StatusCode::IndexError:
+ type = "Index error";
+ break;
+ case StatusCode::UnknownError:
+ type = "Unknown error";
+ break;
+ case StatusCode::NotImplemented:
+ type = "NotImplemented";
+ break;
+ case StatusCode::SerializationError:
+ type = "Serialization error";
+ break;
+ case StatusCode::CodeGenError:
+ type = "CodeGenError in Gandiva";
+ break;
+ case StatusCode::ExpressionValidationError:
+ type = "ExpressionValidationError";
+ break;
+ case StatusCode::ExecutionError:
+ type = "ExecutionError in Gandiva";
+ break;
+ default:
+ type = "Unknown";
+ break;
+ }
+ return std::string(type);
+}
+
+std::string Status::ToString() const {
+ std::string result(CodeAsString());
+ if (state_ == nullptr) {
+ return result;
+ }
+ result += ": ";
+ result += state_->msg;
+ if (state_->detail != nullptr) {
+ result += ". Detail: ";
+ result += state_->detail->ToString();
+ }
+
+ return result;
+}
+
+void Status::Abort() const { Abort(std::string()); }
+
+void Status::Abort(const std::string& message) const {
+ std::cerr << "-- Arrow Fatal Error --\n";
+ if (!message.empty()) {
+ std::cerr << message << "\n";
+ }
+ std::cerr << ToString() << std::endl;
+ std::abort();
+}
+
+#ifdef ARROW_EXTRA_ERROR_CONTEXT
+void Status::AddContextLine(const char* filename, int line, const char* expr) {
+ ARROW_CHECK(!ok()) << "Cannot add context line to ok status";
+ std::stringstream ss;
ss << "\n" << filename << ":" << line << " " << expr;
- state_->msg += ss.str();
-}
-#endif
-
-} // namespace arrow
+ state_->msg += ss.str();
+}
+#endif
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/status.h b/contrib/libs/apache/arrow/cpp/src/arrow/status.h
index 056d60d6f32..8a3b8ee280d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/status.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/status.h
@@ -1,451 +1,451 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-//
-// A Status encapsulates the result of an operation. It may indicate success,
-// or it may indicate an error with an associated error message.
-//
-// Multiple threads can invoke const methods on a Status without
-// external synchronization, but if any of the threads may call a
-// non-const method, all threads accessing the same Status must use
-// external synchronization.
-
-// Adapted from Apache Kudu, TensorFlow
-
-#pragma once
-
-#include <cstring>
-#include <iosfwd>
-#include <memory>
-#include <string>
-#include <utility>
-
-#include "arrow/util/compare.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/string_builder.h"
-#include "arrow/util/visibility.h"
-
-#ifdef ARROW_EXTRA_ERROR_CONTEXT
-
-/// \brief Return with given status if condition is met.
-#define ARROW_RETURN_IF_(condition, status, expr) \
- do { \
- if (ARROW_PREDICT_FALSE(condition)) { \
- ::arrow::Status _st = (status); \
- _st.AddContextLine(__FILE__, __LINE__, expr); \
- return _st; \
- } \
- } while (0)
-
-#else
-
-#define ARROW_RETURN_IF_(condition, status, _) \
- do { \
- if (ARROW_PREDICT_FALSE(condition)) { \
- return (status); \
- } \
- } while (0)
-
-#endif // ARROW_EXTRA_ERROR_CONTEXT
-
-#define ARROW_RETURN_IF(condition, status) \
- ARROW_RETURN_IF_(condition, status, ARROW_STRINGIFY(status))
-
-/// \brief Propagate any non-successful Status to the caller
-#define ARROW_RETURN_NOT_OK(status) \
- do { \
- ::arrow::Status __s = ::arrow::internal::GenericToStatus(status); \
- ARROW_RETURN_IF_(!__s.ok(), __s, ARROW_STRINGIFY(status)); \
- } while (false)
-
-#define RETURN_NOT_OK_ELSE(s, else_) \
- do { \
- ::arrow::Status _s = ::arrow::internal::GenericToStatus(s); \
- if (!_s.ok()) { \
- else_; \
- return _s; \
- } \
- } while (false)
-
-// This is an internal-use macro and should not be used in public headers.
-#ifndef RETURN_NOT_OK
-#define RETURN_NOT_OK(s) ARROW_RETURN_NOT_OK(s)
-#endif
-
-namespace arrow {
-
-enum class StatusCode : char {
- OK = 0,
- OutOfMemory = 1,
- KeyError = 2,
- TypeError = 3,
- Invalid = 4,
- IOError = 5,
- CapacityError = 6,
- IndexError = 7,
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// A Status encapsulates the result of an operation. It may indicate success,
+// or it may indicate an error with an associated error message.
+//
+// Multiple threads can invoke const methods on a Status without
+// external synchronization, but if any of the threads may call a
+// non-const method, all threads accessing the same Status must use
+// external synchronization.
+
+// Adapted from Apache Kudu, TensorFlow
+
+#pragma once
+
+#include <cstring>
+#include <iosfwd>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "arrow/util/compare.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/string_builder.h"
+#include "arrow/util/visibility.h"
+
+#ifdef ARROW_EXTRA_ERROR_CONTEXT
+
+/// \brief Return with given status if condition is met.
+#define ARROW_RETURN_IF_(condition, status, expr) \
+ do { \
+ if (ARROW_PREDICT_FALSE(condition)) { \
+ ::arrow::Status _st = (status); \
+ _st.AddContextLine(__FILE__, __LINE__, expr); \
+ return _st; \
+ } \
+ } while (0)
+
+#else
+
+#define ARROW_RETURN_IF_(condition, status, _) \
+ do { \
+ if (ARROW_PREDICT_FALSE(condition)) { \
+ return (status); \
+ } \
+ } while (0)
+
+#endif // ARROW_EXTRA_ERROR_CONTEXT
+
+#define ARROW_RETURN_IF(condition, status) \
+ ARROW_RETURN_IF_(condition, status, ARROW_STRINGIFY(status))
+
+/// \brief Propagate any non-successful Status to the caller
+#define ARROW_RETURN_NOT_OK(status) \
+ do { \
+ ::arrow::Status __s = ::arrow::internal::GenericToStatus(status); \
+ ARROW_RETURN_IF_(!__s.ok(), __s, ARROW_STRINGIFY(status)); \
+ } while (false)
+
+#define RETURN_NOT_OK_ELSE(s, else_) \
+ do { \
+ ::arrow::Status _s = ::arrow::internal::GenericToStatus(s); \
+ if (!_s.ok()) { \
+ else_; \
+ return _s; \
+ } \
+ } while (false)
+
+// This is an internal-use macro and should not be used in public headers.
+#ifndef RETURN_NOT_OK
+#define RETURN_NOT_OK(s) ARROW_RETURN_NOT_OK(s)
+#endif
+
+namespace arrow {
+
+enum class StatusCode : char {
+ OK = 0,
+ OutOfMemory = 1,
+ KeyError = 2,
+ TypeError = 3,
+ Invalid = 4,
+ IOError = 5,
+ CapacityError = 6,
+ IndexError = 7,
Cancelled = 8,
- UnknownError = 9,
- NotImplemented = 10,
- SerializationError = 11,
- RError = 13,
- // Gandiva range of errors
- CodeGenError = 40,
- ExpressionValidationError = 41,
- ExecutionError = 42,
- // Continue generic codes.
- AlreadyExists = 45
-};
-
-/// \brief An opaque class that allows subsystems to retain
-/// additional information inside the Status.
-class ARROW_EXPORT StatusDetail {
- public:
- virtual ~StatusDetail() = default;
- /// \brief Return a unique id for the type of the StatusDetail
- /// (effectively a poor man's substitute for RTTI).
- virtual const char* type_id() const = 0;
- /// \brief Produce a human-readable description of this status.
- virtual std::string ToString() const = 0;
-
- bool operator==(const StatusDetail& other) const noexcept {
- return std::string(type_id()) == other.type_id() && ToString() == other.ToString();
- }
-};
-
-/// \brief Status outcome object (success or error)
-///
-/// The Status object is an object holding the outcome of an operation.
-/// The outcome is represented as a StatusCode, either success
-/// (StatusCode::OK) or an error (any other of the StatusCode enumeration values).
-///
-/// Additionally, if an error occurred, a specific error message is generally
-/// attached.
-class ARROW_MUST_USE_TYPE ARROW_EXPORT Status : public util::EqualityComparable<Status>,
- public util::ToStringOstreamable<Status> {
- public:
- // Create a success status.
- Status() noexcept : state_(NULLPTR) {}
- ~Status() noexcept {
- // ARROW-2400: On certain compilers, splitting off the slow path improves
- // performance significantly.
- if (ARROW_PREDICT_FALSE(state_ != NULL)) {
- DeleteState();
- }
- }
-
- Status(StatusCode code, const std::string& msg);
- /// \brief Pluggable constructor for use by sub-systems. detail cannot be null.
- Status(StatusCode code, std::string msg, std::shared_ptr<StatusDetail> detail);
-
- // Copy the specified status.
- inline Status(const Status& s);
- inline Status& operator=(const Status& s);
-
- // Move the specified status.
- inline Status(Status&& s) noexcept;
- inline Status& operator=(Status&& s) noexcept;
-
- inline bool Equals(const Status& s) const;
-
- // AND the statuses.
- inline Status operator&(const Status& s) const noexcept;
- inline Status operator&(Status&& s) const noexcept;
- inline Status& operator&=(const Status& s) noexcept;
- inline Status& operator&=(Status&& s) noexcept;
-
- /// Return a success status
- static Status OK() { return Status(); }
-
- template <typename... Args>
- static Status FromArgs(StatusCode code, Args&&... args) {
- return Status(code, util::StringBuilder(std::forward<Args>(args)...));
- }
-
- template <typename... Args>
- static Status FromDetailAndArgs(StatusCode code, std::shared_ptr<StatusDetail> detail,
- Args&&... args) {
- return Status(code, util::StringBuilder(std::forward<Args>(args)...),
- std::move(detail));
- }
-
- /// Return an error status for out-of-memory conditions
- template <typename... Args>
- static Status OutOfMemory(Args&&... args) {
- return Status::FromArgs(StatusCode::OutOfMemory, std::forward<Args>(args)...);
- }
-
- /// Return an error status for failed key lookups (e.g. column name in a table)
- template <typename... Args>
- static Status KeyError(Args&&... args) {
- return Status::FromArgs(StatusCode::KeyError, std::forward<Args>(args)...);
- }
-
- /// Return an error status for type errors (such as mismatching data types)
- template <typename... Args>
- static Status TypeError(Args&&... args) {
- return Status::FromArgs(StatusCode::TypeError, std::forward<Args>(args)...);
- }
-
- /// Return an error status for unknown errors
- template <typename... Args>
- static Status UnknownError(Args&&... args) {
- return Status::FromArgs(StatusCode::UnknownError, std::forward<Args>(args)...);
- }
-
- /// Return an error status when an operation or a combination of operation and
- /// data types is unimplemented
- template <typename... Args>
- static Status NotImplemented(Args&&... args) {
- return Status::FromArgs(StatusCode::NotImplemented, std::forward<Args>(args)...);
- }
-
- /// Return an error status for invalid data (for example a string that fails parsing)
- template <typename... Args>
- static Status Invalid(Args&&... args) {
- return Status::FromArgs(StatusCode::Invalid, std::forward<Args>(args)...);
- }
-
+ UnknownError = 9,
+ NotImplemented = 10,
+ SerializationError = 11,
+ RError = 13,
+ // Gandiva range of errors
+ CodeGenError = 40,
+ ExpressionValidationError = 41,
+ ExecutionError = 42,
+ // Continue generic codes.
+ AlreadyExists = 45
+};
+
+/// \brief An opaque class that allows subsystems to retain
+/// additional information inside the Status.
+class ARROW_EXPORT StatusDetail {
+ public:
+ virtual ~StatusDetail() = default;
+ /// \brief Return a unique id for the type of the StatusDetail
+ /// (effectively a poor man's substitute for RTTI).
+ virtual const char* type_id() const = 0;
+ /// \brief Produce a human-readable description of this status.
+ virtual std::string ToString() const = 0;
+
+ bool operator==(const StatusDetail& other) const noexcept {
+ return std::string(type_id()) == other.type_id() && ToString() == other.ToString();
+ }
+};
+
+/// \brief Status outcome object (success or error)
+///
+/// The Status object is an object holding the outcome of an operation.
+/// The outcome is represented as a StatusCode, either success
+/// (StatusCode::OK) or an error (any other of the StatusCode enumeration values).
+///
+/// Additionally, if an error occurred, a specific error message is generally
+/// attached.
+class ARROW_MUST_USE_TYPE ARROW_EXPORT Status : public util::EqualityComparable<Status>,
+ public util::ToStringOstreamable<Status> {
+ public:
+ // Create a success status.
+ Status() noexcept : state_(NULLPTR) {}
+ ~Status() noexcept {
+ // ARROW-2400: On certain compilers, splitting off the slow path improves
+ // performance significantly.
+ if (ARROW_PREDICT_FALSE(state_ != NULL)) {
+ DeleteState();
+ }
+ }
+
+ Status(StatusCode code, const std::string& msg);
+ /// \brief Pluggable constructor for use by sub-systems. detail cannot be null.
+ Status(StatusCode code, std::string msg, std::shared_ptr<StatusDetail> detail);
+
+ // Copy the specified status.
+ inline Status(const Status& s);
+ inline Status& operator=(const Status& s);
+
+ // Move the specified status.
+ inline Status(Status&& s) noexcept;
+ inline Status& operator=(Status&& s) noexcept;
+
+ inline bool Equals(const Status& s) const;
+
+ // AND the statuses.
+ inline Status operator&(const Status& s) const noexcept;
+ inline Status operator&(Status&& s) const noexcept;
+ inline Status& operator&=(const Status& s) noexcept;
+ inline Status& operator&=(Status&& s) noexcept;
+
+ /// Return a success status
+ static Status OK() { return Status(); }
+
+ template <typename... Args>
+ static Status FromArgs(StatusCode code, Args&&... args) {
+ return Status(code, util::StringBuilder(std::forward<Args>(args)...));
+ }
+
+ template <typename... Args>
+ static Status FromDetailAndArgs(StatusCode code, std::shared_ptr<StatusDetail> detail,
+ Args&&... args) {
+ return Status(code, util::StringBuilder(std::forward<Args>(args)...),
+ std::move(detail));
+ }
+
+ /// Return an error status for out-of-memory conditions
+ template <typename... Args>
+ static Status OutOfMemory(Args&&... args) {
+ return Status::FromArgs(StatusCode::OutOfMemory, std::forward<Args>(args)...);
+ }
+
+ /// Return an error status for failed key lookups (e.g. column name in a table)
+ template <typename... Args>
+ static Status KeyError(Args&&... args) {
+ return Status::FromArgs(StatusCode::KeyError, std::forward<Args>(args)...);
+ }
+
+ /// Return an error status for type errors (such as mismatching data types)
+ template <typename... Args>
+ static Status TypeError(Args&&... args) {
+ return Status::FromArgs(StatusCode::TypeError, std::forward<Args>(args)...);
+ }
+
+ /// Return an error status for unknown errors
+ template <typename... Args>
+ static Status UnknownError(Args&&... args) {
+ return Status::FromArgs(StatusCode::UnknownError, std::forward<Args>(args)...);
+ }
+
+ /// Return an error status when an operation or a combination of operation and
+ /// data types is unimplemented
+ template <typename... Args>
+ static Status NotImplemented(Args&&... args) {
+ return Status::FromArgs(StatusCode::NotImplemented, std::forward<Args>(args)...);
+ }
+
+ /// Return an error status for invalid data (for example a string that fails parsing)
+ template <typename... Args>
+ static Status Invalid(Args&&... args) {
+ return Status::FromArgs(StatusCode::Invalid, std::forward<Args>(args)...);
+ }
+
/// Return an error status for cancelled operation
template <typename... Args>
static Status Cancelled(Args&&... args) {
return Status::FromArgs(StatusCode::Cancelled, std::forward<Args>(args)...);
}
- /// Return an error status when an index is out of bounds
- template <typename... Args>
- static Status IndexError(Args&&... args) {
- return Status::FromArgs(StatusCode::IndexError, std::forward<Args>(args)...);
- }
-
- /// Return an error status when a container's capacity would exceed its limits
- template <typename... Args>
- static Status CapacityError(Args&&... args) {
- return Status::FromArgs(StatusCode::CapacityError, std::forward<Args>(args)...);
- }
-
- /// Return an error status when some IO-related operation failed
- template <typename... Args>
- static Status IOError(Args&&... args) {
- return Status::FromArgs(StatusCode::IOError, std::forward<Args>(args)...);
- }
-
- /// Return an error status when some (de)serialization operation failed
- template <typename... Args>
- static Status SerializationError(Args&&... args) {
- return Status::FromArgs(StatusCode::SerializationError, std::forward<Args>(args)...);
- }
-
- template <typename... Args>
- static Status RError(Args&&... args) {
- return Status::FromArgs(StatusCode::RError, std::forward<Args>(args)...);
- }
-
- template <typename... Args>
- static Status CodeGenError(Args&&... args) {
- return Status::FromArgs(StatusCode::CodeGenError, std::forward<Args>(args)...);
- }
-
- template <typename... Args>
- static Status ExpressionValidationError(Args&&... args) {
- return Status::FromArgs(StatusCode::ExpressionValidationError,
- std::forward<Args>(args)...);
- }
-
- template <typename... Args>
- static Status ExecutionError(Args&&... args) {
- return Status::FromArgs(StatusCode::ExecutionError, std::forward<Args>(args)...);
- }
-
- template <typename... Args>
- static Status AlreadyExists(Args&&... args) {
- return Status::FromArgs(StatusCode::AlreadyExists, std::forward<Args>(args)...);
- }
-
- /// Return true iff the status indicates success.
- bool ok() const { return (state_ == NULLPTR); }
-
- /// Return true iff the status indicates an out-of-memory error.
- bool IsOutOfMemory() const { return code() == StatusCode::OutOfMemory; }
- /// Return true iff the status indicates a key lookup error.
- bool IsKeyError() const { return code() == StatusCode::KeyError; }
- /// Return true iff the status indicates invalid data.
- bool IsInvalid() const { return code() == StatusCode::Invalid; }
+ /// Return an error status when an index is out of bounds
+ template <typename... Args>
+ static Status IndexError(Args&&... args) {
+ return Status::FromArgs(StatusCode::IndexError, std::forward<Args>(args)...);
+ }
+
+ /// Return an error status when a container's capacity would exceed its limits
+ template <typename... Args>
+ static Status CapacityError(Args&&... args) {
+ return Status::FromArgs(StatusCode::CapacityError, std::forward<Args>(args)...);
+ }
+
+ /// Return an error status when some IO-related operation failed
+ template <typename... Args>
+ static Status IOError(Args&&... args) {
+ return Status::FromArgs(StatusCode::IOError, std::forward<Args>(args)...);
+ }
+
+ /// Return an error status when some (de)serialization operation failed
+ template <typename... Args>
+ static Status SerializationError(Args&&... args) {
+ return Status::FromArgs(StatusCode::SerializationError, std::forward<Args>(args)...);
+ }
+
+ template <typename... Args>
+ static Status RError(Args&&... args) {
+ return Status::FromArgs(StatusCode::RError, std::forward<Args>(args)...);
+ }
+
+ template <typename... Args>
+ static Status CodeGenError(Args&&... args) {
+ return Status::FromArgs(StatusCode::CodeGenError, std::forward<Args>(args)...);
+ }
+
+ template <typename... Args>
+ static Status ExpressionValidationError(Args&&... args) {
+ return Status::FromArgs(StatusCode::ExpressionValidationError,
+ std::forward<Args>(args)...);
+ }
+
+ template <typename... Args>
+ static Status ExecutionError(Args&&... args) {
+ return Status::FromArgs(StatusCode::ExecutionError, std::forward<Args>(args)...);
+ }
+
+ template <typename... Args>
+ static Status AlreadyExists(Args&&... args) {
+ return Status::FromArgs(StatusCode::AlreadyExists, std::forward<Args>(args)...);
+ }
+
+ /// Return true iff the status indicates success.
+ bool ok() const { return (state_ == NULLPTR); }
+
+ /// Return true iff the status indicates an out-of-memory error.
+ bool IsOutOfMemory() const { return code() == StatusCode::OutOfMemory; }
+ /// Return true iff the status indicates a key lookup error.
+ bool IsKeyError() const { return code() == StatusCode::KeyError; }
+ /// Return true iff the status indicates invalid data.
+ bool IsInvalid() const { return code() == StatusCode::Invalid; }
/// Return true iff the status indicates a cancelled operation.
bool IsCancelled() const { return code() == StatusCode::Cancelled; }
- /// Return true iff the status indicates an IO-related failure.
- bool IsIOError() const { return code() == StatusCode::IOError; }
- /// Return true iff the status indicates a container reaching capacity limits.
- bool IsCapacityError() const { return code() == StatusCode::CapacityError; }
- /// Return true iff the status indicates an out of bounds index.
- bool IsIndexError() const { return code() == StatusCode::IndexError; }
- /// Return true iff the status indicates a type error.
- bool IsTypeError() const { return code() == StatusCode::TypeError; }
- /// Return true iff the status indicates an unknown error.
- bool IsUnknownError() const { return code() == StatusCode::UnknownError; }
- /// Return true iff the status indicates an unimplemented operation.
- bool IsNotImplemented() const { return code() == StatusCode::NotImplemented; }
- /// Return true iff the status indicates a (de)serialization failure
- bool IsSerializationError() const { return code() == StatusCode::SerializationError; }
- /// Return true iff the status indicates a R-originated error.
- bool IsRError() const { return code() == StatusCode::RError; }
-
- bool IsCodeGenError() const { return code() == StatusCode::CodeGenError; }
-
- bool IsExpressionValidationError() const {
- return code() == StatusCode::ExpressionValidationError;
- }
-
- bool IsExecutionError() const { return code() == StatusCode::ExecutionError; }
- bool IsAlreadyExists() const { return code() == StatusCode::AlreadyExists; }
-
- /// \brief Return a string representation of this status suitable for printing.
- ///
- /// The string "OK" is returned for success.
- std::string ToString() const;
-
- /// \brief Return a string representation of the status code, without the message
- /// text or POSIX code information.
- std::string CodeAsString() const;
- static std::string CodeAsString(StatusCode);
-
- /// \brief Return the StatusCode value attached to this status.
- StatusCode code() const { return ok() ? StatusCode::OK : state_->code; }
-
- /// \brief Return the specific error message attached to this status.
+ /// Return true iff the status indicates an IO-related failure.
+ bool IsIOError() const { return code() == StatusCode::IOError; }
+ /// Return true iff the status indicates a container reaching capacity limits.
+ bool IsCapacityError() const { return code() == StatusCode::CapacityError; }
+ /// Return true iff the status indicates an out of bounds index.
+ bool IsIndexError() const { return code() == StatusCode::IndexError; }
+ /// Return true iff the status indicates a type error.
+ bool IsTypeError() const { return code() == StatusCode::TypeError; }
+ /// Return true iff the status indicates an unknown error.
+ bool IsUnknownError() const { return code() == StatusCode::UnknownError; }
+ /// Return true iff the status indicates an unimplemented operation.
+ bool IsNotImplemented() const { return code() == StatusCode::NotImplemented; }
+ /// Return true iff the status indicates a (de)serialization failure
+ bool IsSerializationError() const { return code() == StatusCode::SerializationError; }
+ /// Return true iff the status indicates a R-originated error.
+ bool IsRError() const { return code() == StatusCode::RError; }
+
+ bool IsCodeGenError() const { return code() == StatusCode::CodeGenError; }
+
+ bool IsExpressionValidationError() const {
+ return code() == StatusCode::ExpressionValidationError;
+ }
+
+ bool IsExecutionError() const { return code() == StatusCode::ExecutionError; }
+ bool IsAlreadyExists() const { return code() == StatusCode::AlreadyExists; }
+
+ /// \brief Return a string representation of this status suitable for printing.
+ ///
+ /// The string "OK" is returned for success.
+ std::string ToString() const;
+
+ /// \brief Return a string representation of the status code, without the message
+ /// text or POSIX code information.
+ std::string CodeAsString() const;
+ static std::string CodeAsString(StatusCode);
+
+ /// \brief Return the StatusCode value attached to this status.
+ StatusCode code() const { return ok() ? StatusCode::OK : state_->code; }
+
+ /// \brief Return the specific error message attached to this status.
const std::string& message() const {
static const std::string no_message = "";
return ok() ? no_message : state_->msg;
}
-
- /// \brief Return the status detail attached to this message.
- const std::shared_ptr<StatusDetail>& detail() const {
- static std::shared_ptr<StatusDetail> no_detail = NULLPTR;
- return state_ ? state_->detail : no_detail;
- }
-
- /// \brief Return a new Status copying the existing status, but
- /// updating with the existing detail.
- Status WithDetail(std::shared_ptr<StatusDetail> new_detail) const {
- return Status(code(), message(), std::move(new_detail));
- }
-
- /// \brief Return a new Status with changed message, copying the
- /// existing status code and detail.
- template <typename... Args>
- Status WithMessage(Args&&... args) const {
- return FromArgs(code(), std::forward<Args>(args)...).WithDetail(detail());
- }
-
- [[noreturn]] void Abort() const;
- [[noreturn]] void Abort(const std::string& message) const;
-
-#ifdef ARROW_EXTRA_ERROR_CONTEXT
- void AddContextLine(const char* filename, int line, const char* expr);
-#endif
-
- private:
- struct State {
- StatusCode code;
- std::string msg;
- std::shared_ptr<StatusDetail> detail;
- };
- // OK status has a `NULL` state_. Otherwise, `state_` points to
- // a `State` structure containing the error code and message(s)
- State* state_;
-
- void DeleteState() {
- delete state_;
- state_ = NULLPTR;
- }
- void CopyFrom(const Status& s);
- inline void MoveFrom(Status& s);
-};
-
-void Status::MoveFrom(Status& s) {
- delete state_;
- state_ = s.state_;
- s.state_ = NULLPTR;
-}
-
-Status::Status(const Status& s)
- : state_((s.state_ == NULLPTR) ? NULLPTR : new State(*s.state_)) {}
-
-Status& Status::operator=(const Status& s) {
- // The following condition catches both aliasing (when this == &s),
- // and the common case where both s and *this are ok.
- if (state_ != s.state_) {
- CopyFrom(s);
- }
- return *this;
-}
-
-Status::Status(Status&& s) noexcept : state_(s.state_) { s.state_ = NULLPTR; }
-
-Status& Status::operator=(Status&& s) noexcept {
- MoveFrom(s);
- return *this;
-}
-
-bool Status::Equals(const Status& s) const {
- if (state_ == s.state_) {
- return true;
- }
-
- if (ok() || s.ok()) {
- return false;
- }
-
- if (detail() != s.detail()) {
- if ((detail() && !s.detail()) || (!detail() && s.detail())) {
- return false;
- }
- return *detail() == *s.detail();
- }
-
- return code() == s.code() && message() == s.message();
-}
-
-/// \cond FALSE
-// (note: emits warnings on Doxygen < 1.8.15,
-// see https://github.com/doxygen/doxygen/issues/6295)
-Status Status::operator&(const Status& s) const noexcept {
- if (ok()) {
- return s;
- } else {
- return *this;
- }
-}
-
-Status Status::operator&(Status&& s) const noexcept {
- if (ok()) {
- return std::move(s);
- } else {
- return *this;
- }
-}
-
-Status& Status::operator&=(const Status& s) noexcept {
- if (ok() && !s.ok()) {
- CopyFrom(s);
- }
- return *this;
-}
-
-Status& Status::operator&=(Status&& s) noexcept {
- if (ok() && !s.ok()) {
- MoveFrom(s);
- }
- return *this;
-}
-/// \endcond
-
-namespace internal {
-
-// Extract Status from Status or Result<T>
-// Useful for the status check macros such as RETURN_NOT_OK.
+
+ /// \brief Return the status detail attached to this message.
+ const std::shared_ptr<StatusDetail>& detail() const {
+ static std::shared_ptr<StatusDetail> no_detail = NULLPTR;
+ return state_ ? state_->detail : no_detail;
+ }
+
+ /// \brief Return a new Status copying the existing status, but
+ /// updating with the existing detail.
+ Status WithDetail(std::shared_ptr<StatusDetail> new_detail) const {
+ return Status(code(), message(), std::move(new_detail));
+ }
+
+ /// \brief Return a new Status with changed message, copying the
+ /// existing status code and detail.
+ template <typename... Args>
+ Status WithMessage(Args&&... args) const {
+ return FromArgs(code(), std::forward<Args>(args)...).WithDetail(detail());
+ }
+
+ [[noreturn]] void Abort() const;
+ [[noreturn]] void Abort(const std::string& message) const;
+
+#ifdef ARROW_EXTRA_ERROR_CONTEXT
+ void AddContextLine(const char* filename, int line, const char* expr);
+#endif
+
+ private:
+ struct State {
+ StatusCode code;
+ std::string msg;
+ std::shared_ptr<StatusDetail> detail;
+ };
+ // OK status has a `NULL` state_. Otherwise, `state_` points to
+ // a `State` structure containing the error code and message(s)
+ State* state_;
+
+ void DeleteState() {
+ delete state_;
+ state_ = NULLPTR;
+ }
+ void CopyFrom(const Status& s);
+ inline void MoveFrom(Status& s);
+};
+
+void Status::MoveFrom(Status& s) {
+ delete state_;
+ state_ = s.state_;
+ s.state_ = NULLPTR;
+}
+
+Status::Status(const Status& s)
+ : state_((s.state_ == NULLPTR) ? NULLPTR : new State(*s.state_)) {}
+
+Status& Status::operator=(const Status& s) {
+ // The following condition catches both aliasing (when this == &s),
+ // and the common case where both s and *this are ok.
+ if (state_ != s.state_) {
+ CopyFrom(s);
+ }
+ return *this;
+}
+
+Status::Status(Status&& s) noexcept : state_(s.state_) { s.state_ = NULLPTR; }
+
+Status& Status::operator=(Status&& s) noexcept {
+ MoveFrom(s);
+ return *this;
+}
+
+bool Status::Equals(const Status& s) const {
+ if (state_ == s.state_) {
+ return true;
+ }
+
+ if (ok() || s.ok()) {
+ return false;
+ }
+
+ if (detail() != s.detail()) {
+ if ((detail() && !s.detail()) || (!detail() && s.detail())) {
+ return false;
+ }
+ return *detail() == *s.detail();
+ }
+
+ return code() == s.code() && message() == s.message();
+}
+
+/// \cond FALSE
+// (note: emits warnings on Doxygen < 1.8.15,
+// see https://github.com/doxygen/doxygen/issues/6295)
+Status Status::operator&(const Status& s) const noexcept {
+ if (ok()) {
+ return s;
+ } else {
+ return *this;
+ }
+}
+
+Status Status::operator&(Status&& s) const noexcept {
+ if (ok()) {
+ return std::move(s);
+ } else {
+ return *this;
+ }
+}
+
+Status& Status::operator&=(const Status& s) noexcept {
+ if (ok() && !s.ok()) {
+ CopyFrom(s);
+ }
+ return *this;
+}
+
+Status& Status::operator&=(Status&& s) noexcept {
+ if (ok() && !s.ok()) {
+ MoveFrom(s);
+ }
+ return *this;
+}
+/// \endcond
+
+namespace internal {
+
+// Extract Status from Status or Result<T>
+// Useful for the status check macros such as RETURN_NOT_OK.
inline const Status& GenericToStatus(const Status& st) { return st; }
-inline Status GenericToStatus(Status&& st) { return std::move(st); }
-
-} // namespace internal
-
-} // namespace arrow
+inline Status GenericToStatus(Status&& st) { return std::move(st); }
+
+} // namespace internal
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/stl_allocator.h b/contrib/libs/apache/arrow/cpp/src/arrow/stl_allocator.h
index b5ad2b53460..bb3c5b0ab38 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/stl_allocator.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/stl_allocator.h
@@ -1,153 +1,153 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <algorithm>
-#include <cstddef>
-#include <memory>
-#include <string>
-#include <utility>
-
-#include "arrow/memory_pool.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/macros.h"
-
-namespace arrow {
-namespace stl {
-
-/// \brief A STL allocator delegating allocations to a Arrow MemoryPool
-template <class T>
-class allocator {
- public:
- using value_type = T;
- using pointer = T*;
- using const_pointer = const T*;
- using reference = T&;
- using const_reference = const T&;
- using size_type = std::size_t;
- using difference_type = std::ptrdiff_t;
-
- template <class U>
- struct rebind {
- using other = allocator<U>;
- };
-
- /// \brief Construct an allocator from the default MemoryPool
- allocator() noexcept : pool_(default_memory_pool()) {}
- /// \brief Construct an allocator from the given MemoryPool
- explicit allocator(MemoryPool* pool) noexcept : pool_(pool) {}
-
- template <class U>
- allocator(const allocator<U>& rhs) noexcept : pool_(rhs.pool()) {}
-
- ~allocator() { pool_ = NULLPTR; }
-
- pointer address(reference r) const noexcept { return std::addressof(r); }
-
- const_pointer address(const_reference r) const noexcept { return std::addressof(r); }
-
- pointer allocate(size_type n, const void* /*hint*/ = NULLPTR) {
- uint8_t* data;
- Status s = pool_->Allocate(n * sizeof(T), &data);
- if (!s.ok()) throw std::bad_alloc();
- return reinterpret_cast<pointer>(data);
- }
-
- void deallocate(pointer p, size_type n) {
- pool_->Free(reinterpret_cast<uint8_t*>(p), n * sizeof(T));
- }
-
- size_type size_max() const noexcept { return size_type(-1) / sizeof(T); }
-
- template <class U, class... Args>
- void construct(U* p, Args&&... args) {
- new (reinterpret_cast<void*>(p)) U(std::forward<Args>(args)...);
- }
-
- template <class U>
- void destroy(U* p) {
- p->~U();
- }
-
- MemoryPool* pool() const noexcept { return pool_; }
-
- private:
- MemoryPool* pool_;
-};
-
-/// \brief A MemoryPool implementation delegating allocations to a STL allocator
-///
-/// Note that STL allocators don't provide a resizing operation, and therefore
-/// any buffer resizes will do a full reallocation and copy.
-template <typename Allocator = std::allocator<uint8_t>>
-class STLMemoryPool : public MemoryPool {
- public:
- /// \brief Construct a memory pool from the given allocator
- explicit STLMemoryPool(const Allocator& alloc) : alloc_(alloc) {}
-
- Status Allocate(int64_t size, uint8_t** out) override {
- try {
- *out = alloc_.allocate(size);
- } catch (std::bad_alloc& e) {
- return Status::OutOfMemory(e.what());
- }
- stats_.UpdateAllocatedBytes(size);
- return Status::OK();
- }
-
- Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) override {
- uint8_t* old_ptr = *ptr;
- try {
- *ptr = alloc_.allocate(new_size);
- } catch (std::bad_alloc& e) {
- return Status::OutOfMemory(e.what());
- }
- memcpy(*ptr, old_ptr, std::min(old_size, new_size));
- alloc_.deallocate(old_ptr, old_size);
- stats_.UpdateAllocatedBytes(new_size - old_size);
- return Status::OK();
- }
-
- void Free(uint8_t* buffer, int64_t size) override {
- alloc_.deallocate(buffer, size);
- stats_.UpdateAllocatedBytes(-size);
- }
-
- int64_t bytes_allocated() const override { return stats_.bytes_allocated(); }
-
- int64_t max_memory() const override { return stats_.max_memory(); }
-
- std::string backend_name() const override { return "stl"; }
-
- private:
- Allocator alloc_;
- arrow::internal::MemoryPoolStats stats_;
-};
-
-template <class T1, class T2>
-bool operator==(const allocator<T1>& lhs, const allocator<T2>& rhs) noexcept {
- return lhs.pool() == rhs.pool();
-}
-
-template <class T1, class T2>
-bool operator!=(const allocator<T1>& lhs, const allocator<T2>& rhs) noexcept {
- return !(lhs == rhs);
-}
-
-} // namespace stl
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "arrow/memory_pool.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/macros.h"
+
+namespace arrow {
+namespace stl {
+
+/// \brief A STL allocator delegating allocations to a Arrow MemoryPool
+template <class T>
+class allocator {
+ public:
+ using value_type = T;
+ using pointer = T*;
+ using const_pointer = const T*;
+ using reference = T&;
+ using const_reference = const T&;
+ using size_type = std::size_t;
+ using difference_type = std::ptrdiff_t;
+
+ template <class U>
+ struct rebind {
+ using other = allocator<U>;
+ };
+
+ /// \brief Construct an allocator from the default MemoryPool
+ allocator() noexcept : pool_(default_memory_pool()) {}
+ /// \brief Construct an allocator from the given MemoryPool
+ explicit allocator(MemoryPool* pool) noexcept : pool_(pool) {}
+
+ template <class U>
+ allocator(const allocator<U>& rhs) noexcept : pool_(rhs.pool()) {}
+
+ ~allocator() { pool_ = NULLPTR; }
+
+ pointer address(reference r) const noexcept { return std::addressof(r); }
+
+ const_pointer address(const_reference r) const noexcept { return std::addressof(r); }
+
+ pointer allocate(size_type n, const void* /*hint*/ = NULLPTR) {
+ uint8_t* data;
+ Status s = pool_->Allocate(n * sizeof(T), &data);
+ if (!s.ok()) throw std::bad_alloc();
+ return reinterpret_cast<pointer>(data);
+ }
+
+ void deallocate(pointer p, size_type n) {
+ pool_->Free(reinterpret_cast<uint8_t*>(p), n * sizeof(T));
+ }
+
+ size_type size_max() const noexcept { return size_type(-1) / sizeof(T); }
+
+ template <class U, class... Args>
+ void construct(U* p, Args&&... args) {
+ new (reinterpret_cast<void*>(p)) U(std::forward<Args>(args)...);
+ }
+
+ template <class U>
+ void destroy(U* p) {
+ p->~U();
+ }
+
+ MemoryPool* pool() const noexcept { return pool_; }
+
+ private:
+ MemoryPool* pool_;
+};
+
+/// \brief A MemoryPool implementation delegating allocations to a STL allocator
+///
+/// Note that STL allocators don't provide a resizing operation, and therefore
+/// any buffer resizes will do a full reallocation and copy.
+template <typename Allocator = std::allocator<uint8_t>>
+class STLMemoryPool : public MemoryPool {
+ public:
+ /// \brief Construct a memory pool from the given allocator
+ explicit STLMemoryPool(const Allocator& alloc) : alloc_(alloc) {}
+
+ Status Allocate(int64_t size, uint8_t** out) override {
+ try {
+ *out = alloc_.allocate(size);
+ } catch (std::bad_alloc& e) {
+ return Status::OutOfMemory(e.what());
+ }
+ stats_.UpdateAllocatedBytes(size);
+ return Status::OK();
+ }
+
+ Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) override {
+ uint8_t* old_ptr = *ptr;
+ try {
+ *ptr = alloc_.allocate(new_size);
+ } catch (std::bad_alloc& e) {
+ return Status::OutOfMemory(e.what());
+ }
+ memcpy(*ptr, old_ptr, std::min(old_size, new_size));
+ alloc_.deallocate(old_ptr, old_size);
+ stats_.UpdateAllocatedBytes(new_size - old_size);
+ return Status::OK();
+ }
+
+ void Free(uint8_t* buffer, int64_t size) override {
+ alloc_.deallocate(buffer, size);
+ stats_.UpdateAllocatedBytes(-size);
+ }
+
+ int64_t bytes_allocated() const override { return stats_.bytes_allocated(); }
+
+ int64_t max_memory() const override { return stats_.max_memory(); }
+
+ std::string backend_name() const override { return "stl"; }
+
+ private:
+ Allocator alloc_;
+ arrow::internal::MemoryPoolStats stats_;
+};
+
+template <class T1, class T2>
+bool operator==(const allocator<T1>& lhs, const allocator<T2>& rhs) noexcept {
+ return lhs.pool() == rhs.pool();
+}
+
+template <class T1, class T2>
+bool operator!=(const allocator<T1>& lhs, const allocator<T2>& rhs) noexcept {
+ return !(lhs == rhs);
+}
+
+} // namespace stl
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/symbols.map b/contrib/libs/apache/arrow/cpp/src/arrow/symbols.map
index 7262cc6a898..3c7e7e71f44 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/symbols.map
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/symbols.map
@@ -1,38 +1,38 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-{
- global:
- extern "C++" {
- # The leading asterisk is required for symbols such as
- # "typeinfo for arrow::SomeClass".
- # Unfortunately this will also catch template specializations
- # (from e.g. STL or Flatbuffers) involving Arrow types.
- *arrow::*;
- *arrow_vendored::*;
- };
- # Also export C-level helpers
- arrow_*;
- pyarrow_*;
-
- # Symbols marked as 'local' are not exported by the DSO and thus may not
- # be used by client applications. Everything except the above falls here.
- # This ensures we hide symbols of static dependencies.
- local:
- *;
-
-};
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{
+ global:
+ extern "C++" {
+ # The leading asterisk is required for symbols such as
+ # "typeinfo for arrow::SomeClass".
+ # Unfortunately this will also catch template specializations
+ # (from e.g. STL or Flatbuffers) involving Arrow types.
+ *arrow::*;
+ *arrow_vendored::*;
+ };
+ # Also export C-level helpers
+ arrow_*;
+ pyarrow_*;
+
+ # Symbols marked as 'local' are not exported by the DSO and thus may not
+ # be used by client applications. Everything except the above falls here.
+ # This ensures we hide symbols of static dependencies.
+ local:
+ *;
+
+};
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/table.cc b/contrib/libs/apache/arrow/cpp/src/arrow/table.cc
index d4c7802c834..3b58d93d6f6 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/table.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/table.cc
@@ -1,640 +1,640 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/table.h"
-
-#include <algorithm>
-#include <cstdlib>
-#include <limits>
-#include <memory>
-#include <sstream>
-#include <utility>
-
-#include "arrow/array/array_base.h"
-#include "arrow/array/array_binary.h"
-#include "arrow/array/array_nested.h"
-#include "arrow/array/concatenate.h"
-#include "arrow/array/util.h"
-#include "arrow/chunked_array.h"
-#include "arrow/pretty_print.h"
-#include "arrow/record_batch.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/type_fwd.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/vector.h"
-
-namespace arrow {
-
-using internal::checked_cast;
-
-class KeyValueMetadata;
-class MemoryPool;
-struct ArrayData;
-
-// ----------------------------------------------------------------------
-// Table methods
-
-/// \class SimpleTable
-/// \brief A basic, non-lazy in-memory table, like SimpleRecordBatch
-class SimpleTable : public Table {
- public:
- SimpleTable(std::shared_ptr<Schema> schema,
- std::vector<std::shared_ptr<ChunkedArray>> columns, int64_t num_rows = -1)
- : columns_(std::move(columns)) {
- schema_ = std::move(schema);
- if (num_rows < 0) {
- if (columns_.size() == 0) {
- num_rows_ = 0;
- } else {
- num_rows_ = columns_[0]->length();
- }
- } else {
- num_rows_ = num_rows;
- }
- }
-
- SimpleTable(std::shared_ptr<Schema> schema,
- const std::vector<std::shared_ptr<Array>>& columns, int64_t num_rows = -1) {
- schema_ = std::move(schema);
- if (num_rows < 0) {
- if (columns.size() == 0) {
- num_rows_ = 0;
- } else {
- num_rows_ = columns[0]->length();
- }
- } else {
- num_rows_ = num_rows;
- }
-
- columns_.resize(columns.size());
- for (size_t i = 0; i < columns.size(); ++i) {
- columns_[i] = std::make_shared<ChunkedArray>(columns[i]);
- }
- }
-
- std::shared_ptr<ChunkedArray> column(int i) const override { return columns_[i]; }
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/table.h"
+
+#include <algorithm>
+#include <cstdlib>
+#include <limits>
+#include <memory>
+#include <sstream>
+#include <utility>
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/array_binary.h"
+#include "arrow/array/array_nested.h"
+#include "arrow/array/concatenate.h"
+#include "arrow/array/util.h"
+#include "arrow/chunked_array.h"
+#include "arrow/pretty_print.h"
+#include "arrow/record_batch.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_fwd.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/vector.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+
+class KeyValueMetadata;
+class MemoryPool;
+struct ArrayData;
+
+// ----------------------------------------------------------------------
+// Table methods
+
+/// \class SimpleTable
+/// \brief A basic, non-lazy in-memory table, like SimpleRecordBatch
+class SimpleTable : public Table {
+ public:
+ SimpleTable(std::shared_ptr<Schema> schema,
+ std::vector<std::shared_ptr<ChunkedArray>> columns, int64_t num_rows = -1)
+ : columns_(std::move(columns)) {
+ schema_ = std::move(schema);
+ if (num_rows < 0) {
+ if (columns_.size() == 0) {
+ num_rows_ = 0;
+ } else {
+ num_rows_ = columns_[0]->length();
+ }
+ } else {
+ num_rows_ = num_rows;
+ }
+ }
+
+ SimpleTable(std::shared_ptr<Schema> schema,
+ const std::vector<std::shared_ptr<Array>>& columns, int64_t num_rows = -1) {
+ schema_ = std::move(schema);
+ if (num_rows < 0) {
+ if (columns.size() == 0) {
+ num_rows_ = 0;
+ } else {
+ num_rows_ = columns[0]->length();
+ }
+ } else {
+ num_rows_ = num_rows;
+ }
+
+ columns_.resize(columns.size());
+ for (size_t i = 0; i < columns.size(); ++i) {
+ columns_[i] = std::make_shared<ChunkedArray>(columns[i]);
+ }
+ }
+
+ std::shared_ptr<ChunkedArray> column(int i) const override { return columns_[i]; }
+
const std::vector<std::shared_ptr<ChunkedArray>>& columns() const override {
return columns_;
}
- std::shared_ptr<Table> Slice(int64_t offset, int64_t length) const override {
- auto sliced = columns_;
- int64_t num_rows = length;
- for (auto& column : sliced) {
- column = column->Slice(offset, length);
- num_rows = column->length();
- }
+ std::shared_ptr<Table> Slice(int64_t offset, int64_t length) const override {
+ auto sliced = columns_;
+ int64_t num_rows = length;
+ for (auto& column : sliced) {
+ column = column->Slice(offset, length);
+ num_rows = column->length();
+ }
return Table::Make(schema_, std::move(sliced), num_rows);
- }
-
- Result<std::shared_ptr<Table>> RemoveColumn(int i) const override {
- ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->RemoveField(i));
-
+ }
+
+ Result<std::shared_ptr<Table>> RemoveColumn(int i) const override {
+ ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->RemoveField(i));
+
return Table::Make(std::move(new_schema), internal::DeleteVectorElement(columns_, i),
- this->num_rows());
- }
-
- Result<std::shared_ptr<Table>> AddColumn(
- int i, std::shared_ptr<Field> field_arg,
- std::shared_ptr<ChunkedArray> col) const override {
- DCHECK(col != nullptr);
-
- if (col->length() != num_rows_) {
- return Status::Invalid(
- "Added column's length must match table's length. Expected length ", num_rows_,
- " but got length ", col->length());
- }
-
- if (!field_arg->type()->Equals(col->type())) {
- return Status::Invalid("Field type did not match data type");
- }
-
- ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->AddField(i, field_arg));
+ this->num_rows());
+ }
+
+ Result<std::shared_ptr<Table>> AddColumn(
+ int i, std::shared_ptr<Field> field_arg,
+ std::shared_ptr<ChunkedArray> col) const override {
+ DCHECK(col != nullptr);
+
+ if (col->length() != num_rows_) {
+ return Status::Invalid(
+ "Added column's length must match table's length. Expected length ", num_rows_,
+ " but got length ", col->length());
+ }
+
+ if (!field_arg->type()->Equals(col->type())) {
+ return Status::Invalid("Field type did not match data type");
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->AddField(i, field_arg));
return Table::Make(std::move(new_schema),
- internal::AddVectorElement(columns_, i, std::move(col)));
- }
-
- Result<std::shared_ptr<Table>> SetColumn(
- int i, std::shared_ptr<Field> field_arg,
- std::shared_ptr<ChunkedArray> col) const override {
- DCHECK(col != nullptr);
-
- if (col->length() != num_rows_) {
- return Status::Invalid(
- "Added column's length must match table's length. Expected length ", num_rows_,
- " but got length ", col->length());
- }
-
- if (!field_arg->type()->Equals(col->type())) {
- return Status::Invalid("Field type did not match data type");
- }
-
- ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->SetField(i, field_arg));
+ internal::AddVectorElement(columns_, i, std::move(col)));
+ }
+
+ Result<std::shared_ptr<Table>> SetColumn(
+ int i, std::shared_ptr<Field> field_arg,
+ std::shared_ptr<ChunkedArray> col) const override {
+ DCHECK(col != nullptr);
+
+ if (col->length() != num_rows_) {
+ return Status::Invalid(
+ "Added column's length must match table's length. Expected length ", num_rows_,
+ " but got length ", col->length());
+ }
+
+ if (!field_arg->type()->Equals(col->type())) {
+ return Status::Invalid("Field type did not match data type");
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->SetField(i, field_arg));
return Table::Make(std::move(new_schema),
- internal::ReplaceVectorElement(columns_, i, std::move(col)));
- }
-
- std::shared_ptr<Table> ReplaceSchemaMetadata(
- const std::shared_ptr<const KeyValueMetadata>& metadata) const override {
- auto new_schema = schema_->WithMetadata(metadata);
+ internal::ReplaceVectorElement(columns_, i, std::move(col)));
+ }
+
+ std::shared_ptr<Table> ReplaceSchemaMetadata(
+ const std::shared_ptr<const KeyValueMetadata>& metadata) const override {
+ auto new_schema = schema_->WithMetadata(metadata);
return Table::Make(std::move(new_schema), columns_);
- }
-
- Result<std::shared_ptr<Table>> Flatten(MemoryPool* pool) const override {
- std::vector<std::shared_ptr<Field>> flattened_fields;
- std::vector<std::shared_ptr<ChunkedArray>> flattened_columns;
- for (int i = 0; i < num_columns(); ++i) {
- std::vector<std::shared_ptr<Field>> new_fields = field(i)->Flatten();
- ARROW_ASSIGN_OR_RAISE(auto new_columns, column(i)->Flatten(pool));
- DCHECK_EQ(new_columns.size(), new_fields.size());
- for (size_t j = 0; j < new_columns.size(); ++j) {
- flattened_fields.push_back(new_fields[j]);
- flattened_columns.push_back(new_columns[j]);
- }
- }
- auto flattened_schema =
- std::make_shared<Schema>(std::move(flattened_fields), schema_->metadata());
- return Table::Make(std::move(flattened_schema), std::move(flattened_columns));
- }
-
- Status Validate() const override {
- RETURN_NOT_OK(ValidateMeta());
- for (int i = 0; i < num_columns(); ++i) {
- const ChunkedArray* col = columns_[i].get();
- Status st = col->Validate();
- if (!st.ok()) {
- std::stringstream ss;
- ss << "Column " << i << ": " << st.message();
- return st.WithMessage(ss.str());
- }
- }
- return Status::OK();
- }
-
- Status ValidateFull() const override {
- RETURN_NOT_OK(ValidateMeta());
- for (int i = 0; i < num_columns(); ++i) {
- const ChunkedArray* col = columns_[i].get();
- Status st = col->ValidateFull();
- if (!st.ok()) {
- std::stringstream ss;
- ss << "Column " << i << ": " << st.message();
- return st.WithMessage(ss.str());
- }
- }
- return Status::OK();
- }
-
- protected:
- Status ValidateMeta() const {
- // Make sure columns and schema are consistent
- if (static_cast<int>(columns_.size()) != schema_->num_fields()) {
- return Status::Invalid("Number of columns did not match schema");
- }
- for (int i = 0; i < num_columns(); ++i) {
- const ChunkedArray* col = columns_[i].get();
- if (col == nullptr) {
- return Status::Invalid("Column ", i, " was null");
- }
- if (!col->type()->Equals(*schema_->field(i)->type())) {
- return Status::Invalid("Column data for field ", i, " with type ",
- col->type()->ToString(), " is inconsistent with schema ",
- schema_->field(i)->type()->ToString());
- }
- }
-
- // Make sure columns are all the same length, and validate them
- for (int i = 0; i < num_columns(); ++i) {
- const ChunkedArray* col = columns_[i].get();
- if (col->length() != num_rows_) {
- return Status::Invalid("Column ", i, " named ", field(i)->name(),
- " expected length ", num_rows_, " but got length ",
- col->length());
- }
- Status st = col->Validate();
- if (!st.ok()) {
- std::stringstream ss;
- ss << "Column " << i << ": " << st.message();
- return st.WithMessage(ss.str());
- }
- }
- return Status::OK();
- }
-
- private:
- std::vector<std::shared_ptr<ChunkedArray>> columns_;
-};
-
-Table::Table() : num_rows_(0) {}
-
-std::vector<std::shared_ptr<Field>> Table::fields() const {
- std::vector<std::shared_ptr<Field>> result;
- for (int i = 0; i < this->num_columns(); ++i) {
- result.emplace_back(this->field(i));
- }
- return result;
-}
-
-std::shared_ptr<Table> Table::Make(std::shared_ptr<Schema> schema,
- std::vector<std::shared_ptr<ChunkedArray>> columns,
- int64_t num_rows) {
- return std::make_shared<SimpleTable>(std::move(schema), std::move(columns), num_rows);
-}
-
-std::shared_ptr<Table> Table::Make(std::shared_ptr<Schema> schema,
- const std::vector<std::shared_ptr<Array>>& arrays,
- int64_t num_rows) {
- return std::make_shared<SimpleTable>(std::move(schema), arrays, num_rows);
-}
-
-Result<std::shared_ptr<Table>> Table::FromRecordBatchReader(RecordBatchReader* reader) {
- std::shared_ptr<Table> table = nullptr;
- RETURN_NOT_OK(reader->ReadAll(&table));
- return table;
-}
-
-Result<std::shared_ptr<Table>> Table::FromRecordBatches(
- std::shared_ptr<Schema> schema,
- const std::vector<std::shared_ptr<RecordBatch>>& batches) {
- const int nbatches = static_cast<int>(batches.size());
- const int ncolumns = static_cast<int>(schema->num_fields());
-
- int64_t num_rows = 0;
- for (int i = 0; i < nbatches; ++i) {
- if (!batches[i]->schema()->Equals(*schema, false)) {
- return Status::Invalid("Schema at index ", static_cast<int>(i),
- " was different: \n", schema->ToString(), "\nvs\n",
- batches[i]->schema()->ToString());
- }
- num_rows += batches[i]->num_rows();
- }
-
- std::vector<std::shared_ptr<ChunkedArray>> columns(ncolumns);
- std::vector<std::shared_ptr<Array>> column_arrays(nbatches);
-
- for (int i = 0; i < ncolumns; ++i) {
- for (int j = 0; j < nbatches; ++j) {
- column_arrays[j] = batches[j]->column(i);
- }
- columns[i] = std::make_shared<ChunkedArray>(column_arrays, schema->field(i)->type());
- }
-
- return Table::Make(std::move(schema), std::move(columns), num_rows);
-}
-
-Result<std::shared_ptr<Table>> Table::FromRecordBatches(
- const std::vector<std::shared_ptr<RecordBatch>>& batches) {
- if (batches.size() == 0) {
- return Status::Invalid("Must pass at least one record batch or an explicit Schema");
- }
-
- return FromRecordBatches(batches[0]->schema(), batches);
-}
-
-Result<std::shared_ptr<Table>> Table::FromChunkedStructArray(
- const std::shared_ptr<ChunkedArray>& array) {
- auto type = array->type();
- if (type->id() != Type::STRUCT) {
- return Status::Invalid("Expected a chunked struct array, got ", *type);
- }
- int num_columns = type->num_fields();
- int num_chunks = array->num_chunks();
-
- const auto& struct_chunks = array->chunks();
- std::vector<std::shared_ptr<ChunkedArray>> columns(num_columns);
- for (int i = 0; i < num_columns; ++i) {
- ArrayVector chunks(num_chunks);
- std::transform(struct_chunks.begin(), struct_chunks.end(), chunks.begin(),
- [i](const std::shared_ptr<Array>& struct_chunk) {
- return static_cast<const StructArray&>(*struct_chunk).field(i);
- });
- columns[i] = std::make_shared<ChunkedArray>(std::move(chunks));
- }
-
- return Table::Make(::arrow::schema(type->fields()), std::move(columns),
- array->length());
-}
-
-std::vector<std::string> Table::ColumnNames() const {
- std::vector<std::string> names(num_columns());
- for (int i = 0; i < num_columns(); ++i) {
- names[i] = field(i)->name();
- }
- return names;
-}
-
-Result<std::shared_ptr<Table>> Table::RenameColumns(
- const std::vector<std::string>& names) const {
- if (names.size() != static_cast<size_t>(num_columns())) {
- return Status::Invalid("tried to rename a table of ", num_columns(),
- " columns but only ", names.size(), " names were provided");
- }
- std::vector<std::shared_ptr<ChunkedArray>> columns(num_columns());
- std::vector<std::shared_ptr<Field>> fields(num_columns());
- for (int i = 0; i < num_columns(); ++i) {
- columns[i] = column(i);
- fields[i] = field(i)->WithName(names[i]);
- }
- return Table::Make(::arrow::schema(std::move(fields)), std::move(columns), num_rows());
-}
-
-Result<std::shared_ptr<Table>> Table::SelectColumns(
- const std::vector<int>& indices) const {
- int n = static_cast<int>(indices.size());
-
- std::vector<std::shared_ptr<ChunkedArray>> columns(n);
- std::vector<std::shared_ptr<Field>> fields(n);
- for (int i = 0; i < n; i++) {
- int pos = indices[i];
- if (pos < 0 || pos > num_columns() - 1) {
- return Status::Invalid("Invalid column index ", pos, " to select columns.");
- }
- columns[i] = column(pos);
- fields[i] = field(pos);
- }
-
- auto new_schema =
- std::make_shared<arrow::Schema>(std::move(fields), schema()->metadata());
+ }
+
+ Result<std::shared_ptr<Table>> Flatten(MemoryPool* pool) const override {
+ std::vector<std::shared_ptr<Field>> flattened_fields;
+ std::vector<std::shared_ptr<ChunkedArray>> flattened_columns;
+ for (int i = 0; i < num_columns(); ++i) {
+ std::vector<std::shared_ptr<Field>> new_fields = field(i)->Flatten();
+ ARROW_ASSIGN_OR_RAISE(auto new_columns, column(i)->Flatten(pool));
+ DCHECK_EQ(new_columns.size(), new_fields.size());
+ for (size_t j = 0; j < new_columns.size(); ++j) {
+ flattened_fields.push_back(new_fields[j]);
+ flattened_columns.push_back(new_columns[j]);
+ }
+ }
+ auto flattened_schema =
+ std::make_shared<Schema>(std::move(flattened_fields), schema_->metadata());
+ return Table::Make(std::move(flattened_schema), std::move(flattened_columns));
+ }
+
+ Status Validate() const override {
+ RETURN_NOT_OK(ValidateMeta());
+ for (int i = 0; i < num_columns(); ++i) {
+ const ChunkedArray* col = columns_[i].get();
+ Status st = col->Validate();
+ if (!st.ok()) {
+ std::stringstream ss;
+ ss << "Column " << i << ": " << st.message();
+ return st.WithMessage(ss.str());
+ }
+ }
+ return Status::OK();
+ }
+
+ Status ValidateFull() const override {
+ RETURN_NOT_OK(ValidateMeta());
+ for (int i = 0; i < num_columns(); ++i) {
+ const ChunkedArray* col = columns_[i].get();
+ Status st = col->ValidateFull();
+ if (!st.ok()) {
+ std::stringstream ss;
+ ss << "Column " << i << ": " << st.message();
+ return st.WithMessage(ss.str());
+ }
+ }
+ return Status::OK();
+ }
+
+ protected:
+ Status ValidateMeta() const {
+ // Make sure columns and schema are consistent
+ if (static_cast<int>(columns_.size()) != schema_->num_fields()) {
+ return Status::Invalid("Number of columns did not match schema");
+ }
+ for (int i = 0; i < num_columns(); ++i) {
+ const ChunkedArray* col = columns_[i].get();
+ if (col == nullptr) {
+ return Status::Invalid("Column ", i, " was null");
+ }
+ if (!col->type()->Equals(*schema_->field(i)->type())) {
+ return Status::Invalid("Column data for field ", i, " with type ",
+ col->type()->ToString(), " is inconsistent with schema ",
+ schema_->field(i)->type()->ToString());
+ }
+ }
+
+ // Make sure columns are all the same length, and validate them
+ for (int i = 0; i < num_columns(); ++i) {
+ const ChunkedArray* col = columns_[i].get();
+ if (col->length() != num_rows_) {
+ return Status::Invalid("Column ", i, " named ", field(i)->name(),
+ " expected length ", num_rows_, " but got length ",
+ col->length());
+ }
+ Status st = col->Validate();
+ if (!st.ok()) {
+ std::stringstream ss;
+ ss << "Column " << i << ": " << st.message();
+ return st.WithMessage(ss.str());
+ }
+ }
+ return Status::OK();
+ }
+
+ private:
+ std::vector<std::shared_ptr<ChunkedArray>> columns_;
+};
+
+Table::Table() : num_rows_(0) {}
+
+std::vector<std::shared_ptr<Field>> Table::fields() const {
+ std::vector<std::shared_ptr<Field>> result;
+ for (int i = 0; i < this->num_columns(); ++i) {
+ result.emplace_back(this->field(i));
+ }
+ return result;
+}
+
+std::shared_ptr<Table> Table::Make(std::shared_ptr<Schema> schema,
+ std::vector<std::shared_ptr<ChunkedArray>> columns,
+ int64_t num_rows) {
+ return std::make_shared<SimpleTable>(std::move(schema), std::move(columns), num_rows);
+}
+
+std::shared_ptr<Table> Table::Make(std::shared_ptr<Schema> schema,
+ const std::vector<std::shared_ptr<Array>>& arrays,
+ int64_t num_rows) {
+ return std::make_shared<SimpleTable>(std::move(schema), arrays, num_rows);
+}
+
+Result<std::shared_ptr<Table>> Table::FromRecordBatchReader(RecordBatchReader* reader) {
+ std::shared_ptr<Table> table = nullptr;
+ RETURN_NOT_OK(reader->ReadAll(&table));
+ return table;
+}
+
+Result<std::shared_ptr<Table>> Table::FromRecordBatches(
+ std::shared_ptr<Schema> schema,
+ const std::vector<std::shared_ptr<RecordBatch>>& batches) {
+ const int nbatches = static_cast<int>(batches.size());
+ const int ncolumns = static_cast<int>(schema->num_fields());
+
+ int64_t num_rows = 0;
+ for (int i = 0; i < nbatches; ++i) {
+ if (!batches[i]->schema()->Equals(*schema, false)) {
+ return Status::Invalid("Schema at index ", static_cast<int>(i),
+ " was different: \n", schema->ToString(), "\nvs\n",
+ batches[i]->schema()->ToString());
+ }
+ num_rows += batches[i]->num_rows();
+ }
+
+ std::vector<std::shared_ptr<ChunkedArray>> columns(ncolumns);
+ std::vector<std::shared_ptr<Array>> column_arrays(nbatches);
+
+ for (int i = 0; i < ncolumns; ++i) {
+ for (int j = 0; j < nbatches; ++j) {
+ column_arrays[j] = batches[j]->column(i);
+ }
+ columns[i] = std::make_shared<ChunkedArray>(column_arrays, schema->field(i)->type());
+ }
+
+ return Table::Make(std::move(schema), std::move(columns), num_rows);
+}
+
+Result<std::shared_ptr<Table>> Table::FromRecordBatches(
+ const std::vector<std::shared_ptr<RecordBatch>>& batches) {
+ if (batches.size() == 0) {
+ return Status::Invalid("Must pass at least one record batch or an explicit Schema");
+ }
+
+ return FromRecordBatches(batches[0]->schema(), batches);
+}
+
+Result<std::shared_ptr<Table>> Table::FromChunkedStructArray(
+ const std::shared_ptr<ChunkedArray>& array) {
+ auto type = array->type();
+ if (type->id() != Type::STRUCT) {
+ return Status::Invalid("Expected a chunked struct array, got ", *type);
+ }
+ int num_columns = type->num_fields();
+ int num_chunks = array->num_chunks();
+
+ const auto& struct_chunks = array->chunks();
+ std::vector<std::shared_ptr<ChunkedArray>> columns(num_columns);
+ for (int i = 0; i < num_columns; ++i) {
+ ArrayVector chunks(num_chunks);
+ std::transform(struct_chunks.begin(), struct_chunks.end(), chunks.begin(),
+ [i](const std::shared_ptr<Array>& struct_chunk) {
+ return static_cast<const StructArray&>(*struct_chunk).field(i);
+ });
+ columns[i] = std::make_shared<ChunkedArray>(std::move(chunks));
+ }
+
+ return Table::Make(::arrow::schema(type->fields()), std::move(columns),
+ array->length());
+}
+
+std::vector<std::string> Table::ColumnNames() const {
+ std::vector<std::string> names(num_columns());
+ for (int i = 0; i < num_columns(); ++i) {
+ names[i] = field(i)->name();
+ }
+ return names;
+}
+
+Result<std::shared_ptr<Table>> Table::RenameColumns(
+ const std::vector<std::string>& names) const {
+ if (names.size() != static_cast<size_t>(num_columns())) {
+ return Status::Invalid("tried to rename a table of ", num_columns(),
+ " columns but only ", names.size(), " names were provided");
+ }
+ std::vector<std::shared_ptr<ChunkedArray>> columns(num_columns());
+ std::vector<std::shared_ptr<Field>> fields(num_columns());
+ for (int i = 0; i < num_columns(); ++i) {
+ columns[i] = column(i);
+ fields[i] = field(i)->WithName(names[i]);
+ }
+ return Table::Make(::arrow::schema(std::move(fields)), std::move(columns), num_rows());
+}
+
+Result<std::shared_ptr<Table>> Table::SelectColumns(
+ const std::vector<int>& indices) const {
+ int n = static_cast<int>(indices.size());
+
+ std::vector<std::shared_ptr<ChunkedArray>> columns(n);
+ std::vector<std::shared_ptr<Field>> fields(n);
+ for (int i = 0; i < n; i++) {
+ int pos = indices[i];
+ if (pos < 0 || pos > num_columns() - 1) {
+ return Status::Invalid("Invalid column index ", pos, " to select columns.");
+ }
+ columns[i] = column(pos);
+ fields[i] = field(pos);
+ }
+
+ auto new_schema =
+ std::make_shared<arrow::Schema>(std::move(fields), schema()->metadata());
return Table::Make(std::move(new_schema), std::move(columns), num_rows());
-}
-
-std::string Table::ToString() const {
- std::stringstream ss;
- ARROW_CHECK_OK(PrettyPrint(*this, 0, &ss));
- return ss.str();
-}
-
-Result<std::shared_ptr<Table>> ConcatenateTables(
- const std::vector<std::shared_ptr<Table>>& tables,
- const ConcatenateTablesOptions options, MemoryPool* memory_pool) {
- if (tables.size() == 0) {
- return Status::Invalid("Must pass at least one table");
- }
-
- std::vector<std::shared_ptr<Table>> promoted_tables;
- const std::vector<std::shared_ptr<Table>>* tables_to_concat = &tables;
- if (options.unify_schemas) {
- std::vector<std::shared_ptr<Schema>> schemas;
- schemas.reserve(tables.size());
- for (const auto& t : tables) {
- schemas.push_back(t->schema());
- }
-
- ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Schema> unified_schema,
- UnifySchemas(schemas, options.field_merge_options));
-
- promoted_tables.reserve(tables.size());
- for (const auto& t : tables) {
- promoted_tables.emplace_back();
- ARROW_ASSIGN_OR_RAISE(promoted_tables.back(),
- PromoteTableToSchema(t, unified_schema, memory_pool));
- }
- tables_to_concat = &promoted_tables;
- } else {
- auto first_schema = tables[0]->schema();
- for (size_t i = 1; i < tables.size(); ++i) {
- if (!tables[i]->schema()->Equals(*first_schema, false)) {
- return Status::Invalid("Schema at index ", i, " was different: \n",
- first_schema->ToString(), "\nvs\n",
- tables[i]->schema()->ToString());
- }
- }
- }
-
- std::shared_ptr<Schema> schema = tables_to_concat->front()->schema();
-
- const int ncolumns = schema->num_fields();
-
- std::vector<std::shared_ptr<ChunkedArray>> columns(ncolumns);
- for (int i = 0; i < ncolumns; ++i) {
- std::vector<std::shared_ptr<Array>> column_arrays;
- for (const auto& table : *tables_to_concat) {
- const std::vector<std::shared_ptr<Array>>& chunks = table->column(i)->chunks();
- for (const auto& chunk : chunks) {
- column_arrays.push_back(chunk);
- }
- }
- columns[i] = std::make_shared<ChunkedArray>(column_arrays, schema->field(i)->type());
- }
+}
+
+std::string Table::ToString() const {
+ std::stringstream ss;
+ ARROW_CHECK_OK(PrettyPrint(*this, 0, &ss));
+ return ss.str();
+}
+
+Result<std::shared_ptr<Table>> ConcatenateTables(
+ const std::vector<std::shared_ptr<Table>>& tables,
+ const ConcatenateTablesOptions options, MemoryPool* memory_pool) {
+ if (tables.size() == 0) {
+ return Status::Invalid("Must pass at least one table");
+ }
+
+ std::vector<std::shared_ptr<Table>> promoted_tables;
+ const std::vector<std::shared_ptr<Table>>* tables_to_concat = &tables;
+ if (options.unify_schemas) {
+ std::vector<std::shared_ptr<Schema>> schemas;
+ schemas.reserve(tables.size());
+ for (const auto& t : tables) {
+ schemas.push_back(t->schema());
+ }
+
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Schema> unified_schema,
+ UnifySchemas(schemas, options.field_merge_options));
+
+ promoted_tables.reserve(tables.size());
+ for (const auto& t : tables) {
+ promoted_tables.emplace_back();
+ ARROW_ASSIGN_OR_RAISE(promoted_tables.back(),
+ PromoteTableToSchema(t, unified_schema, memory_pool));
+ }
+ tables_to_concat = &promoted_tables;
+ } else {
+ auto first_schema = tables[0]->schema();
+ for (size_t i = 1; i < tables.size(); ++i) {
+ if (!tables[i]->schema()->Equals(*first_schema, false)) {
+ return Status::Invalid("Schema at index ", i, " was different: \n",
+ first_schema->ToString(), "\nvs\n",
+ tables[i]->schema()->ToString());
+ }
+ }
+ }
+
+ std::shared_ptr<Schema> schema = tables_to_concat->front()->schema();
+
+ const int ncolumns = schema->num_fields();
+
+ std::vector<std::shared_ptr<ChunkedArray>> columns(ncolumns);
+ for (int i = 0; i < ncolumns; ++i) {
+ std::vector<std::shared_ptr<Array>> column_arrays;
+ for (const auto& table : *tables_to_concat) {
+ const std::vector<std::shared_ptr<Array>>& chunks = table->column(i)->chunks();
+ for (const auto& chunk : chunks) {
+ column_arrays.push_back(chunk);
+ }
+ }
+ columns[i] = std::make_shared<ChunkedArray>(column_arrays, schema->field(i)->type());
+ }
return Table::Make(std::move(schema), std::move(columns));
-}
-
-Result<std::shared_ptr<Table>> PromoteTableToSchema(const std::shared_ptr<Table>& table,
- const std::shared_ptr<Schema>& schema,
- MemoryPool* pool) {
- const std::shared_ptr<Schema> current_schema = table->schema();
- if (current_schema->Equals(*schema, /*check_metadata=*/false)) {
- return table->ReplaceSchemaMetadata(schema->metadata());
- }
-
- // fields_seen[i] == true iff that field is also in `schema`.
- std::vector<bool> fields_seen(current_schema->num_fields(), false);
-
- std::vector<std::shared_ptr<ChunkedArray>> columns;
- columns.reserve(schema->num_fields());
- const int64_t num_rows = table->num_rows();
- auto AppendColumnOfNulls = [pool, &columns,
- num_rows](const std::shared_ptr<DataType>& type) {
- // TODO(bkietz): share the zero-filled buffers as much as possible across
- // the null-filled arrays created here.
- ARROW_ASSIGN_OR_RAISE(auto array_of_nulls, MakeArrayOfNull(type, num_rows, pool));
- columns.push_back(std::make_shared<ChunkedArray>(array_of_nulls));
- return Status::OK();
- };
-
- for (const auto& field : schema->fields()) {
- const std::vector<int> field_indices =
- current_schema->GetAllFieldIndices(field->name());
- if (field_indices.empty()) {
- RETURN_NOT_OK(AppendColumnOfNulls(field->type()));
- continue;
- }
-
- if (field_indices.size() > 1) {
- return Status::Invalid(
- "PromoteTableToSchema cannot handle schemas with duplicate fields: ",
- field->name());
- }
-
- const int field_index = field_indices[0];
- const auto& current_field = current_schema->field(field_index);
- if (!field->nullable() && current_field->nullable()) {
- return Status::Invalid("Unable to promote field ", current_field->name(),
- ": it was nullable but the target schema was not.");
- }
-
- fields_seen[field_index] = true;
- if (current_field->type()->Equals(field->type())) {
- columns.push_back(table->column(field_index));
- continue;
- }
-
- if (current_field->type()->id() == Type::NA) {
- RETURN_NOT_OK(AppendColumnOfNulls(field->type()));
- continue;
- }
-
- return Status::Invalid("Unable to promote field ", field->name(),
- ": incompatible types: ", field->type()->ToString(), " vs ",
- current_field->type()->ToString());
- }
-
- auto unseen_field_iter = std::find(fields_seen.begin(), fields_seen.end(), false);
- if (unseen_field_iter != fields_seen.end()) {
- const size_t unseen_field_index = unseen_field_iter - fields_seen.begin();
- return Status::Invalid(
- "Incompatible schemas: field ",
- current_schema->field(static_cast<int>(unseen_field_index))->name(),
- " did not exist in the new schema.");
- }
-
- return Table::Make(schema, std::move(columns));
-}
-
-bool Table::Equals(const Table& other, bool check_metadata) const {
- if (this == &other) {
- return true;
- }
- if (!schema_->Equals(*other.schema(), check_metadata)) {
- return false;
- }
- if (this->num_columns() != other.num_columns()) {
- return false;
- }
-
- for (int i = 0; i < this->num_columns(); i++) {
- if (!this->column(i)->Equals(other.column(i))) {
- return false;
- }
- }
- return true;
-}
-
-Result<std::shared_ptr<Table>> Table::CombineChunks(MemoryPool* pool) const {
- const int ncolumns = num_columns();
- std::vector<std::shared_ptr<ChunkedArray>> compacted_columns(ncolumns);
- for (int i = 0; i < ncolumns; ++i) {
- const auto& col = column(i);
- if (col->num_chunks() <= 1) {
- compacted_columns[i] = col;
- continue;
- }
-
- if (is_binary_like(col->type()->id())) {
- // ARROW-5744 Allow binary columns to be combined into multiple chunks to avoid
- // buffer overflow
- ArrayVector chunks;
- int chunk_i = 0;
- while (chunk_i < col->num_chunks()) {
- ArrayVector safe_chunks;
- int64_t data_length = 0;
- for (; chunk_i < col->num_chunks(); ++chunk_i) {
- const auto& chunk = col->chunk(chunk_i);
- data_length += checked_cast<const BinaryArray&>(*chunk).total_values_length();
- if (data_length >= kBinaryMemoryLimit) {
- break;
- }
- safe_chunks.push_back(chunk);
- }
- chunks.emplace_back();
- ARROW_ASSIGN_OR_RAISE(chunks.back(), Concatenate(safe_chunks, pool));
- }
- compacted_columns[i] = std::make_shared<ChunkedArray>(std::move(chunks));
- } else {
- ARROW_ASSIGN_OR_RAISE(auto compacted, Concatenate(col->chunks(), pool));
- compacted_columns[i] = std::make_shared<ChunkedArray>(compacted);
- }
- }
+}
+
+Result<std::shared_ptr<Table>> PromoteTableToSchema(const std::shared_ptr<Table>& table,
+ const std::shared_ptr<Schema>& schema,
+ MemoryPool* pool) {
+ const std::shared_ptr<Schema> current_schema = table->schema();
+ if (current_schema->Equals(*schema, /*check_metadata=*/false)) {
+ return table->ReplaceSchemaMetadata(schema->metadata());
+ }
+
+ // fields_seen[i] == true iff that field is also in `schema`.
+ std::vector<bool> fields_seen(current_schema->num_fields(), false);
+
+ std::vector<std::shared_ptr<ChunkedArray>> columns;
+ columns.reserve(schema->num_fields());
+ const int64_t num_rows = table->num_rows();
+ auto AppendColumnOfNulls = [pool, &columns,
+ num_rows](const std::shared_ptr<DataType>& type) {
+ // TODO(bkietz): share the zero-filled buffers as much as possible across
+ // the null-filled arrays created here.
+ ARROW_ASSIGN_OR_RAISE(auto array_of_nulls, MakeArrayOfNull(type, num_rows, pool));
+ columns.push_back(std::make_shared<ChunkedArray>(array_of_nulls));
+ return Status::OK();
+ };
+
+ for (const auto& field : schema->fields()) {
+ const std::vector<int> field_indices =
+ current_schema->GetAllFieldIndices(field->name());
+ if (field_indices.empty()) {
+ RETURN_NOT_OK(AppendColumnOfNulls(field->type()));
+ continue;
+ }
+
+ if (field_indices.size() > 1) {
+ return Status::Invalid(
+ "PromoteTableToSchema cannot handle schemas with duplicate fields: ",
+ field->name());
+ }
+
+ const int field_index = field_indices[0];
+ const auto& current_field = current_schema->field(field_index);
+ if (!field->nullable() && current_field->nullable()) {
+ return Status::Invalid("Unable to promote field ", current_field->name(),
+ ": it was nullable but the target schema was not.");
+ }
+
+ fields_seen[field_index] = true;
+ if (current_field->type()->Equals(field->type())) {
+ columns.push_back(table->column(field_index));
+ continue;
+ }
+
+ if (current_field->type()->id() == Type::NA) {
+ RETURN_NOT_OK(AppendColumnOfNulls(field->type()));
+ continue;
+ }
+
+ return Status::Invalid("Unable to promote field ", field->name(),
+ ": incompatible types: ", field->type()->ToString(), " vs ",
+ current_field->type()->ToString());
+ }
+
+ auto unseen_field_iter = std::find(fields_seen.begin(), fields_seen.end(), false);
+ if (unseen_field_iter != fields_seen.end()) {
+ const size_t unseen_field_index = unseen_field_iter - fields_seen.begin();
+ return Status::Invalid(
+ "Incompatible schemas: field ",
+ current_schema->field(static_cast<int>(unseen_field_index))->name(),
+ " did not exist in the new schema.");
+ }
+
+ return Table::Make(schema, std::move(columns));
+}
+
+bool Table::Equals(const Table& other, bool check_metadata) const {
+ if (this == &other) {
+ return true;
+ }
+ if (!schema_->Equals(*other.schema(), check_metadata)) {
+ return false;
+ }
+ if (this->num_columns() != other.num_columns()) {
+ return false;
+ }
+
+ for (int i = 0; i < this->num_columns(); i++) {
+ if (!this->column(i)->Equals(other.column(i))) {
+ return false;
+ }
+ }
+ return true;
+}
+
+Result<std::shared_ptr<Table>> Table::CombineChunks(MemoryPool* pool) const {
+ const int ncolumns = num_columns();
+ std::vector<std::shared_ptr<ChunkedArray>> compacted_columns(ncolumns);
+ for (int i = 0; i < ncolumns; ++i) {
+ const auto& col = column(i);
+ if (col->num_chunks() <= 1) {
+ compacted_columns[i] = col;
+ continue;
+ }
+
+ if (is_binary_like(col->type()->id())) {
+ // ARROW-5744 Allow binary columns to be combined into multiple chunks to avoid
+ // buffer overflow
+ ArrayVector chunks;
+ int chunk_i = 0;
+ while (chunk_i < col->num_chunks()) {
+ ArrayVector safe_chunks;
+ int64_t data_length = 0;
+ for (; chunk_i < col->num_chunks(); ++chunk_i) {
+ const auto& chunk = col->chunk(chunk_i);
+ data_length += checked_cast<const BinaryArray&>(*chunk).total_values_length();
+ if (data_length >= kBinaryMemoryLimit) {
+ break;
+ }
+ safe_chunks.push_back(chunk);
+ }
+ chunks.emplace_back();
+ ARROW_ASSIGN_OR_RAISE(chunks.back(), Concatenate(safe_chunks, pool));
+ }
+ compacted_columns[i] = std::make_shared<ChunkedArray>(std::move(chunks));
+ } else {
+ ARROW_ASSIGN_OR_RAISE(auto compacted, Concatenate(col->chunks(), pool));
+ compacted_columns[i] = std::make_shared<ChunkedArray>(compacted);
+ }
+ }
return Table::Make(schema(), std::move(compacted_columns), num_rows_);
-}
-
-// ----------------------------------------------------------------------
-// Convert a table to a sequence of record batches
-
-TableBatchReader::TableBatchReader(const Table& table)
- : table_(table),
- column_data_(table.num_columns()),
- chunk_numbers_(table.num_columns(), 0),
- chunk_offsets_(table.num_columns(), 0),
- absolute_row_position_(0),
- max_chunksize_(std::numeric_limits<int64_t>::max()) {
- for (int i = 0; i < table.num_columns(); ++i) {
- column_data_[i] = table.column(i).get();
- }
-}
-
-std::shared_ptr<Schema> TableBatchReader::schema() const { return table_.schema(); }
-
-void TableBatchReader::set_chunksize(int64_t chunksize) { max_chunksize_ = chunksize; }
-
-Status TableBatchReader::ReadNext(std::shared_ptr<RecordBatch>* out) {
- if (absolute_row_position_ == table_.num_rows()) {
- *out = nullptr;
- return Status::OK();
- }
-
- // Determine the minimum contiguous slice across all columns
- int64_t chunksize = std::min(table_.num_rows(), max_chunksize_);
- std::vector<const Array*> chunks(table_.num_columns());
- for (int i = 0; i < table_.num_columns(); ++i) {
- auto chunk = column_data_[i]->chunk(chunk_numbers_[i]).get();
- int64_t chunk_remaining = chunk->length() - chunk_offsets_[i];
-
- if (chunk_remaining < chunksize) {
- chunksize = chunk_remaining;
- }
-
- chunks[i] = chunk;
- }
-
- // Slice chunks and advance chunk index as appropriate
- std::vector<std::shared_ptr<ArrayData>> batch_data(table_.num_columns());
-
- for (int i = 0; i < table_.num_columns(); ++i) {
- // Exhausted chunk
- const Array* chunk = chunks[i];
- const int64_t offset = chunk_offsets_[i];
- std::shared_ptr<ArrayData> slice_data;
- if ((chunk->length() - offset) == chunksize) {
- ++chunk_numbers_[i];
- chunk_offsets_[i] = 0;
- if (offset > 0) {
- // Need to slice
- slice_data = chunk->Slice(offset, chunksize)->data();
- } else {
- // No slice
- slice_data = chunk->data();
- }
- } else {
- chunk_offsets_[i] += chunksize;
- slice_data = chunk->Slice(offset, chunksize)->data();
- }
- batch_data[i] = std::move(slice_data);
- }
-
- absolute_row_position_ += chunksize;
- *out = RecordBatch::Make(table_.schema(), chunksize, std::move(batch_data));
-
- return Status::OK();
-}
-
-} // namespace arrow
+}
+
+// ----------------------------------------------------------------------
+// Convert a table to a sequence of record batches
+
+TableBatchReader::TableBatchReader(const Table& table)
+ : table_(table),
+ column_data_(table.num_columns()),
+ chunk_numbers_(table.num_columns(), 0),
+ chunk_offsets_(table.num_columns(), 0),
+ absolute_row_position_(0),
+ max_chunksize_(std::numeric_limits<int64_t>::max()) {
+ for (int i = 0; i < table.num_columns(); ++i) {
+ column_data_[i] = table.column(i).get();
+ }
+}
+
+std::shared_ptr<Schema> TableBatchReader::schema() const { return table_.schema(); }
+
+void TableBatchReader::set_chunksize(int64_t chunksize) { max_chunksize_ = chunksize; }
+
+Status TableBatchReader::ReadNext(std::shared_ptr<RecordBatch>* out) {
+ if (absolute_row_position_ == table_.num_rows()) {
+ *out = nullptr;
+ return Status::OK();
+ }
+
+ // Determine the minimum contiguous slice across all columns
+ int64_t chunksize = std::min(table_.num_rows(), max_chunksize_);
+ std::vector<const Array*> chunks(table_.num_columns());
+ for (int i = 0; i < table_.num_columns(); ++i) {
+ auto chunk = column_data_[i]->chunk(chunk_numbers_[i]).get();
+ int64_t chunk_remaining = chunk->length() - chunk_offsets_[i];
+
+ if (chunk_remaining < chunksize) {
+ chunksize = chunk_remaining;
+ }
+
+ chunks[i] = chunk;
+ }
+
+ // Slice chunks and advance chunk index as appropriate
+ std::vector<std::shared_ptr<ArrayData>> batch_data(table_.num_columns());
+
+ for (int i = 0; i < table_.num_columns(); ++i) {
+ // Exhausted chunk
+ const Array* chunk = chunks[i];
+ const int64_t offset = chunk_offsets_[i];
+ std::shared_ptr<ArrayData> slice_data;
+ if ((chunk->length() - offset) == chunksize) {
+ ++chunk_numbers_[i];
+ chunk_offsets_[i] = 0;
+ if (offset > 0) {
+ // Need to slice
+ slice_data = chunk->Slice(offset, chunksize)->data();
+ } else {
+ // No slice
+ slice_data = chunk->data();
+ }
+ } else {
+ chunk_offsets_[i] += chunksize;
+ slice_data = chunk->Slice(offset, chunksize)->data();
+ }
+ batch_data[i] = std::move(slice_data);
+ }
+
+ absolute_row_position_ += chunksize;
+ *out = RecordBatch::Make(table_.schema(), chunksize, std::move(batch_data));
+
+ return Status::OK();
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/table.h b/contrib/libs/apache/arrow/cpp/src/arrow/table.h
index f1e5f23eed8..20bd9042e51 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/table.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/table.h
@@ -1,295 +1,295 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/chunked_array.h" // IWYU pragma: keep
-#include "arrow/record_batch.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class Array;
-class ChunkedArray;
-class KeyValueMetadata;
-class MemoryPool;
-
-/// \class Table
-/// \brief Logical table as sequence of chunked arrays
-class ARROW_EXPORT Table {
- public:
- virtual ~Table() = default;
-
- /// \brief Construct a Table from schema and columns
- ///
- /// If columns is zero-length, the table's number of rows is zero
- ///
- /// \param[in] schema The table schema (column types)
- /// \param[in] columns The table's columns as chunked arrays
- /// \param[in] num_rows number of rows in table, -1 (default) to infer from columns
- static std::shared_ptr<Table> Make(std::shared_ptr<Schema> schema,
- std::vector<std::shared_ptr<ChunkedArray>> columns,
- int64_t num_rows = -1);
-
- /// \brief Construct a Table from schema and arrays
- ///
- /// \param[in] schema The table schema (column types)
- /// \param[in] arrays The table's columns as arrays
- /// \param[in] num_rows number of rows in table, -1 (default) to infer from columns
- static std::shared_ptr<Table> Make(std::shared_ptr<Schema> schema,
- const std::vector<std::shared_ptr<Array>>& arrays,
- int64_t num_rows = -1);
-
- /// \brief Construct a Table from a RecordBatchReader.
- ///
- /// \param[in] reader the arrow::Schema for each batch
- static Result<std::shared_ptr<Table>> FromRecordBatchReader(RecordBatchReader* reader);
-
- /// \brief Construct a Table from RecordBatches, using schema supplied by the first
- /// RecordBatch.
- ///
- /// \param[in] batches a std::vector of record batches
- static Result<std::shared_ptr<Table>> FromRecordBatches(
- const std::vector<std::shared_ptr<RecordBatch>>& batches);
-
- /// \brief Construct a Table from RecordBatches, using supplied schema. There may be
- /// zero record batches
- ///
- /// \param[in] schema the arrow::Schema for each batch
- /// \param[in] batches a std::vector of record batches
- static Result<std::shared_ptr<Table>> FromRecordBatches(
- std::shared_ptr<Schema> schema,
- const std::vector<std::shared_ptr<RecordBatch>>& batches);
-
- /// \brief Construct a Table from a chunked StructArray. One column will be produced
- /// for each field of the StructArray.
- ///
- /// \param[in] array a chunked StructArray
- static Result<std::shared_ptr<Table>> FromChunkedStructArray(
- const std::shared_ptr<ChunkedArray>& array);
-
- /// \brief Return the table schema
- std::shared_ptr<Schema> schema() const { return schema_; }
-
- /// \brief Return a column by index
- virtual std::shared_ptr<ChunkedArray> column(int i) const = 0;
-
- /// \brief Return vector of all columns for table
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/chunked_array.h" // IWYU pragma: keep
+#include "arrow/record_batch.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Array;
+class ChunkedArray;
+class KeyValueMetadata;
+class MemoryPool;
+
+/// \class Table
+/// \brief Logical table as sequence of chunked arrays
+class ARROW_EXPORT Table {
+ public:
+ virtual ~Table() = default;
+
+ /// \brief Construct a Table from schema and columns
+ ///
+ /// If columns is zero-length, the table's number of rows is zero
+ ///
+ /// \param[in] schema The table schema (column types)
+ /// \param[in] columns The table's columns as chunked arrays
+ /// \param[in] num_rows number of rows in table, -1 (default) to infer from columns
+ static std::shared_ptr<Table> Make(std::shared_ptr<Schema> schema,
+ std::vector<std::shared_ptr<ChunkedArray>> columns,
+ int64_t num_rows = -1);
+
+ /// \brief Construct a Table from schema and arrays
+ ///
+ /// \param[in] schema The table schema (column types)
+ /// \param[in] arrays The table's columns as arrays
+ /// \param[in] num_rows number of rows in table, -1 (default) to infer from columns
+ static std::shared_ptr<Table> Make(std::shared_ptr<Schema> schema,
+ const std::vector<std::shared_ptr<Array>>& arrays,
+ int64_t num_rows = -1);
+
+ /// \brief Construct a Table from a RecordBatchReader.
+ ///
+ /// \param[in] reader the RecordBatchReader to construct the table from
+ static Result<std::shared_ptr<Table>> FromRecordBatchReader(RecordBatchReader* reader);
+
+ /// \brief Construct a Table from RecordBatches, using schema supplied by the first
+ /// RecordBatch.
+ ///
+ /// \param[in] batches a std::vector of record batches
+ static Result<std::shared_ptr<Table>> FromRecordBatches(
+ const std::vector<std::shared_ptr<RecordBatch>>& batches);
+
+ /// \brief Construct a Table from RecordBatches, using supplied schema. There may be
+ /// zero record batches
+ ///
+ /// \param[in] schema the arrow::Schema for each batch
+ /// \param[in] batches a std::vector of record batches
+ static Result<std::shared_ptr<Table>> FromRecordBatches(
+ std::shared_ptr<Schema> schema,
+ const std::vector<std::shared_ptr<RecordBatch>>& batches);
+
+ /// \brief Construct a Table from a chunked StructArray. One column will be produced
+ /// for each field of the StructArray.
+ ///
+ /// \param[in] array a chunked StructArray
+ static Result<std::shared_ptr<Table>> FromChunkedStructArray(
+ const std::shared_ptr<ChunkedArray>& array);
+
+ /// \brief Return the table schema
+ std::shared_ptr<Schema> schema() const { return schema_; }
+
+ /// \brief Return a column by index
+ virtual std::shared_ptr<ChunkedArray> column(int i) const = 0;
+
+ /// \brief Return vector of all columns for table
virtual const std::vector<std::shared_ptr<ChunkedArray>>& columns() const = 0;
-
- /// Return a column's field by index
- std::shared_ptr<Field> field(int i) const { return schema_->field(i); }
-
- /// \brief Return vector of all fields for table
- std::vector<std::shared_ptr<Field>> fields() const;
-
- /// \brief Construct a zero-copy slice of the table with the
- /// indicated offset and length
- ///
- /// \param[in] offset the index of the first row in the constructed
- /// slice
- /// \param[in] length the number of rows of the slice. If there are not enough
- /// rows in the table, the length will be adjusted accordingly
- ///
- /// \return a new object wrapped in std::shared_ptr<Table>
- virtual std::shared_ptr<Table> Slice(int64_t offset, int64_t length) const = 0;
-
- /// \brief Slice from first row at offset until end of the table
- std::shared_ptr<Table> Slice(int64_t offset) const { return Slice(offset, num_rows_); }
-
- /// \brief Return a column by name
- /// \param[in] name field name
- /// \return an Array or null if no field was found
- std::shared_ptr<ChunkedArray> GetColumnByName(const std::string& name) const {
- auto i = schema_->GetFieldIndex(name);
- return i == -1 ? NULLPTR : column(i);
- }
-
- /// \brief Remove column from the table, producing a new Table
- virtual Result<std::shared_ptr<Table>> RemoveColumn(int i) const = 0;
-
- /// \brief Add column to the table, producing a new Table
- virtual Result<std::shared_ptr<Table>> AddColumn(
- int i, std::shared_ptr<Field> field_arg,
- std::shared_ptr<ChunkedArray> column) const = 0;
-
- /// \brief Replace a column in the table, producing a new Table
- virtual Result<std::shared_ptr<Table>> SetColumn(
- int i, std::shared_ptr<Field> field_arg,
- std::shared_ptr<ChunkedArray> column) const = 0;
-
- /// \brief Return names of all columns
- std::vector<std::string> ColumnNames() const;
-
- /// \brief Rename columns with provided names
- Result<std::shared_ptr<Table>> RenameColumns(
- const std::vector<std::string>& names) const;
-
- /// \brief Return new table with specified columns
- Result<std::shared_ptr<Table>> SelectColumns(const std::vector<int>& indices) const;
-
+
+ /// Return a column's field by index
+ std::shared_ptr<Field> field(int i) const { return schema_->field(i); }
+
+ /// \brief Return vector of all fields for table
+ std::vector<std::shared_ptr<Field>> fields() const;
+
+ /// \brief Construct a zero-copy slice of the table with the
+ /// indicated offset and length
+ ///
+ /// \param[in] offset the index of the first row in the constructed
+ /// slice
+ /// \param[in] length the number of rows of the slice. If there are not enough
+ /// rows in the table, the length will be adjusted accordingly
+ ///
+ /// \return a new object wrapped in std::shared_ptr<Table>
+ virtual std::shared_ptr<Table> Slice(int64_t offset, int64_t length) const = 0;
+
+ /// \brief Slice from first row at offset until end of the table
+ std::shared_ptr<Table> Slice(int64_t offset) const { return Slice(offset, num_rows_); }
+
+ /// \brief Return a column by name
+ /// \param[in] name field name
+ /// \return a ChunkedArray or null if no field was found
+ std::shared_ptr<ChunkedArray> GetColumnByName(const std::string& name) const {
+ auto i = schema_->GetFieldIndex(name);
+ return i == -1 ? NULLPTR : column(i);
+ }
+
+ /// \brief Remove column from the table, producing a new Table
+ virtual Result<std::shared_ptr<Table>> RemoveColumn(int i) const = 0;
+
+ /// \brief Add column to the table, producing a new Table
+ virtual Result<std::shared_ptr<Table>> AddColumn(
+ int i, std::shared_ptr<Field> field_arg,
+ std::shared_ptr<ChunkedArray> column) const = 0;
+
+ /// \brief Replace a column in the table, producing a new Table
+ virtual Result<std::shared_ptr<Table>> SetColumn(
+ int i, std::shared_ptr<Field> field_arg,
+ std::shared_ptr<ChunkedArray> column) const = 0;
+
+ /// \brief Return names of all columns
+ std::vector<std::string> ColumnNames() const;
+
+ /// \brief Rename columns with provided names
+ Result<std::shared_ptr<Table>> RenameColumns(
+ const std::vector<std::string>& names) const;
+
+ /// \brief Return new table with specified columns
+ Result<std::shared_ptr<Table>> SelectColumns(const std::vector<int>& indices) const;
+
/// \brief Replace schema key-value metadata with new metadata
- /// \since 0.5.0
- ///
- /// \param[in] metadata new KeyValueMetadata
- /// \return new Table
- virtual std::shared_ptr<Table> ReplaceSchemaMetadata(
- const std::shared_ptr<const KeyValueMetadata>& metadata) const = 0;
-
- /// \brief Flatten the table, producing a new Table. Any column with a
- /// struct type will be flattened into multiple columns
- ///
- /// \param[in] pool The pool for buffer allocations, if any
- virtual Result<std::shared_ptr<Table>> Flatten(
- MemoryPool* pool = default_memory_pool()) const = 0;
-
- /// \return PrettyPrint representation suitable for debugging
- std::string ToString() const;
-
- /// \brief Perform cheap validation checks to determine obvious inconsistencies
- /// within the table's schema and internal data.
- ///
- /// This is O(k*m) where k is the total number of field descendents,
- /// and m is the number of chunks.
- ///
- /// \return Status
- virtual Status Validate() const = 0;
-
- /// \brief Perform extensive validation checks to determine inconsistencies
- /// within the table's schema and internal data.
- ///
- /// This is O(k*n) where k is the total number of field descendents,
- /// and n is the number of rows.
- ///
- /// \return Status
- virtual Status ValidateFull() const = 0;
-
- /// \brief Return the number of columns in the table
- int num_columns() const { return schema_->num_fields(); }
-
- /// \brief Return the number of rows (equal to each column's logical length)
- int64_t num_rows() const { return num_rows_; }
-
- /// \brief Determine if tables are equal
- ///
- /// Two tables can be equal only if they have equal schemas.
- /// However, they may be equal even if they have different chunkings.
- bool Equals(const Table& other, bool check_metadata = false) const;
-
- /// \brief Make a new table by combining the chunks this table has.
- ///
- /// All the underlying chunks in the ChunkedArray of each column are
- /// concatenated into zero or one chunk.
- ///
- /// \param[in] pool The pool for buffer allocations
- Result<std::shared_ptr<Table>> CombineChunks(
- MemoryPool* pool = default_memory_pool()) const;
-
- protected:
- Table();
-
- std::shared_ptr<Schema> schema_;
- int64_t num_rows_;
-
- private:
- ARROW_DISALLOW_COPY_AND_ASSIGN(Table);
-};
-
-/// \brief Compute a stream of record batches from a (possibly chunked) Table
-///
-/// The conversion is zero-copy: each record batch is a view over a slice
-/// of the table's columns.
-class ARROW_EXPORT TableBatchReader : public RecordBatchReader {
- public:
- /// \brief Construct a TableBatchReader for the given table
- explicit TableBatchReader(const Table& table);
-
- std::shared_ptr<Schema> schema() const override;
-
- Status ReadNext(std::shared_ptr<RecordBatch>* out) override;
-
- /// \brief Set the desired maximum chunk size of record batches
- ///
- /// The actual chunk size of each record batch may be smaller, depending
- /// on actual chunking characteristics of each table column.
- void set_chunksize(int64_t chunksize);
-
- private:
- const Table& table_;
- std::vector<ChunkedArray*> column_data_;
- std::vector<int> chunk_numbers_;
- std::vector<int64_t> chunk_offsets_;
- int64_t absolute_row_position_;
- int64_t max_chunksize_;
-};
-
-/// \defgroup concat-tables ConcatenateTables function.
-///
-/// ConcatenateTables function.
-/// @{
-
-/// \brief Controls the behavior of ConcatenateTables().
-struct ARROW_EXPORT ConcatenateTablesOptions {
- /// If true, the schemas of the tables will be first unified with fields of
- /// the same name being merged, according to `field_merge_options`, then each
- /// table will be promoted to the unified schema before being concatenated.
- /// Otherwise, all tables should have the same schema. Each column in the output table
- /// is the result of concatenating the corresponding columns in all input tables.
- bool unify_schemas = false;
-
- Field::MergeOptions field_merge_options = Field::MergeOptions::Defaults();
-
- static ConcatenateTablesOptions Defaults() { return {}; }
-};
-
-/// \brief Construct table from multiple input tables.
-ARROW_EXPORT
-Result<std::shared_ptr<Table>> ConcatenateTables(
- const std::vector<std::shared_ptr<Table>>& tables,
- ConcatenateTablesOptions options = ConcatenateTablesOptions::Defaults(),
- MemoryPool* memory_pool = default_memory_pool());
-
-/// \brief Promotes a table to conform to the given schema.
-///
-/// If a field in the schema does not have a corresponding column in the
-/// table, a column of nulls will be added to the resulting table.
-/// If the corresponding column is of type Null, it will be promoted to
-/// the type specified by schema, with null values filled.
-/// Returns an error:
-/// - if the corresponding column's type is not compatible with the
-/// schema.
-/// - if there is a column in the table that does not exist in the schema.
-///
-/// \param[in] table the input Table
-/// \param[in] schema the target schema to promote to
-/// \param[in] pool The memory pool to be used if null-filled arrays need to
-/// be created.
-ARROW_EXPORT
-Result<std::shared_ptr<Table>> PromoteTableToSchema(
- const std::shared_ptr<Table>& table, const std::shared_ptr<Schema>& schema,
- MemoryPool* pool = default_memory_pool());
-
-} // namespace arrow
+ /// \since 0.5.0
+ ///
+ /// \param[in] metadata new KeyValueMetadata
+ /// \return new Table
+ virtual std::shared_ptr<Table> ReplaceSchemaMetadata(
+ const std::shared_ptr<const KeyValueMetadata>& metadata) const = 0;
+
+ /// \brief Flatten the table, producing a new Table. Any column with a
+ /// struct type will be flattened into multiple columns
+ ///
+ /// \param[in] pool The pool for buffer allocations, if any
+ virtual Result<std::shared_ptr<Table>> Flatten(
+ MemoryPool* pool = default_memory_pool()) const = 0;
+
+ /// \return PrettyPrint representation suitable for debugging
+ std::string ToString() const;
+
+ /// \brief Perform cheap validation checks to determine obvious inconsistencies
+ /// within the table's schema and internal data.
+ ///
+ /// This is O(k*m) where k is the total number of field descendants,
+ /// and m is the number of chunks.
+ ///
+ /// \return Status
+ virtual Status Validate() const = 0;
+
+ /// \brief Perform extensive validation checks to determine inconsistencies
+ /// within the table's schema and internal data.
+ ///
+ /// This is O(k*n) where k is the total number of field descendants,
+ /// and n is the number of rows.
+ ///
+ /// \return Status
+ virtual Status ValidateFull() const = 0;
+
+ /// \brief Return the number of columns in the table
+ int num_columns() const { return schema_->num_fields(); }
+
+ /// \brief Return the number of rows (equal to each column's logical length)
+ int64_t num_rows() const { return num_rows_; }
+
+ /// \brief Determine if tables are equal
+ ///
+ /// Two tables can be equal only if they have equal schemas.
+ /// However, they may be equal even if they have different chunkings.
+ bool Equals(const Table& other, bool check_metadata = false) const;
+
+ /// \brief Make a new table by combining the chunks this table has.
+ ///
+ /// All the underlying chunks in the ChunkedArray of each column are
+ /// concatenated into zero or one chunk.
+ ///
+ /// \param[in] pool The pool for buffer allocations
+ Result<std::shared_ptr<Table>> CombineChunks(
+ MemoryPool* pool = default_memory_pool()) const;
+
+ protected:
+ Table();
+
+ std::shared_ptr<Schema> schema_;
+ int64_t num_rows_;
+
+ private:
+ ARROW_DISALLOW_COPY_AND_ASSIGN(Table);
+};
+
+/// \brief Compute a stream of record batches from a (possibly chunked) Table
+///
+/// The conversion is zero-copy: each record batch is a view over a slice
+/// of the table's columns.
+class ARROW_EXPORT TableBatchReader : public RecordBatchReader {
+ public:
+ /// \brief Construct a TableBatchReader for the given table
+ explicit TableBatchReader(const Table& table);
+
+ std::shared_ptr<Schema> schema() const override;
+
+ Status ReadNext(std::shared_ptr<RecordBatch>* out) override;
+
+ /// \brief Set the desired maximum chunk size of record batches
+ ///
+ /// The actual chunk size of each record batch may be smaller, depending
+ /// on actual chunking characteristics of each table column.
+ void set_chunksize(int64_t chunksize);
+
+ private:
+ const Table& table_;
+ std::vector<ChunkedArray*> column_data_;
+ std::vector<int> chunk_numbers_;
+ std::vector<int64_t> chunk_offsets_;
+ int64_t absolute_row_position_;
+ int64_t max_chunksize_;
+};
+
+/// \defgroup concat-tables ConcatenateTables function.
+///
+/// ConcatenateTables function.
+/// @{
+
+/// \brief Controls the behavior of ConcatenateTables().
+struct ARROW_EXPORT ConcatenateTablesOptions {
+ /// If true, the schemas of the tables will be first unified with fields of
+ /// the same name being merged, according to `field_merge_options`, then each
+ /// table will be promoted to the unified schema before being concatenated.
+ /// Otherwise, all tables should have the same schema. Each column in the output table
+ /// is the result of concatenating the corresponding columns in all input tables.
+ bool unify_schemas = false;
+
+ Field::MergeOptions field_merge_options = Field::MergeOptions::Defaults();
+
+ static ConcatenateTablesOptions Defaults() { return {}; }
+};
+
+/// \brief Construct table from multiple input tables.
+ARROW_EXPORT
+Result<std::shared_ptr<Table>> ConcatenateTables(
+ const std::vector<std::shared_ptr<Table>>& tables,
+ ConcatenateTablesOptions options = ConcatenateTablesOptions::Defaults(),
+ MemoryPool* memory_pool = default_memory_pool());
+
+/// \brief Promotes a table to conform to the given schema.
+///
+/// If a field in the schema does not have a corresponding column in the
+/// table, a column of nulls will be added to the resulting table.
+/// If the corresponding column is of type Null, it will be promoted to
+/// the type specified by schema, with null values filled.
+/// Returns an error:
+/// - if the corresponding column's type is not compatible with the
+/// schema.
+/// - if there is a column in the table that does not exist in the schema.
+///
+/// \param[in] table the input Table
+/// \param[in] schema the target schema to promote to
+/// \param[in] pool The memory pool to be used if null-filled arrays need to
+/// be created.
+ARROW_EXPORT
+Result<std::shared_ptr<Table>> PromoteTableToSchema(
+ const std::shared_ptr<Table>& table, const std::shared_ptr<Schema>& schema,
+ MemoryPool* pool = default_memory_pool());
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/table_builder.cc b/contrib/libs/apache/arrow/cpp/src/arrow/table_builder.cc
index c026c355758..5ba83f06b47 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/table_builder.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/table_builder.cc
@@ -1,113 +1,113 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/table_builder.h"
-
-#include <memory>
-#include <utility>
-
-#include "arrow/array/array_base.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/table_builder.h"
+
+#include <memory>
+#include <utility>
+
+#include "arrow/array/array_base.h"
#include "arrow/array/builder_base.h"
-#include "arrow/record_batch.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/util/logging.h"
-
-namespace arrow {
-
-// ----------------------------------------------------------------------
-// RecordBatchBuilder
-
-RecordBatchBuilder::RecordBatchBuilder(const std::shared_ptr<Schema>& schema,
- MemoryPool* pool, int64_t initial_capacity)
- : schema_(schema), initial_capacity_(initial_capacity), pool_(pool) {}
-
-Status RecordBatchBuilder::Make(const std::shared_ptr<Schema>& schema, MemoryPool* pool,
- std::unique_ptr<RecordBatchBuilder>* builder) {
- return Make(schema, pool, kMinBuilderCapacity, builder);
-}
-
-Status RecordBatchBuilder::Make(const std::shared_ptr<Schema>& schema, MemoryPool* pool,
- int64_t initial_capacity,
- std::unique_ptr<RecordBatchBuilder>* builder) {
- builder->reset(new RecordBatchBuilder(schema, pool, initial_capacity));
- RETURN_NOT_OK((*builder)->CreateBuilders());
- return (*builder)->InitBuilders();
-}
-
-Status RecordBatchBuilder::Flush(bool reset_builders,
- std::shared_ptr<RecordBatch>* batch) {
- std::vector<std::shared_ptr<Array>> fields;
- fields.resize(this->num_fields());
-
- int64_t length = 0;
- for (int i = 0; i < this->num_fields(); ++i) {
- RETURN_NOT_OK(raw_field_builders_[i]->Finish(&fields[i]));
- if (i > 0 && fields[i]->length() != length) {
- return Status::Invalid("All fields must be same length when calling Flush");
- }
- length = fields[i]->length();
- }
-
- // For certain types like dictionaries, types may not be fully
- // determined before we have flushed. Make sure that the RecordBatch
- // gets the correct types in schema.
- // See: #ARROW-9969
- std::vector<std::shared_ptr<Field>> schema_fields(schema_->fields());
- for (int i = 0; i < this->num_fields(); ++i) {
- if (!schema_fields[i]->type()->Equals(fields[i]->type())) {
- schema_fields[i] = schema_fields[i]->WithType(fields[i]->type());
- }
- }
- std::shared_ptr<Schema> schema =
+#include "arrow/record_batch.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+// ----------------------------------------------------------------------
+// RecordBatchBuilder
+
+RecordBatchBuilder::RecordBatchBuilder(const std::shared_ptr<Schema>& schema,
+ MemoryPool* pool, int64_t initial_capacity)
+ : schema_(schema), initial_capacity_(initial_capacity), pool_(pool) {}
+
+Status RecordBatchBuilder::Make(const std::shared_ptr<Schema>& schema, MemoryPool* pool,
+ std::unique_ptr<RecordBatchBuilder>* builder) {
+ return Make(schema, pool, kMinBuilderCapacity, builder);
+}
+
+Status RecordBatchBuilder::Make(const std::shared_ptr<Schema>& schema, MemoryPool* pool,
+ int64_t initial_capacity,
+ std::unique_ptr<RecordBatchBuilder>* builder) {
+ builder->reset(new RecordBatchBuilder(schema, pool, initial_capacity));
+ RETURN_NOT_OK((*builder)->CreateBuilders());
+ return (*builder)->InitBuilders();
+}
+
+Status RecordBatchBuilder::Flush(bool reset_builders,
+ std::shared_ptr<RecordBatch>* batch) {
+ std::vector<std::shared_ptr<Array>> fields;
+ fields.resize(this->num_fields());
+
+ int64_t length = 0;
+ for (int i = 0; i < this->num_fields(); ++i) {
+ RETURN_NOT_OK(raw_field_builders_[i]->Finish(&fields[i]));
+ if (i > 0 && fields[i]->length() != length) {
+ return Status::Invalid("All fields must be same length when calling Flush");
+ }
+ length = fields[i]->length();
+ }
+
+ // For certain types like dictionaries, types may not be fully
+ // determined before we have flushed. Make sure that the RecordBatch
+ // gets the correct types in schema.
+ // See: #ARROW-9969
+ std::vector<std::shared_ptr<Field>> schema_fields(schema_->fields());
+ for (int i = 0; i < this->num_fields(); ++i) {
+ if (!schema_fields[i]->type()->Equals(fields[i]->type())) {
+ schema_fields[i] = schema_fields[i]->WithType(fields[i]->type());
+ }
+ }
+ std::shared_ptr<Schema> schema =
std::make_shared<Schema>(std::move(schema_fields), schema_->metadata());
-
+
*batch = RecordBatch::Make(std::move(schema), length, std::move(fields));
- if (reset_builders) {
- return InitBuilders();
- } else {
- return Status::OK();
- }
-}
-
-Status RecordBatchBuilder::Flush(std::shared_ptr<RecordBatch>* batch) {
- return Flush(true, batch);
-}
-
-void RecordBatchBuilder::SetInitialCapacity(int64_t capacity) {
- ARROW_CHECK_GT(capacity, 0) << "Initial capacity must be positive";
- initial_capacity_ = capacity;
-}
-
-Status RecordBatchBuilder::CreateBuilders() {
- field_builders_.resize(this->num_fields());
- raw_field_builders_.resize(this->num_fields());
- for (int i = 0; i < this->num_fields(); ++i) {
- RETURN_NOT_OK(MakeBuilder(pool_, schema_->field(i)->type(), &field_builders_[i]));
- raw_field_builders_[i] = field_builders_[i].get();
- }
- return Status::OK();
-}
-
-Status RecordBatchBuilder::InitBuilders() {
- for (int i = 0; i < this->num_fields(); ++i) {
- RETURN_NOT_OK(raw_field_builders_[i]->Reserve(initial_capacity_));
- }
- return Status::OK();
-}
-
-} // namespace arrow
+ if (reset_builders) {
+ return InitBuilders();
+ } else {
+ return Status::OK();
+ }
+}
+
+Status RecordBatchBuilder::Flush(std::shared_ptr<RecordBatch>* batch) {
+ return Flush(true, batch);
+}
+
+void RecordBatchBuilder::SetInitialCapacity(int64_t capacity) {
+ ARROW_CHECK_GT(capacity, 0) << "Initial capacity must be positive";
+ initial_capacity_ = capacity;
+}
+
+Status RecordBatchBuilder::CreateBuilders() {
+ field_builders_.resize(this->num_fields());
+ raw_field_builders_.resize(this->num_fields());
+ for (int i = 0; i < this->num_fields(); ++i) {
+ RETURN_NOT_OK(MakeBuilder(pool_, schema_->field(i)->type(), &field_builders_[i]));
+ raw_field_builders_[i] = field_builders_[i].get();
+ }
+ return Status::OK();
+}
+
+Status RecordBatchBuilder::InitBuilders() {
+ for (int i = 0; i < this->num_fields(); ++i) {
+ RETURN_NOT_OK(raw_field_builders_[i]->Reserve(initial_capacity_));
+ }
+ return Status::OK();
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/table_builder.h b/contrib/libs/apache/arrow/cpp/src/arrow/table_builder.h
index db130d38950..7be37716c79 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/table_builder.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/table_builder.h
@@ -1,110 +1,110 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <vector>
-
-#include "arrow/array/builder_base.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class MemoryPool;
-class RecordBatch;
-
-/// \class RecordBatchBuilder
-/// \brief Helper class for creating record batches iteratively given a known
-/// schema
-class ARROW_EXPORT RecordBatchBuilder {
- public:
- /// \brief Create an initialize a RecordBatchBuilder
- /// \param[in] schema The schema for the record batch
- /// \param[in] pool A MemoryPool to use for allocations
- /// \param[in] builder the created builder instance
- static Status Make(const std::shared_ptr<Schema>& schema, MemoryPool* pool,
- std::unique_ptr<RecordBatchBuilder>* builder);
-
- /// \brief Create an initialize a RecordBatchBuilder
- /// \param[in] schema The schema for the record batch
- /// \param[in] pool A MemoryPool to use for allocations
- /// \param[in] initial_capacity The initial capacity for the builders
- /// \param[in] builder the created builder instance
- static Status Make(const std::shared_ptr<Schema>& schema, MemoryPool* pool,
- int64_t initial_capacity,
- std::unique_ptr<RecordBatchBuilder>* builder);
-
- /// \brief Get base pointer to field builder
- /// \param i the field index
- /// \return pointer to ArrayBuilder
- ArrayBuilder* GetField(int i) { return raw_field_builders_[i]; }
-
- /// \brief Return field builder casted to indicated specific builder type
- /// \param i the field index
- /// \return pointer to template type
- template <typename T>
- T* GetFieldAs(int i) {
- return internal::checked_cast<T*>(raw_field_builders_[i]);
- }
-
- /// \brief Finish current batch and optionally reset
- /// \param[in] reset_builders the resulting RecordBatch
- /// \param[out] batch the resulting RecordBatch
- /// \return Status
- Status Flush(bool reset_builders, std::shared_ptr<RecordBatch>* batch);
-
- /// \brief Finish current batch and reset
- /// \param[out] batch the resulting RecordBatch
- /// \return Status
- Status Flush(std::shared_ptr<RecordBatch>* batch);
-
- /// \brief Set the initial capacity for new builders
- void SetInitialCapacity(int64_t capacity);
-
- /// \brief The initial capacity for builders
- int64_t initial_capacity() const { return initial_capacity_; }
-
- /// \brief The number of fields in the schema
- int num_fields() const { return schema_->num_fields(); }
-
- /// \brief The number of fields in the schema
- std::shared_ptr<Schema> schema() const { return schema_; }
-
- private:
- ARROW_DISALLOW_COPY_AND_ASSIGN(RecordBatchBuilder);
-
- RecordBatchBuilder(const std::shared_ptr<Schema>& schema, MemoryPool* pool,
- int64_t initial_capacity);
-
- Status CreateBuilders();
- Status InitBuilders();
-
- std::shared_ptr<Schema> schema_;
- int64_t initial_capacity_;
- MemoryPool* pool_;
-
- std::vector<std::unique_ptr<ArrayBuilder>> field_builders_;
- std::vector<ArrayBuilder*> raw_field_builders_;
-};
-
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "arrow/array/builder_base.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class MemoryPool;
+class RecordBatch;
+
+/// \class RecordBatchBuilder
+/// \brief Helper class for creating record batches iteratively given a known
+/// schema
+class ARROW_EXPORT RecordBatchBuilder {
+ public:
+ /// \brief Create an initialize a RecordBatchBuilder
+ /// \param[in] schema The schema for the record batch
+ /// \param[in] pool A MemoryPool to use for allocations
+ /// \param[in] builder the created builder instance
+ static Status Make(const std::shared_ptr<Schema>& schema, MemoryPool* pool,
+ std::unique_ptr<RecordBatchBuilder>* builder);
+
+ /// \brief Create an initialize a RecordBatchBuilder
+ /// \param[in] schema The schema for the record batch
+ /// \param[in] pool A MemoryPool to use for allocations
+ /// \param[in] initial_capacity The initial capacity for the builders
+ /// \param[in] builder the created builder instance
+ static Status Make(const std::shared_ptr<Schema>& schema, MemoryPool* pool,
+ int64_t initial_capacity,
+ std::unique_ptr<RecordBatchBuilder>* builder);
+
+ /// \brief Get base pointer to field builder
+ /// \param i the field index
+ /// \return pointer to ArrayBuilder
+ ArrayBuilder* GetField(int i) { return raw_field_builders_[i]; }
+
+ /// \brief Return field builder casted to indicated specific builder type
+ /// \param i the field index
+ /// \return pointer to template type
+ template <typename T>
+ T* GetFieldAs(int i) {
+ return internal::checked_cast<T*>(raw_field_builders_[i]);
+ }
+
+ /// \brief Finish current batch and optionally reset
+ /// \param[in] reset_builders the resulting RecordBatch
+ /// \param[out] batch the resulting RecordBatch
+ /// \return Status
+ Status Flush(bool reset_builders, std::shared_ptr<RecordBatch>* batch);
+
+ /// \brief Finish current batch and reset
+ /// \param[out] batch the resulting RecordBatch
+ /// \return Status
+ Status Flush(std::shared_ptr<RecordBatch>* batch);
+
+ /// \brief Set the initial capacity for new builders
+ void SetInitialCapacity(int64_t capacity);
+
+ /// \brief The initial capacity for builders
+ int64_t initial_capacity() const { return initial_capacity_; }
+
+ /// \brief The number of fields in the schema
+ int num_fields() const { return schema_->num_fields(); }
+
+ /// \brief The number of fields in the schema
+ std::shared_ptr<Schema> schema() const { return schema_; }
+
+ private:
+ ARROW_DISALLOW_COPY_AND_ASSIGN(RecordBatchBuilder);
+
+ RecordBatchBuilder(const std::shared_ptr<Schema>& schema, MemoryPool* pool,
+ int64_t initial_capacity);
+
+ Status CreateBuilders();
+ Status InitBuilders();
+
+ std::shared_ptr<Schema> schema_;
+ int64_t initial_capacity_;
+ MemoryPool* pool_;
+
+ std::vector<std::unique_ptr<ArrayBuilder>> field_builders_;
+ std::vector<ArrayBuilder*> raw_field_builders_;
+};
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/tensor.cc b/contrib/libs/apache/arrow/cpp/src/arrow/tensor.cc
index d591bacff02..235d9621210 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/tensor.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/tensor.cc
@@ -1,50 +1,50 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/tensor.h"
-
-#include <algorithm>
-#include <cstddef>
-#include <cstdint>
-#include <functional>
-#include <memory>
-#include <numeric>
-#include <string>
-#include <type_traits>
-#include <vector>
-
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/checked_cast.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/tensor.h"
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <numeric>
+#include <string>
+#include <type_traits>
+#include <vector>
+
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/checked_cast.h"
#include "arrow/util/int_util_internal.h"
-#include "arrow/util/logging.h"
-#include "arrow/visitor_inline.h"
-
-namespace arrow {
-
-using internal::checked_cast;
-
-namespace internal {
-
+#include "arrow/util/logging.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+
+namespace internal {
+
Status ComputeRowMajorStrides(const FixedWidthType& type,
const std::vector<int64_t>& shape,
std::vector<int64_t>* strides) {
- const int byte_width = GetByteWidth(type);
+ const int byte_width = GetByteWidth(type);
const size_t ndim = shape.size();
int64_t remaining = 0;
@@ -56,28 +56,28 @@ Status ComputeRowMajorStrides(const FixedWidthType& type,
"Row-major strides computed from shape would not fit in 64-bit integer");
}
}
- }
-
- if (remaining == 0) {
- strides->assign(shape.size(), byte_width);
+ }
+
+ if (remaining == 0) {
+ strides->assign(shape.size(), byte_width);
return Status::OK();
- }
-
+ }
+
strides->push_back(remaining);
for (size_t i = 1; i < ndim; ++i) {
remaining /= shape[i];
- strides->push_back(remaining);
- }
+ strides->push_back(remaining);
+ }
return Status::OK();
-}
-
+}
+
Status ComputeColumnMajorStrides(const FixedWidthType& type,
const std::vector<int64_t>& shape,
std::vector<int64_t>* strides) {
const int byte_width = internal::GetByteWidth(type);
const size_t ndim = shape.size();
-
+
int64_t total = 0;
if (!shape.empty() && shape.back() > 0) {
total = byte_width;
@@ -87,8 +87,8 @@ Status ComputeColumnMajorStrides(const FixedWidthType& type,
"Column-major strides computed from shape would not fit in 64-bit "
"integer");
}
- }
- }
+ }
+ }
if (total == 0) {
strides->assign(shape.size(), byte_width);
@@ -97,71 +97,71 @@ Status ComputeColumnMajorStrides(const FixedWidthType& type,
total = byte_width;
for (size_t i = 0; i < ndim - 1; ++i) {
- strides->push_back(total);
+ strides->push_back(total);
total *= shape[i];
- }
+ }
strides->push_back(total);
return Status::OK();
-}
-
+}
+
} // namespace internal
-namespace {
-
-inline bool IsTensorStridesRowMajor(const std::shared_ptr<DataType>& type,
- const std::vector<int64_t>& shape,
- const std::vector<int64_t>& strides) {
- std::vector<int64_t> c_strides;
- const auto& fw_type = checked_cast<const FixedWidthType&>(*type);
+namespace {
+
+inline bool IsTensorStridesRowMajor(const std::shared_ptr<DataType>& type,
+ const std::vector<int64_t>& shape,
+ const std::vector<int64_t>& strides) {
+ std::vector<int64_t> c_strides;
+ const auto& fw_type = checked_cast<const FixedWidthType&>(*type);
if (internal::ComputeRowMajorStrides(fw_type, shape, &c_strides).ok()) {
return strides == c_strides;
} else {
return false;
}
-}
-
-inline bool IsTensorStridesColumnMajor(const std::shared_ptr<DataType>& type,
- const std::vector<int64_t>& shape,
- const std::vector<int64_t>& strides) {
- std::vector<int64_t> f_strides;
- const auto& fw_type = checked_cast<const FixedWidthType&>(*type);
+}
+
+inline bool IsTensorStridesColumnMajor(const std::shared_ptr<DataType>& type,
+ const std::vector<int64_t>& shape,
+ const std::vector<int64_t>& strides) {
+ std::vector<int64_t> f_strides;
+ const auto& fw_type = checked_cast<const FixedWidthType&>(*type);
if (internal::ComputeColumnMajorStrides(fw_type, shape, &f_strides).ok()) {
return strides == f_strides;
} else {
return false;
}
-}
-
-inline Status CheckTensorValidity(const std::shared_ptr<DataType>& type,
- const std::shared_ptr<Buffer>& data,
- const std::vector<int64_t>& shape) {
- if (!type) {
- return Status::Invalid("Null type is supplied");
- }
- if (!is_tensor_supported(type->id())) {
- return Status::Invalid(type->ToString(), " is not valid data type for a tensor");
- }
- if (!data) {
- return Status::Invalid("Null data is supplied");
- }
- if (!std::all_of(shape.begin(), shape.end(), [](int64_t x) { return x >= 0; })) {
- return Status::Invalid("Shape elements must be positive");
- }
- return Status::OK();
-}
-
-Status CheckTensorStridesValidity(const std::shared_ptr<Buffer>& data,
- const std::vector<int64_t>& shape,
- const std::vector<int64_t>& strides,
- const std::shared_ptr<DataType>& type) {
- if (strides.size() != shape.size()) {
- return Status::Invalid("strides must have the same length as shape");
- }
- if (data->size() == 0 && std::find(shape.begin(), shape.end(), 0) != shape.end()) {
- return Status::OK();
- }
-
+}
+
+inline Status CheckTensorValidity(const std::shared_ptr<DataType>& type,
+ const std::shared_ptr<Buffer>& data,
+ const std::vector<int64_t>& shape) {
+ if (!type) {
+ return Status::Invalid("Null type is supplied");
+ }
+ if (!is_tensor_supported(type->id())) {
+ return Status::Invalid(type->ToString(), " is not valid data type for a tensor");
+ }
+ if (!data) {
+ return Status::Invalid("Null data is supplied");
+ }
+ if (!std::all_of(shape.begin(), shape.end(), [](int64_t x) { return x >= 0; })) {
+ return Status::Invalid("Shape elements must be positive");
+ }
+ return Status::OK();
+}
+
+Status CheckTensorStridesValidity(const std::shared_ptr<Buffer>& data,
+ const std::vector<int64_t>& shape,
+ const std::vector<int64_t>& strides,
+ const std::shared_ptr<DataType>& type) {
+ if (strides.size() != shape.size()) {
+ return Status::Invalid("strides must have the same length as shape");
+ }
+ if (data->size() == 0 && std::find(shape.begin(), shape.end(), 0) != shape.end()) {
+ return Status::OK();
+ }
+
// Check the largest offset can be computed without overflow
const size_t ndim = shape.size();
int64_t largest_offset = 0;
@@ -181,162 +181,162 @@ Status CheckTensorStridesValidity(const std::shared_ptr<Buffer>& data,
return Status::Invalid(
"offsets computed from shape and strides would not fit in 64-bit integer");
- }
+ }
- const int byte_width = internal::GetByteWidth(*type);
+ const int byte_width = internal::GetByteWidth(*type);
if (largest_offset > data->size() - byte_width) {
- return Status::Invalid("strides must not involve buffer over run");
- }
- return Status::OK();
-}
-
-} // namespace
-
-namespace internal {
-
-bool IsTensorStridesContiguous(const std::shared_ptr<DataType>& type,
- const std::vector<int64_t>& shape,
- const std::vector<int64_t>& strides) {
- return IsTensorStridesRowMajor(type, shape, strides) ||
- IsTensorStridesColumnMajor(type, shape, strides);
-}
-
-Status ValidateTensorParameters(const std::shared_ptr<DataType>& type,
- const std::shared_ptr<Buffer>& data,
- const std::vector<int64_t>& shape,
- const std::vector<int64_t>& strides,
- const std::vector<std::string>& dim_names) {
- RETURN_NOT_OK(CheckTensorValidity(type, data, shape));
- if (!strides.empty()) {
- RETURN_NOT_OK(CheckTensorStridesValidity(data, shape, strides, type));
+ return Status::Invalid("strides must not involve buffer over run");
+ }
+ return Status::OK();
+}
+
+} // namespace
+
+namespace internal {
+
+bool IsTensorStridesContiguous(const std::shared_ptr<DataType>& type,
+ const std::vector<int64_t>& shape,
+ const std::vector<int64_t>& strides) {
+ return IsTensorStridesRowMajor(type, shape, strides) ||
+ IsTensorStridesColumnMajor(type, shape, strides);
+}
+
+Status ValidateTensorParameters(const std::shared_ptr<DataType>& type,
+ const std::shared_ptr<Buffer>& data,
+ const std::vector<int64_t>& shape,
+ const std::vector<int64_t>& strides,
+ const std::vector<std::string>& dim_names) {
+ RETURN_NOT_OK(CheckTensorValidity(type, data, shape));
+ if (!strides.empty()) {
+ RETURN_NOT_OK(CheckTensorStridesValidity(data, shape, strides, type));
} else {
std::vector<int64_t> tmp_strides;
RETURN_NOT_OK(ComputeRowMajorStrides(checked_cast<const FixedWidthType&>(*type),
shape, &tmp_strides));
- }
- if (dim_names.size() > shape.size()) {
- return Status::Invalid("too many dim_names are supplied");
- }
- return Status::OK();
-}
-
-} // namespace internal
-
-/// Constructor with strides and dimension names
-Tensor::Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
- const std::vector<int64_t>& shape, const std::vector<int64_t>& strides,
- const std::vector<std::string>& dim_names)
- : type_(type), data_(data), shape_(shape), strides_(strides), dim_names_(dim_names) {
- ARROW_CHECK(is_tensor_supported(type->id()));
- if (shape.size() > 0 && strides.size() == 0) {
+ }
+ if (dim_names.size() > shape.size()) {
+ return Status::Invalid("too many dim_names are supplied");
+ }
+ return Status::OK();
+}
+
+} // namespace internal
+
+/// Constructor with strides and dimension names
+Tensor::Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
+ const std::vector<int64_t>& shape, const std::vector<int64_t>& strides,
+ const std::vector<std::string>& dim_names)
+ : type_(type), data_(data), shape_(shape), strides_(strides), dim_names_(dim_names) {
+ ARROW_CHECK(is_tensor_supported(type->id()));
+ if (shape.size() > 0 && strides.size() == 0) {
ARROW_CHECK_OK(internal::ComputeRowMajorStrides(
checked_cast<const FixedWidthType&>(*type_), shape, &strides_));
- }
-}
-
-Tensor::Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
- const std::vector<int64_t>& shape, const std::vector<int64_t>& strides)
- : Tensor(type, data, shape, strides, {}) {}
-
-Tensor::Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
- const std::vector<int64_t>& shape)
- : Tensor(type, data, shape, {}, {}) {}
-
-const std::string& Tensor::dim_name(int i) const {
- static const std::string kEmpty = "";
- if (dim_names_.size() == 0) {
- return kEmpty;
- } else {
- ARROW_CHECK_LT(i, static_cast<int>(dim_names_.size()));
- return dim_names_[i];
- }
-}
-
-int64_t Tensor::size() const {
- return std::accumulate(shape_.begin(), shape_.end(), 1LL, std::multiplies<int64_t>());
-}
-
-bool Tensor::is_contiguous() const {
- return internal::IsTensorStridesContiguous(type_, shape_, strides_);
-}
-
-bool Tensor::is_row_major() const {
- return IsTensorStridesRowMajor(type_, shape_, strides_);
-}
-
-bool Tensor::is_column_major() const {
- return IsTensorStridesColumnMajor(type_, shape_, strides_);
-}
-
-Type::type Tensor::type_id() const { return type_->id(); }
-
-bool Tensor::Equals(const Tensor& other, const EqualOptions& opts) const {
- return TensorEquals(*this, other, opts);
-}
-
-namespace {
-
-template <typename TYPE>
-int64_t StridedTensorCountNonZero(int dim_index, int64_t offset, const Tensor& tensor) {
- using c_type = typename TYPE::c_type;
- c_type const zero = c_type(0);
- int64_t nnz = 0;
- if (dim_index == tensor.ndim() - 1) {
- for (int64_t i = 0; i < tensor.shape()[dim_index]; ++i) {
- auto const* ptr = tensor.raw_data() + offset + i * tensor.strides()[dim_index];
- auto& elem = *reinterpret_cast<c_type const*>(ptr);
- if (elem != zero) ++nnz;
- }
- return nnz;
- }
- for (int64_t i = 0; i < tensor.shape()[dim_index]; ++i) {
- nnz += StridedTensorCountNonZero<TYPE>(dim_index + 1, offset, tensor);
- offset += tensor.strides()[dim_index];
- }
- return nnz;
-}
-
-template <typename TYPE>
-int64_t ContiguousTensorCountNonZero(const Tensor& tensor) {
- using c_type = typename TYPE::c_type;
- auto* data = reinterpret_cast<c_type const*>(tensor.raw_data());
- return std::count_if(data, data + tensor.size(),
- [](c_type const& x) { return x != 0; });
-}
-
-template <typename TYPE>
-inline int64_t TensorCountNonZero(const Tensor& tensor) {
- if (tensor.is_contiguous()) {
- return ContiguousTensorCountNonZero<TYPE>(tensor);
- } else {
- return StridedTensorCountNonZero<TYPE>(0, 0, tensor);
- }
-}
-
-struct NonZeroCounter {
- explicit NonZeroCounter(const Tensor& tensor) : tensor_(tensor) {}
-
- template <typename TYPE>
- enable_if_number<TYPE, Status> Visit(const TYPE& type) {
- result = TensorCountNonZero<TYPE>(tensor_);
- return Status::OK();
- }
-
- Status Visit(const DataType& type) {
- ARROW_CHECK(!is_tensor_supported(type.id()));
- return Status::NotImplemented("Tensor of ", type.ToString(), " is not implemented");
- }
-
- const Tensor& tensor_;
- int64_t result;
-};
-
-} // namespace
-
-Result<int64_t> Tensor::CountNonZero() const {
- NonZeroCounter counter(*this);
- RETURN_NOT_OK(VisitTypeInline(*type(), &counter));
- return counter.result;
-}
-
-} // namespace arrow
+ }
+}
+
+Tensor::Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
+ const std::vector<int64_t>& shape, const std::vector<int64_t>& strides)
+ : Tensor(type, data, shape, strides, {}) {}
+
+Tensor::Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
+ const std::vector<int64_t>& shape)
+ : Tensor(type, data, shape, {}, {}) {}
+
+const std::string& Tensor::dim_name(int i) const {
+ static const std::string kEmpty = "";
+ if (dim_names_.size() == 0) {
+ return kEmpty;
+ } else {
+ ARROW_CHECK_LT(i, static_cast<int>(dim_names_.size()));
+ return dim_names_[i];
+ }
+}
+
+int64_t Tensor::size() const {
+ return std::accumulate(shape_.begin(), shape_.end(), 1LL, std::multiplies<int64_t>());
+}
+
+bool Tensor::is_contiguous() const {
+ return internal::IsTensorStridesContiguous(type_, shape_, strides_);
+}
+
+bool Tensor::is_row_major() const {
+ return IsTensorStridesRowMajor(type_, shape_, strides_);
+}
+
+bool Tensor::is_column_major() const {
+ return IsTensorStridesColumnMajor(type_, shape_, strides_);
+}
+
+Type::type Tensor::type_id() const { return type_->id(); }
+
+bool Tensor::Equals(const Tensor& other, const EqualOptions& opts) const {
+ return TensorEquals(*this, other, opts);
+}
+
+namespace {
+
+template <typename TYPE>
+int64_t StridedTensorCountNonZero(int dim_index, int64_t offset, const Tensor& tensor) {
+ using c_type = typename TYPE::c_type;
+ c_type const zero = c_type(0);
+ int64_t nnz = 0;
+ if (dim_index == tensor.ndim() - 1) {
+ for (int64_t i = 0; i < tensor.shape()[dim_index]; ++i) {
+ auto const* ptr = tensor.raw_data() + offset + i * tensor.strides()[dim_index];
+ auto& elem = *reinterpret_cast<c_type const*>(ptr);
+ if (elem != zero) ++nnz;
+ }
+ return nnz;
+ }
+ for (int64_t i = 0; i < tensor.shape()[dim_index]; ++i) {
+ nnz += StridedTensorCountNonZero<TYPE>(dim_index + 1, offset, tensor);
+ offset += tensor.strides()[dim_index];
+ }
+ return nnz;
+}
+
+template <typename TYPE>
+int64_t ContiguousTensorCountNonZero(const Tensor& tensor) {
+ using c_type = typename TYPE::c_type;
+ auto* data = reinterpret_cast<c_type const*>(tensor.raw_data());
+ return std::count_if(data, data + tensor.size(),
+ [](c_type const& x) { return x != 0; });
+}
+
+template <typename TYPE>
+inline int64_t TensorCountNonZero(const Tensor& tensor) {
+ if (tensor.is_contiguous()) {
+ return ContiguousTensorCountNonZero<TYPE>(tensor);
+ } else {
+ return StridedTensorCountNonZero<TYPE>(0, 0, tensor);
+ }
+}
+
+struct NonZeroCounter {
+ explicit NonZeroCounter(const Tensor& tensor) : tensor_(tensor) {}
+
+ template <typename TYPE>
+ enable_if_number<TYPE, Status> Visit(const TYPE& type) {
+ result = TensorCountNonZero<TYPE>(tensor_);
+ return Status::OK();
+ }
+
+ Status Visit(const DataType& type) {
+ ARROW_CHECK(!is_tensor_supported(type.id()));
+ return Status::NotImplemented("Tensor of ", type.ToString(), " is not implemented");
+ }
+
+ const Tensor& tensor_;
+ int64_t result;
+};
+
+} // namespace
+
+Result<int64_t> Tensor::CountNonZero() const {
+ NonZeroCounter counter(*this);
+ RETURN_NOT_OK(VisitTypeInline(*type(), &counter));
+ return counter.result;
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/tensor.h b/contrib/libs/apache/arrow/cpp/src/arrow/tensor.h
index 91e9ad26066..c10e4d3898e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/tensor.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/tensor.h
@@ -1,250 +1,250 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/buffer.h"
-#include "arrow/compare.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-static inline bool is_tensor_supported(Type::type type_id) {
- switch (type_id) {
- case Type::UINT8:
- case Type::INT8:
- case Type::UINT16:
- case Type::INT16:
- case Type::UINT32:
- case Type::INT32:
- case Type::UINT64:
- case Type::INT64:
- case Type::HALF_FLOAT:
- case Type::FLOAT:
- case Type::DOUBLE:
- return true;
- default:
- break;
- }
- return false;
-}
-
-namespace internal {
-
-ARROW_EXPORT
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/buffer.h"
+#include "arrow/compare.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+static inline bool is_tensor_supported(Type::type type_id) {
+ switch (type_id) {
+ case Type::UINT8:
+ case Type::INT8:
+ case Type::UINT16:
+ case Type::INT16:
+ case Type::UINT32:
+ case Type::INT32:
+ case Type::UINT64:
+ case Type::INT64:
+ case Type::HALF_FLOAT:
+ case Type::FLOAT:
+ case Type::DOUBLE:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+namespace internal {
+
+ARROW_EXPORT
Status ComputeRowMajorStrides(const FixedWidthType& type,
const std::vector<int64_t>& shape,
std::vector<int64_t>* strides);
-
-ARROW_EXPORT
+
+ARROW_EXPORT
Status ComputeColumnMajorStrides(const FixedWidthType& type,
const std::vector<int64_t>& shape,
std::vector<int64_t>* strides);
ARROW_EXPORT
-bool IsTensorStridesContiguous(const std::shared_ptr<DataType>& type,
- const std::vector<int64_t>& shape,
- const std::vector<int64_t>& strides);
-
-ARROW_EXPORT
-Status ValidateTensorParameters(const std::shared_ptr<DataType>& type,
- const std::shared_ptr<Buffer>& data,
- const std::vector<int64_t>& shape,
- const std::vector<int64_t>& strides,
- const std::vector<std::string>& dim_names);
-
-} // namespace internal
-
-class ARROW_EXPORT Tensor {
- public:
- /// \brief Create a Tensor with full parameters
- ///
- /// This factory function will return Status::Invalid when the parameters are
- /// inconsistent
- ///
- /// \param[in] type The data type of the tensor values
- /// \param[in] data The buffer of the tensor content
- /// \param[in] shape The shape of the tensor
- /// \param[in] strides The strides of the tensor
- /// (if this is empty, the data assumed to be row-major)
- /// \param[in] dim_names The names of the tensor dimensions
- static inline Result<std::shared_ptr<Tensor>> Make(
- const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
- const std::vector<int64_t>& shape, const std::vector<int64_t>& strides = {},
- const std::vector<std::string>& dim_names = {}) {
- ARROW_RETURN_NOT_OK(
- internal::ValidateTensorParameters(type, data, shape, strides, dim_names));
- return std::make_shared<Tensor>(type, data, shape, strides, dim_names);
- }
-
- virtual ~Tensor() = default;
-
- /// Constructor with no dimension names or strides, data assumed to be row-major
- Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
- const std::vector<int64_t>& shape);
-
- /// Constructor with non-negative strides
- Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
- const std::vector<int64_t>& shape, const std::vector<int64_t>& strides);
-
- /// Constructor with non-negative strides and dimension names
- Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
- const std::vector<int64_t>& shape, const std::vector<int64_t>& strides,
- const std::vector<std::string>& dim_names);
-
- std::shared_ptr<DataType> type() const { return type_; }
- std::shared_ptr<Buffer> data() const { return data_; }
-
- const uint8_t* raw_data() const { return data_->data(); }
- uint8_t* raw_mutable_data() { return data_->mutable_data(); }
-
- const std::vector<int64_t>& shape() const { return shape_; }
- const std::vector<int64_t>& strides() const { return strides_; }
-
- int ndim() const { return static_cast<int>(shape_.size()); }
-
- const std::vector<std::string>& dim_names() const { return dim_names_; }
- const std::string& dim_name(int i) const;
-
- /// Total number of value cells in the tensor
- int64_t size() const;
-
- /// Return true if the underlying data buffer is mutable
- bool is_mutable() const { return data_->is_mutable(); }
-
- /// Either row major or column major
- bool is_contiguous() const;
-
- /// AKA "C order"
- bool is_row_major() const;
-
- /// AKA "Fortran order"
- bool is_column_major() const;
-
- Type::type type_id() const;
-
- bool Equals(const Tensor& other, const EqualOptions& = EqualOptions::Defaults()) const;
-
- /// Compute the number of non-zero values in the tensor
- Result<int64_t> CountNonZero() const;
-
- /// Compute the number of non-zero values in the tensor
- ARROW_DEPRECATED("Use Result-returning version")
- Status CountNonZero(int64_t* result) const { return CountNonZero().Value(result); }
-
- /// Return the offset of the given index on the given strides
- static int64_t CalculateValueOffset(const std::vector<int64_t>& strides,
- const std::vector<int64_t>& index) {
- const int64_t n = static_cast<int64_t>(index.size());
- int64_t offset = 0;
- for (int64_t i = 0; i < n; ++i) {
- offset += index[i] * strides[i];
- }
- return offset;
- }
-
- int64_t CalculateValueOffset(const std::vector<int64_t>& index) const {
- return Tensor::CalculateValueOffset(strides_, index);
- }
-
- /// Returns the value at the given index without data-type and bounds checks
- template <typename ValueType>
- const typename ValueType::c_type& Value(const std::vector<int64_t>& index) const {
- using c_type = typename ValueType::c_type;
- const int64_t offset = CalculateValueOffset(index);
- const c_type* ptr = reinterpret_cast<const c_type*>(raw_data() + offset);
- return *ptr;
- }
-
+bool IsTensorStridesContiguous(const std::shared_ptr<DataType>& type,
+ const std::vector<int64_t>& shape,
+ const std::vector<int64_t>& strides);
+
+ARROW_EXPORT
+Status ValidateTensorParameters(const std::shared_ptr<DataType>& type,
+ const std::shared_ptr<Buffer>& data,
+ const std::vector<int64_t>& shape,
+ const std::vector<int64_t>& strides,
+ const std::vector<std::string>& dim_names);
+
+} // namespace internal
+
+class ARROW_EXPORT Tensor {
+ public:
+ /// \brief Create a Tensor with full parameters
+ ///
+ /// This factory function will return Status::Invalid when the parameters are
+ /// inconsistent
+ ///
+ /// \param[in] type The data type of the tensor values
+ /// \param[in] data The buffer of the tensor content
+ /// \param[in] shape The shape of the tensor
+ /// \param[in] strides The strides of the tensor
+ /// (if this is empty, the data assumed to be row-major)
+ /// \param[in] dim_names The names of the tensor dimensions
+ static inline Result<std::shared_ptr<Tensor>> Make(
+ const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
+ const std::vector<int64_t>& shape, const std::vector<int64_t>& strides = {},
+ const std::vector<std::string>& dim_names = {}) {
+ ARROW_RETURN_NOT_OK(
+ internal::ValidateTensorParameters(type, data, shape, strides, dim_names));
+ return std::make_shared<Tensor>(type, data, shape, strides, dim_names);
+ }
+
+ virtual ~Tensor() = default;
+
+ /// Constructor with no dimension names or strides, data assumed to be row-major
+ Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
+ const std::vector<int64_t>& shape);
+
+ /// Constructor with non-negative strides
+ Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
+ const std::vector<int64_t>& shape, const std::vector<int64_t>& strides);
+
+ /// Constructor with non-negative strides and dimension names
+ Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
+ const std::vector<int64_t>& shape, const std::vector<int64_t>& strides,
+ const std::vector<std::string>& dim_names);
+
+ std::shared_ptr<DataType> type() const { return type_; }
+ std::shared_ptr<Buffer> data() const { return data_; }
+
+ const uint8_t* raw_data() const { return data_->data(); }
+ uint8_t* raw_mutable_data() { return data_->mutable_data(); }
+
+ const std::vector<int64_t>& shape() const { return shape_; }
+ const std::vector<int64_t>& strides() const { return strides_; }
+
+ int ndim() const { return static_cast<int>(shape_.size()); }
+
+ const std::vector<std::string>& dim_names() const { return dim_names_; }
+ const std::string& dim_name(int i) const;
+
+ /// Total number of value cells in the tensor
+ int64_t size() const;
+
+ /// Return true if the underlying data buffer is mutable
+ bool is_mutable() const { return data_->is_mutable(); }
+
+ /// Either row major or column major
+ bool is_contiguous() const;
+
+ /// AKA "C order"
+ bool is_row_major() const;
+
+ /// AKA "Fortran order"
+ bool is_column_major() const;
+
+ Type::type type_id() const;
+
+ bool Equals(const Tensor& other, const EqualOptions& = EqualOptions::Defaults()) const;
+
+ /// Compute the number of non-zero values in the tensor
+ Result<int64_t> CountNonZero() const;
+
+ /// Compute the number of non-zero values in the tensor
+ ARROW_DEPRECATED("Use Result-returning version")
+ Status CountNonZero(int64_t* result) const { return CountNonZero().Value(result); }
+
+ /// Return the offset of the given index on the given strides
+ static int64_t CalculateValueOffset(const std::vector<int64_t>& strides,
+ const std::vector<int64_t>& index) {
+ const int64_t n = static_cast<int64_t>(index.size());
+ int64_t offset = 0;
+ for (int64_t i = 0; i < n; ++i) {
+ offset += index[i] * strides[i];
+ }
+ return offset;
+ }
+
+ int64_t CalculateValueOffset(const std::vector<int64_t>& index) const {
+ return Tensor::CalculateValueOffset(strides_, index);
+ }
+
+ /// Returns the value at the given index without data-type and bounds checks
+ template <typename ValueType>
+ const typename ValueType::c_type& Value(const std::vector<int64_t>& index) const {
+ using c_type = typename ValueType::c_type;
+ const int64_t offset = CalculateValueOffset(index);
+ const c_type* ptr = reinterpret_cast<const c_type*>(raw_data() + offset);
+ return *ptr;
+ }
+
Status Validate() const {
return internal::ValidateTensorParameters(type_, data_, shape_, strides_, dim_names_);
}
- protected:
- Tensor() {}
-
- std::shared_ptr<DataType> type_;
- std::shared_ptr<Buffer> data_;
- std::vector<int64_t> shape_;
- std::vector<int64_t> strides_;
-
- /// These names are optional
- std::vector<std::string> dim_names_;
-
- template <typename SparseIndexType>
- friend class SparseTensorImpl;
-
- private:
- ARROW_DISALLOW_COPY_AND_ASSIGN(Tensor);
-};
-
-template <typename TYPE>
-class NumericTensor : public Tensor {
- public:
- using TypeClass = TYPE;
- using value_type = typename TypeClass::c_type;
-
- /// \brief Create a NumericTensor with full parameters
- ///
- /// This factory function will return Status::Invalid when the parameters are
- /// inconsistent
- ///
- /// \param[in] data The buffer of the tensor content
- /// \param[in] shape The shape of the tensor
- /// \param[in] strides The strides of the tensor
- /// (if this is empty, the data assumed to be row-major)
- /// \param[in] dim_names The names of the tensor dimensions
- static Result<std::shared_ptr<NumericTensor<TYPE>>> Make(
- const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
- const std::vector<int64_t>& strides = {},
- const std::vector<std::string>& dim_names = {}) {
- ARROW_RETURN_NOT_OK(internal::ValidateTensorParameters(
- TypeTraits<TYPE>::type_singleton(), data, shape, strides, dim_names));
- return std::make_shared<NumericTensor<TYPE>>(data, shape, strides, dim_names);
- }
-
- /// Constructor with non-negative strides and dimension names
- NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
- const std::vector<int64_t>& strides,
- const std::vector<std::string>& dim_names)
- : Tensor(TypeTraits<TYPE>::type_singleton(), data, shape, strides, dim_names) {}
-
- /// Constructor with no dimension names or strides, data assumed to be row-major
- NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape)
- : NumericTensor(data, shape, {}, {}) {}
-
- /// Constructor with non-negative strides
- NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
- const std::vector<int64_t>& strides)
- : NumericTensor(data, shape, strides, {}) {}
-
- const value_type& Value(const std::vector<int64_t>& index) const {
- return Tensor::Value<TypeClass>(index);
- }
-};
-
-} // namespace arrow
+ protected:
+ Tensor() {}
+
+ std::shared_ptr<DataType> type_;
+ std::shared_ptr<Buffer> data_;
+ std::vector<int64_t> shape_;
+ std::vector<int64_t> strides_;
+
+ /// These names are optional
+ std::vector<std::string> dim_names_;
+
+ template <typename SparseIndexType>
+ friend class SparseTensorImpl;
+
+ private:
+ ARROW_DISALLOW_COPY_AND_ASSIGN(Tensor);
+};
+
+template <typename TYPE>
+class NumericTensor : public Tensor {
+ public:
+ using TypeClass = TYPE;
+ using value_type = typename TypeClass::c_type;
+
+ /// \brief Create a NumericTensor with full parameters
+ ///
+ /// This factory function will return Status::Invalid when the parameters are
+ /// inconsistent
+ ///
+ /// \param[in] data The buffer of the tensor content
+ /// \param[in] shape The shape of the tensor
+ /// \param[in] strides The strides of the tensor
+ /// (if this is empty, the data assumed to be row-major)
+ /// \param[in] dim_names The names of the tensor dimensions
+ static Result<std::shared_ptr<NumericTensor<TYPE>>> Make(
+ const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
+ const std::vector<int64_t>& strides = {},
+ const std::vector<std::string>& dim_names = {}) {
+ ARROW_RETURN_NOT_OK(internal::ValidateTensorParameters(
+ TypeTraits<TYPE>::type_singleton(), data, shape, strides, dim_names));
+ return std::make_shared<NumericTensor<TYPE>>(data, shape, strides, dim_names);
+ }
+
+ /// Constructor with non-negative strides and dimension names
+ NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
+ const std::vector<int64_t>& strides,
+ const std::vector<std::string>& dim_names)
+ : Tensor(TypeTraits<TYPE>::type_singleton(), data, shape, strides, dim_names) {}
+
+ /// Constructor with no dimension names or strides, data assumed to be row-major
+ NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape)
+ : NumericTensor(data, shape, {}, {}) {}
+
+ /// Constructor with non-negative strides
+ NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
+ const std::vector<int64_t>& strides)
+ : NumericTensor(data, shape, strides, {}) {}
+
+ const value_type& Value(const std::vector<int64_t>& index) const {
+ return Tensor::Value<TypeClass>(index);
+ }
+};
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/tensor/converter.h b/contrib/libs/apache/arrow/cpp/src/arrow/tensor/converter.h
index 408ab22305f..ba29d6cb8f0 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/tensor/converter.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/tensor/converter.h
@@ -1,67 +1,67 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "arrow/sparse_tensor.h" // IWYU pragma: export
-
-#include <memory>
-
-namespace arrow {
-namespace internal {
-
-struct SparseTensorConverterMixin {
- static bool IsNonZero(const uint8_t val) { return val != 0; }
-
- static void AssignIndex(uint8_t* indices, int64_t val, const int elsize);
-
- static int64_t GetIndexValue(const uint8_t* value_ptr, const int elsize);
-};
-
-Status MakeSparseCOOTensorFromTensor(const Tensor& tensor,
- const std::shared_ptr<DataType>& index_value_type,
- MemoryPool* pool,
- std::shared_ptr<SparseIndex>* out_sparse_index,
- std::shared_ptr<Buffer>* out_data);
-
-Status MakeSparseCSXMatrixFromTensor(SparseMatrixCompressedAxis axis,
- const Tensor& tensor,
- const std::shared_ptr<DataType>& index_value_type,
- MemoryPool* pool,
- std::shared_ptr<SparseIndex>* out_sparse_index,
- std::shared_ptr<Buffer>* out_data);
-
-Status MakeSparseCSFTensorFromTensor(const Tensor& tensor,
- const std::shared_ptr<DataType>& index_value_type,
- MemoryPool* pool,
- std::shared_ptr<SparseIndex>* out_sparse_index,
- std::shared_ptr<Buffer>* out_data);
-
-Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCOOTensor(
- MemoryPool* pool, const SparseCOOTensor* sparse_tensor);
-
-Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCSRMatrix(
- MemoryPool* pool, const SparseCSRMatrix* sparse_tensor);
-
-Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCSCMatrix(
- MemoryPool* pool, const SparseCSCMatrix* sparse_tensor);
-
-Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCSFTensor(
- MemoryPool* pool, const SparseCSFTensor* sparse_tensor);
-
-} // namespace internal
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/sparse_tensor.h" // IWYU pragma: export
+
+#include <memory>
+
+namespace arrow {
+namespace internal {
+
+struct SparseTensorConverterMixin {
+ static bool IsNonZero(const uint8_t val) { return val != 0; }
+
+ static void AssignIndex(uint8_t* indices, int64_t val, const int elsize);
+
+ static int64_t GetIndexValue(const uint8_t* value_ptr, const int elsize);
+};
+
+Status MakeSparseCOOTensorFromTensor(const Tensor& tensor,
+ const std::shared_ptr<DataType>& index_value_type,
+ MemoryPool* pool,
+ std::shared_ptr<SparseIndex>* out_sparse_index,
+ std::shared_ptr<Buffer>* out_data);
+
+Status MakeSparseCSXMatrixFromTensor(SparseMatrixCompressedAxis axis,
+ const Tensor& tensor,
+ const std::shared_ptr<DataType>& index_value_type,
+ MemoryPool* pool,
+ std::shared_ptr<SparseIndex>* out_sparse_index,
+ std::shared_ptr<Buffer>* out_data);
+
+Status MakeSparseCSFTensorFromTensor(const Tensor& tensor,
+ const std::shared_ptr<DataType>& index_value_type,
+ MemoryPool* pool,
+ std::shared_ptr<SparseIndex>* out_sparse_index,
+ std::shared_ptr<Buffer>* out_data);
+
+Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCOOTensor(
+ MemoryPool* pool, const SparseCOOTensor* sparse_tensor);
+
+Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCSRMatrix(
+ MemoryPool* pool, const SparseCSRMatrix* sparse_tensor);
+
+Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCSCMatrix(
+ MemoryPool* pool, const SparseCSCMatrix* sparse_tensor);
+
+Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCSFTensor(
+ MemoryPool* pool, const SparseCSFTensor* sparse_tensor);
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/tensor/converter_internal.h b/contrib/libs/apache/arrow/cpp/src/arrow/tensor/converter_internal.h
index 3a87feaf4b3..f26f4e5d53e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/tensor/converter_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/tensor/converter_internal.h
@@ -1,88 +1,88 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "arrow/tensor/converter.h"
-
-#define DISPATCH(ACTION, index_elsize, value_elsize, ...) \
- switch (index_elsize) { \
- case 1: \
- switch (value_elsize) { \
- case 1: \
- ACTION(uint8_t, uint8_t, __VA_ARGS__); \
- break; \
- case 2: \
- ACTION(uint8_t, uint16_t, __VA_ARGS__); \
- break; \
- case 4: \
- ACTION(uint8_t, uint32_t, __VA_ARGS__); \
- break; \
- case 8: \
- ACTION(uint8_t, uint64_t, __VA_ARGS__); \
- break; \
- } \
- break; \
- case 2: \
- switch (value_elsize) { \
- case 1: \
- ACTION(uint16_t, uint8_t, __VA_ARGS__); \
- break; \
- case 2: \
- ACTION(uint16_t, uint16_t, __VA_ARGS__); \
- break; \
- case 4: \
- ACTION(uint16_t, uint32_t, __VA_ARGS__); \
- break; \
- case 8: \
- ACTION(uint16_t, uint64_t, __VA_ARGS__); \
- break; \
- } \
- break; \
- case 4: \
- switch (value_elsize) { \
- case 1: \
- ACTION(uint32_t, uint8_t, __VA_ARGS__); \
- break; \
- case 2: \
- ACTION(uint32_t, uint16_t, __VA_ARGS__); \
- break; \
- case 4: \
- ACTION(uint32_t, uint32_t, __VA_ARGS__); \
- break; \
- case 8: \
- ACTION(uint32_t, uint64_t, __VA_ARGS__); \
- break; \
- } \
- break; \
- case 8: \
- switch (value_elsize) { \
- case 1: \
- ACTION(int64_t, uint8_t, __VA_ARGS__); \
- break; \
- case 2: \
- ACTION(int64_t, uint16_t, __VA_ARGS__); \
- break; \
- case 4: \
- ACTION(int64_t, uint32_t, __VA_ARGS__); \
- break; \
- case 8: \
- ACTION(int64_t, uint64_t, __VA_ARGS__); \
- break; \
- } \
- break; \
- }
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/tensor/converter.h"
+
+#define DISPATCH(ACTION, index_elsize, value_elsize, ...) \
+ switch (index_elsize) { \
+ case 1: \
+ switch (value_elsize) { \
+ case 1: \
+ ACTION(uint8_t, uint8_t, __VA_ARGS__); \
+ break; \
+ case 2: \
+ ACTION(uint8_t, uint16_t, __VA_ARGS__); \
+ break; \
+ case 4: \
+ ACTION(uint8_t, uint32_t, __VA_ARGS__); \
+ break; \
+ case 8: \
+ ACTION(uint8_t, uint64_t, __VA_ARGS__); \
+ break; \
+ } \
+ break; \
+ case 2: \
+ switch (value_elsize) { \
+ case 1: \
+ ACTION(uint16_t, uint8_t, __VA_ARGS__); \
+ break; \
+ case 2: \
+ ACTION(uint16_t, uint16_t, __VA_ARGS__); \
+ break; \
+ case 4: \
+ ACTION(uint16_t, uint32_t, __VA_ARGS__); \
+ break; \
+ case 8: \
+ ACTION(uint16_t, uint64_t, __VA_ARGS__); \
+ break; \
+ } \
+ break; \
+ case 4: \
+ switch (value_elsize) { \
+ case 1: \
+ ACTION(uint32_t, uint8_t, __VA_ARGS__); \
+ break; \
+ case 2: \
+ ACTION(uint32_t, uint16_t, __VA_ARGS__); \
+ break; \
+ case 4: \
+ ACTION(uint32_t, uint32_t, __VA_ARGS__); \
+ break; \
+ case 8: \
+ ACTION(uint32_t, uint64_t, __VA_ARGS__); \
+ break; \
+ } \
+ break; \
+ case 8: \
+ switch (value_elsize) { \
+ case 1: \
+ ACTION(int64_t, uint8_t, __VA_ARGS__); \
+ break; \
+ case 2: \
+ ACTION(int64_t, uint16_t, __VA_ARGS__); \
+ break; \
+ case 4: \
+ ACTION(int64_t, uint32_t, __VA_ARGS__); \
+ break; \
+ case 8: \
+ ACTION(int64_t, uint64_t, __VA_ARGS__); \
+ break; \
+ } \
+ break; \
+ }
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/tensor/coo_converter.cc b/contrib/libs/apache/arrow/cpp/src/arrow/tensor/coo_converter.cc
index 2124d0a4e4b..fb44c7606d1 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/tensor/coo_converter.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/tensor/coo_converter.cc
@@ -1,333 +1,333 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/tensor/converter_internal.h"
-
-#include <algorithm>
-#include <cstdint>
-#include <memory>
-#include <numeric>
-#include <vector>
-
-#include "arrow/buffer.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/macros.h"
-#include "arrow/visitor_inline.h"
-
-namespace arrow {
-
-class MemoryPool;
-
-namespace internal {
-namespace {
-
-template <typename c_index_type>
-inline void IncrementRowMajorIndex(std::vector<c_index_type>& coord,
- const std::vector<int64_t>& shape) {
- const int64_t ndim = shape.size();
- ++coord[ndim - 1];
- if (coord[ndim - 1] == shape[ndim - 1]) {
- int64_t d = ndim - 1;
- while (d > 0 && coord[d] == shape[d]) {
- coord[d] = 0;
- ++coord[d - 1];
- --d;
- }
- }
-}
-
-template <typename c_index_type, typename c_value_type>
-void ConvertRowMajorTensor(const Tensor& tensor, c_index_type* indices,
- c_value_type* values, const int64_t size) {
- const auto ndim = tensor.ndim();
- const auto& shape = tensor.shape();
- const c_value_type* tensor_data =
- reinterpret_cast<const c_value_type*>(tensor.raw_data());
-
- constexpr c_value_type zero = 0;
- std::vector<c_index_type> coord(ndim, 0);
- for (int64_t n = tensor.size(); n > 0; --n) {
- const c_value_type x = *tensor_data;
- if (ARROW_PREDICT_FALSE(x != zero)) {
- std::copy(coord.begin(), coord.end(), indices);
- *values++ = x;
- indices += ndim;
- }
-
- IncrementRowMajorIndex(coord, shape);
- ++tensor_data;
- }
-}
-
-template <typename c_index_type, typename c_value_type>
-void ConvertColumnMajorTensor(const Tensor& tensor, c_index_type* out_indices,
- c_value_type* out_values, const int64_t size) {
- const auto ndim = tensor.ndim();
- std::vector<c_index_type> indices(ndim * size);
- std::vector<c_value_type> values(size);
- ConvertRowMajorTensor(tensor, indices.data(), values.data(), size);
-
- // transpose indices
- for (int64_t i = 0; i < size; ++i) {
- for (int j = 0; j < ndim / 2; ++j) {
- std::swap(indices[i * ndim + j], indices[i * ndim + ndim - j - 1]);
- }
- }
-
- // sort indices
- std::vector<int64_t> order(size);
- std::iota(order.begin(), order.end(), 0);
- std::sort(order.begin(), order.end(), [&](const int64_t xi, const int64_t yi) {
- const int64_t x_offset = xi * ndim;
- const int64_t y_offset = yi * ndim;
- for (int j = 0; j < ndim; ++j) {
- const auto x = indices[x_offset + j];
- const auto y = indices[y_offset + j];
- if (x < y) return true;
- if (x > y) return false;
- }
- return false;
- });
-
- // transfer result
- const auto* indices_data = indices.data();
- for (int64_t i = 0; i < size; ++i) {
- out_values[i] = values[i];
-
- std::copy_n(indices_data, ndim, out_indices);
- indices_data += ndim;
- out_indices += ndim;
- }
-}
-
-template <typename c_index_type, typename c_value_type>
-void ConvertStridedTensor(const Tensor& tensor, c_index_type* indices,
- c_value_type* values, const int64_t size) {
- using ValueType = typename CTypeTraits<c_value_type>::ArrowType;
- const auto& shape = tensor.shape();
- const auto ndim = tensor.ndim();
- std::vector<int64_t> coord(ndim, 0);
-
- constexpr c_value_type zero = 0;
- c_value_type x;
- int64_t i;
- for (int64_t n = tensor.size(); n > 0; --n) {
- x = tensor.Value<ValueType>(coord);
- if (ARROW_PREDICT_FALSE(x != zero)) {
- *values++ = x;
- for (i = 0; i < ndim; ++i) {
- *indices++ = static_cast<c_index_type>(coord[i]);
- }
- }
-
- IncrementRowMajorIndex(coord, shape);
- }
-}
-
-#define CONVERT_TENSOR(func, index_type, value_type, indices, values, size) \
- func<index_type, value_type>(tensor_, reinterpret_cast<index_type*>(indices), \
- reinterpret_cast<value_type*>(values), size)
-
-// Using ARROW_EXPAND is necessary to expand __VA_ARGS__ correctly on VC++.
-#define CONVERT_ROW_MAJOR_TENSOR(index_type, value_type, ...) \
- ARROW_EXPAND(CONVERT_TENSOR(ConvertRowMajorTensor, index_type, value_type, __VA_ARGS__))
-
-#define CONVERT_COLUMN_MAJOR_TENSOR(index_type, value_type, ...) \
- ARROW_EXPAND( \
- CONVERT_TENSOR(ConvertColumnMajorTensor, index_type, value_type, __VA_ARGS__))
-
-#define CONVERT_STRIDED_TENSOR(index_type, value_type, ...) \
- ARROW_EXPAND(CONVERT_TENSOR(ConvertStridedTensor, index_type, value_type, __VA_ARGS__))
-
-// ----------------------------------------------------------------------
-// SparseTensorConverter for SparseCOOIndex
-
-class SparseCOOTensorConverter : private SparseTensorConverterMixin {
- using SparseTensorConverterMixin::AssignIndex;
- using SparseTensorConverterMixin::IsNonZero;
-
- public:
- SparseCOOTensorConverter(const Tensor& tensor,
- const std::shared_ptr<DataType>& index_value_type,
- MemoryPool* pool)
- : tensor_(tensor), index_value_type_(index_value_type), pool_(pool) {}
-
- Status Convert() {
- RETURN_NOT_OK(::arrow::internal::CheckSparseIndexMaximumValue(index_value_type_,
- tensor_.shape()));
-
- const int index_elsize = GetByteWidth(*index_value_type_);
- const int value_elsize = GetByteWidth(*tensor_.type());
-
- const int64_t ndim = tensor_.ndim();
- ARROW_ASSIGN_OR_RAISE(int64_t nonzero_count, tensor_.CountNonZero());
-
- ARROW_ASSIGN_OR_RAISE(auto indices_buffer,
- AllocateBuffer(index_elsize * ndim * nonzero_count, pool_));
- uint8_t* indices = indices_buffer->mutable_data();
-
- ARROW_ASSIGN_OR_RAISE(auto values_buffer,
- AllocateBuffer(value_elsize * nonzero_count, pool_));
- uint8_t* values = values_buffer->mutable_data();
-
- const uint8_t* tensor_data = tensor_.raw_data();
- if (ndim <= 1) {
- const int64_t count = ndim == 0 ? 1 : tensor_.shape()[0];
- for (int64_t i = 0; i < count; ++i) {
- if (std::any_of(tensor_data, tensor_data + value_elsize, IsNonZero)) {
- AssignIndex(indices, i, index_elsize);
- std::copy_n(tensor_data, value_elsize, values);
-
- indices += index_elsize;
- values += value_elsize;
- }
- tensor_data += value_elsize;
- }
- } else if (tensor_.is_row_major()) {
- DISPATCH(CONVERT_ROW_MAJOR_TENSOR, index_elsize, value_elsize, indices, values,
- nonzero_count);
- } else if (tensor_.is_column_major()) {
- DISPATCH(CONVERT_COLUMN_MAJOR_TENSOR, index_elsize, value_elsize, indices, values,
- nonzero_count);
- } else {
- DISPATCH(CONVERT_STRIDED_TENSOR, index_elsize, value_elsize, indices, values,
- nonzero_count);
- }
-
- // make results
- const std::vector<int64_t> indices_shape = {nonzero_count, ndim};
- std::vector<int64_t> indices_strides;
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/tensor/converter_internal.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <numeric>
+#include <vector>
+
+#include "arrow/buffer.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/macros.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+class MemoryPool;
+
+namespace internal {
+namespace {
+
+template <typename c_index_type>
+inline void IncrementRowMajorIndex(std::vector<c_index_type>& coord,
+ const std::vector<int64_t>& shape) {
+ const int64_t ndim = shape.size();
+ ++coord[ndim - 1];
+ if (coord[ndim - 1] == shape[ndim - 1]) {
+ int64_t d = ndim - 1;
+ while (d > 0 && coord[d] == shape[d]) {
+ coord[d] = 0;
+ ++coord[d - 1];
+ --d;
+ }
+ }
+}
+
+template <typename c_index_type, typename c_value_type>
+void ConvertRowMajorTensor(const Tensor& tensor, c_index_type* indices,
+ c_value_type* values, const int64_t size) {
+ const auto ndim = tensor.ndim();
+ const auto& shape = tensor.shape();
+ const c_value_type* tensor_data =
+ reinterpret_cast<const c_value_type*>(tensor.raw_data());
+
+ constexpr c_value_type zero = 0;
+ std::vector<c_index_type> coord(ndim, 0);
+ for (int64_t n = tensor.size(); n > 0; --n) {
+ const c_value_type x = *tensor_data;
+ if (ARROW_PREDICT_FALSE(x != zero)) {
+ std::copy(coord.begin(), coord.end(), indices);
+ *values++ = x;
+ indices += ndim;
+ }
+
+ IncrementRowMajorIndex(coord, shape);
+ ++tensor_data;
+ }
+}
+
+template <typename c_index_type, typename c_value_type>
+void ConvertColumnMajorTensor(const Tensor& tensor, c_index_type* out_indices,
+ c_value_type* out_values, const int64_t size) {
+ const auto ndim = tensor.ndim();
+ std::vector<c_index_type> indices(ndim * size);
+ std::vector<c_value_type> values(size);
+ ConvertRowMajorTensor(tensor, indices.data(), values.data(), size);
+
+ // transpose indices
+ for (int64_t i = 0; i < size; ++i) {
+ for (int j = 0; j < ndim / 2; ++j) {
+ std::swap(indices[i * ndim + j], indices[i * ndim + ndim - j - 1]);
+ }
+ }
+
+ // sort indices
+ std::vector<int64_t> order(size);
+ std::iota(order.begin(), order.end(), 0);
+ std::sort(order.begin(), order.end(), [&](const int64_t xi, const int64_t yi) {
+ const int64_t x_offset = xi * ndim;
+ const int64_t y_offset = yi * ndim;
+ for (int j = 0; j < ndim; ++j) {
+ const auto x = indices[x_offset + j];
+ const auto y = indices[y_offset + j];
+ if (x < y) return true;
+ if (x > y) return false;
+ }
+ return false;
+ });
+
+ // transfer result
+ const auto* indices_data = indices.data();
+ for (int64_t i = 0; i < size; ++i) {
+ out_values[i] = values[i];
+
+ std::copy_n(indices_data, ndim, out_indices);
+ indices_data += ndim;
+ out_indices += ndim;
+ }
+}
+
+template <typename c_index_type, typename c_value_type>
+void ConvertStridedTensor(const Tensor& tensor, c_index_type* indices,
+ c_value_type* values, const int64_t size) {
+ using ValueType = typename CTypeTraits<c_value_type>::ArrowType;
+ const auto& shape = tensor.shape();
+ const auto ndim = tensor.ndim();
+ std::vector<int64_t> coord(ndim, 0);
+
+ constexpr c_value_type zero = 0;
+ c_value_type x;
+ int64_t i;
+ for (int64_t n = tensor.size(); n > 0; --n) {
+ x = tensor.Value<ValueType>(coord);
+ if (ARROW_PREDICT_FALSE(x != zero)) {
+ *values++ = x;
+ for (i = 0; i < ndim; ++i) {
+ *indices++ = static_cast<c_index_type>(coord[i]);
+ }
+ }
+
+ IncrementRowMajorIndex(coord, shape);
+ }
+}
+
+#define CONVERT_TENSOR(func, index_type, value_type, indices, values, size) \
+ func<index_type, value_type>(tensor_, reinterpret_cast<index_type*>(indices), \
+ reinterpret_cast<value_type*>(values), size)
+
+// Using ARROW_EXPAND is necessary to expand __VA_ARGS__ correctly on VC++.
+#define CONVERT_ROW_MAJOR_TENSOR(index_type, value_type, ...) \
+ ARROW_EXPAND(CONVERT_TENSOR(ConvertRowMajorTensor, index_type, value_type, __VA_ARGS__))
+
+#define CONVERT_COLUMN_MAJOR_TENSOR(index_type, value_type, ...) \
+ ARROW_EXPAND( \
+ CONVERT_TENSOR(ConvertColumnMajorTensor, index_type, value_type, __VA_ARGS__))
+
+#define CONVERT_STRIDED_TENSOR(index_type, value_type, ...) \
+ ARROW_EXPAND(CONVERT_TENSOR(ConvertStridedTensor, index_type, value_type, __VA_ARGS__))
+
+// ----------------------------------------------------------------------
+// SparseTensorConverter for SparseCOOIndex
+
+class SparseCOOTensorConverter : private SparseTensorConverterMixin {
+ using SparseTensorConverterMixin::AssignIndex;
+ using SparseTensorConverterMixin::IsNonZero;
+
+ public:
+ SparseCOOTensorConverter(const Tensor& tensor,
+ const std::shared_ptr<DataType>& index_value_type,
+ MemoryPool* pool)
+ : tensor_(tensor), index_value_type_(index_value_type), pool_(pool) {}
+
+ Status Convert() {
+ RETURN_NOT_OK(::arrow::internal::CheckSparseIndexMaximumValue(index_value_type_,
+ tensor_.shape()));
+
+ const int index_elsize = GetByteWidth(*index_value_type_);
+ const int value_elsize = GetByteWidth(*tensor_.type());
+
+ const int64_t ndim = tensor_.ndim();
+ ARROW_ASSIGN_OR_RAISE(int64_t nonzero_count, tensor_.CountNonZero());
+
+ ARROW_ASSIGN_OR_RAISE(auto indices_buffer,
+ AllocateBuffer(index_elsize * ndim * nonzero_count, pool_));
+ uint8_t* indices = indices_buffer->mutable_data();
+
+ ARROW_ASSIGN_OR_RAISE(auto values_buffer,
+ AllocateBuffer(value_elsize * nonzero_count, pool_));
+ uint8_t* values = values_buffer->mutable_data();
+
+ const uint8_t* tensor_data = tensor_.raw_data();
+ if (ndim <= 1) {
+ const int64_t count = ndim == 0 ? 1 : tensor_.shape()[0];
+ for (int64_t i = 0; i < count; ++i) {
+ if (std::any_of(tensor_data, tensor_data + value_elsize, IsNonZero)) {
+ AssignIndex(indices, i, index_elsize);
+ std::copy_n(tensor_data, value_elsize, values);
+
+ indices += index_elsize;
+ values += value_elsize;
+ }
+ tensor_data += value_elsize;
+ }
+ } else if (tensor_.is_row_major()) {
+ DISPATCH(CONVERT_ROW_MAJOR_TENSOR, index_elsize, value_elsize, indices, values,
+ nonzero_count);
+ } else if (tensor_.is_column_major()) {
+ DISPATCH(CONVERT_COLUMN_MAJOR_TENSOR, index_elsize, value_elsize, indices, values,
+ nonzero_count);
+ } else {
+ DISPATCH(CONVERT_STRIDED_TENSOR, index_elsize, value_elsize, indices, values,
+ nonzero_count);
+ }
+
+ // make results
+ const std::vector<int64_t> indices_shape = {nonzero_count, ndim};
+ std::vector<int64_t> indices_strides;
RETURN_NOT_OK(internal::ComputeRowMajorStrides(
- checked_cast<const FixedWidthType&>(*index_value_type_), indices_shape,
+ checked_cast<const FixedWidthType&>(*index_value_type_), indices_shape,
&indices_strides));
- auto coords = std::make_shared<Tensor>(index_value_type_, std::move(indices_buffer),
- indices_shape, indices_strides);
- ARROW_ASSIGN_OR_RAISE(sparse_index, SparseCOOIndex::Make(coords, true));
- data = std::move(values_buffer);
-
- return Status::OK();
- }
-
- std::shared_ptr<SparseCOOIndex> sparse_index;
- std::shared_ptr<Buffer> data;
-
- private:
- const Tensor& tensor_;
- const std::shared_ptr<DataType>& index_value_type_;
- MemoryPool* pool_;
-};
-
-} // namespace
-
-void SparseTensorConverterMixin::AssignIndex(uint8_t* indices, int64_t val,
- const int elsize) {
- switch (elsize) {
- case 1:
- *indices = static_cast<uint8_t>(val);
- break;
- case 2:
- *reinterpret_cast<uint16_t*>(indices) = static_cast<uint16_t>(val);
- break;
- case 4:
- *reinterpret_cast<uint32_t*>(indices) = static_cast<uint32_t>(val);
- break;
- case 8:
- *reinterpret_cast<int64_t*>(indices) = val;
- break;
- default:
- break;
- }
-}
-
-int64_t SparseTensorConverterMixin::GetIndexValue(const uint8_t* value_ptr,
- const int elsize) {
- switch (elsize) {
- case 1:
- return *value_ptr;
-
- case 2:
- return *reinterpret_cast<const uint16_t*>(value_ptr);
-
- case 4:
- return *reinterpret_cast<const uint32_t*>(value_ptr);
-
- case 8:
- return *reinterpret_cast<const int64_t*>(value_ptr);
-
- default:
- return 0;
- }
-}
-
-Status MakeSparseCOOTensorFromTensor(const Tensor& tensor,
- const std::shared_ptr<DataType>& index_value_type,
- MemoryPool* pool,
- std::shared_ptr<SparseIndex>* out_sparse_index,
- std::shared_ptr<Buffer>* out_data) {
- SparseCOOTensorConverter converter(tensor, index_value_type, pool);
- RETURN_NOT_OK(converter.Convert());
-
- *out_sparse_index = checked_pointer_cast<SparseIndex>(converter.sparse_index);
- *out_data = converter.data;
- return Status::OK();
-}
-
-Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCOOTensor(
- MemoryPool* pool, const SparseCOOTensor* sparse_tensor) {
- const auto& sparse_index =
- checked_cast<const SparseCOOIndex&>(*sparse_tensor->sparse_index());
- const auto& coords = sparse_index.indices();
- const auto* coords_data = coords->raw_data();
-
- const int index_elsize = GetByteWidth(*coords->type());
-
- const auto& value_type = checked_cast<const FixedWidthType&>(*sparse_tensor->type());
- const int value_elsize = GetByteWidth(value_type);
- ARROW_ASSIGN_OR_RAISE(auto values_buffer,
- AllocateBuffer(value_elsize * sparse_tensor->size(), pool));
- auto values = values_buffer->mutable_data();
- std::fill_n(values, value_elsize * sparse_tensor->size(), 0);
-
- std::vector<int64_t> strides;
+ auto coords = std::make_shared<Tensor>(index_value_type_, std::move(indices_buffer),
+ indices_shape, indices_strides);
+ ARROW_ASSIGN_OR_RAISE(sparse_index, SparseCOOIndex::Make(coords, true));
+ data = std::move(values_buffer);
+
+ return Status::OK();
+ }
+
+ std::shared_ptr<SparseCOOIndex> sparse_index;
+ std::shared_ptr<Buffer> data;
+
+ private:
+ const Tensor& tensor_;
+ const std::shared_ptr<DataType>& index_value_type_;
+ MemoryPool* pool_;
+};
+
+} // namespace
+
+void SparseTensorConverterMixin::AssignIndex(uint8_t* indices, int64_t val,
+ const int elsize) {
+ switch (elsize) {
+ case 1:
+ *indices = static_cast<uint8_t>(val);
+ break;
+ case 2:
+ *reinterpret_cast<uint16_t*>(indices) = static_cast<uint16_t>(val);
+ break;
+ case 4:
+ *reinterpret_cast<uint32_t*>(indices) = static_cast<uint32_t>(val);
+ break;
+ case 8:
+ *reinterpret_cast<int64_t*>(indices) = val;
+ break;
+ default:
+ break;
+ }
+}
+
+int64_t SparseTensorConverterMixin::GetIndexValue(const uint8_t* value_ptr,
+ const int elsize) {
+ switch (elsize) {
+ case 1:
+ return *value_ptr;
+
+ case 2:
+ return *reinterpret_cast<const uint16_t*>(value_ptr);
+
+ case 4:
+ return *reinterpret_cast<const uint32_t*>(value_ptr);
+
+ case 8:
+ return *reinterpret_cast<const int64_t*>(value_ptr);
+
+ default:
+ return 0;
+ }
+}
+
+Status MakeSparseCOOTensorFromTensor(const Tensor& tensor,
+ const std::shared_ptr<DataType>& index_value_type,
+ MemoryPool* pool,
+ std::shared_ptr<SparseIndex>* out_sparse_index,
+ std::shared_ptr<Buffer>* out_data) {
+ SparseCOOTensorConverter converter(tensor, index_value_type, pool);
+ RETURN_NOT_OK(converter.Convert());
+
+ *out_sparse_index = checked_pointer_cast<SparseIndex>(converter.sparse_index);
+ *out_data = converter.data;
+ return Status::OK();
+}
+
+Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCOOTensor(
+ MemoryPool* pool, const SparseCOOTensor* sparse_tensor) {
+ const auto& sparse_index =
+ checked_cast<const SparseCOOIndex&>(*sparse_tensor->sparse_index());
+ const auto& coords = sparse_index.indices();
+ const auto* coords_data = coords->raw_data();
+
+ const int index_elsize = GetByteWidth(*coords->type());
+
+ const auto& value_type = checked_cast<const FixedWidthType&>(*sparse_tensor->type());
+ const int value_elsize = GetByteWidth(value_type);
+ ARROW_ASSIGN_OR_RAISE(auto values_buffer,
+ AllocateBuffer(value_elsize * sparse_tensor->size(), pool));
+ auto values = values_buffer->mutable_data();
+ std::fill_n(values, value_elsize * sparse_tensor->size(), 0);
+
+ std::vector<int64_t> strides;
RETURN_NOT_OK(ComputeRowMajorStrides(value_type, sparse_tensor->shape(), &strides));
-
- const auto* raw_data = sparse_tensor->raw_data();
- const int ndim = sparse_tensor->ndim();
-
- for (int64_t i = 0; i < sparse_tensor->non_zero_length(); ++i) {
- int64_t offset = 0;
-
- for (int j = 0; j < ndim; ++j) {
- auto index = static_cast<int64_t>(
- SparseTensorConverterMixin::GetIndexValue(coords_data, index_elsize));
- offset += index * strides[j];
- coords_data += index_elsize;
- }
-
- std::copy_n(raw_data, value_elsize, values + offset);
- raw_data += value_elsize;
- }
-
- return std::make_shared<Tensor>(sparse_tensor->type(), std::move(values_buffer),
- sparse_tensor->shape(), strides,
- sparse_tensor->dim_names());
-}
-
-} // namespace internal
-} // namespace arrow
+
+ const auto* raw_data = sparse_tensor->raw_data();
+ const int ndim = sparse_tensor->ndim();
+
+ for (int64_t i = 0; i < sparse_tensor->non_zero_length(); ++i) {
+ int64_t offset = 0;
+
+ for (int j = 0; j < ndim; ++j) {
+ auto index = static_cast<int64_t>(
+ SparseTensorConverterMixin::GetIndexValue(coords_data, index_elsize));
+ offset += index * strides[j];
+ coords_data += index_elsize;
+ }
+
+ std::copy_n(raw_data, value_elsize, values + offset);
+ raw_data += value_elsize;
+ }
+
+ return std::make_shared<Tensor>(sparse_tensor->type(), std::move(values_buffer),
+ sparse_tensor->shape(), strides,
+ sparse_tensor->dim_names());
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/tensor/csf_converter.cc b/contrib/libs/apache/arrow/cpp/src/arrow/tensor/csf_converter.cc
index 77a71d8a12e..0cb8110fd5e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/tensor/csf_converter.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/tensor/csf_converter.cc
@@ -1,289 +1,289 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/tensor/converter.h"
-
-#include <algorithm>
-#include <cstdint>
-#include <limits>
-#include <memory>
-#include <vector>
-
-#include "arrow/buffer.h"
-#include "arrow/buffer_builder.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/sort.h"
-#include "arrow/visitor_inline.h"
-
-namespace arrow {
-
-class MemoryPool;
-
-namespace internal {
-namespace {
-
-inline void IncrementIndex(std::vector<int64_t>& coord, const std::vector<int64_t>& shape,
- const std::vector<int64_t>& axis_order) {
- const int64_t ndim = shape.size();
- const int64_t last_axis = axis_order[ndim - 1];
- ++coord[last_axis];
- if (coord[last_axis] == shape[last_axis]) {
- int64_t d = ndim - 1;
- while (d > 0 && coord[axis_order[d]] == shape[axis_order[d]]) {
- coord[axis_order[d]] = 0;
- ++coord[axis_order[d - 1]];
- --d;
- }
- }
-}
-
-// ----------------------------------------------------------------------
-// SparseTensorConverter for SparseCSFIndex
-
-class SparseCSFTensorConverter : private SparseTensorConverterMixin {
- using SparseTensorConverterMixin::AssignIndex;
- using SparseTensorConverterMixin::IsNonZero;
-
- public:
- SparseCSFTensorConverter(const Tensor& tensor,
- const std::shared_ptr<DataType>& index_value_type,
- MemoryPool* pool)
- : tensor_(tensor), index_value_type_(index_value_type), pool_(pool) {}
-
- Status Convert() {
- RETURN_NOT_OK(::arrow::internal::CheckSparseIndexMaximumValue(index_value_type_,
- tensor_.shape()));
-
- const int index_elsize = GetByteWidth(*index_value_type_);
- const int value_elsize = GetByteWidth(*tensor_.type());
-
- const int64_t ndim = tensor_.ndim();
- // Axis order as ascending order of dimension size is a good heuristic but is not
- // necessarily optimal.
- std::vector<int64_t> axis_order = internal::ArgSort(tensor_.shape());
- ARROW_ASSIGN_OR_RAISE(int64_t nonzero_count, tensor_.CountNonZero());
-
- ARROW_ASSIGN_OR_RAISE(auto values_buffer,
- AllocateBuffer(value_elsize * nonzero_count, pool_));
- auto* values = values_buffer->mutable_data();
-
- std::vector<int64_t> counts(ndim, 0);
- std::vector<int64_t> coord(ndim, 0);
- std::vector<int64_t> previous_coord(ndim, -1);
- std::vector<BufferBuilder> indptr_buffer_builders(ndim - 1);
- std::vector<BufferBuilder> indices_buffer_builders(ndim);
-
- const auto* tensor_data = tensor_.raw_data();
- uint8_t index_buffer[sizeof(int64_t)];
-
- if (ndim <= 1) {
- return Status::NotImplemented("TODO for ndim <= 1");
- } else {
- const auto& shape = tensor_.shape();
- for (int64_t n = tensor_.size(); n > 0; n--) {
- const auto offset = tensor_.CalculateValueOffset(coord);
- const auto xp = tensor_data + offset;
-
- if (std::any_of(xp, xp + value_elsize, IsNonZero)) {
- bool tree_split = false;
-
- std::copy_n(xp, value_elsize, values);
- values += value_elsize;
-
- for (int64_t i = 0; i < ndim; ++i) {
- int64_t dimension = axis_order[i];
-
- tree_split = tree_split || (coord[dimension] != previous_coord[dimension]);
- if (tree_split) {
- if (i < ndim - 1) {
- AssignIndex(index_buffer, counts[i + 1], index_elsize);
- RETURN_NOT_OK(
- indptr_buffer_builders[i].Append(index_buffer, index_elsize));
- }
-
- AssignIndex(index_buffer, coord[dimension], index_elsize);
- RETURN_NOT_OK(
- indices_buffer_builders[i].Append(index_buffer, index_elsize));
-
- ++counts[i];
- }
- }
-
- previous_coord = coord;
- }
-
- IncrementIndex(coord, shape, axis_order);
- }
- }
-
- for (int64_t column = 0; column < ndim - 1; ++column) {
- AssignIndex(index_buffer, counts[column + 1], index_elsize);
- RETURN_NOT_OK(indptr_buffer_builders[column].Append(index_buffer, index_elsize));
- }
-
- // make results
- data = std::move(values_buffer);
-
- std::vector<std::shared_ptr<Buffer>> indptr_buffers(ndim - 1);
- std::vector<std::shared_ptr<Buffer>> indices_buffers(ndim);
- std::vector<int64_t> indptr_shapes(counts.begin(), counts.end() - 1);
- std::vector<int64_t> indices_shapes = counts;
-
- for (int64_t column = 0; column < ndim; ++column) {
- RETURN_NOT_OK(
- indices_buffer_builders[column].Finish(&indices_buffers[column], true));
- }
- for (int64_t column = 0; column < ndim - 1; ++column) {
- RETURN_NOT_OK(indptr_buffer_builders[column].Finish(&indptr_buffers[column], true));
- }
-
- ARROW_ASSIGN_OR_RAISE(
- sparse_index, SparseCSFIndex::Make(index_value_type_, indices_shapes, axis_order,
- indptr_buffers, indices_buffers));
- return Status::OK();
- }
-
- std::shared_ptr<SparseCSFIndex> sparse_index;
- std::shared_ptr<Buffer> data;
-
- private:
- const Tensor& tensor_;
- const std::shared_ptr<DataType>& index_value_type_;
- MemoryPool* pool_;
-};
-
-class TensorBuilderFromSparseCSFTensor : private SparseTensorConverterMixin {
- using SparseTensorConverterMixin::GetIndexValue;
-
- MemoryPool* pool_;
- const SparseCSFTensor* sparse_tensor_;
- const SparseCSFIndex* sparse_index_;
- const std::vector<std::shared_ptr<Tensor>>& indptr_;
- const std::vector<std::shared_ptr<Tensor>>& indices_;
- const std::vector<int64_t>& axis_order_;
- const std::vector<int64_t>& shape_;
- const int64_t non_zero_length_;
- const int ndim_;
- const int64_t tensor_size_;
- const FixedWidthType& value_type_;
- const int value_elsize_;
- const uint8_t* raw_data_;
- std::vector<int64_t> strides_;
- std::shared_ptr<Buffer> values_buffer_;
- uint8_t* values_;
-
- public:
- TensorBuilderFromSparseCSFTensor(const SparseCSFTensor* sparse_tensor, MemoryPool* pool)
- : pool_(pool),
- sparse_tensor_(sparse_tensor),
- sparse_index_(
- checked_cast<const SparseCSFIndex*>(sparse_tensor->sparse_index().get())),
- indptr_(sparse_index_->indptr()),
- indices_(sparse_index_->indices()),
- axis_order_(sparse_index_->axis_order()),
- shape_(sparse_tensor->shape()),
- non_zero_length_(sparse_tensor->non_zero_length()),
- ndim_(sparse_tensor->ndim()),
- tensor_size_(sparse_tensor->size()),
- value_type_(checked_cast<const FixedWidthType&>(*sparse_tensor->type())),
- value_elsize_(GetByteWidth(value_type_)),
- raw_data_(sparse_tensor->raw_data()) {}
-
- int ElementSize(const std::shared_ptr<Tensor>& tensor) const {
- return GetByteWidth(*tensor->type());
- }
-
- Result<std::shared_ptr<Tensor>> Build() {
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/tensor/converter.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <vector>
+
+#include "arrow/buffer.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/sort.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+class MemoryPool;
+
+namespace internal {
+namespace {
+
+inline void IncrementIndex(std::vector<int64_t>& coord, const std::vector<int64_t>& shape,
+ const std::vector<int64_t>& axis_order) {
+ const int64_t ndim = shape.size();
+ const int64_t last_axis = axis_order[ndim - 1];
+ ++coord[last_axis];
+ if (coord[last_axis] == shape[last_axis]) {
+ int64_t d = ndim - 1;
+ while (d > 0 && coord[axis_order[d]] == shape[axis_order[d]]) {
+ coord[axis_order[d]] = 0;
+ ++coord[axis_order[d - 1]];
+ --d;
+ }
+ }
+}
+
+// ----------------------------------------------------------------------
+// SparseTensorConverter for SparseCSFIndex
+
+class SparseCSFTensorConverter : private SparseTensorConverterMixin {
+ using SparseTensorConverterMixin::AssignIndex;
+ using SparseTensorConverterMixin::IsNonZero;
+
+ public:
+ SparseCSFTensorConverter(const Tensor& tensor,
+ const std::shared_ptr<DataType>& index_value_type,
+ MemoryPool* pool)
+ : tensor_(tensor), index_value_type_(index_value_type), pool_(pool) {}
+
+ Status Convert() {
+ RETURN_NOT_OK(::arrow::internal::CheckSparseIndexMaximumValue(index_value_type_,
+ tensor_.shape()));
+
+ const int index_elsize = GetByteWidth(*index_value_type_);
+ const int value_elsize = GetByteWidth(*tensor_.type());
+
+ const int64_t ndim = tensor_.ndim();
+ // Axis order as ascending order of dimension size is a good heuristic but is not
+ // necessarily optimal.
+ std::vector<int64_t> axis_order = internal::ArgSort(tensor_.shape());
+ ARROW_ASSIGN_OR_RAISE(int64_t nonzero_count, tensor_.CountNonZero());
+
+ ARROW_ASSIGN_OR_RAISE(auto values_buffer,
+ AllocateBuffer(value_elsize * nonzero_count, pool_));
+ auto* values = values_buffer->mutable_data();
+
+ std::vector<int64_t> counts(ndim, 0);
+ std::vector<int64_t> coord(ndim, 0);
+ std::vector<int64_t> previous_coord(ndim, -1);
+ std::vector<BufferBuilder> indptr_buffer_builders(ndim - 1);
+ std::vector<BufferBuilder> indices_buffer_builders(ndim);
+
+ const auto* tensor_data = tensor_.raw_data();
+ uint8_t index_buffer[sizeof(int64_t)];
+
+ if (ndim <= 1) {
+ return Status::NotImplemented("TODO for ndim <= 1");
+ } else {
+ const auto& shape = tensor_.shape();
+ for (int64_t n = tensor_.size(); n > 0; n--) {
+ const auto offset = tensor_.CalculateValueOffset(coord);
+ const auto xp = tensor_data + offset;
+
+ if (std::any_of(xp, xp + value_elsize, IsNonZero)) {
+ bool tree_split = false;
+
+ std::copy_n(xp, value_elsize, values);
+ values += value_elsize;
+
+ for (int64_t i = 0; i < ndim; ++i) {
+ int64_t dimension = axis_order[i];
+
+ tree_split = tree_split || (coord[dimension] != previous_coord[dimension]);
+ if (tree_split) {
+ if (i < ndim - 1) {
+ AssignIndex(index_buffer, counts[i + 1], index_elsize);
+ RETURN_NOT_OK(
+ indptr_buffer_builders[i].Append(index_buffer, index_elsize));
+ }
+
+ AssignIndex(index_buffer, coord[dimension], index_elsize);
+ RETURN_NOT_OK(
+ indices_buffer_builders[i].Append(index_buffer, index_elsize));
+
+ ++counts[i];
+ }
+ }
+
+ previous_coord = coord;
+ }
+
+ IncrementIndex(coord, shape, axis_order);
+ }
+ }
+
+ for (int64_t column = 0; column < ndim - 1; ++column) {
+ AssignIndex(index_buffer, counts[column + 1], index_elsize);
+ RETURN_NOT_OK(indptr_buffer_builders[column].Append(index_buffer, index_elsize));
+ }
+
+ // make results
+ data = std::move(values_buffer);
+
+ std::vector<std::shared_ptr<Buffer>> indptr_buffers(ndim - 1);
+ std::vector<std::shared_ptr<Buffer>> indices_buffers(ndim);
+ std::vector<int64_t> indptr_shapes(counts.begin(), counts.end() - 1);
+ std::vector<int64_t> indices_shapes = counts;
+
+ for (int64_t column = 0; column < ndim; ++column) {
+ RETURN_NOT_OK(
+ indices_buffer_builders[column].Finish(&indices_buffers[column], true));
+ }
+ for (int64_t column = 0; column < ndim - 1; ++column) {
+ RETURN_NOT_OK(indptr_buffer_builders[column].Finish(&indptr_buffers[column], true));
+ }
+
+ ARROW_ASSIGN_OR_RAISE(
+ sparse_index, SparseCSFIndex::Make(index_value_type_, indices_shapes, axis_order,
+ indptr_buffers, indices_buffers));
+ return Status::OK();
+ }
+
+ std::shared_ptr<SparseCSFIndex> sparse_index;
+ std::shared_ptr<Buffer> data;
+
+ private:
+ const Tensor& tensor_;
+ const std::shared_ptr<DataType>& index_value_type_;
+ MemoryPool* pool_;
+};
+
+class TensorBuilderFromSparseCSFTensor : private SparseTensorConverterMixin {
+ using SparseTensorConverterMixin::GetIndexValue;
+
+ MemoryPool* pool_;
+ const SparseCSFTensor* sparse_tensor_;
+ const SparseCSFIndex* sparse_index_;
+ const std::vector<std::shared_ptr<Tensor>>& indptr_;
+ const std::vector<std::shared_ptr<Tensor>>& indices_;
+ const std::vector<int64_t>& axis_order_;
+ const std::vector<int64_t>& shape_;
+ const int64_t non_zero_length_;
+ const int ndim_;
+ const int64_t tensor_size_;
+ const FixedWidthType& value_type_;
+ const int value_elsize_;
+ const uint8_t* raw_data_;
+ std::vector<int64_t> strides_;
+ std::shared_ptr<Buffer> values_buffer_;
+ uint8_t* values_;
+
+ public:
+ TensorBuilderFromSparseCSFTensor(const SparseCSFTensor* sparse_tensor, MemoryPool* pool)
+ : pool_(pool),
+ sparse_tensor_(sparse_tensor),
+ sparse_index_(
+ checked_cast<const SparseCSFIndex*>(sparse_tensor->sparse_index().get())),
+ indptr_(sparse_index_->indptr()),
+ indices_(sparse_index_->indices()),
+ axis_order_(sparse_index_->axis_order()),
+ shape_(sparse_tensor->shape()),
+ non_zero_length_(sparse_tensor->non_zero_length()),
+ ndim_(sparse_tensor->ndim()),
+ tensor_size_(sparse_tensor->size()),
+ value_type_(checked_cast<const FixedWidthType&>(*sparse_tensor->type())),
+ value_elsize_(GetByteWidth(value_type_)),
+ raw_data_(sparse_tensor->raw_data()) {}
+
+ int ElementSize(const std::shared_ptr<Tensor>& tensor) const {
+ return GetByteWidth(*tensor->type());
+ }
+
+ Result<std::shared_ptr<Tensor>> Build() {
RETURN_NOT_OK(internal::ComputeRowMajorStrides(value_type_, shape_, &strides_));
-
- ARROW_ASSIGN_OR_RAISE(values_buffer_,
- AllocateBuffer(value_elsize_ * tensor_size_, pool_));
- values_ = values_buffer_->mutable_data();
- std::fill_n(values_, value_elsize_ * tensor_size_, 0);
-
- const int64_t start = 0;
- const int64_t stop = indptr_[0]->size() - 1;
- ExpandValues(0, 0, start, stop);
-
- return std::make_shared<Tensor>(sparse_tensor_->type(), std::move(values_buffer_),
- shape_, strides_, sparse_tensor_->dim_names());
- }
-
- void ExpandValues(const int64_t dim, const int64_t dim_offset, const int64_t start,
- const int64_t stop) {
- const auto& cur_indices = indices_[dim];
- const int indices_elsize = ElementSize(cur_indices);
- const auto* indices_data = cur_indices->raw_data() + start * indices_elsize;
-
- if (dim == ndim_ - 1) {
- for (auto i = start; i < stop; ++i) {
- const int64_t index =
- SparseTensorConverterMixin::GetIndexValue(indices_data, indices_elsize);
- const int64_t offset = dim_offset + index * strides_[axis_order_[dim]];
-
- std::copy_n(raw_data_ + i * value_elsize_, value_elsize_, values_ + offset);
-
- indices_data += indices_elsize;
- }
- } else {
- const auto& cur_indptr = indptr_[dim];
- const int indptr_elsize = ElementSize(cur_indptr);
- const auto* indptr_data = cur_indptr->raw_data() + start * indptr_elsize;
-
- for (int64_t i = start; i < stop; ++i) {
- const int64_t index =
- SparseTensorConverterMixin::GetIndexValue(indices_data, indices_elsize);
- const int64_t offset = dim_offset + index * strides_[axis_order_[dim]];
- const int64_t next_start = GetIndexValue(indptr_data, indptr_elsize);
- const int64_t next_stop =
- GetIndexValue(indptr_data + indptr_elsize, indptr_elsize);
-
- ExpandValues(dim + 1, offset, next_start, next_stop);
-
- indices_data += indices_elsize;
- indptr_data += indptr_elsize;
- }
- }
- }
-};
-
-} // namespace
-
-Status MakeSparseCSFTensorFromTensor(const Tensor& tensor,
- const std::shared_ptr<DataType>& index_value_type,
- MemoryPool* pool,
- std::shared_ptr<SparseIndex>* out_sparse_index,
- std::shared_ptr<Buffer>* out_data) {
- SparseCSFTensorConverter converter(tensor, index_value_type, pool);
- RETURN_NOT_OK(converter.Convert());
-
- *out_sparse_index = checked_pointer_cast<SparseIndex>(converter.sparse_index);
- *out_data = converter.data;
- return Status::OK();
-}
-
-Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCSFTensor(
- MemoryPool* pool, const SparseCSFTensor* sparse_tensor) {
- TensorBuilderFromSparseCSFTensor builder(sparse_tensor, pool);
- return builder.Build();
-}
-
-} // namespace internal
-} // namespace arrow
+
+ ARROW_ASSIGN_OR_RAISE(values_buffer_,
+ AllocateBuffer(value_elsize_ * tensor_size_, pool_));
+ values_ = values_buffer_->mutable_data();
+ std::fill_n(values_, value_elsize_ * tensor_size_, 0);
+
+ const int64_t start = 0;
+ const int64_t stop = indptr_[0]->size() - 1;
+ ExpandValues(0, 0, start, stop);
+
+ return std::make_shared<Tensor>(sparse_tensor_->type(), std::move(values_buffer_),
+ shape_, strides_, sparse_tensor_->dim_names());
+ }
+
+ void ExpandValues(const int64_t dim, const int64_t dim_offset, const int64_t start,
+ const int64_t stop) {
+ const auto& cur_indices = indices_[dim];
+ const int indices_elsize = ElementSize(cur_indices);
+ const auto* indices_data = cur_indices->raw_data() + start * indices_elsize;
+
+ if (dim == ndim_ - 1) {
+ for (auto i = start; i < stop; ++i) {
+ const int64_t index =
+ SparseTensorConverterMixin::GetIndexValue(indices_data, indices_elsize);
+ const int64_t offset = dim_offset + index * strides_[axis_order_[dim]];
+
+ std::copy_n(raw_data_ + i * value_elsize_, value_elsize_, values_ + offset);
+
+ indices_data += indices_elsize;
+ }
+ } else {
+ const auto& cur_indptr = indptr_[dim];
+ const int indptr_elsize = ElementSize(cur_indptr);
+ const auto* indptr_data = cur_indptr->raw_data() + start * indptr_elsize;
+
+ for (int64_t i = start; i < stop; ++i) {
+ const int64_t index =
+ SparseTensorConverterMixin::GetIndexValue(indices_data, indices_elsize);
+ const int64_t offset = dim_offset + index * strides_[axis_order_[dim]];
+ const int64_t next_start = GetIndexValue(indptr_data, indptr_elsize);
+ const int64_t next_stop =
+ GetIndexValue(indptr_data + indptr_elsize, indptr_elsize);
+
+ ExpandValues(dim + 1, offset, next_start, next_stop);
+
+ indices_data += indices_elsize;
+ indptr_data += indptr_elsize;
+ }
+ }
+ }
+};
+
+} // namespace
+
+Status MakeSparseCSFTensorFromTensor(const Tensor& tensor,
+ const std::shared_ptr<DataType>& index_value_type,
+ MemoryPool* pool,
+ std::shared_ptr<SparseIndex>* out_sparse_index,
+ std::shared_ptr<Buffer>* out_data) {
+ SparseCSFTensorConverter converter(tensor, index_value_type, pool);
+ RETURN_NOT_OK(converter.Convert());
+
+ *out_sparse_index = checked_pointer_cast<SparseIndex>(converter.sparse_index);
+ *out_data = converter.data;
+ return Status::OK();
+}
+
+Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCSFTensor(
+ MemoryPool* pool, const SparseCSFTensor* sparse_tensor) {
+ TensorBuilderFromSparseCSFTensor builder(sparse_tensor, pool);
+ return builder.Build();
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/tensor/csx_converter.cc b/contrib/libs/apache/arrow/cpp/src/arrow/tensor/csx_converter.cc
index 137b5d3202f..3e85b34383d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/tensor/csx_converter.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/tensor/csx_converter.cc
@@ -1,241 +1,241 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/tensor/converter.h"
-
-#include <cstdint>
-#include <limits>
-#include <memory>
-#include <vector>
-
-#include "arrow/buffer.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/visitor_inline.h"
-
-namespace arrow {
-
-class MemoryPool;
-
-namespace internal {
-namespace {
-
-// ----------------------------------------------------------------------
-// SparseTensorConverter for SparseCSRIndex
-
-class SparseCSXMatrixConverter : private SparseTensorConverterMixin {
- using SparseTensorConverterMixin::AssignIndex;
- using SparseTensorConverterMixin::IsNonZero;
-
- public:
- SparseCSXMatrixConverter(SparseMatrixCompressedAxis axis, const Tensor& tensor,
- const std::shared_ptr<DataType>& index_value_type,
- MemoryPool* pool)
- : axis_(axis), tensor_(tensor), index_value_type_(index_value_type), pool_(pool) {}
-
- Status Convert() {
- RETURN_NOT_OK(::arrow::internal::CheckSparseIndexMaximumValue(index_value_type_,
- tensor_.shape()));
-
- const int index_elsize = GetByteWidth(*index_value_type_);
- const int value_elsize = GetByteWidth(*tensor_.type());
-
- const int64_t ndim = tensor_.ndim();
- if (ndim > 2) {
- return Status::Invalid("Invalid tensor dimension");
- }
-
- const int major_axis = static_cast<int>(axis_);
- const int64_t n_major = tensor_.shape()[major_axis];
- const int64_t n_minor = tensor_.shape()[1 - major_axis];
- ARROW_ASSIGN_OR_RAISE(int64_t nonzero_count, tensor_.CountNonZero());
-
- std::shared_ptr<Buffer> indptr_buffer;
- std::shared_ptr<Buffer> indices_buffer;
-
- ARROW_ASSIGN_OR_RAISE(auto values_buffer,
- AllocateBuffer(value_elsize * nonzero_count, pool_));
- auto* values = values_buffer->mutable_data();
-
- const auto* tensor_data = tensor_.raw_data();
-
- if (ndim <= 1) {
- return Status::NotImplemented("TODO for ndim <= 1");
- } else {
- ARROW_ASSIGN_OR_RAISE(indptr_buffer,
- AllocateBuffer(index_elsize * (n_major + 1), pool_));
- auto* indptr = indptr_buffer->mutable_data();
-
- ARROW_ASSIGN_OR_RAISE(indices_buffer,
- AllocateBuffer(index_elsize * nonzero_count, pool_));
- auto* indices = indices_buffer->mutable_data();
-
- std::vector<int64_t> coords(2);
- int64_t k = 0;
- std::fill_n(indptr, index_elsize, 0);
- indptr += index_elsize;
- for (int64_t i = 0; i < n_major; ++i) {
- for (int64_t j = 0; j < n_minor; ++j) {
- if (axis_ == SparseMatrixCompressedAxis::ROW) {
- coords = {i, j};
- } else {
- coords = {j, i};
- }
- const int64_t offset = tensor_.CalculateValueOffset(coords);
- if (std::any_of(tensor_data + offset, tensor_data + offset + value_elsize,
- IsNonZero)) {
- std::copy_n(tensor_data + offset, value_elsize, values);
- values += value_elsize;
-
- AssignIndex(indices, j, index_elsize);
- indices += index_elsize;
-
- k++;
- }
- }
- AssignIndex(indptr, k, index_elsize);
- indptr += index_elsize;
- }
- }
-
- std::vector<int64_t> indptr_shape({n_major + 1});
- std::shared_ptr<Tensor> indptr_tensor =
- std::make_shared<Tensor>(index_value_type_, indptr_buffer, indptr_shape);
-
- std::vector<int64_t> indices_shape({nonzero_count});
- std::shared_ptr<Tensor> indices_tensor =
- std::make_shared<Tensor>(index_value_type_, indices_buffer, indices_shape);
-
- if (axis_ == SparseMatrixCompressedAxis::ROW) {
- sparse_index = std::make_shared<SparseCSRIndex>(indptr_tensor, indices_tensor);
- } else {
- sparse_index = std::make_shared<SparseCSCIndex>(indptr_tensor, indices_tensor);
- }
- data = std::move(values_buffer);
-
- return Status::OK();
- }
-
- std::shared_ptr<SparseIndex> sparse_index;
- std::shared_ptr<Buffer> data;
-
- private:
- SparseMatrixCompressedAxis axis_;
- const Tensor& tensor_;
- const std::shared_ptr<DataType>& index_value_type_;
- MemoryPool* pool_;
-};
-
-} // namespace
-
-Status MakeSparseCSXMatrixFromTensor(SparseMatrixCompressedAxis axis,
- const Tensor& tensor,
- const std::shared_ptr<DataType>& index_value_type,
- MemoryPool* pool,
- std::shared_ptr<SparseIndex>* out_sparse_index,
- std::shared_ptr<Buffer>* out_data) {
- SparseCSXMatrixConverter converter(axis, tensor, index_value_type, pool);
- RETURN_NOT_OK(converter.Convert());
-
- *out_sparse_index = converter.sparse_index;
- *out_data = converter.data;
- return Status::OK();
-}
-
-Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCSXMatrix(
- SparseMatrixCompressedAxis axis, MemoryPool* pool,
- const std::shared_ptr<Tensor>& indptr, const std::shared_ptr<Tensor>& indices,
- const int64_t non_zero_length, const std::shared_ptr<DataType>& value_type,
- const std::vector<int64_t>& shape, const int64_t tensor_size, const uint8_t* raw_data,
- const std::vector<std::string>& dim_names) {
- const auto* indptr_data = indptr->raw_data();
- const auto* indices_data = indices->raw_data();
-
- const int indptr_elsize = GetByteWidth(*indptr->type());
- const int indices_elsize = GetByteWidth(*indices->type());
-
- const auto& fw_value_type = checked_cast<const FixedWidthType&>(*value_type);
- const int value_elsize = GetByteWidth(fw_value_type);
- ARROW_ASSIGN_OR_RAISE(auto values_buffer,
- AllocateBuffer(value_elsize * tensor_size, pool));
- auto values = values_buffer->mutable_data();
- std::fill_n(values, value_elsize * tensor_size, 0);
-
- std::vector<int64_t> strides;
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/tensor/converter.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <vector>
+
+#include "arrow/buffer.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+class MemoryPool;
+
+namespace internal {
+namespace {
+
+// ----------------------------------------------------------------------
+// SparseTensorConverter for SparseCSRIndex
+
+class SparseCSXMatrixConverter : private SparseTensorConverterMixin {
+ using SparseTensorConverterMixin::AssignIndex;
+ using SparseTensorConverterMixin::IsNonZero;
+
+ public:
+ SparseCSXMatrixConverter(SparseMatrixCompressedAxis axis, const Tensor& tensor,
+ const std::shared_ptr<DataType>& index_value_type,
+ MemoryPool* pool)
+ : axis_(axis), tensor_(tensor), index_value_type_(index_value_type), pool_(pool) {}
+
+ Status Convert() {
+ RETURN_NOT_OK(::arrow::internal::CheckSparseIndexMaximumValue(index_value_type_,
+ tensor_.shape()));
+
+ const int index_elsize = GetByteWidth(*index_value_type_);
+ const int value_elsize = GetByteWidth(*tensor_.type());
+
+ const int64_t ndim = tensor_.ndim();
+ if (ndim > 2) {
+ return Status::Invalid("Invalid tensor dimension");
+ }
+
+ const int major_axis = static_cast<int>(axis_);
+ const int64_t n_major = tensor_.shape()[major_axis];
+ const int64_t n_minor = tensor_.shape()[1 - major_axis];
+ ARROW_ASSIGN_OR_RAISE(int64_t nonzero_count, tensor_.CountNonZero());
+
+ std::shared_ptr<Buffer> indptr_buffer;
+ std::shared_ptr<Buffer> indices_buffer;
+
+ ARROW_ASSIGN_OR_RAISE(auto values_buffer,
+ AllocateBuffer(value_elsize * nonzero_count, pool_));
+ auto* values = values_buffer->mutable_data();
+
+ const auto* tensor_data = tensor_.raw_data();
+
+ if (ndim <= 1) {
+ return Status::NotImplemented("TODO for ndim <= 1");
+ } else {
+ ARROW_ASSIGN_OR_RAISE(indptr_buffer,
+ AllocateBuffer(index_elsize * (n_major + 1), pool_));
+ auto* indptr = indptr_buffer->mutable_data();
+
+ ARROW_ASSIGN_OR_RAISE(indices_buffer,
+ AllocateBuffer(index_elsize * nonzero_count, pool_));
+ auto* indices = indices_buffer->mutable_data();
+
+ std::vector<int64_t> coords(2);
+ int64_t k = 0;
+ std::fill_n(indptr, index_elsize, 0);
+ indptr += index_elsize;
+ for (int64_t i = 0; i < n_major; ++i) {
+ for (int64_t j = 0; j < n_minor; ++j) {
+ if (axis_ == SparseMatrixCompressedAxis::ROW) {
+ coords = {i, j};
+ } else {
+ coords = {j, i};
+ }
+ const int64_t offset = tensor_.CalculateValueOffset(coords);
+ if (std::any_of(tensor_data + offset, tensor_data + offset + value_elsize,
+ IsNonZero)) {
+ std::copy_n(tensor_data + offset, value_elsize, values);
+ values += value_elsize;
+
+ AssignIndex(indices, j, index_elsize);
+ indices += index_elsize;
+
+ k++;
+ }
+ }
+ AssignIndex(indptr, k, index_elsize);
+ indptr += index_elsize;
+ }
+ }
+
+ std::vector<int64_t> indptr_shape({n_major + 1});
+ std::shared_ptr<Tensor> indptr_tensor =
+ std::make_shared<Tensor>(index_value_type_, indptr_buffer, indptr_shape);
+
+ std::vector<int64_t> indices_shape({nonzero_count});
+ std::shared_ptr<Tensor> indices_tensor =
+ std::make_shared<Tensor>(index_value_type_, indices_buffer, indices_shape);
+
+ if (axis_ == SparseMatrixCompressedAxis::ROW) {
+ sparse_index = std::make_shared<SparseCSRIndex>(indptr_tensor, indices_tensor);
+ } else {
+ sparse_index = std::make_shared<SparseCSCIndex>(indptr_tensor, indices_tensor);
+ }
+ data = std::move(values_buffer);
+
+ return Status::OK();
+ }
+
+ std::shared_ptr<SparseIndex> sparse_index;
+ std::shared_ptr<Buffer> data;
+
+ private:
+ SparseMatrixCompressedAxis axis_;
+ const Tensor& tensor_;
+ const std::shared_ptr<DataType>& index_value_type_;
+ MemoryPool* pool_;
+};
+
+} // namespace
+
+Status MakeSparseCSXMatrixFromTensor(SparseMatrixCompressedAxis axis,
+ const Tensor& tensor,
+ const std::shared_ptr<DataType>& index_value_type,
+ MemoryPool* pool,
+ std::shared_ptr<SparseIndex>* out_sparse_index,
+ std::shared_ptr<Buffer>* out_data) {
+ SparseCSXMatrixConverter converter(axis, tensor, index_value_type, pool);
+ RETURN_NOT_OK(converter.Convert());
+
+ *out_sparse_index = converter.sparse_index;
+ *out_data = converter.data;
+ return Status::OK();
+}
+
+Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCSXMatrix(
+ SparseMatrixCompressedAxis axis, MemoryPool* pool,
+ const std::shared_ptr<Tensor>& indptr, const std::shared_ptr<Tensor>& indices,
+ const int64_t non_zero_length, const std::shared_ptr<DataType>& value_type,
+ const std::vector<int64_t>& shape, const int64_t tensor_size, const uint8_t* raw_data,
+ const std::vector<std::string>& dim_names) {
+ const auto* indptr_data = indptr->raw_data();
+ const auto* indices_data = indices->raw_data();
+
+ const int indptr_elsize = GetByteWidth(*indptr->type());
+ const int indices_elsize = GetByteWidth(*indices->type());
+
+ const auto& fw_value_type = checked_cast<const FixedWidthType&>(*value_type);
+ const int value_elsize = GetByteWidth(fw_value_type);
+ ARROW_ASSIGN_OR_RAISE(auto values_buffer,
+ AllocateBuffer(value_elsize * tensor_size, pool));
+ auto values = values_buffer->mutable_data();
+ std::fill_n(values, value_elsize * tensor_size, 0);
+
+ std::vector<int64_t> strides;
RETURN_NOT_OK(ComputeRowMajorStrides(fw_value_type, shape, &strides));
-
- const auto nc = shape[1];
-
- int64_t offset = 0;
- for (int64_t i = 0; i < indptr->size() - 1; ++i) {
- const auto start =
- SparseTensorConverterMixin::GetIndexValue(indptr_data, indptr_elsize);
- const auto stop = SparseTensorConverterMixin::GetIndexValue(
- indptr_data + indptr_elsize, indptr_elsize);
-
- for (int64_t j = start; j < stop; ++j) {
- const auto index = SparseTensorConverterMixin::GetIndexValue(
- indices_data + j * indices_elsize, indices_elsize);
- switch (axis) {
- case SparseMatrixCompressedAxis::ROW:
- offset = (index + i * nc) * value_elsize;
- break;
- case SparseMatrixCompressedAxis::COLUMN:
- offset = (i + index * nc) * value_elsize;
- break;
- }
-
- std::copy_n(raw_data, value_elsize, values + offset);
- raw_data += value_elsize;
- }
-
- indptr_data += indptr_elsize;
- }
-
- return std::make_shared<Tensor>(value_type, std::move(values_buffer), shape, strides,
- dim_names);
-}
-
-Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCSRMatrix(
- MemoryPool* pool, const SparseCSRMatrix* sparse_tensor) {
- const auto& sparse_index =
- internal::checked_cast<const SparseCSRIndex&>(*sparse_tensor->sparse_index());
- const auto& indptr = sparse_index.indptr();
- const auto& indices = sparse_index.indices();
- const auto non_zero_length = sparse_tensor->non_zero_length();
- return MakeTensorFromSparseCSXMatrix(
- SparseMatrixCompressedAxis::ROW, pool, indptr, indices, non_zero_length,
- sparse_tensor->type(), sparse_tensor->shape(), sparse_tensor->size(),
- sparse_tensor->raw_data(), sparse_tensor->dim_names());
-}
-
-Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCSCMatrix(
- MemoryPool* pool, const SparseCSCMatrix* sparse_tensor) {
- const auto& sparse_index =
- internal::checked_cast<const SparseCSCIndex&>(*sparse_tensor->sparse_index());
- const auto& indptr = sparse_index.indptr();
- const auto& indices = sparse_index.indices();
- const auto non_zero_length = sparse_tensor->non_zero_length();
- return MakeTensorFromSparseCSXMatrix(
- SparseMatrixCompressedAxis::COLUMN, pool, indptr, indices, non_zero_length,
- sparse_tensor->type(), sparse_tensor->shape(), sparse_tensor->size(),
- sparse_tensor->raw_data(), sparse_tensor->dim_names());
-}
-
-} // namespace internal
-} // namespace arrow
+
+ const auto nc = shape[1];
+
+ int64_t offset = 0;
+ for (int64_t i = 0; i < indptr->size() - 1; ++i) {
+ const auto start =
+ SparseTensorConverterMixin::GetIndexValue(indptr_data, indptr_elsize);
+ const auto stop = SparseTensorConverterMixin::GetIndexValue(
+ indptr_data + indptr_elsize, indptr_elsize);
+
+ for (int64_t j = start; j < stop; ++j) {
+ const auto index = SparseTensorConverterMixin::GetIndexValue(
+ indices_data + j * indices_elsize, indices_elsize);
+ switch (axis) {
+ case SparseMatrixCompressedAxis::ROW:
+ offset = (index + i * nc) * value_elsize;
+ break;
+ case SparseMatrixCompressedAxis::COLUMN:
+ offset = (i + index * nc) * value_elsize;
+ break;
+ }
+
+ std::copy_n(raw_data, value_elsize, values + offset);
+ raw_data += value_elsize;
+ }
+
+ indptr_data += indptr_elsize;
+ }
+
+ return std::make_shared<Tensor>(value_type, std::move(values_buffer), shape, strides,
+ dim_names);
+}
+
+Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCSRMatrix(
+ MemoryPool* pool, const SparseCSRMatrix* sparse_tensor) {
+ const auto& sparse_index =
+ internal::checked_cast<const SparseCSRIndex&>(*sparse_tensor->sparse_index());
+ const auto& indptr = sparse_index.indptr();
+ const auto& indices = sparse_index.indices();
+ const auto non_zero_length = sparse_tensor->non_zero_length();
+ return MakeTensorFromSparseCSXMatrix(
+ SparseMatrixCompressedAxis::ROW, pool, indptr, indices, non_zero_length,
+ sparse_tensor->type(), sparse_tensor->shape(), sparse_tensor->size(),
+ sparse_tensor->raw_data(), sparse_tensor->dim_names());
+}
+
+Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCSCMatrix(
+ MemoryPool* pool, const SparseCSCMatrix* sparse_tensor) {
+ const auto& sparse_index =
+ internal::checked_cast<const SparseCSCIndex&>(*sparse_tensor->sparse_index());
+ const auto& indptr = sparse_index.indptr();
+ const auto& indices = sparse_index.indices();
+ const auto non_zero_length = sparse_tensor->non_zero_length();
+ return MakeTensorFromSparseCSXMatrix(
+ SparseMatrixCompressedAxis::COLUMN, pool, indptr, indices, non_zero_length,
+ sparse_tensor->type(), sparse_tensor->shape(), sparse_tensor->size(),
+ sparse_tensor->raw_data(), sparse_tensor->dim_names());
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/type.cc b/contrib/libs/apache/arrow/cpp/src/arrow/type.cc
index 41914f43663..55129af4bf7 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/type.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/type.cc
@@ -1,193 +1,193 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/type.h"
-
-#include <algorithm>
-#include <climits>
-#include <cstddef>
-#include <limits>
-#include <ostream>
-#include <sstream> // IWYU pragma: keep
-#include <string>
-#include <unordered_map>
-#include <unordered_set>
-#include <utility>
-#include <vector>
-
-#include "arrow/array.h"
-#include "arrow/compare.h"
-#include "arrow/record_batch.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/hash_util.h"
-#include "arrow/util/hashing.h"
-#include "arrow/util/key_value_metadata.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/make_unique.h"
-#include "arrow/util/range.h"
-#include "arrow/util/vector.h"
-#include "arrow/visitor_inline.h"
-
-namespace arrow {
-
-constexpr Type::type NullType::type_id;
-constexpr Type::type ListType::type_id;
-constexpr Type::type LargeListType::type_id;
-
-constexpr Type::type MapType::type_id;
-
-constexpr Type::type FixedSizeListType::type_id;
-
-constexpr Type::type BinaryType::type_id;
-
-constexpr Type::type LargeBinaryType::type_id;
-
-constexpr Type::type StringType::type_id;
-
-constexpr Type::type LargeStringType::type_id;
-
-constexpr Type::type FixedSizeBinaryType::type_id;
-
-constexpr Type::type StructType::type_id;
-
-constexpr Type::type Decimal128Type::type_id;
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/type.h"
+
+#include <algorithm>
+#include <climits>
+#include <cstddef>
+#include <limits>
+#include <ostream>
+#include <sstream> // IWYU pragma: keep
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/compare.h"
+#include "arrow/record_batch.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/hash_util.h"
+#include "arrow/util/hashing.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/make_unique.h"
+#include "arrow/util/range.h"
+#include "arrow/util/vector.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+constexpr Type::type NullType::type_id;
+constexpr Type::type ListType::type_id;
+constexpr Type::type LargeListType::type_id;
+
+constexpr Type::type MapType::type_id;
+
+constexpr Type::type FixedSizeListType::type_id;
+
+constexpr Type::type BinaryType::type_id;
+
+constexpr Type::type LargeBinaryType::type_id;
+
+constexpr Type::type StringType::type_id;
+
+constexpr Type::type LargeStringType::type_id;
+
+constexpr Type::type FixedSizeBinaryType::type_id;
+
+constexpr Type::type StructType::type_id;
+
+constexpr Type::type Decimal128Type::type_id;
+
constexpr Type::type Decimal256Type::type_id;
-constexpr Type::type SparseUnionType::type_id;
-
-constexpr Type::type DenseUnionType::type_id;
-
-constexpr Type::type Date32Type::type_id;
-
-constexpr Type::type Date64Type::type_id;
-
-constexpr Type::type Time32Type::type_id;
-
-constexpr Type::type Time64Type::type_id;
-
-constexpr Type::type TimestampType::type_id;
-
-constexpr Type::type MonthIntervalType::type_id;
-
-constexpr Type::type DayTimeIntervalType::type_id;
-
-constexpr Type::type DurationType::type_id;
-
-constexpr Type::type DictionaryType::type_id;
-
-namespace internal {
-
-struct TypeIdToTypeNameVisitor {
- std::string out;
-
- template <typename ArrowType>
- Status Visit(const ArrowType*) {
- out = ArrowType::type_name();
- return Status::OK();
- }
-};
-
-std::string ToTypeName(Type::type id) {
- TypeIdToTypeNameVisitor visitor;
-
- ARROW_CHECK_OK(VisitTypeIdInline(id, &visitor));
- return std::move(visitor.out);
-}
-
-std::string ToString(Type::type id) {
- switch (id) {
-#define TO_STRING_CASE(_id) \
- case Type::_id: \
- return ARROW_STRINGIFY(_id);
-
- TO_STRING_CASE(NA)
- TO_STRING_CASE(BOOL)
- TO_STRING_CASE(INT8)
- TO_STRING_CASE(INT16)
- TO_STRING_CASE(INT32)
- TO_STRING_CASE(INT64)
- TO_STRING_CASE(UINT8)
- TO_STRING_CASE(UINT16)
- TO_STRING_CASE(UINT32)
- TO_STRING_CASE(UINT64)
- TO_STRING_CASE(HALF_FLOAT)
- TO_STRING_CASE(FLOAT)
- TO_STRING_CASE(DOUBLE)
+constexpr Type::type SparseUnionType::type_id;
+
+constexpr Type::type DenseUnionType::type_id;
+
+constexpr Type::type Date32Type::type_id;
+
+constexpr Type::type Date64Type::type_id;
+
+constexpr Type::type Time32Type::type_id;
+
+constexpr Type::type Time64Type::type_id;
+
+constexpr Type::type TimestampType::type_id;
+
+constexpr Type::type MonthIntervalType::type_id;
+
+constexpr Type::type DayTimeIntervalType::type_id;
+
+constexpr Type::type DurationType::type_id;
+
+constexpr Type::type DictionaryType::type_id;
+
+namespace internal {
+
+struct TypeIdToTypeNameVisitor {
+ std::string out;
+
+ template <typename ArrowType>
+ Status Visit(const ArrowType*) {
+ out = ArrowType::type_name();
+ return Status::OK();
+ }
+};
+
+std::string ToTypeName(Type::type id) {
+ TypeIdToTypeNameVisitor visitor;
+
+ ARROW_CHECK_OK(VisitTypeIdInline(id, &visitor));
+ return std::move(visitor.out);
+}
+
+std::string ToString(Type::type id) {
+ switch (id) {
+#define TO_STRING_CASE(_id) \
+ case Type::_id: \
+ return ARROW_STRINGIFY(_id);
+
+ TO_STRING_CASE(NA)
+ TO_STRING_CASE(BOOL)
+ TO_STRING_CASE(INT8)
+ TO_STRING_CASE(INT16)
+ TO_STRING_CASE(INT32)
+ TO_STRING_CASE(INT64)
+ TO_STRING_CASE(UINT8)
+ TO_STRING_CASE(UINT16)
+ TO_STRING_CASE(UINT32)
+ TO_STRING_CASE(UINT64)
+ TO_STRING_CASE(HALF_FLOAT)
+ TO_STRING_CASE(FLOAT)
+ TO_STRING_CASE(DOUBLE)
TO_STRING_CASE(DECIMAL128)
TO_STRING_CASE(DECIMAL256)
- TO_STRING_CASE(DATE32)
- TO_STRING_CASE(DATE64)
- TO_STRING_CASE(TIME32)
- TO_STRING_CASE(TIME64)
- TO_STRING_CASE(TIMESTAMP)
- TO_STRING_CASE(INTERVAL_DAY_TIME)
- TO_STRING_CASE(INTERVAL_MONTHS)
- TO_STRING_CASE(DURATION)
- TO_STRING_CASE(STRING)
- TO_STRING_CASE(BINARY)
- TO_STRING_CASE(LARGE_STRING)
- TO_STRING_CASE(LARGE_BINARY)
- TO_STRING_CASE(FIXED_SIZE_BINARY)
- TO_STRING_CASE(STRUCT)
- TO_STRING_CASE(LIST)
- TO_STRING_CASE(LARGE_LIST)
- TO_STRING_CASE(FIXED_SIZE_LIST)
- TO_STRING_CASE(MAP)
- TO_STRING_CASE(DENSE_UNION)
- TO_STRING_CASE(SPARSE_UNION)
- TO_STRING_CASE(DICTIONARY)
- TO_STRING_CASE(EXTENSION)
-
-#undef TO_STRING_CASE
-
- default:
- ARROW_LOG(FATAL) << "Unhandled type id: " << id;
- return "";
- }
-}
-
-std::string ToString(TimeUnit::type unit) {
- switch (unit) {
- case TimeUnit::SECOND:
- return "s";
- case TimeUnit::MILLI:
- return "ms";
- case TimeUnit::MICRO:
- return "us";
- case TimeUnit::NANO:
- return "ns";
- default:
- DCHECK(false);
- return "";
- }
-}
-
-int GetByteWidth(const DataType& type) {
- const auto& fw_type = checked_cast<const FixedWidthType&>(type);
- return fw_type.bit_width() / CHAR_BIT;
-}
-
-} // namespace internal
-
-namespace {
-
+ TO_STRING_CASE(DATE32)
+ TO_STRING_CASE(DATE64)
+ TO_STRING_CASE(TIME32)
+ TO_STRING_CASE(TIME64)
+ TO_STRING_CASE(TIMESTAMP)
+ TO_STRING_CASE(INTERVAL_DAY_TIME)
+ TO_STRING_CASE(INTERVAL_MONTHS)
+ TO_STRING_CASE(DURATION)
+ TO_STRING_CASE(STRING)
+ TO_STRING_CASE(BINARY)
+ TO_STRING_CASE(LARGE_STRING)
+ TO_STRING_CASE(LARGE_BINARY)
+ TO_STRING_CASE(FIXED_SIZE_BINARY)
+ TO_STRING_CASE(STRUCT)
+ TO_STRING_CASE(LIST)
+ TO_STRING_CASE(LARGE_LIST)
+ TO_STRING_CASE(FIXED_SIZE_LIST)
+ TO_STRING_CASE(MAP)
+ TO_STRING_CASE(DENSE_UNION)
+ TO_STRING_CASE(SPARSE_UNION)
+ TO_STRING_CASE(DICTIONARY)
+ TO_STRING_CASE(EXTENSION)
+
+#undef TO_STRING_CASE
+
+ default:
+ ARROW_LOG(FATAL) << "Unhandled type id: " << id;
+ return "";
+ }
+}
+
+std::string ToString(TimeUnit::type unit) {
+ switch (unit) {
+ case TimeUnit::SECOND:
+ return "s";
+ case TimeUnit::MILLI:
+ return "ms";
+ case TimeUnit::MICRO:
+ return "us";
+ case TimeUnit::NANO:
+ return "ns";
+ default:
+ DCHECK(false);
+ return "";
+ }
+}
+
+int GetByteWidth(const DataType& type) {
+ const auto& fw_type = checked_cast<const FixedWidthType&>(type);
+ return fw_type.bit_width() / CHAR_BIT;
+}
+
+} // namespace internal
+
+namespace {
+
struct PhysicalTypeVisitor {
const std::shared_ptr<DataType>& real_type;
std::shared_ptr<DataType> result;
@@ -214,563 +214,563 @@ std::shared_ptr<DataType> GetPhysicalType(const std::shared_ptr<DataType>& real_
namespace {
-using internal::checked_cast;
-
-// Merges `existing` and `other` if one of them is of NullType, otherwise
-// returns nullptr.
-// - if `other` if of NullType or is nullable, the unified field will be nullable.
-// - if `existing` is of NullType but other is not, the unified field will
-// have `other`'s type and will be nullable
-std::shared_ptr<Field> MaybePromoteNullTypes(const Field& existing, const Field& other) {
- if (existing.type()->id() != Type::NA && other.type()->id() != Type::NA) {
- return nullptr;
- }
- if (existing.type()->id() == Type::NA) {
- return other.WithNullable(true)->WithMetadata(existing.metadata());
- }
- // `other` must be null.
- return existing.WithNullable(true);
-}
-} // namespace
-
-Field::~Field() {}
-
-bool Field::HasMetadata() const {
- return (metadata_ != nullptr) && (metadata_->size() > 0);
-}
-
-std::shared_ptr<Field> Field::WithMetadata(
- const std::shared_ptr<const KeyValueMetadata>& metadata) const {
- return std::make_shared<Field>(name_, type_, nullable_, metadata);
-}
-
-std::shared_ptr<Field> Field::WithMergedMetadata(
- const std::shared_ptr<const KeyValueMetadata>& metadata) const {
- std::shared_ptr<const KeyValueMetadata> merged_metadata;
- if (metadata_) {
- merged_metadata = metadata_->Merge(*metadata);
- } else {
- merged_metadata = metadata;
- }
- return std::make_shared<Field>(name_, type_, nullable_, merged_metadata);
-}
-
-std::shared_ptr<Field> Field::RemoveMetadata() const {
- return std::make_shared<Field>(name_, type_, nullable_);
-}
-
-std::shared_ptr<Field> Field::WithType(const std::shared_ptr<DataType>& type) const {
- return std::make_shared<Field>(name_, type, nullable_, metadata_);
-}
-
-std::shared_ptr<Field> Field::WithName(const std::string& name) const {
- return std::make_shared<Field>(name, type_, nullable_, metadata_);
-}
-
-std::shared_ptr<Field> Field::WithNullable(const bool nullable) const {
- return std::make_shared<Field>(name_, type_, nullable, metadata_);
-}
-
-Result<std::shared_ptr<Field>> Field::MergeWith(const Field& other,
- MergeOptions options) const {
- if (name() != other.name()) {
- return Status::Invalid("Field ", name(), " doesn't have the same name as ",
- other.name());
- }
-
- if (Equals(other, /*check_metadata=*/false)) {
- return Copy();
- }
-
- if (options.promote_nullability) {
- if (type()->Equals(other.type())) {
- return Copy()->WithNullable(nullable() || other.nullable());
- }
- std::shared_ptr<Field> promoted = MaybePromoteNullTypes(*this, other);
- if (promoted) return promoted;
- }
-
- return Status::Invalid("Unable to merge: Field ", name(),
- " has incompatible types: ", type()->ToString(), " vs ",
- other.type()->ToString());
-}
-
-Result<std::shared_ptr<Field>> Field::MergeWith(const std::shared_ptr<Field>& other,
- MergeOptions options) const {
- DCHECK_NE(other, nullptr);
- return MergeWith(*other, options);
-}
-
-std::vector<std::shared_ptr<Field>> Field::Flatten() const {
- std::vector<std::shared_ptr<Field>> flattened;
- if (type_->id() == Type::STRUCT) {
- for (const auto& child : type_->fields()) {
- auto flattened_child = child->Copy();
- flattened.push_back(flattened_child);
- flattened_child->name_.insert(0, name() + ".");
- flattened_child->nullable_ |= nullable_;
- }
- } else {
- flattened.push_back(this->Copy());
- }
- return flattened;
-}
-
-std::shared_ptr<Field> Field::Copy() const {
- return ::arrow::field(name_, type_, nullable_, metadata_);
-}
-
-bool Field::Equals(const Field& other, bool check_metadata) const {
- if (this == &other) {
- return true;
- }
- if (this->name_ == other.name_ && this->nullable_ == other.nullable_ &&
- this->type_->Equals(*other.type_.get(), check_metadata)) {
- if (!check_metadata) {
- return true;
- } else if (this->HasMetadata() && other.HasMetadata()) {
- return metadata_->Equals(*other.metadata_);
- } else if (!this->HasMetadata() && !other.HasMetadata()) {
- return true;
- } else {
- return false;
- }
- }
- return false;
-}
-
-bool Field::Equals(const std::shared_ptr<Field>& other, bool check_metadata) const {
- return Equals(*other.get(), check_metadata);
-}
-
-bool Field::IsCompatibleWith(const Field& other) const { return MergeWith(other).ok(); }
-
-bool Field::IsCompatibleWith(const std::shared_ptr<Field>& other) const {
- DCHECK_NE(other, nullptr);
- return IsCompatibleWith(*other);
-}
-
-std::string Field::ToString(bool show_metadata) const {
- std::stringstream ss;
- ss << name_ << ": " << type_->ToString();
- if (!nullable_) {
- ss << " not null";
- }
- if (show_metadata && metadata_) {
- ss << metadata_->ToString();
- }
- return ss.str();
-}
-
-DataType::~DataType() {}
-
-bool DataType::Equals(const DataType& other, bool check_metadata) const {
- return TypeEquals(*this, other, check_metadata);
-}
-
-bool DataType::Equals(const std::shared_ptr<DataType>& other) const {
- if (!other) {
- return false;
- }
- return Equals(*other.get());
-}
-
-size_t DataType::Hash() const {
- static constexpr size_t kHashSeed = 0;
- size_t result = kHashSeed;
- internal::hash_combine(result, this->ComputeFingerprint());
- return result;
-}
-
-std::ostream& operator<<(std::ostream& os, const DataType& type) {
- os << type.ToString();
- return os;
-}
-
-FloatingPointType::Precision HalfFloatType::precision() const {
- return FloatingPointType::HALF;
-}
-
-FloatingPointType::Precision FloatType::precision() const {
- return FloatingPointType::SINGLE;
-}
-
-FloatingPointType::Precision DoubleType::precision() const {
- return FloatingPointType::DOUBLE;
-}
-
-std::string ListType::ToString() const {
- std::stringstream s;
- s << "list<" << value_field()->ToString() << ">";
- return s.str();
-}
-
-std::string LargeListType::ToString() const {
- std::stringstream s;
- s << "large_list<" << value_field()->ToString() << ">";
- return s.str();
-}
-
-MapType::MapType(std::shared_ptr<DataType> key_type, std::shared_ptr<DataType> item_type,
- bool keys_sorted)
- : MapType(::arrow::field("key", std::move(key_type), false),
- ::arrow::field("value", std::move(item_type)), keys_sorted) {}
-
-MapType::MapType(std::shared_ptr<DataType> key_type, std::shared_ptr<Field> item_field,
- bool keys_sorted)
- : MapType(::arrow::field("key", std::move(key_type), false), std::move(item_field),
- keys_sorted) {}
-
-MapType::MapType(std::shared_ptr<Field> key_field, std::shared_ptr<Field> item_field,
- bool keys_sorted)
- : MapType(
- ::arrow::field("entries",
- struct_({std::move(key_field), std::move(item_field)}), false),
- keys_sorted) {}
-
-MapType::MapType(std::shared_ptr<Field> value_field, bool keys_sorted)
- : ListType(std::move(value_field)), keys_sorted_(keys_sorted) {
- id_ = type_id;
-}
-
-Result<std::shared_ptr<DataType>> MapType::Make(std::shared_ptr<Field> value_field,
- bool keys_sorted) {
- const auto& value_type = *value_field->type();
- if (value_field->nullable() || value_type.id() != Type::STRUCT) {
- return Status::TypeError("Map entry field should be non-nullable struct");
- }
- const auto& struct_type = checked_cast<const StructType&>(value_type);
- if (struct_type.num_fields() != 2) {
- return Status::TypeError("Map entry field should have two children (got ",
- struct_type.num_fields(), ")");
- }
- if (struct_type.field(0)->nullable()) {
- return Status::TypeError("Map key field should be non-nullable");
- }
- return std::make_shared<MapType>(std::move(value_field), keys_sorted);
-}
-
-std::string MapType::ToString() const {
- std::stringstream s;
-
- const auto print_field_name = [](std::ostream& os, const Field& field,
- const char* std_name) {
- if (field.name() != std_name) {
- os << " ('" << field.name() << "')";
- }
- };
- const auto print_field = [&](std::ostream& os, const Field& field,
- const char* std_name) {
- os << field.type()->ToString();
- print_field_name(os, field, std_name);
- };
-
- s << "map<";
- print_field(s, *key_field(), "key");
- s << ", ";
- print_field(s, *item_field(), "value");
- if (keys_sorted_) {
- s << ", keys_sorted";
- }
- print_field_name(s, *value_field(), "entries");
- s << ">";
- return s.str();
-}
-
-std::string FixedSizeListType::ToString() const {
- std::stringstream s;
- s << "fixed_size_list<" << value_field()->ToString() << ">[" << list_size_ << "]";
- return s.str();
-}
-
-std::string BinaryType::ToString() const { return "binary"; }
-
-std::string LargeBinaryType::ToString() const { return "large_binary"; }
-
-std::string StringType::ToString() const { return "string"; }
-
-std::string LargeStringType::ToString() const { return "large_string"; }
-
-int FixedSizeBinaryType::bit_width() const { return CHAR_BIT * byte_width(); }
-
-Result<std::shared_ptr<DataType>> FixedSizeBinaryType::Make(int32_t byte_width) {
- if (byte_width < 0) {
- return Status::Invalid("Negative FixedSizeBinaryType byte width");
- }
- if (byte_width > std::numeric_limits<int>::max() / CHAR_BIT) {
- // bit_width() would overflow
- return Status::Invalid("byte width of FixedSizeBinaryType too large");
- }
- return std::make_shared<FixedSizeBinaryType>(byte_width);
-}
-
-std::string FixedSizeBinaryType::ToString() const {
- std::stringstream ss;
- ss << "fixed_size_binary[" << byte_width_ << "]";
- return ss.str();
-}
-
-// ----------------------------------------------------------------------
-// Date types
-
-DateType::DateType(Type::type type_id) : TemporalType(type_id) {}
-
-Date32Type::Date32Type() : DateType(Type::DATE32) {}
-
-Date64Type::Date64Type() : DateType(Type::DATE64) {}
-
-std::string Date64Type::ToString() const { return std::string("date64[ms]"); }
-
-std::string Date32Type::ToString() const { return std::string("date32[day]"); }
-
-// ----------------------------------------------------------------------
-// Time types
-
-TimeType::TimeType(Type::type type_id, TimeUnit::type unit)
- : TemporalType(type_id), unit_(unit) {}
-
-Time32Type::Time32Type(TimeUnit::type unit) : TimeType(Type::TIME32, unit) {
- ARROW_CHECK(unit == TimeUnit::SECOND || unit == TimeUnit::MILLI)
- << "Must be seconds or milliseconds";
-}
-
-std::string Time32Type::ToString() const {
- std::stringstream ss;
- ss << "time32[" << this->unit_ << "]";
- return ss.str();
-}
-
-Time64Type::Time64Type(TimeUnit::type unit) : TimeType(Type::TIME64, unit) {
- ARROW_CHECK(unit == TimeUnit::MICRO || unit == TimeUnit::NANO)
- << "Must be microseconds or nanoseconds";
-}
-
-std::string Time64Type::ToString() const {
- std::stringstream ss;
- ss << "time64[" << this->unit_ << "]";
- return ss.str();
-}
-
-std::ostream& operator<<(std::ostream& os, TimeUnit::type unit) {
- switch (unit) {
- case TimeUnit::SECOND:
- os << "s";
- break;
- case TimeUnit::MILLI:
- os << "ms";
- break;
- case TimeUnit::MICRO:
- os << "us";
- break;
- case TimeUnit::NANO:
- os << "ns";
- break;
- }
- return os;
-}
-
-// ----------------------------------------------------------------------
-// Timestamp types
-
-std::string TimestampType::ToString() const {
- std::stringstream ss;
- ss << "timestamp[" << this->unit_;
- if (this->timezone_.size() > 0) {
- ss << ", tz=" << this->timezone_;
- }
- ss << "]";
- return ss.str();
-}
-
-// Duration types
-std::string DurationType::ToString() const {
- std::stringstream ss;
- ss << "duration[" << this->unit_ << "]";
- return ss.str();
-}
-
-// ----------------------------------------------------------------------
-// Union type
-
-constexpr int8_t UnionType::kMaxTypeCode;
-constexpr int UnionType::kInvalidChildId;
-
-UnionMode::type UnionType::mode() const {
- return id_ == Type::SPARSE_UNION ? UnionMode::SPARSE : UnionMode::DENSE;
-}
-
-UnionType::UnionType(std::vector<std::shared_ptr<Field>> fields,
- std::vector<int8_t> type_codes, Type::type id)
- : NestedType(id),
- type_codes_(std::move(type_codes)),
- child_ids_(kMaxTypeCode + 1, kInvalidChildId) {
- children_ = std::move(fields);
- DCHECK_OK(ValidateParameters(children_, type_codes_, mode()));
- for (int child_id = 0; child_id < static_cast<int>(type_codes_.size()); ++child_id) {
- const auto type_code = type_codes_[child_id];
- child_ids_[type_code] = child_id;
- }
-}
-
-Status UnionType::ValidateParameters(const std::vector<std::shared_ptr<Field>>& fields,
- const std::vector<int8_t>& type_codes,
- UnionMode::type mode) {
- if (fields.size() != type_codes.size()) {
- return Status::Invalid("Union should get the same number of fields as type codes");
- }
- for (const auto type_code : type_codes) {
- if (type_code < 0 || type_code > kMaxTypeCode) {
- return Status::Invalid("Union type code out of bounds");
- }
- }
- return Status::OK();
-}
-
-DataTypeLayout UnionType::layout() const {
- if (mode() == UnionMode::SPARSE) {
- return DataTypeLayout(
- {DataTypeLayout::AlwaysNull(), DataTypeLayout::FixedWidth(sizeof(uint8_t))});
- } else {
- return DataTypeLayout({DataTypeLayout::AlwaysNull(),
- DataTypeLayout::FixedWidth(sizeof(uint8_t)),
- DataTypeLayout::FixedWidth(sizeof(int32_t))});
- }
-}
-
-uint8_t UnionType::max_type_code() const {
- return type_codes_.size() == 0
- ? 0
- : *std::max_element(type_codes_.begin(), type_codes_.end());
-}
-
-std::string UnionType::ToString() const {
- std::stringstream s;
-
- s << name() << "<";
-
- for (size_t i = 0; i < children_.size(); ++i) {
- if (i) {
- s << ", ";
- }
- s << children_[i]->ToString() << "=" << static_cast<int>(type_codes_[i]);
- }
- s << ">";
- return s.str();
-}
-
-SparseUnionType::SparseUnionType(std::vector<std::shared_ptr<Field>> fields,
- std::vector<int8_t> type_codes)
- : UnionType(fields, type_codes, Type::SPARSE_UNION) {}
-
-Result<std::shared_ptr<DataType>> SparseUnionType::Make(
- std::vector<std::shared_ptr<Field>> fields, std::vector<int8_t> type_codes) {
- RETURN_NOT_OK(ValidateParameters(fields, type_codes, UnionMode::SPARSE));
- return std::make_shared<SparseUnionType>(fields, type_codes);
-}
-
-DenseUnionType::DenseUnionType(std::vector<std::shared_ptr<Field>> fields,
- std::vector<int8_t> type_codes)
- : UnionType(fields, type_codes, Type::DENSE_UNION) {}
-
-Result<std::shared_ptr<DataType>> DenseUnionType::Make(
- std::vector<std::shared_ptr<Field>> fields, std::vector<int8_t> type_codes) {
- RETURN_NOT_OK(ValidateParameters(fields, type_codes, UnionMode::DENSE));
- return std::make_shared<DenseUnionType>(fields, type_codes);
-}
-
-// ----------------------------------------------------------------------
-// Struct type
-
-namespace {
-
-std::unordered_multimap<std::string, int> CreateNameToIndexMap(
- const std::vector<std::shared_ptr<Field>>& fields) {
- std::unordered_multimap<std::string, int> name_to_index;
- for (size_t i = 0; i < fields.size(); ++i) {
- name_to_index.emplace(fields[i]->name(), static_cast<int>(i));
- }
- return name_to_index;
-}
-
-template <int NotFoundValue = -1, int DuplicateFoundValue = -1>
-int LookupNameIndex(const std::unordered_multimap<std::string, int>& name_to_index,
- const std::string& name) {
- auto p = name_to_index.equal_range(name);
- auto it = p.first;
- if (it == p.second) {
- // Not found
- return NotFoundValue;
- }
- auto index = it->second;
- if (++it != p.second) {
- // Duplicate field name
- return DuplicateFoundValue;
- }
- return index;
-}
-
-} // namespace
-
-class StructType::Impl {
- public:
- explicit Impl(const std::vector<std::shared_ptr<Field>>& fields)
- : name_to_index_(CreateNameToIndexMap(fields)) {}
-
- const std::unordered_multimap<std::string, int> name_to_index_;
-};
-
-StructType::StructType(const std::vector<std::shared_ptr<Field>>& fields)
- : NestedType(Type::STRUCT), impl_(new Impl(fields)) {
- children_ = fields;
-}
-
-StructType::~StructType() {}
-
-std::string StructType::ToString() const {
- std::stringstream s;
- s << "struct<";
- for (int i = 0; i < this->num_fields(); ++i) {
- if (i > 0) {
- s << ", ";
- }
- std::shared_ptr<Field> field = this->field(i);
- s << field->ToString();
- }
- s << ">";
- return s.str();
-}
-
-std::shared_ptr<Field> StructType::GetFieldByName(const std::string& name) const {
- int i = GetFieldIndex(name);
- return i == -1 ? nullptr : children_[i];
-}
-
-int StructType::GetFieldIndex(const std::string& name) const {
- return LookupNameIndex(impl_->name_to_index_, name);
-}
-
-std::vector<int> StructType::GetAllFieldIndices(const std::string& name) const {
- std::vector<int> result;
- auto p = impl_->name_to_index_.equal_range(name);
- for (auto it = p.first; it != p.second; ++it) {
- result.push_back(it->second);
- }
- if (result.size() > 1) {
- std::sort(result.begin(), result.end());
- }
- return result;
-}
-
-std::vector<std::shared_ptr<Field>> StructType::GetAllFieldsByName(
- const std::string& name) const {
- std::vector<std::shared_ptr<Field>> result;
- auto p = impl_->name_to_index_.equal_range(name);
- for (auto it = p.first; it != p.second; ++it) {
- result.push_back(children_[it->second]);
- }
- return result;
-}
-
+using internal::checked_cast;
+
+// Merges `existing` and `other` if one of them is of NullType, otherwise
+// returns nullptr.
+// - if `other` if of NullType or is nullable, the unified field will be nullable.
+// - if `existing` is of NullType but other is not, the unified field will
+// have `other`'s type and will be nullable
+std::shared_ptr<Field> MaybePromoteNullTypes(const Field& existing, const Field& other) {
+ if (existing.type()->id() != Type::NA && other.type()->id() != Type::NA) {
+ return nullptr;
+ }
+ if (existing.type()->id() == Type::NA) {
+ return other.WithNullable(true)->WithMetadata(existing.metadata());
+ }
+ // `other` must be null.
+ return existing.WithNullable(true);
+}
+} // namespace
+
+Field::~Field() {}
+
+bool Field::HasMetadata() const {
+ return (metadata_ != nullptr) && (metadata_->size() > 0);
+}
+
+std::shared_ptr<Field> Field::WithMetadata(
+ const std::shared_ptr<const KeyValueMetadata>& metadata) const {
+ return std::make_shared<Field>(name_, type_, nullable_, metadata);
+}
+
+std::shared_ptr<Field> Field::WithMergedMetadata(
+ const std::shared_ptr<const KeyValueMetadata>& metadata) const {
+ std::shared_ptr<const KeyValueMetadata> merged_metadata;
+ if (metadata_) {
+ merged_metadata = metadata_->Merge(*metadata);
+ } else {
+ merged_metadata = metadata;
+ }
+ return std::make_shared<Field>(name_, type_, nullable_, merged_metadata);
+}
+
+std::shared_ptr<Field> Field::RemoveMetadata() const {
+ return std::make_shared<Field>(name_, type_, nullable_);
+}
+
+std::shared_ptr<Field> Field::WithType(const std::shared_ptr<DataType>& type) const {
+ return std::make_shared<Field>(name_, type, nullable_, metadata_);
+}
+
+std::shared_ptr<Field> Field::WithName(const std::string& name) const {
+ return std::make_shared<Field>(name, type_, nullable_, metadata_);
+}
+
+std::shared_ptr<Field> Field::WithNullable(const bool nullable) const {
+ return std::make_shared<Field>(name_, type_, nullable, metadata_);
+}
+
+Result<std::shared_ptr<Field>> Field::MergeWith(const Field& other,
+ MergeOptions options) const {
+ if (name() != other.name()) {
+ return Status::Invalid("Field ", name(), " doesn't have the same name as ",
+ other.name());
+ }
+
+ if (Equals(other, /*check_metadata=*/false)) {
+ return Copy();
+ }
+
+ if (options.promote_nullability) {
+ if (type()->Equals(other.type())) {
+ return Copy()->WithNullable(nullable() || other.nullable());
+ }
+ std::shared_ptr<Field> promoted = MaybePromoteNullTypes(*this, other);
+ if (promoted) return promoted;
+ }
+
+ return Status::Invalid("Unable to merge: Field ", name(),
+ " has incompatible types: ", type()->ToString(), " vs ",
+ other.type()->ToString());
+}
+
+Result<std::shared_ptr<Field>> Field::MergeWith(const std::shared_ptr<Field>& other,
+ MergeOptions options) const {
+ DCHECK_NE(other, nullptr);
+ return MergeWith(*other, options);
+}
+
+std::vector<std::shared_ptr<Field>> Field::Flatten() const {
+ std::vector<std::shared_ptr<Field>> flattened;
+ if (type_->id() == Type::STRUCT) {
+ for (const auto& child : type_->fields()) {
+ auto flattened_child = child->Copy();
+ flattened.push_back(flattened_child);
+ flattened_child->name_.insert(0, name() + ".");
+ flattened_child->nullable_ |= nullable_;
+ }
+ } else {
+ flattened.push_back(this->Copy());
+ }
+ return flattened;
+}
+
+std::shared_ptr<Field> Field::Copy() const {
+ return ::arrow::field(name_, type_, nullable_, metadata_);
+}
+
+bool Field::Equals(const Field& other, bool check_metadata) const {
+ if (this == &other) {
+ return true;
+ }
+ if (this->name_ == other.name_ && this->nullable_ == other.nullable_ &&
+ this->type_->Equals(*other.type_.get(), check_metadata)) {
+ if (!check_metadata) {
+ return true;
+ } else if (this->HasMetadata() && other.HasMetadata()) {
+ return metadata_->Equals(*other.metadata_);
+ } else if (!this->HasMetadata() && !other.HasMetadata()) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+ return false;
+}
+
+bool Field::Equals(const std::shared_ptr<Field>& other, bool check_metadata) const {
+ return Equals(*other.get(), check_metadata);
+}
+
+bool Field::IsCompatibleWith(const Field& other) const { return MergeWith(other).ok(); }
+
+bool Field::IsCompatibleWith(const std::shared_ptr<Field>& other) const {
+ DCHECK_NE(other, nullptr);
+ return IsCompatibleWith(*other);
+}
+
+std::string Field::ToString(bool show_metadata) const {
+ std::stringstream ss;
+ ss << name_ << ": " << type_->ToString();
+ if (!nullable_) {
+ ss << " not null";
+ }
+ if (show_metadata && metadata_) {
+ ss << metadata_->ToString();
+ }
+ return ss.str();
+}
+
+DataType::~DataType() {}
+
+bool DataType::Equals(const DataType& other, bool check_metadata) const {
+ return TypeEquals(*this, other, check_metadata);
+}
+
+bool DataType::Equals(const std::shared_ptr<DataType>& other) const {
+ if (!other) {
+ return false;
+ }
+ return Equals(*other.get());
+}
+
+size_t DataType::Hash() const {
+ static constexpr size_t kHashSeed = 0;
+ size_t result = kHashSeed;
+ internal::hash_combine(result, this->ComputeFingerprint());
+ return result;
+}
+
+std::ostream& operator<<(std::ostream& os, const DataType& type) {
+ os << type.ToString();
+ return os;
+}
+
+FloatingPointType::Precision HalfFloatType::precision() const {
+ return FloatingPointType::HALF;
+}
+
+FloatingPointType::Precision FloatType::precision() const {
+ return FloatingPointType::SINGLE;
+}
+
+FloatingPointType::Precision DoubleType::precision() const {
+ return FloatingPointType::DOUBLE;
+}
+
+std::string ListType::ToString() const {
+ std::stringstream s;
+ s << "list<" << value_field()->ToString() << ">";
+ return s.str();
+}
+
+std::string LargeListType::ToString() const {
+ std::stringstream s;
+ s << "large_list<" << value_field()->ToString() << ">";
+ return s.str();
+}
+
+MapType::MapType(std::shared_ptr<DataType> key_type, std::shared_ptr<DataType> item_type,
+ bool keys_sorted)
+ : MapType(::arrow::field("key", std::move(key_type), false),
+ ::arrow::field("value", std::move(item_type)), keys_sorted) {}
+
+MapType::MapType(std::shared_ptr<DataType> key_type, std::shared_ptr<Field> item_field,
+ bool keys_sorted)
+ : MapType(::arrow::field("key", std::move(key_type), false), std::move(item_field),
+ keys_sorted) {}
+
+MapType::MapType(std::shared_ptr<Field> key_field, std::shared_ptr<Field> item_field,
+ bool keys_sorted)
+ : MapType(
+ ::arrow::field("entries",
+ struct_({std::move(key_field), std::move(item_field)}), false),
+ keys_sorted) {}
+
+MapType::MapType(std::shared_ptr<Field> value_field, bool keys_sorted)
+ : ListType(std::move(value_field)), keys_sorted_(keys_sorted) {
+ id_ = type_id;
+}
+
+Result<std::shared_ptr<DataType>> MapType::Make(std::shared_ptr<Field> value_field,
+ bool keys_sorted) {
+ const auto& value_type = *value_field->type();
+ if (value_field->nullable() || value_type.id() != Type::STRUCT) {
+ return Status::TypeError("Map entry field should be non-nullable struct");
+ }
+ const auto& struct_type = checked_cast<const StructType&>(value_type);
+ if (struct_type.num_fields() != 2) {
+ return Status::TypeError("Map entry field should have two children (got ",
+ struct_type.num_fields(), ")");
+ }
+ if (struct_type.field(0)->nullable()) {
+ return Status::TypeError("Map key field should be non-nullable");
+ }
+ return std::make_shared<MapType>(std::move(value_field), keys_sorted);
+}
+
+std::string MapType::ToString() const {
+ std::stringstream s;
+
+ const auto print_field_name = [](std::ostream& os, const Field& field,
+ const char* std_name) {
+ if (field.name() != std_name) {
+ os << " ('" << field.name() << "')";
+ }
+ };
+ const auto print_field = [&](std::ostream& os, const Field& field,
+ const char* std_name) {
+ os << field.type()->ToString();
+ print_field_name(os, field, std_name);
+ };
+
+ s << "map<";
+ print_field(s, *key_field(), "key");
+ s << ", ";
+ print_field(s, *item_field(), "value");
+ if (keys_sorted_) {
+ s << ", keys_sorted";
+ }
+ print_field_name(s, *value_field(), "entries");
+ s << ">";
+ return s.str();
+}
+
+std::string FixedSizeListType::ToString() const {
+ std::stringstream s;
+ s << "fixed_size_list<" << value_field()->ToString() << ">[" << list_size_ << "]";
+ return s.str();
+}
+
+std::string BinaryType::ToString() const { return "binary"; }
+
+std::string LargeBinaryType::ToString() const { return "large_binary"; }
+
+std::string StringType::ToString() const { return "string"; }
+
+std::string LargeStringType::ToString() const { return "large_string"; }
+
+int FixedSizeBinaryType::bit_width() const { return CHAR_BIT * byte_width(); }
+
+Result<std::shared_ptr<DataType>> FixedSizeBinaryType::Make(int32_t byte_width) {
+ if (byte_width < 0) {
+ return Status::Invalid("Negative FixedSizeBinaryType byte width");
+ }
+ if (byte_width > std::numeric_limits<int>::max() / CHAR_BIT) {
+ // bit_width() would overflow
+ return Status::Invalid("byte width of FixedSizeBinaryType too large");
+ }
+ return std::make_shared<FixedSizeBinaryType>(byte_width);
+}
+
+std::string FixedSizeBinaryType::ToString() const {
+ std::stringstream ss;
+ ss << "fixed_size_binary[" << byte_width_ << "]";
+ return ss.str();
+}
+
+// ----------------------------------------------------------------------
+// Date types
+
+DateType::DateType(Type::type type_id) : TemporalType(type_id) {}
+
+Date32Type::Date32Type() : DateType(Type::DATE32) {}
+
+Date64Type::Date64Type() : DateType(Type::DATE64) {}
+
+std::string Date64Type::ToString() const { return std::string("date64[ms]"); }
+
+std::string Date32Type::ToString() const { return std::string("date32[day]"); }
+
+// ----------------------------------------------------------------------
+// Time types
+
+TimeType::TimeType(Type::type type_id, TimeUnit::type unit)
+ : TemporalType(type_id), unit_(unit) {}
+
+Time32Type::Time32Type(TimeUnit::type unit) : TimeType(Type::TIME32, unit) {
+ ARROW_CHECK(unit == TimeUnit::SECOND || unit == TimeUnit::MILLI)
+ << "Must be seconds or milliseconds";
+}
+
+std::string Time32Type::ToString() const {
+ std::stringstream ss;
+ ss << "time32[" << this->unit_ << "]";
+ return ss.str();
+}
+
+Time64Type::Time64Type(TimeUnit::type unit) : TimeType(Type::TIME64, unit) {
+ ARROW_CHECK(unit == TimeUnit::MICRO || unit == TimeUnit::NANO)
+ << "Must be microseconds or nanoseconds";
+}
+
+std::string Time64Type::ToString() const {
+ std::stringstream ss;
+ ss << "time64[" << this->unit_ << "]";
+ return ss.str();
+}
+
+std::ostream& operator<<(std::ostream& os, TimeUnit::type unit) {
+ switch (unit) {
+ case TimeUnit::SECOND:
+ os << "s";
+ break;
+ case TimeUnit::MILLI:
+ os << "ms";
+ break;
+ case TimeUnit::MICRO:
+ os << "us";
+ break;
+ case TimeUnit::NANO:
+ os << "ns";
+ break;
+ }
+ return os;
+}
+
+// ----------------------------------------------------------------------
+// Timestamp types
+
+std::string TimestampType::ToString() const {
+ std::stringstream ss;
+ ss << "timestamp[" << this->unit_;
+ if (this->timezone_.size() > 0) {
+ ss << ", tz=" << this->timezone_;
+ }
+ ss << "]";
+ return ss.str();
+}
+
+// Duration types
+std::string DurationType::ToString() const {
+ std::stringstream ss;
+ ss << "duration[" << this->unit_ << "]";
+ return ss.str();
+}
+
+// ----------------------------------------------------------------------
+// Union type
+
+constexpr int8_t UnionType::kMaxTypeCode;
+constexpr int UnionType::kInvalidChildId;
+
+UnionMode::type UnionType::mode() const {
+ return id_ == Type::SPARSE_UNION ? UnionMode::SPARSE : UnionMode::DENSE;
+}
+
+UnionType::UnionType(std::vector<std::shared_ptr<Field>> fields,
+ std::vector<int8_t> type_codes, Type::type id)
+ : NestedType(id),
+ type_codes_(std::move(type_codes)),
+ child_ids_(kMaxTypeCode + 1, kInvalidChildId) {
+ children_ = std::move(fields);
+ DCHECK_OK(ValidateParameters(children_, type_codes_, mode()));
+ for (int child_id = 0; child_id < static_cast<int>(type_codes_.size()); ++child_id) {
+ const auto type_code = type_codes_[child_id];
+ child_ids_[type_code] = child_id;
+ }
+}
+
+Status UnionType::ValidateParameters(const std::vector<std::shared_ptr<Field>>& fields,
+ const std::vector<int8_t>& type_codes,
+ UnionMode::type mode) {
+ if (fields.size() != type_codes.size()) {
+ return Status::Invalid("Union should get the same number of fields as type codes");
+ }
+ for (const auto type_code : type_codes) {
+ if (type_code < 0 || type_code > kMaxTypeCode) {
+ return Status::Invalid("Union type code out of bounds");
+ }
+ }
+ return Status::OK();
+}
+
+DataTypeLayout UnionType::layout() const {
+ if (mode() == UnionMode::SPARSE) {
+ return DataTypeLayout(
+ {DataTypeLayout::AlwaysNull(), DataTypeLayout::FixedWidth(sizeof(uint8_t))});
+ } else {
+ return DataTypeLayout({DataTypeLayout::AlwaysNull(),
+ DataTypeLayout::FixedWidth(sizeof(uint8_t)),
+ DataTypeLayout::FixedWidth(sizeof(int32_t))});
+ }
+}
+
+uint8_t UnionType::max_type_code() const {
+ return type_codes_.size() == 0
+ ? 0
+ : *std::max_element(type_codes_.begin(), type_codes_.end());
+}
+
+std::string UnionType::ToString() const {
+ std::stringstream s;
+
+ s << name() << "<";
+
+ for (size_t i = 0; i < children_.size(); ++i) {
+ if (i) {
+ s << ", ";
+ }
+ s << children_[i]->ToString() << "=" << static_cast<int>(type_codes_[i]);
+ }
+ s << ">";
+ return s.str();
+}
+
+SparseUnionType::SparseUnionType(std::vector<std::shared_ptr<Field>> fields,
+ std::vector<int8_t> type_codes)
+ : UnionType(fields, type_codes, Type::SPARSE_UNION) {}
+
+Result<std::shared_ptr<DataType>> SparseUnionType::Make(
+ std::vector<std::shared_ptr<Field>> fields, std::vector<int8_t> type_codes) {
+ RETURN_NOT_OK(ValidateParameters(fields, type_codes, UnionMode::SPARSE));
+ return std::make_shared<SparseUnionType>(fields, type_codes);
+}
+
+DenseUnionType::DenseUnionType(std::vector<std::shared_ptr<Field>> fields,
+ std::vector<int8_t> type_codes)
+ : UnionType(fields, type_codes, Type::DENSE_UNION) {}
+
+Result<std::shared_ptr<DataType>> DenseUnionType::Make(
+ std::vector<std::shared_ptr<Field>> fields, std::vector<int8_t> type_codes) {
+ RETURN_NOT_OK(ValidateParameters(fields, type_codes, UnionMode::DENSE));
+ return std::make_shared<DenseUnionType>(fields, type_codes);
+}
+
+// ----------------------------------------------------------------------
+// Struct type
+
+namespace {
+
+std::unordered_multimap<std::string, int> CreateNameToIndexMap(
+ const std::vector<std::shared_ptr<Field>>& fields) {
+ std::unordered_multimap<std::string, int> name_to_index;
+ for (size_t i = 0; i < fields.size(); ++i) {
+ name_to_index.emplace(fields[i]->name(), static_cast<int>(i));
+ }
+ return name_to_index;
+}
+
+template <int NotFoundValue = -1, int DuplicateFoundValue = -1>
+int LookupNameIndex(const std::unordered_multimap<std::string, int>& name_to_index,
+ const std::string& name) {
+ auto p = name_to_index.equal_range(name);
+ auto it = p.first;
+ if (it == p.second) {
+ // Not found
+ return NotFoundValue;
+ }
+ auto index = it->second;
+ if (++it != p.second) {
+ // Duplicate field name
+ return DuplicateFoundValue;
+ }
+ return index;
+}
+
+} // namespace
+
+class StructType::Impl {
+ public:
+ explicit Impl(const std::vector<std::shared_ptr<Field>>& fields)
+ : name_to_index_(CreateNameToIndexMap(fields)) {}
+
+ const std::unordered_multimap<std::string, int> name_to_index_;
+};
+
+StructType::StructType(const std::vector<std::shared_ptr<Field>>& fields)
+ : NestedType(Type::STRUCT), impl_(new Impl(fields)) {
+ children_ = fields;
+}
+
+StructType::~StructType() {}
+
+std::string StructType::ToString() const {
+ std::stringstream s;
+ s << "struct<";
+ for (int i = 0; i < this->num_fields(); ++i) {
+ if (i > 0) {
+ s << ", ";
+ }
+ std::shared_ptr<Field> field = this->field(i);
+ s << field->ToString();
+ }
+ s << ">";
+ return s.str();
+}
+
+std::shared_ptr<Field> StructType::GetFieldByName(const std::string& name) const {
+ int i = GetFieldIndex(name);
+ return i == -1 ? nullptr : children_[i];
+}
+
+int StructType::GetFieldIndex(const std::string& name) const {
+ return LookupNameIndex(impl_->name_to_index_, name);
+}
+
+std::vector<int> StructType::GetAllFieldIndices(const std::string& name) const {
+ std::vector<int> result;
+ auto p = impl_->name_to_index_.equal_range(name);
+ for (auto it = p.first; it != p.second; ++it) {
+ result.push_back(it->second);
+ }
+ if (result.size() > 1) {
+ std::sort(result.begin(), result.end());
+ }
+ return result;
+}
+
+std::vector<std::shared_ptr<Field>> StructType::GetAllFieldsByName(
+ const std::string& name) const {
+ std::vector<std::shared_ptr<Field>> result;
+ auto p = impl_->name_to_index_.equal_range(name);
+ for (auto it = p.first; it != p.second; ++it) {
+ result.push_back(children_[it->second]);
+ }
+ return result;
+}
+
Result<std::shared_ptr<DataType>> DecimalType::Make(Type::type type_id, int32_t precision,
int32_t scale) {
if (type_id == Type::DECIMAL128) {
@@ -804,23 +804,23 @@ int32_t DecimalType::DecimalSize(int32_t precision) {
return static_cast<int32_t>(std::ceil((precision / 8.0) * std::log2(10) + 1));
}
-// ----------------------------------------------------------------------
-// Decimal128 type
-
-Decimal128Type::Decimal128Type(int32_t precision, int32_t scale)
+// ----------------------------------------------------------------------
+// Decimal128 type
+
+Decimal128Type::Decimal128Type(int32_t precision, int32_t scale)
: DecimalType(type_id, 16, precision, scale) {
- ARROW_CHECK_GE(precision, kMinPrecision);
- ARROW_CHECK_LE(precision, kMaxPrecision);
-}
-
-Result<std::shared_ptr<DataType>> Decimal128Type::Make(int32_t precision, int32_t scale) {
- if (precision < kMinPrecision || precision > kMaxPrecision) {
- return Status::Invalid("Decimal precision out of range: ", precision);
- }
- return std::make_shared<Decimal128Type>(precision, scale);
-}
-
-// ----------------------------------------------------------------------
+ ARROW_CHECK_GE(precision, kMinPrecision);
+ ARROW_CHECK_LE(precision, kMaxPrecision);
+}
+
+Result<std::shared_ptr<DataType>> Decimal128Type::Make(int32_t precision, int32_t scale) {
+ if (precision < kMinPrecision || precision > kMaxPrecision) {
+ return Status::Invalid("Decimal precision out of range: ", precision);
+ }
+ return std::make_shared<Decimal128Type>(precision, scale);
+}
+
+// ----------------------------------------------------------------------
// Decimal256 type
Decimal256Type::Decimal256Type(int32_t precision, int32_t scale)
@@ -837,209 +837,209 @@ Result<std::shared_ptr<DataType>> Decimal256Type::Make(int32_t precision, int32_
}
// ----------------------------------------------------------------------
-// Dictionary-encoded type
-
-Status DictionaryType::ValidateParameters(const DataType& index_type,
- const DataType& value_type) {
- if (!is_integer(index_type.id())) {
- return Status::TypeError("Dictionary index type should be integer, got ",
- index_type.ToString());
- }
- return Status::OK();
-}
-
-int DictionaryType::bit_width() const {
- return checked_cast<const FixedWidthType&>(*index_type_).bit_width();
-}
-
-Result<std::shared_ptr<DataType>> DictionaryType::Make(
- const std::shared_ptr<DataType>& index_type,
- const std::shared_ptr<DataType>& value_type, bool ordered) {
- RETURN_NOT_OK(ValidateParameters(*index_type, *value_type));
- return std::make_shared<DictionaryType>(index_type, value_type, ordered);
-}
-
-DictionaryType::DictionaryType(const std::shared_ptr<DataType>& index_type,
- const std::shared_ptr<DataType>& value_type, bool ordered)
- : FixedWidthType(Type::DICTIONARY),
- index_type_(index_type),
- value_type_(value_type),
- ordered_(ordered) {
- ARROW_CHECK_OK(ValidateParameters(*index_type_, *value_type_));
-}
-
-DataTypeLayout DictionaryType::layout() const {
- auto layout = index_type_->layout();
- layout.has_dictionary = true;
- return layout;
-}
-
-std::string DictionaryType::ToString() const {
- std::stringstream ss;
- ss << this->name() << "<values=" << value_type_->ToString()
- << ", indices=" << index_type_->ToString() << ", ordered=" << ordered_ << ">";
- return ss.str();
-}
-
-// ----------------------------------------------------------------------
-// Null type
-
-std::string NullType::ToString() const { return name(); }
-
-// ----------------------------------------------------------------------
-// FieldRef
-
-size_t FieldPath::hash() const {
- return internal::ComputeStringHash<0>(indices().data(), indices().size() * sizeof(int));
-}
-
-std::string FieldPath::ToString() const {
+// Dictionary-encoded type
+
+Status DictionaryType::ValidateParameters(const DataType& index_type,
+ const DataType& value_type) {
+ if (!is_integer(index_type.id())) {
+ return Status::TypeError("Dictionary index type should be integer, got ",
+ index_type.ToString());
+ }
+ return Status::OK();
+}
+
+int DictionaryType::bit_width() const {
+ return checked_cast<const FixedWidthType&>(*index_type_).bit_width();
+}
+
+Result<std::shared_ptr<DataType>> DictionaryType::Make(
+ const std::shared_ptr<DataType>& index_type,
+ const std::shared_ptr<DataType>& value_type, bool ordered) {
+ RETURN_NOT_OK(ValidateParameters(*index_type, *value_type));
+ return std::make_shared<DictionaryType>(index_type, value_type, ordered);
+}
+
+DictionaryType::DictionaryType(const std::shared_ptr<DataType>& index_type,
+ const std::shared_ptr<DataType>& value_type, bool ordered)
+ : FixedWidthType(Type::DICTIONARY),
+ index_type_(index_type),
+ value_type_(value_type),
+ ordered_(ordered) {
+ ARROW_CHECK_OK(ValidateParameters(*index_type_, *value_type_));
+}
+
+DataTypeLayout DictionaryType::layout() const {
+ auto layout = index_type_->layout();
+ layout.has_dictionary = true;
+ return layout;
+}
+
+std::string DictionaryType::ToString() const {
+ std::stringstream ss;
+ ss << this->name() << "<values=" << value_type_->ToString()
+ << ", indices=" << index_type_->ToString() << ", ordered=" << ordered_ << ">";
+ return ss.str();
+}
+
+// ----------------------------------------------------------------------
+// Null type
+
+std::string NullType::ToString() const { return name(); }
+
+// ----------------------------------------------------------------------
+// FieldRef
+
+size_t FieldPath::hash() const {
+ return internal::ComputeStringHash<0>(indices().data(), indices().size() * sizeof(int));
+}
+
+std::string FieldPath::ToString() const {
if (this->indices().empty()) {
return "FieldPath(empty)";
}
- std::string repr = "FieldPath(";
- for (auto index : this->indices()) {
- repr += std::to_string(index) + " ";
- }
+ std::string repr = "FieldPath(";
+ for (auto index : this->indices()) {
+ repr += std::to_string(index) + " ";
+ }
repr.back() = ')';
- return repr;
-}
-
-struct FieldPathGetImpl {
- static const DataType& GetType(const ArrayData& data) { return *data.type; }
-
- static void Summarize(const FieldVector& fields, std::stringstream* ss) {
- *ss << "{ ";
- for (const auto& field : fields) {
- *ss << field->ToString() << ", ";
- }
- *ss << "}";
- }
-
- template <typename T>
- static void Summarize(const std::vector<T>& columns, std::stringstream* ss) {
- *ss << "{ ";
- for (const auto& column : columns) {
- *ss << GetType(*column) << ", ";
- }
- *ss << "}";
- }
-
- template <typename T>
- static Status IndexError(const FieldPath* path, int out_of_range_depth,
- const std::vector<T>& children) {
- std::stringstream ss;
- ss << "index out of range. ";
-
- ss << "indices=[ ";
- int depth = 0;
- for (int i : path->indices()) {
- if (depth != out_of_range_depth) {
- ss << i << " ";
- continue;
- }
- ss << ">" << i << "< ";
- ++depth;
- }
- ss << "] ";
-
- if (std::is_same<T, std::shared_ptr<Field>>::value) {
- ss << "fields were: ";
- } else {
- ss << "columns had types: ";
- }
- Summarize(children, &ss);
-
- return Status::IndexError(ss.str());
- }
-
- template <typename T, typename GetChildren>
- static Result<T> Get(const FieldPath* path, const std::vector<T>* children,
- GetChildren&& get_children, int* out_of_range_depth) {
- if (path->indices().empty()) {
- return Status::Invalid("empty indices cannot be traversed");
- }
-
- int depth = 0;
- const T* out;
- for (int index : path->indices()) {
+ return repr;
+}
+
+struct FieldPathGetImpl {
+ static const DataType& GetType(const ArrayData& data) { return *data.type; }
+
+ static void Summarize(const FieldVector& fields, std::stringstream* ss) {
+ *ss << "{ ";
+ for (const auto& field : fields) {
+ *ss << field->ToString() << ", ";
+ }
+ *ss << "}";
+ }
+
+ template <typename T>
+ static void Summarize(const std::vector<T>& columns, std::stringstream* ss) {
+ *ss << "{ ";
+ for (const auto& column : columns) {
+ *ss << GetType(*column) << ", ";
+ }
+ *ss << "}";
+ }
+
+ template <typename T>
+ static Status IndexError(const FieldPath* path, int out_of_range_depth,
+ const std::vector<T>& children) {
+ std::stringstream ss;
+ ss << "index out of range. ";
+
+ ss << "indices=[ ";
+ int depth = 0;
+ for (int i : path->indices()) {
+ if (depth != out_of_range_depth) {
+ ss << i << " ";
+ continue;
+ }
+ ss << ">" << i << "< ";
+ ++depth;
+ }
+ ss << "] ";
+
+ if (std::is_same<T, std::shared_ptr<Field>>::value) {
+ ss << "fields were: ";
+ } else {
+ ss << "columns had types: ";
+ }
+ Summarize(children, &ss);
+
+ return Status::IndexError(ss.str());
+ }
+
+ template <typename T, typename GetChildren>
+ static Result<T> Get(const FieldPath* path, const std::vector<T>* children,
+ GetChildren&& get_children, int* out_of_range_depth) {
+ if (path->indices().empty()) {
+ return Status::Invalid("empty indices cannot be traversed");
+ }
+
+ int depth = 0;
+ const T* out;
+ for (int index : path->indices()) {
if (children == nullptr) {
return Status::NotImplemented("Get child data of non-struct array");
}
- if (index < 0 || static_cast<size_t>(index) >= children->size()) {
- *out_of_range_depth = depth;
- return nullptr;
- }
-
- out = &children->at(index);
- children = get_children(*out);
- ++depth;
- }
-
- return *out;
- }
-
- template <typename T, typename GetChildren>
- static Result<T> Get(const FieldPath* path, const std::vector<T>* children,
- GetChildren&& get_children) {
- int out_of_range_depth = -1;
- ARROW_ASSIGN_OR_RAISE(auto child,
- Get(path, children, std::forward<GetChildren>(get_children),
- &out_of_range_depth));
- if (child != nullptr) {
- return std::move(child);
- }
- return IndexError(path, out_of_range_depth, *children);
- }
-
- static Result<std::shared_ptr<Field>> Get(const FieldPath* path,
- const FieldVector& fields) {
- return FieldPathGetImpl::Get(path, &fields, [](const std::shared_ptr<Field>& field) {
- return &field->type()->fields();
- });
- }
-
- static Result<std::shared_ptr<ArrayData>> Get(const FieldPath* path,
- const ArrayDataVector& child_data) {
- return FieldPathGetImpl::Get(
- path, &child_data,
+ if (index < 0 || static_cast<size_t>(index) >= children->size()) {
+ *out_of_range_depth = depth;
+ return nullptr;
+ }
+
+ out = &children->at(index);
+ children = get_children(*out);
+ ++depth;
+ }
+
+ return *out;
+ }
+
+ template <typename T, typename GetChildren>
+ static Result<T> Get(const FieldPath* path, const std::vector<T>* children,
+ GetChildren&& get_children) {
+ int out_of_range_depth = -1;
+ ARROW_ASSIGN_OR_RAISE(auto child,
+ Get(path, children, std::forward<GetChildren>(get_children),
+ &out_of_range_depth));
+ if (child != nullptr) {
+ return std::move(child);
+ }
+ return IndexError(path, out_of_range_depth, *children);
+ }
+
+ static Result<std::shared_ptr<Field>> Get(const FieldPath* path,
+ const FieldVector& fields) {
+ return FieldPathGetImpl::Get(path, &fields, [](const std::shared_ptr<Field>& field) {
+ return &field->type()->fields();
+ });
+ }
+
+ static Result<std::shared_ptr<ArrayData>> Get(const FieldPath* path,
+ const ArrayDataVector& child_data) {
+ return FieldPathGetImpl::Get(
+ path, &child_data,
[](const std::shared_ptr<ArrayData>& data) -> const ArrayDataVector* {
if (data->type->id() != Type::STRUCT) {
return nullptr;
- }
+ }
return &data->child_data;
- });
- }
-};
-
-Result<std::shared_ptr<Field>> FieldPath::Get(const Schema& schema) const {
- return FieldPathGetImpl::Get(this, schema.fields());
-}
-
-Result<std::shared_ptr<Field>> FieldPath::Get(const Field& field) const {
- return FieldPathGetImpl::Get(this, field.type()->fields());
-}
-
-Result<std::shared_ptr<Field>> FieldPath::Get(const DataType& type) const {
- return FieldPathGetImpl::Get(this, type.fields());
-}
-
-Result<std::shared_ptr<Field>> FieldPath::Get(const FieldVector& fields) const {
- return FieldPathGetImpl::Get(this, fields);
-}
-
-Result<std::shared_ptr<Array>> FieldPath::Get(const RecordBatch& batch) const {
- ARROW_ASSIGN_OR_RAISE(auto data, FieldPathGetImpl::Get(this, batch.column_data()));
+ });
+ }
+};
+
+Result<std::shared_ptr<Field>> FieldPath::Get(const Schema& schema) const {
+ return FieldPathGetImpl::Get(this, schema.fields());
+}
+
+Result<std::shared_ptr<Field>> FieldPath::Get(const Field& field) const {
+ return FieldPathGetImpl::Get(this, field.type()->fields());
+}
+
+Result<std::shared_ptr<Field>> FieldPath::Get(const DataType& type) const {
+ return FieldPathGetImpl::Get(this, type.fields());
+}
+
+Result<std::shared_ptr<Field>> FieldPath::Get(const FieldVector& fields) const {
+ return FieldPathGetImpl::Get(this, fields);
+}
+
+Result<std::shared_ptr<Array>> FieldPath::Get(const RecordBatch& batch) const {
+ ARROW_ASSIGN_OR_RAISE(auto data, FieldPathGetImpl::Get(this, batch.column_data()));
return MakeArray(std::move(data));
-}
-
+}
+
Result<std::shared_ptr<Array>> FieldPath::Get(const Array& array) const {
ARROW_ASSIGN_OR_RAISE(auto data, Get(*array.data()));
return MakeArray(std::move(data));
-}
-
+}
+
Result<std::shared_ptr<ArrayData>> FieldPath::Get(const ArrayData& data) const {
if (data.type->id() != Type::STRUCT) {
return Status::NotImplemented("Get child data of non-struct array");
@@ -1047,272 +1047,272 @@ Result<std::shared_ptr<ArrayData>> FieldPath::Get(const ArrayData& data) const {
return FieldPathGetImpl::Get(this, data.child_data);
}
-FieldRef::FieldRef(FieldPath indices) : impl_(std::move(indices)) {
- DCHECK_GT(util::get<FieldPath>(impl_).indices().size(), 0);
-}
-
-void FieldRef::Flatten(std::vector<FieldRef> children) {
- // flatten children
- struct Visitor {
+FieldRef::FieldRef(FieldPath indices) : impl_(std::move(indices)) {
+ DCHECK_GT(util::get<FieldPath>(impl_).indices().size(), 0);
+}
+
+void FieldRef::Flatten(std::vector<FieldRef> children) {
+ // flatten children
+ struct Visitor {
void operator()(std::string* name) { *out++ = FieldRef(std::move(*name)); }
-
+
void operator()(FieldPath* indices) { *out++ = FieldRef(std::move(*indices)); }
-
+
void operator()(std::vector<FieldRef>* children) {
for (auto& child : *children) {
util::visit(*this, &child.impl_);
- }
- }
-
- std::back_insert_iterator<std::vector<FieldRef>> out;
- };
-
- std::vector<FieldRef> out;
- Visitor visitor{std::back_inserter(out)};
+ }
+ }
+
+ std::back_insert_iterator<std::vector<FieldRef>> out;
+ };
+
+ std::vector<FieldRef> out;
+ Visitor visitor{std::back_inserter(out)};
visitor(&children);
-
- DCHECK(!out.empty());
- DCHECK(std::none_of(out.begin(), out.end(),
- [](const FieldRef& ref) { return ref.IsNested(); }));
-
- if (out.size() == 1) {
- impl_ = std::move(out[0].impl_);
- } else {
- impl_ = std::move(out);
- }
-}
-
-Result<FieldRef> FieldRef::FromDotPath(const std::string& dot_path_arg) {
- if (dot_path_arg.empty()) {
- return Status::Invalid("Dot path was empty");
- }
-
- std::vector<FieldRef> children;
-
- util::string_view dot_path = dot_path_arg;
-
- auto parse_name = [&] {
- std::string name;
- for (;;) {
- auto segment_end = dot_path.find_first_of("\\[.");
- if (segment_end == util::string_view::npos) {
- // dot_path doesn't contain any other special characters; consume all
- name.append(dot_path.begin(), dot_path.end());
- dot_path = "";
- break;
- }
-
- if (dot_path[segment_end] != '\\') {
- // segment_end points to a subscript for a new FieldRef
- name.append(dot_path.begin(), segment_end);
- dot_path = dot_path.substr(segment_end);
- break;
- }
-
- if (dot_path.size() == segment_end + 1) {
- // dot_path ends with backslash; consume it all
- name.append(dot_path.begin(), dot_path.end());
- dot_path = "";
- break;
- }
-
- // append all characters before backslash, then the character which follows it
- name.append(dot_path.begin(), segment_end);
- name.push_back(dot_path[segment_end + 1]);
- dot_path = dot_path.substr(segment_end + 2);
- }
- return name;
- };
-
- while (!dot_path.empty()) {
- auto subscript = dot_path[0];
- dot_path = dot_path.substr(1);
- switch (subscript) {
- case '.': {
- // next element is a name
- children.emplace_back(parse_name());
- continue;
- }
- case '[': {
- auto subscript_end = dot_path.find_first_not_of("0123456789");
- if (subscript_end == util::string_view::npos || dot_path[subscript_end] != ']') {
- return Status::Invalid("Dot path '", dot_path_arg,
- "' contained an unterminated index");
- }
- children.emplace_back(std::atoi(dot_path.data()));
- dot_path = dot_path.substr(subscript_end + 1);
- continue;
- }
- default:
- return Status::Invalid("Dot path must begin with '[' or '.', got '", dot_path_arg,
- "'");
- }
- }
-
- FieldRef out;
- out.Flatten(std::move(children));
- return out;
-}
-
-size_t FieldRef::hash() const {
- struct Visitor : std::hash<std::string> {
- using std::hash<std::string>::operator();
-
- size_t operator()(const FieldPath& path) { return path.hash(); }
-
- size_t operator()(const std::vector<FieldRef>& children) {
- size_t hash = 0;
-
- for (const FieldRef& child : children) {
- hash ^= child.hash();
- }
-
- return hash;
- }
- };
-
- return util::visit(Visitor{}, impl_);
-}
-
-std::string FieldRef::ToString() const {
- struct Visitor {
- std::string operator()(const FieldPath& path) { return path.ToString(); }
-
- std::string operator()(const std::string& name) { return "Name(" + name + ")"; }
-
- std::string operator()(const std::vector<FieldRef>& children) {
- std::string repr = "Nested(";
- for (const auto& child : children) {
- repr += child.ToString() + " ";
- }
- repr.resize(repr.size() - 1);
- repr += ")";
- return repr;
- }
- };
-
- return "FieldRef." + util::visit(Visitor{}, impl_);
-}
-
-std::vector<FieldPath> FieldRef::FindAll(const Schema& schema) const {
+
+ DCHECK(!out.empty());
+ DCHECK(std::none_of(out.begin(), out.end(),
+ [](const FieldRef& ref) { return ref.IsNested(); }));
+
+ if (out.size() == 1) {
+ impl_ = std::move(out[0].impl_);
+ } else {
+ impl_ = std::move(out);
+ }
+}
+
+Result<FieldRef> FieldRef::FromDotPath(const std::string& dot_path_arg) {
+ if (dot_path_arg.empty()) {
+ return Status::Invalid("Dot path was empty");
+ }
+
+ std::vector<FieldRef> children;
+
+ util::string_view dot_path = dot_path_arg;
+
+ auto parse_name = [&] {
+ std::string name;
+ for (;;) {
+ auto segment_end = dot_path.find_first_of("\\[.");
+ if (segment_end == util::string_view::npos) {
+ // dot_path doesn't contain any other special characters; consume all
+ name.append(dot_path.begin(), dot_path.end());
+ dot_path = "";
+ break;
+ }
+
+ if (dot_path[segment_end] != '\\') {
+ // segment_end points to a subscript for a new FieldRef
+ name.append(dot_path.begin(), segment_end);
+ dot_path = dot_path.substr(segment_end);
+ break;
+ }
+
+ if (dot_path.size() == segment_end + 1) {
+ // dot_path ends with backslash; consume it all
+ name.append(dot_path.begin(), dot_path.end());
+ dot_path = "";
+ break;
+ }
+
+ // append all characters before backslash, then the character which follows it
+ name.append(dot_path.begin(), segment_end);
+ name.push_back(dot_path[segment_end + 1]);
+ dot_path = dot_path.substr(segment_end + 2);
+ }
+ return name;
+ };
+
+ while (!dot_path.empty()) {
+ auto subscript = dot_path[0];
+ dot_path = dot_path.substr(1);
+ switch (subscript) {
+ case '.': {
+ // next element is a name
+ children.emplace_back(parse_name());
+ continue;
+ }
+ case '[': {
+ auto subscript_end = dot_path.find_first_not_of("0123456789");
+ if (subscript_end == util::string_view::npos || dot_path[subscript_end] != ']') {
+ return Status::Invalid("Dot path '", dot_path_arg,
+ "' contained an unterminated index");
+ }
+ children.emplace_back(std::atoi(dot_path.data()));
+ dot_path = dot_path.substr(subscript_end + 1);
+ continue;
+ }
+ default:
+ return Status::Invalid("Dot path must begin with '[' or '.', got '", dot_path_arg,
+ "'");
+ }
+ }
+
+ FieldRef out;
+ out.Flatten(std::move(children));
+ return out;
+}
+
+size_t FieldRef::hash() const {
+ struct Visitor : std::hash<std::string> {
+ using std::hash<std::string>::operator();
+
+ size_t operator()(const FieldPath& path) { return path.hash(); }
+
+ size_t operator()(const std::vector<FieldRef>& children) {
+ size_t hash = 0;
+
+ for (const FieldRef& child : children) {
+ hash ^= child.hash();
+ }
+
+ return hash;
+ }
+ };
+
+ return util::visit(Visitor{}, impl_);
+}
+
+std::string FieldRef::ToString() const {
+ struct Visitor {
+ std::string operator()(const FieldPath& path) { return path.ToString(); }
+
+ std::string operator()(const std::string& name) { return "Name(" + name + ")"; }
+
+ std::string operator()(const std::vector<FieldRef>& children) {
+ std::string repr = "Nested(";
+ for (const auto& child : children) {
+ repr += child.ToString() + " ";
+ }
+ repr.resize(repr.size() - 1);
+ repr += ")";
+ return repr;
+ }
+ };
+
+ return "FieldRef." + util::visit(Visitor{}, impl_);
+}
+
+std::vector<FieldPath> FieldRef::FindAll(const Schema& schema) const {
if (auto name = this->name()) {
return internal::MapVector([](int i) { return FieldPath{i}; },
schema.GetAllFieldIndices(*name));
}
- return FindAll(schema.fields());
-}
-
-std::vector<FieldPath> FieldRef::FindAll(const Field& field) const {
- return FindAll(field.type()->fields());
-}
-
-std::vector<FieldPath> FieldRef::FindAll(const DataType& type) const {
- return FindAll(type.fields());
-}
-
-std::vector<FieldPath> FieldRef::FindAll(const FieldVector& fields) const {
- struct Visitor {
- std::vector<FieldPath> operator()(const FieldPath& path) {
- // skip long IndexError construction if path is out of range
- int out_of_range_depth;
- auto maybe_field = FieldPathGetImpl::Get(
- &path, &fields_,
- [](const std::shared_ptr<Field>& field) { return &field->type()->fields(); },
- &out_of_range_depth);
-
- DCHECK_OK(maybe_field.status());
-
- if (maybe_field.ValueOrDie() != nullptr) {
- return {path};
- }
- return {};
- }
-
- std::vector<FieldPath> operator()(const std::string& name) {
- std::vector<FieldPath> out;
-
- for (int i = 0; i < static_cast<int>(fields_.size()); ++i) {
- if (fields_[i]->name() == name) {
- out.push_back({i});
- }
- }
-
- return out;
- }
-
- struct Matches {
- // referents[i] is referenced by prefixes[i]
- std::vector<FieldPath> prefixes;
- FieldVector referents;
-
- Matches(std::vector<FieldPath> matches, const FieldVector& fields) {
- for (auto& match : matches) {
- Add({}, std::move(match), fields);
- }
- }
-
- Matches() = default;
-
- size_t size() const { return referents.size(); }
-
- void Add(const FieldPath& prefix, const FieldPath& suffix,
- const FieldVector& fields) {
- auto maybe_field = suffix.Get(fields);
- DCHECK_OK(maybe_field.status());
- referents.push_back(std::move(maybe_field).ValueOrDie());
-
- std::vector<int> concatenated_indices(prefix.indices().size() +
- suffix.indices().size());
- auto it = concatenated_indices.begin();
- for (auto path : {&prefix, &suffix}) {
- it = std::copy(path->indices().begin(), path->indices().end(), it);
- }
- prefixes.emplace_back(std::move(concatenated_indices));
- }
- };
-
- std::vector<FieldPath> operator()(const std::vector<FieldRef>& refs) {
- DCHECK_GE(refs.size(), 1);
- Matches matches(refs.front().FindAll(fields_), fields_);
-
- for (auto ref_it = refs.begin() + 1; ref_it != refs.end(); ++ref_it) {
- Matches next_matches;
- for (size_t i = 0; i < matches.size(); ++i) {
- const auto& referent = *matches.referents[i];
-
- for (const FieldPath& match : ref_it->FindAll(referent)) {
- next_matches.Add(matches.prefixes[i], match, referent.type()->fields());
- }
- }
- matches = std::move(next_matches);
- }
-
- return matches.prefixes;
- }
-
- const FieldVector& fields_;
- };
-
- return util::visit(Visitor{fields}, impl_);
-}
-
+ return FindAll(schema.fields());
+}
+
+std::vector<FieldPath> FieldRef::FindAll(const Field& field) const {
+ return FindAll(field.type()->fields());
+}
+
+std::vector<FieldPath> FieldRef::FindAll(const DataType& type) const {
+ return FindAll(type.fields());
+}
+
+std::vector<FieldPath> FieldRef::FindAll(const FieldVector& fields) const {
+ struct Visitor {
+ std::vector<FieldPath> operator()(const FieldPath& path) {
+ // skip long IndexError construction if path is out of range
+ int out_of_range_depth;
+ auto maybe_field = FieldPathGetImpl::Get(
+ &path, &fields_,
+ [](const std::shared_ptr<Field>& field) { return &field->type()->fields(); },
+ &out_of_range_depth);
+
+ DCHECK_OK(maybe_field.status());
+
+ if (maybe_field.ValueOrDie() != nullptr) {
+ return {path};
+ }
+ return {};
+ }
+
+ std::vector<FieldPath> operator()(const std::string& name) {
+ std::vector<FieldPath> out;
+
+ for (int i = 0; i < static_cast<int>(fields_.size()); ++i) {
+ if (fields_[i]->name() == name) {
+ out.push_back({i});
+ }
+ }
+
+ return out;
+ }
+
+ struct Matches {
+ // referents[i] is referenced by prefixes[i]
+ std::vector<FieldPath> prefixes;
+ FieldVector referents;
+
+ Matches(std::vector<FieldPath> matches, const FieldVector& fields) {
+ for (auto& match : matches) {
+ Add({}, std::move(match), fields);
+ }
+ }
+
+ Matches() = default;
+
+ size_t size() const { return referents.size(); }
+
+ void Add(const FieldPath& prefix, const FieldPath& suffix,
+ const FieldVector& fields) {
+ auto maybe_field = suffix.Get(fields);
+ DCHECK_OK(maybe_field.status());
+ referents.push_back(std::move(maybe_field).ValueOrDie());
+
+ std::vector<int> concatenated_indices(prefix.indices().size() +
+ suffix.indices().size());
+ auto it = concatenated_indices.begin();
+ for (auto path : {&prefix, &suffix}) {
+ it = std::copy(path->indices().begin(), path->indices().end(), it);
+ }
+ prefixes.emplace_back(std::move(concatenated_indices));
+ }
+ };
+
+ std::vector<FieldPath> operator()(const std::vector<FieldRef>& refs) {
+ DCHECK_GE(refs.size(), 1);
+ Matches matches(refs.front().FindAll(fields_), fields_);
+
+ for (auto ref_it = refs.begin() + 1; ref_it != refs.end(); ++ref_it) {
+ Matches next_matches;
+ for (size_t i = 0; i < matches.size(); ++i) {
+ const auto& referent = *matches.referents[i];
+
+ for (const FieldPath& match : ref_it->FindAll(referent)) {
+ next_matches.Add(matches.prefixes[i], match, referent.type()->fields());
+ }
+ }
+ matches = std::move(next_matches);
+ }
+
+ return matches.prefixes;
+ }
+
+ const FieldVector& fields_;
+ };
+
+ return util::visit(Visitor{fields}, impl_);
+}
+
std::vector<FieldPath> FieldRef::FindAll(const ArrayData& array) const {
return FindAll(*array.type);
-}
-
+}
+
std::vector<FieldPath> FieldRef::FindAll(const Array& array) const {
- return FindAll(*array.type());
-}
-
-std::vector<FieldPath> FieldRef::FindAll(const RecordBatch& batch) const {
- return FindAll(*batch.schema());
-}
-
-void PrintTo(const FieldRef& ref, std::ostream* os) { *os << ref.ToString(); }
-
-// ----------------------------------------------------------------------
-// Schema implementation
-
+ return FindAll(*array.type());
+}
+
+std::vector<FieldPath> FieldRef::FindAll(const RecordBatch& batch) const {
+ return FindAll(*batch.schema());
+}
+
+void PrintTo(const FieldRef& ref, std::ostream* os) { *os << ref.ToString(); }
+
+// ----------------------------------------------------------------------
+// Schema implementation
+
std::string EndiannessToString(Endianness endianness) {
switch (endianness) {
case Endianness::Little:
@@ -1325,36 +1325,36 @@ std::string EndiannessToString(Endianness endianness) {
}
}
-class Schema::Impl {
- public:
+class Schema::Impl {
+ public:
Impl(std::vector<std::shared_ptr<Field>> fields, Endianness endianness,
- std::shared_ptr<const KeyValueMetadata> metadata)
- : fields_(std::move(fields)),
+ std::shared_ptr<const KeyValueMetadata> metadata)
+ : fields_(std::move(fields)),
endianness_(endianness),
- name_to_index_(CreateNameToIndexMap(fields_)),
- metadata_(std::move(metadata)) {}
-
- std::vector<std::shared_ptr<Field>> fields_;
+ name_to_index_(CreateNameToIndexMap(fields_)),
+ metadata_(std::move(metadata)) {}
+
+ std::vector<std::shared_ptr<Field>> fields_;
Endianness endianness_;
- std::unordered_multimap<std::string, int> name_to_index_;
- std::shared_ptr<const KeyValueMetadata> metadata_;
-};
-
+ std::unordered_multimap<std::string, int> name_to_index_;
+ std::shared_ptr<const KeyValueMetadata> metadata_;
+};
+
Schema::Schema(std::vector<std::shared_ptr<Field>> fields, Endianness endianness,
std::shared_ptr<const KeyValueMetadata> metadata)
: detail::Fingerprintable(),
impl_(new Impl(std::move(fields), endianness, std::move(metadata))) {}
-Schema::Schema(std::vector<std::shared_ptr<Field>> fields,
- std::shared_ptr<const KeyValueMetadata> metadata)
- : detail::Fingerprintable(),
+Schema::Schema(std::vector<std::shared_ptr<Field>> fields,
+ std::shared_ptr<const KeyValueMetadata> metadata)
+ : detail::Fingerprintable(),
impl_(new Impl(std::move(fields), Endianness::Native, std::move(metadata))) {}
-
-Schema::Schema(const Schema& schema)
- : detail::Fingerprintable(), impl_(new Impl(*schema.impl_)) {}
-
+
+Schema::Schema(const Schema& schema)
+ : detail::Fingerprintable(), impl_(new Impl(*schema.impl_)) {}
+
Schema::~Schema() = default;
-
+
std::shared_ptr<Schema> Schema::WithEndianness(Endianness endianness) const {
return std::make_shared<Schema>(impl_->fields_, endianness, impl_->metadata_);
}
@@ -1363,920 +1363,920 @@ Endianness Schema::endianness() const { return impl_->endianness_; }
bool Schema::is_native_endian() const { return impl_->endianness_ == Endianness::Native; }
-int Schema::num_fields() const { return static_cast<int>(impl_->fields_.size()); }
-
-const std::shared_ptr<Field>& Schema::field(int i) const {
- DCHECK_GE(i, 0);
- DCHECK_LT(i, num_fields());
- return impl_->fields_[i];
-}
-
-const std::vector<std::shared_ptr<Field>>& Schema::fields() const {
- return impl_->fields_;
-}
-
-bool Schema::Equals(const Schema& other, bool check_metadata) const {
- if (this == &other) {
- return true;
- }
-
+int Schema::num_fields() const { return static_cast<int>(impl_->fields_.size()); }
+
+const std::shared_ptr<Field>& Schema::field(int i) const {
+ DCHECK_GE(i, 0);
+ DCHECK_LT(i, num_fields());
+ return impl_->fields_[i];
+}
+
+const std::vector<std::shared_ptr<Field>>& Schema::fields() const {
+ return impl_->fields_;
+}
+
+bool Schema::Equals(const Schema& other, bool check_metadata) const {
+ if (this == &other) {
+ return true;
+ }
+
// checks endianness equality
if (endianness() != other.endianness()) {
return false;
}
- // checks field equality
- if (num_fields() != other.num_fields()) {
- return false;
- }
-
- if (check_metadata) {
- const auto& metadata_fp = metadata_fingerprint();
- const auto& other_metadata_fp = other.metadata_fingerprint();
- if (metadata_fp != other_metadata_fp) {
- return false;
- }
- }
-
- // Fast path using fingerprints, if possible
- const auto& fp = fingerprint();
- const auto& other_fp = other.fingerprint();
- if (!fp.empty() && !other_fp.empty()) {
- return fp == other_fp;
- }
-
- // Fall back on field-by-field comparison
- for (int i = 0; i < num_fields(); ++i) {
- if (!field(i)->Equals(*other.field(i).get(), check_metadata)) {
- return false;
- }
- }
-
- return true;
-}
-
-bool Schema::Equals(const std::shared_ptr<Schema>& other, bool check_metadata) const {
- if (other == nullptr) {
- return false;
- }
-
- return Equals(*other, check_metadata);
-}
-
-std::shared_ptr<Field> Schema::GetFieldByName(const std::string& name) const {
- int i = GetFieldIndex(name);
- return i == -1 ? nullptr : impl_->fields_[i];
-}
-
-int Schema::GetFieldIndex(const std::string& name) const {
- return LookupNameIndex(impl_->name_to_index_, name);
-}
-
-std::vector<int> Schema::GetAllFieldIndices(const std::string& name) const {
- std::vector<int> result;
- auto p = impl_->name_to_index_.equal_range(name);
- for (auto it = p.first; it != p.second; ++it) {
- result.push_back(it->second);
- }
- if (result.size() > 1) {
- std::sort(result.begin(), result.end());
- }
- return result;
-}
-
-Status Schema::CanReferenceFieldsByNames(const std::vector<std::string>& names) const {
- for (const auto& name : names) {
- if (GetFieldByName(name) == nullptr) {
- return Status::Invalid("Field named '", name,
- "' not found or not unique in the schema.");
- }
- }
-
- return Status::OK();
-}
-
-std::vector<std::shared_ptr<Field>> Schema::GetAllFieldsByName(
- const std::string& name) const {
- std::vector<std::shared_ptr<Field>> result;
- auto p = impl_->name_to_index_.equal_range(name);
- for (auto it = p.first; it != p.second; ++it) {
- result.push_back(impl_->fields_[it->second]);
- }
- return result;
-}
-
-Result<std::shared_ptr<Schema>> Schema::AddField(
- int i, const std::shared_ptr<Field>& field) const {
- if (i < 0 || i > this->num_fields()) {
- return Status::Invalid("Invalid column index to add field.");
- }
-
- return std::make_shared<Schema>(internal::AddVectorElement(impl_->fields_, i, field),
- impl_->metadata_);
-}
-
-Result<std::shared_ptr<Schema>> Schema::SetField(
- int i, const std::shared_ptr<Field>& field) const {
- if (i < 0 || i > this->num_fields()) {
- return Status::Invalid("Invalid column index to add field.");
- }
-
- return std::make_shared<Schema>(
- internal::ReplaceVectorElement(impl_->fields_, i, field), impl_->metadata_);
-}
-
-Result<std::shared_ptr<Schema>> Schema::RemoveField(int i) const {
- if (i < 0 || i >= this->num_fields()) {
- return Status::Invalid("Invalid column index to remove field.");
- }
-
- return std::make_shared<Schema>(internal::DeleteVectorElement(impl_->fields_, i),
- impl_->metadata_);
-}
-
-bool Schema::HasMetadata() const {
- return (impl_->metadata_ != nullptr) && (impl_->metadata_->size() > 0);
-}
-
-bool Schema::HasDistinctFieldNames() const {
- auto fields = field_names();
- std::unordered_set<std::string> names{fields.cbegin(), fields.cend()};
- return names.size() == fields.size();
-}
-
-std::shared_ptr<Schema> Schema::WithMetadata(
- const std::shared_ptr<const KeyValueMetadata>& metadata) const {
- return std::make_shared<Schema>(impl_->fields_, metadata);
-}
-
+ // checks field equality
+ if (num_fields() != other.num_fields()) {
+ return false;
+ }
+
+ if (check_metadata) {
+ const auto& metadata_fp = metadata_fingerprint();
+ const auto& other_metadata_fp = other.metadata_fingerprint();
+ if (metadata_fp != other_metadata_fp) {
+ return false;
+ }
+ }
+
+ // Fast path using fingerprints, if possible
+ const auto& fp = fingerprint();
+ const auto& other_fp = other.fingerprint();
+ if (!fp.empty() && !other_fp.empty()) {
+ return fp == other_fp;
+ }
+
+ // Fall back on field-by-field comparison
+ for (int i = 0; i < num_fields(); ++i) {
+ if (!field(i)->Equals(*other.field(i).get(), check_metadata)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool Schema::Equals(const std::shared_ptr<Schema>& other, bool check_metadata) const {
+ if (other == nullptr) {
+ return false;
+ }
+
+ return Equals(*other, check_metadata);
+}
+
+std::shared_ptr<Field> Schema::GetFieldByName(const std::string& name) const {
+ int i = GetFieldIndex(name);
+ return i == -1 ? nullptr : impl_->fields_[i];
+}
+
+int Schema::GetFieldIndex(const std::string& name) const {
+ return LookupNameIndex(impl_->name_to_index_, name);
+}
+
+std::vector<int> Schema::GetAllFieldIndices(const std::string& name) const {
+ std::vector<int> result;
+ auto p = impl_->name_to_index_.equal_range(name);
+ for (auto it = p.first; it != p.second; ++it) {
+ result.push_back(it->second);
+ }
+ if (result.size() > 1) {
+ std::sort(result.begin(), result.end());
+ }
+ return result;
+}
+
+Status Schema::CanReferenceFieldsByNames(const std::vector<std::string>& names) const {
+ for (const auto& name : names) {
+ if (GetFieldByName(name) == nullptr) {
+ return Status::Invalid("Field named '", name,
+ "' not found or not unique in the schema.");
+ }
+ }
+
+ return Status::OK();
+}
+
+std::vector<std::shared_ptr<Field>> Schema::GetAllFieldsByName(
+ const std::string& name) const {
+ std::vector<std::shared_ptr<Field>> result;
+ auto p = impl_->name_to_index_.equal_range(name);
+ for (auto it = p.first; it != p.second; ++it) {
+ result.push_back(impl_->fields_[it->second]);
+ }
+ return result;
+}
+
+Result<std::shared_ptr<Schema>> Schema::AddField(
+ int i, const std::shared_ptr<Field>& field) const {
+ if (i < 0 || i > this->num_fields()) {
+ return Status::Invalid("Invalid column index to add field.");
+ }
+
+ return std::make_shared<Schema>(internal::AddVectorElement(impl_->fields_, i, field),
+ impl_->metadata_);
+}
+
+Result<std::shared_ptr<Schema>> Schema::SetField(
+ int i, const std::shared_ptr<Field>& field) const {
+ if (i < 0 || i > this->num_fields()) {
+ return Status::Invalid("Invalid column index to add field.");
+ }
+
+ return std::make_shared<Schema>(
+ internal::ReplaceVectorElement(impl_->fields_, i, field), impl_->metadata_);
+}
+
+Result<std::shared_ptr<Schema>> Schema::RemoveField(int i) const {
+ if (i < 0 || i >= this->num_fields()) {
+ return Status::Invalid("Invalid column index to remove field.");
+ }
+
+ return std::make_shared<Schema>(internal::DeleteVectorElement(impl_->fields_, i),
+ impl_->metadata_);
+}
+
+bool Schema::HasMetadata() const {
+ return (impl_->metadata_ != nullptr) && (impl_->metadata_->size() > 0);
+}
+
+bool Schema::HasDistinctFieldNames() const {
+ auto fields = field_names();
+ std::unordered_set<std::string> names{fields.cbegin(), fields.cend()};
+ return names.size() == fields.size();
+}
+
+std::shared_ptr<Schema> Schema::WithMetadata(
+ const std::shared_ptr<const KeyValueMetadata>& metadata) const {
+ return std::make_shared<Schema>(impl_->fields_, metadata);
+}
+
const std::shared_ptr<const KeyValueMetadata>& Schema::metadata() const {
- return impl_->metadata_;
-}
-
-std::shared_ptr<Schema> Schema::RemoveMetadata() const {
- return std::make_shared<Schema>(impl_->fields_);
-}
-
-std::string Schema::ToString(bool show_metadata) const {
- std::stringstream buffer;
-
- int i = 0;
- for (const auto& field : impl_->fields_) {
- if (i > 0) {
- buffer << std::endl;
- }
- buffer << field->ToString(show_metadata);
- ++i;
- }
-
+ return impl_->metadata_;
+}
+
+std::shared_ptr<Schema> Schema::RemoveMetadata() const {
+ return std::make_shared<Schema>(impl_->fields_);
+}
+
+std::string Schema::ToString(bool show_metadata) const {
+ std::stringstream buffer;
+
+ int i = 0;
+ for (const auto& field : impl_->fields_) {
+ if (i > 0) {
+ buffer << std::endl;
+ }
+ buffer << field->ToString(show_metadata);
+ ++i;
+ }
+
if (impl_->endianness_ != Endianness::Native) {
buffer << "\n-- endianness: " << EndiannessToString(impl_->endianness_) << " --";
}
- if (show_metadata && HasMetadata()) {
- buffer << impl_->metadata_->ToString();
- }
-
- return buffer.str();
-}
-
-std::vector<std::string> Schema::field_names() const {
- std::vector<std::string> names;
- for (const auto& field : impl_->fields_) {
- names.push_back(field->name());
- }
- return names;
-}
-
-class SchemaBuilder::Impl {
- public:
- friend class SchemaBuilder;
- Impl(ConflictPolicy policy, Field::MergeOptions field_merge_options)
- : policy_(policy), field_merge_options_(field_merge_options) {}
-
- Impl(std::vector<std::shared_ptr<Field>> fields,
- std::shared_ptr<const KeyValueMetadata> metadata, ConflictPolicy conflict_policy,
- Field::MergeOptions field_merge_options)
- : fields_(std::move(fields)),
- name_to_index_(CreateNameToIndexMap(fields_)),
- metadata_(std::move(metadata)),
- policy_(conflict_policy),
- field_merge_options_(field_merge_options) {}
-
- Status AddField(const std::shared_ptr<Field>& field) {
- DCHECK_NE(field, nullptr);
-
- // Short-circuit, no lookup needed.
- if (policy_ == CONFLICT_APPEND) {
- return AppendField(field);
- }
-
- auto name = field->name();
- constexpr int kNotFound = -1;
- constexpr int kDuplicateFound = -2;
- auto i = LookupNameIndex<kNotFound, kDuplicateFound>(name_to_index_, name);
-
- if (i == kNotFound) {
- return AppendField(field);
- }
-
- // From this point, there's one or more field in the builder that exists with
- // the same name.
-
- if (policy_ == CONFLICT_IGNORE) {
- // The ignore policy is more generous when there's duplicate in the builder.
- return Status::OK();
- } else if (policy_ == CONFLICT_ERROR) {
- return Status::Invalid("Duplicate found, policy dictate to treat as an error");
- }
-
- if (i == kDuplicateFound) {
- // Cannot merge/replace when there's more than one field in the builder
- // because we can't decide which to merge/replace.
- return Status::Invalid("Cannot merge field ", name,
- " more than one field with same name exists");
- }
-
- DCHECK_GE(i, 0);
-
- if (policy_ == CONFLICT_REPLACE) {
- fields_[i] = field;
- } else if (policy_ == CONFLICT_MERGE) {
- ARROW_ASSIGN_OR_RAISE(fields_[i], fields_[i]->MergeWith(field));
- }
-
- return Status::OK();
- }
-
- Status AppendField(const std::shared_ptr<Field>& field) {
- name_to_index_.emplace(field->name(), static_cast<int>(fields_.size()));
- fields_.push_back(field);
- return Status::OK();
- }
-
- void Reset() {
- fields_.clear();
- name_to_index_.clear();
- metadata_.reset();
- }
-
- private:
- std::vector<std::shared_ptr<Field>> fields_;
- std::unordered_multimap<std::string, int> name_to_index_;
- std::shared_ptr<const KeyValueMetadata> metadata_;
- ConflictPolicy policy_;
- Field::MergeOptions field_merge_options_;
-};
-
-SchemaBuilder::SchemaBuilder(ConflictPolicy policy,
- Field::MergeOptions field_merge_options) {
- impl_ = internal::make_unique<Impl>(policy, field_merge_options);
-}
-
-SchemaBuilder::SchemaBuilder(std::vector<std::shared_ptr<Field>> fields,
- ConflictPolicy policy,
- Field::MergeOptions field_merge_options) {
- impl_ = internal::make_unique<Impl>(std::move(fields), nullptr, policy,
- field_merge_options);
-}
-
-SchemaBuilder::SchemaBuilder(const std::shared_ptr<Schema>& schema, ConflictPolicy policy,
- Field::MergeOptions field_merge_options) {
- std::shared_ptr<const KeyValueMetadata> metadata;
- if (schema->HasMetadata()) {
- metadata = schema->metadata()->Copy();
- }
-
- impl_ = internal::make_unique<Impl>(schema->fields(), std::move(metadata), policy,
- field_merge_options);
-}
-
-SchemaBuilder::~SchemaBuilder() {}
-
-SchemaBuilder::ConflictPolicy SchemaBuilder::policy() const { return impl_->policy_; }
-
-void SchemaBuilder::SetPolicy(SchemaBuilder::ConflictPolicy resolution) {
- impl_->policy_ = resolution;
-}
-
-Status SchemaBuilder::AddField(const std::shared_ptr<Field>& field) {
- return impl_->AddField(field);
-}
-
-Status SchemaBuilder::AddFields(const std::vector<std::shared_ptr<Field>>& fields) {
- for (const auto& field : fields) {
- RETURN_NOT_OK(AddField(field));
- }
-
- return Status::OK();
-}
-
-Status SchemaBuilder::AddSchema(const std::shared_ptr<Schema>& schema) {
- DCHECK_NE(schema, nullptr);
- return AddFields(schema->fields());
-}
-
-Status SchemaBuilder::AddSchemas(const std::vector<std::shared_ptr<Schema>>& schemas) {
- for (const auto& schema : schemas) {
- RETURN_NOT_OK(AddSchema(schema));
- }
-
- return Status::OK();
-}
-
-Status SchemaBuilder::AddMetadata(const KeyValueMetadata& metadata) {
- impl_->metadata_ = metadata.Copy();
- return Status::OK();
-}
-
-Result<std::shared_ptr<Schema>> SchemaBuilder::Finish() const {
- return schema(impl_->fields_, impl_->metadata_);
-}
-
-void SchemaBuilder::Reset() { impl_->Reset(); }
-
-Result<std::shared_ptr<Schema>> SchemaBuilder::Merge(
- const std::vector<std::shared_ptr<Schema>>& schemas, ConflictPolicy policy) {
- SchemaBuilder builder{policy};
- RETURN_NOT_OK(builder.AddSchemas(schemas));
- return builder.Finish();
-}
-
-Status SchemaBuilder::AreCompatible(const std::vector<std::shared_ptr<Schema>>& schemas,
- ConflictPolicy policy) {
- return Merge(schemas, policy).status();
-}
-
-std::shared_ptr<Schema> schema(std::vector<std::shared_ptr<Field>> fields,
- std::shared_ptr<const KeyValueMetadata> metadata) {
- return std::make_shared<Schema>(std::move(fields), std::move(metadata));
-}
-
+ if (show_metadata && HasMetadata()) {
+ buffer << impl_->metadata_->ToString();
+ }
+
+ return buffer.str();
+}
+
+std::vector<std::string> Schema::field_names() const {
+ std::vector<std::string> names;
+ for (const auto& field : impl_->fields_) {
+ names.push_back(field->name());
+ }
+ return names;
+}
+
+class SchemaBuilder::Impl {
+ public:
+ friend class SchemaBuilder;
+ Impl(ConflictPolicy policy, Field::MergeOptions field_merge_options)
+ : policy_(policy), field_merge_options_(field_merge_options) {}
+
+ Impl(std::vector<std::shared_ptr<Field>> fields,
+ std::shared_ptr<const KeyValueMetadata> metadata, ConflictPolicy conflict_policy,
+ Field::MergeOptions field_merge_options)
+ : fields_(std::move(fields)),
+ name_to_index_(CreateNameToIndexMap(fields_)),
+ metadata_(std::move(metadata)),
+ policy_(conflict_policy),
+ field_merge_options_(field_merge_options) {}
+
+ Status AddField(const std::shared_ptr<Field>& field) {
+ DCHECK_NE(field, nullptr);
+
+ // Short-circuit, no lookup needed.
+ if (policy_ == CONFLICT_APPEND) {
+ return AppendField(field);
+ }
+
+ auto name = field->name();
+ constexpr int kNotFound = -1;
+ constexpr int kDuplicateFound = -2;
+ auto i = LookupNameIndex<kNotFound, kDuplicateFound>(name_to_index_, name);
+
+ if (i == kNotFound) {
+ return AppendField(field);
+ }
+
+ // From this point, there's one or more field in the builder that exists with
+ // the same name.
+
+ if (policy_ == CONFLICT_IGNORE) {
+ // The ignore policy is more generous when there's duplicate in the builder.
+ return Status::OK();
+ } else if (policy_ == CONFLICT_ERROR) {
+ return Status::Invalid("Duplicate found, policy dictate to treat as an error");
+ }
+
+ if (i == kDuplicateFound) {
+ // Cannot merge/replace when there's more than one field in the builder
+ // because we can't decide which to merge/replace.
+ return Status::Invalid("Cannot merge field ", name,
+ " more than one field with same name exists");
+ }
+
+ DCHECK_GE(i, 0);
+
+ if (policy_ == CONFLICT_REPLACE) {
+ fields_[i] = field;
+ } else if (policy_ == CONFLICT_MERGE) {
+ ARROW_ASSIGN_OR_RAISE(fields_[i], fields_[i]->MergeWith(field));
+ }
+
+ return Status::OK();
+ }
+
+ Status AppendField(const std::shared_ptr<Field>& field) {
+ name_to_index_.emplace(field->name(), static_cast<int>(fields_.size()));
+ fields_.push_back(field);
+ return Status::OK();
+ }
+
+ void Reset() {
+ fields_.clear();
+ name_to_index_.clear();
+ metadata_.reset();
+ }
+
+ private:
+ std::vector<std::shared_ptr<Field>> fields_;
+ std::unordered_multimap<std::string, int> name_to_index_;
+ std::shared_ptr<const KeyValueMetadata> metadata_;
+ ConflictPolicy policy_;
+ Field::MergeOptions field_merge_options_;
+};
+
+SchemaBuilder::SchemaBuilder(ConflictPolicy policy,
+ Field::MergeOptions field_merge_options) {
+ impl_ = internal::make_unique<Impl>(policy, field_merge_options);
+}
+
+SchemaBuilder::SchemaBuilder(std::vector<std::shared_ptr<Field>> fields,
+ ConflictPolicy policy,
+ Field::MergeOptions field_merge_options) {
+ impl_ = internal::make_unique<Impl>(std::move(fields), nullptr, policy,
+ field_merge_options);
+}
+
+SchemaBuilder::SchemaBuilder(const std::shared_ptr<Schema>& schema, ConflictPolicy policy,
+ Field::MergeOptions field_merge_options) {
+ std::shared_ptr<const KeyValueMetadata> metadata;
+ if (schema->HasMetadata()) {
+ metadata = schema->metadata()->Copy();
+ }
+
+ impl_ = internal::make_unique<Impl>(schema->fields(), std::move(metadata), policy,
+ field_merge_options);
+}
+
+SchemaBuilder::~SchemaBuilder() {}
+
+SchemaBuilder::ConflictPolicy SchemaBuilder::policy() const { return impl_->policy_; }
+
+void SchemaBuilder::SetPolicy(SchemaBuilder::ConflictPolicy resolution) {
+ impl_->policy_ = resolution;
+}
+
+Status SchemaBuilder::AddField(const std::shared_ptr<Field>& field) {
+ return impl_->AddField(field);
+}
+
+Status SchemaBuilder::AddFields(const std::vector<std::shared_ptr<Field>>& fields) {
+ for (const auto& field : fields) {
+ RETURN_NOT_OK(AddField(field));
+ }
+
+ return Status::OK();
+}
+
+Status SchemaBuilder::AddSchema(const std::shared_ptr<Schema>& schema) {
+ DCHECK_NE(schema, nullptr);
+ return AddFields(schema->fields());
+}
+
+Status SchemaBuilder::AddSchemas(const std::vector<std::shared_ptr<Schema>>& schemas) {
+ for (const auto& schema : schemas) {
+ RETURN_NOT_OK(AddSchema(schema));
+ }
+
+ return Status::OK();
+}
+
+Status SchemaBuilder::AddMetadata(const KeyValueMetadata& metadata) {
+ impl_->metadata_ = metadata.Copy();
+ return Status::OK();
+}
+
+Result<std::shared_ptr<Schema>> SchemaBuilder::Finish() const {
+ return schema(impl_->fields_, impl_->metadata_);
+}
+
+void SchemaBuilder::Reset() { impl_->Reset(); }
+
+Result<std::shared_ptr<Schema>> SchemaBuilder::Merge(
+ const std::vector<std::shared_ptr<Schema>>& schemas, ConflictPolicy policy) {
+ SchemaBuilder builder{policy};
+ RETURN_NOT_OK(builder.AddSchemas(schemas));
+ return builder.Finish();
+}
+
+Status SchemaBuilder::AreCompatible(const std::vector<std::shared_ptr<Schema>>& schemas,
+ ConflictPolicy policy) {
+ return Merge(schemas, policy).status();
+}
+
+std::shared_ptr<Schema> schema(std::vector<std::shared_ptr<Field>> fields,
+ std::shared_ptr<const KeyValueMetadata> metadata) {
+ return std::make_shared<Schema>(std::move(fields), std::move(metadata));
+}
+
std::shared_ptr<Schema> schema(std::vector<std::shared_ptr<Field>> fields,
Endianness endianness,
std::shared_ptr<const KeyValueMetadata> metadata) {
return std::make_shared<Schema>(std::move(fields), endianness, std::move(metadata));
}
-Result<std::shared_ptr<Schema>> UnifySchemas(
- const std::vector<std::shared_ptr<Schema>>& schemas,
- const Field::MergeOptions field_merge_options) {
- if (schemas.empty()) {
- return Status::Invalid("Must provide at least one schema to unify.");
- }
-
- if (!schemas[0]->HasDistinctFieldNames()) {
- return Status::Invalid("Can't unify schema with duplicate field names.");
- }
-
- SchemaBuilder builder{schemas[0], SchemaBuilder::CONFLICT_MERGE, field_merge_options};
-
- for (size_t i = 1; i < schemas.size(); i++) {
- const auto& schema = schemas[i];
- if (!schema->HasDistinctFieldNames()) {
- return Status::Invalid("Can't unify schema with duplicate field names.");
- }
- RETURN_NOT_OK(builder.AddSchema(schema));
- }
-
- return builder.Finish();
-}
-
-// ----------------------------------------------------------------------
-// Fingerprint computations
-
-namespace detail {
-
-Fingerprintable::~Fingerprintable() {
- delete fingerprint_.load();
- delete metadata_fingerprint_.load();
-}
-
-template <typename ComputeFingerprint>
-static const std::string& LoadFingerprint(std::atomic<std::string*>* fingerprint,
- ComputeFingerprint&& compute_fingerprint) {
- auto new_p = new std::string(std::forward<ComputeFingerprint>(compute_fingerprint)());
- // Since fingerprint() and metadata_fingerprint() return a *reference* to the
- // allocated string, the first allocation ever should never be replaced by another
- // one. Hence the compare_exchange_strong() against nullptr.
- std::string* expected = nullptr;
- if (fingerprint->compare_exchange_strong(expected, new_p)) {
- return *new_p;
- } else {
- delete new_p;
- DCHECK_NE(expected, nullptr);
- return *expected;
- }
-}
-
-const std::string& Fingerprintable::LoadFingerprintSlow() const {
- return LoadFingerprint(&fingerprint_, [this]() { return ComputeFingerprint(); });
-}
-
-const std::string& Fingerprintable::LoadMetadataFingerprintSlow() const {
- return LoadFingerprint(&metadata_fingerprint_,
- [this]() { return ComputeMetadataFingerprint(); });
-}
-
-} // namespace detail
-
-static inline std::string TypeIdFingerprint(const DataType& type) {
- auto c = static_cast<int>(type.id()) + 'A';
- DCHECK_GE(c, 0);
- DCHECK_LT(c, 128); // Unlikely to happen any soon
- // Prefix with an unusual character in order to disambiguate
- std::string s{'@', static_cast<char>(c)};
- return s;
-}
-
-static char TimeUnitFingerprint(TimeUnit::type unit) {
- switch (unit) {
- case TimeUnit::SECOND:
- return 's';
- case TimeUnit::MILLI:
- return 'm';
- case TimeUnit::MICRO:
- return 'u';
- case TimeUnit::NANO:
- return 'n';
- default:
- DCHECK(false) << "Unexpected TimeUnit";
- return '\0';
- }
-}
-
-static char IntervalTypeFingerprint(IntervalType::type unit) {
- switch (unit) {
- case IntervalType::DAY_TIME:
- return 'd';
- case IntervalType::MONTHS:
- return 'M';
- default:
- DCHECK(false) << "Unexpected IntervalType::type";
- return '\0';
- }
-}
-
-static void AppendMetadataFingerprint(const KeyValueMetadata& metadata,
- std::stringstream* ss) {
- // Compute metadata fingerprint. KeyValueMetadata is not immutable,
- // so we don't cache the result on the metadata instance.
- const auto pairs = metadata.sorted_pairs();
- if (!pairs.empty()) {
- *ss << "!{";
- for (const auto& p : pairs) {
- const auto& k = p.first;
- const auto& v = p.second;
- // Since metadata strings can contain arbitrary characters, prefix with
- // string length to disambiguate.
- *ss << k.length() << ':' << k << ':';
- *ss << v.length() << ':' << v << ';';
- }
- *ss << '}';
- }
-}
-
-std::string Field::ComputeFingerprint() const {
- const auto& type_fingerprint = type_->fingerprint();
- if (type_fingerprint.empty()) {
- // Underlying DataType doesn't support fingerprinting.
- return "";
- }
- std::stringstream ss;
- ss << 'F';
- if (nullable_) {
- ss << 'n';
- } else {
- ss << 'N';
- }
- ss << name_;
- ss << '{' << type_fingerprint << '}';
- return ss.str();
-}
-
-std::string Field::ComputeMetadataFingerprint() const {
- std::stringstream ss;
- if (metadata_) {
- AppendMetadataFingerprint(*metadata_, &ss);
- }
- const auto& type_fingerprint = type_->metadata_fingerprint();
- if (!type_fingerprint.empty()) {
- ss << "+{" << type_->metadata_fingerprint() << "}";
- }
- return ss.str();
-}
-
-std::string Schema::ComputeFingerprint() const {
- std::stringstream ss;
- ss << "S{";
- for (const auto& field : fields()) {
- const auto& field_fingerprint = field->fingerprint();
- if (field_fingerprint.empty()) {
- return "";
- }
- ss << field_fingerprint << ";";
- }
+Result<std::shared_ptr<Schema>> UnifySchemas(
+ const std::vector<std::shared_ptr<Schema>>& schemas,
+ const Field::MergeOptions field_merge_options) {
+ if (schemas.empty()) {
+ return Status::Invalid("Must provide at least one schema to unify.");
+ }
+
+ if (!schemas[0]->HasDistinctFieldNames()) {
+ return Status::Invalid("Can't unify schema with duplicate field names.");
+ }
+
+ SchemaBuilder builder{schemas[0], SchemaBuilder::CONFLICT_MERGE, field_merge_options};
+
+ for (size_t i = 1; i < schemas.size(); i++) {
+ const auto& schema = schemas[i];
+ if (!schema->HasDistinctFieldNames()) {
+ return Status::Invalid("Can't unify schema with duplicate field names.");
+ }
+ RETURN_NOT_OK(builder.AddSchema(schema));
+ }
+
+ return builder.Finish();
+}
+
+// ----------------------------------------------------------------------
+// Fingerprint computations
+
+namespace detail {
+
+Fingerprintable::~Fingerprintable() {
+ delete fingerprint_.load();
+ delete metadata_fingerprint_.load();
+}
+
+template <typename ComputeFingerprint>
+static const std::string& LoadFingerprint(std::atomic<std::string*>* fingerprint,
+ ComputeFingerprint&& compute_fingerprint) {
+ auto new_p = new std::string(std::forward<ComputeFingerprint>(compute_fingerprint)());
+ // Since fingerprint() and metadata_fingerprint() return a *reference* to the
+ // allocated string, the first allocation ever should never be replaced by another
+ // one. Hence the compare_exchange_strong() against nullptr.
+ std::string* expected = nullptr;
+ if (fingerprint->compare_exchange_strong(expected, new_p)) {
+ return *new_p;
+ } else {
+ delete new_p;
+ DCHECK_NE(expected, nullptr);
+ return *expected;
+ }
+}
+
+const std::string& Fingerprintable::LoadFingerprintSlow() const {
+ return LoadFingerprint(&fingerprint_, [this]() { return ComputeFingerprint(); });
+}
+
+const std::string& Fingerprintable::LoadMetadataFingerprintSlow() const {
+ return LoadFingerprint(&metadata_fingerprint_,
+ [this]() { return ComputeMetadataFingerprint(); });
+}
+
+} // namespace detail
+
+static inline std::string TypeIdFingerprint(const DataType& type) {
+ auto c = static_cast<int>(type.id()) + 'A';
+ DCHECK_GE(c, 0);
+ DCHECK_LT(c, 128); // Unlikely to happen any soon
+ // Prefix with an unusual character in order to disambiguate
+ std::string s{'@', static_cast<char>(c)};
+ return s;
+}
+
+static char TimeUnitFingerprint(TimeUnit::type unit) {
+ switch (unit) {
+ case TimeUnit::SECOND:
+ return 's';
+ case TimeUnit::MILLI:
+ return 'm';
+ case TimeUnit::MICRO:
+ return 'u';
+ case TimeUnit::NANO:
+ return 'n';
+ default:
+ DCHECK(false) << "Unexpected TimeUnit";
+ return '\0';
+ }
+}
+
+static char IntervalTypeFingerprint(IntervalType::type unit) {
+ switch (unit) {
+ case IntervalType::DAY_TIME:
+ return 'd';
+ case IntervalType::MONTHS:
+ return 'M';
+ default:
+ DCHECK(false) << "Unexpected IntervalType::type";
+ return '\0';
+ }
+}
+
+static void AppendMetadataFingerprint(const KeyValueMetadata& metadata,
+ std::stringstream* ss) {
+ // Compute metadata fingerprint. KeyValueMetadata is not immutable,
+ // so we don't cache the result on the metadata instance.
+ const auto pairs = metadata.sorted_pairs();
+ if (!pairs.empty()) {
+ *ss << "!{";
+ for (const auto& p : pairs) {
+ const auto& k = p.first;
+ const auto& v = p.second;
+ // Since metadata strings can contain arbitrary characters, prefix with
+ // string length to disambiguate.
+ *ss << k.length() << ':' << k << ':';
+ *ss << v.length() << ':' << v << ';';
+ }
+ *ss << '}';
+ }
+}
+
+std::string Field::ComputeFingerprint() const {
+ const auto& type_fingerprint = type_->fingerprint();
+ if (type_fingerprint.empty()) {
+ // Underlying DataType doesn't support fingerprinting.
+ return "";
+ }
+ std::stringstream ss;
+ ss << 'F';
+ if (nullable_) {
+ ss << 'n';
+ } else {
+ ss << 'N';
+ }
+ ss << name_;
+ ss << '{' << type_fingerprint << '}';
+ return ss.str();
+}
+
+std::string Field::ComputeMetadataFingerprint() const {
+ std::stringstream ss;
+ if (metadata_) {
+ AppendMetadataFingerprint(*metadata_, &ss);
+ }
+ const auto& type_fingerprint = type_->metadata_fingerprint();
+ if (!type_fingerprint.empty()) {
+ ss << "+{" << type_->metadata_fingerprint() << "}";
+ }
+ return ss.str();
+}
+
+std::string Schema::ComputeFingerprint() const {
+ std::stringstream ss;
+ ss << "S{";
+ for (const auto& field : fields()) {
+ const auto& field_fingerprint = field->fingerprint();
+ if (field_fingerprint.empty()) {
+ return "";
+ }
+ ss << field_fingerprint << ";";
+ }
ss << (endianness() == Endianness::Little ? "L" : "B");
- ss << "}";
- return ss.str();
-}
-
-std::string Schema::ComputeMetadataFingerprint() const {
- std::stringstream ss;
- if (HasMetadata()) {
- AppendMetadataFingerprint(*metadata(), &ss);
- }
- ss << "S{";
- for (const auto& field : fields()) {
- const auto& field_fingerprint = field->metadata_fingerprint();
- ss << field_fingerprint << ";";
- }
- ss << "}";
- return ss.str();
-}
-
-void PrintTo(const Schema& s, std::ostream* os) { *os << s; }
-
-std::string DataType::ComputeFingerprint() const {
- // Default implementation returns empty string, signalling non-implemented
- // functionality.
- return "";
-}
-
-std::string DataType::ComputeMetadataFingerprint() const {
- // Whatever the data type, metadata can only be found on child fields
- std::string s;
- for (const auto& child : children_) {
- s += child->metadata_fingerprint() + ";";
- }
- return s;
-}
-
-#define PARAMETER_LESS_FINGERPRINT(TYPE_CLASS) \
- std::string TYPE_CLASS##Type::ComputeFingerprint() const { \
- return TypeIdFingerprint(*this); \
- }
-
-PARAMETER_LESS_FINGERPRINT(Null)
-PARAMETER_LESS_FINGERPRINT(Boolean)
-PARAMETER_LESS_FINGERPRINT(Int8)
-PARAMETER_LESS_FINGERPRINT(Int16)
-PARAMETER_LESS_FINGERPRINT(Int32)
-PARAMETER_LESS_FINGERPRINT(Int64)
-PARAMETER_LESS_FINGERPRINT(UInt8)
-PARAMETER_LESS_FINGERPRINT(UInt16)
-PARAMETER_LESS_FINGERPRINT(UInt32)
-PARAMETER_LESS_FINGERPRINT(UInt64)
-PARAMETER_LESS_FINGERPRINT(HalfFloat)
-PARAMETER_LESS_FINGERPRINT(Float)
-PARAMETER_LESS_FINGERPRINT(Double)
-PARAMETER_LESS_FINGERPRINT(Binary)
-PARAMETER_LESS_FINGERPRINT(LargeBinary)
-PARAMETER_LESS_FINGERPRINT(String)
-PARAMETER_LESS_FINGERPRINT(LargeString)
-PARAMETER_LESS_FINGERPRINT(Date32)
-PARAMETER_LESS_FINGERPRINT(Date64)
-
-#undef PARAMETER_LESS_FINGERPRINT
-
-std::string DictionaryType::ComputeFingerprint() const {
- const auto& index_fingerprint = index_type_->fingerprint();
- const auto& value_fingerprint = value_type_->fingerprint();
- std::string ordered_fingerprint = ordered_ ? "1" : "0";
-
- DCHECK(!index_fingerprint.empty()); // it's an integer type
- if (!value_fingerprint.empty()) {
- return TypeIdFingerprint(*this) + index_fingerprint + value_fingerprint +
- ordered_fingerprint;
- }
- return ordered_fingerprint;
-}
-
-std::string ListType::ComputeFingerprint() const {
- const auto& child_fingerprint = children_[0]->fingerprint();
- if (!child_fingerprint.empty()) {
- return TypeIdFingerprint(*this) + "{" + child_fingerprint + "}";
- }
- return "";
-}
-
-std::string LargeListType::ComputeFingerprint() const {
- const auto& child_fingerprint = children_[0]->fingerprint();
- if (!child_fingerprint.empty()) {
- return TypeIdFingerprint(*this) + "{" + child_fingerprint + "}";
- }
- return "";
-}
-
-std::string MapType::ComputeFingerprint() const {
- const auto& key_fingerprint = key_type()->fingerprint();
- const auto& item_fingerprint = item_type()->fingerprint();
- if (!key_fingerprint.empty() && !item_fingerprint.empty()) {
- if (keys_sorted_) {
- return TypeIdFingerprint(*this) + "s{" + key_fingerprint + item_fingerprint + "}";
- } else {
- return TypeIdFingerprint(*this) + "{" + key_fingerprint + item_fingerprint + "}";
- }
- }
- return "";
-}
-
-std::string FixedSizeListType::ComputeFingerprint() const {
- const auto& child_fingerprint = children_[0]->fingerprint();
- if (!child_fingerprint.empty()) {
- std::stringstream ss;
- ss << TypeIdFingerprint(*this) << "[" << list_size_ << "]"
- << "{" << child_fingerprint << "}";
- return ss.str();
- }
- return "";
-}
-
-std::string FixedSizeBinaryType::ComputeFingerprint() const {
- std::stringstream ss;
- ss << TypeIdFingerprint(*this) << "[" << byte_width_ << "]";
- return ss.str();
-}
-
-std::string DecimalType::ComputeFingerprint() const {
- std::stringstream ss;
- ss << TypeIdFingerprint(*this) << "[" << byte_width_ << "," << precision_ << ","
- << scale_ << "]";
- return ss.str();
-}
-
-std::string StructType::ComputeFingerprint() const {
- std::stringstream ss;
- ss << TypeIdFingerprint(*this) << "{";
- for (const auto& child : children_) {
- const auto& child_fingerprint = child->fingerprint();
- if (child_fingerprint.empty()) {
- return "";
- }
- ss << child_fingerprint << ";";
- }
- ss << "}";
- return ss.str();
-}
-
-std::string UnionType::ComputeFingerprint() const {
- std::stringstream ss;
- ss << TypeIdFingerprint(*this);
- switch (mode()) {
- case UnionMode::SPARSE:
- ss << "[s";
- break;
- case UnionMode::DENSE:
- ss << "[d";
- break;
- default:
- DCHECK(false) << "Unexpected UnionMode";
- }
- for (const auto code : type_codes_) {
- // Represent code as integer, not raw character
- ss << ':' << static_cast<int32_t>(code);
- }
- ss << "]{";
- for (const auto& child : children_) {
- const auto& child_fingerprint = child->fingerprint();
- if (child_fingerprint.empty()) {
- return "";
- }
- ss << child_fingerprint << ";";
- }
- ss << "}";
- return ss.str();
-}
-
-std::string TimeType::ComputeFingerprint() const {
- std::stringstream ss;
- ss << TypeIdFingerprint(*this) << TimeUnitFingerprint(unit_);
- return ss.str();
-}
-
-std::string TimestampType::ComputeFingerprint() const {
- std::stringstream ss;
- ss << TypeIdFingerprint(*this) << TimeUnitFingerprint(unit_) << timezone_.length()
- << ':' << timezone_;
- return ss.str();
-}
-
-std::string IntervalType::ComputeFingerprint() const {
- std::stringstream ss;
- ss << TypeIdFingerprint(*this) << IntervalTypeFingerprint(interval_type());
- return ss.str();
-}
-
-std::string DurationType::ComputeFingerprint() const {
- std::stringstream ss;
- ss << TypeIdFingerprint(*this) << TimeUnitFingerprint(unit_);
- return ss.str();
-}
-
-// ----------------------------------------------------------------------
-// Visitors and factory functions
-
-Status DataType::Accept(TypeVisitor* visitor) const {
- return VisitTypeInline(*this, visitor);
-}
-
-#define TYPE_FACTORY(NAME, KLASS) \
- std::shared_ptr<DataType> NAME() { \
- static std::shared_ptr<DataType> result = std::make_shared<KLASS>(); \
- return result; \
- }
-
-TYPE_FACTORY(null, NullType)
-TYPE_FACTORY(boolean, BooleanType)
-TYPE_FACTORY(int8, Int8Type)
-TYPE_FACTORY(uint8, UInt8Type)
-TYPE_FACTORY(int16, Int16Type)
-TYPE_FACTORY(uint16, UInt16Type)
-TYPE_FACTORY(int32, Int32Type)
-TYPE_FACTORY(uint32, UInt32Type)
-TYPE_FACTORY(int64, Int64Type)
-TYPE_FACTORY(uint64, UInt64Type)
-TYPE_FACTORY(float16, HalfFloatType)
-TYPE_FACTORY(float32, FloatType)
-TYPE_FACTORY(float64, DoubleType)
-TYPE_FACTORY(utf8, StringType)
-TYPE_FACTORY(large_utf8, LargeStringType)
-TYPE_FACTORY(binary, BinaryType)
-TYPE_FACTORY(large_binary, LargeBinaryType)
-TYPE_FACTORY(date64, Date64Type)
-TYPE_FACTORY(date32, Date32Type)
-
-std::shared_ptr<DataType> fixed_size_binary(int32_t byte_width) {
- return std::make_shared<FixedSizeBinaryType>(byte_width);
-}
-
-std::shared_ptr<DataType> duration(TimeUnit::type unit) {
- return std::make_shared<DurationType>(unit);
-}
-
-std::shared_ptr<DataType> day_time_interval() {
- return std::make_shared<DayTimeIntervalType>();
-}
-
-std::shared_ptr<DataType> month_interval() {
- return std::make_shared<MonthIntervalType>();
-}
-
-std::shared_ptr<DataType> timestamp(TimeUnit::type unit) {
- return std::make_shared<TimestampType>(unit);
-}
-
-std::shared_ptr<DataType> timestamp(TimeUnit::type unit, const std::string& timezone) {
- return std::make_shared<TimestampType>(unit, timezone);
-}
-
-std::shared_ptr<DataType> time32(TimeUnit::type unit) {
- return std::make_shared<Time32Type>(unit);
-}
-
-std::shared_ptr<DataType> time64(TimeUnit::type unit) {
- return std::make_shared<Time64Type>(unit);
-}
-
-std::shared_ptr<DataType> list(const std::shared_ptr<DataType>& value_type) {
- return std::make_shared<ListType>(value_type);
-}
-
-std::shared_ptr<DataType> list(const std::shared_ptr<Field>& value_field) {
- return std::make_shared<ListType>(value_field);
-}
-
-std::shared_ptr<DataType> large_list(const std::shared_ptr<DataType>& value_type) {
- return std::make_shared<LargeListType>(value_type);
-}
-
-std::shared_ptr<DataType> large_list(const std::shared_ptr<Field>& value_field) {
- return std::make_shared<LargeListType>(value_field);
-}
-
-std::shared_ptr<DataType> map(std::shared_ptr<DataType> key_type,
- std::shared_ptr<DataType> item_type, bool keys_sorted) {
- return std::make_shared<MapType>(std::move(key_type), std::move(item_type),
- keys_sorted);
-}
-
-std::shared_ptr<DataType> map(std::shared_ptr<DataType> key_type,
- std::shared_ptr<Field> item_field, bool keys_sorted) {
- return std::make_shared<MapType>(std::move(key_type), std::move(item_field),
- keys_sorted);
-}
-
-std::shared_ptr<DataType> fixed_size_list(const std::shared_ptr<DataType>& value_type,
- int32_t list_size) {
- return std::make_shared<FixedSizeListType>(value_type, list_size);
-}
-
-std::shared_ptr<DataType> fixed_size_list(const std::shared_ptr<Field>& value_field,
- int32_t list_size) {
- return std::make_shared<FixedSizeListType>(value_field, list_size);
-}
-
-std::shared_ptr<DataType> struct_(const std::vector<std::shared_ptr<Field>>& fields) {
- return std::make_shared<StructType>(fields);
-}
-
-std::shared_ptr<DataType> sparse_union(FieldVector child_fields,
- std::vector<int8_t> type_codes) {
- if (type_codes.empty()) {
- type_codes = internal::Iota(static_cast<int8_t>(child_fields.size()));
- }
- return std::make_shared<SparseUnionType>(std::move(child_fields),
- std::move(type_codes));
-}
-std::shared_ptr<DataType> dense_union(FieldVector child_fields,
- std::vector<int8_t> type_codes) {
- if (type_codes.empty()) {
- type_codes = internal::Iota(static_cast<int8_t>(child_fields.size()));
- }
- return std::make_shared<DenseUnionType>(std::move(child_fields), std::move(type_codes));
-}
-
-FieldVector FieldsFromArraysAndNames(std::vector<std::string> names,
- const ArrayVector& arrays) {
- FieldVector fields(arrays.size());
- int i = 0;
- if (names.empty()) {
- for (const auto& array : arrays) {
- fields[i] = field(std::to_string(i), array->type());
- ++i;
- }
- } else {
- DCHECK_EQ(names.size(), arrays.size());
- for (const auto& array : arrays) {
- fields[i] = field(std::move(names[i]), array->type());
- ++i;
- }
- }
- return fields;
-}
-
-std::shared_ptr<DataType> sparse_union(const ArrayVector& children,
- std::vector<std::string> field_names,
- std::vector<int8_t> type_codes) {
- if (type_codes.empty()) {
- type_codes = internal::Iota(static_cast<int8_t>(children.size()));
- }
- auto fields = FieldsFromArraysAndNames(std::move(field_names), children);
- return sparse_union(std::move(fields), std::move(type_codes));
-}
-
-std::shared_ptr<DataType> dense_union(const ArrayVector& children,
- std::vector<std::string> field_names,
- std::vector<int8_t> type_codes) {
- if (type_codes.empty()) {
- type_codes = internal::Iota(static_cast<int8_t>(children.size()));
- }
- auto fields = FieldsFromArraysAndNames(std::move(field_names), children);
- return dense_union(std::move(fields), std::move(type_codes));
-}
-
-std::shared_ptr<DataType> dictionary(const std::shared_ptr<DataType>& index_type,
- const std::shared_ptr<DataType>& dict_type,
- bool ordered) {
- return std::make_shared<DictionaryType>(index_type, dict_type, ordered);
-}
-
-std::shared_ptr<Field> field(std::string name, std::shared_ptr<DataType> type,
- bool nullable,
- std::shared_ptr<const KeyValueMetadata> metadata) {
- return std::make_shared<Field>(std::move(name), std::move(type), nullable,
- std::move(metadata));
-}
-
+ ss << "}";
+ return ss.str();
+}
+
+std::string Schema::ComputeMetadataFingerprint() const {
+ std::stringstream ss;
+ if (HasMetadata()) {
+ AppendMetadataFingerprint(*metadata(), &ss);
+ }
+ ss << "S{";
+ for (const auto& field : fields()) {
+ const auto& field_fingerprint = field->metadata_fingerprint();
+ ss << field_fingerprint << ";";
+ }
+ ss << "}";
+ return ss.str();
+}
+
+void PrintTo(const Schema& s, std::ostream* os) { *os << s; }
+
+std::string DataType::ComputeFingerprint() const {
+ // Default implementation returns empty string, signalling non-implemented
+ // functionality.
+ return "";
+}
+
+std::string DataType::ComputeMetadataFingerprint() const {
+ // Whatever the data type, metadata can only be found on child fields
+ std::string s;
+ for (const auto& child : children_) {
+ s += child->metadata_fingerprint() + ";";
+ }
+ return s;
+}
+
+#define PARAMETER_LESS_FINGERPRINT(TYPE_CLASS) \
+ std::string TYPE_CLASS##Type::ComputeFingerprint() const { \
+ return TypeIdFingerprint(*this); \
+ }
+
+PARAMETER_LESS_FINGERPRINT(Null)
+PARAMETER_LESS_FINGERPRINT(Boolean)
+PARAMETER_LESS_FINGERPRINT(Int8)
+PARAMETER_LESS_FINGERPRINT(Int16)
+PARAMETER_LESS_FINGERPRINT(Int32)
+PARAMETER_LESS_FINGERPRINT(Int64)
+PARAMETER_LESS_FINGERPRINT(UInt8)
+PARAMETER_LESS_FINGERPRINT(UInt16)
+PARAMETER_LESS_FINGERPRINT(UInt32)
+PARAMETER_LESS_FINGERPRINT(UInt64)
+PARAMETER_LESS_FINGERPRINT(HalfFloat)
+PARAMETER_LESS_FINGERPRINT(Float)
+PARAMETER_LESS_FINGERPRINT(Double)
+PARAMETER_LESS_FINGERPRINT(Binary)
+PARAMETER_LESS_FINGERPRINT(LargeBinary)
+PARAMETER_LESS_FINGERPRINT(String)
+PARAMETER_LESS_FINGERPRINT(LargeString)
+PARAMETER_LESS_FINGERPRINT(Date32)
+PARAMETER_LESS_FINGERPRINT(Date64)
+
+#undef PARAMETER_LESS_FINGERPRINT
+
+std::string DictionaryType::ComputeFingerprint() const {
+ const auto& index_fingerprint = index_type_->fingerprint();
+ const auto& value_fingerprint = value_type_->fingerprint();
+ std::string ordered_fingerprint = ordered_ ? "1" : "0";
+
+ DCHECK(!index_fingerprint.empty()); // it's an integer type
+ if (!value_fingerprint.empty()) {
+ return TypeIdFingerprint(*this) + index_fingerprint + value_fingerprint +
+ ordered_fingerprint;
+ }
+ return ordered_fingerprint;
+}
+
+std::string ListType::ComputeFingerprint() const {
+ const auto& child_fingerprint = children_[0]->fingerprint();
+ if (!child_fingerprint.empty()) {
+ return TypeIdFingerprint(*this) + "{" + child_fingerprint + "}";
+ }
+ return "";
+}
+
+std::string LargeListType::ComputeFingerprint() const {
+ const auto& child_fingerprint = children_[0]->fingerprint();
+ if (!child_fingerprint.empty()) {
+ return TypeIdFingerprint(*this) + "{" + child_fingerprint + "}";
+ }
+ return "";
+}
+
+std::string MapType::ComputeFingerprint() const {
+ const auto& key_fingerprint = key_type()->fingerprint();
+ const auto& item_fingerprint = item_type()->fingerprint();
+ if (!key_fingerprint.empty() && !item_fingerprint.empty()) {
+ if (keys_sorted_) {
+ return TypeIdFingerprint(*this) + "s{" + key_fingerprint + item_fingerprint + "}";
+ } else {
+ return TypeIdFingerprint(*this) + "{" + key_fingerprint + item_fingerprint + "}";
+ }
+ }
+ return "";
+}
+
+std::string FixedSizeListType::ComputeFingerprint() const {
+ const auto& child_fingerprint = children_[0]->fingerprint();
+ if (!child_fingerprint.empty()) {
+ std::stringstream ss;
+ ss << TypeIdFingerprint(*this) << "[" << list_size_ << "]"
+ << "{" << child_fingerprint << "}";
+ return ss.str();
+ }
+ return "";
+}
+
+std::string FixedSizeBinaryType::ComputeFingerprint() const {
+ std::stringstream ss;
+ ss << TypeIdFingerprint(*this) << "[" << byte_width_ << "]";
+ return ss.str();
+}
+
+std::string DecimalType::ComputeFingerprint() const {
+ std::stringstream ss;
+ ss << TypeIdFingerprint(*this) << "[" << byte_width_ << "," << precision_ << ","
+ << scale_ << "]";
+ return ss.str();
+}
+
+std::string StructType::ComputeFingerprint() const {
+ std::stringstream ss;
+ ss << TypeIdFingerprint(*this) << "{";
+ for (const auto& child : children_) {
+ const auto& child_fingerprint = child->fingerprint();
+ if (child_fingerprint.empty()) {
+ return "";
+ }
+ ss << child_fingerprint << ";";
+ }
+ ss << "}";
+ return ss.str();
+}
+
+std::string UnionType::ComputeFingerprint() const {
+ std::stringstream ss;
+ ss << TypeIdFingerprint(*this);
+ switch (mode()) {
+ case UnionMode::SPARSE:
+ ss << "[s";
+ break;
+ case UnionMode::DENSE:
+ ss << "[d";
+ break;
+ default:
+ DCHECK(false) << "Unexpected UnionMode";
+ }
+ for (const auto code : type_codes_) {
+ // Represent code as integer, not raw character
+ ss << ':' << static_cast<int32_t>(code);
+ }
+ ss << "]{";
+ for (const auto& child : children_) {
+ const auto& child_fingerprint = child->fingerprint();
+ if (child_fingerprint.empty()) {
+ return "";
+ }
+ ss << child_fingerprint << ";";
+ }
+ ss << "}";
+ return ss.str();
+}
+
+std::string TimeType::ComputeFingerprint() const {
+ std::stringstream ss;
+ ss << TypeIdFingerprint(*this) << TimeUnitFingerprint(unit_);
+ return ss.str();
+}
+
+std::string TimestampType::ComputeFingerprint() const {
+ std::stringstream ss;
+ ss << TypeIdFingerprint(*this) << TimeUnitFingerprint(unit_) << timezone_.length()
+ << ':' << timezone_;
+ return ss.str();
+}
+
+std::string IntervalType::ComputeFingerprint() const {
+ std::stringstream ss;
+ ss << TypeIdFingerprint(*this) << IntervalTypeFingerprint(interval_type());
+ return ss.str();
+}
+
+std::string DurationType::ComputeFingerprint() const {
+ std::stringstream ss;
+ ss << TypeIdFingerprint(*this) << TimeUnitFingerprint(unit_);
+ return ss.str();
+}
+
+// ----------------------------------------------------------------------
+// Visitors and factory functions
+
+Status DataType::Accept(TypeVisitor* visitor) const {
+ return VisitTypeInline(*this, visitor);
+}
+
+#define TYPE_FACTORY(NAME, KLASS) \
+ std::shared_ptr<DataType> NAME() { \
+ static std::shared_ptr<DataType> result = std::make_shared<KLASS>(); \
+ return result; \
+ }
+
+TYPE_FACTORY(null, NullType)
+TYPE_FACTORY(boolean, BooleanType)
+TYPE_FACTORY(int8, Int8Type)
+TYPE_FACTORY(uint8, UInt8Type)
+TYPE_FACTORY(int16, Int16Type)
+TYPE_FACTORY(uint16, UInt16Type)
+TYPE_FACTORY(int32, Int32Type)
+TYPE_FACTORY(uint32, UInt32Type)
+TYPE_FACTORY(int64, Int64Type)
+TYPE_FACTORY(uint64, UInt64Type)
+TYPE_FACTORY(float16, HalfFloatType)
+TYPE_FACTORY(float32, FloatType)
+TYPE_FACTORY(float64, DoubleType)
+TYPE_FACTORY(utf8, StringType)
+TYPE_FACTORY(large_utf8, LargeStringType)
+TYPE_FACTORY(binary, BinaryType)
+TYPE_FACTORY(large_binary, LargeBinaryType)
+TYPE_FACTORY(date64, Date64Type)
+TYPE_FACTORY(date32, Date32Type)
+
+std::shared_ptr<DataType> fixed_size_binary(int32_t byte_width) {
+ return std::make_shared<FixedSizeBinaryType>(byte_width);
+}
+
+std::shared_ptr<DataType> duration(TimeUnit::type unit) {
+ return std::make_shared<DurationType>(unit);
+}
+
+std::shared_ptr<DataType> day_time_interval() {
+ return std::make_shared<DayTimeIntervalType>();
+}
+
+std::shared_ptr<DataType> month_interval() {
+ return std::make_shared<MonthIntervalType>();
+}
+
+std::shared_ptr<DataType> timestamp(TimeUnit::type unit) {
+ return std::make_shared<TimestampType>(unit);
+}
+
+std::shared_ptr<DataType> timestamp(TimeUnit::type unit, const std::string& timezone) {
+ return std::make_shared<TimestampType>(unit, timezone);
+}
+
+std::shared_ptr<DataType> time32(TimeUnit::type unit) {
+ return std::make_shared<Time32Type>(unit);
+}
+
+std::shared_ptr<DataType> time64(TimeUnit::type unit) {
+ return std::make_shared<Time64Type>(unit);
+}
+
+std::shared_ptr<DataType> list(const std::shared_ptr<DataType>& value_type) {
+ return std::make_shared<ListType>(value_type);
+}
+
+std::shared_ptr<DataType> list(const std::shared_ptr<Field>& value_field) {
+ return std::make_shared<ListType>(value_field);
+}
+
+std::shared_ptr<DataType> large_list(const std::shared_ptr<DataType>& value_type) {
+ return std::make_shared<LargeListType>(value_type);
+}
+
+std::shared_ptr<DataType> large_list(const std::shared_ptr<Field>& value_field) {
+ return std::make_shared<LargeListType>(value_field);
+}
+
+std::shared_ptr<DataType> map(std::shared_ptr<DataType> key_type,
+ std::shared_ptr<DataType> item_type, bool keys_sorted) {
+ return std::make_shared<MapType>(std::move(key_type), std::move(item_type),
+ keys_sorted);
+}
+
+std::shared_ptr<DataType> map(std::shared_ptr<DataType> key_type,
+ std::shared_ptr<Field> item_field, bool keys_sorted) {
+ return std::make_shared<MapType>(std::move(key_type), std::move(item_field),
+ keys_sorted);
+}
+
+std::shared_ptr<DataType> fixed_size_list(const std::shared_ptr<DataType>& value_type,
+ int32_t list_size) {
+ return std::make_shared<FixedSizeListType>(value_type, list_size);
+}
+
+std::shared_ptr<DataType> fixed_size_list(const std::shared_ptr<Field>& value_field,
+ int32_t list_size) {
+ return std::make_shared<FixedSizeListType>(value_field, list_size);
+}
+
+std::shared_ptr<DataType> struct_(const std::vector<std::shared_ptr<Field>>& fields) {
+ return std::make_shared<StructType>(fields);
+}
+
+std::shared_ptr<DataType> sparse_union(FieldVector child_fields,
+ std::vector<int8_t> type_codes) {
+ if (type_codes.empty()) {
+ type_codes = internal::Iota(static_cast<int8_t>(child_fields.size()));
+ }
+ return std::make_shared<SparseUnionType>(std::move(child_fields),
+ std::move(type_codes));
+}
+std::shared_ptr<DataType> dense_union(FieldVector child_fields,
+ std::vector<int8_t> type_codes) {
+ if (type_codes.empty()) {
+ type_codes = internal::Iota(static_cast<int8_t>(child_fields.size()));
+ }
+ return std::make_shared<DenseUnionType>(std::move(child_fields), std::move(type_codes));
+}
+
+FieldVector FieldsFromArraysAndNames(std::vector<std::string> names,
+ const ArrayVector& arrays) {
+ FieldVector fields(arrays.size());
+ int i = 0;
+ if (names.empty()) {
+ for (const auto& array : arrays) {
+ fields[i] = field(std::to_string(i), array->type());
+ ++i;
+ }
+ } else {
+ DCHECK_EQ(names.size(), arrays.size());
+ for (const auto& array : arrays) {
+ fields[i] = field(std::move(names[i]), array->type());
+ ++i;
+ }
+ }
+ return fields;
+}
+
+std::shared_ptr<DataType> sparse_union(const ArrayVector& children,
+ std::vector<std::string> field_names,
+ std::vector<int8_t> type_codes) {
+ if (type_codes.empty()) {
+ type_codes = internal::Iota(static_cast<int8_t>(children.size()));
+ }
+ auto fields = FieldsFromArraysAndNames(std::move(field_names), children);
+ return sparse_union(std::move(fields), std::move(type_codes));
+}
+
+std::shared_ptr<DataType> dense_union(const ArrayVector& children,
+ std::vector<std::string> field_names,
+ std::vector<int8_t> type_codes) {
+ if (type_codes.empty()) {
+ type_codes = internal::Iota(static_cast<int8_t>(children.size()));
+ }
+ auto fields = FieldsFromArraysAndNames(std::move(field_names), children);
+ return dense_union(std::move(fields), std::move(type_codes));
+}
+
+std::shared_ptr<DataType> dictionary(const std::shared_ptr<DataType>& index_type,
+ const std::shared_ptr<DataType>& dict_type,
+ bool ordered) {
+ return std::make_shared<DictionaryType>(index_type, dict_type, ordered);
+}
+
+std::shared_ptr<Field> field(std::string name, std::shared_ptr<DataType> type,
+ bool nullable,
+ std::shared_ptr<const KeyValueMetadata> metadata) {
+ return std::make_shared<Field>(std::move(name), std::move(type), nullable,
+ std::move(metadata));
+}
+
std::shared_ptr<Field> field(std::string name, std::shared_ptr<DataType> type,
std::shared_ptr<const KeyValueMetadata> metadata) {
return std::make_shared<Field>(std::move(name), std::move(type), /*nullable=*/true,
std::move(metadata));
}
-std::shared_ptr<DataType> decimal(int32_t precision, int32_t scale) {
+std::shared_ptr<DataType> decimal(int32_t precision, int32_t scale) {
return precision <= Decimal128Type::kMaxPrecision ? decimal128(precision, scale)
: decimal256(precision, scale);
}
std::shared_ptr<DataType> decimal128(int32_t precision, int32_t scale) {
- return std::make_shared<Decimal128Type>(precision, scale);
-}
-
+ return std::make_shared<Decimal128Type>(precision, scale);
+}
+
std::shared_ptr<DataType> decimal256(int32_t precision, int32_t scale) {
return std::make_shared<Decimal256Type>(precision, scale);
}
-std::string Decimal128Type::ToString() const {
- std::stringstream s;
+std::string Decimal128Type::ToString() const {
+ std::stringstream s;
s << "decimal128(" << precision_ << ", " << scale_ << ")";
- return s.str();
-}
-
+ return s.str();
+}
+
std::string Decimal256Type::ToString() const {
std::stringstream s;
s << "decimal256(" << precision_ << ", " << scale_ << ")";
return s.str();
}
-} // namespace arrow
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/type.h b/contrib/libs/apache/arrow/cpp/src/arrow/type.h
index b933da66089..a3ecf6145aa 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/type.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/type.h
@@ -1,187 +1,187 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <atomic>
-#include <climits>
-#include <cstdint>
-#include <iosfwd>
-#include <limits>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "arrow/result.h"
-#include "arrow/type_fwd.h" // IWYU pragma: export
-#include "arrow/util/checked_cast.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <atomic>
+#include <climits>
+#include <cstdint>
+#include <iosfwd>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/result.h"
+#include "arrow/type_fwd.h" // IWYU pragma: export
+#include "arrow/util/checked_cast.h"
#include "arrow/util/endian.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/variant.h"
-#include "arrow/util/visibility.h"
-#include "arrow/visitor.h" // IWYU pragma: keep
-
-namespace arrow {
-namespace detail {
-
-class ARROW_EXPORT Fingerprintable {
- public:
- virtual ~Fingerprintable();
-
- const std::string& fingerprint() const {
- auto p = fingerprint_.load();
- if (ARROW_PREDICT_TRUE(p != NULLPTR)) {
- return *p;
- }
- return LoadFingerprintSlow();
- }
-
- const std::string& metadata_fingerprint() const {
- auto p = metadata_fingerprint_.load();
- if (ARROW_PREDICT_TRUE(p != NULLPTR)) {
- return *p;
- }
- return LoadMetadataFingerprintSlow();
- }
-
- protected:
- const std::string& LoadFingerprintSlow() const;
- const std::string& LoadMetadataFingerprintSlow() const;
-
- virtual std::string ComputeFingerprint() const = 0;
- virtual std::string ComputeMetadataFingerprint() const = 0;
-
- mutable std::atomic<std::string*> fingerprint_;
- mutable std::atomic<std::string*> metadata_fingerprint_;
-};
-
-} // namespace detail
-
-/// EXPERIMENTAL: Layout specification for a data type
-struct ARROW_EXPORT DataTypeLayout {
- enum BufferKind { FIXED_WIDTH, VARIABLE_WIDTH, BITMAP, ALWAYS_NULL };
-
- /// Layout specification for a single data type buffer
- struct BufferSpec {
- BufferKind kind;
- int64_t byte_width; // For FIXED_WIDTH
-
- bool operator==(const BufferSpec& other) const {
- return kind == other.kind &&
- (kind != FIXED_WIDTH || byte_width == other.byte_width);
- }
- bool operator!=(const BufferSpec& other) const { return !(*this == other); }
- };
-
- static BufferSpec FixedWidth(int64_t w) { return BufferSpec{FIXED_WIDTH, w}; }
- static BufferSpec VariableWidth() { return BufferSpec{VARIABLE_WIDTH, -1}; }
- static BufferSpec Bitmap() { return BufferSpec{BITMAP, -1}; }
- static BufferSpec AlwaysNull() { return BufferSpec{ALWAYS_NULL, -1}; }
-
- /// A vector of buffer layout specifications, one for each expected buffer
- std::vector<BufferSpec> buffers;
- /// Whether this type expects an associated dictionary array.
- bool has_dictionary = false;
-
- explicit DataTypeLayout(std::vector<BufferSpec> v) : buffers(std::move(v)) {}
-};
-
-/// \brief Base class for all data types
-///
-/// Data types in this library are all *logical*. They can be expressed as
-/// either a primitive physical type (bytes or bits of some fixed size), a
-/// nested type consisting of other data types, or another data type (e.g. a
-/// timestamp encoded as an int64).
-///
-/// Simple datatypes may be entirely described by their Type::type id, but
-/// complex datatypes are usually parametric.
-class ARROW_EXPORT DataType : public detail::Fingerprintable {
- public:
- explicit DataType(Type::type id) : detail::Fingerprintable(), id_(id) {}
- ~DataType() override;
-
- /// \brief Return whether the types are equal
- ///
- /// Types that are logically convertible from one to another (e.g. List<UInt8>
- /// and Binary) are NOT equal.
- bool Equals(const DataType& other, bool check_metadata = false) const;
-
- /// \brief Return whether the types are equal
- bool Equals(const std::shared_ptr<DataType>& other) const;
-
- ARROW_DEPRECATED("Use field(i)")
- const std::shared_ptr<Field>& child(int i) const { return field(i); }
-
+#include "arrow/util/macros.h"
+#include "arrow/util/variant.h"
+#include "arrow/util/visibility.h"
+#include "arrow/visitor.h" // IWYU pragma: keep
+
+namespace arrow {
+namespace detail {
+
+class ARROW_EXPORT Fingerprintable {
+ public:
+ virtual ~Fingerprintable();
+
+ const std::string& fingerprint() const {
+ auto p = fingerprint_.load();
+ if (ARROW_PREDICT_TRUE(p != NULLPTR)) {
+ return *p;
+ }
+ return LoadFingerprintSlow();
+ }
+
+ const std::string& metadata_fingerprint() const {
+ auto p = metadata_fingerprint_.load();
+ if (ARROW_PREDICT_TRUE(p != NULLPTR)) {
+ return *p;
+ }
+ return LoadMetadataFingerprintSlow();
+ }
+
+ protected:
+ const std::string& LoadFingerprintSlow() const;
+ const std::string& LoadMetadataFingerprintSlow() const;
+
+ virtual std::string ComputeFingerprint() const = 0;
+ virtual std::string ComputeMetadataFingerprint() const = 0;
+
+ mutable std::atomic<std::string*> fingerprint_;
+ mutable std::atomic<std::string*> metadata_fingerprint_;
+};
+
+} // namespace detail
+
+/// EXPERIMENTAL: Layout specification for a data type
+struct ARROW_EXPORT DataTypeLayout {
+ enum BufferKind { FIXED_WIDTH, VARIABLE_WIDTH, BITMAP, ALWAYS_NULL };
+
+ /// Layout specification for a single data type buffer
+ struct BufferSpec {
+ BufferKind kind;
+ int64_t byte_width; // For FIXED_WIDTH
+
+ bool operator==(const BufferSpec& other) const {
+ return kind == other.kind &&
+ (kind != FIXED_WIDTH || byte_width == other.byte_width);
+ }
+ bool operator!=(const BufferSpec& other) const { return !(*this == other); }
+ };
+
+ static BufferSpec FixedWidth(int64_t w) { return BufferSpec{FIXED_WIDTH, w}; }
+ static BufferSpec VariableWidth() { return BufferSpec{VARIABLE_WIDTH, -1}; }
+ static BufferSpec Bitmap() { return BufferSpec{BITMAP, -1}; }
+ static BufferSpec AlwaysNull() { return BufferSpec{ALWAYS_NULL, -1}; }
+
+ /// A vector of buffer layout specifications, one for each expected buffer
+ std::vector<BufferSpec> buffers;
+ /// Whether this type expects an associated dictionary array.
+ bool has_dictionary = false;
+
+ explicit DataTypeLayout(std::vector<BufferSpec> v) : buffers(std::move(v)) {}
+};
+
+/// \brief Base class for all data types
+///
+/// Data types in this library are all *logical*. They can be expressed as
+/// either a primitive physical type (bytes or bits of some fixed size), a
+/// nested type consisting of other data types, or another data type (e.g. a
+/// timestamp encoded as an int64).
+///
+/// Simple datatypes may be entirely described by their Type::type id, but
+/// complex datatypes are usually parametric.
+class ARROW_EXPORT DataType : public detail::Fingerprintable {
+ public:
+ explicit DataType(Type::type id) : detail::Fingerprintable(), id_(id) {}
+ ~DataType() override;
+
+ /// \brief Return whether the types are equal
+ ///
+ /// Types that are logically convertible from one to another (e.g. List<UInt8>
+ /// and Binary) are NOT equal.
+ bool Equals(const DataType& other, bool check_metadata = false) const;
+
+ /// \brief Return whether the types are equal
+ bool Equals(const std::shared_ptr<DataType>& other) const;
+
+ ARROW_DEPRECATED("Use field(i)")
+ const std::shared_ptr<Field>& child(int i) const { return field(i); }
+
/// Returns the child-field at index i.
- const std::shared_ptr<Field>& field(int i) const { return children_[i]; }
-
- ARROW_DEPRECATED("Use fields()")
- const std::vector<std::shared_ptr<Field>>& children() const { return fields(); }
-
- /// \brief Returns the children fields associated with this type.
- const std::vector<std::shared_ptr<Field>>& fields() const { return children_; }
-
- ARROW_DEPRECATED("Use num_fields()")
- int num_children() const { return num_fields(); }
-
- /// \brief Returns the number of children fields associated with this type.
- int num_fields() const { return static_cast<int>(children_.size()); }
-
- Status Accept(TypeVisitor* visitor) const;
-
- /// \brief A string representation of the type, including any children
- virtual std::string ToString() const = 0;
-
- /// \brief Return hash value (excluding metadata in child fields)
- size_t Hash() const;
-
- /// \brief A string name of the type, omitting any child fields
- ///
- /// \note Experimental API
- /// \since 0.7.0
- virtual std::string name() const = 0;
-
- /// \brief Return the data type layout. Children are not included.
- ///
- /// \note Experimental API
- virtual DataTypeLayout layout() const = 0;
-
- /// \brief Return the type category
- Type::type id() const { return id_; }
-
- protected:
- // Dummy version that returns a null string (indicating not implemented).
- // Subclasses should override for fast equality checks.
- std::string ComputeFingerprint() const override;
-
- // Generic versions that works for all regular types, nested or not.
- std::string ComputeMetadataFingerprint() const override;
-
- Type::type id_;
- std::vector<std::shared_ptr<Field>> children_;
-
- private:
- ARROW_DISALLOW_COPY_AND_ASSIGN(DataType);
-};
-
-ARROW_EXPORT
-std::ostream& operator<<(std::ostream& os, const DataType& type);
-
+ const std::shared_ptr<Field>& field(int i) const { return children_[i]; }
+
+ ARROW_DEPRECATED("Use fields()")
+ const std::vector<std::shared_ptr<Field>>& children() const { return fields(); }
+
+ /// \brief Returns the children fields associated with this type.
+ const std::vector<std::shared_ptr<Field>>& fields() const { return children_; }
+
+ ARROW_DEPRECATED("Use num_fields()")
+ int num_children() const { return num_fields(); }
+
+ /// \brief Returns the number of children fields associated with this type.
+ int num_fields() const { return static_cast<int>(children_.size()); }
+
+ Status Accept(TypeVisitor* visitor) const;
+
+ /// \brief A string representation of the type, including any children
+ virtual std::string ToString() const = 0;
+
+ /// \brief Return hash value (excluding metadata in child fields)
+ size_t Hash() const;
+
+ /// \brief A string name of the type, omitting any child fields
+ ///
+ /// \note Experimental API
+ /// \since 0.7.0
+ virtual std::string name() const = 0;
+
+ /// \brief Return the data type layout. Children are not included.
+ ///
+ /// \note Experimental API
+ virtual DataTypeLayout layout() const = 0;
+
+ /// \brief Return the type category
+ Type::type id() const { return id_; }
+
+ protected:
+ // Dummy version that returns a null string (indicating not implemented).
+ // Subclasses should override for fast equality checks.
+ std::string ComputeFingerprint() const override;
+
+ // Generic versions that works for all regular types, nested or not.
+ std::string ComputeMetadataFingerprint() const override;
+
+ Type::type id_;
+ std::vector<std::shared_ptr<Field>> children_;
+
+ private:
+ ARROW_DISALLOW_COPY_AND_ASSIGN(DataType);
+};
+
+ARROW_EXPORT
+std::ostream& operator<<(std::ostream& os, const DataType& type);
+
/// \brief Return the compatible physical data type
///
/// Some types may have distinct logical meanings but the exact same physical
@@ -194,712 +194,712 @@ std::ostream& operator<<(std::ostream& os, const DataType& type);
/// - otherwise, return the input type itself.
std::shared_ptr<DataType> GetPhysicalType(const std::shared_ptr<DataType>& type);
-/// \brief Base class for all fixed-width data types
-class ARROW_EXPORT FixedWidthType : public DataType {
- public:
- using DataType::DataType;
-
- virtual int bit_width() const = 0;
-};
-
-/// \brief Base class for all data types representing primitive values
-class ARROW_EXPORT PrimitiveCType : public FixedWidthType {
- public:
- using FixedWidthType::FixedWidthType;
-};
-
-/// \brief Base class for all numeric data types
-class ARROW_EXPORT NumberType : public PrimitiveCType {
- public:
- using PrimitiveCType::PrimitiveCType;
-};
-
-/// \brief Base class for all integral data types
-class ARROW_EXPORT IntegerType : public NumberType {
- public:
- using NumberType::NumberType;
- virtual bool is_signed() const = 0;
-};
-
-/// \brief Base class for all floating-point data types
-class ARROW_EXPORT FloatingPointType : public NumberType {
- public:
- using NumberType::NumberType;
- enum Precision { HALF, SINGLE, DOUBLE };
- virtual Precision precision() const = 0;
-};
-
-/// \brief Base class for all parametric data types
-class ParametricType {};
-
-class ARROW_EXPORT NestedType : public DataType, public ParametricType {
- public:
- using DataType::DataType;
-};
-
-/// \brief The combination of a field name and data type, with optional metadata
-///
-/// Fields are used to describe the individual constituents of a
-/// nested DataType or a Schema.
-///
-/// A field's metadata is represented by a KeyValueMetadata instance,
-/// which holds arbitrary key-value pairs.
-class ARROW_EXPORT Field : public detail::Fingerprintable {
- public:
- Field(std::string name, std::shared_ptr<DataType> type, bool nullable = true,
- std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR)
- : detail::Fingerprintable(),
- name_(std::move(name)),
- type_(std::move(type)),
- nullable_(nullable),
- metadata_(std::move(metadata)) {}
-
- ~Field() override;
-
- /// \brief Return the field's attached metadata
- std::shared_ptr<const KeyValueMetadata> metadata() const { return metadata_; }
-
- /// \brief Return whether the field has non-empty metadata
- bool HasMetadata() const;
-
- /// \brief Return a copy of this field with the given metadata attached to it
- std::shared_ptr<Field> WithMetadata(
- const std::shared_ptr<const KeyValueMetadata>& metadata) const;
-
- /// \brief EXPERIMENTAL: Return a copy of this field with the given metadata
- /// merged with existing metadata (any colliding keys will be overridden by
- /// the passed metadata)
- std::shared_ptr<Field> WithMergedMetadata(
- const std::shared_ptr<const KeyValueMetadata>& metadata) const;
-
- /// \brief Return a copy of this field without any metadata attached to it
- std::shared_ptr<Field> RemoveMetadata() const;
-
- /// \brief Return a copy of this field with the replaced type.
- std::shared_ptr<Field> WithType(const std::shared_ptr<DataType>& type) const;
-
- /// \brief Return a copy of this field with the replaced name.
- std::shared_ptr<Field> WithName(const std::string& name) const;
-
- /// \brief Return a copy of this field with the replaced nullability.
- std::shared_ptr<Field> WithNullable(bool nullable) const;
-
- /// \brief Options that control the behavior of `MergeWith`.
- /// Options are to be added to allow type conversions, including integer
- /// widening, promotion from integer to float, or conversion to or from boolean.
- struct MergeOptions {
- /// If true, a Field of NullType can be unified with a Field of another type.
- /// The unified field will be of the other type and become nullable.
- /// Nullability will be promoted to the looser option (nullable if one is not
- /// nullable).
- bool promote_nullability = true;
-
- static MergeOptions Defaults() { return MergeOptions(); }
- };
-
- /// \brief Merge the current field with a field of the same name.
- ///
- /// The two fields must be compatible, i.e:
- /// - have the same name
- /// - have the same type, or of compatible types according to `options`.
- ///
- /// The metadata of the current field is preserved; the metadata of the other
- /// field is discarded.
- Result<std::shared_ptr<Field>> MergeWith(
- const Field& other, MergeOptions options = MergeOptions::Defaults()) const;
- Result<std::shared_ptr<Field>> MergeWith(
- const std::shared_ptr<Field>& other,
- MergeOptions options = MergeOptions::Defaults()) const;
-
- std::vector<std::shared_ptr<Field>> Flatten() const;
-
- /// \brief Indicate if fields are equals.
- ///
- /// \param[in] other field to check equality with.
- /// \param[in] check_metadata controls if it should check for metadata
- /// equality.
- ///
- /// \return true if fields are equal, false otherwise.
- bool Equals(const Field& other, bool check_metadata = false) const;
- bool Equals(const std::shared_ptr<Field>& other, bool check_metadata = false) const;
-
- /// \brief Indicate if fields are compatibles.
- ///
- /// See the criteria of MergeWith.
- ///
- /// \return true if fields are compatible, false otherwise.
- bool IsCompatibleWith(const Field& other) const;
- bool IsCompatibleWith(const std::shared_ptr<Field>& other) const;
-
- /// \brief Return a string representation ot the field
- /// \param[in] show_metadata when true, if KeyValueMetadata is non-empty,
- /// print keys and values in the output
- std::string ToString(bool show_metadata = false) const;
-
- /// \brief Return the field name
- const std::string& name() const { return name_; }
- /// \brief Return the field data type
- const std::shared_ptr<DataType>& type() const { return type_; }
- /// \brief Return whether the field is nullable
- bool nullable() const { return nullable_; }
-
- std::shared_ptr<Field> Copy() const;
-
- private:
- std::string ComputeFingerprint() const override;
- std::string ComputeMetadataFingerprint() const override;
-
- // Field name
- std::string name_;
-
- // The field's data type
- std::shared_ptr<DataType> type_;
-
- // Fields can be nullable
- bool nullable_;
-
- // The field's metadata, if any
- std::shared_ptr<const KeyValueMetadata> metadata_;
-
- ARROW_DISALLOW_COPY_AND_ASSIGN(Field);
-};
-
-namespace detail {
-
-template <typename DERIVED, typename BASE, Type::type TYPE_ID, typename C_TYPE>
-class ARROW_EXPORT CTypeImpl : public BASE {
- public:
- static constexpr Type::type type_id = TYPE_ID;
- using c_type = C_TYPE;
- using PhysicalType = DERIVED;
-
- CTypeImpl() : BASE(TYPE_ID) {}
-
- int bit_width() const override { return static_cast<int>(sizeof(C_TYPE) * CHAR_BIT); }
-
- DataTypeLayout layout() const override {
- return DataTypeLayout(
- {DataTypeLayout::Bitmap(), DataTypeLayout::FixedWidth(sizeof(C_TYPE))});
- }
-
- std::string name() const override { return DERIVED::type_name(); }
-
- std::string ToString() const override { return this->name(); }
-};
-
-template <typename DERIVED, typename BASE, Type::type TYPE_ID, typename C_TYPE>
-constexpr Type::type CTypeImpl<DERIVED, BASE, TYPE_ID, C_TYPE>::type_id;
-
-template <typename DERIVED, Type::type TYPE_ID, typename C_TYPE>
-class IntegerTypeImpl : public detail::CTypeImpl<DERIVED, IntegerType, TYPE_ID, C_TYPE> {
- bool is_signed() const override { return std::is_signed<C_TYPE>::value; }
-};
-
-} // namespace detail
-
-/// Concrete type class for always-null data
-class ARROW_EXPORT NullType : public DataType {
- public:
- static constexpr Type::type type_id = Type::NA;
-
- static constexpr const char* type_name() { return "null"; }
-
- NullType() : DataType(Type::NA) {}
-
- std::string ToString() const override;
-
- DataTypeLayout layout() const override {
- return DataTypeLayout({DataTypeLayout::AlwaysNull()});
- }
-
- std::string name() const override { return "null"; }
-
- protected:
- std::string ComputeFingerprint() const override;
-};
-
-/// Concrete type class for boolean data
-class ARROW_EXPORT BooleanType
- : public detail::CTypeImpl<BooleanType, PrimitiveCType, Type::BOOL, bool> {
- public:
- static constexpr const char* type_name() { return "bool"; }
-
- // BooleanType within arrow use a single bit instead of the C 8-bits layout.
- int bit_width() const final { return 1; }
-
- DataTypeLayout layout() const override {
- return DataTypeLayout({DataTypeLayout::Bitmap(), DataTypeLayout::Bitmap()});
- }
-
- protected:
- std::string ComputeFingerprint() const override;
-};
-
-/// Concrete type class for unsigned 8-bit integer data
-class ARROW_EXPORT UInt8Type
- : public detail::IntegerTypeImpl<UInt8Type, Type::UINT8, uint8_t> {
- public:
- static constexpr const char* type_name() { return "uint8"; }
-
- protected:
- std::string ComputeFingerprint() const override;
-};
-
-/// Concrete type class for signed 8-bit integer data
-class ARROW_EXPORT Int8Type
- : public detail::IntegerTypeImpl<Int8Type, Type::INT8, int8_t> {
- public:
- static constexpr const char* type_name() { return "int8"; }
-
- protected:
- std::string ComputeFingerprint() const override;
-};
-
-/// Concrete type class for unsigned 16-bit integer data
-class ARROW_EXPORT UInt16Type
- : public detail::IntegerTypeImpl<UInt16Type, Type::UINT16, uint16_t> {
- public:
- static constexpr const char* type_name() { return "uint16"; }
-
- protected:
- std::string ComputeFingerprint() const override;
-};
-
-/// Concrete type class for signed 16-bit integer data
-class ARROW_EXPORT Int16Type
- : public detail::IntegerTypeImpl<Int16Type, Type::INT16, int16_t> {
- public:
- static constexpr const char* type_name() { return "int16"; }
-
- protected:
- std::string ComputeFingerprint() const override;
-};
-
-/// Concrete type class for unsigned 32-bit integer data
-class ARROW_EXPORT UInt32Type
- : public detail::IntegerTypeImpl<UInt32Type, Type::UINT32, uint32_t> {
- public:
- static constexpr const char* type_name() { return "uint32"; }
-
- protected:
- std::string ComputeFingerprint() const override;
-};
-
-/// Concrete type class for signed 32-bit integer data
-class ARROW_EXPORT Int32Type
- : public detail::IntegerTypeImpl<Int32Type, Type::INT32, int32_t> {
- public:
- static constexpr const char* type_name() { return "int32"; }
-
- protected:
- std::string ComputeFingerprint() const override;
-};
-
-/// Concrete type class for unsigned 64-bit integer data
-class ARROW_EXPORT UInt64Type
- : public detail::IntegerTypeImpl<UInt64Type, Type::UINT64, uint64_t> {
- public:
- static constexpr const char* type_name() { return "uint64"; }
-
- protected:
- std::string ComputeFingerprint() const override;
-};
-
-/// Concrete type class for signed 64-bit integer data
-class ARROW_EXPORT Int64Type
- : public detail::IntegerTypeImpl<Int64Type, Type::INT64, int64_t> {
- public:
- static constexpr const char* type_name() { return "int64"; }
-
- protected:
- std::string ComputeFingerprint() const override;
-};
-
-/// Concrete type class for 16-bit floating-point data
-class ARROW_EXPORT HalfFloatType
- : public detail::CTypeImpl<HalfFloatType, FloatingPointType, Type::HALF_FLOAT,
- uint16_t> {
- public:
- Precision precision() const override;
- static constexpr const char* type_name() { return "halffloat"; }
-
- protected:
- std::string ComputeFingerprint() const override;
-};
-
-/// Concrete type class for 32-bit floating-point data (C "float")
-class ARROW_EXPORT FloatType
- : public detail::CTypeImpl<FloatType, FloatingPointType, Type::FLOAT, float> {
- public:
- Precision precision() const override;
- static constexpr const char* type_name() { return "float"; }
-
- protected:
- std::string ComputeFingerprint() const override;
-};
-
-/// Concrete type class for 64-bit floating-point data (C "double")
-class ARROW_EXPORT DoubleType
- : public detail::CTypeImpl<DoubleType, FloatingPointType, Type::DOUBLE, double> {
- public:
- Precision precision() const override;
- static constexpr const char* type_name() { return "double"; }
-
- protected:
- std::string ComputeFingerprint() const override;
-};
-
-/// \brief Base class for all variable-size list data types
-class ARROW_EXPORT BaseListType : public NestedType {
- public:
- using NestedType::NestedType;
- std::shared_ptr<Field> value_field() const { return children_[0]; }
-
- std::shared_ptr<DataType> value_type() const { return children_[0]->type(); }
-};
-
-/// \brief Concrete type class for list data
-///
-/// List data is nested data where each value is a variable number of
-/// child items. Lists can be recursively nested, for example
-/// list(list(int32)).
-class ARROW_EXPORT ListType : public BaseListType {
- public:
- static constexpr Type::type type_id = Type::LIST;
- using offset_type = int32_t;
-
- static constexpr const char* type_name() { return "list"; }
-
- // List can contain any other logical value type
- explicit ListType(const std::shared_ptr<DataType>& value_type)
- : ListType(std::make_shared<Field>("item", value_type)) {}
-
- explicit ListType(const std::shared_ptr<Field>& value_field) : BaseListType(type_id) {
- children_ = {value_field};
- }
-
- DataTypeLayout layout() const override {
- return DataTypeLayout(
- {DataTypeLayout::Bitmap(), DataTypeLayout::FixedWidth(sizeof(offset_type))});
- }
-
- std::string ToString() const override;
-
- std::string name() const override { return "list"; }
-
- protected:
- std::string ComputeFingerprint() const override;
-};
-
-/// \brief Concrete type class for large list data
-///
-/// LargeListType is like ListType but with 64-bit rather than 32-bit offsets.
-class ARROW_EXPORT LargeListType : public BaseListType {
- public:
- static constexpr Type::type type_id = Type::LARGE_LIST;
- using offset_type = int64_t;
-
- static constexpr const char* type_name() { return "large_list"; }
-
- // List can contain any other logical value type
- explicit LargeListType(const std::shared_ptr<DataType>& value_type)
- : LargeListType(std::make_shared<Field>("item", value_type)) {}
-
- explicit LargeListType(const std::shared_ptr<Field>& value_field)
- : BaseListType(type_id) {
- children_ = {value_field};
- }
-
- DataTypeLayout layout() const override {
- return DataTypeLayout(
- {DataTypeLayout::Bitmap(), DataTypeLayout::FixedWidth(sizeof(offset_type))});
- }
-
- std::string ToString() const override;
-
- std::string name() const override { return "large_list"; }
-
- protected:
- std::string ComputeFingerprint() const override;
-};
-
-/// \brief Concrete type class for map data
-///
-/// Map data is nested data where each value is a variable number of
+/// \brief Base class for all fixed-width data types
+class ARROW_EXPORT FixedWidthType : public DataType {
+ public:
+ using DataType::DataType;  // inherit DataType's constructors
+
+ virtual int bit_width() const = 0;  // pure virtual: width in bits, supplied by each concrete type
+};
+
+/// \brief Base class for all data types representing primitive values
+class ARROW_EXPORT PrimitiveCType : public FixedWidthType {
+ public:
+ using FixedWidthType::FixedWidthType;  // inherit constructors; this class adds no members of its own
+};
+
+/// \brief Base class for all numeric data types
+class ARROW_EXPORT NumberType : public PrimitiveCType {
+ public:
+ using PrimitiveCType::PrimitiveCType;  // inherit constructors; this class adds no members of its own
+};
+
+/// \brief Base class for all integral data types
+class ARROW_EXPORT IntegerType : public NumberType {
+ public:
+ using NumberType::NumberType;  // inherit constructors
+ virtual bool is_signed() const = 0;  // pure virtual: detail::IntegerTypeImpl answers via std::is_signed<C_TYPE>
+};
+
+/// \brief Base class for all floating-point data types
+class ARROW_EXPORT FloatingPointType : public NumberType {
+ public:
+ using NumberType::NumberType;  // inherit constructors
+ enum Precision { HALF, SINGLE, DOUBLE };  // the three precisions a subclass can report
+ virtual Precision precision() const = 0;  // pure virtual: each concrete float type returns its Precision
+};
+
+/// \brief Base class for all parametric data types
+class ParametricType {};  // empty marker base with no members; NestedType mixes it in alongside DataType
+
+class ARROW_EXPORT NestedType : public DataType, public ParametricType {  // common base of types with child fields, e.g. BaseListType
+ public:
+ using DataType::DataType;  // inherit DataType's constructors
+};
+
+/// \brief The combination of a field name and data type, with optional metadata
+///
+/// Fields are used to describe the individual constituents of a
+/// nested DataType or a Schema.
+///
+/// A field's metadata is represented by a KeyValueMetadata instance,
+/// which holds arbitrary key-value pairs.
+class ARROW_EXPORT Field : public detail::Fingerprintable {
+ public:
+ Field(std::string name, std::shared_ptr<DataType> type, bool nullable = true,
+ std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR)
+ : detail::Fingerprintable(),
+ name_(std::move(name)),
+ type_(std::move(type)),
+ nullable_(nullable),
+ metadata_(std::move(metadata)) {}  // by-value arguments are moved into the members; nullable defaults to true, metadata to NULLPTR
+
+ ~Field() override;
+
+ /// \brief Return the field's attached metadata (may be null: the constructor defaults it to NULLPTR)
+ std::shared_ptr<const KeyValueMetadata> metadata() const { return metadata_; }
+
+ /// \brief Return whether the field has non-empty metadata
+ bool HasMetadata() const;
+
+ /// \brief Return a copy of this field with the given metadata attached to it
+ std::shared_ptr<Field> WithMetadata(
+ const std::shared_ptr<const KeyValueMetadata>& metadata) const;
+
+ /// \brief EXPERIMENTAL: Return a copy of this field with the given metadata
+ /// merged with existing metadata (any colliding keys will be overridden by
+ /// the passed metadata)
+ std::shared_ptr<Field> WithMergedMetadata(
+ const std::shared_ptr<const KeyValueMetadata>& metadata) const;
+
+ /// \brief Return a copy of this field without any metadata attached to it
+ std::shared_ptr<Field> RemoveMetadata() const;
+
+ /// \brief Return a copy of this field with the replaced type.
+ std::shared_ptr<Field> WithType(const std::shared_ptr<DataType>& type) const;
+
+ /// \brief Return a copy of this field with the replaced name.
+ std::shared_ptr<Field> WithName(const std::string& name) const;
+
+ /// \brief Return a copy of this field with the replaced nullability.
+ std::shared_ptr<Field> WithNullable(bool nullable) const;
+
+ /// \brief Options that control the behavior of `MergeWith`.
+ /// Options are to be added to allow type conversions, including integer
+ /// widening, promotion from integer to float, or conversion to or from boolean.
+ struct MergeOptions {
+ /// If true, a Field of NullType can be unified with a Field of another type.
+ /// The unified field will be of the other type and become nullable.
+ /// Nullability will be promoted to the looser option (nullable if one is not
+ /// nullable).
+ bool promote_nullability = true;
+
+ static MergeOptions Defaults() { return MergeOptions(); }  // default-constructed options, i.e. promote_nullability == true
+ };
+
+ /// \brief Merge the current field with a field of the same name.
+ ///
+ /// The two fields must be compatible, i.e:
+ /// - have the same name
+ /// - have the same type, or of compatible types according to `options`.
+ ///
+ /// The metadata of the current field is preserved; the metadata of the other
+ /// field is discarded.
+ Result<std::shared_ptr<Field>> MergeWith(
+ const Field& other, MergeOptions options = MergeOptions::Defaults()) const;
+ Result<std::shared_ptr<Field>> MergeWith(
+ const std::shared_ptr<Field>& other,
+ MergeOptions options = MergeOptions::Defaults()) const;
+
+ std::vector<std::shared_ptr<Field>> Flatten() const;  // NOTE(review): undocumented in this header; check the implementation for the exact flattening rules
+
+ /// \brief Indicate if fields are equal.
+ ///
+ /// \param[in] other field to check equality with.
+ /// \param[in] check_metadata controls if it should check for metadata
+ /// equality.
+ ///
+ /// \return true if fields are equal, false otherwise.
+ bool Equals(const Field& other, bool check_metadata = false) const;
+ bool Equals(const std::shared_ptr<Field>& other, bool check_metadata = false) const;
+
+ /// \brief Indicate if fields are compatible.
+ ///
+ /// See the criteria of MergeWith.
+ ///
+ /// \return true if fields are compatible, false otherwise.
+ bool IsCompatibleWith(const Field& other) const;
+ bool IsCompatibleWith(const std::shared_ptr<Field>& other) const;
+
+ /// \brief Return a string representation of the field
+ /// \param[in] show_metadata when true, if KeyValueMetadata is non-empty,
+ /// print keys and values in the output
+ std::string ToString(bool show_metadata = false) const;
+
+ /// \brief Return the field name
+ const std::string& name() const { return name_; }
+ /// \brief Return the field data type
+ const std::shared_ptr<DataType>& type() const { return type_; }
+ /// \brief Return whether the field is nullable
+ bool nullable() const { return nullable_; }
+
+ std::shared_ptr<Field> Copy() const;  // NOTE(review): undocumented here; presumably a new Field with the same contents — confirm in the implementation
+
+ private:
+ std::string ComputeFingerprint() const override;
+ std::string ComputeMetadataFingerprint() const override;
+
+ // Field name
+ std::string name_;
+
+ // The field's data type
+ std::shared_ptr<DataType> type_;
+
+ // Fields can be nullable
+ bool nullable_;
+
+ // The field's metadata, if any
+ std::shared_ptr<const KeyValueMetadata> metadata_;
+
+ ARROW_DISALLOW_COPY_AND_ASSIGN(Field);  // presumably removes copy construction/assignment (macro is defined elsewhere)
+};
+
+namespace detail {
+
+template <typename DERIVED, typename BASE, Type::type TYPE_ID, typename C_TYPE>
+class ARROW_EXPORT CTypeImpl : public BASE {  // CRTP helper for C-typed data types: DERIVED must provide a static type_name()
+ public:
+ static constexpr Type::type type_id = TYPE_ID;
+ using c_type = C_TYPE;
+ using PhysicalType = DERIVED;
+
+ CTypeImpl() : BASE(TYPE_ID) {}  // forwards the type id to the BASE constructor
+
+ int bit_width() const override { return static_cast<int>(sizeof(C_TYPE) * CHAR_BIT); }  // size of C_TYPE expressed in bits
+
+ DataTypeLayout layout() const override {
+ return DataTypeLayout(  // two entries: Bitmap() followed by FixedWidth(sizeof(C_TYPE))
+ {DataTypeLayout::Bitmap(), DataTypeLayout::FixedWidth(sizeof(C_TYPE))});
+ }
+
+ std::string name() const override { return DERIVED::type_name(); }  // name comes from the derived class's static type_name()
+
+ std::string ToString() const override { return this->name(); }  // same text as name()
+};
+
+template <typename DERIVED, typename BASE, Type::type TYPE_ID, typename C_TYPE>
+constexpr Type::type CTypeImpl<DERIVED, BASE, TYPE_ID, C_TYPE>::type_id;  // out-of-class definition of the static constexpr member (needed before C++17 if it is odr-used)
+
+template <typename DERIVED, Type::type TYPE_ID, typename C_TYPE>
+class IntegerTypeImpl : public detail::CTypeImpl<DERIVED, IntegerType, TYPE_ID, C_TYPE> {  // CTypeImpl specialised on IntegerType as BASE
+ bool is_signed() const override { return std::is_signed<C_TYPE>::value; }  // NOTE(review): no access specifier, so private here; still callable through IntegerType, where it is public
+};
+
+} // namespace detail
+
+/// Concrete type class for always-null data
+class ARROW_EXPORT NullType : public DataType {
+ public:
+ static constexpr Type::type type_id = Type::NA;
+
+ static constexpr const char* type_name() { return "null"; }
+
+ NullType() : DataType(Type::NA) {}  // passes the same id as type_id to the base
+
+ std::string ToString() const override;
+
+ DataTypeLayout layout() const override {
+ return DataTypeLayout({DataTypeLayout::AlwaysNull()});  // single AlwaysNull() entry
+ }
+
+ std::string name() const override { return "null"; }  // same literal as type_name()
+
+ protected:
+ std::string ComputeFingerprint() const override;  // declaration only; defined out of line
+};
+
+/// Concrete type class for boolean data
+class ARROW_EXPORT BooleanType
+ : public detail::CTypeImpl<BooleanType, PrimitiveCType, Type::BOOL, bool> {
+ public:
+ static constexpr const char* type_name() { return "bool"; }  // looked up as DERIVED::type_name() by detail::CTypeImpl::name()
+
+ // BooleanType within arrow use a single bit instead of the C 8-bits layout.
+ int bit_width() const final { return 1; }  // replaces CTypeImpl's sizeof(C_TYPE) * CHAR_BIT; final, so not overridable further
+
+ DataTypeLayout layout() const override {
+ return DataTypeLayout({DataTypeLayout::Bitmap(), DataTypeLayout::Bitmap()});  // two Bitmap() entries instead of CTypeImpl's Bitmap + FixedWidth
+ }
+
+ protected:
+ std::string ComputeFingerprint() const override;  // declaration only; defined out of line
+};
+
+/// Concrete type class for unsigned 8-bit integer data
+class ARROW_EXPORT UInt8Type
+ : public detail::IntegerTypeImpl<UInt8Type, Type::UINT8, uint8_t> {
+ public:
+ static constexpr const char* type_name() { return "uint8"; }  // looked up as DERIVED::type_name() by detail::CTypeImpl::name()
+
+ protected:
+ std::string ComputeFingerprint() const override;  // declaration only; defined out of line
+};
+
+/// Concrete type class for signed 8-bit integer data
+class ARROW_EXPORT Int8Type
+ : public detail::IntegerTypeImpl<Int8Type, Type::INT8, int8_t> {
+ public:
+ static constexpr const char* type_name() { return "int8"; }  // looked up as DERIVED::type_name() by detail::CTypeImpl::name()
+
+ protected:
+ std::string ComputeFingerprint() const override;  // declaration only; defined out of line
+};
+
+/// Concrete type class for unsigned 16-bit integer data
+class ARROW_EXPORT UInt16Type
+ : public detail::IntegerTypeImpl<UInt16Type, Type::UINT16, uint16_t> {
+ public:
+ static constexpr const char* type_name() { return "uint16"; }  // looked up as DERIVED::type_name() by detail::CTypeImpl::name()
+
+ protected:
+ std::string ComputeFingerprint() const override;  // declaration only; defined out of line
+};
+
+/// Concrete type class for signed 16-bit integer data
+class ARROW_EXPORT Int16Type
+ : public detail::IntegerTypeImpl<Int16Type, Type::INT16, int16_t> {
+ public:
+ static constexpr const char* type_name() { return "int16"; }  // looked up as DERIVED::type_name() by detail::CTypeImpl::name()
+
+ protected:
+ std::string ComputeFingerprint() const override;  // declaration only; defined out of line
+};
+
+/// Concrete type class for unsigned 32-bit integer data
+class ARROW_EXPORT UInt32Type
+ : public detail::IntegerTypeImpl<UInt32Type, Type::UINT32, uint32_t> {
+ public:
+ static constexpr const char* type_name() { return "uint32"; }  // looked up as DERIVED::type_name() by detail::CTypeImpl::name()
+
+ protected:
+ std::string ComputeFingerprint() const override;  // declaration only; defined out of line
+};
+
+/// Concrete type class for signed 32-bit integer data
+class ARROW_EXPORT Int32Type
+ : public detail::IntegerTypeImpl<Int32Type, Type::INT32, int32_t> {
+ public:
+ static constexpr const char* type_name() { return "int32"; }  // looked up as DERIVED::type_name() by detail::CTypeImpl::name()
+
+ protected:
+ std::string ComputeFingerprint() const override;  // declaration only; defined out of line
+};
+
+/// Concrete type class for unsigned 64-bit integer data
+class ARROW_EXPORT UInt64Type
+ : public detail::IntegerTypeImpl<UInt64Type, Type::UINT64, uint64_t> {
+ public:
+ static constexpr const char* type_name() { return "uint64"; }  // looked up as DERIVED::type_name() by detail::CTypeImpl::name()
+
+ protected:
+ std::string ComputeFingerprint() const override;  // declaration only; defined out of line
+};
+
+/// Concrete type class for signed 64-bit integer data
+class ARROW_EXPORT Int64Type
+ : public detail::IntegerTypeImpl<Int64Type, Type::INT64, int64_t> {
+ public:
+ static constexpr const char* type_name() { return "int64"; }  // looked up as DERIVED::type_name() by detail::CTypeImpl::name()
+
+ protected:
+ std::string ComputeFingerprint() const override;  // declaration only; defined out of line
+};
+
+/// Concrete type class for 16-bit floating-point data
+class ARROW_EXPORT HalfFloatType
+ : public detail::CTypeImpl<HalfFloatType, FloatingPointType, Type::HALF_FLOAT,
+ uint16_t> {  // C_TYPE is uint16_t, so c_type is an unsigned 16-bit integer rather than a float type
+ public:
+ Precision precision() const override;  // implements FloatingPointType::precision(); defined out of line
+ static constexpr const char* type_name() { return "halffloat"; }  // looked up as DERIVED::type_name() by detail::CTypeImpl::name()
+
+ protected:
+ std::string ComputeFingerprint() const override;  // declaration only; defined out of line
+};
+
+/// Concrete type class for 32-bit floating-point data (C "float")
+class ARROW_EXPORT FloatType
+ : public detail::CTypeImpl<FloatType, FloatingPointType, Type::FLOAT, float> {
+ public:
+ Precision precision() const override;  // implements FloatingPointType::precision(); defined out of line
+ static constexpr const char* type_name() { return "float"; }  // looked up as DERIVED::type_name() by detail::CTypeImpl::name()
+
+ protected:
+ std::string ComputeFingerprint() const override;  // declaration only; defined out of line
+};
+
+/// Concrete type class for 64-bit floating-point data (C "double")
+class ARROW_EXPORT DoubleType
+ : public detail::CTypeImpl<DoubleType, FloatingPointType, Type::DOUBLE, double> {
+ public:
+ Precision precision() const override;  // implements FloatingPointType::precision(); defined out of line
+ static constexpr const char* type_name() { return "double"; }  // looked up as DERIVED::type_name() by detail::CTypeImpl::name()
+
+ protected:
+ std::string ComputeFingerprint() const override;  // declaration only; defined out of line
+};
+
+/// \brief Base class for all variable-size list data types
+class ARROW_EXPORT BaseListType : public NestedType {
+ public:
+ using NestedType::NestedType;  // inherit constructors
+ std::shared_ptr<Field> value_field() const { return children_[0]; }  // first child field; unchecked, so children_ must already be populated
+
+ std::shared_ptr<DataType> value_type() const { return children_[0]->type(); }  // data type of that first child field
+};
+
+/// \brief Concrete type class for list data
+///
+/// List data is nested data where each value is a variable number of
+/// child items. Lists can be recursively nested, for example
+/// list(list(int32)).
+class ARROW_EXPORT ListType : public BaseListType {
+ public:
+ static constexpr Type::type type_id = Type::LIST;
+ using offset_type = int32_t;
+
+ static constexpr const char* type_name() { return "list"; }
+
+ // List can contain any other logical value type
+ explicit ListType(const std::shared_ptr<DataType>& value_type)
+ : ListType(std::make_shared<Field>("item", value_type)) {}  // delegates: wraps value_type in a Field named "item"
+
+ explicit ListType(const std::shared_ptr<Field>& value_field) : BaseListType(type_id) {
+ children_ = {value_field};  // the value field becomes the only child
+ }
+
+ DataTypeLayout layout() const override {
+ return DataTypeLayout(  // two entries: Bitmap() and FixedWidth(sizeof(offset_type)), i.e. 32-bit wide here
+ {DataTypeLayout::Bitmap(), DataTypeLayout::FixedWidth(sizeof(offset_type))});
+ }
+
+ std::string ToString() const override;
+
+ std::string name() const override { return "list"; }  // same literal as type_name()
+
+ protected:
+ std::string ComputeFingerprint() const override;  // declaration only; defined out of line
+};
+
+/// \brief Concrete type class for large list data
+///
+/// LargeListType is like ListType but with 64-bit rather than 32-bit offsets.
+class ARROW_EXPORT LargeListType : public BaseListType {
+ public:
+ static constexpr Type::type type_id = Type::LARGE_LIST;
+ using offset_type = int64_t;
+
+ static constexpr const char* type_name() { return "large_list"; }
+
+ // List can contain any other logical value type
+ explicit LargeListType(const std::shared_ptr<DataType>& value_type)
+ : LargeListType(std::make_shared<Field>("item", value_type)) {}  // delegates: wraps value_type in a Field named "item"
+
+ explicit LargeListType(const std::shared_ptr<Field>& value_field)
+ : BaseListType(type_id) {
+ children_ = {value_field};  // the value field becomes the only child
+ }
+
+ DataTypeLayout layout() const override {
+ return DataTypeLayout(  // two entries: Bitmap() and FixedWidth(sizeof(offset_type)), i.e. 64-bit wide here
+ {DataTypeLayout::Bitmap(), DataTypeLayout::FixedWidth(sizeof(offset_type))});
+ }
+
+ std::string ToString() const override;
+
+ std::string name() const override { return "large_list"; }  // same literal as type_name()
+
+ protected:
+ std::string ComputeFingerprint() const override;  // declaration only; defined out of line
+};
+
+/// \brief Concrete type class for map data
+///
+/// Map data is nested data where each value is a variable number of
/// key-item pairs. Its physical representation is the same as
/// a list of `{key, item}` structs.
///
/// Maps can be recursively nested, for example map(utf8, map(utf8, int32)).
-class ARROW_EXPORT MapType : public ListType {
- public:
- static constexpr Type::type type_id = Type::MAP;
-
- static constexpr const char* type_name() { return "map"; }
-
- MapType(std::shared_ptr<DataType> key_type, std::shared_ptr<DataType> item_type,
- bool keys_sorted = false);
-
- MapType(std::shared_ptr<DataType> key_type, std::shared_ptr<Field> item_field,
- bool keys_sorted = false);
-
- MapType(std::shared_ptr<Field> key_field, std::shared_ptr<Field> item_field,
- bool keys_sorted = false);
-
- explicit MapType(std::shared_ptr<Field> value_field, bool keys_sorted = false);
-
- // Validating constructor
- static Result<std::shared_ptr<DataType>> Make(std::shared_ptr<Field> value_field,
- bool keys_sorted = false);
-
- std::shared_ptr<Field> key_field() const { return value_type()->field(0); }
- std::shared_ptr<DataType> key_type() const { return key_field()->type(); }
-
- std::shared_ptr<Field> item_field() const { return value_type()->field(1); }
- std::shared_ptr<DataType> item_type() const { return item_field()->type(); }
-
- std::string ToString() const override;
-
- std::string name() const override { return "map"; }
-
- bool keys_sorted() const { return keys_sorted_; }
-
- private:
- std::string ComputeFingerprint() const override;
-
- bool keys_sorted_;
-};
-
-/// \brief Concrete type class for fixed size list data
-class ARROW_EXPORT FixedSizeListType : public BaseListType {
- public:
- static constexpr Type::type type_id = Type::FIXED_SIZE_LIST;
- using offset_type = int32_t;
-
- static constexpr const char* type_name() { return "fixed_size_list"; }
-
- // List can contain any other logical value type
- FixedSizeListType(const std::shared_ptr<DataType>& value_type, int32_t list_size)
- : FixedSizeListType(std::make_shared<Field>("item", value_type), list_size) {}
-
- FixedSizeListType(const std::shared_ptr<Field>& value_field, int32_t list_size)
- : BaseListType(type_id), list_size_(list_size) {
- children_ = {value_field};
- }
-
- DataTypeLayout layout() const override {
- return DataTypeLayout({DataTypeLayout::Bitmap()});
- }
-
- std::string ToString() const override;
-
- std::string name() const override { return "fixed_size_list"; }
-
- int32_t list_size() const { return list_size_; }
-
- protected:
- std::string ComputeFingerprint() const override;
-
- int32_t list_size_;
-};
-
-/// \brief Base class for all variable-size binary data types
-class ARROW_EXPORT BaseBinaryType : public DataType {
- public:
- using DataType::DataType;
-};
-
-constexpr int64_t kBinaryMemoryLimit = std::numeric_limits<int32_t>::max() - 1;
-
-/// \brief Concrete type class for variable-size binary data
-class ARROW_EXPORT BinaryType : public BaseBinaryType {
- public:
- static constexpr Type::type type_id = Type::BINARY;
- static constexpr bool is_utf8 = false;
- using offset_type = int32_t;
- using PhysicalType = BinaryType;
-
- static constexpr const char* type_name() { return "binary"; }
-
- BinaryType() : BinaryType(Type::BINARY) {}
-
- DataTypeLayout layout() const override {
- return DataTypeLayout({DataTypeLayout::Bitmap(),
- DataTypeLayout::FixedWidth(sizeof(offset_type)),
- DataTypeLayout::VariableWidth()});
- }
-
- std::string ToString() const override;
- std::string name() const override { return "binary"; }
-
- protected:
- std::string ComputeFingerprint() const override;
-
- // Allow subclasses like StringType to change the logical type.
- explicit BinaryType(Type::type logical_type) : BaseBinaryType(logical_type) {}
-};
-
-/// \brief Concrete type class for large variable-size binary data
-class ARROW_EXPORT LargeBinaryType : public BaseBinaryType {
- public:
- static constexpr Type::type type_id = Type::LARGE_BINARY;
- static constexpr bool is_utf8 = false;
- using offset_type = int64_t;
- using PhysicalType = LargeBinaryType;
-
- static constexpr const char* type_name() { return "large_binary"; }
-
- LargeBinaryType() : LargeBinaryType(Type::LARGE_BINARY) {}
-
- DataTypeLayout layout() const override {
- return DataTypeLayout({DataTypeLayout::Bitmap(),
- DataTypeLayout::FixedWidth(sizeof(offset_type)),
- DataTypeLayout::VariableWidth()});
- }
-
- std::string ToString() const override;
- std::string name() const override { return "large_binary"; }
-
- protected:
- std::string ComputeFingerprint() const override;
-
- // Allow subclasses like LargeStringType to change the logical type.
- explicit LargeBinaryType(Type::type logical_type) : BaseBinaryType(logical_type) {}
-};
-
-/// \brief Concrete type class for variable-size string data, utf8-encoded
-class ARROW_EXPORT StringType : public BinaryType {
- public:
- static constexpr Type::type type_id = Type::STRING;
- static constexpr bool is_utf8 = true;
- using PhysicalType = BinaryType;
-
- static constexpr const char* type_name() { return "utf8"; }
-
- StringType() : BinaryType(Type::STRING) {}
-
- std::string ToString() const override;
- std::string name() const override { return "utf8"; }
-
- protected:
- std::string ComputeFingerprint() const override;
-};
-
-/// \brief Concrete type class for large variable-size string data, utf8-encoded
-class ARROW_EXPORT LargeStringType : public LargeBinaryType {
- public:
- static constexpr Type::type type_id = Type::LARGE_STRING;
- static constexpr bool is_utf8 = true;
- using PhysicalType = LargeBinaryType;
-
- static constexpr const char* type_name() { return "large_utf8"; }
-
- LargeStringType() : LargeBinaryType(Type::LARGE_STRING) {}
-
- std::string ToString() const override;
- std::string name() const override { return "large_utf8"; }
-
- protected:
- std::string ComputeFingerprint() const override;
-};
-
-/// \brief Concrete type class for fixed-size binary data
-class ARROW_EXPORT FixedSizeBinaryType : public FixedWidthType, public ParametricType {
- public:
- static constexpr Type::type type_id = Type::FIXED_SIZE_BINARY;
- static constexpr bool is_utf8 = false;
-
- static constexpr const char* type_name() { return "fixed_size_binary"; }
-
- explicit FixedSizeBinaryType(int32_t byte_width)
- : FixedWidthType(Type::FIXED_SIZE_BINARY), byte_width_(byte_width) {}
- explicit FixedSizeBinaryType(int32_t byte_width, Type::type override_type_id)
- : FixedWidthType(override_type_id), byte_width_(byte_width) {}
-
- std::string ToString() const override;
- std::string name() const override { return "fixed_size_binary"; }
-
- DataTypeLayout layout() const override {
- return DataTypeLayout(
- {DataTypeLayout::Bitmap(), DataTypeLayout::FixedWidth(byte_width())});
- }
-
- int32_t byte_width() const { return byte_width_; }
- int bit_width() const override;
-
- // Validating constructor
- static Result<std::shared_ptr<DataType>> Make(int32_t byte_width);
-
- protected:
- std::string ComputeFingerprint() const override;
-
- int32_t byte_width_;
-};
-
-/// \brief Concrete type class for struct data
-class ARROW_EXPORT StructType : public NestedType {
- public:
- static constexpr Type::type type_id = Type::STRUCT;
-
- static constexpr const char* type_name() { return "struct"; }
-
- explicit StructType(const std::vector<std::shared_ptr<Field>>& fields);
-
- ~StructType() override;
-
- DataTypeLayout layout() const override {
- return DataTypeLayout({DataTypeLayout::Bitmap()});
- }
-
- std::string ToString() const override;
- std::string name() const override { return "struct"; }
-
- /// Returns null if name not found
- std::shared_ptr<Field> GetFieldByName(const std::string& name) const;
-
- /// Return all fields having this name
- std::vector<std::shared_ptr<Field>> GetAllFieldsByName(const std::string& name) const;
-
- /// Returns -1 if name not found or if there are multiple fields having the
- /// same name
- int GetFieldIndex(const std::string& name) const;
-
- /// \brief Return the indices of all fields having this name in sorted order
- std::vector<int> GetAllFieldIndices(const std::string& name) const;
-
- private:
- std::string ComputeFingerprint() const override;
-
- class Impl;
- std::unique_ptr<Impl> impl_;
-};
-
-/// \brief Base type class for (fixed-size) decimal data
-class ARROW_EXPORT DecimalType : public FixedSizeBinaryType {
- public:
+class ARROW_EXPORT MapType : public ListType {
+ public:
+ static constexpr Type::type type_id = Type::MAP;
+
+ static constexpr const char* type_name() { return "map"; }
+
+ MapType(std::shared_ptr<DataType> key_type, std::shared_ptr<DataType> item_type,
+ bool keys_sorted = false);
+
+ MapType(std::shared_ptr<DataType> key_type, std::shared_ptr<Field> item_field,
+ bool keys_sorted = false);
+
+ MapType(std::shared_ptr<Field> key_field, std::shared_ptr<Field> item_field,
+ bool keys_sorted = false);
+
+ explicit MapType(std::shared_ptr<Field> value_field, bool keys_sorted = false);
+
+ // Validating constructor
+ static Result<std::shared_ptr<DataType>> Make(std::shared_ptr<Field> value_field,
+ bool keys_sorted = false);
+
+ std::shared_ptr<Field> key_field() const { return value_type()->field(0); }
+ std::shared_ptr<DataType> key_type() const { return key_field()->type(); }
+
+ std::shared_ptr<Field> item_field() const { return value_type()->field(1); }
+ std::shared_ptr<DataType> item_type() const { return item_field()->type(); }
+
+ std::string ToString() const override;
+
+ std::string name() const override { return "map"; }
+
+ bool keys_sorted() const { return keys_sorted_; }
+
+ private:
+ std::string ComputeFingerprint() const override;
+
+ bool keys_sorted_;
+};
+
+/// \brief Concrete type class for fixed size list data
+class ARROW_EXPORT FixedSizeListType : public BaseListType {
+ public:
+ static constexpr Type::type type_id = Type::FIXED_SIZE_LIST;
+ using offset_type = int32_t;
+
+ static constexpr const char* type_name() { return "fixed_size_list"; }
+
+ // List can contain any other logical value type
+ FixedSizeListType(const std::shared_ptr<DataType>& value_type, int32_t list_size)
+ : FixedSizeListType(std::make_shared<Field>("item", value_type), list_size) {}
+
+ FixedSizeListType(const std::shared_ptr<Field>& value_field, int32_t list_size)
+ : BaseListType(type_id), list_size_(list_size) {
+ children_ = {value_field};
+ }
+
+ DataTypeLayout layout() const override {
+ return DataTypeLayout({DataTypeLayout::Bitmap()});
+ }
+
+ std::string ToString() const override;
+
+ std::string name() const override { return "fixed_size_list"; }
+
+ int32_t list_size() const { return list_size_; }
+
+ protected:
+ std::string ComputeFingerprint() const override;
+
+ int32_t list_size_;
+};
+
+/// \brief Base class for all variable-size binary data types
+class ARROW_EXPORT BaseBinaryType : public DataType {
+ public:
+ using DataType::DataType;
+};
+
+constexpr int64_t kBinaryMemoryLimit = std::numeric_limits<int32_t>::max() - 1;
+
+/// \brief Concrete type class for variable-size binary data
+class ARROW_EXPORT BinaryType : public BaseBinaryType {
+ public:
+ static constexpr Type::type type_id = Type::BINARY;
+ static constexpr bool is_utf8 = false;
+ using offset_type = int32_t;
+ using PhysicalType = BinaryType;
+
+ static constexpr const char* type_name() { return "binary"; }
+
+ BinaryType() : BinaryType(Type::BINARY) {}
+
+ DataTypeLayout layout() const override {
+ return DataTypeLayout({DataTypeLayout::Bitmap(),
+ DataTypeLayout::FixedWidth(sizeof(offset_type)),
+ DataTypeLayout::VariableWidth()});
+ }
+
+ std::string ToString() const override;
+ std::string name() const override { return "binary"; }
+
+ protected:
+ std::string ComputeFingerprint() const override;
+
+ // Allow subclasses like StringType to change the logical type.
+ explicit BinaryType(Type::type logical_type) : BaseBinaryType(logical_type) {}
+};
+
+/// \brief Concrete type class for large variable-size binary data
+class ARROW_EXPORT LargeBinaryType : public BaseBinaryType {
+ public:
+ static constexpr Type::type type_id = Type::LARGE_BINARY;
+ static constexpr bool is_utf8 = false;
+ using offset_type = int64_t;
+ using PhysicalType = LargeBinaryType;
+
+ static constexpr const char* type_name() { return "large_binary"; }
+
+ LargeBinaryType() : LargeBinaryType(Type::LARGE_BINARY) {}
+
+ DataTypeLayout layout() const override {
+ return DataTypeLayout({DataTypeLayout::Bitmap(),
+ DataTypeLayout::FixedWidth(sizeof(offset_type)),
+ DataTypeLayout::VariableWidth()});
+ }
+
+ std::string ToString() const override;
+ std::string name() const override { return "large_binary"; }
+
+ protected:
+ std::string ComputeFingerprint() const override;
+
+ // Allow subclasses like LargeStringType to change the logical type.
+ explicit LargeBinaryType(Type::type logical_type) : BaseBinaryType(logical_type) {}
+};
+
+/// \brief Concrete type class for variable-size string data, utf8-encoded
+class ARROW_EXPORT StringType : public BinaryType {
+ public:
+ static constexpr Type::type type_id = Type::STRING;
+ static constexpr bool is_utf8 = true;
+ using PhysicalType = BinaryType;
+
+ static constexpr const char* type_name() { return "utf8"; }
+
+ StringType() : BinaryType(Type::STRING) {}
+
+ std::string ToString() const override;
+ std::string name() const override { return "utf8"; }
+
+ protected:
+ std::string ComputeFingerprint() const override;
+};
+
+/// \brief Concrete type class for large variable-size string data, utf8-encoded
+class ARROW_EXPORT LargeStringType : public LargeBinaryType {
+ public:
+ static constexpr Type::type type_id = Type::LARGE_STRING;
+ static constexpr bool is_utf8 = true;
+ using PhysicalType = LargeBinaryType;
+
+ static constexpr const char* type_name() { return "large_utf8"; }
+
+ LargeStringType() : LargeBinaryType(Type::LARGE_STRING) {}
+
+ std::string ToString() const override;
+ std::string name() const override { return "large_utf8"; }
+
+ protected:
+ std::string ComputeFingerprint() const override;
+};
+
+/// \brief Concrete type class for fixed-size binary data
+class ARROW_EXPORT FixedSizeBinaryType : public FixedWidthType, public ParametricType {
+ public:
+ static constexpr Type::type type_id = Type::FIXED_SIZE_BINARY;
+ static constexpr bool is_utf8 = false;
+
+ static constexpr const char* type_name() { return "fixed_size_binary"; }
+
+ explicit FixedSizeBinaryType(int32_t byte_width)
+ : FixedWidthType(Type::FIXED_SIZE_BINARY), byte_width_(byte_width) {}
+ explicit FixedSizeBinaryType(int32_t byte_width, Type::type override_type_id)
+ : FixedWidthType(override_type_id), byte_width_(byte_width) {}
+
+ std::string ToString() const override;
+ std::string name() const override { return "fixed_size_binary"; }
+
+ DataTypeLayout layout() const override {
+ return DataTypeLayout(
+ {DataTypeLayout::Bitmap(), DataTypeLayout::FixedWidth(byte_width())});
+ }
+
+ int32_t byte_width() const { return byte_width_; }
+ int bit_width() const override;
+
+ // Validating constructor
+ static Result<std::shared_ptr<DataType>> Make(int32_t byte_width);
+
+ protected:
+ std::string ComputeFingerprint() const override;
+
+ int32_t byte_width_;
+};
+
+/// \brief Concrete type class for struct data
+class ARROW_EXPORT StructType : public NestedType {
+ public:
+ static constexpr Type::type type_id = Type::STRUCT;
+
+ static constexpr const char* type_name() { return "struct"; }
+
+ explicit StructType(const std::vector<std::shared_ptr<Field>>& fields);
+
+ ~StructType() override;
+
+ DataTypeLayout layout() const override {
+ return DataTypeLayout({DataTypeLayout::Bitmap()});
+ }
+
+ std::string ToString() const override;
+ std::string name() const override { return "struct"; }
+
+ /// Returns null if name not found
+ std::shared_ptr<Field> GetFieldByName(const std::string& name) const;
+
+ /// Return all fields having this name
+ std::vector<std::shared_ptr<Field>> GetAllFieldsByName(const std::string& name) const;
+
+ /// Returns -1 if name not found or if there are multiple fields having the
+ /// same name
+ int GetFieldIndex(const std::string& name) const;
+
+ /// \brief Return the indices of all fields having this name in sorted order
+ std::vector<int> GetAllFieldIndices(const std::string& name) const;
+
+ private:
+ std::string ComputeFingerprint() const override;
+
+ class Impl;
+ std::unique_ptr<Impl> impl_;
+};
+
+/// \brief Base type class for (fixed-size) decimal data
+class ARROW_EXPORT DecimalType : public FixedSizeBinaryType {
+ public:
explicit DecimalType(Type::type type_id, int32_t byte_width, int32_t precision,
int32_t scale)
: FixedSizeBinaryType(byte_width, type_id), precision_(precision), scale_(scale) {}
-
+
/// Constructs concrete decimal types
static Result<std::shared_ptr<DataType>> Make(Type::type type_id, int32_t precision,
int32_t scale);
- int32_t precision() const { return precision_; }
- int32_t scale() const { return scale_; }
-
+ int32_t precision() const { return precision_; }
+ int32_t scale() const { return scale_; }
+
/// \brief Returns the number of bytes needed for precision.
///
/// precision must be >= 1
static int32_t DecimalSize(int32_t precision);
- protected:
- std::string ComputeFingerprint() const override;
-
- int32_t precision_;
- int32_t scale_;
-};
-
-/// \brief Concrete type class for 128-bit decimal data
+ protected:
+ std::string ComputeFingerprint() const override;
+
+ int32_t precision_;
+ int32_t scale_;
+};
+
+/// \brief Concrete type class for 128-bit decimal data
///
/// Arrow decimals are fixed-point decimal numbers encoded as a scaled
/// integer. The precision is the number of significant digits that the
@@ -913,26 +913,26 @@ class ARROW_EXPORT DecimalType : public FixedSizeBinaryType {
/// Decimal128Type has a maximum precision of 38 significant digits
/// (also available as Decimal128Type::kMaxPrecision).
/// If higher precision is needed, consider using Decimal256Type.
-class ARROW_EXPORT Decimal128Type : public DecimalType {
- public:
+class ARROW_EXPORT Decimal128Type : public DecimalType {
+ public:
static constexpr Type::type type_id = Type::DECIMAL128;
-
+
static constexpr const char* type_name() { return "decimal128"; }
-
- /// Decimal128Type constructor that aborts on invalid input.
- explicit Decimal128Type(int32_t precision, int32_t scale);
-
- /// Decimal128Type constructor that returns an error on invalid input.
- static Result<std::shared_ptr<DataType>> Make(int32_t precision, int32_t scale);
-
- std::string ToString() const override;
+
+ /// Decimal128Type constructor that aborts on invalid input.
+ explicit Decimal128Type(int32_t precision, int32_t scale);
+
+ /// Decimal128Type constructor that returns an error on invalid input.
+ static Result<std::shared_ptr<DataType>> Make(int32_t precision, int32_t scale);
+
+ std::string ToString() const override;
std::string name() const override { return "decimal128"; }
-
- static constexpr int32_t kMinPrecision = 1;
- static constexpr int32_t kMaxPrecision = 38;
+
+ static constexpr int32_t kMinPrecision = 1;
+ static constexpr int32_t kMaxPrecision = 38;
static constexpr int32_t kByteWidth = 16;
-};
-
+};
+
/// \brief Concrete type class for 256-bit decimal data
///
/// Arrow decimals are fixed-point decimal numbers encoded as a scaled
@@ -967,53 +967,53 @@ class ARROW_EXPORT Decimal256Type : public DecimalType {
};
/// \brief Base type class for union data
-class ARROW_EXPORT UnionType : public NestedType {
- public:
- static constexpr int8_t kMaxTypeCode = 127;
- static constexpr int kInvalidChildId = -1;
-
- static Result<std::shared_ptr<DataType>> Make(
- const std::vector<std::shared_ptr<Field>>& fields,
- const std::vector<int8_t>& type_codes, UnionMode::type mode = UnionMode::SPARSE) {
- if (mode == UnionMode::SPARSE) {
- return sparse_union(fields, type_codes);
- } else {
- return dense_union(fields, type_codes);
- }
- }
-
- DataTypeLayout layout() const override;
-
- std::string ToString() const override;
-
- /// The array of logical type ids.
- ///
- /// For example, the first type in the union might be denoted by the id 5
- /// (instead of 0).
- const std::vector<int8_t>& type_codes() const { return type_codes_; }
-
- /// An array mapping logical type ids to physical child ids.
- const std::vector<int>& child_ids() const { return child_ids_; }
-
- uint8_t max_type_code() const;
-
- UnionMode::type mode() const;
-
- protected:
- UnionType(std::vector<std::shared_ptr<Field>> fields, std::vector<int8_t> type_codes,
- Type::type id);
-
- static Status ValidateParameters(const std::vector<std::shared_ptr<Field>>& fields,
- const std::vector<int8_t>& type_codes,
- UnionMode::type mode);
-
- private:
- std::string ComputeFingerprint() const override;
-
- std::vector<int8_t> type_codes_;
- std::vector<int> child_ids_;
-};
-
+class ARROW_EXPORT UnionType : public NestedType {
+ public:
+ static constexpr int8_t kMaxTypeCode = 127;
+ static constexpr int kInvalidChildId = -1;
+
+ static Result<std::shared_ptr<DataType>> Make(
+ const std::vector<std::shared_ptr<Field>>& fields,
+ const std::vector<int8_t>& type_codes, UnionMode::type mode = UnionMode::SPARSE) {
+ if (mode == UnionMode::SPARSE) {
+ return sparse_union(fields, type_codes);
+ } else {
+ return dense_union(fields, type_codes);
+ }
+ }
+
+ DataTypeLayout layout() const override;
+
+ std::string ToString() const override;
+
+ /// The array of logical type ids.
+ ///
+ /// For example, the first type in the union might be denoted by the id 5
+ /// (instead of 0).
+ const std::vector<int8_t>& type_codes() const { return type_codes_; }
+
+ /// An array mapping logical type ids to physical child ids.
+ const std::vector<int>& child_ids() const { return child_ids_; }
+
+ uint8_t max_type_code() const;
+
+ UnionMode::type mode() const;
+
+ protected:
+ UnionType(std::vector<std::shared_ptr<Field>> fields, std::vector<int8_t> type_codes,
+ Type::type id);
+
+ static Status ValidateParameters(const std::vector<std::shared_ptr<Field>>& fields,
+ const std::vector<int8_t>& type_codes,
+ UnionMode::type mode);
+
+ private:
+ std::string ComputeFingerprint() const override;
+
+ std::vector<int8_t> type_codes_;
+ std::vector<int> child_ids_;
+};
+
/// \brief Concrete type class for sparse union data
///
/// A sparse union is a nested type where each logical value is taken from
@@ -1025,22 +1025,22 @@ class ARROW_EXPORT UnionType : public NestedType {
/// refer to it.
///
/// Note that, unlike most other types, unions don't have a top-level validity bitmap.
-class ARROW_EXPORT SparseUnionType : public UnionType {
- public:
- static constexpr Type::type type_id = Type::SPARSE_UNION;
-
- static constexpr const char* type_name() { return "sparse_union"; }
-
- SparseUnionType(std::vector<std::shared_ptr<Field>> fields,
- std::vector<int8_t> type_codes);
-
- // A constructor variant that validates input parameters
- static Result<std::shared_ptr<DataType>> Make(
- std::vector<std::shared_ptr<Field>> fields, std::vector<int8_t> type_codes);
-
- std::string name() const override { return "sparse_union"; }
-};
-
+class ARROW_EXPORT SparseUnionType : public UnionType {
+ public:
+ static constexpr Type::type type_id = Type::SPARSE_UNION;
+
+ static constexpr const char* type_name() { return "sparse_union"; }
+
+ SparseUnionType(std::vector<std::shared_ptr<Field>> fields,
+ std::vector<int8_t> type_codes);
+
+ // A constructor variant that validates input parameters
+ static Result<std::shared_ptr<DataType>> Make(
+ std::vector<std::shared_ptr<Field>> fields, std::vector<int8_t> type_codes);
+
+ std::string name() const override { return "sparse_union"; }
+};
+
/// \brief Concrete type class for dense union data
///
/// A dense union is a nested type where each logical value is taken from
@@ -1055,509 +1055,509 @@ class ARROW_EXPORT SparseUnionType : public UnionType {
/// the additional indirection cost when looking up values.
///
/// Note that, unlike most other types, unions don't have a top-level validity bitmap.
-class ARROW_EXPORT DenseUnionType : public UnionType {
- public:
- static constexpr Type::type type_id = Type::DENSE_UNION;
-
- static constexpr const char* type_name() { return "dense_union"; }
-
- DenseUnionType(std::vector<std::shared_ptr<Field>> fields,
- std::vector<int8_t> type_codes);
-
- // A constructor variant that validates input parameters
- static Result<std::shared_ptr<DataType>> Make(
- std::vector<std::shared_ptr<Field>> fields, std::vector<int8_t> type_codes);
-
- std::string name() const override { return "dense_union"; }
-};
-
-// ----------------------------------------------------------------------
-// Date and time types
-
-/// \brief Base type for all date and time types
-class ARROW_EXPORT TemporalType : public FixedWidthType {
- public:
- using FixedWidthType::FixedWidthType;
-
- DataTypeLayout layout() const override {
- return DataTypeLayout(
- {DataTypeLayout::Bitmap(), DataTypeLayout::FixedWidth(bit_width() / 8)});
- }
-};
-
-/// \brief Base type class for date data
-class ARROW_EXPORT DateType : public TemporalType {
- public:
- virtual DateUnit unit() const = 0;
-
- protected:
- explicit DateType(Type::type type_id);
-};
-
-/// Concrete type class for 32-bit date data (as number of days since UNIX epoch)
-class ARROW_EXPORT Date32Type : public DateType {
- public:
- static constexpr Type::type type_id = Type::DATE32;
- static constexpr DateUnit UNIT = DateUnit::DAY;
- using c_type = int32_t;
- using PhysicalType = Int32Type;
-
- static constexpr const char* type_name() { return "date32"; }
-
- Date32Type();
-
- int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
-
- std::string ToString() const override;
-
- std::string name() const override { return "date32"; }
- DateUnit unit() const override { return UNIT; }
-
- protected:
- std::string ComputeFingerprint() const override;
-};
-
-/// Concrete type class for 64-bit date data (as number of milliseconds since UNIX epoch)
-class ARROW_EXPORT Date64Type : public DateType {
- public:
- static constexpr Type::type type_id = Type::DATE64;
- static constexpr DateUnit UNIT = DateUnit::MILLI;
- using c_type = int64_t;
- using PhysicalType = Int64Type;
-
- static constexpr const char* type_name() { return "date64"; }
-
- Date64Type();
-
- int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
-
- std::string ToString() const override;
-
- std::string name() const override { return "date64"; }
- DateUnit unit() const override { return UNIT; }
-
- protected:
- std::string ComputeFingerprint() const override;
-};
-
-ARROW_EXPORT
-std::ostream& operator<<(std::ostream& os, TimeUnit::type unit);
-
-/// Base type class for time data
-class ARROW_EXPORT TimeType : public TemporalType, public ParametricType {
- public:
- TimeUnit::type unit() const { return unit_; }
-
- protected:
- TimeType(Type::type type_id, TimeUnit::type unit);
- std::string ComputeFingerprint() const override;
-
- TimeUnit::type unit_;
-};
-
-/// Concrete type class for 32-bit time data (as number of seconds or milliseconds
-/// since midnight)
-class ARROW_EXPORT Time32Type : public TimeType {
- public:
- static constexpr Type::type type_id = Type::TIME32;
- using c_type = int32_t;
- using PhysicalType = Int32Type;
-
- static constexpr const char* type_name() { return "time32"; }
-
- int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
-
- explicit Time32Type(TimeUnit::type unit = TimeUnit::MILLI);
-
- std::string ToString() const override;
-
- std::string name() const override { return "time32"; }
-};
-
-/// Concrete type class for 64-bit time data (as number of microseconds or nanoseconds
-/// since midnight)
-class ARROW_EXPORT Time64Type : public TimeType {
- public:
- static constexpr Type::type type_id = Type::TIME64;
- using c_type = int64_t;
- using PhysicalType = Int64Type;
-
- static constexpr const char* type_name() { return "time64"; }
-
- int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
-
- explicit Time64Type(TimeUnit::type unit = TimeUnit::NANO);
-
- std::string ToString() const override;
-
- std::string name() const override { return "time64"; }
-};
-
-/// \brief Concrete type class for datetime data (as number of seconds, milliseconds,
-/// microseconds or nanoseconds since UNIX epoch)
-///
-/// If supplied, the timezone string should take either the form (i) "Area/Location",
-/// with values drawn from the names in the IANA Time Zone Database (such as
-/// "Europe/Zurich"); or (ii) "(+|-)HH:MM" indicating an absolute offset from GMT
-/// (such as "-08:00"). To indicate a native UTC timestamp, one of the strings "UTC",
-/// "Etc/UTC" or "+00:00" should be used.
-///
-/// If any non-empty string is supplied as the timezone for a TimestampType, then the
-/// Arrow field containing that timestamp type (and by extension the column associated
-/// with such a field) is considered "timezone-aware". The integer arrays that comprise
-/// a timezone-aware column must contain UTC normalized datetime values, regardless of
-/// the contents of their timezone string. More precisely, (i) the producer of a
-/// timezone-aware column must populate its constituent arrays with valid UTC values
-/// (performing offset conversions from non-UTC values if necessary); and (ii) the
-/// consumer of a timezone-aware column may assume that the column's values are directly
-/// comparable (that is, with no offset adjustment required) to the values of any other
-/// timezone-aware column or to any other valid UTC datetime value (provided all values
-/// are expressed in the same units).
-///
-/// If a TimestampType is constructed without a timezone (or, equivalently, if the
-/// timezone supplied is an empty string) then the resulting Arrow field (column) is
-/// considered "timezone-naive". The producer of a timezone-naive column may populate
-/// its constituent integer arrays with datetime values from any timezone; the consumer
-/// of a timezone-naive column should make no assumptions about the interoperability or
-/// comparability of the values of such a column with those of any other timestamp
-/// column or datetime value.
-///
-/// If a timezone-aware field contains a recognized timezone, its values may be
-/// localized to that locale upon display; the values of timezone-naive fields must
-/// always be displayed "as is", with no localization performed on them.
-class ARROW_EXPORT TimestampType : public TemporalType, public ParametricType {
- public:
- using Unit = TimeUnit;
-
- static constexpr Type::type type_id = Type::TIMESTAMP;
- using c_type = int64_t;
- using PhysicalType = Int64Type;
-
- static constexpr const char* type_name() { return "timestamp"; }
-
- int bit_width() const override { return static_cast<int>(sizeof(int64_t) * CHAR_BIT); }
-
- explicit TimestampType(TimeUnit::type unit = TimeUnit::MILLI)
- : TemporalType(Type::TIMESTAMP), unit_(unit) {}
-
- explicit TimestampType(TimeUnit::type unit, const std::string& timezone)
- : TemporalType(Type::TIMESTAMP), unit_(unit), timezone_(timezone) {}
-
- std::string ToString() const override;
- std::string name() const override { return "timestamp"; }
-
- TimeUnit::type unit() const { return unit_; }
- const std::string& timezone() const { return timezone_; }
-
- protected:
- std::string ComputeFingerprint() const override;
-
- private:
- TimeUnit::type unit_;
- std::string timezone_;
-};
-
-// Base class for the different kinds of calendar intervals.
-class ARROW_EXPORT IntervalType : public TemporalType, public ParametricType {
- public:
- enum type { MONTHS, DAY_TIME };
-
- virtual type interval_type() const = 0;
-
- protected:
- explicit IntervalType(Type::type subtype) : TemporalType(subtype) {}
- std::string ComputeFingerprint() const override;
-};
-
-/// \brief Represents a number of months.
-///
-/// Type representing a number of months. Corresponds to YearMonth type
-/// in Schema.fbs (years are defined as 12 months).
-class ARROW_EXPORT MonthIntervalType : public IntervalType {
- public:
- static constexpr Type::type type_id = Type::INTERVAL_MONTHS;
- using c_type = int32_t;
- using PhysicalType = Int32Type;
-
- static constexpr const char* type_name() { return "month_interval"; }
-
- IntervalType::type interval_type() const override { return IntervalType::MONTHS; }
-
- int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
-
- MonthIntervalType() : IntervalType(type_id) {}
-
- std::string ToString() const override { return name(); }
- std::string name() const override { return "month_interval"; }
-};
-
-/// \brief Represents a number of days and milliseconds (fraction of day).
-class ARROW_EXPORT DayTimeIntervalType : public IntervalType {
- public:
- struct DayMilliseconds {
- int32_t days;
- int32_t milliseconds;
- bool operator==(DayMilliseconds other) const {
- return this->days == other.days && this->milliseconds == other.milliseconds;
- }
- bool operator!=(DayMilliseconds other) const { return !(*this == other); }
- bool operator<(DayMilliseconds other) const {
- return this->days < other.days || this->milliseconds < other.milliseconds;
- }
- };
- using c_type = DayMilliseconds;
- using PhysicalType = DayTimeIntervalType;
-
- static_assert(sizeof(DayMilliseconds) == 8,
- "DayMilliseconds struct assumed to be of size 8 bytes");
- static constexpr Type::type type_id = Type::INTERVAL_DAY_TIME;
-
- static constexpr const char* type_name() { return "day_time_interval"; }
-
- IntervalType::type interval_type() const override { return IntervalType::DAY_TIME; }
-
- DayTimeIntervalType() : IntervalType(type_id) {}
-
- int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
-
- std::string ToString() const override { return name(); }
- std::string name() const override { return "day_time_interval"; }
-};
-
-/// \brief Represents an elapsed time without any relation to a calendar artifact.
-class ARROW_EXPORT DurationType : public TemporalType, public ParametricType {
- public:
- using Unit = TimeUnit;
-
- static constexpr Type::type type_id = Type::DURATION;
- using c_type = int64_t;
- using PhysicalType = Int64Type;
-
- static constexpr const char* type_name() { return "duration"; }
-
- int bit_width() const override { return static_cast<int>(sizeof(int64_t) * CHAR_BIT); }
-
- explicit DurationType(TimeUnit::type unit = TimeUnit::MILLI)
- : TemporalType(Type::DURATION), unit_(unit) {}
-
- std::string ToString() const override;
- std::string name() const override { return "duration"; }
-
- TimeUnit::type unit() const { return unit_; }
-
- protected:
- std::string ComputeFingerprint() const override;
-
- private:
- TimeUnit::type unit_;
-};
-
-// ----------------------------------------------------------------------
-// Dictionary type (for representing categorical or dictionary-encoded
-// in memory)
-
-/// \brief Dictionary-encoded value type with data-dependent
-/// dictionary. Indices are represented by any integer types.
-class ARROW_EXPORT DictionaryType : public FixedWidthType {
- public:
- static constexpr Type::type type_id = Type::DICTIONARY;
-
- static constexpr const char* type_name() { return "dictionary"; }
-
- DictionaryType(const std::shared_ptr<DataType>& index_type,
- const std::shared_ptr<DataType>& value_type, bool ordered = false);
-
- // A constructor variant that validates its input parameters
- static Result<std::shared_ptr<DataType>> Make(
- const std::shared_ptr<DataType>& index_type,
- const std::shared_ptr<DataType>& value_type, bool ordered = false);
-
- std::string ToString() const override;
- std::string name() const override { return "dictionary"; }
-
- int bit_width() const override;
-
- DataTypeLayout layout() const override;
-
- const std::shared_ptr<DataType>& index_type() const { return index_type_; }
- const std::shared_ptr<DataType>& value_type() const { return value_type_; }
-
- bool ordered() const { return ordered_; }
-
- protected:
- static Status ValidateParameters(const DataType& index_type,
- const DataType& value_type);
-
- std::string ComputeFingerprint() const override;
-
- // Must be an integer type (not currently checked)
- std::shared_ptr<DataType> index_type_;
- std::shared_ptr<DataType> value_type_;
- bool ordered_;
-};
-
-// ----------------------------------------------------------------------
-// FieldRef
-
-/// \class FieldPath
-///
-/// Represents a path to a nested field using indices of child fields.
-/// For example, given indices {5, 9, 3} the field would be retrieved with
-/// schema->field(5)->type()->field(9)->type()->field(3)
-///
-/// Attempting to retrieve a child field using a FieldPath which is not valid for
-/// a given schema will raise an error. Invalid FieldPaths include:
-/// - an index is out of range
-/// - the path is empty (note: a default constructed FieldPath will be empty)
-///
-/// FieldPaths provide a number of accessors for drilling down to potentially nested
-/// children. They are overloaded for convenience to support Schema (returns a field),
-/// DataType (returns a child field), Field (returns a child field of this field's type)
+class ARROW_EXPORT DenseUnionType : public UnionType {
+ public:
+ static constexpr Type::type type_id = Type::DENSE_UNION;
+
+ static constexpr const char* type_name() { return "dense_union"; }
+
+ DenseUnionType(std::vector<std::shared_ptr<Field>> fields,
+ std::vector<int8_t> type_codes);
+
+ // A constructor variant that validates input parameters
+ static Result<std::shared_ptr<DataType>> Make(
+ std::vector<std::shared_ptr<Field>> fields, std::vector<int8_t> type_codes);
+
+ std::string name() const override { return "dense_union"; }
+};
+
+// ----------------------------------------------------------------------
+// Date and time types
+
+/// \brief Base type for all date and time types
+class ARROW_EXPORT TemporalType : public FixedWidthType {
+ public:
+ using FixedWidthType::FixedWidthType;
+
+ DataTypeLayout layout() const override {
+ return DataTypeLayout(
+ {DataTypeLayout::Bitmap(), DataTypeLayout::FixedWidth(bit_width() / 8)});
+ }
+};
+
+/// \brief Base type class for date data
+class ARROW_EXPORT DateType : public TemporalType {
+ public:
+ virtual DateUnit unit() const = 0;
+
+ protected:
+ explicit DateType(Type::type type_id);
+};
+
+/// Concrete type class for 32-bit date data (as number of days since UNIX epoch)
+class ARROW_EXPORT Date32Type : public DateType {
+ public:
+ static constexpr Type::type type_id = Type::DATE32;
+ static constexpr DateUnit UNIT = DateUnit::DAY;
+ using c_type = int32_t;
+ using PhysicalType = Int32Type;
+
+ static constexpr const char* type_name() { return "date32"; }
+
+ Date32Type();
+
+ int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
+
+ std::string ToString() const override;
+
+ std::string name() const override { return "date32"; }
+ DateUnit unit() const override { return UNIT; }
+
+ protected:
+ std::string ComputeFingerprint() const override;
+};
+
+/// Concrete type class for 64-bit date data (as number of milliseconds since UNIX epoch)
+class ARROW_EXPORT Date64Type : public DateType {
+ public:
+ static constexpr Type::type type_id = Type::DATE64;
+ static constexpr DateUnit UNIT = DateUnit::MILLI;
+ using c_type = int64_t;
+ using PhysicalType = Int64Type;
+
+ static constexpr const char* type_name() { return "date64"; }
+
+ Date64Type();
+
+ int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
+
+ std::string ToString() const override;
+
+ std::string name() const override { return "date64"; }
+ DateUnit unit() const override { return UNIT; }
+
+ protected:
+ std::string ComputeFingerprint() const override;
+};
+
+ARROW_EXPORT
+std::ostream& operator<<(std::ostream& os, TimeUnit::type unit);
+
+/// Base type class for time data
+class ARROW_EXPORT TimeType : public TemporalType, public ParametricType {
+ public:
+ TimeUnit::type unit() const { return unit_; }
+
+ protected:
+ TimeType(Type::type type_id, TimeUnit::type unit);
+ std::string ComputeFingerprint() const override;
+
+ TimeUnit::type unit_;
+};
+
+/// Concrete type class for 32-bit time data (as number of seconds or milliseconds
+/// since midnight)
+class ARROW_EXPORT Time32Type : public TimeType {
+ public:
+ static constexpr Type::type type_id = Type::TIME32;
+ using c_type = int32_t;
+ using PhysicalType = Int32Type;
+
+ static constexpr const char* type_name() { return "time32"; }
+
+ int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
+
+ explicit Time32Type(TimeUnit::type unit = TimeUnit::MILLI);
+
+ std::string ToString() const override;
+
+ std::string name() const override { return "time32"; }
+};
+
+/// Concrete type class for 64-bit time data (as number of microseconds or nanoseconds
+/// since midnight)
+class ARROW_EXPORT Time64Type : public TimeType {
+ public:
+ static constexpr Type::type type_id = Type::TIME64;
+ using c_type = int64_t;
+ using PhysicalType = Int64Type;
+
+ static constexpr const char* type_name() { return "time64"; }
+
+ int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
+
+ explicit Time64Type(TimeUnit::type unit = TimeUnit::NANO);
+
+ std::string ToString() const override;
+
+ std::string name() const override { return "time64"; }
+};
+
+/// \brief Concrete type class for datetime data (as number of seconds, milliseconds,
+/// microseconds or nanoseconds since UNIX epoch)
+///
+/// If supplied, the timezone string should take either the form (i) "Area/Location",
+/// with values drawn from the names in the IANA Time Zone Database (such as
+/// "Europe/Zurich"); or (ii) "(+|-)HH:MM" indicating an absolute offset from GMT
+/// (such as "-08:00"). To indicate a native UTC timestamp, one of the strings "UTC",
+/// "Etc/UTC" or "+00:00" should be used.
+///
+/// If any non-empty string is supplied as the timezone for a TimestampType, then the
+/// Arrow field containing that timestamp type (and by extension the column associated
+/// with such a field) is considered "timezone-aware". The integer arrays that comprise
+/// a timezone-aware column must contain UTC normalized datetime values, regardless of
+/// the contents of their timezone string. More precisely, (i) the producer of a
+/// timezone-aware column must populate its constituent arrays with valid UTC values
+/// (performing offset conversions from non-UTC values if necessary); and (ii) the
+/// consumer of a timezone-aware column may assume that the column's values are directly
+/// comparable (that is, with no offset adjustment required) to the values of any other
+/// timezone-aware column or to any other valid UTC datetime value (provided all values
+/// are expressed in the same units).
+///
+/// If a TimestampType is constructed without a timezone (or, equivalently, if the
+/// timezone supplied is an empty string) then the resulting Arrow field (column) is
+/// considered "timezone-naive". The producer of a timezone-naive column may populate
+/// its constituent integer arrays with datetime values from any timezone; the consumer
+/// of a timezone-naive column should make no assumptions about the interoperability or
+/// comparability of the values of such a column with those of any other timestamp
+/// column or datetime value.
+///
+/// If a timezone-aware field contains a recognized timezone, its values may be
+/// localized to that locale upon display; the values of timezone-naive fields must
+/// always be displayed "as is", with no localization performed on them.
+class ARROW_EXPORT TimestampType : public TemporalType, public ParametricType {
+ public:
+ using Unit = TimeUnit;
+
+ static constexpr Type::type type_id = Type::TIMESTAMP;
+ using c_type = int64_t;
+ using PhysicalType = Int64Type;
+
+ static constexpr const char* type_name() { return "timestamp"; }
+
+ int bit_width() const override { return static_cast<int>(sizeof(int64_t) * CHAR_BIT); }
+
+ explicit TimestampType(TimeUnit::type unit = TimeUnit::MILLI)
+ : TemporalType(Type::TIMESTAMP), unit_(unit) {}
+
+ explicit TimestampType(TimeUnit::type unit, const std::string& timezone)
+ : TemporalType(Type::TIMESTAMP), unit_(unit), timezone_(timezone) {}
+
+ std::string ToString() const override;
+ std::string name() const override { return "timestamp"; }
+
+ TimeUnit::type unit() const { return unit_; }
+ const std::string& timezone() const { return timezone_; }
+
+ protected:
+ std::string ComputeFingerprint() const override;
+
+ private:
+ TimeUnit::type unit_;
+ std::string timezone_;
+};
+
+// Base class for the different kinds of calendar intervals.
+class ARROW_EXPORT IntervalType : public TemporalType, public ParametricType {
+ public:
+ enum type { MONTHS, DAY_TIME };
+
+ virtual type interval_type() const = 0;
+
+ protected:
+ explicit IntervalType(Type::type subtype) : TemporalType(subtype) {}
+ std::string ComputeFingerprint() const override;
+};
+
+/// \brief Represents a number of months.
+///
+/// Type representing a number of months. Corresponds to YearMonth type
+/// in Schema.fbs (years are defined as 12 months).
+class ARROW_EXPORT MonthIntervalType : public IntervalType {
+ public:
+ static constexpr Type::type type_id = Type::INTERVAL_MONTHS;
+ using c_type = int32_t;
+ using PhysicalType = Int32Type;
+
+ static constexpr const char* type_name() { return "month_interval"; }
+
+ IntervalType::type interval_type() const override { return IntervalType::MONTHS; }
+
+ int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
+
+ MonthIntervalType() : IntervalType(type_id) {}
+
+ std::string ToString() const override { return name(); }
+ std::string name() const override { return "month_interval"; }
+};
+
+/// \brief Represents a number of days and milliseconds (fraction of day).
+class ARROW_EXPORT DayTimeIntervalType : public IntervalType {
+ public:
+ struct DayMilliseconds {
+ int32_t days;
+ int32_t milliseconds;
+ bool operator==(DayMilliseconds other) const {
+ return this->days == other.days && this->milliseconds == other.milliseconds;
+ }
+ bool operator!=(DayMilliseconds other) const { return !(*this == other); }
+ bool operator<(DayMilliseconds other) const {
+ return this->days < other.days || this->milliseconds < other.milliseconds;
+ }
+ };
+ using c_type = DayMilliseconds;
+ using PhysicalType = DayTimeIntervalType;
+
+ static_assert(sizeof(DayMilliseconds) == 8,
+ "DayMilliseconds struct assumed to be of size 8 bytes");
+ static constexpr Type::type type_id = Type::INTERVAL_DAY_TIME;
+
+ static constexpr const char* type_name() { return "day_time_interval"; }
+
+ IntervalType::type interval_type() const override { return IntervalType::DAY_TIME; }
+
+ DayTimeIntervalType() : IntervalType(type_id) {}
+
+ int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
+
+ std::string ToString() const override { return name(); }
+ std::string name() const override { return "day_time_interval"; }
+};
+
+/// \brief Represents an elapsed time without any relation to a calendar artifact.
+class ARROW_EXPORT DurationType : public TemporalType, public ParametricType {
+ public:
+ using Unit = TimeUnit;
+
+ static constexpr Type::type type_id = Type::DURATION;
+ using c_type = int64_t;
+ using PhysicalType = Int64Type;
+
+ static constexpr const char* type_name() { return "duration"; }
+
+ int bit_width() const override { return static_cast<int>(sizeof(int64_t) * CHAR_BIT); }
+
+ explicit DurationType(TimeUnit::type unit = TimeUnit::MILLI)
+ : TemporalType(Type::DURATION), unit_(unit) {}
+
+ std::string ToString() const override;
+ std::string name() const override { return "duration"; }
+
+ TimeUnit::type unit() const { return unit_; }
+
+ protected:
+ std::string ComputeFingerprint() const override;
+
+ private:
+ TimeUnit::type unit_;
+};
+
+// ----------------------------------------------------------------------
+// Dictionary type (for representing categorical or dictionary-encoded
+// in memory)
+
+/// \brief Dictionary-encoded value type with data-dependent
+/// dictionary. Indices are represented by any integer types.
+class ARROW_EXPORT DictionaryType : public FixedWidthType {
+ public:
+ static constexpr Type::type type_id = Type::DICTIONARY;
+
+ static constexpr const char* type_name() { return "dictionary"; }
+
+ DictionaryType(const std::shared_ptr<DataType>& index_type,
+ const std::shared_ptr<DataType>& value_type, bool ordered = false);
+
+ // A constructor variant that validates its input parameters
+ static Result<std::shared_ptr<DataType>> Make(
+ const std::shared_ptr<DataType>& index_type,
+ const std::shared_ptr<DataType>& value_type, bool ordered = false);
+
+ std::string ToString() const override;
+ std::string name() const override { return "dictionary"; }
+
+ int bit_width() const override;
+
+ DataTypeLayout layout() const override;
+
+ const std::shared_ptr<DataType>& index_type() const { return index_type_; }
+ const std::shared_ptr<DataType>& value_type() const { return value_type_; }
+
+ bool ordered() const { return ordered_; }
+
+ protected:
+ static Status ValidateParameters(const DataType& index_type,
+ const DataType& value_type);
+
+ std::string ComputeFingerprint() const override;
+
+ // Must be an integer type (not currently checked)
+ std::shared_ptr<DataType> index_type_;
+ std::shared_ptr<DataType> value_type_;
+ bool ordered_;
+};
+
+// ----------------------------------------------------------------------
+// FieldRef
+
+/// \class FieldPath
+///
+/// Represents a path to a nested field using indices of child fields.
+/// For example, given indices {5, 9, 3} the field would be retrieved with
+/// schema->field(5)->type()->field(9)->type()->field(3)
+///
+/// Attempting to retrieve a child field using a FieldPath which is not valid for
+/// a given schema will raise an error. Invalid FieldPaths include:
+/// - an index is out of range
+/// - the path is empty (note: a default constructed FieldPath will be empty)
+///
+/// FieldPaths provide a number of accessors for drilling down to potentially nested
+/// children. They are overloaded for convenience to support Schema (returns a field),
+/// DataType (returns a child field), Field (returns a child field of this field's type)
/// Array (returns a child array), RecordBatch (returns a column).
-class ARROW_EXPORT FieldPath {
- public:
- FieldPath() = default;
-
- FieldPath(std::vector<int> indices) // NOLINT runtime/explicit
- : indices_(std::move(indices)) {}
-
- FieldPath(std::initializer_list<int> indices) // NOLINT runtime/explicit
- : indices_(std::move(indices)) {}
-
- std::string ToString() const;
-
- size_t hash() const;
+class ARROW_EXPORT FieldPath {
+ public:
+ FieldPath() = default;
+
+ FieldPath(std::vector<int> indices) // NOLINT runtime/explicit
+ : indices_(std::move(indices)) {}
+
+ FieldPath(std::initializer_list<int> indices) // NOLINT runtime/explicit
+ : indices_(std::move(indices)) {}
+
+ std::string ToString() const;
+
+ size_t hash() const;
struct Hash {
size_t operator()(const FieldPath& path) const { return path.hash(); }
};
-
+
bool empty() const { return indices_.empty(); }
- bool operator==(const FieldPath& other) const { return indices() == other.indices(); }
- bool operator!=(const FieldPath& other) const { return indices() != other.indices(); }
-
- const std::vector<int>& indices() const { return indices_; }
- int operator[](size_t i) const { return indices_[i]; }
- std::vector<int>::const_iterator begin() const { return indices_.begin(); }
- std::vector<int>::const_iterator end() const { return indices_.end(); }
-
- /// \brief Retrieve the referenced child Field from a Schema, Field, or DataType
- Result<std::shared_ptr<Field>> Get(const Schema& schema) const;
- Result<std::shared_ptr<Field>> Get(const Field& field) const;
- Result<std::shared_ptr<Field>> Get(const DataType& type) const;
- Result<std::shared_ptr<Field>> Get(const FieldVector& fields) const;
-
- /// \brief Retrieve the referenced column from a RecordBatch or Table
- Result<std::shared_ptr<Array>> Get(const RecordBatch& batch) const;
-
+ bool operator==(const FieldPath& other) const { return indices() == other.indices(); }
+ bool operator!=(const FieldPath& other) const { return indices() != other.indices(); }
+
+ const std::vector<int>& indices() const { return indices_; }
+ int operator[](size_t i) const { return indices_[i]; }
+ std::vector<int>::const_iterator begin() const { return indices_.begin(); }
+ std::vector<int>::const_iterator end() const { return indices_.end(); }
+
+ /// \brief Retrieve the referenced child Field from a Schema, Field, or DataType
+ Result<std::shared_ptr<Field>> Get(const Schema& schema) const;
+ Result<std::shared_ptr<Field>> Get(const Field& field) const;
+ Result<std::shared_ptr<Field>> Get(const DataType& type) const;
+ Result<std::shared_ptr<Field>> Get(const FieldVector& fields) const;
+
+ /// \brief Retrieve the referenced column from a RecordBatch or Table
+ Result<std::shared_ptr<Array>> Get(const RecordBatch& batch) const;
+
/// \brief Retrieve the referenced child from an Array or ArrayData
- Result<std::shared_ptr<Array>> Get(const Array& array) const;
+ Result<std::shared_ptr<Array>> Get(const Array& array) const;
Result<std::shared_ptr<ArrayData>> Get(const ArrayData& data) const;
-
- private:
- std::vector<int> indices_;
-};
-
-/// \class FieldRef
-/// \brief Descriptor of a (potentially nested) field within a schema.
-///
-/// Unlike FieldPath (which exclusively uses indices of child fields), FieldRef may
-/// reference a field by name. It is intended to replace parameters like `int field_index`
-/// and `const std::string& field_name`; it can be implicitly constructed from either a
-/// field index or a name.
-///
-/// Nested fields can be referenced as well. Given
-/// schema({field("a", struct_({field("n", null())})), field("b", int32())})
-///
-/// the following all indicate the nested field named "n":
-/// FieldRef ref1(0, 0);
-/// FieldRef ref2("a", 0);
-/// FieldRef ref3("a", "n");
-/// FieldRef ref4(0, "n");
-/// ARROW_ASSIGN_OR_RAISE(FieldRef ref5,
-/// FieldRef::FromDotPath(".a[0]"));
-///
-/// FieldPaths matching a FieldRef are retrieved using the member function FindAll.
-/// Multiple matches are possible because field names may be duplicated within a schema.
-/// For example:
-/// Schema a_is_ambiguous({field("a", int32()), field("a", float32())});
-/// auto matches = FieldRef("a").FindAll(a_is_ambiguous);
-/// assert(matches.size() == 2);
-/// assert(matches[0].Get(a_is_ambiguous)->Equals(a_is_ambiguous.field(0)));
-/// assert(matches[1].Get(a_is_ambiguous)->Equals(a_is_ambiguous.field(1)));
-///
-/// Convenience accessors are available which raise a helpful error if the field is not
-/// found or ambiguous, and for immediately calling FieldPath::Get to retrieve any
-/// matching children:
-/// auto maybe_match = FieldRef("struct", "field_i32").FindOneOrNone(schema);
-/// auto maybe_column = FieldRef("struct", "field_i32").GetOne(some_table);
-class ARROW_EXPORT FieldRef {
- public:
- FieldRef() = default;
-
- /// Construct a FieldRef using a string of indices. The reference will be retrieved as:
- /// schema.fields[self.indices[0]].type.fields[self.indices[1]] ...
- ///
- /// Empty indices are not valid.
- FieldRef(FieldPath indices); // NOLINT runtime/explicit
-
- /// Construct a by-name FieldRef. Multiple fields may match a by-name FieldRef:
- /// [f for f in schema.fields where f.name == self.name]
- FieldRef(std::string name) : impl_(std::move(name)) {} // NOLINT runtime/explicit
- FieldRef(const char* name) : impl_(std::string(name)) {} // NOLINT runtime/explicit
-
- /// Equivalent to a single index string of indices.
- FieldRef(int index) : impl_(FieldPath({index})) {} // NOLINT runtime/explicit
-
- /// Convenience constructor for nested FieldRefs: each argument will be used to
- /// construct a FieldRef
- template <typename A0, typename A1, typename... A>
- FieldRef(A0&& a0, A1&& a1, A&&... a) {
- Flatten({// cpplint thinks the following are constructor decls
- FieldRef(std::forward<A0>(a0)), // NOLINT runtime/explicit
- FieldRef(std::forward<A1>(a1)), // NOLINT runtime/explicit
- FieldRef(std::forward<A>(a))...}); // NOLINT runtime/explicit
- }
-
- /// Parse a dot path into a FieldRef.
- ///
- /// dot_path = '.' name
- /// | '[' digit+ ']'
- /// | dot_path+
- ///
- /// Examples:
- /// ".alpha" => FieldRef("alpha")
- /// "[2]" => FieldRef(2)
- /// ".beta[3]" => FieldRef("beta", 3)
- /// "[5].gamma.delta[7]" => FieldRef(5, "gamma", "delta", 7)
- /// ".hello world" => FieldRef("hello world")
- /// R"(.\[y\]\\tho\.\)" => FieldRef(R"([y]\tho.\)")
- ///
- /// Note: When parsing a name, a '\' preceding any other character will be dropped from
- /// the resulting name. Therefore if a name must contain the characters '.', '\', or '['
- /// those must be escaped with a preceding '\'.
- static Result<FieldRef> FromDotPath(const std::string& dot_path);
-
- bool Equals(const FieldRef& other) const { return impl_ == other.impl_; }
- bool operator==(const FieldRef& other) const { return Equals(other); }
-
- std::string ToString() const;
-
- size_t hash() const;
+
+ private:
+ std::vector<int> indices_;
+};
+
+/// \class FieldRef
+/// \brief Descriptor of a (potentially nested) field within a schema.
+///
+/// Unlike FieldPath (which exclusively uses indices of child fields), FieldRef may
+/// reference a field by name. It is intended to replace parameters like `int field_index`
+/// and `const std::string& field_name`; it can be implicitly constructed from either a
+/// field index or a name.
+///
+/// Nested fields can be referenced as well. Given
+/// schema({field("a", struct_({field("n", null())})), field("b", int32())})
+///
+/// the following all indicate the nested field named "n":
+/// FieldRef ref1(0, 0);
+/// FieldRef ref2("a", 0);
+/// FieldRef ref3("a", "n");
+/// FieldRef ref4(0, "n");
+/// ARROW_ASSIGN_OR_RAISE(FieldRef ref5,
+/// FieldRef::FromDotPath(".a[0]"));
+///
+/// FieldPaths matching a FieldRef are retrieved using the member function FindAll.
+/// Multiple matches are possible because field names may be duplicated within a schema.
+/// For example:
+/// Schema a_is_ambiguous({field("a", int32()), field("a", float32())});
+/// auto matches = FieldRef("a").FindAll(a_is_ambiguous);
+/// assert(matches.size() == 2);
+/// assert(matches[0].Get(a_is_ambiguous)->Equals(a_is_ambiguous.field(0)));
+/// assert(matches[1].Get(a_is_ambiguous)->Equals(a_is_ambiguous.field(1)));
+///
+/// Convenience accessors are available which raise a helpful error if the field is not
+/// found or ambiguous, and for immediately calling FieldPath::Get to retrieve any
+/// matching children:
+/// auto maybe_match = FieldRef("struct", "field_i32").FindOneOrNone(schema);
+/// auto maybe_column = FieldRef("struct", "field_i32").GetOne(some_table);
+class ARROW_EXPORT FieldRef {
+ public:
+ FieldRef() = default;
+
+ /// Construct a FieldRef using a string of indices. The reference will be retrieved as:
+ /// schema.fields[self.indices[0]].type.fields[self.indices[1]] ...
+ ///
+ /// Empty indices are not valid.
+ FieldRef(FieldPath indices); // NOLINT runtime/explicit
+
+ /// Construct a by-name FieldRef. Multiple fields may match a by-name FieldRef:
+ /// [f for f in schema.fields where f.name == self.name]
+ FieldRef(std::string name) : impl_(std::move(name)) {} // NOLINT runtime/explicit
+ FieldRef(const char* name) : impl_(std::string(name)) {} // NOLINT runtime/explicit
+
+ /// Equivalent to a single index string of indices.
+ FieldRef(int index) : impl_(FieldPath({index})) {} // NOLINT runtime/explicit
+
+ /// Convenience constructor for nested FieldRefs: each argument will be used to
+ /// construct a FieldRef
+ template <typename A0, typename A1, typename... A>
+ FieldRef(A0&& a0, A1&& a1, A&&... a) {
+ Flatten({// cpplint thinks the following are constructor decls
+ FieldRef(std::forward<A0>(a0)), // NOLINT runtime/explicit
+ FieldRef(std::forward<A1>(a1)), // NOLINT runtime/explicit
+ FieldRef(std::forward<A>(a))...}); // NOLINT runtime/explicit
+ }
+
+ /// Parse a dot path into a FieldRef.
+ ///
+ /// dot_path = '.' name
+ /// | '[' digit+ ']'
+ /// | dot_path+
+ ///
+ /// Examples:
+ /// ".alpha" => FieldRef("alpha")
+ /// "[2]" => FieldRef(2)
+ /// ".beta[3]" => FieldRef("beta", 3)
+ /// "[5].gamma.delta[7]" => FieldRef(5, "gamma", "delta", 7)
+ /// ".hello world" => FieldRef("hello world")
+ /// R"(.\[y\]\\tho\.\)" => FieldRef(R"([y]\tho.\)")
+ ///
+ /// Note: When parsing a name, a '\' preceding any other character will be dropped from
+ /// the resulting name. Therefore if a name must contain the characters '.', '\', or '['
+ /// those must be escaped with a preceding '\'.
+ static Result<FieldRef> FromDotPath(const std::string& dot_path);
+
+ bool Equals(const FieldRef& other) const { return impl_ == other.impl_; }
+ bool operator==(const FieldRef& other) const { return Equals(other); }
+
+ std::string ToString() const;
+
+ size_t hash() const;
struct Hash {
size_t operator()(const FieldRef& ref) const { return ref.hash(); }
};
-
+
explicit operator bool() const { return Equals(FieldPath{}); }
bool operator!() const { return !Equals(FieldPath{}); }
- bool IsFieldPath() const { return util::holds_alternative<FieldPath>(impl_); }
- bool IsName() const { return util::holds_alternative<std::string>(impl_); }
- bool IsNested() const {
- if (IsName()) return false;
- if (IsFieldPath()) return util::get<FieldPath>(impl_).indices().size() > 1;
- return true;
- }
-
+ bool IsFieldPath() const { return util::holds_alternative<FieldPath>(impl_); }
+ bool IsName() const { return util::holds_alternative<std::string>(impl_); }
+ bool IsNested() const {
+ if (IsName()) return false;
+ if (IsFieldPath()) return util::get<FieldPath>(impl_).indices().size() > 1;
+ return true;
+ }
+
const FieldPath* field_path() const {
return IsFieldPath() ? &util::get<FieldPath>(impl_) : NULLPTR;
}
@@ -1565,102 +1565,102 @@ class ARROW_EXPORT FieldRef {
return IsName() ? &util::get<std::string>(impl_) : NULLPTR;
}
- /// \brief Retrieve FieldPath of every child field which matches this FieldRef.
- std::vector<FieldPath> FindAll(const Schema& schema) const;
- std::vector<FieldPath> FindAll(const Field& field) const;
- std::vector<FieldPath> FindAll(const DataType& type) const;
- std::vector<FieldPath> FindAll(const FieldVector& fields) const;
-
- /// \brief Convenience function which applies FindAll to arg's type or schema.
+ /// \brief Retrieve FieldPath of every child field which matches this FieldRef.
+ std::vector<FieldPath> FindAll(const Schema& schema) const;
+ std::vector<FieldPath> FindAll(const Field& field) const;
+ std::vector<FieldPath> FindAll(const DataType& type) const;
+ std::vector<FieldPath> FindAll(const FieldVector& fields) const;
+
+ /// \brief Convenience function which applies FindAll to arg's type or schema.
std::vector<FieldPath> FindAll(const ArrayData& array) const;
- std::vector<FieldPath> FindAll(const Array& array) const;
- std::vector<FieldPath> FindAll(const RecordBatch& batch) const;
-
- /// \brief Convenience function: raise an error if matches is empty.
- template <typename T>
- Status CheckNonEmpty(const std::vector<FieldPath>& matches, const T& root) const {
- if (matches.empty()) {
- return Status::Invalid("No match for ", ToString(), " in ", root.ToString());
- }
- return Status::OK();
- }
-
- /// \brief Convenience function: raise an error if matches contains multiple FieldPaths.
- template <typename T>
- Status CheckNonMultiple(const std::vector<FieldPath>& matches, const T& root) const {
- if (matches.size() > 1) {
- return Status::Invalid("Multiple matches for ", ToString(), " in ",
- root.ToString());
- }
- return Status::OK();
- }
-
- /// \brief Retrieve FieldPath of a single child field which matches this
- /// FieldRef. Emit an error if none or multiple match.
- template <typename T>
- Result<FieldPath> FindOne(const T& root) const {
- auto matches = FindAll(root);
- ARROW_RETURN_NOT_OK(CheckNonEmpty(matches, root));
- ARROW_RETURN_NOT_OK(CheckNonMultiple(matches, root));
- return std::move(matches[0]);
- }
-
- /// \brief Retrieve FieldPath of a single child field which matches this
- /// FieldRef. Emit an error if multiple match. An empty (invalid) FieldPath
- /// will be returned if none match.
- template <typename T>
- Result<FieldPath> FindOneOrNone(const T& root) const {
- auto matches = FindAll(root);
- ARROW_RETURN_NOT_OK(CheckNonMultiple(matches, root));
- if (matches.empty()) {
- return FieldPath();
- }
- return std::move(matches[0]);
- }
-
- template <typename T>
- using GetType = decltype(std::declval<FieldPath>().Get(std::declval<T>()).ValueOrDie());
-
- /// \brief Get all children matching this FieldRef.
- template <typename T>
- std::vector<GetType<T>> GetAll(const T& root) const {
- std::vector<GetType<T>> out;
- for (const auto& match : FindAll(root)) {
- out.push_back(match.Get(root).ValueOrDie());
- }
- return out;
- }
-
- /// \brief Get the single child matching this FieldRef.
- /// Emit an error if none or multiple match.
- template <typename T>
- Result<GetType<T>> GetOne(const T& root) const {
- ARROW_ASSIGN_OR_RAISE(auto match, FindOne(root));
- return match.Get(root).ValueOrDie();
- }
-
- /// \brief Get the single child matching this FieldRef.
- /// Return nullptr if none match, emit an error if multiple match.
- template <typename T>
- Result<GetType<T>> GetOneOrNone(const T& root) const {
- ARROW_ASSIGN_OR_RAISE(auto match, FindOneOrNone(root));
+ std::vector<FieldPath> FindAll(const Array& array) const;
+ std::vector<FieldPath> FindAll(const RecordBatch& batch) const;
+
+ /// \brief Convenience function: raise an error if matches is empty.
+ template <typename T>
+ Status CheckNonEmpty(const std::vector<FieldPath>& matches, const T& root) const {
+ if (matches.empty()) {
+ return Status::Invalid("No match for ", ToString(), " in ", root.ToString());
+ }
+ return Status::OK();
+ }
+
+ /// \brief Convenience function: raise an error if matches contains multiple FieldPaths.
+ template <typename T>
+ Status CheckNonMultiple(const std::vector<FieldPath>& matches, const T& root) const {
+ if (matches.size() > 1) {
+ return Status::Invalid("Multiple matches for ", ToString(), " in ",
+ root.ToString());
+ }
+ return Status::OK();
+ }
+
+ /// \brief Retrieve FieldPath of a single child field which matches this
+ /// FieldRef. Emit an error if none or multiple match.
+ template <typename T>
+ Result<FieldPath> FindOne(const T& root) const {
+ auto matches = FindAll(root);
+ ARROW_RETURN_NOT_OK(CheckNonEmpty(matches, root));
+ ARROW_RETURN_NOT_OK(CheckNonMultiple(matches, root));
+ return std::move(matches[0]);
+ }
+
+ /// \brief Retrieve FieldPath of a single child field which matches this
+ /// FieldRef. Emit an error if multiple match. An empty (invalid) FieldPath
+ /// will be returned if none match.
+ template <typename T>
+ Result<FieldPath> FindOneOrNone(const T& root) const {
+ auto matches = FindAll(root);
+ ARROW_RETURN_NOT_OK(CheckNonMultiple(matches, root));
+ if (matches.empty()) {
+ return FieldPath();
+ }
+ return std::move(matches[0]);
+ }
+
+ template <typename T>
+ using GetType = decltype(std::declval<FieldPath>().Get(std::declval<T>()).ValueOrDie());
+
+ /// \brief Get all children matching this FieldRef.
+ template <typename T>
+ std::vector<GetType<T>> GetAll(const T& root) const {
+ std::vector<GetType<T>> out;
+ for (const auto& match : FindAll(root)) {
+ out.push_back(match.Get(root).ValueOrDie());
+ }
+ return out;
+ }
+
+ /// \brief Get the single child matching this FieldRef.
+ /// Emit an error if none or multiple match.
+ template <typename T>
+ Result<GetType<T>> GetOne(const T& root) const {
+ ARROW_ASSIGN_OR_RAISE(auto match, FindOne(root));
+ return match.Get(root).ValueOrDie();
+ }
+
+ /// \brief Get the single child matching this FieldRef.
+ /// Return nullptr if none match, emit an error if multiple match.
+ template <typename T>
+ Result<GetType<T>> GetOneOrNone(const T& root) const {
+ ARROW_ASSIGN_OR_RAISE(auto match, FindOneOrNone(root));
if (match.empty()) {
return static_cast<GetType<T>>(NULLPTR);
- }
+ }
return match.Get(root).ValueOrDie();
- }
-
- private:
- void Flatten(std::vector<FieldRef> children);
-
+ }
+
+ private:
+ void Flatten(std::vector<FieldRef> children);
+
util::Variant<FieldPath, std::string, std::vector<FieldRef>> impl_;
-
- ARROW_EXPORT friend void PrintTo(const FieldRef& ref, std::ostream* os);
-};
-
-// ----------------------------------------------------------------------
-// Schema
-
+
+ ARROW_EXPORT friend void PrintTo(const FieldRef& ref, std::ostream* os);
+};
+
+// ----------------------------------------------------------------------
+// Schema
+
enum class Endianness {
Little = 0,
Big = 1,
@@ -1671,27 +1671,27 @@ enum class Endianness {
#endif
};
-/// \class Schema
-/// \brief Sequence of arrow::Field objects describing the columns of a record
-/// batch or table data structure
-class ARROW_EXPORT Schema : public detail::Fingerprintable,
- public util::EqualityComparable<Schema>,
- public util::ToStringOstreamable<Schema> {
- public:
+/// \class Schema
+/// \brief Sequence of arrow::Field objects describing the columns of a record
+/// batch or table data structure
+class ARROW_EXPORT Schema : public detail::Fingerprintable,
+ public util::EqualityComparable<Schema>,
+ public util::ToStringOstreamable<Schema> {
+ public:
explicit Schema(FieldVector fields, Endianness endianness,
- std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);
-
+ std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);
+
explicit Schema(FieldVector fields,
std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);
- Schema(const Schema&);
-
- ~Schema() override;
-
- /// Returns true if all of the schema fields are equal
- bool Equals(const Schema& other, bool check_metadata = false) const;
- bool Equals(const std::shared_ptr<Schema>& other, bool check_metadata = false) const;
-
+ Schema(const Schema&);
+
+ ~Schema() override;
+
+ /// Returns true if all of the schema fields are equal
+ bool Equals(const Schema& other, bool check_metadata = false) const;
+ bool Equals(const std::shared_ptr<Schema>& other, bool check_metadata = false) const;
+
/// \brief Set endianness in the schema
///
/// \return new Schema
@@ -1703,228 +1703,228 @@ class ARROW_EXPORT Schema : public detail::Fingerprintable,
/// \brief Indicate if endianness is equal to platform-native endianness
bool is_native_endian() const;
- /// \brief Return the number of fields (columns) in the schema
- int num_fields() const;
-
- /// Return the ith schema element. Does not boundscheck
- const std::shared_ptr<Field>& field(int i) const;
-
+ /// \brief Return the number of fields (columns) in the schema
+ int num_fields() const;
+
+ /// Return the ith schema element. Does not boundscheck
+ const std::shared_ptr<Field>& field(int i) const;
+
const FieldVector& fields() const;
-
- std::vector<std::string> field_names() const;
-
- /// Returns null if name not found
- std::shared_ptr<Field> GetFieldByName(const std::string& name) const;
-
- /// \brief Return the indices of all fields having this name in sorted order
+
+ std::vector<std::string> field_names() const;
+
+ /// Returns null if name not found
+ std::shared_ptr<Field> GetFieldByName(const std::string& name) const;
+
+ /// \brief Return the indices of all fields having this name in sorted order
FieldVector GetAllFieldsByName(const std::string& name) const;
-
- /// Returns -1 if name not found
- int GetFieldIndex(const std::string& name) const;
-
- /// Return the indices of all fields having this name
- std::vector<int> GetAllFieldIndices(const std::string& name) const;
-
- /// Indicate if fields named `names` can be found unambiguously in the schema.
- Status CanReferenceFieldsByNames(const std::vector<std::string>& names) const;
-
- /// \brief The custom key-value metadata, if any
- ///
- /// \return metadata may be null
+
+ /// Returns -1 if name not found
+ int GetFieldIndex(const std::string& name) const;
+
+ /// Return the indices of all fields having this name
+ std::vector<int> GetAllFieldIndices(const std::string& name) const;
+
+ /// Indicate if fields named `names` can be found unambiguously in the schema.
+ Status CanReferenceFieldsByNames(const std::vector<std::string>& names) const;
+
+ /// \brief The custom key-value metadata, if any
+ ///
+ /// \return metadata may be null
const std::shared_ptr<const KeyValueMetadata>& metadata() const;
-
- /// \brief Render a string representation of the schema suitable for debugging
- /// \param[in] show_metadata when true, if KeyValueMetadata is non-empty,
- /// print keys and values in the output
- std::string ToString(bool show_metadata = false) const;
-
- Result<std::shared_ptr<Schema>> AddField(int i,
- const std::shared_ptr<Field>& field) const;
- Result<std::shared_ptr<Schema>> RemoveField(int i) const;
- Result<std::shared_ptr<Schema>> SetField(int i,
- const std::shared_ptr<Field>& field) const;
-
- /// \brief Replace key-value metadata with new metadata
- ///
- /// \param[in] metadata new KeyValueMetadata
- /// \return new Schema
- std::shared_ptr<Schema> WithMetadata(
- const std::shared_ptr<const KeyValueMetadata>& metadata) const;
-
- /// \brief Return copy of Schema without the KeyValueMetadata
- std::shared_ptr<Schema> RemoveMetadata() const;
-
- /// \brief Indicate that the Schema has non-empty KevValueMetadata
- bool HasMetadata() const;
-
- /// \brief Indicate that the Schema has distinct field names.
- bool HasDistinctFieldNames() const;
-
- protected:
- std::string ComputeFingerprint() const override;
- std::string ComputeMetadataFingerprint() const override;
-
- private:
- ARROW_EXPORT friend void PrintTo(const Schema& s, std::ostream* os);
-
- class Impl;
- std::unique_ptr<Impl> impl_;
-};
-
+
+ /// \brief Render a string representation of the schema suitable for debugging
+ /// \param[in] show_metadata when true, if KeyValueMetadata is non-empty,
+ /// print keys and values in the output
+ std::string ToString(bool show_metadata = false) const;
+
+ Result<std::shared_ptr<Schema>> AddField(int i,
+ const std::shared_ptr<Field>& field) const;
+ Result<std::shared_ptr<Schema>> RemoveField(int i) const;
+ Result<std::shared_ptr<Schema>> SetField(int i,
+ const std::shared_ptr<Field>& field) const;
+
+ /// \brief Replace key-value metadata with new metadata
+ ///
+ /// \param[in] metadata new KeyValueMetadata
+ /// \return new Schema
+ std::shared_ptr<Schema> WithMetadata(
+ const std::shared_ptr<const KeyValueMetadata>& metadata) const;
+
+ /// \brief Return copy of Schema without the KeyValueMetadata
+ std::shared_ptr<Schema> RemoveMetadata() const;
+
+ /// \brief Indicate that the Schema has non-empty KevValueMetadata
+ bool HasMetadata() const;
+
+ /// \brief Indicate that the Schema has distinct field names.
+ bool HasDistinctFieldNames() const;
+
+ protected:
+ std::string ComputeFingerprint() const override;
+ std::string ComputeMetadataFingerprint() const override;
+
+ private:
+ ARROW_EXPORT friend void PrintTo(const Schema& s, std::ostream* os);
+
+ class Impl;
+ std::unique_ptr<Impl> impl_;
+};
+
ARROW_EXPORT
std::string EndiannessToString(Endianness endianness);
-// ----------------------------------------------------------------------
-
-/// \brief Convenience class to incrementally construct/merge schemas.
-///
-/// This class amortizes the cost of validating field name conflicts by
-/// maintaining the mapping. The caller also controls the conflict resolution
-/// scheme.
-class ARROW_EXPORT SchemaBuilder {
- public:
- // Indicate how field conflict(s) should be resolved when building a schema. A
- // conflict arise when a field is added to the builder and one or more field(s)
- // with the same name already exists.
- enum ConflictPolicy {
- // Ignore the conflict and append the field. This is the default behavior of the
- // Schema constructor and the `arrow::schema` factory function.
- CONFLICT_APPEND = 0,
- // Keep the existing field and ignore the newer one.
- CONFLICT_IGNORE,
- // Replace the existing field with the newer one.
- CONFLICT_REPLACE,
- // Merge the fields. The merging behavior can be controlled by `Field::MergeOptions`
- // specified at construction time. Also see documentation of `Field::MergeWith`.
- CONFLICT_MERGE,
- // Refuse the new field and error out.
- CONFLICT_ERROR
- };
-
- /// \brief Construct an empty SchemaBuilder
+// ----------------------------------------------------------------------
+
+/// \brief Convenience class to incrementally construct/merge schemas.
+///
+/// This class amortizes the cost of validating field name conflicts by
+/// maintaining the mapping. The caller also controls the conflict resolution
+/// scheme.
+class ARROW_EXPORT SchemaBuilder {
+ public:
+ // Indicate how field conflict(s) should be resolved when building a schema. A
+ // conflict arise when a field is added to the builder and one or more field(s)
+ // with the same name already exists.
+ enum ConflictPolicy {
+ // Ignore the conflict and append the field. This is the default behavior of the
+ // Schema constructor and the `arrow::schema` factory function.
+ CONFLICT_APPEND = 0,
+ // Keep the existing field and ignore the newer one.
+ CONFLICT_IGNORE,
+ // Replace the existing field with the newer one.
+ CONFLICT_REPLACE,
+ // Merge the fields. The merging behavior can be controlled by `Field::MergeOptions`
+ // specified at construction time. Also see documentation of `Field::MergeWith`.
+ CONFLICT_MERGE,
+ // Refuse the new field and error out.
+ CONFLICT_ERROR
+ };
+
+ /// \brief Construct an empty SchemaBuilder
/// `field_merge_options` is only effective when `conflict_policy` == `CONFLICT_MERGE`.
- SchemaBuilder(
- ConflictPolicy conflict_policy = CONFLICT_APPEND,
- Field::MergeOptions field_merge_options = Field::MergeOptions::Defaults());
- /// \brief Construct a SchemaBuilder from a list of fields
+ SchemaBuilder(
+ ConflictPolicy conflict_policy = CONFLICT_APPEND,
+ Field::MergeOptions field_merge_options = Field::MergeOptions::Defaults());
+ /// \brief Construct a SchemaBuilder from a list of fields
/// `field_merge_options` is only effective when `conflict_policy` == `CONFLICT_MERGE`.
- SchemaBuilder(
- std::vector<std::shared_ptr<Field>> fields,
- ConflictPolicy conflict_policy = CONFLICT_APPEND,
- Field::MergeOptions field_merge_options = Field::MergeOptions::Defaults());
- /// \brief Construct a SchemaBuilder from a schema, preserving the metadata
+ SchemaBuilder(
+ std::vector<std::shared_ptr<Field>> fields,
+ ConflictPolicy conflict_policy = CONFLICT_APPEND,
+ Field::MergeOptions field_merge_options = Field::MergeOptions::Defaults());
+ /// \brief Construct a SchemaBuilder from a schema, preserving the metadata
/// `field_merge_options` is only effective when `conflict_policy` == `CONFLICT_MERGE`.
- SchemaBuilder(
- const std::shared_ptr<Schema>& schema,
- ConflictPolicy conflict_policy = CONFLICT_APPEND,
- Field::MergeOptions field_merge_options = Field::MergeOptions::Defaults());
-
- /// \brief Return the conflict resolution method.
- ConflictPolicy policy() const;
-
- /// \brief Set the conflict resolution method.
- void SetPolicy(ConflictPolicy resolution);
-
- /// \brief Add a field to the constructed schema.
- ///
- /// \param[in] field to add to the constructed Schema.
- /// \return A failure if encountered.
- Status AddField(const std::shared_ptr<Field>& field);
-
- /// \brief Add multiple fields to the constructed schema.
- ///
- /// \param[in] fields to add to the constructed Schema.
- /// \return The first failure encountered, if any.
- Status AddFields(const std::vector<std::shared_ptr<Field>>& fields);
-
- /// \brief Add fields of a Schema to the constructed Schema.
- ///
- /// \param[in] schema to take fields to add to the constructed Schema.
- /// \return The first failure encountered, if any.
- Status AddSchema(const std::shared_ptr<Schema>& schema);
-
- /// \brief Add fields of multiple Schemas to the constructed Schema.
- ///
- /// \param[in] schemas to take fields to add to the constructed Schema.
- /// \return The first failure encountered, if any.
- Status AddSchemas(const std::vector<std::shared_ptr<Schema>>& schemas);
-
- Status AddMetadata(const KeyValueMetadata& metadata);
-
- /// \brief Return the constructed Schema.
- ///
- /// The builder internal state is not affected by invoking this method, i.e.
- /// a single builder can yield multiple incrementally constructed schemas.
- ///
- /// \return the constructed schema.
- Result<std::shared_ptr<Schema>> Finish() const;
-
- /// \brief Merge schemas in a unified schema according to policy.
- static Result<std::shared_ptr<Schema>> Merge(
- const std::vector<std::shared_ptr<Schema>>& schemas,
- ConflictPolicy policy = CONFLICT_MERGE);
-
- /// \brief Indicate if schemas are compatible to merge according to policy.
- static Status AreCompatible(const std::vector<std::shared_ptr<Schema>>& schemas,
- ConflictPolicy policy = CONFLICT_MERGE);
-
- /// \brief Reset internal state with an empty schema (and metadata).
- void Reset();
-
- ~SchemaBuilder();
-
- private:
- class Impl;
- std::unique_ptr<Impl> impl_;
-
- Status AppendField(const std::shared_ptr<Field>& field);
-};
-
-/// \brief Unifies schemas by merging fields by name.
-///
-/// The behavior of field merging can be controlled via `Field::MergeOptions`.
-///
-/// The resulting schema will contain the union of fields from all schemas.
-/// Fields with the same name will be merged. See `Field::MergeOptions`.
-/// - They are expected to be mergeable under provided `field_merge_options`.
-/// - The unified field will inherit the metadata from the schema where
-/// that field is first defined.
-/// - The first N fields in the schema will be ordered the same as the
-/// N fields in the first schema.
-/// The resulting schema will inherit its metadata from the first input schema.
-/// Returns an error if:
-/// - Any input schema contains fields with duplicate names.
-/// - Fields of the same name are not mergeable.
-ARROW_EXPORT
-Result<std::shared_ptr<Schema>> UnifySchemas(
- const std::vector<std::shared_ptr<Schema>>& schemas,
- Field::MergeOptions field_merge_options = Field::MergeOptions::Defaults());
-
-namespace internal {
-
-static inline bool HasValidityBitmap(Type::type id) {
- switch (id) {
- case Type::NA:
- case Type::DENSE_UNION:
- case Type::SPARSE_UNION:
- return false;
- default:
- return true;
- }
-}
-
-ARROW_EXPORT
-std::string ToString(Type::type id);
-
-ARROW_EXPORT
-std::string ToTypeName(Type::type id);
-
-ARROW_EXPORT
-std::string ToString(TimeUnit::type unit);
-
-ARROW_EXPORT
-int GetByteWidth(const DataType& type);
-
-} // namespace internal
-
-} // namespace arrow
+ SchemaBuilder(
+ const std::shared_ptr<Schema>& schema,
+ ConflictPolicy conflict_policy = CONFLICT_APPEND,
+ Field::MergeOptions field_merge_options = Field::MergeOptions::Defaults());
+
+ /// \brief Return the conflict resolution method.
+ ConflictPolicy policy() const;
+
+ /// \brief Set the conflict resolution method.
+ void SetPolicy(ConflictPolicy resolution);
+
+ /// \brief Add a field to the constructed schema.
+ ///
+ /// \param[in] field to add to the constructed Schema.
+ /// \return A failure if encountered.
+ Status AddField(const std::shared_ptr<Field>& field);
+
+ /// \brief Add multiple fields to the constructed schema.
+ ///
+ /// \param[in] fields to add to the constructed Schema.
+ /// \return The first failure encountered, if any.
+ Status AddFields(const std::vector<std::shared_ptr<Field>>& fields);
+
+ /// \brief Add fields of a Schema to the constructed Schema.
+ ///
+ /// \param[in] schema to take fields to add to the constructed Schema.
+ /// \return The first failure encountered, if any.
+ Status AddSchema(const std::shared_ptr<Schema>& schema);
+
+ /// \brief Add fields of multiple Schemas to the constructed Schema.
+ ///
+ /// \param[in] schemas to take fields to add to the constructed Schema.
+ /// \return The first failure encountered, if any.
+ Status AddSchemas(const std::vector<std::shared_ptr<Schema>>& schemas);
+
+ Status AddMetadata(const KeyValueMetadata& metadata);
+
+ /// \brief Return the constructed Schema.
+ ///
+ /// The builder internal state is not affected by invoking this method, i.e.
+ /// a single builder can yield multiple incrementally constructed schemas.
+ ///
+ /// \return the constructed schema.
+ Result<std::shared_ptr<Schema>> Finish() const;
+
+ /// \brief Merge schemas in a unified schema according to policy.
+ static Result<std::shared_ptr<Schema>> Merge(
+ const std::vector<std::shared_ptr<Schema>>& schemas,
+ ConflictPolicy policy = CONFLICT_MERGE);
+
+ /// \brief Indicate if schemas are compatible to merge according to policy.
+ static Status AreCompatible(const std::vector<std::shared_ptr<Schema>>& schemas,
+ ConflictPolicy policy = CONFLICT_MERGE);
+
+ /// \brief Reset internal state with an empty schema (and metadata).
+ void Reset();
+
+ ~SchemaBuilder();
+
+ private:
+ class Impl;
+ std::unique_ptr<Impl> impl_;
+
+ Status AppendField(const std::shared_ptr<Field>& field);
+};
+
+/// \brief Unifies schemas by merging fields by name.
+///
+/// The behavior of field merging can be controlled via `Field::MergeOptions`.
+///
+/// The resulting schema will contain the union of fields from all schemas.
+/// Fields with the same name will be merged. See `Field::MergeOptions`.
+/// - They are expected to be mergeable under provided `field_merge_options`.
+/// - The unified field will inherit the metadata from the schema where
+/// that field is first defined.
+/// - The first N fields in the schema will be ordered the same as the
+/// N fields in the first schema.
+/// The resulting schema will inherit its metadata from the first input schema.
+/// Returns an error if:
+/// - Any input schema contains fields with duplicate names.
+/// - Fields of the same name are not mergeable.
+ARROW_EXPORT
+Result<std::shared_ptr<Schema>> UnifySchemas(
+ const std::vector<std::shared_ptr<Schema>>& schemas,
+ Field::MergeOptions field_merge_options = Field::MergeOptions::Defaults());
+
+namespace internal {
+
+static inline bool HasValidityBitmap(Type::type id) {
+ switch (id) {
+ case Type::NA:
+ case Type::DENSE_UNION:
+ case Type::SPARSE_UNION:
+ return false;
+ default:
+ return true;
+ }
+}
+
+ARROW_EXPORT
+std::string ToString(Type::type id);
+
+ARROW_EXPORT
+std::string ToTypeName(Type::type id);
+
+ARROW_EXPORT
+std::string ToString(TimeUnit::type unit);
+
+ARROW_EXPORT
+int GetByteWidth(const DataType& type);
+
+} // namespace internal
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/type_fwd.h b/contrib/libs/apache/arrow/cpp/src/arrow/type_fwd.h
index 7e564106bbe..be2a85b0a59 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/type_fwd.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/type_fwd.h
@@ -1,459 +1,459 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <limits>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-template <typename T>
-class Iterator;
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <limits>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+template <typename T>
+class Iterator;
template <typename T>
struct IterationTraits;
-
-template <typename T>
-class Result;
-
-class Status;
-
+
+template <typename T>
+class Result;
+
+class Status;
+
namespace internal {
struct Empty;
} // namespace internal
template <typename T = internal::Empty>
class Future;
-namespace util {
-class Codec;
-} // namespace util
-
-class Buffer;
-class Device;
-class MemoryManager;
-class MemoryPool;
-class MutableBuffer;
-class ResizableBuffer;
-
-using BufferVector = std::vector<std::shared_ptr<Buffer>>;
-
-class DataType;
-class Field;
-class FieldRef;
-class KeyValueMetadata;
+namespace util {
+class Codec;
+} // namespace util
+
+class Buffer;
+class Device;
+class MemoryManager;
+class MemoryPool;
+class MutableBuffer;
+class ResizableBuffer;
+
+using BufferVector = std::vector<std::shared_ptr<Buffer>>;
+
+class DataType;
+class Field;
+class FieldRef;
+class KeyValueMetadata;
enum class Endianness;
-class Schema;
-
-using DataTypeVector = std::vector<std::shared_ptr<DataType>>;
-using FieldVector = std::vector<std::shared_ptr<Field>>;
-
-class Array;
-struct ArrayData;
-class ArrayBuilder;
-struct Scalar;
-
-using ArrayDataVector = std::vector<std::shared_ptr<ArrayData>>;
-using ArrayVector = std::vector<std::shared_ptr<Array>>;
-using ScalarVector = std::vector<std::shared_ptr<Scalar>>;
-
-class ChunkedArray;
-class RecordBatch;
-class RecordBatchReader;
-class Table;
-
+class Schema;
+
+using DataTypeVector = std::vector<std::shared_ptr<DataType>>;
+using FieldVector = std::vector<std::shared_ptr<Field>>;
+
+class Array;
+struct ArrayData;
+class ArrayBuilder;
+struct Scalar;
+
+using ArrayDataVector = std::vector<std::shared_ptr<ArrayData>>;
+using ArrayVector = std::vector<std::shared_ptr<Array>>;
+using ScalarVector = std::vector<std::shared_ptr<Scalar>>;
+
+class ChunkedArray;
+class RecordBatch;
+class RecordBatchReader;
+class Table;
+
struct Datum;
struct ValueDescr;
-using ChunkedArrayVector = std::vector<std::shared_ptr<ChunkedArray>>;
-using RecordBatchVector = std::vector<std::shared_ptr<RecordBatch>>;
-using RecordBatchIterator = Iterator<std::shared_ptr<RecordBatch>>;
-
-class DictionaryType;
-class DictionaryArray;
-struct DictionaryScalar;
-
-class NullType;
-class NullArray;
-class NullBuilder;
-struct NullScalar;
-
-class FixedWidthType;
-
-class BooleanType;
-class BooleanArray;
-class BooleanBuilder;
-struct BooleanScalar;
-
-class BinaryType;
-class BinaryArray;
-class BinaryBuilder;
-struct BinaryScalar;
-
-class LargeBinaryType;
-class LargeBinaryArray;
-class LargeBinaryBuilder;
-struct LargeBinaryScalar;
-
-class FixedSizeBinaryType;
-class FixedSizeBinaryArray;
-class FixedSizeBinaryBuilder;
-struct FixedSizeBinaryScalar;
-
-class StringType;
-class StringArray;
-class StringBuilder;
-struct StringScalar;
-
-class LargeStringType;
-class LargeStringArray;
-class LargeStringBuilder;
-struct LargeStringScalar;
-
-class ListType;
-class ListArray;
-class ListBuilder;
-struct ListScalar;
-
-class LargeListType;
-class LargeListArray;
-class LargeListBuilder;
-struct LargeListScalar;
-
-class MapType;
-class MapArray;
-class MapBuilder;
-struct MapScalar;
-
-class FixedSizeListType;
-class FixedSizeListArray;
-class FixedSizeListBuilder;
-struct FixedSizeListScalar;
-
-class StructType;
-class StructArray;
-class StructBuilder;
-struct StructScalar;
-
-class Decimal128;
+using ChunkedArrayVector = std::vector<std::shared_ptr<ChunkedArray>>;
+using RecordBatchVector = std::vector<std::shared_ptr<RecordBatch>>;
+using RecordBatchIterator = Iterator<std::shared_ptr<RecordBatch>>;
+
+class DictionaryType;
+class DictionaryArray;
+struct DictionaryScalar;
+
+class NullType;
+class NullArray;
+class NullBuilder;
+struct NullScalar;
+
+class FixedWidthType;
+
+class BooleanType;
+class BooleanArray;
+class BooleanBuilder;
+struct BooleanScalar;
+
+class BinaryType;
+class BinaryArray;
+class BinaryBuilder;
+struct BinaryScalar;
+
+class LargeBinaryType;
+class LargeBinaryArray;
+class LargeBinaryBuilder;
+struct LargeBinaryScalar;
+
+class FixedSizeBinaryType;
+class FixedSizeBinaryArray;
+class FixedSizeBinaryBuilder;
+struct FixedSizeBinaryScalar;
+
+class StringType;
+class StringArray;
+class StringBuilder;
+struct StringScalar;
+
+class LargeStringType;
+class LargeStringArray;
+class LargeStringBuilder;
+struct LargeStringScalar;
+
+class ListType;
+class ListArray;
+class ListBuilder;
+struct ListScalar;
+
+class LargeListType;
+class LargeListArray;
+class LargeListBuilder;
+struct LargeListScalar;
+
+class MapType;
+class MapArray;
+class MapBuilder;
+struct MapScalar;
+
+class FixedSizeListType;
+class FixedSizeListArray;
+class FixedSizeListBuilder;
+struct FixedSizeListScalar;
+
+class StructType;
+class StructArray;
+class StructBuilder;
+struct StructScalar;
+
+class Decimal128;
class Decimal256;
-class DecimalType;
-class Decimal128Type;
+class DecimalType;
+class Decimal128Type;
class Decimal256Type;
-class Decimal128Array;
+class Decimal128Array;
class Decimal256Array;
-class Decimal128Builder;
+class Decimal128Builder;
class Decimal256Builder;
-struct Decimal128Scalar;
+struct Decimal128Scalar;
struct Decimal256Scalar;
-
-struct UnionMode {
- enum type { SPARSE, DENSE };
-};
-
-class SparseUnionType;
-class SparseUnionArray;
-class SparseUnionBuilder;
-struct SparseUnionScalar;
-
-class DenseUnionType;
-class DenseUnionArray;
-class DenseUnionBuilder;
-struct DenseUnionScalar;
-
-template <typename TypeClass>
-class NumericArray;
-
-template <typename TypeClass>
-class NumericBuilder;
-
-template <typename TypeClass>
-class NumericTensor;
-
-#define _NUMERIC_TYPE_DECL(KLASS) \
- class KLASS##Type; \
- using KLASS##Array = NumericArray<KLASS##Type>; \
- using KLASS##Builder = NumericBuilder<KLASS##Type>; \
- struct KLASS##Scalar; \
- using KLASS##Tensor = NumericTensor<KLASS##Type>;
-
-_NUMERIC_TYPE_DECL(Int8)
-_NUMERIC_TYPE_DECL(Int16)
-_NUMERIC_TYPE_DECL(Int32)
-_NUMERIC_TYPE_DECL(Int64)
-_NUMERIC_TYPE_DECL(UInt8)
-_NUMERIC_TYPE_DECL(UInt16)
-_NUMERIC_TYPE_DECL(UInt32)
-_NUMERIC_TYPE_DECL(UInt64)
-_NUMERIC_TYPE_DECL(HalfFloat)
-_NUMERIC_TYPE_DECL(Float)
-_NUMERIC_TYPE_DECL(Double)
-
-#undef _NUMERIC_TYPE_DECL
-
-enum class DateUnit : char { DAY = 0, MILLI = 1 };
-
-class DateType;
-class Date32Type;
-using Date32Array = NumericArray<Date32Type>;
-using Date32Builder = NumericBuilder<Date32Type>;
-struct Date32Scalar;
-
-class Date64Type;
-using Date64Array = NumericArray<Date64Type>;
-using Date64Builder = NumericBuilder<Date64Type>;
-struct Date64Scalar;
-
-struct TimeUnit {
- /// The unit for a time or timestamp DataType
- enum type { SECOND = 0, MILLI = 1, MICRO = 2, NANO = 3 };
-};
-
-class TimeType;
-class Time32Type;
-using Time32Array = NumericArray<Time32Type>;
-using Time32Builder = NumericBuilder<Time32Type>;
-struct Time32Scalar;
-
-class Time64Type;
-using Time64Array = NumericArray<Time64Type>;
-using Time64Builder = NumericBuilder<Time64Type>;
-struct Time64Scalar;
-
-class TimestampType;
-using TimestampArray = NumericArray<TimestampType>;
-using TimestampBuilder = NumericBuilder<TimestampType>;
-struct TimestampScalar;
-
-class MonthIntervalType;
-using MonthIntervalArray = NumericArray<MonthIntervalType>;
-using MonthIntervalBuilder = NumericBuilder<MonthIntervalType>;
-struct MonthIntervalScalar;
-
-class DayTimeIntervalType;
-class DayTimeIntervalArray;
-class DayTimeIntervalBuilder;
-struct DayTimeIntervalScalar;
-
-class DurationType;
-using DurationArray = NumericArray<DurationType>;
-using DurationBuilder = NumericBuilder<DurationType>;
-struct DurationScalar;
-
-class ExtensionType;
-class ExtensionArray;
-struct ExtensionScalar;
-
+
+struct UnionMode {
+ enum type { SPARSE, DENSE };
+};
+
+class SparseUnionType;
+class SparseUnionArray;
+class SparseUnionBuilder;
+struct SparseUnionScalar;
+
+class DenseUnionType;
+class DenseUnionArray;
+class DenseUnionBuilder;
+struct DenseUnionScalar;
+
+template <typename TypeClass>
+class NumericArray;
+
+template <typename TypeClass>
+class NumericBuilder;
+
+template <typename TypeClass>
+class NumericTensor;
+
+#define _NUMERIC_TYPE_DECL(KLASS) \
+ class KLASS##Type; \
+ using KLASS##Array = NumericArray<KLASS##Type>; \
+ using KLASS##Builder = NumericBuilder<KLASS##Type>; \
+ struct KLASS##Scalar; \
+ using KLASS##Tensor = NumericTensor<KLASS##Type>;
+
+_NUMERIC_TYPE_DECL(Int8)
+_NUMERIC_TYPE_DECL(Int16)
+_NUMERIC_TYPE_DECL(Int32)
+_NUMERIC_TYPE_DECL(Int64)
+_NUMERIC_TYPE_DECL(UInt8)
+_NUMERIC_TYPE_DECL(UInt16)
+_NUMERIC_TYPE_DECL(UInt32)
+_NUMERIC_TYPE_DECL(UInt64)
+_NUMERIC_TYPE_DECL(HalfFloat)
+_NUMERIC_TYPE_DECL(Float)
+_NUMERIC_TYPE_DECL(Double)
+
+#undef _NUMERIC_TYPE_DECL
+
+enum class DateUnit : char { DAY = 0, MILLI = 1 };
+
+class DateType;
+class Date32Type;
+using Date32Array = NumericArray<Date32Type>;
+using Date32Builder = NumericBuilder<Date32Type>;
+struct Date32Scalar;
+
+class Date64Type;
+using Date64Array = NumericArray<Date64Type>;
+using Date64Builder = NumericBuilder<Date64Type>;
+struct Date64Scalar;
+
+struct TimeUnit {
+ /// The unit for a time or timestamp DataType
+ enum type { SECOND = 0, MILLI = 1, MICRO = 2, NANO = 3 };
+};
+
+class TimeType;
+class Time32Type;
+using Time32Array = NumericArray<Time32Type>;
+using Time32Builder = NumericBuilder<Time32Type>;
+struct Time32Scalar;
+
+class Time64Type;
+using Time64Array = NumericArray<Time64Type>;
+using Time64Builder = NumericBuilder<Time64Type>;
+struct Time64Scalar;
+
+class TimestampType;
+using TimestampArray = NumericArray<TimestampType>;
+using TimestampBuilder = NumericBuilder<TimestampType>;
+struct TimestampScalar;
+
+class MonthIntervalType;
+using MonthIntervalArray = NumericArray<MonthIntervalType>;
+using MonthIntervalBuilder = NumericBuilder<MonthIntervalType>;
+struct MonthIntervalScalar;
+
+class DayTimeIntervalType;
+class DayTimeIntervalArray;
+class DayTimeIntervalBuilder;
+struct DayTimeIntervalScalar;
+
+class DurationType;
+using DurationArray = NumericArray<DurationType>;
+using DurationBuilder = NumericBuilder<DurationType>;
+struct DurationScalar;
+
+class ExtensionType;
+class ExtensionArray;
+struct ExtensionScalar;
+
class Tensor;
class SparseTensor;
-// ----------------------------------------------------------------------
-
-struct Type {
- /// \brief Main data type enumeration
- ///
- /// This enumeration provides a quick way to interrogate the category
- /// of a DataType instance.
- enum type {
- /// A NULL type having no physical storage
- NA = 0,
-
- /// Boolean as 1 bit, LSB bit-packed ordering
- BOOL,
-
- /// Unsigned 8-bit little-endian integer
- UINT8,
-
- /// Signed 8-bit little-endian integer
- INT8,
-
- /// Unsigned 16-bit little-endian integer
- UINT16,
-
- /// Signed 16-bit little-endian integer
- INT16,
-
- /// Unsigned 32-bit little-endian integer
- UINT32,
-
- /// Signed 32-bit little-endian integer
- INT32,
-
- /// Unsigned 64-bit little-endian integer
- UINT64,
-
- /// Signed 64-bit little-endian integer
- INT64,
-
- /// 2-byte floating point value
- HALF_FLOAT,
-
- /// 4-byte floating point value
- FLOAT,
-
- /// 8-byte floating point value
- DOUBLE,
-
- /// UTF8 variable-length string as List<Char>
- STRING,
-
- /// Variable-length bytes (no guarantee of UTF8-ness)
- BINARY,
-
- /// Fixed-size binary. Each value occupies the same number of bytes
- FIXED_SIZE_BINARY,
-
- /// int32_t days since the UNIX epoch
- DATE32,
-
- /// int64_t milliseconds since the UNIX epoch
- DATE64,
-
- /// Exact timestamp encoded with int64 since UNIX epoch
- /// Default unit millisecond
- TIMESTAMP,
-
- /// Time as signed 32-bit integer, representing either seconds or
- /// milliseconds since midnight
- TIME32,
-
- /// Time as signed 64-bit integer, representing either microseconds or
- /// nanoseconds since midnight
- TIME64,
-
- /// YEAR_MONTH interval in SQL style
- INTERVAL_MONTHS,
-
- /// DAY_TIME interval in SQL style
- INTERVAL_DAY_TIME,
-
+// ----------------------------------------------------------------------
+
+struct Type {
+ /// \brief Main data type enumeration
+ ///
+ /// This enumeration provides a quick way to interrogate the category
+ /// of a DataType instance.
+ enum type {
+ /// A NULL type having no physical storage
+ NA = 0,
+
+ /// Boolean as 1 bit, LSB bit-packed ordering
+ BOOL,
+
+ /// Unsigned 8-bit little-endian integer
+ UINT8,
+
+ /// Signed 8-bit little-endian integer
+ INT8,
+
+ /// Unsigned 16-bit little-endian integer
+ UINT16,
+
+ /// Signed 16-bit little-endian integer
+ INT16,
+
+ /// Unsigned 32-bit little-endian integer
+ UINT32,
+
+ /// Signed 32-bit little-endian integer
+ INT32,
+
+ /// Unsigned 64-bit little-endian integer
+ UINT64,
+
+ /// Signed 64-bit little-endian integer
+ INT64,
+
+ /// 2-byte floating point value
+ HALF_FLOAT,
+
+ /// 4-byte floating point value
+ FLOAT,
+
+ /// 8-byte floating point value
+ DOUBLE,
+
+ /// UTF8 variable-length string as List<Char>
+ STRING,
+
+ /// Variable-length bytes (no guarantee of UTF8-ness)
+ BINARY,
+
+ /// Fixed-size binary. Each value occupies the same number of bytes
+ FIXED_SIZE_BINARY,
+
+ /// int32_t days since the UNIX epoch
+ DATE32,
+
+ /// int64_t milliseconds since the UNIX epoch
+ DATE64,
+
+ /// Exact timestamp encoded with int64 since UNIX epoch
+ /// Default unit millisecond
+ TIMESTAMP,
+
+ /// Time as signed 32-bit integer, representing either seconds or
+ /// milliseconds since midnight
+ TIME32,
+
+ /// Time as signed 64-bit integer, representing either microseconds or
+ /// nanoseconds since midnight
+ TIME64,
+
+ /// YEAR_MONTH interval in SQL style
+ INTERVAL_MONTHS,
+
+ /// DAY_TIME interval in SQL style
+ INTERVAL_DAY_TIME,
+
/// Precision- and scale-based decimal type with 128 bits.
DECIMAL128,
-
+
/// Defined for backward-compatibility.
DECIMAL = DECIMAL128,
/// Precision- and scale-based decimal type with 256 bits.
DECIMAL256,
- /// A list of some logical data type
- LIST,
-
- /// Struct of logical types
- STRUCT,
-
- /// Sparse unions of logical types
- SPARSE_UNION,
-
- /// Dense unions of logical types
- DENSE_UNION,
-
- /// Dictionary-encoded type, also called "categorical" or "factor"
- /// in other programming languages. Holds the dictionary value
- /// type but not the dictionary itself, which is part of the
- /// ArrayData struct
- DICTIONARY,
-
- /// Map, a repeated struct logical type
- MAP,
-
- /// Custom data type, implemented by user
- EXTENSION,
-
- /// Fixed size list of some logical type
- FIXED_SIZE_LIST,
-
- /// Measure of elapsed time in either seconds, milliseconds, microseconds
- /// or nanoseconds.
- DURATION,
-
- /// Like STRING, but with 64-bit offsets
- LARGE_STRING,
-
- /// Like BINARY, but with 64-bit offsets
- LARGE_BINARY,
-
- /// Like LIST, but with 64-bit offsets
- LARGE_LIST,
-
- // Leave this at the end
- MAX_ID
- };
-};
-
-/// \defgroup type-factories Factory functions for creating data types
-///
-/// Factory functions for creating data types
-/// @{
-
-/// \brief Return a NullType instance
-std::shared_ptr<DataType> ARROW_EXPORT null();
-/// \brief Return a BooleanType instance
-std::shared_ptr<DataType> ARROW_EXPORT boolean();
-/// \brief Return a Int8Type instance
-std::shared_ptr<DataType> ARROW_EXPORT int8();
-/// \brief Return a Int16Type instance
-std::shared_ptr<DataType> ARROW_EXPORT int16();
-/// \brief Return a Int32Type instance
-std::shared_ptr<DataType> ARROW_EXPORT int32();
-/// \brief Return a Int64Type instance
-std::shared_ptr<DataType> ARROW_EXPORT int64();
-/// \brief Return a UInt8Type instance
-std::shared_ptr<DataType> ARROW_EXPORT uint8();
-/// \brief Return a UInt16Type instance
-std::shared_ptr<DataType> ARROW_EXPORT uint16();
-/// \brief Return a UInt32Type instance
-std::shared_ptr<DataType> ARROW_EXPORT uint32();
-/// \brief Return a UInt64Type instance
-std::shared_ptr<DataType> ARROW_EXPORT uint64();
-/// \brief Return a HalfFloatType instance
-std::shared_ptr<DataType> ARROW_EXPORT float16();
-/// \brief Return a FloatType instance
-std::shared_ptr<DataType> ARROW_EXPORT float32();
-/// \brief Return a DoubleType instance
-std::shared_ptr<DataType> ARROW_EXPORT float64();
-/// \brief Return a StringType instance
-std::shared_ptr<DataType> ARROW_EXPORT utf8();
-/// \brief Return a LargeStringType instance
-std::shared_ptr<DataType> ARROW_EXPORT large_utf8();
-/// \brief Return a BinaryType instance
-std::shared_ptr<DataType> ARROW_EXPORT binary();
-/// \brief Return a LargeBinaryType instance
-std::shared_ptr<DataType> ARROW_EXPORT large_binary();
-/// \brief Return a Date32Type instance
-std::shared_ptr<DataType> ARROW_EXPORT date32();
-/// \brief Return a Date64Type instance
-std::shared_ptr<DataType> ARROW_EXPORT date64();
-
-/// \brief Create a FixedSizeBinaryType instance.
-ARROW_EXPORT
-std::shared_ptr<DataType> fixed_size_binary(int32_t byte_width);
-
+ /// A list of some logical data type
+ LIST,
+
+ /// Struct of logical types
+ STRUCT,
+
+ /// Sparse unions of logical types
+ SPARSE_UNION,
+
+ /// Dense unions of logical types
+ DENSE_UNION,
+
+ /// Dictionary-encoded type, also called "categorical" or "factor"
+ /// in other programming languages. Holds the dictionary value
+ /// type but not the dictionary itself, which is part of the
+ /// ArrayData struct
+ DICTIONARY,
+
+ /// Map, a repeated struct logical type
+ MAP,
+
+ /// Custom data type, implemented by user
+ EXTENSION,
+
+ /// Fixed size list of some logical type
+ FIXED_SIZE_LIST,
+
+ /// Measure of elapsed time in either seconds, milliseconds, microseconds
+ /// or nanoseconds.
+ DURATION,
+
+ /// Like STRING, but with 64-bit offsets
+ LARGE_STRING,
+
+ /// Like BINARY, but with 64-bit offsets
+ LARGE_BINARY,
+
+ /// Like LIST, but with 64-bit offsets
+ LARGE_LIST,
+
+ // Leave this at the end
+ MAX_ID
+ };
+};
+
+/// \defgroup type-factories Factory functions for creating data types
+///
+/// Factory functions for creating data types
+/// @{
+
+/// \brief Return a NullType instance
+std::shared_ptr<DataType> ARROW_EXPORT null();
+/// \brief Return a BooleanType instance
+std::shared_ptr<DataType> ARROW_EXPORT boolean();
+/// \brief Return a Int8Type instance
+std::shared_ptr<DataType> ARROW_EXPORT int8();
+/// \brief Return a Int16Type instance
+std::shared_ptr<DataType> ARROW_EXPORT int16();
+/// \brief Return a Int32Type instance
+std::shared_ptr<DataType> ARROW_EXPORT int32();
+/// \brief Return a Int64Type instance
+std::shared_ptr<DataType> ARROW_EXPORT int64();
+/// \brief Return a UInt8Type instance
+std::shared_ptr<DataType> ARROW_EXPORT uint8();
+/// \brief Return a UInt16Type instance
+std::shared_ptr<DataType> ARROW_EXPORT uint16();
+/// \brief Return a UInt32Type instance
+std::shared_ptr<DataType> ARROW_EXPORT uint32();
+/// \brief Return a UInt64Type instance
+std::shared_ptr<DataType> ARROW_EXPORT uint64();
+/// \brief Return a HalfFloatType instance
+std::shared_ptr<DataType> ARROW_EXPORT float16();
+/// \brief Return a FloatType instance
+std::shared_ptr<DataType> ARROW_EXPORT float32();
+/// \brief Return a DoubleType instance
+std::shared_ptr<DataType> ARROW_EXPORT float64();
+/// \brief Return a StringType instance
+std::shared_ptr<DataType> ARROW_EXPORT utf8();
+/// \brief Return a LargeStringType instance
+std::shared_ptr<DataType> ARROW_EXPORT large_utf8();
+/// \brief Return a BinaryType instance
+std::shared_ptr<DataType> ARROW_EXPORT binary();
+/// \brief Return a LargeBinaryType instance
+std::shared_ptr<DataType> ARROW_EXPORT large_binary();
+/// \brief Return a Date32Type instance
+std::shared_ptr<DataType> ARROW_EXPORT date32();
+/// \brief Return a Date64Type instance
+std::shared_ptr<DataType> ARROW_EXPORT date64();
+
+/// \brief Create a FixedSizeBinaryType instance.
+ARROW_EXPORT
+std::shared_ptr<DataType> fixed_size_binary(int32_t byte_width);
+
/// \brief Create a DecimalType instance depending on the precision
///
/// If the precision is greater than 38, a Decimal256Type is returned,
/// otherwise a Decimal128Type.
-ARROW_EXPORT
-std::shared_ptr<DataType> decimal(int32_t precision, int32_t scale);
-
+ARROW_EXPORT
+std::shared_ptr<DataType> decimal(int32_t precision, int32_t scale);
+
/// \brief Create a Decimal128Type instance
ARROW_EXPORT
std::shared_ptr<DataType> decimal128(int32_t precision, int32_t scale);
@@ -462,182 +462,182 @@ std::shared_ptr<DataType> decimal128(int32_t precision, int32_t scale);
ARROW_EXPORT
std::shared_ptr<DataType> decimal256(int32_t precision, int32_t scale);
-/// \brief Create a ListType instance from its child Field type
-ARROW_EXPORT
-std::shared_ptr<DataType> list(const std::shared_ptr<Field>& value_type);
-
-/// \brief Create a ListType instance from its child DataType
-ARROW_EXPORT
-std::shared_ptr<DataType> list(const std::shared_ptr<DataType>& value_type);
-
-/// \brief Create a LargeListType instance from its child Field type
-ARROW_EXPORT
-std::shared_ptr<DataType> large_list(const std::shared_ptr<Field>& value_type);
-
-/// \brief Create a LargeListType instance from its child DataType
-ARROW_EXPORT
-std::shared_ptr<DataType> large_list(const std::shared_ptr<DataType>& value_type);
-
-/// \brief Create a MapType instance from its key and value DataTypes
-ARROW_EXPORT
-std::shared_ptr<DataType> map(std::shared_ptr<DataType> key_type,
- std::shared_ptr<DataType> item_type,
- bool keys_sorted = false);
-
-/// \brief Create a MapType instance from its key DataType and value field.
-///
-/// The field override is provided to communicate nullability of the value.
-ARROW_EXPORT
-std::shared_ptr<DataType> map(std::shared_ptr<DataType> key_type,
- std::shared_ptr<Field> item_field,
- bool keys_sorted = false);
-
-/// \brief Create a FixedSizeListType instance from its child Field type
-ARROW_EXPORT
-std::shared_ptr<DataType> fixed_size_list(const std::shared_ptr<Field>& value_type,
- int32_t list_size);
-
-/// \brief Create a FixedSizeListType instance from its child DataType
-ARROW_EXPORT
-std::shared_ptr<DataType> fixed_size_list(const std::shared_ptr<DataType>& value_type,
- int32_t list_size);
-/// \brief Return a Duration instance (naming use _type to avoid namespace conflict with
+/// \brief Create a ListType instance from its child Field type
+ARROW_EXPORT
+std::shared_ptr<DataType> list(const std::shared_ptr<Field>& value_type);
+
+/// \brief Create a ListType instance from its child DataType
+ARROW_EXPORT
+std::shared_ptr<DataType> list(const std::shared_ptr<DataType>& value_type);
+
+/// \brief Create a LargeListType instance from its child Field type
+ARROW_EXPORT
+std::shared_ptr<DataType> large_list(const std::shared_ptr<Field>& value_type);
+
+/// \brief Create a LargeListType instance from its child DataType
+ARROW_EXPORT
+std::shared_ptr<DataType> large_list(const std::shared_ptr<DataType>& value_type);
+
+/// \brief Create a MapType instance from its key and value DataTypes
+ARROW_EXPORT
+std::shared_ptr<DataType> map(std::shared_ptr<DataType> key_type,
+ std::shared_ptr<DataType> item_type,
+ bool keys_sorted = false);
+
+/// \brief Create a MapType instance from its key DataType and value field.
+///
+/// The field override is provided to communicate nullability of the value.
+ARROW_EXPORT
+std::shared_ptr<DataType> map(std::shared_ptr<DataType> key_type,
+ std::shared_ptr<Field> item_field,
+ bool keys_sorted = false);
+
+/// \brief Create a FixedSizeListType instance from its child Field type
+ARROW_EXPORT
+std::shared_ptr<DataType> fixed_size_list(const std::shared_ptr<Field>& value_type,
+ int32_t list_size);
+
+/// \brief Create a FixedSizeListType instance from its child DataType
+ARROW_EXPORT
+std::shared_ptr<DataType> fixed_size_list(const std::shared_ptr<DataType>& value_type,
+ int32_t list_size);
+/// \brief Return a Duration instance (naming use _type to avoid namespace conflict with
/// built in time classes).
-std::shared_ptr<DataType> ARROW_EXPORT duration(TimeUnit::type unit);
-
-/// \brief Return a DayTimeIntervalType instance
-std::shared_ptr<DataType> ARROW_EXPORT day_time_interval();
-
-/// \brief Return a MonthIntervalType instance
-std::shared_ptr<DataType> ARROW_EXPORT month_interval();
-
-/// \brief Create a TimestampType instance from its unit
-ARROW_EXPORT
-std::shared_ptr<DataType> timestamp(TimeUnit::type unit);
-
-/// \brief Create a TimestampType instance from its unit and timezone
-ARROW_EXPORT
-std::shared_ptr<DataType> timestamp(TimeUnit::type unit, const std::string& timezone);
-
-/// \brief Create a 32-bit time type instance
-///
-/// Unit can be either SECOND or MILLI
-std::shared_ptr<DataType> ARROW_EXPORT time32(TimeUnit::type unit);
-
-/// \brief Create a 64-bit time type instance
-///
-/// Unit can be either MICRO or NANO
-std::shared_ptr<DataType> ARROW_EXPORT time64(TimeUnit::type unit);
-
-/// \brief Create a StructType instance
-std::shared_ptr<DataType> ARROW_EXPORT
-struct_(const std::vector<std::shared_ptr<Field>>& fields);
-
-/// \brief Create a SparseUnionType instance
-std::shared_ptr<DataType> ARROW_EXPORT sparse_union(FieldVector child_fields,
- std::vector<int8_t> type_codes = {});
-/// \brief Create a DenseUnionType instance
-std::shared_ptr<DataType> ARROW_EXPORT dense_union(FieldVector child_fields,
- std::vector<int8_t> type_codes = {});
-
-/// \brief Create a SparseUnionType instance
-std::shared_ptr<DataType> ARROW_EXPORT
-sparse_union(const ArrayVector& children, std::vector<std::string> field_names = {},
- std::vector<int8_t> type_codes = {});
-/// \brief Create a DenseUnionType instance
-std::shared_ptr<DataType> ARROW_EXPORT
-dense_union(const ArrayVector& children, std::vector<std::string> field_names = {},
- std::vector<int8_t> type_codes = {});
-
-/// \brief Create a UnionType instance
-ARROW_DEPRECATED("Deprecated in 1.0.0")
-inline std::shared_ptr<DataType> ARROW_EXPORT
-union_(const std::vector<std::shared_ptr<Field>>& child_fields,
- const std::vector<int8_t>& type_codes, UnionMode::type mode = UnionMode::SPARSE) {
- if (mode == UnionMode::SPARSE) {
- return sparse_union(child_fields, type_codes);
- } else {
- return dense_union(child_fields, type_codes);
- }
-}
-
-/// \brief Create a UnionType instance
-ARROW_DEPRECATED("Deprecated in 1.0.0")
-inline std::shared_ptr<DataType> ARROW_EXPORT
-union_(const std::vector<std::shared_ptr<Field>>& child_fields,
- UnionMode::type mode = UnionMode::SPARSE) {
- if (mode == UnionMode::SPARSE) {
- return sparse_union(child_fields);
- } else {
- return dense_union(child_fields);
- }
-}
-
-/// \brief Create a UnionType instance
-ARROW_DEPRECATED("Deprecated in 1.0.0")
-inline std::shared_ptr<DataType> ARROW_EXPORT
-union_(const std::vector<std::shared_ptr<Array>>& children,
- const std::vector<std::string>& field_names, const std::vector<int8_t>& type_codes,
- UnionMode::type mode = UnionMode::SPARSE) {
- if (mode == UnionMode::SPARSE) {
- return sparse_union(children, field_names, type_codes);
- } else {
- return dense_union(children, field_names, type_codes);
- }
-}
-
-/// \brief Create a UnionType instance
-ARROW_DEPRECATED("Deprecated in 1.0.0")
-inline std::shared_ptr<DataType> ARROW_EXPORT
-union_(const std::vector<std::shared_ptr<Array>>& children,
- const std::vector<std::string>& field_names,
- UnionMode::type mode = UnionMode::SPARSE) {
- if (mode == UnionMode::SPARSE) {
- return sparse_union(children, field_names);
- } else {
- return dense_union(children, field_names);
- }
-}
-
-/// \brief Create a UnionType instance
-ARROW_DEPRECATED("Deprecated in 1.0.0")
-inline std::shared_ptr<DataType> ARROW_EXPORT
-union_(const std::vector<std::shared_ptr<Array>>& children,
- UnionMode::type mode = UnionMode::SPARSE) {
- if (mode == UnionMode::SPARSE) {
- return sparse_union(children);
- } else {
- return dense_union(children);
- }
-}
-/// \brief Create a DictionaryType instance
-/// \param[in] index_type the type of the dictionary indices (must be
-/// a signed integer)
-/// \param[in] dict_type the type of the values in the variable dictionary
-/// \param[in] ordered true if the order of the dictionary values has
-/// semantic meaning and should be preserved where possible
-ARROW_EXPORT
-std::shared_ptr<DataType> dictionary(const std::shared_ptr<DataType>& index_type,
- const std::shared_ptr<DataType>& dict_type,
- bool ordered = false);
-
-/// @}
-
-/// \defgroup schema-factories Factory functions for fields and schemas
-///
-/// Factory functions for fields and schemas
-/// @{
-
-/// \brief Create a Field instance
-///
-/// \param name the field name
-/// \param type the field value type
-/// \param nullable whether the values are nullable, default true
-/// \param metadata any custom key-value metadata, default null
-std::shared_ptr<Field> ARROW_EXPORT
-field(std::string name, std::shared_ptr<DataType> type, bool nullable = true,
- std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);
-
+std::shared_ptr<DataType> ARROW_EXPORT duration(TimeUnit::type unit);
+
+/// \brief Return a DayTimeIntervalType instance
+std::shared_ptr<DataType> ARROW_EXPORT day_time_interval();
+
+/// \brief Return a MonthIntervalType instance
+std::shared_ptr<DataType> ARROW_EXPORT month_interval();
+
+/// \brief Create a TimestampType instance from its unit
+ARROW_EXPORT
+std::shared_ptr<DataType> timestamp(TimeUnit::type unit);
+
+/// \brief Create a TimestampType instance from its unit and timezone
+ARROW_EXPORT
+std::shared_ptr<DataType> timestamp(TimeUnit::type unit, const std::string& timezone);
+
+/// \brief Create a 32-bit time type instance
+///
+/// Unit can be either SECOND or MILLI
+std::shared_ptr<DataType> ARROW_EXPORT time32(TimeUnit::type unit);
+
+/// \brief Create a 64-bit time type instance
+///
+/// Unit can be either MICRO or NANO
+std::shared_ptr<DataType> ARROW_EXPORT time64(TimeUnit::type unit);
+
+/// \brief Create a StructType instance
+std::shared_ptr<DataType> ARROW_EXPORT
+struct_(const std::vector<std::shared_ptr<Field>>& fields);
+
+/// \brief Create a SparseUnionType instance
+std::shared_ptr<DataType> ARROW_EXPORT sparse_union(FieldVector child_fields,
+ std::vector<int8_t> type_codes = {});
+/// \brief Create a DenseUnionType instance
+std::shared_ptr<DataType> ARROW_EXPORT dense_union(FieldVector child_fields,
+ std::vector<int8_t> type_codes = {});
+
+/// \brief Create a SparseUnionType instance
+std::shared_ptr<DataType> ARROW_EXPORT
+sparse_union(const ArrayVector& children, std::vector<std::string> field_names = {},
+ std::vector<int8_t> type_codes = {});
+/// \brief Create a DenseUnionType instance
+std::shared_ptr<DataType> ARROW_EXPORT
+dense_union(const ArrayVector& children, std::vector<std::string> field_names = {},
+ std::vector<int8_t> type_codes = {});
+
+/// \brief Create a UnionType instance
+ARROW_DEPRECATED("Deprecated in 1.0.0")
+inline std::shared_ptr<DataType> ARROW_EXPORT
+union_(const std::vector<std::shared_ptr<Field>>& child_fields,
+ const std::vector<int8_t>& type_codes, UnionMode::type mode = UnionMode::SPARSE) {
+ if (mode == UnionMode::SPARSE) {
+ return sparse_union(child_fields, type_codes);
+ } else {
+ return dense_union(child_fields, type_codes);
+ }
+}
+
+/// \brief Create a UnionType instance
+ARROW_DEPRECATED("Deprecated in 1.0.0")
+inline std::shared_ptr<DataType> ARROW_EXPORT
+union_(const std::vector<std::shared_ptr<Field>>& child_fields,
+ UnionMode::type mode = UnionMode::SPARSE) {
+ if (mode == UnionMode::SPARSE) {
+ return sparse_union(child_fields);
+ } else {
+ return dense_union(child_fields);
+ }
+}
+
+/// \brief Create a UnionType instance
+ARROW_DEPRECATED("Deprecated in 1.0.0")
+inline std::shared_ptr<DataType> ARROW_EXPORT
+union_(const std::vector<std::shared_ptr<Array>>& children,
+ const std::vector<std::string>& field_names, const std::vector<int8_t>& type_codes,
+ UnionMode::type mode = UnionMode::SPARSE) {
+ if (mode == UnionMode::SPARSE) {
+ return sparse_union(children, field_names, type_codes);
+ } else {
+ return dense_union(children, field_names, type_codes);
+ }
+}
+
+/// \brief Create a UnionType instance
+ARROW_DEPRECATED("Deprecated in 1.0.0")
+inline std::shared_ptr<DataType> ARROW_EXPORT
+union_(const std::vector<std::shared_ptr<Array>>& children,
+ const std::vector<std::string>& field_names,
+ UnionMode::type mode = UnionMode::SPARSE) {
+ if (mode == UnionMode::SPARSE) {
+ return sparse_union(children, field_names);
+ } else {
+ return dense_union(children, field_names);
+ }
+}
+
+/// \brief Create a UnionType instance
+ARROW_DEPRECATED("Deprecated in 1.0.0")
+inline std::shared_ptr<DataType> ARROW_EXPORT
+union_(const std::vector<std::shared_ptr<Array>>& children,
+ UnionMode::type mode = UnionMode::SPARSE) {
+ if (mode == UnionMode::SPARSE) {
+ return sparse_union(children);
+ } else {
+ return dense_union(children);
+ }
+}
+/// \brief Create a DictionaryType instance
+/// \param[in] index_type the type of the dictionary indices (must be
+/// a signed integer)
+/// \param[in] dict_type the type of the values in the variable dictionary
+/// \param[in] ordered true if the order of the dictionary values has
+/// semantic meaning and should be preserved where possible
+ARROW_EXPORT
+std::shared_ptr<DataType> dictionary(const std::shared_ptr<DataType>& index_type,
+ const std::shared_ptr<DataType>& dict_type,
+ bool ordered = false);
+
+/// @}
+
+/// \defgroup schema-factories Factory functions for fields and schemas
+///
+/// Factory functions for fields and schemas
+/// @{
+
+/// \brief Create a Field instance
+///
+/// \param name the field name
+/// \param type the field value type
+/// \param nullable whether the values are nullable, default true
+/// \param metadata any custom key-value metadata, default null
+std::shared_ptr<Field> ARROW_EXPORT
+field(std::string name, std::shared_ptr<DataType> type, bool nullable = true,
+ std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);
+
/// \brief Create a Field instance with metadata
///
/// The field will be assumed to be nullable.
@@ -649,16 +649,16 @@ std::shared_ptr<Field> ARROW_EXPORT
field(std::string name, std::shared_ptr<DataType> type,
std::shared_ptr<const KeyValueMetadata> metadata);
-/// \brief Create a Schema instance
-///
-/// \param fields the schema's fields
-/// \param metadata any custom key-value metadata, default null
-/// \return schema shared_ptr to Schema
-ARROW_EXPORT
-std::shared_ptr<Schema> schema(
- std::vector<std::shared_ptr<Field>> fields,
- std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);
-
+/// \brief Create a Schema instance
+///
+/// \param fields the schema's fields
+/// \param metadata any custom key-value metadata, default null
+/// \return schema shared_ptr to Schema
+ARROW_EXPORT
+std::shared_ptr<Schema> schema(
+ std::vector<std::shared_ptr<Field>> fields,
+ std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);
+
/// \brief Create a Schema instance
///
/// \param fields the schema's fields
@@ -670,9 +670,9 @@ std::shared_ptr<Schema> schema(
std::vector<std::shared_ptr<Field>> fields, Endianness endianness,
std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);
-/// @}
-
-/// Return the process-wide default memory pool.
-ARROW_EXPORT MemoryPool* default_memory_pool();
-
-} // namespace arrow
+/// @}
+
+/// Return the process-wide default memory pool.
+ARROW_EXPORT MemoryPool* default_memory_pool();
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/type_traits.h b/contrib/libs/apache/arrow/cpp/src/arrow/type_traits.h
index e4d809967f9..87fbbaa4ef4 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/type_traits.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/type_traits.h
@@ -1,296 +1,296 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <memory>
-#include <string>
-#include <type_traits>
-#include <vector>
-
-#include "arrow/type.h"
-#include "arrow/util/bit_util.h"
-
-namespace arrow {
-
-//
-// Per-type id type lookup
-//
-
-template <Type::type id>
-struct TypeIdTraits {};
-
-#define TYPE_ID_TRAIT(_id, _typeclass) \
- template <> \
- struct TypeIdTraits<Type::_id> { \
- using Type = _typeclass; \
- };
-
-TYPE_ID_TRAIT(NA, NullType)
-TYPE_ID_TRAIT(BOOL, BooleanType)
-TYPE_ID_TRAIT(INT8, Int8Type)
-TYPE_ID_TRAIT(INT16, Int16Type)
-TYPE_ID_TRAIT(INT32, Int32Type)
-TYPE_ID_TRAIT(INT64, Int64Type)
-TYPE_ID_TRAIT(UINT8, UInt8Type)
-TYPE_ID_TRAIT(UINT16, UInt16Type)
-TYPE_ID_TRAIT(UINT32, UInt32Type)
-TYPE_ID_TRAIT(UINT64, UInt64Type)
-TYPE_ID_TRAIT(HALF_FLOAT, HalfFloatType)
-TYPE_ID_TRAIT(FLOAT, FloatType)
-TYPE_ID_TRAIT(DOUBLE, DoubleType)
-TYPE_ID_TRAIT(STRING, StringType)
-TYPE_ID_TRAIT(BINARY, BinaryType)
-TYPE_ID_TRAIT(LARGE_STRING, LargeStringType)
-TYPE_ID_TRAIT(LARGE_BINARY, LargeBinaryType)
-TYPE_ID_TRAIT(FIXED_SIZE_BINARY, FixedSizeBinaryType)
-TYPE_ID_TRAIT(DATE32, Date32Type)
-TYPE_ID_TRAIT(DATE64, Date64Type)
-TYPE_ID_TRAIT(TIME32, Time32Type)
-TYPE_ID_TRAIT(TIME64, Time64Type)
-TYPE_ID_TRAIT(TIMESTAMP, TimestampType)
-TYPE_ID_TRAIT(INTERVAL_DAY_TIME, DayTimeIntervalType)
-TYPE_ID_TRAIT(INTERVAL_MONTHS, MonthIntervalType)
-TYPE_ID_TRAIT(DURATION, DurationType)
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <type_traits>
+#include <vector>
+
+#include "arrow/type.h"
+#include "arrow/util/bit_util.h"
+
+namespace arrow {
+
+//
+// Per-type id type lookup
+//
+
+template <Type::type id>
+struct TypeIdTraits {};
+
+#define TYPE_ID_TRAIT(_id, _typeclass) \
+ template <> \
+ struct TypeIdTraits<Type::_id> { \
+ using Type = _typeclass; \
+ };
+
+TYPE_ID_TRAIT(NA, NullType)
+TYPE_ID_TRAIT(BOOL, BooleanType)
+TYPE_ID_TRAIT(INT8, Int8Type)
+TYPE_ID_TRAIT(INT16, Int16Type)
+TYPE_ID_TRAIT(INT32, Int32Type)
+TYPE_ID_TRAIT(INT64, Int64Type)
+TYPE_ID_TRAIT(UINT8, UInt8Type)
+TYPE_ID_TRAIT(UINT16, UInt16Type)
+TYPE_ID_TRAIT(UINT32, UInt32Type)
+TYPE_ID_TRAIT(UINT64, UInt64Type)
+TYPE_ID_TRAIT(HALF_FLOAT, HalfFloatType)
+TYPE_ID_TRAIT(FLOAT, FloatType)
+TYPE_ID_TRAIT(DOUBLE, DoubleType)
+TYPE_ID_TRAIT(STRING, StringType)
+TYPE_ID_TRAIT(BINARY, BinaryType)
+TYPE_ID_TRAIT(LARGE_STRING, LargeStringType)
+TYPE_ID_TRAIT(LARGE_BINARY, LargeBinaryType)
+TYPE_ID_TRAIT(FIXED_SIZE_BINARY, FixedSizeBinaryType)
+TYPE_ID_TRAIT(DATE32, Date32Type)
+TYPE_ID_TRAIT(DATE64, Date64Type)
+TYPE_ID_TRAIT(TIME32, Time32Type)
+TYPE_ID_TRAIT(TIME64, Time64Type)
+TYPE_ID_TRAIT(TIMESTAMP, TimestampType)
+TYPE_ID_TRAIT(INTERVAL_DAY_TIME, DayTimeIntervalType)
+TYPE_ID_TRAIT(INTERVAL_MONTHS, MonthIntervalType)
+TYPE_ID_TRAIT(DURATION, DurationType)
TYPE_ID_TRAIT(DECIMAL128, Decimal128Type)
TYPE_ID_TRAIT(DECIMAL256, Decimal256Type)
-TYPE_ID_TRAIT(STRUCT, StructType)
-TYPE_ID_TRAIT(LIST, ListType)
-TYPE_ID_TRAIT(LARGE_LIST, LargeListType)
-TYPE_ID_TRAIT(FIXED_SIZE_LIST, FixedSizeListType)
-TYPE_ID_TRAIT(MAP, MapType)
-TYPE_ID_TRAIT(DENSE_UNION, DenseUnionType)
-TYPE_ID_TRAIT(SPARSE_UNION, SparseUnionType)
-TYPE_ID_TRAIT(DICTIONARY, DictionaryType)
-TYPE_ID_TRAIT(EXTENSION, ExtensionType)
-
-#undef TYPE_ID_TRAIT
-
-//
-// Per-type type traits
-//
-
-template <typename T>
-struct TypeTraits {};
-
-template <typename T>
-struct CTypeTraits {};
-
-template <>
-struct TypeTraits<NullType> {
- using ArrayType = NullArray;
- using BuilderType = NullBuilder;
- using ScalarType = NullScalar;
-
- static constexpr int64_t bytes_required(int64_t) { return 0; }
- constexpr static bool is_parameter_free = true;
- static inline std::shared_ptr<DataType> type_singleton() { return null(); }
-};
-
-template <>
-struct TypeTraits<BooleanType> {
- using ArrayType = BooleanArray;
- using BuilderType = BooleanBuilder;
- using ScalarType = BooleanScalar;
- using CType = bool;
-
- static constexpr int64_t bytes_required(int64_t elements) {
- return BitUtil::BytesForBits(elements);
- }
- constexpr static bool is_parameter_free = true;
- static inline std::shared_ptr<DataType> type_singleton() { return boolean(); }
-};
-
-template <>
-struct CTypeTraits<bool> : public TypeTraits<BooleanType> {
- using ArrowType = BooleanType;
-};
-
-#define PRIMITIVE_TYPE_TRAITS_DEF_(CType_, ArrowType_, ArrowArrayType, ArrowBuilderType, \
- ArrowScalarType, ArrowTensorType, SingletonFn) \
- template <> \
- struct TypeTraits<ArrowType_> { \
- using ArrayType = ArrowArrayType; \
- using BuilderType = ArrowBuilderType; \
- using ScalarType = ArrowScalarType; \
- using TensorType = ArrowTensorType; \
- using CType = ArrowType_::c_type; \
- static constexpr int64_t bytes_required(int64_t elements) { \
- return elements * static_cast<int64_t>(sizeof(CType)); \
- } \
- constexpr static bool is_parameter_free = true; \
- static inline std::shared_ptr<DataType> type_singleton() { return SingletonFn(); } \
- }; \
- \
- template <> \
- struct CTypeTraits<CType_> : public TypeTraits<ArrowType_> { \
- using ArrowType = ArrowType_; \
- };
-
-#define PRIMITIVE_TYPE_TRAITS_DEF(CType, ArrowShort, SingletonFn) \
- PRIMITIVE_TYPE_TRAITS_DEF_( \
- CType, ARROW_CONCAT(ArrowShort, Type), ARROW_CONCAT(ArrowShort, Array), \
- ARROW_CONCAT(ArrowShort, Builder), ARROW_CONCAT(ArrowShort, Scalar), \
- ARROW_CONCAT(ArrowShort, Tensor), SingletonFn)
-
-PRIMITIVE_TYPE_TRAITS_DEF(uint8_t, UInt8, uint8)
-PRIMITIVE_TYPE_TRAITS_DEF(int8_t, Int8, int8)
-PRIMITIVE_TYPE_TRAITS_DEF(uint16_t, UInt16, uint16)
-PRIMITIVE_TYPE_TRAITS_DEF(int16_t, Int16, int16)
-PRIMITIVE_TYPE_TRAITS_DEF(uint32_t, UInt32, uint32)
-PRIMITIVE_TYPE_TRAITS_DEF(int32_t, Int32, int32)
-PRIMITIVE_TYPE_TRAITS_DEF(uint64_t, UInt64, uint64)
-PRIMITIVE_TYPE_TRAITS_DEF(int64_t, Int64, int64)
-PRIMITIVE_TYPE_TRAITS_DEF(float, Float, float32)
-PRIMITIVE_TYPE_TRAITS_DEF(double, Double, float64)
-
-#undef PRIMITIVE_TYPE_TRAITS_DEF
-#undef PRIMITIVE_TYPE_TRAITS_DEF_
-
-template <>
-struct TypeTraits<Date64Type> {
- using ArrayType = Date64Array;
- using BuilderType = Date64Builder;
- using ScalarType = Date64Scalar;
- using CType = Date64Type::c_type;
-
- static constexpr int64_t bytes_required(int64_t elements) {
- return elements * static_cast<int64_t>(sizeof(int64_t));
- }
- constexpr static bool is_parameter_free = true;
- static inline std::shared_ptr<DataType> type_singleton() { return date64(); }
-};
-
-template <>
-struct TypeTraits<Date32Type> {
- using ArrayType = Date32Array;
- using BuilderType = Date32Builder;
- using ScalarType = Date32Scalar;
- using CType = Date32Type::c_type;
-
- static constexpr int64_t bytes_required(int64_t elements) {
- return elements * static_cast<int64_t>(sizeof(int32_t));
- }
- constexpr static bool is_parameter_free = true;
- static inline std::shared_ptr<DataType> type_singleton() { return date32(); }
-};
-
-template <>
-struct TypeTraits<TimestampType> {
- using ArrayType = TimestampArray;
- using BuilderType = TimestampBuilder;
- using ScalarType = TimestampScalar;
- using CType = TimestampType::c_type;
-
- static constexpr int64_t bytes_required(int64_t elements) {
- return elements * static_cast<int64_t>(sizeof(int64_t));
- }
- constexpr static bool is_parameter_free = false;
-};
-
-template <>
-struct TypeTraits<DurationType> {
- using ArrayType = DurationArray;
- using BuilderType = DurationBuilder;
- using ScalarType = DurationScalar;
- using CType = DurationType::c_type;
-
- static constexpr int64_t bytes_required(int64_t elements) {
- return elements * static_cast<int64_t>(sizeof(int64_t));
- }
- constexpr static bool is_parameter_free = false;
-};
-
-template <>
-struct TypeTraits<DayTimeIntervalType> {
- using ArrayType = DayTimeIntervalArray;
- using BuilderType = DayTimeIntervalBuilder;
- using ScalarType = DayTimeIntervalScalar;
-
- static constexpr int64_t bytes_required(int64_t elements) {
- return elements * static_cast<int64_t>(sizeof(DayTimeIntervalType::DayMilliseconds));
- }
- constexpr static bool is_parameter_free = true;
- static std::shared_ptr<DataType> type_singleton() { return day_time_interval(); }
-};
-
-template <>
-struct TypeTraits<MonthIntervalType> {
- using ArrayType = MonthIntervalArray;
- using BuilderType = MonthIntervalBuilder;
- using ScalarType = MonthIntervalScalar;
+TYPE_ID_TRAIT(STRUCT, StructType)
+TYPE_ID_TRAIT(LIST, ListType)
+TYPE_ID_TRAIT(LARGE_LIST, LargeListType)
+TYPE_ID_TRAIT(FIXED_SIZE_LIST, FixedSizeListType)
+TYPE_ID_TRAIT(MAP, MapType)
+TYPE_ID_TRAIT(DENSE_UNION, DenseUnionType)
+TYPE_ID_TRAIT(SPARSE_UNION, SparseUnionType)
+TYPE_ID_TRAIT(DICTIONARY, DictionaryType)
+TYPE_ID_TRAIT(EXTENSION, ExtensionType)
+
+#undef TYPE_ID_TRAIT
+
+//
+// Per-type type traits
+//
+
+template <typename T>
+struct TypeTraits {};
+
+template <typename T>
+struct CTypeTraits {};
+
+template <>
+struct TypeTraits<NullType> {
+ using ArrayType = NullArray;
+ using BuilderType = NullBuilder;
+ using ScalarType = NullScalar;
+
+ static constexpr int64_t bytes_required(int64_t) { return 0; }
+ constexpr static bool is_parameter_free = true;
+ static inline std::shared_ptr<DataType> type_singleton() { return null(); }
+};
+
+template <>
+struct TypeTraits<BooleanType> {
+ using ArrayType = BooleanArray;
+ using BuilderType = BooleanBuilder;
+ using ScalarType = BooleanScalar;
+ using CType = bool;
+
+ static constexpr int64_t bytes_required(int64_t elements) {
+ return BitUtil::BytesForBits(elements);
+ }
+ constexpr static bool is_parameter_free = true;
+ static inline std::shared_ptr<DataType> type_singleton() { return boolean(); }
+};
+
+template <>
+struct CTypeTraits<bool> : public TypeTraits<BooleanType> {
+ using ArrowType = BooleanType;
+};
+
+#define PRIMITIVE_TYPE_TRAITS_DEF_(CType_, ArrowType_, ArrowArrayType, ArrowBuilderType, \
+ ArrowScalarType, ArrowTensorType, SingletonFn) \
+ template <> \
+ struct TypeTraits<ArrowType_> { \
+ using ArrayType = ArrowArrayType; \
+ using BuilderType = ArrowBuilderType; \
+ using ScalarType = ArrowScalarType; \
+ using TensorType = ArrowTensorType; \
+ using CType = ArrowType_::c_type; \
+ static constexpr int64_t bytes_required(int64_t elements) { \
+ return elements * static_cast<int64_t>(sizeof(CType)); \
+ } \
+ constexpr static bool is_parameter_free = true; \
+ static inline std::shared_ptr<DataType> type_singleton() { return SingletonFn(); } \
+ }; \
+ \
+ template <> \
+ struct CTypeTraits<CType_> : public TypeTraits<ArrowType_> { \
+ using ArrowType = ArrowType_; \
+ };
+
+#define PRIMITIVE_TYPE_TRAITS_DEF(CType, ArrowShort, SingletonFn) \
+ PRIMITIVE_TYPE_TRAITS_DEF_( \
+ CType, ARROW_CONCAT(ArrowShort, Type), ARROW_CONCAT(ArrowShort, Array), \
+ ARROW_CONCAT(ArrowShort, Builder), ARROW_CONCAT(ArrowShort, Scalar), \
+ ARROW_CONCAT(ArrowShort, Tensor), SingletonFn)
+
+PRIMITIVE_TYPE_TRAITS_DEF(uint8_t, UInt8, uint8)
+PRIMITIVE_TYPE_TRAITS_DEF(int8_t, Int8, int8)
+PRIMITIVE_TYPE_TRAITS_DEF(uint16_t, UInt16, uint16)
+PRIMITIVE_TYPE_TRAITS_DEF(int16_t, Int16, int16)
+PRIMITIVE_TYPE_TRAITS_DEF(uint32_t, UInt32, uint32)
+PRIMITIVE_TYPE_TRAITS_DEF(int32_t, Int32, int32)
+PRIMITIVE_TYPE_TRAITS_DEF(uint64_t, UInt64, uint64)
+PRIMITIVE_TYPE_TRAITS_DEF(int64_t, Int64, int64)
+PRIMITIVE_TYPE_TRAITS_DEF(float, Float, float32)
+PRIMITIVE_TYPE_TRAITS_DEF(double, Double, float64)
+
+#undef PRIMITIVE_TYPE_TRAITS_DEF
+#undef PRIMITIVE_TYPE_TRAITS_DEF_
+
+template <>
+struct TypeTraits<Date64Type> {
+ using ArrayType = Date64Array;
+ using BuilderType = Date64Builder;
+ using ScalarType = Date64Scalar;
+ using CType = Date64Type::c_type;
+
+ static constexpr int64_t bytes_required(int64_t elements) {
+ return elements * static_cast<int64_t>(sizeof(int64_t));
+ }
+ constexpr static bool is_parameter_free = true;
+ static inline std::shared_ptr<DataType> type_singleton() { return date64(); }
+};
+
+template <>
+struct TypeTraits<Date32Type> {
+ using ArrayType = Date32Array;
+ using BuilderType = Date32Builder;
+ using ScalarType = Date32Scalar;
+ using CType = Date32Type::c_type;
+
+ static constexpr int64_t bytes_required(int64_t elements) {
+ return elements * static_cast<int64_t>(sizeof(int32_t));
+ }
+ constexpr static bool is_parameter_free = true;
+ static inline std::shared_ptr<DataType> type_singleton() { return date32(); }
+};
+
+template <>
+struct TypeTraits<TimestampType> {
+ using ArrayType = TimestampArray;
+ using BuilderType = TimestampBuilder;
+ using ScalarType = TimestampScalar;
+ using CType = TimestampType::c_type;
+
+ static constexpr int64_t bytes_required(int64_t elements) {
+ return elements * static_cast<int64_t>(sizeof(int64_t));
+ }
+ constexpr static bool is_parameter_free = false;
+};
+
+template <>
+struct TypeTraits<DurationType> {
+ using ArrayType = DurationArray;
+ using BuilderType = DurationBuilder;
+ using ScalarType = DurationScalar;
+ using CType = DurationType::c_type;
+
+ static constexpr int64_t bytes_required(int64_t elements) {
+ return elements * static_cast<int64_t>(sizeof(int64_t));
+ }
+ constexpr static bool is_parameter_free = false;
+};
+
+template <>
+struct TypeTraits<DayTimeIntervalType> {
+ using ArrayType = DayTimeIntervalArray;
+ using BuilderType = DayTimeIntervalBuilder;
+ using ScalarType = DayTimeIntervalScalar;
+
+ static constexpr int64_t bytes_required(int64_t elements) {
+ return elements * static_cast<int64_t>(sizeof(DayTimeIntervalType::DayMilliseconds));
+ }
+ constexpr static bool is_parameter_free = true;
+ static std::shared_ptr<DataType> type_singleton() { return day_time_interval(); }
+};
+
+template <>
+struct TypeTraits<MonthIntervalType> {
+ using ArrayType = MonthIntervalArray;
+ using BuilderType = MonthIntervalBuilder;
+ using ScalarType = MonthIntervalScalar;
using CType = MonthIntervalType::c_type;
-
- static constexpr int64_t bytes_required(int64_t elements) {
- return elements * static_cast<int64_t>(sizeof(int32_t));
- }
- constexpr static bool is_parameter_free = true;
- static std::shared_ptr<DataType> type_singleton() { return month_interval(); }
-};
-
-template <>
-struct TypeTraits<Time32Type> {
- using ArrayType = Time32Array;
- using BuilderType = Time32Builder;
- using ScalarType = Time32Scalar;
- using CType = Time32Type::c_type;
-
- static constexpr int64_t bytes_required(int64_t elements) {
- return elements * static_cast<int64_t>(sizeof(int32_t));
- }
- constexpr static bool is_parameter_free = false;
-};
-
-template <>
-struct TypeTraits<Time64Type> {
- using ArrayType = Time64Array;
- using BuilderType = Time64Builder;
- using ScalarType = Time64Scalar;
- using CType = Time64Type::c_type;
-
- static constexpr int64_t bytes_required(int64_t elements) {
- return elements * static_cast<int64_t>(sizeof(int64_t));
- }
- constexpr static bool is_parameter_free = false;
-};
-
-template <>
-struct TypeTraits<HalfFloatType> {
- using ArrayType = HalfFloatArray;
- using BuilderType = HalfFloatBuilder;
- using ScalarType = HalfFloatScalar;
- using TensorType = HalfFloatTensor;
-
- static constexpr int64_t bytes_required(int64_t elements) {
- return elements * static_cast<int64_t>(sizeof(uint16_t));
- }
- constexpr static bool is_parameter_free = true;
- static inline std::shared_ptr<DataType> type_singleton() { return float16(); }
-};
-
-template <>
-struct TypeTraits<Decimal128Type> {
- using ArrayType = Decimal128Array;
- using BuilderType = Decimal128Builder;
- using ScalarType = Decimal128Scalar;
- constexpr static bool is_parameter_free = false;
-};
-
-template <>
+
+ static constexpr int64_t bytes_required(int64_t elements) {
+ return elements * static_cast<int64_t>(sizeof(int32_t));
+ }
+ constexpr static bool is_parameter_free = true;
+ static std::shared_ptr<DataType> type_singleton() { return month_interval(); }
+};
+
+template <>
+struct TypeTraits<Time32Type> {
+ using ArrayType = Time32Array;
+ using BuilderType = Time32Builder;
+ using ScalarType = Time32Scalar;
+ using CType = Time32Type::c_type;
+
+ static constexpr int64_t bytes_required(int64_t elements) {
+ return elements * static_cast<int64_t>(sizeof(int32_t));
+ }
+ constexpr static bool is_parameter_free = false;
+};
+
+template <>
+struct TypeTraits<Time64Type> {
+ using ArrayType = Time64Array;
+ using BuilderType = Time64Builder;
+ using ScalarType = Time64Scalar;
+ using CType = Time64Type::c_type;
+
+ static constexpr int64_t bytes_required(int64_t elements) {
+ return elements * static_cast<int64_t>(sizeof(int64_t));
+ }
+ constexpr static bool is_parameter_free = false;
+};
+
+template <>
+struct TypeTraits<HalfFloatType> {
+ using ArrayType = HalfFloatArray;
+ using BuilderType = HalfFloatBuilder;
+ using ScalarType = HalfFloatScalar;
+ using TensorType = HalfFloatTensor;
+
+ static constexpr int64_t bytes_required(int64_t elements) {
+ return elements * static_cast<int64_t>(sizeof(uint16_t));
+ }
+ constexpr static bool is_parameter_free = true;
+ static inline std::shared_ptr<DataType> type_singleton() { return float16(); }
+};
+
+template <>
+struct TypeTraits<Decimal128Type> {
+ using ArrayType = Decimal128Array;
+ using BuilderType = Decimal128Builder;
+ using ScalarType = Decimal128Scalar;
+ constexpr static bool is_parameter_free = false;
+};
+
+template <>
struct TypeTraits<Decimal256Type> {
using ArrayType = Decimal256Array;
using BuilderType = Decimal256Builder;
@@ -299,294 +299,294 @@ struct TypeTraits<Decimal256Type> {
};
template <>
-struct TypeTraits<BinaryType> {
- using ArrayType = BinaryArray;
- using BuilderType = BinaryBuilder;
- using ScalarType = BinaryScalar;
- using OffsetType = Int32Type;
- constexpr static bool is_parameter_free = true;
- static inline std::shared_ptr<DataType> type_singleton() { return binary(); }
-};
-
-template <>
-struct TypeTraits<LargeBinaryType> {
- using ArrayType = LargeBinaryArray;
- using BuilderType = LargeBinaryBuilder;
- using ScalarType = LargeBinaryScalar;
- using OffsetType = Int64Type;
- constexpr static bool is_parameter_free = true;
- static inline std::shared_ptr<DataType> type_singleton() { return large_binary(); }
-};
-
-template <>
-struct TypeTraits<FixedSizeBinaryType> {
- using ArrayType = FixedSizeBinaryArray;
- using BuilderType = FixedSizeBinaryBuilder;
- using ScalarType = FixedSizeBinaryScalar;
- constexpr static bool is_parameter_free = false;
-};
-
-template <>
-struct TypeTraits<StringType> {
- using ArrayType = StringArray;
- using BuilderType = StringBuilder;
- using ScalarType = StringScalar;
- using OffsetType = Int32Type;
- constexpr static bool is_parameter_free = true;
- static inline std::shared_ptr<DataType> type_singleton() { return utf8(); }
-};
-
-template <>
-struct TypeTraits<LargeStringType> {
- using ArrayType = LargeStringArray;
- using BuilderType = LargeStringBuilder;
- using ScalarType = LargeStringScalar;
- using OffsetType = Int64Type;
- constexpr static bool is_parameter_free = true;
- static inline std::shared_ptr<DataType> type_singleton() { return large_utf8(); }
-};
-
-template <>
-struct CTypeTraits<std::string> : public TypeTraits<StringType> {
- using ArrowType = StringType;
-};
-
-template <>
-struct CTypeTraits<const char*> : public CTypeTraits<std::string> {};
-
-template <size_t N>
-struct CTypeTraits<const char (&)[N]> : public CTypeTraits<std::string> {};
-
-template <>
-struct CTypeTraits<DayTimeIntervalType::DayMilliseconds>
- : public TypeTraits<DayTimeIntervalType> {
- using ArrowType = DayTimeIntervalType;
-};
-
-template <>
-struct TypeTraits<ListType> {
- using ArrayType = ListArray;
- using BuilderType = ListBuilder;
- using ScalarType = ListScalar;
- using OffsetType = Int32Type;
- using OffsetArrayType = Int32Array;
- using OffsetBuilderType = Int32Builder;
- using OffsetScalarType = Int32Scalar;
- constexpr static bool is_parameter_free = false;
-};
-
-template <>
-struct TypeTraits<LargeListType> {
- using ArrayType = LargeListArray;
- using BuilderType = LargeListBuilder;
- using ScalarType = LargeListScalar;
- using OffsetType = Int64Type;
- using OffsetArrayType = Int64Array;
- using OffsetBuilderType = Int64Builder;
- using OffsetScalarType = Int64Scalar;
- constexpr static bool is_parameter_free = false;
-};
-
-template <>
-struct TypeTraits<MapType> {
- using ArrayType = MapArray;
- using BuilderType = MapBuilder;
- using ScalarType = MapScalar;
- using OffsetType = Int32Type;
- using OffsetArrayType = Int32Array;
- using OffsetBuilderType = Int32Builder;
- constexpr static bool is_parameter_free = false;
-};
-
-template <>
-struct TypeTraits<FixedSizeListType> {
- using ArrayType = FixedSizeListArray;
- using BuilderType = FixedSizeListBuilder;
- using ScalarType = FixedSizeListScalar;
- constexpr static bool is_parameter_free = false;
-};
-
-template <typename CType>
-struct CTypeTraits<std::vector<CType>> : public TypeTraits<ListType> {
- using ArrowType = ListType;
-
- static inline std::shared_ptr<DataType> type_singleton() {
- return list(CTypeTraits<CType>::type_singleton());
- }
-};
-
-template <>
-struct TypeTraits<StructType> {
- using ArrayType = StructArray;
- using BuilderType = StructBuilder;
- using ScalarType = StructScalar;
- constexpr static bool is_parameter_free = false;
-};
-
-template <>
-struct TypeTraits<SparseUnionType> {
- using ArrayType = SparseUnionArray;
- using BuilderType = SparseUnionBuilder;
- using ScalarType = SparseUnionScalar;
- constexpr static bool is_parameter_free = false;
-};
-
-template <>
-struct TypeTraits<DenseUnionType> {
- using ArrayType = DenseUnionArray;
- using BuilderType = DenseUnionBuilder;
- using ScalarType = DenseUnionScalar;
- constexpr static bool is_parameter_free = false;
-};
-
-template <>
-struct TypeTraits<DictionaryType> {
- using ArrayType = DictionaryArray;
- using ScalarType = DictionaryScalar;
- constexpr static bool is_parameter_free = false;
-};
-
-template <>
-struct TypeTraits<ExtensionType> {
- using ArrayType = ExtensionArray;
- using ScalarType = ExtensionScalar;
- constexpr static bool is_parameter_free = false;
-};
-
-namespace internal {
-
-template <typename... Ts>
-struct make_void {
- using type = void;
-};
-
-template <typename... Ts>
-using void_t = typename make_void<Ts...>::type;
-
-} // namespace internal
-
-//
-// Useful type predicates
-//
-
-// only in C++14
-template <bool B, typename T = void>
-using enable_if_t = typename std::enable_if<B, T>::type;
-
-template <typename T>
-using is_null_type = std::is_same<NullType, T>;
-
-template <typename T, typename R = void>
-using enable_if_null = enable_if_t<is_null_type<T>::value, R>;
-
-template <typename T>
-using is_boolean_type = std::is_same<BooleanType, T>;
-
-template <typename T, typename R = void>
-using enable_if_boolean = enable_if_t<is_boolean_type<T>::value, R>;
-
-template <typename T>
-using is_number_type = std::is_base_of<NumberType, T>;
-
-template <typename T, typename R = void>
-using enable_if_number = enable_if_t<is_number_type<T>::value, R>;
-
-template <typename T>
-using is_integer_type = std::is_base_of<IntegerType, T>;
-
-template <typename T, typename R = void>
-using enable_if_integer = enable_if_t<is_integer_type<T>::value, R>;
-
-template <typename T>
-using is_signed_integer_type =
- std::integral_constant<bool, is_integer_type<T>::value &&
- std::is_signed<typename T::c_type>::value>;
-
-template <typename T, typename R = void>
-using enable_if_signed_integer = enable_if_t<is_signed_integer_type<T>::value, R>;
-
-template <typename T>
-using is_unsigned_integer_type =
- std::integral_constant<bool, is_integer_type<T>::value &&
- std::is_unsigned<typename T::c_type>::value>;
-
-template <typename T, typename R = void>
-using enable_if_unsigned_integer = enable_if_t<is_unsigned_integer_type<T>::value, R>;
-
-// Note this will also include HalfFloatType which is represented by a
-// non-floating point primitive (uint16_t).
-template <typename T>
-using is_floating_type = std::is_base_of<FloatingPointType, T>;
-
-template <typename T, typename R = void>
-using enable_if_floating_point = enable_if_t<is_floating_type<T>::value, R>;
-
-// Half floats are special in that they behave physically like an unsigned
-// integer.
-template <typename T>
-using is_half_float_type = std::is_same<HalfFloatType, T>;
-
-template <typename T, typename R = void>
-using enable_if_half_float = enable_if_t<is_half_float_type<T>::value, R>;
-
-// Binary Types
-
-// Base binary refers to Binary/LargeBinary/String/LargeString
-template <typename T>
-using is_base_binary_type = std::is_base_of<BaseBinaryType, T>;
-
-template <typename T, typename R = void>
-using enable_if_base_binary = enable_if_t<is_base_binary_type<T>::value, R>;
-
-// Any binary excludes string from Base binary
-template <typename T>
-using is_binary_type =
- std::integral_constant<bool, std::is_same<BinaryType, T>::value ||
- std::is_same<LargeBinaryType, T>::value>;
-
-template <typename T, typename R = void>
-using enable_if_binary = enable_if_t<is_binary_type<T>::value, R>;
-
-template <typename T>
-using is_string_type =
- std::integral_constant<bool, std::is_same<StringType, T>::value ||
- std::is_same<LargeStringType, T>::value>;
-
-template <typename T, typename R = void>
-using enable_if_string = enable_if_t<is_string_type<T>::value, R>;
-
-template <typename T>
-using is_string_like_type =
- std::integral_constant<bool, is_base_binary_type<T>::value && T::is_utf8>;
-
-template <typename T, typename R = void>
-using enable_if_string_like = enable_if_t<is_string_like_type<T>::value, R>;
-
-template <typename T, typename U, typename R = void>
-using enable_if_same = enable_if_t<std::is_same<T, U>::value, R>;
-
-// Note that this also includes DecimalType
-template <typename T>
-using is_fixed_size_binary_type = std::is_base_of<FixedSizeBinaryType, T>;
-
-template <typename T, typename R = void>
-using enable_if_fixed_size_binary = enable_if_t<is_fixed_size_binary_type<T>::value, R>;
-
-template <typename T>
-using is_binary_like_type =
- std::integral_constant<bool, (is_base_binary_type<T>::value &&
- !is_string_like_type<T>::value) ||
- is_fixed_size_binary_type<T>::value>;
-
-template <typename T, typename R = void>
-using enable_if_binary_like = enable_if_t<is_binary_like_type<T>::value, R>;
-
-template <typename T>
-using is_decimal_type = std::is_base_of<DecimalType, T>;
-
-template <typename T, typename R = void>
-using enable_if_decimal = enable_if_t<is_decimal_type<T>::value, R>;
-
+struct TypeTraits<BinaryType> {
+ using ArrayType = BinaryArray;
+ using BuilderType = BinaryBuilder;
+ using ScalarType = BinaryScalar;
+ using OffsetType = Int32Type;
+ constexpr static bool is_parameter_free = true;
+ static inline std::shared_ptr<DataType> type_singleton() { return binary(); }
+};
+
+template <>
+struct TypeTraits<LargeBinaryType> {
+ using ArrayType = LargeBinaryArray;
+ using BuilderType = LargeBinaryBuilder;
+ using ScalarType = LargeBinaryScalar;
+ using OffsetType = Int64Type;
+ constexpr static bool is_parameter_free = true;
+ static inline std::shared_ptr<DataType> type_singleton() { return large_binary(); }
+};
+
+template <>
+struct TypeTraits<FixedSizeBinaryType> {
+ using ArrayType = FixedSizeBinaryArray;
+ using BuilderType = FixedSizeBinaryBuilder;
+ using ScalarType = FixedSizeBinaryScalar;
+ constexpr static bool is_parameter_free = false;
+};
+
+template <>
+struct TypeTraits<StringType> {
+ using ArrayType = StringArray;
+ using BuilderType = StringBuilder;
+ using ScalarType = StringScalar;
+ using OffsetType = Int32Type;
+ constexpr static bool is_parameter_free = true;
+ static inline std::shared_ptr<DataType> type_singleton() { return utf8(); }
+};
+
+template <>
+struct TypeTraits<LargeStringType> {
+ using ArrayType = LargeStringArray;
+ using BuilderType = LargeStringBuilder;
+ using ScalarType = LargeStringScalar;
+ using OffsetType = Int64Type;
+ constexpr static bool is_parameter_free = true;
+ static inline std::shared_ptr<DataType> type_singleton() { return large_utf8(); }
+};
+
+template <>
+struct CTypeTraits<std::string> : public TypeTraits<StringType> {
+ using ArrowType = StringType;
+};
+
+template <>
+struct CTypeTraits<const char*> : public CTypeTraits<std::string> {};
+
+template <size_t N>
+struct CTypeTraits<const char (&)[N]> : public CTypeTraits<std::string> {};
+
+template <>
+struct CTypeTraits<DayTimeIntervalType::DayMilliseconds>
+ : public TypeTraits<DayTimeIntervalType> {
+ using ArrowType = DayTimeIntervalType;
+};
+
+template <>
+struct TypeTraits<ListType> {
+ using ArrayType = ListArray;
+ using BuilderType = ListBuilder;
+ using ScalarType = ListScalar;
+ using OffsetType = Int32Type;
+ using OffsetArrayType = Int32Array;
+ using OffsetBuilderType = Int32Builder;
+ using OffsetScalarType = Int32Scalar;
+ constexpr static bool is_parameter_free = false;
+};
+
+template <>
+struct TypeTraits<LargeListType> {
+ using ArrayType = LargeListArray;
+ using BuilderType = LargeListBuilder;
+ using ScalarType = LargeListScalar;
+ using OffsetType = Int64Type;
+ using OffsetArrayType = Int64Array;
+ using OffsetBuilderType = Int64Builder;
+ using OffsetScalarType = Int64Scalar;
+ constexpr static bool is_parameter_free = false;
+};
+
+template <>
+struct TypeTraits<MapType> {
+ using ArrayType = MapArray;
+ using BuilderType = MapBuilder;
+ using ScalarType = MapScalar;
+ using OffsetType = Int32Type;
+ using OffsetArrayType = Int32Array;
+ using OffsetBuilderType = Int32Builder;
+ constexpr static bool is_parameter_free = false;
+};
+
+template <>
+struct TypeTraits<FixedSizeListType> {
+ using ArrayType = FixedSizeListArray;
+ using BuilderType = FixedSizeListBuilder;
+ using ScalarType = FixedSizeListScalar;
+ constexpr static bool is_parameter_free = false;
+};
+
+template <typename CType>
+struct CTypeTraits<std::vector<CType>> : public TypeTraits<ListType> {
+ using ArrowType = ListType;
+
+ static inline std::shared_ptr<DataType> type_singleton() {
+ return list(CTypeTraits<CType>::type_singleton());
+ }
+};
+
+template <>
+struct TypeTraits<StructType> {
+ using ArrayType = StructArray;
+ using BuilderType = StructBuilder;
+ using ScalarType = StructScalar;
+ constexpr static bool is_parameter_free = false;
+};
+
+template <>
+struct TypeTraits<SparseUnionType> {
+ using ArrayType = SparseUnionArray;
+ using BuilderType = SparseUnionBuilder;
+ using ScalarType = SparseUnionScalar;
+ constexpr static bool is_parameter_free = false;
+};
+
+template <>
+struct TypeTraits<DenseUnionType> {
+ using ArrayType = DenseUnionArray;
+ using BuilderType = DenseUnionBuilder;
+ using ScalarType = DenseUnionScalar;
+ constexpr static bool is_parameter_free = false;
+};
+
+template <>
+struct TypeTraits<DictionaryType> {
+ using ArrayType = DictionaryArray;
+ using ScalarType = DictionaryScalar;
+ constexpr static bool is_parameter_free = false;
+};
+
+template <>
+struct TypeTraits<ExtensionType> {
+ using ArrayType = ExtensionArray;
+ using ScalarType = ExtensionScalar;
+ constexpr static bool is_parameter_free = false;
+};
+
+namespace internal {
+
+template <typename... Ts>
+struct make_void {
+ using type = void;
+};
+
+template <typename... Ts>
+using void_t = typename make_void<Ts...>::type;
+
+} // namespace internal
+
+//
+// Useful type predicates
+//
+
+// only in C++14
+template <bool B, typename T = void>
+using enable_if_t = typename std::enable_if<B, T>::type;
+
+template <typename T>
+using is_null_type = std::is_same<NullType, T>;
+
+template <typename T, typename R = void>
+using enable_if_null = enable_if_t<is_null_type<T>::value, R>;
+
+template <typename T>
+using is_boolean_type = std::is_same<BooleanType, T>;
+
+template <typename T, typename R = void>
+using enable_if_boolean = enable_if_t<is_boolean_type<T>::value, R>;
+
+template <typename T>
+using is_number_type = std::is_base_of<NumberType, T>;
+
+template <typename T, typename R = void>
+using enable_if_number = enable_if_t<is_number_type<T>::value, R>;
+
+template <typename T>
+using is_integer_type = std::is_base_of<IntegerType, T>;
+
+template <typename T, typename R = void>
+using enable_if_integer = enable_if_t<is_integer_type<T>::value, R>;
+
+template <typename T>
+using is_signed_integer_type =
+ std::integral_constant<bool, is_integer_type<T>::value &&
+ std::is_signed<typename T::c_type>::value>;
+
+template <typename T, typename R = void>
+using enable_if_signed_integer = enable_if_t<is_signed_integer_type<T>::value, R>;
+
+template <typename T>
+using is_unsigned_integer_type =
+ std::integral_constant<bool, is_integer_type<T>::value &&
+ std::is_unsigned<typename T::c_type>::value>;
+
+template <typename T, typename R = void>
+using enable_if_unsigned_integer = enable_if_t<is_unsigned_integer_type<T>::value, R>;
+
+// Note this will also include HalfFloatType which is represented by a
+// non-floating point primitive (uint16_t).
+template <typename T>
+using is_floating_type = std::is_base_of<FloatingPointType, T>;
+
+template <typename T, typename R = void>
+using enable_if_floating_point = enable_if_t<is_floating_type<T>::value, R>;
+
+// Half floats are special in that they behave physically like an unsigned
+// integer.
+template <typename T>
+using is_half_float_type = std::is_same<HalfFloatType, T>;
+
+template <typename T, typename R = void>
+using enable_if_half_float = enable_if_t<is_half_float_type<T>::value, R>;
+
+// Binary Types
+
+// Base binary refers to Binary/LargeBinary/String/LargeString
+template <typename T>
+using is_base_binary_type = std::is_base_of<BaseBinaryType, T>;
+
+template <typename T, typename R = void>
+using enable_if_base_binary = enable_if_t<is_base_binary_type<T>::value, R>;
+
+// Any binary excludes string from Base binary
+template <typename T>
+using is_binary_type =
+ std::integral_constant<bool, std::is_same<BinaryType, T>::value ||
+ std::is_same<LargeBinaryType, T>::value>;
+
+template <typename T, typename R = void>
+using enable_if_binary = enable_if_t<is_binary_type<T>::value, R>;
+
+template <typename T>
+using is_string_type =
+ std::integral_constant<bool, std::is_same<StringType, T>::value ||
+ std::is_same<LargeStringType, T>::value>;
+
+template <typename T, typename R = void>
+using enable_if_string = enable_if_t<is_string_type<T>::value, R>;
+
+template <typename T>
+using is_string_like_type =
+ std::integral_constant<bool, is_base_binary_type<T>::value && T::is_utf8>;
+
+template <typename T, typename R = void>
+using enable_if_string_like = enable_if_t<is_string_like_type<T>::value, R>;
+
+template <typename T, typename U, typename R = void>
+using enable_if_same = enable_if_t<std::is_same<T, U>::value, R>;
+
+// Note that this also includes DecimalType
+template <typename T>
+using is_fixed_size_binary_type = std::is_base_of<FixedSizeBinaryType, T>;
+
+template <typename T, typename R = void>
+using enable_if_fixed_size_binary = enable_if_t<is_fixed_size_binary_type<T>::value, R>;
+
+template <typename T>
+using is_binary_like_type =
+ std::integral_constant<bool, (is_base_binary_type<T>::value &&
+ !is_string_like_type<T>::value) ||
+ is_fixed_size_binary_type<T>::value>;
+
+template <typename T, typename R = void>
+using enable_if_binary_like = enable_if_t<is_binary_like_type<T>::value, R>;
+
+template <typename T>
+using is_decimal_type = std::is_base_of<DecimalType, T>;
+
+template <typename T, typename R = void>
+using enable_if_decimal = enable_if_t<is_decimal_type<T>::value, R>;
+
template <typename T>
using is_decimal128_type = std::is_base_of<Decimal128Type, T>;
@@ -599,253 +599,253 @@ using is_decimal256_type = std::is_base_of<Decimal256Type, T>;
template <typename T, typename R = void>
using enable_if_decimal256 = enable_if_t<is_decimal256_type<T>::value, R>;
-// Nested Types
-
-template <typename T>
-using is_nested_type = std::is_base_of<NestedType, T>;
-
-template <typename T, typename R = void>
-using enable_if_nested = enable_if_t<is_nested_type<T>::value, R>;
-
-template <typename T, typename R = void>
-using enable_if_not_nested = enable_if_t<!is_nested_type<T>::value, R>;
-
-template <typename T>
-using is_var_length_list_type =
- std::integral_constant<bool, std::is_base_of<LargeListType, T>::value ||
- std::is_base_of<ListType, T>::value>;
-
-template <typename T, typename R = void>
-using enable_if_var_size_list = enable_if_t<is_var_length_list_type<T>::value, R>;
-
-// DEPRECATED use is_var_length_list_type.
-template <typename T>
-using is_base_list_type = is_var_length_list_type<T>;
-
-// DEPRECATED use enable_if_var_size_list
-template <typename T, typename R = void>
-using enable_if_base_list = enable_if_var_size_list<T, R>;
-
-template <typename T>
-using is_fixed_size_list_type = std::is_same<FixedSizeListType, T>;
-
-template <typename T, typename R = void>
-using enable_if_fixed_size_list = enable_if_t<is_fixed_size_list_type<T>::value, R>;
-
-template <typename T>
-using is_list_type =
- std::integral_constant<bool, std::is_same<T, ListType>::value ||
- std::is_same<T, LargeListType>::value ||
+// Nested Types
+
+template <typename T>
+using is_nested_type = std::is_base_of<NestedType, T>;
+
+template <typename T, typename R = void>
+using enable_if_nested = enable_if_t<is_nested_type<T>::value, R>;
+
+template <typename T, typename R = void>
+using enable_if_not_nested = enable_if_t<!is_nested_type<T>::value, R>;
+
+template <typename T>
+using is_var_length_list_type =
+ std::integral_constant<bool, std::is_base_of<LargeListType, T>::value ||
+ std::is_base_of<ListType, T>::value>;
+
+template <typename T, typename R = void>
+using enable_if_var_size_list = enable_if_t<is_var_length_list_type<T>::value, R>;
+
+// DEPRECATED use is_var_length_list_type.
+template <typename T>
+using is_base_list_type = is_var_length_list_type<T>;
+
+// DEPRECATED use enable_if_var_size_list
+template <typename T, typename R = void>
+using enable_if_base_list = enable_if_var_size_list<T, R>;
+
+template <typename T>
+using is_fixed_size_list_type = std::is_same<FixedSizeListType, T>;
+
+template <typename T, typename R = void>
+using enable_if_fixed_size_list = enable_if_t<is_fixed_size_list_type<T>::value, R>;
+
+template <typename T>
+using is_list_type =
+ std::integral_constant<bool, std::is_same<T, ListType>::value ||
+ std::is_same<T, LargeListType>::value ||
std::is_same<T, FixedSizeListType>::value>;
-
-template <typename T, typename R = void>
-using enable_if_list_type = enable_if_t<is_list_type<T>::value, R>;
-
-template <typename T>
-using is_list_like_type =
- std::integral_constant<bool, is_base_list_type<T>::value ||
- is_fixed_size_list_type<T>::value>;
-
-template <typename T, typename R = void>
-using enable_if_list_like = enable_if_t<is_list_like_type<T>::value, R>;
-
-template <typename T>
-using is_struct_type = std::is_base_of<StructType, T>;
-
-template <typename T, typename R = void>
-using enable_if_struct = enable_if_t<is_struct_type<T>::value, R>;
-
-template <typename T>
-using is_union_type = std::is_base_of<UnionType, T>;
-
-template <typename T, typename R = void>
-using enable_if_union = enable_if_t<is_union_type<T>::value, R>;
-
-// TemporalTypes
-
-template <typename T>
-using is_temporal_type = std::is_base_of<TemporalType, T>;
-
-template <typename T, typename R = void>
-using enable_if_temporal = enable_if_t<is_temporal_type<T>::value, R>;
-
-template <typename T>
-using is_date_type = std::is_base_of<DateType, T>;
-
-template <typename T, typename R = void>
-using enable_if_date = enable_if_t<is_date_type<T>::value, R>;
-
-template <typename T>
-using is_time_type = std::is_base_of<TimeType, T>;
-
-template <typename T, typename R = void>
-using enable_if_time = enable_if_t<is_time_type<T>::value, R>;
-
-template <typename T>
-using is_timestamp_type = std::is_base_of<TimestampType, T>;
-
-template <typename T, typename R = void>
-using enable_if_timestamp = enable_if_t<is_timestamp_type<T>::value, R>;
-
-template <typename T>
-using is_duration_type = std::is_base_of<DurationType, T>;
-
-template <typename T, typename R = void>
-using enable_if_duration = enable_if_t<is_duration_type<T>::value, R>;
-
-template <typename T>
-using is_interval_type = std::is_base_of<IntervalType, T>;
-
-template <typename T, typename R = void>
-using enable_if_interval = enable_if_t<is_interval_type<T>::value, R>;
-
-template <typename T>
-using is_dictionary_type = std::is_base_of<DictionaryType, T>;
-
-template <typename T, typename R = void>
-using enable_if_dictionary = enable_if_t<is_dictionary_type<T>::value, R>;
-
-template <typename T>
-using is_extension_type = std::is_base_of<ExtensionType, T>;
-
-template <typename T, typename R = void>
-using enable_if_extension = enable_if_t<is_extension_type<T>::value, R>;
-
-// Attribute differentiation
-
-template <typename T>
-using is_primitive_ctype = std::is_base_of<PrimitiveCType, T>;
-
-template <typename T, typename R = void>
-using enable_if_primitive_ctype = enable_if_t<is_primitive_ctype<T>::value, R>;
-
-template <typename T>
-using has_c_type = std::integral_constant<bool, is_primitive_ctype<T>::value ||
- is_temporal_type<T>::value>;
-
-template <typename T, typename R = void>
-using enable_if_has_c_type = enable_if_t<has_c_type<T>::value, R>;
-
-template <typename T>
-using has_string_view =
- std::integral_constant<bool, std::is_same<BinaryType, T>::value ||
- std::is_same<LargeBinaryType, T>::value ||
- std::is_same<StringType, T>::value ||
- std::is_same<LargeStringType, T>::value ||
- std::is_same<FixedSizeBinaryType, T>::value>;
-
-template <typename T, typename R = void>
-using enable_if_has_string_view = enable_if_t<has_string_view<T>::value, R>;
-
-template <typename T>
-using is_8bit_int = std::integral_constant<bool, std::is_same<UInt8Type, T>::value ||
- std::is_same<Int8Type, T>::value>;
-
-template <typename T, typename R = void>
-using enable_if_8bit_int = enable_if_t<is_8bit_int<T>::value, R>;
-
-template <typename T>
-using is_parameter_free_type =
- std::integral_constant<bool, TypeTraits<T>::is_parameter_free>;
-
-template <typename T, typename R = void>
-using enable_if_parameter_free = enable_if_t<is_parameter_free_type<T>::value, R>;
-
-// Physical representation quirks
-
-template <typename T>
-using is_physical_signed_integer_type =
- std::integral_constant<bool,
- is_signed_integer_type<T>::value ||
- (is_temporal_type<T>::value && has_c_type<T>::value)>;
-
-template <typename T, typename R = void>
-using enable_if_physical_signed_integer =
- enable_if_t<is_physical_signed_integer_type<T>::value, R>;
-
-template <typename T>
-using is_physical_unsigned_integer_type =
- std::integral_constant<bool, is_unsigned_integer_type<T>::value ||
- is_half_float_type<T>::value>;
-
-template <typename T, typename R = void>
-using enable_if_physical_unsigned_integer =
- enable_if_t<is_physical_unsigned_integer_type<T>::value, R>;
-
-template <typename T>
-using is_physical_integer_type =
- std::integral_constant<bool, is_physical_unsigned_integer_type<T>::value ||
- is_physical_signed_integer_type<T>::value>;
-
-template <typename T, typename R = void>
-using enable_if_physical_integer = enable_if_t<is_physical_integer_type<T>::value, R>;
-
-// Like is_floating_type but excluding half-floats which don't have a
-// float-like c type.
-template <typename T>
-using is_physical_floating_type =
- std::integral_constant<bool,
- is_floating_type<T>::value && !is_half_float_type<T>::value>;
-
-template <typename T, typename R = void>
-using enable_if_physical_floating_point =
- enable_if_t<is_physical_floating_type<T>::value, R>;
-
-static inline bool is_integer(Type::type type_id) {
- switch (type_id) {
- case Type::UINT8:
- case Type::INT8:
- case Type::UINT16:
- case Type::INT16:
- case Type::UINT32:
- case Type::INT32:
- case Type::UINT64:
- case Type::INT64:
- return true;
- default:
- break;
- }
- return false;
-}
-
-static inline bool is_signed_integer(Type::type type_id) {
- switch (type_id) {
- case Type::INT8:
- case Type::INT16:
- case Type::INT32:
- case Type::INT64:
- return true;
- default:
- break;
- }
- return false;
-}
-
-static inline bool is_unsigned_integer(Type::type type_id) {
- switch (type_id) {
- case Type::UINT8:
- case Type::UINT16:
- case Type::UINT32:
- case Type::UINT64:
- return true;
- default:
- break;
- }
- return false;
-}
-
-static inline bool is_floating(Type::type type_id) {
- switch (type_id) {
- case Type::HALF_FLOAT:
- case Type::FLOAT:
- case Type::DOUBLE:
- return true;
- default:
- break;
- }
- return false;
-}
-
+
+template <typename T, typename R = void>
+using enable_if_list_type = enable_if_t<is_list_type<T>::value, R>;
+
+template <typename T>
+using is_list_like_type =
+ std::integral_constant<bool, is_base_list_type<T>::value ||
+ is_fixed_size_list_type<T>::value>;
+
+template <typename T, typename R = void>
+using enable_if_list_like = enable_if_t<is_list_like_type<T>::value, R>;
+
+template <typename T>
+using is_struct_type = std::is_base_of<StructType, T>;
+
+template <typename T, typename R = void>
+using enable_if_struct = enable_if_t<is_struct_type<T>::value, R>;
+
+template <typename T>
+using is_union_type = std::is_base_of<UnionType, T>;
+
+template <typename T, typename R = void>
+using enable_if_union = enable_if_t<is_union_type<T>::value, R>;
+
+// TemporalTypes
+
+template <typename T>
+using is_temporal_type = std::is_base_of<TemporalType, T>;
+
+template <typename T, typename R = void>
+using enable_if_temporal = enable_if_t<is_temporal_type<T>::value, R>;
+
+template <typename T>
+using is_date_type = std::is_base_of<DateType, T>;
+
+template <typename T, typename R = void>
+using enable_if_date = enable_if_t<is_date_type<T>::value, R>;
+
+template <typename T>
+using is_time_type = std::is_base_of<TimeType, T>;
+
+template <typename T, typename R = void>
+using enable_if_time = enable_if_t<is_time_type<T>::value, R>;
+
+template <typename T>
+using is_timestamp_type = std::is_base_of<TimestampType, T>;
+
+template <typename T, typename R = void>
+using enable_if_timestamp = enable_if_t<is_timestamp_type<T>::value, R>;
+
+template <typename T>
+using is_duration_type = std::is_base_of<DurationType, T>;
+
+template <typename T, typename R = void>
+using enable_if_duration = enable_if_t<is_duration_type<T>::value, R>;
+
+template <typename T>
+using is_interval_type = std::is_base_of<IntervalType, T>;
+
+template <typename T, typename R = void>
+using enable_if_interval = enable_if_t<is_interval_type<T>::value, R>;
+
+template <typename T>
+using is_dictionary_type = std::is_base_of<DictionaryType, T>;
+
+template <typename T, typename R = void>
+using enable_if_dictionary = enable_if_t<is_dictionary_type<T>::value, R>;
+
+template <typename T>
+using is_extension_type = std::is_base_of<ExtensionType, T>;
+
+template <typename T, typename R = void>
+using enable_if_extension = enable_if_t<is_extension_type<T>::value, R>;
+
+// Attribute differentiation
+
+template <typename T>
+using is_primitive_ctype = std::is_base_of<PrimitiveCType, T>;
+
+template <typename T, typename R = void>
+using enable_if_primitive_ctype = enable_if_t<is_primitive_ctype<T>::value, R>;
+
+template <typename T>
+using has_c_type = std::integral_constant<bool, is_primitive_ctype<T>::value ||
+ is_temporal_type<T>::value>;
+
+template <typename T, typename R = void>
+using enable_if_has_c_type = enable_if_t<has_c_type<T>::value, R>;
+
+template <typename T>
+using has_string_view =
+ std::integral_constant<bool, std::is_same<BinaryType, T>::value ||
+ std::is_same<LargeBinaryType, T>::value ||
+ std::is_same<StringType, T>::value ||
+ std::is_same<LargeStringType, T>::value ||
+ std::is_same<FixedSizeBinaryType, T>::value>;
+
+template <typename T, typename R = void>
+using enable_if_has_string_view = enable_if_t<has_string_view<T>::value, R>;
+
+template <typename T>
+using is_8bit_int = std::integral_constant<bool, std::is_same<UInt8Type, T>::value ||
+ std::is_same<Int8Type, T>::value>;
+
+template <typename T, typename R = void>
+using enable_if_8bit_int = enable_if_t<is_8bit_int<T>::value, R>;
+
+template <typename T>
+using is_parameter_free_type =
+ std::integral_constant<bool, TypeTraits<T>::is_parameter_free>;
+
+template <typename T, typename R = void>
+using enable_if_parameter_free = enable_if_t<is_parameter_free_type<T>::value, R>;
+
+// Physical representation quirks
+
+template <typename T>
+using is_physical_signed_integer_type =
+ std::integral_constant<bool,
+ is_signed_integer_type<T>::value ||
+ (is_temporal_type<T>::value && has_c_type<T>::value)>;
+
+template <typename T, typename R = void>
+using enable_if_physical_signed_integer =
+ enable_if_t<is_physical_signed_integer_type<T>::value, R>;
+
+template <typename T>
+using is_physical_unsigned_integer_type =
+ std::integral_constant<bool, is_unsigned_integer_type<T>::value ||
+ is_half_float_type<T>::value>;
+
+template <typename T, typename R = void>
+using enable_if_physical_unsigned_integer =
+ enable_if_t<is_physical_unsigned_integer_type<T>::value, R>;
+
+template <typename T>
+using is_physical_integer_type =
+ std::integral_constant<bool, is_physical_unsigned_integer_type<T>::value ||
+ is_physical_signed_integer_type<T>::value>;
+
+template <typename T, typename R = void>
+using enable_if_physical_integer = enable_if_t<is_physical_integer_type<T>::value, R>;
+
+// Like is_floating_type but excluding half-floats which don't have a
+// float-like c type.
+template <typename T>
+using is_physical_floating_type =
+ std::integral_constant<bool,
+ is_floating_type<T>::value && !is_half_float_type<T>::value>;
+
+template <typename T, typename R = void>
+using enable_if_physical_floating_point =
+ enable_if_t<is_physical_floating_type<T>::value, R>;
+
+static inline bool is_integer(Type::type type_id) {
+ switch (type_id) {
+ case Type::UINT8:
+ case Type::INT8:
+ case Type::UINT16:
+ case Type::INT16:
+ case Type::UINT32:
+ case Type::INT32:
+ case Type::UINT64:
+ case Type::INT64:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+static inline bool is_signed_integer(Type::type type_id) {
+ switch (type_id) {
+ case Type::INT8:
+ case Type::INT16:
+ case Type::INT32:
+ case Type::INT64:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+static inline bool is_unsigned_integer(Type::type type_id) {
+ switch (type_id) {
+ case Type::UINT8:
+ case Type::UINT16:
+ case Type::UINT32:
+ case Type::UINT64:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+static inline bool is_floating(Type::type type_id) {
+ switch (type_id) {
+ case Type::HALF_FLOAT:
+ case Type::FLOAT:
+ case Type::DOUBLE:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
static inline bool is_decimal(Type::type type_id) {
switch (type_id) {
case Type::DECIMAL128:
@@ -857,90 +857,90 @@ static inline bool is_decimal(Type::type type_id) {
return false;
}
-static inline bool is_primitive(Type::type type_id) {
- switch (type_id) {
- case Type::BOOL:
- case Type::UINT8:
- case Type::INT8:
- case Type::UINT16:
- case Type::INT16:
- case Type::UINT32:
- case Type::INT32:
- case Type::UINT64:
- case Type::INT64:
- case Type::HALF_FLOAT:
- case Type::FLOAT:
- case Type::DOUBLE:
- case Type::DATE32:
- case Type::DATE64:
- case Type::TIME32:
- case Type::TIME64:
- case Type::TIMESTAMP:
- case Type::DURATION:
- case Type::INTERVAL_MONTHS:
- case Type::INTERVAL_DAY_TIME:
- return true;
- default:
- break;
- }
- return false;
-}
-
-static inline bool is_base_binary_like(Type::type type_id) {
- switch (type_id) {
- case Type::BINARY:
- case Type::LARGE_BINARY:
- case Type::STRING:
- case Type::LARGE_STRING:
- return true;
- default:
- break;
- }
- return false;
-}
-
-static inline bool is_binary_like(Type::type type_id) {
- switch (type_id) {
- case Type::BINARY:
- case Type::STRING:
- return true;
- default:
- break;
- }
- return false;
-}
-
-static inline bool is_large_binary_like(Type::type type_id) {
- switch (type_id) {
- case Type::LARGE_BINARY:
- case Type::LARGE_STRING:
- return true;
- default:
- break;
- }
- return false;
-}
-
-static inline bool is_dictionary(Type::type type_id) {
- return type_id == Type::DICTIONARY;
-}
-
-static inline bool is_fixed_size_binary(Type::type type_id) {
- switch (type_id) {
+static inline bool is_primitive(Type::type type_id) {
+ switch (type_id) {
+ case Type::BOOL:
+ case Type::UINT8:
+ case Type::INT8:
+ case Type::UINT16:
+ case Type::INT16:
+ case Type::UINT32:
+ case Type::INT32:
+ case Type::UINT64:
+ case Type::INT64:
+ case Type::HALF_FLOAT:
+ case Type::FLOAT:
+ case Type::DOUBLE:
+ case Type::DATE32:
+ case Type::DATE64:
+ case Type::TIME32:
+ case Type::TIME64:
+ case Type::TIMESTAMP:
+ case Type::DURATION:
+ case Type::INTERVAL_MONTHS:
+ case Type::INTERVAL_DAY_TIME:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+static inline bool is_base_binary_like(Type::type type_id) {
+ switch (type_id) {
+ case Type::BINARY:
+ case Type::LARGE_BINARY:
+ case Type::STRING:
+ case Type::LARGE_STRING:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+static inline bool is_binary_like(Type::type type_id) {
+ switch (type_id) {
+ case Type::BINARY:
+ case Type::STRING:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+static inline bool is_large_binary_like(Type::type type_id) {
+ switch (type_id) {
+ case Type::LARGE_BINARY:
+ case Type::LARGE_STRING:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+static inline bool is_dictionary(Type::type type_id) {
+ return type_id == Type::DICTIONARY;
+}
+
+static inline bool is_fixed_size_binary(Type::type type_id) {
+ switch (type_id) {
case Type::DECIMAL128:
case Type::DECIMAL256:
- case Type::FIXED_SIZE_BINARY:
- return true;
- default:
- break;
- }
- return false;
-}
-
-static inline bool is_fixed_width(Type::type type_id) {
- return is_primitive(type_id) || is_dictionary(type_id) || is_fixed_size_binary(type_id);
-}
-
+ case Type::FIXED_SIZE_BINARY:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+static inline bool is_fixed_width(Type::type type_id) {
+ return is_primitive(type_id) || is_dictionary(type_id) || is_fixed_size_binary(type_id);
+}
+
static inline int bit_width(Type::type type_id) {
switch (type_id) {
case Type::BOOL:
@@ -987,22 +987,22 @@ static inline int bit_width(Type::type type_id) {
return 0;
}
-static inline bool is_nested(Type::type type_id) {
- switch (type_id) {
- case Type::LIST:
- case Type::LARGE_LIST:
- case Type::FIXED_SIZE_LIST:
- case Type::MAP:
- case Type::STRUCT:
- case Type::SPARSE_UNION:
- case Type::DENSE_UNION:
- return true;
- default:
- break;
- }
- return false;
-}
-
+static inline bool is_nested(Type::type type_id) {
+ switch (type_id) {
+ case Type::LIST:
+ case Type::LARGE_LIST:
+ case Type::FIXED_SIZE_LIST:
+ case Type::MAP:
+ case Type::STRUCT:
+ case Type::SPARSE_UNION:
+ case Type::DENSE_UNION:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
static inline int offset_bit_width(Type::type type_id) {
switch (type_id) {
case Type::STRING:
@@ -1021,4 +1021,4 @@ static inline int offset_bit_width(Type::type type_id) {
return 0;
}
-} // namespace arrow
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/align_util.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/align_util.h
index 4c25a1a17b8..0c0dbc14b1d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/align_util.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/align_util.h
@@ -1,68 +1,68 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <algorithm>
-
-#include "arrow/util/bit_util.h"
-
-namespace arrow {
-namespace internal {
-
-struct BitmapWordAlignParams {
- int64_t leading_bits;
- int64_t trailing_bits;
- int64_t trailing_bit_offset;
- const uint8_t* aligned_start;
- int64_t aligned_bits;
- int64_t aligned_words;
-};
-
-// Compute parameters for accessing a bitmap using aligned word instructions.
-// The returned parameters describe:
-// - a leading area of size `leading_bits` before the aligned words
-// - a word-aligned area of size `aligned_bits`
-// - a trailing area of size `trailing_bits` after the aligned words
-template <uint64_t ALIGN_IN_BYTES>
-inline BitmapWordAlignParams BitmapWordAlign(const uint8_t* data, int64_t bit_offset,
- int64_t length) {
- static_assert(BitUtil::IsPowerOf2(ALIGN_IN_BYTES),
- "ALIGN_IN_BYTES should be a positive power of two");
- constexpr uint64_t ALIGN_IN_BITS = ALIGN_IN_BYTES * 8;
-
- BitmapWordAlignParams p;
-
- // Compute a "bit address" that we can align up to ALIGN_IN_BITS.
- // We don't care about losing the upper bits since we are only interested in the
- // difference between both addresses.
- const uint64_t bit_addr =
- reinterpret_cast<size_t>(data) * 8 + static_cast<uint64_t>(bit_offset);
- const uint64_t aligned_bit_addr = BitUtil::RoundUpToPowerOf2(bit_addr, ALIGN_IN_BITS);
-
- p.leading_bits = std::min<int64_t>(length, aligned_bit_addr - bit_addr);
- p.aligned_words = (length - p.leading_bits) / ALIGN_IN_BITS;
- p.aligned_bits = p.aligned_words * ALIGN_IN_BITS;
- p.trailing_bits = length - p.leading_bits - p.aligned_bits;
- p.trailing_bit_offset = bit_offset + p.leading_bits + p.aligned_bits;
-
- p.aligned_start = data + (bit_offset + p.leading_bits) / 8;
- return p;
-}
-
-} // namespace internal
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+
+#include "arrow/util/bit_util.h"
+
+namespace arrow {
+namespace internal {
+
+struct BitmapWordAlignParams {
+ int64_t leading_bits;
+ int64_t trailing_bits;
+ int64_t trailing_bit_offset;
+ const uint8_t* aligned_start;
+ int64_t aligned_bits;
+ int64_t aligned_words;
+};
+
+// Compute parameters for accessing a bitmap using aligned word instructions.
+// The returned parameters describe:
+// - a leading area of size `leading_bits` before the aligned words
+// - a word-aligned area of size `aligned_bits`
+// - a trailing area of size `trailing_bits` after the aligned words
+template <uint64_t ALIGN_IN_BYTES>
+inline BitmapWordAlignParams BitmapWordAlign(const uint8_t* data, int64_t bit_offset,
+ int64_t length) {
+ static_assert(BitUtil::IsPowerOf2(ALIGN_IN_BYTES),
+ "ALIGN_IN_BYTES should be a positive power of two");
+ constexpr uint64_t ALIGN_IN_BITS = ALIGN_IN_BYTES * 8;
+
+ BitmapWordAlignParams p;
+
+ // Compute a "bit address" that we can align up to ALIGN_IN_BITS.
+ // We don't care about losing the upper bits since we are only interested in the
+ // difference between both addresses.
+ const uint64_t bit_addr =
+ reinterpret_cast<size_t>(data) * 8 + static_cast<uint64_t>(bit_offset);
+ const uint64_t aligned_bit_addr = BitUtil::RoundUpToPowerOf2(bit_addr, ALIGN_IN_BITS);
+
+ p.leading_bits = std::min<int64_t>(length, aligned_bit_addr - bit_addr);
+ p.aligned_words = (length - p.leading_bits) / ALIGN_IN_BITS;
+ p.aligned_bits = p.aligned_words * ALIGN_IN_BITS;
+ p.trailing_bits = length - p.leading_bits - p.aligned_bits;
+ p.trailing_bit_offset = bit_offset + p.leading_bits + p.aligned_bits;
+
+ p.aligned_start = data + (bit_offset + p.leading_bits) / 8;
+ return p;
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/atomic_shared_ptr.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/atomic_shared_ptr.h
index d93ad921db6..c3bd4ebbfb8 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/atomic_shared_ptr.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/atomic_shared_ptr.h
@@ -1,111 +1,111 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <atomic>
-#include <memory>
-#include <utility>
-
-#include "arrow/type_traits.h"
-
-namespace arrow {
-namespace internal {
-
-// Atomic shared_ptr operations only appeared in libstdc++ since GCC 5,
-// emulate them with unsafe ops if unavailable.
-// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57250
-
-template <typename T, typename = void>
-struct is_atomic_load_shared_ptr_available : std::false_type {};
-
-template <typename T>
-struct is_atomic_load_shared_ptr_available<
- T, void_t<decltype(std::atomic_load(std::declval<const std::shared_ptr<T>*>()))>>
- : std::true_type {};
-
-template <typename T>
-using enable_if_atomic_load_shared_ptr_available =
- enable_if_t<is_atomic_load_shared_ptr_available<T>::value, T>;
-
-template <typename T>
-using enable_if_atomic_load_shared_ptr_unavailable =
- enable_if_t<!is_atomic_load_shared_ptr_available<T>::value, T>;
-
-template <class T>
-enable_if_atomic_load_shared_ptr_available<std::shared_ptr<T>> atomic_load(
- const std::shared_ptr<T>* p) {
- return std::atomic_load(p);
-}
-
-template <class T>
-enable_if_atomic_load_shared_ptr_unavailable<std::shared_ptr<T>> atomic_load(
- const std::shared_ptr<T>* p) {
- return *p;
-}
-
-template <typename T, typename = void>
-struct is_atomic_store_shared_ptr_available : std::false_type {};
-
-template <typename T>
-struct is_atomic_store_shared_ptr_available<
- T, void_t<decltype(std::atomic_store(std::declval<std::shared_ptr<T>*>(),
- std::declval<std::shared_ptr<T>>()))>>
- : std::true_type {};
-
-template <typename T>
-using enable_if_atomic_store_shared_ptr_available =
- enable_if_t<is_atomic_store_shared_ptr_available<T>::value, T>;
-
-template <typename T>
-using enable_if_atomic_store_shared_ptr_unavailable =
- enable_if_t<!is_atomic_store_shared_ptr_available<T>::value, T>;
-
-template <class T>
-void atomic_store(enable_if_atomic_store_shared_ptr_available<std::shared_ptr<T>*> p,
- std::shared_ptr<T> r) {
- std::atomic_store(p, std::move(r));
-}
-
-template <class T>
-void atomic_store(enable_if_atomic_store_shared_ptr_unavailable<std::shared_ptr<T>*> p,
- std::shared_ptr<T> r) {
- *p = r;
-}
-
-template <class T>
-bool atomic_compare_exchange_strong(
- enable_if_atomic_store_shared_ptr_available<std::shared_ptr<T>*> p,
- std::shared_ptr<T>* expected, std::shared_ptr<T> desired) {
- return std::atomic_compare_exchange_strong(p, expected, std::move(desired));
-}
-
-template <class T>
-bool atomic_compare_exchange_strong(
- enable_if_atomic_store_shared_ptr_unavailable<std::shared_ptr<T>*> p,
- std::shared_ptr<T>* expected, std::shared_ptr<T> desired) {
- if (*p == *expected) {
- *p = std::move(desired);
- return true;
- } else {
- *expected = *p;
- return false;
- }
-}
-
-} // namespace internal
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <atomic>
+#include <memory>
+#include <utility>
+
+#include "arrow/type_traits.h"
+
+namespace arrow {
+namespace internal {
+
+// Atomic shared_ptr operations only appeared in libstdc++ since GCC 5,
+// emulate them with unsafe ops if unavailable.
+// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57250
+
+template <typename T, typename = void>
+struct is_atomic_load_shared_ptr_available : std::false_type {};
+
+template <typename T>
+struct is_atomic_load_shared_ptr_available<
+ T, void_t<decltype(std::atomic_load(std::declval<const std::shared_ptr<T>*>()))>>
+ : std::true_type {};
+
+template <typename T>
+using enable_if_atomic_load_shared_ptr_available =
+ enable_if_t<is_atomic_load_shared_ptr_available<T>::value, T>;
+
+template <typename T>
+using enable_if_atomic_load_shared_ptr_unavailable =
+ enable_if_t<!is_atomic_load_shared_ptr_available<T>::value, T>;
+
+template <class T>
+enable_if_atomic_load_shared_ptr_available<std::shared_ptr<T>> atomic_load(
+ const std::shared_ptr<T>* p) {
+ return std::atomic_load(p);
+}
+
+template <class T>
+enable_if_atomic_load_shared_ptr_unavailable<std::shared_ptr<T>> atomic_load(
+ const std::shared_ptr<T>* p) {
+ return *p;
+}
+
+template <typename T, typename = void>
+struct is_atomic_store_shared_ptr_available : std::false_type {};
+
+template <typename T>
+struct is_atomic_store_shared_ptr_available<
+ T, void_t<decltype(std::atomic_store(std::declval<std::shared_ptr<T>*>(),
+ std::declval<std::shared_ptr<T>>()))>>
+ : std::true_type {};
+
+template <typename T>
+using enable_if_atomic_store_shared_ptr_available =
+ enable_if_t<is_atomic_store_shared_ptr_available<T>::value, T>;
+
+template <typename T>
+using enable_if_atomic_store_shared_ptr_unavailable =
+ enable_if_t<!is_atomic_store_shared_ptr_available<T>::value, T>;
+
+template <class T>
+void atomic_store(enable_if_atomic_store_shared_ptr_available<std::shared_ptr<T>*> p,
+ std::shared_ptr<T> r) {
+ std::atomic_store(p, std::move(r));
+}
+
+template <class T>
+void atomic_store(enable_if_atomic_store_shared_ptr_unavailable<std::shared_ptr<T>*> p,
+ std::shared_ptr<T> r) {
+ *p = r;
+}
+
+template <class T>
+bool atomic_compare_exchange_strong(
+ enable_if_atomic_store_shared_ptr_available<std::shared_ptr<T>*> p,
+ std::shared_ptr<T>* expected, std::shared_ptr<T> desired) {
+ return std::atomic_compare_exchange_strong(p, expected, std::move(desired));
+}
+
+template <class T>
+bool atomic_compare_exchange_strong(
+ enable_if_atomic_store_shared_ptr_unavailable<std::shared_ptr<T>*> p,
+ std::shared_ptr<T>* expected, std::shared_ptr<T> desired) {
+ if (*p == *expected) {
+ *p = std::move(desired);
+ return true;
+ } else {
+ *expected = *p;
+ return false;
+ }
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/base64.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/base64.h
index 9ab41412ac3..e736bc2e384 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/base64.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/base64.h
@@ -1,34 +1,34 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <string>
-
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace util {
-
-ARROW_EXPORT
-std::string base64_encode(unsigned char const*, unsigned int len);
-
-ARROW_EXPORT
-std::string base64_decode(std::string const& s);
-
-} // namespace util
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <string>
+
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace util {
+
+ARROW_EXPORT
+std::string base64_encode(unsigned char const*, unsigned int len);
+
+ARROW_EXPORT
+std::string base64_decode(std::string const& s);
+
+} // namespace util
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/basic_decimal.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/basic_decimal.cc
index 56809f28165..5d813ab7840 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/basic_decimal.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/basic_decimal.cc
@@ -1,126 +1,126 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/basic_decimal.h"
-
-#include <algorithm>
-#include <array>
-#include <climits>
-#include <cstdint>
-#include <cstdlib>
-#include <cstring>
-#include <iomanip>
-#include <limits>
-#include <string>
-
-#include "arrow/util/bit_util.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/basic_decimal.h"
+
+#include <algorithm>
+#include <array>
+#include <climits>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <iomanip>
+#include <limits>
+#include <string>
+
+#include "arrow/util/bit_util.h"
#include "arrow/util/endian.h"
-#include "arrow/util/int128_internal.h"
-#include "arrow/util/int_util_internal.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/macros.h"
-
-namespace arrow {
-
-using internal::SafeLeftShift;
-using internal::SafeSignedAdd;
-
-static const BasicDecimal128 ScaleMultipliers[] = {
- BasicDecimal128(1LL),
- BasicDecimal128(10LL),
- BasicDecimal128(100LL),
- BasicDecimal128(1000LL),
- BasicDecimal128(10000LL),
- BasicDecimal128(100000LL),
- BasicDecimal128(1000000LL),
- BasicDecimal128(10000000LL),
- BasicDecimal128(100000000LL),
- BasicDecimal128(1000000000LL),
- BasicDecimal128(10000000000LL),
- BasicDecimal128(100000000000LL),
- BasicDecimal128(1000000000000LL),
- BasicDecimal128(10000000000000LL),
- BasicDecimal128(100000000000000LL),
- BasicDecimal128(1000000000000000LL),
- BasicDecimal128(10000000000000000LL),
- BasicDecimal128(100000000000000000LL),
- BasicDecimal128(1000000000000000000LL),
- BasicDecimal128(0LL, 10000000000000000000ULL),
- BasicDecimal128(5LL, 7766279631452241920ULL),
- BasicDecimal128(54LL, 3875820019684212736ULL),
- BasicDecimal128(542LL, 1864712049423024128ULL),
- BasicDecimal128(5421LL, 200376420520689664ULL),
- BasicDecimal128(54210LL, 2003764205206896640ULL),
- BasicDecimal128(542101LL, 1590897978359414784ULL),
- BasicDecimal128(5421010LL, 15908979783594147840ULL),
- BasicDecimal128(54210108LL, 11515845246265065472ULL),
- BasicDecimal128(542101086LL, 4477988020393345024ULL),
- BasicDecimal128(5421010862LL, 7886392056514347008ULL),
- BasicDecimal128(54210108624LL, 5076944270305263616ULL),
- BasicDecimal128(542101086242LL, 13875954555633532928ULL),
- BasicDecimal128(5421010862427LL, 9632337040368467968ULL),
- BasicDecimal128(54210108624275LL, 4089650035136921600ULL),
- BasicDecimal128(542101086242752LL, 4003012203950112768ULL),
- BasicDecimal128(5421010862427522LL, 3136633892082024448ULL),
- BasicDecimal128(54210108624275221LL, 12919594847110692864ULL),
- BasicDecimal128(542101086242752217LL, 68739955140067328ULL),
- BasicDecimal128(5421010862427522170LL, 687399551400673280ULL)};
-
-static const BasicDecimal128 ScaleMultipliersHalf[] = {
- BasicDecimal128(0ULL),
- BasicDecimal128(5ULL),
- BasicDecimal128(50ULL),
- BasicDecimal128(500ULL),
- BasicDecimal128(5000ULL),
- BasicDecimal128(50000ULL),
- BasicDecimal128(500000ULL),
- BasicDecimal128(5000000ULL),
- BasicDecimal128(50000000ULL),
- BasicDecimal128(500000000ULL),
- BasicDecimal128(5000000000ULL),
- BasicDecimal128(50000000000ULL),
- BasicDecimal128(500000000000ULL),
- BasicDecimal128(5000000000000ULL),
- BasicDecimal128(50000000000000ULL),
- BasicDecimal128(500000000000000ULL),
- BasicDecimal128(5000000000000000ULL),
- BasicDecimal128(50000000000000000ULL),
- BasicDecimal128(500000000000000000ULL),
- BasicDecimal128(5000000000000000000ULL),
- BasicDecimal128(2LL, 13106511852580896768ULL),
- BasicDecimal128(27LL, 1937910009842106368ULL),
- BasicDecimal128(271LL, 932356024711512064ULL),
- BasicDecimal128(2710LL, 9323560247115120640ULL),
- BasicDecimal128(27105LL, 1001882102603448320ULL),
- BasicDecimal128(271050LL, 10018821026034483200ULL),
- BasicDecimal128(2710505LL, 7954489891797073920ULL),
- BasicDecimal128(27105054LL, 5757922623132532736ULL),
- BasicDecimal128(271050543LL, 2238994010196672512ULL),
- BasicDecimal128(2710505431LL, 3943196028257173504ULL),
- BasicDecimal128(27105054312LL, 2538472135152631808ULL),
- BasicDecimal128(271050543121LL, 6937977277816766464ULL),
- BasicDecimal128(2710505431213LL, 14039540557039009792ULL),
- BasicDecimal128(27105054312137LL, 11268197054423236608ULL),
- BasicDecimal128(271050543121376LL, 2001506101975056384ULL),
- BasicDecimal128(2710505431213761LL, 1568316946041012224ULL),
- BasicDecimal128(27105054312137610LL, 15683169460410122240ULL),
- BasicDecimal128(271050543121376108LL, 9257742014424809472ULL),
- BasicDecimal128(2710505431213761085LL, 343699775700336640ULL)};
-
+#include "arrow/util/int128_internal.h"
+#include "arrow/util/int_util_internal.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/macros.h"
+
+namespace arrow {
+
+using internal::SafeLeftShift;
+using internal::SafeSignedAdd;
+
+static const BasicDecimal128 ScaleMultipliers[] = {
+ BasicDecimal128(1LL),
+ BasicDecimal128(10LL),
+ BasicDecimal128(100LL),
+ BasicDecimal128(1000LL),
+ BasicDecimal128(10000LL),
+ BasicDecimal128(100000LL),
+ BasicDecimal128(1000000LL),
+ BasicDecimal128(10000000LL),
+ BasicDecimal128(100000000LL),
+ BasicDecimal128(1000000000LL),
+ BasicDecimal128(10000000000LL),
+ BasicDecimal128(100000000000LL),
+ BasicDecimal128(1000000000000LL),
+ BasicDecimal128(10000000000000LL),
+ BasicDecimal128(100000000000000LL),
+ BasicDecimal128(1000000000000000LL),
+ BasicDecimal128(10000000000000000LL),
+ BasicDecimal128(100000000000000000LL),
+ BasicDecimal128(1000000000000000000LL),
+ BasicDecimal128(0LL, 10000000000000000000ULL),
+ BasicDecimal128(5LL, 7766279631452241920ULL),
+ BasicDecimal128(54LL, 3875820019684212736ULL),
+ BasicDecimal128(542LL, 1864712049423024128ULL),
+ BasicDecimal128(5421LL, 200376420520689664ULL),
+ BasicDecimal128(54210LL, 2003764205206896640ULL),
+ BasicDecimal128(542101LL, 1590897978359414784ULL),
+ BasicDecimal128(5421010LL, 15908979783594147840ULL),
+ BasicDecimal128(54210108LL, 11515845246265065472ULL),
+ BasicDecimal128(542101086LL, 4477988020393345024ULL),
+ BasicDecimal128(5421010862LL, 7886392056514347008ULL),
+ BasicDecimal128(54210108624LL, 5076944270305263616ULL),
+ BasicDecimal128(542101086242LL, 13875954555633532928ULL),
+ BasicDecimal128(5421010862427LL, 9632337040368467968ULL),
+ BasicDecimal128(54210108624275LL, 4089650035136921600ULL),
+ BasicDecimal128(542101086242752LL, 4003012203950112768ULL),
+ BasicDecimal128(5421010862427522LL, 3136633892082024448ULL),
+ BasicDecimal128(54210108624275221LL, 12919594847110692864ULL),
+ BasicDecimal128(542101086242752217LL, 68739955140067328ULL),
+ BasicDecimal128(5421010862427522170LL, 687399551400673280ULL)};
+
+static const BasicDecimal128 ScaleMultipliersHalf[] = {
+ BasicDecimal128(0ULL),
+ BasicDecimal128(5ULL),
+ BasicDecimal128(50ULL),
+ BasicDecimal128(500ULL),
+ BasicDecimal128(5000ULL),
+ BasicDecimal128(50000ULL),
+ BasicDecimal128(500000ULL),
+ BasicDecimal128(5000000ULL),
+ BasicDecimal128(50000000ULL),
+ BasicDecimal128(500000000ULL),
+ BasicDecimal128(5000000000ULL),
+ BasicDecimal128(50000000000ULL),
+ BasicDecimal128(500000000000ULL),
+ BasicDecimal128(5000000000000ULL),
+ BasicDecimal128(50000000000000ULL),
+ BasicDecimal128(500000000000000ULL),
+ BasicDecimal128(5000000000000000ULL),
+ BasicDecimal128(50000000000000000ULL),
+ BasicDecimal128(500000000000000000ULL),
+ BasicDecimal128(5000000000000000000ULL),
+ BasicDecimal128(2LL, 13106511852580896768ULL),
+ BasicDecimal128(27LL, 1937910009842106368ULL),
+ BasicDecimal128(271LL, 932356024711512064ULL),
+ BasicDecimal128(2710LL, 9323560247115120640ULL),
+ BasicDecimal128(27105LL, 1001882102603448320ULL),
+ BasicDecimal128(271050LL, 10018821026034483200ULL),
+ BasicDecimal128(2710505LL, 7954489891797073920ULL),
+ BasicDecimal128(27105054LL, 5757922623132532736ULL),
+ BasicDecimal128(271050543LL, 2238994010196672512ULL),
+ BasicDecimal128(2710505431LL, 3943196028257173504ULL),
+ BasicDecimal128(27105054312LL, 2538472135152631808ULL),
+ BasicDecimal128(271050543121LL, 6937977277816766464ULL),
+ BasicDecimal128(2710505431213LL, 14039540557039009792ULL),
+ BasicDecimal128(27105054312137LL, 11268197054423236608ULL),
+ BasicDecimal128(271050543121376LL, 2001506101975056384ULL),
+ BasicDecimal128(2710505431213761LL, 1568316946041012224ULL),
+ BasicDecimal128(27105054312137610LL, 15683169460410122240ULL),
+ BasicDecimal128(271050543121376108LL, 9257742014424809472ULL),
+ BasicDecimal128(2710505431213761085LL, 343699775700336640ULL)};
+
static const BasicDecimal256 ScaleMultipliersDecimal256[] = {
BasicDecimal256({1ULL, 0ULL, 0ULL, 0ULL}),
BasicDecimal256({10ULL, 0ULL, 0ULL, 0ULL}),
@@ -334,146 +334,146 @@ static const BasicDecimal256 ScaleMultipliersHalfDecimal256[] = {
BasicDecimal256(
{0ULL, 13527356396454709248ULL, 9489746690038731964ULL, 796545955566226138ULL})};
-#ifdef ARROW_USE_NATIVE_INT128
-static constexpr uint64_t kInt64Mask = 0xFFFFFFFFFFFFFFFF;
-#else
+#ifdef ARROW_USE_NATIVE_INT128
+static constexpr uint64_t kInt64Mask = 0xFFFFFFFFFFFFFFFF;
+#else
static constexpr uint64_t kInt32Mask = 0xFFFFFFFF;
-#endif
-
-// same as ScaleMultipliers[38] - 1
-static constexpr BasicDecimal128 kMaxValue =
- BasicDecimal128(5421010862427522170LL, 687399551400673280ULL - 1);
-
-#if ARROW_LITTLE_ENDIAN
-BasicDecimal128::BasicDecimal128(const uint8_t* bytes)
- : BasicDecimal128(reinterpret_cast<const int64_t*>(bytes)[1],
- reinterpret_cast<const uint64_t*>(bytes)[0]) {}
-#else
-BasicDecimal128::BasicDecimal128(const uint8_t* bytes)
- : BasicDecimal128(reinterpret_cast<const int64_t*>(bytes)[0],
- reinterpret_cast<const uint64_t*>(bytes)[1]) {}
-#endif
-
-std::array<uint8_t, 16> BasicDecimal128::ToBytes() const {
- std::array<uint8_t, 16> out{{0}};
- ToBytes(out.data());
- return out;
-}
-
-void BasicDecimal128::ToBytes(uint8_t* out) const {
- DCHECK_NE(out, nullptr);
-#if ARROW_LITTLE_ENDIAN
- reinterpret_cast<uint64_t*>(out)[0] = low_bits_;
- reinterpret_cast<int64_t*>(out)[1] = high_bits_;
-#else
- reinterpret_cast<int64_t*>(out)[0] = high_bits_;
- reinterpret_cast<uint64_t*>(out)[1] = low_bits_;
-#endif
-}
-
-BasicDecimal128& BasicDecimal128::Negate() {
- low_bits_ = ~low_bits_ + 1;
- high_bits_ = ~high_bits_;
- if (low_bits_ == 0) {
- high_bits_ = SafeSignedAdd<int64_t>(high_bits_, 1);
- }
- return *this;
-}
-
-BasicDecimal128& BasicDecimal128::Abs() { return *this < 0 ? Negate() : *this; }
-
-BasicDecimal128 BasicDecimal128::Abs(const BasicDecimal128& in) {
- BasicDecimal128 result(in);
- return result.Abs();
-}
-
-bool BasicDecimal128::FitsInPrecision(int32_t precision) const {
- DCHECK_GT(precision, 0);
- DCHECK_LE(precision, 38);
- return BasicDecimal128::Abs(*this) < ScaleMultipliers[precision];
-}
-
-BasicDecimal128& BasicDecimal128::operator+=(const BasicDecimal128& right) {
- const uint64_t sum = low_bits_ + right.low_bits_;
- high_bits_ = SafeSignedAdd<int64_t>(high_bits_, right.high_bits_);
- if (sum < low_bits_) {
- high_bits_ = SafeSignedAdd<int64_t>(high_bits_, 1);
- }
- low_bits_ = sum;
- return *this;
-}
-
-BasicDecimal128& BasicDecimal128::operator-=(const BasicDecimal128& right) {
- const uint64_t diff = low_bits_ - right.low_bits_;
- high_bits_ -= right.high_bits_;
- if (diff > low_bits_) {
- --high_bits_;
- }
- low_bits_ = diff;
- return *this;
-}
-
-BasicDecimal128& BasicDecimal128::operator/=(const BasicDecimal128& right) {
- BasicDecimal128 remainder;
- auto s = Divide(right, this, &remainder);
- DCHECK_EQ(s, DecimalStatus::kSuccess);
- return *this;
-}
-
-BasicDecimal128& BasicDecimal128::operator|=(const BasicDecimal128& right) {
- low_bits_ |= right.low_bits_;
- high_bits_ |= right.high_bits_;
- return *this;
-}
-
-BasicDecimal128& BasicDecimal128::operator&=(const BasicDecimal128& right) {
- low_bits_ &= right.low_bits_;
- high_bits_ &= right.high_bits_;
- return *this;
-}
-
-BasicDecimal128& BasicDecimal128::operator<<=(uint32_t bits) {
- if (bits != 0) {
- if (bits < 64) {
- high_bits_ = SafeLeftShift(high_bits_, bits);
- high_bits_ |= (low_bits_ >> (64 - bits));
- low_bits_ <<= bits;
- } else if (bits < 128) {
- high_bits_ = static_cast<int64_t>(low_bits_) << (bits - 64);
- low_bits_ = 0;
- } else {
- high_bits_ = 0;
- low_bits_ = 0;
- }
- }
- return *this;
-}
-
-BasicDecimal128& BasicDecimal128::operator>>=(uint32_t bits) {
- if (bits != 0) {
- if (bits < 64) {
- low_bits_ >>= bits;
- low_bits_ |= static_cast<uint64_t>(high_bits_ << (64 - bits));
- high_bits_ = static_cast<int64_t>(static_cast<uint64_t>(high_bits_) >> bits);
- } else if (bits < 128) {
- low_bits_ = static_cast<uint64_t>(high_bits_ >> (bits - 64));
- high_bits_ = static_cast<int64_t>(high_bits_ >= 0L ? 0L : -1L);
- } else {
- high_bits_ = static_cast<int64_t>(high_bits_ >= 0L ? 0L : -1L);
- low_bits_ = static_cast<uint64_t>(high_bits_);
- }
- }
- return *this;
-}
-
-namespace {
-
+#endif
+
+// same as ScaleMultipliers[38] - 1
+static constexpr BasicDecimal128 kMaxValue =
+ BasicDecimal128(5421010862427522170LL, 687399551400673280ULL - 1);
+
+#if ARROW_LITTLE_ENDIAN
+BasicDecimal128::BasicDecimal128(const uint8_t* bytes)
+ : BasicDecimal128(reinterpret_cast<const int64_t*>(bytes)[1],
+ reinterpret_cast<const uint64_t*>(bytes)[0]) {}
+#else
+BasicDecimal128::BasicDecimal128(const uint8_t* bytes)
+ : BasicDecimal128(reinterpret_cast<const int64_t*>(bytes)[0],
+ reinterpret_cast<const uint64_t*>(bytes)[1]) {}
+#endif
+
+std::array<uint8_t, 16> BasicDecimal128::ToBytes() const {
+ std::array<uint8_t, 16> out{{0}};
+ ToBytes(out.data());
+ return out;
+}
+
+void BasicDecimal128::ToBytes(uint8_t* out) const {
+ DCHECK_NE(out, nullptr);
+#if ARROW_LITTLE_ENDIAN
+ reinterpret_cast<uint64_t*>(out)[0] = low_bits_;
+ reinterpret_cast<int64_t*>(out)[1] = high_bits_;
+#else
+ reinterpret_cast<int64_t*>(out)[0] = high_bits_;
+ reinterpret_cast<uint64_t*>(out)[1] = low_bits_;
+#endif
+}
+
+BasicDecimal128& BasicDecimal128::Negate() {
+ low_bits_ = ~low_bits_ + 1;
+ high_bits_ = ~high_bits_;
+ if (low_bits_ == 0) {
+ high_bits_ = SafeSignedAdd<int64_t>(high_bits_, 1);
+ }
+ return *this;
+}
+
+BasicDecimal128& BasicDecimal128::Abs() { return *this < 0 ? Negate() : *this; }
+
+BasicDecimal128 BasicDecimal128::Abs(const BasicDecimal128& in) {
+ BasicDecimal128 result(in);
+ return result.Abs();
+}
+
+bool BasicDecimal128::FitsInPrecision(int32_t precision) const {
+ DCHECK_GT(precision, 0);
+ DCHECK_LE(precision, 38);
+ return BasicDecimal128::Abs(*this) < ScaleMultipliers[precision];
+}
+
+BasicDecimal128& BasicDecimal128::operator+=(const BasicDecimal128& right) {
+ const uint64_t sum = low_bits_ + right.low_bits_;
+ high_bits_ = SafeSignedAdd<int64_t>(high_bits_, right.high_bits_);
+ if (sum < low_bits_) {
+ high_bits_ = SafeSignedAdd<int64_t>(high_bits_, 1);
+ }
+ low_bits_ = sum;
+ return *this;
+}
+
+BasicDecimal128& BasicDecimal128::operator-=(const BasicDecimal128& right) {
+ const uint64_t diff = low_bits_ - right.low_bits_;
+ high_bits_ -= right.high_bits_;
+ if (diff > low_bits_) {
+ --high_bits_;
+ }
+ low_bits_ = diff;
+ return *this;
+}
+
+BasicDecimal128& BasicDecimal128::operator/=(const BasicDecimal128& right) {
+ BasicDecimal128 remainder;
+ auto s = Divide(right, this, &remainder);
+ DCHECK_EQ(s, DecimalStatus::kSuccess);
+ return *this;
+}
+
+BasicDecimal128& BasicDecimal128::operator|=(const BasicDecimal128& right) {
+ low_bits_ |= right.low_bits_;
+ high_bits_ |= right.high_bits_;
+ return *this;
+}
+
+BasicDecimal128& BasicDecimal128::operator&=(const BasicDecimal128& right) {
+ low_bits_ &= right.low_bits_;
+ high_bits_ &= right.high_bits_;
+ return *this;
+}
+
+BasicDecimal128& BasicDecimal128::operator<<=(uint32_t bits) {
+ if (bits != 0) {
+ if (bits < 64) {
+ high_bits_ = SafeLeftShift(high_bits_, bits);
+ high_bits_ |= (low_bits_ >> (64 - bits));
+ low_bits_ <<= bits;
+ } else if (bits < 128) {
+ high_bits_ = static_cast<int64_t>(low_bits_) << (bits - 64);
+ low_bits_ = 0;
+ } else {
+ high_bits_ = 0;
+ low_bits_ = 0;
+ }
+ }
+ return *this;
+}
+
+BasicDecimal128& BasicDecimal128::operator>>=(uint32_t bits) {
+ if (bits != 0) {
+ if (bits < 64) {
+ low_bits_ >>= bits;
+ low_bits_ |= static_cast<uint64_t>(high_bits_ << (64 - bits));
+ high_bits_ = static_cast<int64_t>(static_cast<uint64_t>(high_bits_) >> bits);
+ } else if (bits < 128) {
+ low_bits_ = static_cast<uint64_t>(high_bits_ >> (bits - 64));
+ high_bits_ = static_cast<int64_t>(high_bits_ >= 0L ? 0L : -1L);
+ } else {
+ high_bits_ = static_cast<int64_t>(high_bits_ >= 0L ? 0L : -1L);
+ low_bits_ = static_cast<uint64_t>(high_bits_);
+ }
+ }
+ return *this;
+}
+
+namespace {
+
// Convenience wrapper type over 128 bit unsigned integers. We opt not to
// replace the uint128_t type in int128_internal.h because it would require
// significantly more implementation work to be done. This class merely
// provides the minimum necessary set of functions to perform 128+ bit
// multiplication operations when there may or may not be native support.
-#ifdef ARROW_USE_NATIVE_INT128
+#ifdef ARROW_USE_NATIVE_INT128
struct uint128_t {
uint128_t() {}
uint128_t(uint64_t hi, uint64_t lo) : val_((static_cast<__uint128_t>(hi) << 64) | lo) {}
@@ -499,41 +499,41 @@ struct uint128_t {
__uint128_t val_;
};
-#else
+#else
// Multiply two 64 bit word components into a 128 bit result, with high bits
// stored in hi and low bits in lo.
inline void ExtendAndMultiply(uint64_t x, uint64_t y, uint64_t* hi, uint64_t* lo) {
// Perform multiplication on two 64 bit words x and y into a 128 bit result
- // by splitting up x and y into 32 bit high/low bit components,
- // allowing us to represent the multiplication as
- // x * y = x_lo * y_lo + x_hi * y_lo * 2^32 + y_hi * x_lo * 2^32
+ // by splitting up x and y into 32 bit high/low bit components,
+ // allowing us to represent the multiplication as
+ // x * y = x_lo * y_lo + x_hi * y_lo * 2^32 + y_hi * x_lo * 2^32
// + x_hi * y_hi * 2^64
- //
+ //
// Now, consider the final output as lo_lo || lo_hi || hi_lo || hi_hi
- // Therefore,
- // lo_lo is (x_lo * y_lo)_lo,
- // lo_hi is ((x_lo * y_lo)_hi + (x_hi * y_lo)_lo + (x_lo * y_hi)_lo)_lo,
- // hi_lo is ((x_hi * y_hi)_lo + (x_hi * y_lo)_hi + (x_lo * y_hi)_hi)_hi,
- // hi_hi is (x_hi * y_hi)_hi
+ // Therefore,
+ // lo_lo is (x_lo * y_lo)_lo,
+ // lo_hi is ((x_lo * y_lo)_hi + (x_hi * y_lo)_lo + (x_lo * y_hi)_lo)_lo,
+ // hi_lo is ((x_hi * y_hi)_lo + (x_hi * y_lo)_hi + (x_lo * y_hi)_hi)_hi,
+ // hi_hi is (x_hi * y_hi)_hi
const uint64_t x_lo = x & kInt32Mask;
const uint64_t y_lo = y & kInt32Mask;
- const uint64_t x_hi = x >> 32;
- const uint64_t y_hi = y >> 32;
-
- const uint64_t t = x_lo * y_lo;
+ const uint64_t x_hi = x >> 32;
+ const uint64_t y_hi = y >> 32;
+
+ const uint64_t t = x_lo * y_lo;
const uint64_t t_lo = t & kInt32Mask;
- const uint64_t t_hi = t >> 32;
-
- const uint64_t u = x_hi * y_lo + t_hi;
+ const uint64_t t_hi = t >> 32;
+
+ const uint64_t u = x_hi * y_lo + t_hi;
const uint64_t u_lo = u & kInt32Mask;
- const uint64_t u_hi = u >> 32;
-
- const uint64_t v = x_lo * y_hi + u_lo;
- const uint64_t v_hi = v >> 32;
-
- *hi = x_hi * y_hi + u_hi + v_hi;
+ const uint64_t u_hi = u >> 32;
+
+ const uint64_t v = x_lo * y_hi + u_lo;
+ const uint64_t v_hi = v >> 32;
+
+ *hi = x_hi * y_hi + u_hi + v_hi;
*lo = (v << 32) + t_lo;
-}
+}
struct uint128_t {
uint128_t() {}
@@ -568,8 +568,8 @@ struct uint128_t {
uint64_t hi_;
uint64_t lo_;
};
-#endif
-
+#endif
+
// Multiplies two N * 64 bit unsigned integer types, represented by a uint64_t
// array into a same sized output. Elements in the array should be in
// little endian order, and output will be the same. Overflow in multiplication
@@ -589,26 +589,26 @@ inline void MultiplyUnsignedArray(const std::array<uint64_t, N>& lh,
carry = tmp.hi();
}
}
-}
-
-} // namespace
-
-BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) {
- // Since the max value of BasicDecimal128 is supposed to be 1e38 - 1 and the
- // min the negation taking the absolute values here should always be safe.
- const bool negate = Sign() != right.Sign();
- BasicDecimal128 x = BasicDecimal128::Abs(*this);
- BasicDecimal128 y = BasicDecimal128::Abs(right);
+}
+
+} // namespace
+
+BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) {
+ // Since the max value of BasicDecimal128 is supposed to be 1e38 - 1 and the
+ // min the negation taking the absolute values here should always be safe.
+ const bool negate = Sign() != right.Sign();
+ BasicDecimal128 x = BasicDecimal128::Abs(*this);
+ BasicDecimal128 y = BasicDecimal128::Abs(right);
uint128_t r(x);
r *= uint128_t{y};
high_bits_ = r.hi();
low_bits_ = r.lo();
- if (negate) {
- Negate();
- }
- return *this;
-}
-
+ if (negate) {
+ Negate();
+ }
+ return *this;
+}
+
/// Expands the given little endian array of uint64_t into a big endian array of
/// uint32_t. The value of input array is expected to be non-negative. The result_array
/// will remove leading zeros from the input array.
@@ -640,50 +640,50 @@ static int64_t FillInArray(const std::array<uint64_t, N>& value_array,
/// Expands the given value into a big endian array of ints so that we can work on
/// it. The array will be converted to an absolute value and the was_negative
-/// flag will be set appropriately. The array will remove leading zeros from
-/// the value.
+/// flag will be set appropriately. The array will remove leading zeros from
+/// the value.
/// \param array a big endian array of length 4 to set with the value
-/// \param was_negative a flag for whether the value was original negative
-/// \result the output length of the array
-static int64_t FillInArray(const BasicDecimal128& value, uint32_t* array,
- bool& was_negative) {
+/// \param was_negative a flag for whether the value was original negative
+/// \result the output length of the array
+static int64_t FillInArray(const BasicDecimal128& value, uint32_t* array,
+ bool& was_negative) {
BasicDecimal128 abs_value = BasicDecimal128::Abs(value);
was_negative = value.high_bits() < 0;
uint64_t high = static_cast<uint64_t>(abs_value.high_bits());
uint64_t low = abs_value.low_bits();
-
+
// FillInArray(std::array<uint64_t, N>& value_array, uint32_t* result_array) is not
// called here as the following code has better performance, to avoid regression on
// BasicDecimal128 Division.
- if (high != 0) {
- if (high > std::numeric_limits<uint32_t>::max()) {
- array[0] = static_cast<uint32_t>(high >> 32);
- array[1] = static_cast<uint32_t>(high);
- array[2] = static_cast<uint32_t>(low >> 32);
- array[3] = static_cast<uint32_t>(low);
- return 4;
- }
-
- array[0] = static_cast<uint32_t>(high);
- array[1] = static_cast<uint32_t>(low >> 32);
- array[2] = static_cast<uint32_t>(low);
- return 3;
- }
-
+ if (high != 0) {
+ if (high > std::numeric_limits<uint32_t>::max()) {
+ array[0] = static_cast<uint32_t>(high >> 32);
+ array[1] = static_cast<uint32_t>(high);
+ array[2] = static_cast<uint32_t>(low >> 32);
+ array[3] = static_cast<uint32_t>(low);
+ return 4;
+ }
+
+ array[0] = static_cast<uint32_t>(high);
+ array[1] = static_cast<uint32_t>(low >> 32);
+ array[2] = static_cast<uint32_t>(low);
+ return 3;
+ }
+
if (low > std::numeric_limits<uint32_t>::max()) {
- array[0] = static_cast<uint32_t>(low >> 32);
- array[1] = static_cast<uint32_t>(low);
- return 2;
- }
-
- if (low == 0) {
- return 0;
- }
-
- array[0] = static_cast<uint32_t>(low);
- return 1;
-}
-
+ array[0] = static_cast<uint32_t>(low >> 32);
+ array[1] = static_cast<uint32_t>(low);
+ return 2;
+ }
+
+ if (low == 0) {
+ return 0;
+ }
+
+ array[0] = static_cast<uint32_t>(low);
+ return 1;
+}
+
/// Expands the given value into a big endian array of ints so that we can work on
/// it. The array will be converted to an absolute value and the was_negative
/// flag will be set appropriately. The array will remove leading zeros from
@@ -702,56 +702,56 @@ static int64_t FillInArray(const BasicDecimal256& value, uint32_t* array,
return FillInArray<4>(positive_value.little_endian_array(), array);
}
-/// Shift the number in the array left by bits positions.
-/// \param array the number to shift, must have length elements
-/// \param length the number of entries in the array
-/// \param bits the number of bits to shift (0 <= bits < 32)
-static void ShiftArrayLeft(uint32_t* array, int64_t length, int64_t bits) {
- if (length > 0 && bits != 0) {
- for (int64_t i = 0; i < length - 1; ++i) {
- array[i] = (array[i] << bits) | (array[i + 1] >> (32 - bits));
- }
- array[length - 1] <<= bits;
- }
-}
-
-/// Shift the number in the array right by bits positions.
-/// \param array the number to shift, must have length elements
-/// \param length the number of entries in the array
-/// \param bits the number of bits to shift (0 <= bits < 32)
+/// Shift the number in the array left by bits positions.
+/// \param array the number to shift, must have length elements
+/// \param length the number of entries in the array
+/// \param bits the number of bits to shift (0 <= bits < 32)
+static void ShiftArrayLeft(uint32_t* array, int64_t length, int64_t bits) {
+ if (length > 0 && bits != 0) {
+ for (int64_t i = 0; i < length - 1; ++i) {
+ array[i] = (array[i] << bits) | (array[i + 1] >> (32 - bits));
+ }
+ array[length - 1] <<= bits;
+ }
+}
+
+/// Shift the number in the array right by bits positions.
+/// \param array the number to shift, must have length elements
+/// \param length the number of entries in the array
+/// \param bits the number of bits to shift (0 <= bits < 32)
static inline void ShiftArrayRight(uint32_t* array, int64_t length, int64_t bits) {
- if (length > 0 && bits != 0) {
- for (int64_t i = length - 1; i > 0; --i) {
- array[i] = (array[i] >> bits) | (array[i - 1] << (32 - bits));
- }
- array[0] >>= bits;
- }
-}
-
-/// \brief Fix the signs of the result and remainder at the end of the division based on
-/// the signs of the dividend and divisor.
+ if (length > 0 && bits != 0) {
+ for (int64_t i = length - 1; i > 0; --i) {
+ array[i] = (array[i] >> bits) | (array[i - 1] << (32 - bits));
+ }
+ array[0] >>= bits;
+ }
+}
+
+/// \brief Fix the signs of the result and remainder at the end of the division based on
+/// the signs of the dividend and divisor.
template <class DecimalClass>
static inline void FixDivisionSigns(DecimalClass* result, DecimalClass* remainder,
bool dividend_was_negative,
bool divisor_was_negative) {
- if (dividend_was_negative != divisor_was_negative) {
- result->Negate();
- }
-
- if (dividend_was_negative) {
- remainder->Negate();
- }
-}
-
+ if (dividend_was_negative != divisor_was_negative) {
+ result->Negate();
+ }
+
+ if (dividend_was_negative) {
+ remainder->Negate();
+ }
+}
+
/// \brief Build a little endian array of uint64_t from a big endian array of uint32_t.
template <size_t N>
static DecimalStatus BuildFromArray(std::array<uint64_t, N>* result_array,
const uint32_t* array, int64_t length) {
for (int64_t i = length - 2 * N - 1; i >= 0; i--) {
if (array[i] != 0) {
- return DecimalStatus::kOverflow;
+ return DecimalStatus::kOverflow;
}
- }
+ }
int64_t next_index = length - 1;
size_t i = 0;
for (; i < N && next_index >= 0; i++) {
@@ -766,7 +766,7 @@ static DecimalStatus BuildFromArray(std::array<uint64_t, N>* result_array,
}
return DecimalStatus::kSuccess;
}
-
+
/// \brief Build a BasicDecimal128 from a big endian array of uint32_t.
static DecimalStatus BuildFromArray(BasicDecimal128* value, const uint32_t* array,
int64_t length) {
@@ -776,9 +776,9 @@ static DecimalStatus BuildFromArray(BasicDecimal128* value, const uint32_t* arra
return status;
}
*value = {static_cast<int64_t>(result_array[1]), result_array[0]};
- return DecimalStatus::kSuccess;
-}
-
+ return DecimalStatus::kSuccess;
+}
+
/// \brief Build a BasicDecimal256 from a big endian array of uint32_t.
static DecimalStatus BuildFromArray(BasicDecimal256* value, const uint32_t* array,
int64_t length) {
@@ -791,7 +791,7 @@ static DecimalStatus BuildFromArray(BasicDecimal256* value, const uint32_t* arra
return DecimalStatus::kSuccess;
}
-/// \brief Do a division where the divisor fits into a single 32 bit value.
+/// \brief Do a division where the divisor fits into a single 32 bit value.
template <class DecimalClass>
static inline DecimalStatus SingleDivide(const uint32_t* dividend,
int64_t dividend_length, uint32_t divisor,
@@ -799,324 +799,324 @@ static inline DecimalStatus SingleDivide(const uint32_t* dividend,
bool dividend_was_negative,
bool divisor_was_negative,
DecimalClass* result) {
- uint64_t r = 0;
+ uint64_t r = 0;
constexpr int64_t kDecimalArrayLength = DecimalClass::bit_width / sizeof(uint32_t) + 1;
uint32_t result_array[kDecimalArrayLength];
- for (int64_t j = 0; j < dividend_length; j++) {
- r <<= 32;
- r += dividend[j];
- result_array[j] = static_cast<uint32_t>(r / divisor);
- r %= divisor;
- }
- auto status = BuildFromArray(result, result_array, dividend_length);
- if (status != DecimalStatus::kSuccess) {
- return status;
- }
-
- *remainder = static_cast<int64_t>(r);
- FixDivisionSigns(result, remainder, dividend_was_negative, divisor_was_negative);
- return DecimalStatus::kSuccess;
-}
-
+ for (int64_t j = 0; j < dividend_length; j++) {
+ r <<= 32;
+ r += dividend[j];
+ result_array[j] = static_cast<uint32_t>(r / divisor);
+ r %= divisor;
+ }
+ auto status = BuildFromArray(result, result_array, dividend_length);
+ if (status != DecimalStatus::kSuccess) {
+ return status;
+ }
+
+ *remainder = static_cast<int64_t>(r);
+ FixDivisionSigns(result, remainder, dividend_was_negative, divisor_was_negative);
+ return DecimalStatus::kSuccess;
+}
+
/// \brief Do a decimal division with remainder.
template <class DecimalClass>
static inline DecimalStatus DecimalDivide(const DecimalClass& dividend,
const DecimalClass& divisor,
DecimalClass* result, DecimalClass* remainder) {
constexpr int64_t kDecimalArrayLength = DecimalClass::bit_width / sizeof(uint32_t);
- // Split the dividend and divisor into integer pieces so that we can
- // work on them.
+ // Split the dividend and divisor into integer pieces so that we can
+ // work on them.
uint32_t dividend_array[kDecimalArrayLength + 1];
uint32_t divisor_array[kDecimalArrayLength];
- bool dividend_was_negative;
- bool divisor_was_negative;
- // leave an extra zero before the dividend
- dividend_array[0] = 0;
- int64_t dividend_length =
+ bool dividend_was_negative;
+ bool divisor_was_negative;
+ // leave an extra zero before the dividend
+ dividend_array[0] = 0;
+ int64_t dividend_length =
FillInArray(dividend, dividend_array + 1, dividend_was_negative) + 1;
- int64_t divisor_length = FillInArray(divisor, divisor_array, divisor_was_negative);
-
- // Handle some of the easy cases.
- if (dividend_length <= divisor_length) {
+ int64_t divisor_length = FillInArray(divisor, divisor_array, divisor_was_negative);
+
+ // Handle some of the easy cases.
+ if (dividend_length <= divisor_length) {
*remainder = dividend;
- *result = 0;
- return DecimalStatus::kSuccess;
- }
-
- if (divisor_length == 0) {
- return DecimalStatus::kDivideByZero;
- }
-
- if (divisor_length == 1) {
- return SingleDivide(dividend_array, dividend_length, divisor_array[0], remainder,
- dividend_was_negative, divisor_was_negative, result);
- }
-
- int64_t result_length = dividend_length - divisor_length;
+ *result = 0;
+ return DecimalStatus::kSuccess;
+ }
+
+ if (divisor_length == 0) {
+ return DecimalStatus::kDivideByZero;
+ }
+
+ if (divisor_length == 1) {
+ return SingleDivide(dividend_array, dividend_length, divisor_array[0], remainder,
+ dividend_was_negative, divisor_was_negative, result);
+ }
+
+ int64_t result_length = dividend_length - divisor_length;
uint32_t result_array[kDecimalArrayLength];
DCHECK_LE(result_length, kDecimalArrayLength);
-
- // Normalize by shifting both by a multiple of 2 so that
- // the digit guessing is better. The requirement is that
- // divisor_array[0] is greater than 2**31.
- int64_t normalize_bits = BitUtil::CountLeadingZeros(divisor_array[0]);
- ShiftArrayLeft(divisor_array, divisor_length, normalize_bits);
- ShiftArrayLeft(dividend_array, dividend_length, normalize_bits);
-
- // compute each digit in the result
- for (int64_t j = 0; j < result_length; ++j) {
- // Guess the next digit. At worst it is two too large
- uint32_t guess = std::numeric_limits<uint32_t>::max();
- const auto high_dividend =
- static_cast<uint64_t>(dividend_array[j]) << 32 | dividend_array[j + 1];
- if (dividend_array[j] != divisor_array[0]) {
- guess = static_cast<uint32_t>(high_dividend / divisor_array[0]);
- }
-
- // catch all of the cases where guess is two too large and most of the
- // cases where it is one too large
- auto rhat = static_cast<uint32_t>(high_dividend -
- guess * static_cast<uint64_t>(divisor_array[0]));
- while (static_cast<uint64_t>(divisor_array[1]) * guess >
- (static_cast<uint64_t>(rhat) << 32) + dividend_array[j + 2]) {
- --guess;
- rhat += divisor_array[0];
- if (static_cast<uint64_t>(rhat) < divisor_array[0]) {
- break;
- }
- }
-
- // subtract off the guess * divisor from the dividend
- uint64_t mult = 0;
- for (int64_t i = divisor_length - 1; i >= 0; --i) {
- mult += static_cast<uint64_t>(guess) * divisor_array[i];
- uint32_t prev = dividend_array[j + i + 1];
- dividend_array[j + i + 1] -= static_cast<uint32_t>(mult);
- mult >>= 32;
- if (dividend_array[j + i + 1] > prev) {
- ++mult;
- }
- }
- uint32_t prev = dividend_array[j];
- dividend_array[j] -= static_cast<uint32_t>(mult);
-
- // if guess was too big, we add back divisor
- if (dividend_array[j] > prev) {
- --guess;
- uint32_t carry = 0;
- for (int64_t i = divisor_length - 1; i >= 0; --i) {
- const auto sum =
- static_cast<uint64_t>(divisor_array[i]) + dividend_array[j + i + 1] + carry;
- dividend_array[j + i + 1] = static_cast<uint32_t>(sum);
- carry = static_cast<uint32_t>(sum >> 32);
- }
- dividend_array[j] += carry;
- }
-
- result_array[j] = guess;
- }
-
- // denormalize the remainder
- ShiftArrayRight(dividend_array, dividend_length, normalize_bits);
-
- // return result and remainder
- auto status = BuildFromArray(result, result_array, result_length);
- if (status != DecimalStatus::kSuccess) {
- return status;
- }
- status = BuildFromArray(remainder, dividend_array, dividend_length);
- if (status != DecimalStatus::kSuccess) {
- return status;
- }
-
- FixDivisionSigns(result, remainder, dividend_was_negative, divisor_was_negative);
- return DecimalStatus::kSuccess;
-}
-
+
+ // Normalize by shifting both by a multiple of 2 so that
+ // the digit guessing is better. The requirement is that
+ // divisor_array[0] is greater than 2**31.
+ int64_t normalize_bits = BitUtil::CountLeadingZeros(divisor_array[0]);
+ ShiftArrayLeft(divisor_array, divisor_length, normalize_bits);
+ ShiftArrayLeft(dividend_array, dividend_length, normalize_bits);
+
+ // compute each digit in the result
+ for (int64_t j = 0; j < result_length; ++j) {
+ // Guess the next digit. At worst it is two too large
+ uint32_t guess = std::numeric_limits<uint32_t>::max();
+ const auto high_dividend =
+ static_cast<uint64_t>(dividend_array[j]) << 32 | dividend_array[j + 1];
+ if (dividend_array[j] != divisor_array[0]) {
+ guess = static_cast<uint32_t>(high_dividend / divisor_array[0]);
+ }
+
+ // catch all of the cases where guess is two too large and most of the
+ // cases where it is one too large
+ auto rhat = static_cast<uint32_t>(high_dividend -
+ guess * static_cast<uint64_t>(divisor_array[0]));
+ while (static_cast<uint64_t>(divisor_array[1]) * guess >
+ (static_cast<uint64_t>(rhat) << 32) + dividend_array[j + 2]) {
+ --guess;
+ rhat += divisor_array[0];
+ if (static_cast<uint64_t>(rhat) < divisor_array[0]) {
+ break;
+ }
+ }
+
+ // subtract off the guess * divisor from the dividend
+ uint64_t mult = 0;
+ for (int64_t i = divisor_length - 1; i >= 0; --i) {
+ mult += static_cast<uint64_t>(guess) * divisor_array[i];
+ uint32_t prev = dividend_array[j + i + 1];
+ dividend_array[j + i + 1] -= static_cast<uint32_t>(mult);
+ mult >>= 32;
+ if (dividend_array[j + i + 1] > prev) {
+ ++mult;
+ }
+ }
+ uint32_t prev = dividend_array[j];
+ dividend_array[j] -= static_cast<uint32_t>(mult);
+
+ // if guess was too big, we add back divisor
+ if (dividend_array[j] > prev) {
+ --guess;
+ uint32_t carry = 0;
+ for (int64_t i = divisor_length - 1; i >= 0; --i) {
+ const auto sum =
+ static_cast<uint64_t>(divisor_array[i]) + dividend_array[j + i + 1] + carry;
+ dividend_array[j + i + 1] = static_cast<uint32_t>(sum);
+ carry = static_cast<uint32_t>(sum >> 32);
+ }
+ dividend_array[j] += carry;
+ }
+
+ result_array[j] = guess;
+ }
+
+ // denormalize the remainder
+ ShiftArrayRight(dividend_array, dividend_length, normalize_bits);
+
+ // return result and remainder
+ auto status = BuildFromArray(result, result_array, result_length);
+ if (status != DecimalStatus::kSuccess) {
+ return status;
+ }
+ status = BuildFromArray(remainder, dividend_array, dividend_length);
+ if (status != DecimalStatus::kSuccess) {
+ return status;
+ }
+
+ FixDivisionSigns(result, remainder, dividend_was_negative, divisor_was_negative);
+ return DecimalStatus::kSuccess;
+}
+
DecimalStatus BasicDecimal128::Divide(const BasicDecimal128& divisor,
BasicDecimal128* result,
BasicDecimal128* remainder) const {
return DecimalDivide(*this, divisor, result, remainder);
}
-bool operator==(const BasicDecimal128& left, const BasicDecimal128& right) {
- return left.high_bits() == right.high_bits() && left.low_bits() == right.low_bits();
-}
-
-bool operator!=(const BasicDecimal128& left, const BasicDecimal128& right) {
- return !operator==(left, right);
-}
-
-bool operator<(const BasicDecimal128& left, const BasicDecimal128& right) {
- return left.high_bits() < right.high_bits() ||
- (left.high_bits() == right.high_bits() && left.low_bits() < right.low_bits());
-}
-
-bool operator<=(const BasicDecimal128& left, const BasicDecimal128& right) {
- return !operator>(left, right);
-}
-
-bool operator>(const BasicDecimal128& left, const BasicDecimal128& right) {
- return operator<(right, left);
-}
-
-bool operator>=(const BasicDecimal128& left, const BasicDecimal128& right) {
- return !operator<(left, right);
-}
-
-BasicDecimal128 operator-(const BasicDecimal128& operand) {
- BasicDecimal128 result(operand.high_bits(), operand.low_bits());
- return result.Negate();
-}
-
-BasicDecimal128 operator~(const BasicDecimal128& operand) {
- BasicDecimal128 result(~operand.high_bits(), ~operand.low_bits());
- return result;
-}
-
-BasicDecimal128 operator+(const BasicDecimal128& left, const BasicDecimal128& right) {
- BasicDecimal128 result(left.high_bits(), left.low_bits());
- result += right;
- return result;
-}
-
-BasicDecimal128 operator-(const BasicDecimal128& left, const BasicDecimal128& right) {
- BasicDecimal128 result(left.high_bits(), left.low_bits());
- result -= right;
- return result;
-}
-
-BasicDecimal128 operator*(const BasicDecimal128& left, const BasicDecimal128& right) {
- BasicDecimal128 result(left.high_bits(), left.low_bits());
- result *= right;
- return result;
-}
-
-BasicDecimal128 operator/(const BasicDecimal128& left, const BasicDecimal128& right) {
- BasicDecimal128 remainder;
- BasicDecimal128 result;
- auto s = left.Divide(right, &result, &remainder);
- DCHECK_EQ(s, DecimalStatus::kSuccess);
- return result;
-}
-
-BasicDecimal128 operator%(const BasicDecimal128& left, const BasicDecimal128& right) {
- BasicDecimal128 remainder;
- BasicDecimal128 result;
- auto s = left.Divide(right, &result, &remainder);
- DCHECK_EQ(s, DecimalStatus::kSuccess);
- return remainder;
-}
-
+bool operator==(const BasicDecimal128& left, const BasicDecimal128& right) {
+ return left.high_bits() == right.high_bits() && left.low_bits() == right.low_bits();
+}
+
+bool operator!=(const BasicDecimal128& left, const BasicDecimal128& right) {
+ return !operator==(left, right);
+}
+
+bool operator<(const BasicDecimal128& left, const BasicDecimal128& right) {
+ return left.high_bits() < right.high_bits() ||
+ (left.high_bits() == right.high_bits() && left.low_bits() < right.low_bits());
+}
+
+bool operator<=(const BasicDecimal128& left, const BasicDecimal128& right) {
+ return !operator>(left, right);
+}
+
+bool operator>(const BasicDecimal128& left, const BasicDecimal128& right) {
+ return operator<(right, left);
+}
+
+bool operator>=(const BasicDecimal128& left, const BasicDecimal128& right) {
+ return !operator<(left, right);
+}
+
+BasicDecimal128 operator-(const BasicDecimal128& operand) {
+ BasicDecimal128 result(operand.high_bits(), operand.low_bits());
+ return result.Negate();
+}
+
+BasicDecimal128 operator~(const BasicDecimal128& operand) {
+ BasicDecimal128 result(~operand.high_bits(), ~operand.low_bits());
+ return result;
+}
+
+BasicDecimal128 operator+(const BasicDecimal128& left, const BasicDecimal128& right) {
+ BasicDecimal128 result(left.high_bits(), left.low_bits());
+ result += right;
+ return result;
+}
+
+BasicDecimal128 operator-(const BasicDecimal128& left, const BasicDecimal128& right) {
+ BasicDecimal128 result(left.high_bits(), left.low_bits());
+ result -= right;
+ return result;
+}
+
+BasicDecimal128 operator*(const BasicDecimal128& left, const BasicDecimal128& right) {
+ BasicDecimal128 result(left.high_bits(), left.low_bits());
+ result *= right;
+ return result;
+}
+
+BasicDecimal128 operator/(const BasicDecimal128& left, const BasicDecimal128& right) {
+ BasicDecimal128 remainder;
+ BasicDecimal128 result;
+ auto s = left.Divide(right, &result, &remainder);
+ DCHECK_EQ(s, DecimalStatus::kSuccess);
+ return result;
+}
+
+BasicDecimal128 operator%(const BasicDecimal128& left, const BasicDecimal128& right) {
+ BasicDecimal128 remainder;
+ BasicDecimal128 result;
+ auto s = left.Divide(right, &result, &remainder);
+ DCHECK_EQ(s, DecimalStatus::kSuccess);
+ return remainder;
+}
+
template <class DecimalClass>
static bool RescaleWouldCauseDataLoss(const DecimalClass& value, int32_t delta_scale,
const DecimalClass& multiplier,
DecimalClass* result) {
- if (delta_scale < 0) {
- DCHECK_NE(multiplier, 0);
+ if (delta_scale < 0) {
+ DCHECK_NE(multiplier, 0);
DecimalClass remainder;
- auto status = value.Divide(multiplier, result, &remainder);
- DCHECK_EQ(status, DecimalStatus::kSuccess);
- return remainder != 0;
- }
-
- *result = value * multiplier;
- return (value < 0) ? *result > value : *result < value;
-}
-
+ auto status = value.Divide(multiplier, result, &remainder);
+ DCHECK_EQ(status, DecimalStatus::kSuccess);
+ return remainder != 0;
+ }
+
+ *result = value * multiplier;
+ return (value < 0) ? *result > value : *result < value;
+}
+
template <class DecimalClass>
DecimalStatus DecimalRescale(const DecimalClass& value, int32_t original_scale,
int32_t new_scale, DecimalClass* out) {
- DCHECK_NE(out, nullptr);
-
- if (original_scale == new_scale) {
+ DCHECK_NE(out, nullptr);
+
+ if (original_scale == new_scale) {
*out = value;
- return DecimalStatus::kSuccess;
- }
-
- const int32_t delta_scale = new_scale - original_scale;
- const int32_t abs_delta_scale = std::abs(delta_scale);
-
+ return DecimalStatus::kSuccess;
+ }
+
+ const int32_t delta_scale = new_scale - original_scale;
+ const int32_t abs_delta_scale = std::abs(delta_scale);
+
DecimalClass multiplier = DecimalClass::GetScaleMultiplier(abs_delta_scale);
-
- const bool rescale_would_cause_data_loss =
+
+ const bool rescale_would_cause_data_loss =
RescaleWouldCauseDataLoss(value, delta_scale, multiplier, out);
-
- // Fail if we overflow or truncate
- if (ARROW_PREDICT_FALSE(rescale_would_cause_data_loss)) {
- return DecimalStatus::kRescaleDataLoss;
- }
-
- return DecimalStatus::kSuccess;
-}
-
+
+ // Fail if we overflow or truncate
+ if (ARROW_PREDICT_FALSE(rescale_would_cause_data_loss)) {
+ return DecimalStatus::kRescaleDataLoss;
+ }
+
+ return DecimalStatus::kSuccess;
+}
+
DecimalStatus BasicDecimal128::Rescale(int32_t original_scale, int32_t new_scale,
BasicDecimal128* out) const {
return DecimalRescale(*this, original_scale, new_scale, out);
}
-void BasicDecimal128::GetWholeAndFraction(int scale, BasicDecimal128* whole,
- BasicDecimal128* fraction) const {
- DCHECK_GE(scale, 0);
- DCHECK_LE(scale, 38);
-
- BasicDecimal128 multiplier(ScaleMultipliers[scale]);
- auto s = Divide(multiplier, whole, fraction);
- DCHECK_EQ(s, DecimalStatus::kSuccess);
-}
-
-const BasicDecimal128& BasicDecimal128::GetScaleMultiplier(int32_t scale) {
- DCHECK_GE(scale, 0);
- DCHECK_LE(scale, 38);
-
- return ScaleMultipliers[scale];
-}
-
-const BasicDecimal128& BasicDecimal128::GetMaxValue() { return kMaxValue; }
-
-BasicDecimal128 BasicDecimal128::IncreaseScaleBy(int32_t increase_by) const {
- DCHECK_GE(increase_by, 0);
- DCHECK_LE(increase_by, 38);
-
- return (*this) * ScaleMultipliers[increase_by];
-}
-
-BasicDecimal128 BasicDecimal128::ReduceScaleBy(int32_t reduce_by, bool round) const {
- DCHECK_GE(reduce_by, 0);
- DCHECK_LE(reduce_by, 38);
-
- if (reduce_by == 0) {
- return *this;
- }
-
- BasicDecimal128 divisor(ScaleMultipliers[reduce_by]);
- BasicDecimal128 result;
- BasicDecimal128 remainder;
- auto s = Divide(divisor, &result, &remainder);
- DCHECK_EQ(s, DecimalStatus::kSuccess);
- if (round) {
- auto divisor_half = ScaleMultipliersHalf[reduce_by];
- if (remainder.Abs() >= divisor_half) {
- if (result > 0) {
- result += 1;
- } else {
- result -= 1;
- }
- }
- }
- return result;
-}
-
-int32_t BasicDecimal128::CountLeadingBinaryZeros() const {
- DCHECK_GE(*this, BasicDecimal128(0));
-
- if (high_bits_ == 0) {
- return BitUtil::CountLeadingZeros(low_bits_) + 64;
- } else {
- return BitUtil::CountLeadingZeros(static_cast<uint64_t>(high_bits_));
- }
-}
-
+void BasicDecimal128::GetWholeAndFraction(int scale, BasicDecimal128* whole,
+ BasicDecimal128* fraction) const {
+ DCHECK_GE(scale, 0);
+ DCHECK_LE(scale, 38);
+
+ BasicDecimal128 multiplier(ScaleMultipliers[scale]);
+ auto s = Divide(multiplier, whole, fraction);
+ DCHECK_EQ(s, DecimalStatus::kSuccess);
+}
+
+const BasicDecimal128& BasicDecimal128::GetScaleMultiplier(int32_t scale) {
+ DCHECK_GE(scale, 0);
+ DCHECK_LE(scale, 38);
+
+ return ScaleMultipliers[scale];
+}
+
+const BasicDecimal128& BasicDecimal128::GetMaxValue() { return kMaxValue; }
+
+BasicDecimal128 BasicDecimal128::IncreaseScaleBy(int32_t increase_by) const {
+ DCHECK_GE(increase_by, 0);
+ DCHECK_LE(increase_by, 38);
+
+ return (*this) * ScaleMultipliers[increase_by];
+}
+
+BasicDecimal128 BasicDecimal128::ReduceScaleBy(int32_t reduce_by, bool round) const {
+ DCHECK_GE(reduce_by, 0);
+ DCHECK_LE(reduce_by, 38);
+
+ if (reduce_by == 0) {
+ return *this;
+ }
+
+ BasicDecimal128 divisor(ScaleMultipliers[reduce_by]);
+ BasicDecimal128 result;
+ BasicDecimal128 remainder;
+ auto s = Divide(divisor, &result, &remainder);
+ DCHECK_EQ(s, DecimalStatus::kSuccess);
+ if (round) {
+ auto divisor_half = ScaleMultipliersHalf[reduce_by];
+ if (remainder.Abs() >= divisor_half) {
+ if (result > 0) {
+ result += 1;
+ } else {
+ result -= 1;
+ }
+ }
+ }
+ return result;
+}
+
+int32_t BasicDecimal128::CountLeadingBinaryZeros() const {
+ DCHECK_GE(*this, BasicDecimal128(0));
+
+ if (high_bits_ == 0) {
+ return BitUtil::CountLeadingZeros(low_bits_) + 64;
+ } else {
+ return BitUtil::CountLeadingZeros(static_cast<uint64_t>(high_bits_));
+ }
+}
+
#if ARROW_LITTLE_ENDIAN
BasicDecimal256::BasicDecimal256(const uint8_t* bytes)
: little_endian_array_(
@@ -1341,4 +1341,4 @@ BasicDecimal256 operator/(const BasicDecimal256& left, const BasicDecimal256& ri
return result;
}
-} // namespace arrow
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/basic_decimal.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/basic_decimal.h
index acc8ea4930f..6996ebf3151 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/basic_decimal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/basic_decimal.h
@@ -1,185 +1,185 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <array>
-#include <cstdint>
-#include <limits>
-#include <string>
-#include <type_traits>
-
-#include "arrow/util/macros.h"
-#include "arrow/util/type_traits.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-enum class DecimalStatus {
- kSuccess,
- kDivideByZero,
- kOverflow,
- kRescaleDataLoss,
-};
-
-/// Represents a signed 128-bit integer in two's complement.
-///
-/// This class is also compiled into LLVM IR - so, it should not have cpp references like
-/// streams and boost.
-class ARROW_EXPORT BasicDecimal128 {
- public:
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <array>
+#include <cstdint>
+#include <limits>
+#include <string>
+#include <type_traits>
+
+#include "arrow/util/macros.h"
+#include "arrow/util/type_traits.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+enum class DecimalStatus {
+ kSuccess,
+ kDivideByZero,
+ kOverflow,
+ kRescaleDataLoss,
+};
+
+/// Represents a signed 128-bit integer in two's complement.
+///
+/// This class is also compiled into LLVM IR - so, it should not have cpp references like
+/// streams and boost.
+class ARROW_EXPORT BasicDecimal128 {
+ public:
static constexpr int bit_width = 128;
- /// \brief Create a BasicDecimal128 from the two's complement representation.
- constexpr BasicDecimal128(int64_t high, uint64_t low) noexcept
- : low_bits_(low), high_bits_(high) {}
-
- /// \brief Empty constructor creates a BasicDecimal128 with a value of 0.
- constexpr BasicDecimal128() noexcept : BasicDecimal128(0, 0) {}
-
- /// \brief Convert any integer value into a BasicDecimal128.
- template <typename T,
- typename = typename std::enable_if<
- std::is_integral<T>::value && (sizeof(T) <= sizeof(uint64_t)), T>::type>
- constexpr BasicDecimal128(T value) noexcept
- : BasicDecimal128(value >= T{0} ? 0 : -1, static_cast<uint64_t>(value)) { // NOLINT
- }
-
- /// \brief Create a BasicDecimal128 from an array of bytes. Bytes are assumed to be in
- /// native-endian byte order.
- explicit BasicDecimal128(const uint8_t* bytes);
-
- /// \brief Negate the current value (in-place)
- BasicDecimal128& Negate();
-
- /// \brief Absolute value (in-place)
- BasicDecimal128& Abs();
-
- /// \brief Absolute value
- static BasicDecimal128 Abs(const BasicDecimal128& left);
-
- /// \brief Add a number to this one. The result is truncated to 128 bits.
- BasicDecimal128& operator+=(const BasicDecimal128& right);
-
- /// \brief Subtract a number from this one. The result is truncated to 128 bits.
- BasicDecimal128& operator-=(const BasicDecimal128& right);
-
- /// \brief Multiply this number by another number. The result is truncated to 128 bits.
- BasicDecimal128& operator*=(const BasicDecimal128& right);
-
- /// Divide this number by right and return the result.
- ///
- /// This operation is not destructive.
- /// The answer rounds to zero. Signs work like:
- /// 21 / 5 -> 4, 1
- /// -21 / 5 -> -4, -1
- /// 21 / -5 -> -4, 1
- /// -21 / -5 -> 4, -1
- /// \param[in] divisor the number to divide by
- /// \param[out] result the quotient
- /// \param[out] remainder the remainder after the division
- DecimalStatus Divide(const BasicDecimal128& divisor, BasicDecimal128* result,
- BasicDecimal128* remainder) const;
-
- /// \brief In-place division.
- BasicDecimal128& operator/=(const BasicDecimal128& right);
-
- /// \brief Bitwise "or" between two BasicDecimal128.
- BasicDecimal128& operator|=(const BasicDecimal128& right);
-
- /// \brief Bitwise "and" between two BasicDecimal128.
- BasicDecimal128& operator&=(const BasicDecimal128& right);
-
- /// \brief Shift left by the given number of bits.
- BasicDecimal128& operator<<=(uint32_t bits);
-
- /// \brief Shift right by the given number of bits. Negative values will
- BasicDecimal128& operator>>=(uint32_t bits);
-
- /// \brief Get the high bits of the two's complement representation of the number.
+ /// \brief Create a BasicDecimal128 from the two's complement representation.
+ constexpr BasicDecimal128(int64_t high, uint64_t low) noexcept
+ : low_bits_(low), high_bits_(high) {}
+
+ /// \brief Empty constructor creates a BasicDecimal128 with a value of 0.
+ constexpr BasicDecimal128() noexcept : BasicDecimal128(0, 0) {}
+
+ /// \brief Convert any integer value into a BasicDecimal128.
+ template <typename T,
+ typename = typename std::enable_if<
+ std::is_integral<T>::value && (sizeof(T) <= sizeof(uint64_t)), T>::type>
+ constexpr BasicDecimal128(T value) noexcept
+ : BasicDecimal128(value >= T{0} ? 0 : -1, static_cast<uint64_t>(value)) { // NOLINT
+ }
+
+ /// \brief Create a BasicDecimal128 from an array of bytes. Bytes are assumed to be in
+ /// native-endian byte order.
+ explicit BasicDecimal128(const uint8_t* bytes);
+
+ /// \brief Negate the current value (in-place)
+ BasicDecimal128& Negate();
+
+ /// \brief Absolute value (in-place)
+ BasicDecimal128& Abs();
+
+ /// \brief Absolute value
+ static BasicDecimal128 Abs(const BasicDecimal128& left);
+
+ /// \brief Add a number to this one. The result is truncated to 128 bits.
+ BasicDecimal128& operator+=(const BasicDecimal128& right);
+
+ /// \brief Subtract a number from this one. The result is truncated to 128 bits.
+ BasicDecimal128& operator-=(const BasicDecimal128& right);
+
+ /// \brief Multiply this number by another number. The result is truncated to 128 bits.
+ BasicDecimal128& operator*=(const BasicDecimal128& right);
+
+ /// Divide this number by right and return the result.
+ ///
+ /// This operation is not destructive.
+ /// The answer rounds to zero. Signs work like:
+ /// 21 / 5 -> 4, 1
+ /// -21 / 5 -> -4, -1
+ /// 21 / -5 -> -4, 1
+ /// -21 / -5 -> 4, -1
+ /// \param[in] divisor the number to divide by
+ /// \param[out] result the quotient
+ /// \param[out] remainder the remainder after the division
+ DecimalStatus Divide(const BasicDecimal128& divisor, BasicDecimal128* result,
+ BasicDecimal128* remainder) const;
+
+ /// \brief In-place division.
+ BasicDecimal128& operator/=(const BasicDecimal128& right);
+
+ /// \brief Bitwise "or" between two BasicDecimal128.
+ BasicDecimal128& operator|=(const BasicDecimal128& right);
+
+ /// \brief Bitwise "and" between two BasicDecimal128.
+ BasicDecimal128& operator&=(const BasicDecimal128& right);
+
+ /// \brief Shift left by the given number of bits.
+ BasicDecimal128& operator<<=(uint32_t bits);
+
+ /// \brief Shift right by the given number of bits. Negative values will
+ BasicDecimal128& operator>>=(uint32_t bits);
+
+ /// \brief Get the high bits of the two's complement representation of the number.
inline constexpr int64_t high_bits() const { return high_bits_; }
-
- /// \brief Get the low bits of the two's complement representation of the number.
+
+ /// \brief Get the low bits of the two's complement representation of the number.
inline constexpr uint64_t low_bits() const { return low_bits_; }
-
- /// \brief Return the raw bytes of the value in native-endian byte order.
- std::array<uint8_t, 16> ToBytes() const;
- void ToBytes(uint8_t* out) const;
-
- /// \brief separate the integer and fractional parts for the given scale.
- void GetWholeAndFraction(int32_t scale, BasicDecimal128* whole,
- BasicDecimal128* fraction) const;
-
- /// \brief Scale multiplier for given scale value.
- static const BasicDecimal128& GetScaleMultiplier(int32_t scale);
-
- /// \brief Convert BasicDecimal128 from one scale to another
- DecimalStatus Rescale(int32_t original_scale, int32_t new_scale,
- BasicDecimal128* out) const;
-
- /// \brief Scale up.
- BasicDecimal128 IncreaseScaleBy(int32_t increase_by) const;
-
- /// \brief Scale down.
- /// - If 'round' is true, the right-most digits are dropped and the result value is
- /// rounded up (+1 for +ve, -1 for -ve) based on the value of the dropped digits
- /// (>= 10^reduce_by / 2).
- /// - If 'round' is false, the right-most digits are simply dropped.
- BasicDecimal128 ReduceScaleBy(int32_t reduce_by, bool round = true) const;
-
- /// \brief Whether this number fits in the given precision
- ///
- /// Return true if the number of significant digits is less or equal to `precision`.
- bool FitsInPrecision(int32_t precision) const;
-
- // returns 1 for positive and zero decimal values, -1 for negative decimal values.
- inline int64_t Sign() const { return 1 | (high_bits_ >> 63); }
-
- /// \brief count the number of leading binary zeroes.
- int32_t CountLeadingBinaryZeros() const;
-
- /// \brief Get the maximum valid unscaled decimal value.
- static const BasicDecimal128& GetMaxValue();
-
- private:
- uint64_t low_bits_;
- int64_t high_bits_;
-};
-
-ARROW_EXPORT bool operator==(const BasicDecimal128& left, const BasicDecimal128& right);
-ARROW_EXPORT bool operator!=(const BasicDecimal128& left, const BasicDecimal128& right);
-ARROW_EXPORT bool operator<(const BasicDecimal128& left, const BasicDecimal128& right);
-ARROW_EXPORT bool operator<=(const BasicDecimal128& left, const BasicDecimal128& right);
-ARROW_EXPORT bool operator>(const BasicDecimal128& left, const BasicDecimal128& right);
-ARROW_EXPORT bool operator>=(const BasicDecimal128& left, const BasicDecimal128& right);
-
-ARROW_EXPORT BasicDecimal128 operator-(const BasicDecimal128& operand);
-ARROW_EXPORT BasicDecimal128 operator~(const BasicDecimal128& operand);
-ARROW_EXPORT BasicDecimal128 operator+(const BasicDecimal128& left,
- const BasicDecimal128& right);
-ARROW_EXPORT BasicDecimal128 operator-(const BasicDecimal128& left,
- const BasicDecimal128& right);
-ARROW_EXPORT BasicDecimal128 operator*(const BasicDecimal128& left,
- const BasicDecimal128& right);
-ARROW_EXPORT BasicDecimal128 operator/(const BasicDecimal128& left,
- const BasicDecimal128& right);
-ARROW_EXPORT BasicDecimal128 operator%(const BasicDecimal128& left,
- const BasicDecimal128& right);
-
+
+ /// \brief Return the raw bytes of the value in native-endian byte order.
+ std::array<uint8_t, 16> ToBytes() const;
+ void ToBytes(uint8_t* out) const;
+
+ /// \brief separate the integer and fractional parts for the given scale.
+ void GetWholeAndFraction(int32_t scale, BasicDecimal128* whole,
+ BasicDecimal128* fraction) const;
+
+ /// \brief Scale multiplier for given scale value.
+ static const BasicDecimal128& GetScaleMultiplier(int32_t scale);
+
+ /// \brief Convert BasicDecimal128 from one scale to another
+ DecimalStatus Rescale(int32_t original_scale, int32_t new_scale,
+ BasicDecimal128* out) const;
+
+ /// \brief Scale up.
+ BasicDecimal128 IncreaseScaleBy(int32_t increase_by) const;
+
+ /// \brief Scale down.
+ /// - If 'round' is true, the right-most digits are dropped and the result value is
+ /// rounded up (+1 for +ve, -1 for -ve) based on the value of the dropped digits
+ /// (>= 10^reduce_by / 2).
+ /// - If 'round' is false, the right-most digits are simply dropped.
+ BasicDecimal128 ReduceScaleBy(int32_t reduce_by, bool round = true) const;
+
+ /// \brief Whether this number fits in the given precision
+ ///
+ /// Return true if the number of significant digits is less or equal to `precision`.
+ bool FitsInPrecision(int32_t precision) const;
+
+ // returns 1 for positive and zero decimal values, -1 for negative decimal values.
+ inline int64_t Sign() const { return 1 | (high_bits_ >> 63); }
+
+ /// \brief count the number of leading binary zeroes.
+ int32_t CountLeadingBinaryZeros() const;
+
+ /// \brief Get the maximum valid unscaled decimal value.
+ static const BasicDecimal128& GetMaxValue();
+
+ private:
+ uint64_t low_bits_;
+ int64_t high_bits_;
+};
+
+ARROW_EXPORT bool operator==(const BasicDecimal128& left, const BasicDecimal128& right);
+ARROW_EXPORT bool operator!=(const BasicDecimal128& left, const BasicDecimal128& right);
+ARROW_EXPORT bool operator<(const BasicDecimal128& left, const BasicDecimal128& right);
+ARROW_EXPORT bool operator<=(const BasicDecimal128& left, const BasicDecimal128& right);
+ARROW_EXPORT bool operator>(const BasicDecimal128& left, const BasicDecimal128& right);
+ARROW_EXPORT bool operator>=(const BasicDecimal128& left, const BasicDecimal128& right);
+
+ARROW_EXPORT BasicDecimal128 operator-(const BasicDecimal128& operand);
+ARROW_EXPORT BasicDecimal128 operator~(const BasicDecimal128& operand);
+ARROW_EXPORT BasicDecimal128 operator+(const BasicDecimal128& left,
+ const BasicDecimal128& right);
+ARROW_EXPORT BasicDecimal128 operator-(const BasicDecimal128& left,
+ const BasicDecimal128& right);
+ARROW_EXPORT BasicDecimal128 operator*(const BasicDecimal128& left,
+ const BasicDecimal128& right);
+ARROW_EXPORT BasicDecimal128 operator/(const BasicDecimal128& left,
+ const BasicDecimal128& right);
+ARROW_EXPORT BasicDecimal128 operator%(const BasicDecimal128& left,
+ const BasicDecimal128& right);
+
class ARROW_EXPORT BasicDecimal256 {
private:
// Due to a bug in clang, we have to declare the extend method prior to its
@@ -339,4 +339,4 @@ ARROW_EXPORT BasicDecimal256 operator*(const BasicDecimal256& left,
const BasicDecimal256& right);
ARROW_EXPORT BasicDecimal256 operator/(const BasicDecimal256& left,
const BasicDecimal256& right);
-} // namespace arrow
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_block_counter.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_block_counter.cc
index c67cedc4a06..60f1005773e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_block_counter.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_block_counter.cc
@@ -1,80 +1,80 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/bit_block_counter.h"
-
-#include <algorithm>
-#include <cstdint>
-#include <type_traits>
-
-#include "arrow/buffer.h"
-#include "arrow/util/bitmap_ops.h"
-
-namespace arrow {
-namespace internal {
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/bit_block_counter.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <type_traits>
+
+#include "arrow/buffer.h"
+#include "arrow/util/bitmap_ops.h"
+
+namespace arrow {
+namespace internal {
+
BitBlockCount BitBlockCounter::GetBlockSlow(int64_t block_size) noexcept {
- const int16_t run_length = static_cast<int16_t>(std::min(bits_remaining_, block_size));
- int16_t popcount = static_cast<int16_t>(CountSetBits(bitmap_, offset_, run_length));
- bits_remaining_ -= run_length;
- // This code path should trigger _at most_ 2 times. In the "two times"
- // case, the first time the run length will be a multiple of 8 by construction
- bitmap_ += run_length / 8;
- return {run_length, popcount};
-}
-
+ const int16_t run_length = static_cast<int16_t>(std::min(bits_remaining_, block_size));
+ int16_t popcount = static_cast<int16_t>(CountSetBits(bitmap_, offset_, run_length));
+ bits_remaining_ -= run_length;
+ // This code path should trigger _at most_ 2 times. In the "two times"
+ // case, the first time the run length will be a multiple of 8 by construction
+ bitmap_ += run_length / 8;
+ return {run_length, popcount};
+}
+
// Prevent pointer arithmetic on nullptr, which is undefined behavior even if the pointer
// is never dereferenced.
inline const uint8_t* EnsureNotNull(const uint8_t* ptr) {
static const uint8_t byte{};
return ptr == nullptr ? &byte : ptr;
-}
-
-OptionalBitBlockCounter::OptionalBitBlockCounter(const uint8_t* validity_bitmap,
- int64_t offset, int64_t length)
- : has_bitmap_(validity_bitmap != nullptr),
- position_(0),
- length_(length),
+}
+
+OptionalBitBlockCounter::OptionalBitBlockCounter(const uint8_t* validity_bitmap,
+ int64_t offset, int64_t length)
+ : has_bitmap_(validity_bitmap != nullptr),
+ position_(0),
+ length_(length),
counter_(EnsureNotNull(validity_bitmap), offset, length) {}
-
-OptionalBitBlockCounter::OptionalBitBlockCounter(
- const std::shared_ptr<Buffer>& validity_bitmap, int64_t offset, int64_t length)
- : OptionalBitBlockCounter(validity_bitmap ? validity_bitmap->data() : nullptr, offset,
- length) {}
-
-OptionalBinaryBitBlockCounter::OptionalBinaryBitBlockCounter(const uint8_t* left_bitmap,
- int64_t left_offset,
- const uint8_t* right_bitmap,
- int64_t right_offset,
- int64_t length)
- : has_bitmap_(HasBitmapFromBitmaps(left_bitmap != nullptr, right_bitmap != nullptr)),
- position_(0),
- length_(length),
+
+OptionalBitBlockCounter::OptionalBitBlockCounter(
+ const std::shared_ptr<Buffer>& validity_bitmap, int64_t offset, int64_t length)
+ : OptionalBitBlockCounter(validity_bitmap ? validity_bitmap->data() : nullptr, offset,
+ length) {}
+
+OptionalBinaryBitBlockCounter::OptionalBinaryBitBlockCounter(const uint8_t* left_bitmap,
+ int64_t left_offset,
+ const uint8_t* right_bitmap,
+ int64_t right_offset,
+ int64_t length)
+ : has_bitmap_(HasBitmapFromBitmaps(left_bitmap != nullptr, right_bitmap != nullptr)),
+ position_(0),
+ length_(length),
unary_counter_(EnsureNotNull(left_bitmap != nullptr ? left_bitmap : right_bitmap),
- left_bitmap != nullptr ? left_offset : right_offset, length),
+ left_bitmap != nullptr ? left_offset : right_offset, length),
binary_counter_(EnsureNotNull(left_bitmap), left_offset,
EnsureNotNull(right_bitmap), right_offset, length) {}
-
-OptionalBinaryBitBlockCounter::OptionalBinaryBitBlockCounter(
- const std::shared_ptr<Buffer>& left_bitmap, int64_t left_offset,
- const std::shared_ptr<Buffer>& right_bitmap, int64_t right_offset, int64_t length)
- : OptionalBinaryBitBlockCounter(
- left_bitmap ? left_bitmap->data() : nullptr, left_offset,
- right_bitmap ? right_bitmap->data() : nullptr, right_offset, length) {}
-
-} // namespace internal
-} // namespace arrow
+
+OptionalBinaryBitBlockCounter::OptionalBinaryBitBlockCounter(
+ const std::shared_ptr<Buffer>& left_bitmap, int64_t left_offset,
+ const std::shared_ptr<Buffer>& right_bitmap, int64_t right_offset, int64_t length)
+ : OptionalBinaryBitBlockCounter(
+ left_bitmap ? left_bitmap->data() : nullptr, left_offset,
+ right_bitmap ? right_bitmap->data() : nullptr, right_offset, length) {}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_block_counter.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_block_counter.h
index 63036af52a4..ace51fb04dc 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_block_counter.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_block_counter.h
@@ -1,39 +1,39 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <algorithm>
-#include <cstdint>
-#include <limits>
-#include <memory>
-
-#include "arrow/buffer.h"
-#include "arrow/status.h"
-#include "arrow/util/bit_util.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <cstdint>
+#include <limits>
+#include <memory>
+
+#include "arrow/buffer.h"
+#include "arrow/status.h"
+#include "arrow/util/bit_util.h"
#include "arrow/util/endian.h"
-#include "arrow/util/macros.h"
+#include "arrow/util/macros.h"
#include "arrow/util/ubsan.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace internal {
-namespace detail {
-
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace internal {
+namespace detail {
+
inline uint64_t LoadWord(const uint8_t* bytes) {
return BitUtil::ToLittleEndian(util::SafeLoadAs<uint64_t>(bytes));
}
@@ -45,19 +45,19 @@ inline uint64_t ShiftWord(uint64_t current, uint64_t next, int64_t shift) {
return (current >> shift) | (next << (64 - shift));
}
-// These templates are here to help with unit tests
-
-template <typename T>
-struct BitBlockAnd {
- static T Call(T left, T right) { return left & right; }
-};
-
-template <>
-struct BitBlockAnd<bool> {
- static bool Call(bool left, bool right) { return left && right; }
-};
-
-template <typename T>
+// These templates are here to help with unit tests
+
+template <typename T>
+struct BitBlockAnd {
+ static T Call(T left, T right) { return left & right; }
+};
+
+template <>
+struct BitBlockAnd<bool> {
+ static bool Call(bool left, bool right) { return left && right; }
+};
+
+template <typename T>
struct BitBlockAndNot {
static T Call(T left, T right) { return left & ~right; }
};
@@ -68,62 +68,62 @@ struct BitBlockAndNot<bool> {
};
template <typename T>
-struct BitBlockOr {
- static T Call(T left, T right) { return left | right; }
-};
-
-template <>
-struct BitBlockOr<bool> {
- static bool Call(bool left, bool right) { return left || right; }
-};
-
-template <typename T>
-struct BitBlockOrNot {
- static T Call(T left, T right) { return left | ~right; }
-};
-
-template <>
-struct BitBlockOrNot<bool> {
- static bool Call(bool left, bool right) { return left || !right; }
-};
-
-} // namespace detail
-
-/// \brief Return value from bit block counters: the total number of bits and
-/// the number of set bits.
-struct BitBlockCount {
- int16_t length;
- int16_t popcount;
-
- bool NoneSet() const { return this->popcount == 0; }
- bool AllSet() const { return this->length == this->popcount; }
-};
-
-/// \brief A class that scans through a true/false bitmap to compute popcounts
-/// 64 or 256 bits at a time. This is used to accelerate processing of
-/// mostly-not-null array data.
-class ARROW_EXPORT BitBlockCounter {
- public:
- BitBlockCounter(const uint8_t* bitmap, int64_t start_offset, int64_t length)
- : bitmap_(bitmap + start_offset / 8),
- bits_remaining_(length),
- offset_(start_offset % 8) {}
-
- /// \brief The bit size of each word run
- static constexpr int64_t kWordBits = 64;
-
- /// \brief The bit size of four words run
- static constexpr int64_t kFourWordsBits = kWordBits * 4;
-
- /// \brief Return the next run of available bits, usually 256. The returned
- /// pair contains the size of run and the number of true values. The last
- /// block will have a length less than 256 if the bitmap length is not a
- /// multiple of 256, and will return 0-length blocks in subsequent
- /// invocations.
+struct BitBlockOr {
+ static T Call(T left, T right) { return left | right; }
+};
+
+template <>
+struct BitBlockOr<bool> {
+ static bool Call(bool left, bool right) { return left || right; }
+};
+
+template <typename T>
+struct BitBlockOrNot {
+ static T Call(T left, T right) { return left | ~right; }
+};
+
+template <>
+struct BitBlockOrNot<bool> {
+ static bool Call(bool left, bool right) { return left || !right; }
+};
+
+} // namespace detail
+
+/// \brief Return value from bit block counters: the total number of bits and
+/// the number of set bits.
+struct BitBlockCount {
+ int16_t length;
+ int16_t popcount;
+
+ bool NoneSet() const { return this->popcount == 0; }
+ bool AllSet() const { return this->length == this->popcount; }
+};
+
+/// \brief A class that scans through a true/false bitmap to compute popcounts
+/// 64 or 256 bits at a time. This is used to accelerate processing of
+/// mostly-not-null array data.
+class ARROW_EXPORT BitBlockCounter {
+ public:
+ BitBlockCounter(const uint8_t* bitmap, int64_t start_offset, int64_t length)
+ : bitmap_(bitmap + start_offset / 8),
+ bits_remaining_(length),
+ offset_(start_offset % 8) {}
+
+ /// \brief The bit size of each word run
+ static constexpr int64_t kWordBits = 64;
+
+ /// \brief The bit size of four words run
+ static constexpr int64_t kFourWordsBits = kWordBits * 4;
+
+ /// \brief Return the next run of available bits, usually 256. The returned
+ /// pair contains the size of run and the number of true values. The last
+ /// block will have a length less than 256 if the bitmap length is not a
+ /// multiple of 256, and will return 0-length blocks in subsequent
+ /// invocations.
BitBlockCount NextFourWords() {
using detail::LoadWord;
using detail::ShiftWord;
-
+
if (!bits_remaining_) {
return {0, 0};
}
@@ -160,15 +160,15 @@ class ARROW_EXPORT BitBlockCounter {
return {256, static_cast<int16_t>(total_popcount)};
}
- /// \brief Return the next run of available bits, usually 64. The returned
- /// pair contains the size of run and the number of true values. The last
- /// block will have a length less than 64 if the bitmap length is not a
- /// multiple of 64, and will return 0-length blocks in subsequent
- /// invocations.
+ /// \brief Return the next run of available bits, usually 64. The returned
+ /// pair contains the size of run and the number of true values. The last
+ /// block will have a length less than 64 if the bitmap length is not a
+ /// multiple of 64, and will return 0-length blocks in subsequent
+ /// invocations.
BitBlockCount NextWord() {
using detail::LoadWord;
using detail::ShiftWord;
-
+
if (!bits_remaining_) {
return {0, 0};
}
@@ -192,105 +192,105 @@ class ARROW_EXPORT BitBlockCounter {
return {64, static_cast<int16_t>(popcount)};
}
- private:
- /// \brief Return block with the requested size when doing word-wise
- /// computation is not possible due to inadequate bits remaining.
+ private:
+ /// \brief Return block with the requested size when doing word-wise
+ /// computation is not possible due to inadequate bits remaining.
BitBlockCount GetBlockSlow(int64_t block_size) noexcept;
-
- const uint8_t* bitmap_;
- int64_t bits_remaining_;
- int64_t offset_;
-};
-
-/// \brief A tool to iterate through a possibly non-existent validity bitmap,
-/// to allow us to write one code path for both the with-nulls and no-nulls
-/// cases without giving up a lot of performance.
-class ARROW_EXPORT OptionalBitBlockCounter {
- public:
- // validity_bitmap may be NULLPTR
- OptionalBitBlockCounter(const uint8_t* validity_bitmap, int64_t offset, int64_t length);
-
- // validity_bitmap may be null
- OptionalBitBlockCounter(const std::shared_ptr<Buffer>& validity_bitmap, int64_t offset,
- int64_t length);
-
- /// Return block count for next word when the bitmap is available otherwise
- /// return a block with length up to INT16_MAX when there is no validity
- /// bitmap (so all the referenced values are not null).
- BitBlockCount NextBlock() {
- static constexpr int64_t kMaxBlockSize = std::numeric_limits<int16_t>::max();
- if (has_bitmap_) {
- BitBlockCount block = counter_.NextWord();
- position_ += block.length;
- return block;
- } else {
- int16_t block_size =
- static_cast<int16_t>(std::min(kMaxBlockSize, length_ - position_));
- position_ += block_size;
- // All values are non-null
- return {block_size, block_size};
- }
- }
-
- // Like NextBlock, but returns a word-sized block even when there is no
- // validity bitmap
- BitBlockCount NextWord() {
- static constexpr int64_t kWordSize = 64;
- if (has_bitmap_) {
- BitBlockCount block = counter_.NextWord();
- position_ += block.length;
- return block;
- } else {
- int16_t block_size = static_cast<int16_t>(std::min(kWordSize, length_ - position_));
- position_ += block_size;
- // All values are non-null
- return {block_size, block_size};
- }
- }
-
- private:
- const bool has_bitmap_;
- int64_t position_;
- int64_t length_;
- BitBlockCounter counter_;
-};
-
-/// \brief A class that computes popcounts on the result of bitwise operations
-/// between two bitmaps, 64 bits at a time. A 64-bit word is loaded from each
-/// bitmap, then the popcount is computed on e.g. the bitwise-and of the two
-/// words.
-class ARROW_EXPORT BinaryBitBlockCounter {
- public:
- BinaryBitBlockCounter(const uint8_t* left_bitmap, int64_t left_offset,
- const uint8_t* right_bitmap, int64_t right_offset, int64_t length)
- : left_bitmap_(left_bitmap + left_offset / 8),
- left_offset_(left_offset % 8),
- right_bitmap_(right_bitmap + right_offset / 8),
- right_offset_(right_offset % 8),
- bits_remaining_(length) {}
-
- /// \brief Return the popcount of the bitwise-and of the next run of
- /// available bits, up to 64. The returned pair contains the size of run and
- /// the number of true values. The last block will have a length less than 64
- /// if the bitmap length is not a multiple of 64, and will return 0-length
- /// blocks in subsequent invocations.
+
+ const uint8_t* bitmap_;
+ int64_t bits_remaining_;
+ int64_t offset_;
+};
+
+/// \brief A tool to iterate through a possibly non-existent validity bitmap,
+/// to allow us to write one code path for both the with-nulls and no-nulls
+/// cases without giving up a lot of performance.
+class ARROW_EXPORT OptionalBitBlockCounter {
+ public:
+ // validity_bitmap may be NULLPTR
+ OptionalBitBlockCounter(const uint8_t* validity_bitmap, int64_t offset, int64_t length);
+
+ // validity_bitmap may be null
+ OptionalBitBlockCounter(const std::shared_ptr<Buffer>& validity_bitmap, int64_t offset,
+ int64_t length);
+
+ /// Return block count for next word when the bitmap is available otherwise
+ /// return a block with length up to INT16_MAX when there is no validity
+ /// bitmap (so all the referenced values are not null).
+ BitBlockCount NextBlock() {
+ static constexpr int64_t kMaxBlockSize = std::numeric_limits<int16_t>::max();
+ if (has_bitmap_) {
+ BitBlockCount block = counter_.NextWord();
+ position_ += block.length;
+ return block;
+ } else {
+ int16_t block_size =
+ static_cast<int16_t>(std::min(kMaxBlockSize, length_ - position_));
+ position_ += block_size;
+ // All values are non-null
+ return {block_size, block_size};
+ }
+ }
+
+ // Like NextBlock, but returns a word-sized block even when there is no
+ // validity bitmap
+ BitBlockCount NextWord() {
+ static constexpr int64_t kWordSize = 64;
+ if (has_bitmap_) {
+ BitBlockCount block = counter_.NextWord();
+ position_ += block.length;
+ return block;
+ } else {
+ int16_t block_size = static_cast<int16_t>(std::min(kWordSize, length_ - position_));
+ position_ += block_size;
+ // All values are non-null
+ return {block_size, block_size};
+ }
+ }
+
+ private:
+ const bool has_bitmap_;
+ int64_t position_;
+ int64_t length_;
+ BitBlockCounter counter_;
+};
+
+/// \brief A class that computes popcounts on the result of bitwise operations
+/// between two bitmaps, 64 bits at a time. A 64-bit word is loaded from each
+/// bitmap, then the popcount is computed on e.g. the bitwise-and of the two
+/// words.
+class ARROW_EXPORT BinaryBitBlockCounter {
+ public:
+ BinaryBitBlockCounter(const uint8_t* left_bitmap, int64_t left_offset,
+ const uint8_t* right_bitmap, int64_t right_offset, int64_t length)
+ : left_bitmap_(left_bitmap + left_offset / 8),
+ left_offset_(left_offset % 8),
+ right_bitmap_(right_bitmap + right_offset / 8),
+ right_offset_(right_offset % 8),
+ bits_remaining_(length) {}
+
+ /// \brief Return the popcount of the bitwise-and of the next run of
+ /// available bits, up to 64. The returned pair contains the size of run and
+ /// the number of true values. The last block will have a length less than 64
+ /// if the bitmap length is not a multiple of 64, and will return 0-length
+ /// blocks in subsequent invocations.
BitBlockCount NextAndWord() { return NextWord<detail::BitBlockAnd>(); }
-
+
/// \brief Computes "x & ~y" block for each available run of bits.
BitBlockCount NextAndNotWord() { return NextWord<detail::BitBlockAndNot>(); }
- /// \brief Computes "x | y" block for each available run of bits.
+ /// \brief Computes "x | y" block for each available run of bits.
BitBlockCount NextOrWord() { return NextWord<detail::BitBlockOr>(); }
-
- /// \brief Computes "x | ~y" block for each available run of bits.
+
+ /// \brief Computes "x | ~y" block for each available run of bits.
BitBlockCount NextOrNotWord() { return NextWord<detail::BitBlockOrNot>(); }
-
- private:
- template <template <typename T> class Op>
+
+ private:
+ template <template <typename T> class Op>
BitBlockCount NextWord() {
using detail::LoadWord;
using detail::ShiftWord;
-
+
if (!bits_remaining_) {
return {0, 0};
}
@@ -335,50 +335,50 @@ class ARROW_EXPORT BinaryBitBlockCounter {
return {64, static_cast<int16_t>(popcount)};
}
- const uint8_t* left_bitmap_;
- int64_t left_offset_;
- const uint8_t* right_bitmap_;
- int64_t right_offset_;
- int64_t bits_remaining_;
-};
-
-class ARROW_EXPORT OptionalBinaryBitBlockCounter {
- public:
- // Any bitmap may be NULLPTR
- OptionalBinaryBitBlockCounter(const uint8_t* left_bitmap, int64_t left_offset,
- const uint8_t* right_bitmap, int64_t right_offset,
- int64_t length);
-
- // Any bitmap may be null
- OptionalBinaryBitBlockCounter(const std::shared_ptr<Buffer>& left_bitmap,
- int64_t left_offset,
- const std::shared_ptr<Buffer>& right_bitmap,
- int64_t right_offset, int64_t length);
-
- BitBlockCount NextAndBlock() {
- static constexpr int64_t kMaxBlockSize = std::numeric_limits<int16_t>::max();
- switch (has_bitmap_) {
- case HasBitmap::BOTH: {
- BitBlockCount block = binary_counter_.NextAndWord();
- position_ += block.length;
- return block;
- }
- case HasBitmap::ONE: {
- BitBlockCount block = unary_counter_.NextWord();
- position_ += block.length;
- return block;
- }
- case HasBitmap::NONE:
- default: {
- const int16_t block_size =
- static_cast<int16_t>(std::min(kMaxBlockSize, length_ - position_));
- position_ += block_size;
- // All values are non-null
- return {block_size, block_size};
- }
- }
- }
-
+ const uint8_t* left_bitmap_;
+ int64_t left_offset_;
+ const uint8_t* right_bitmap_;
+ int64_t right_offset_;
+ int64_t bits_remaining_;
+};
+
+class ARROW_EXPORT OptionalBinaryBitBlockCounter {
+ public:
+ // Any bitmap may be NULLPTR
+ OptionalBinaryBitBlockCounter(const uint8_t* left_bitmap, int64_t left_offset,
+ const uint8_t* right_bitmap, int64_t right_offset,
+ int64_t length);
+
+ // Any bitmap may be null
+ OptionalBinaryBitBlockCounter(const std::shared_ptr<Buffer>& left_bitmap,
+ int64_t left_offset,
+ const std::shared_ptr<Buffer>& right_bitmap,
+ int64_t right_offset, int64_t length);
+
+ BitBlockCount NextAndBlock() {
+ static constexpr int64_t kMaxBlockSize = std::numeric_limits<int16_t>::max();
+ switch (has_bitmap_) {
+ case HasBitmap::BOTH: {
+ BitBlockCount block = binary_counter_.NextAndWord();
+ position_ += block.length;
+ return block;
+ }
+ case HasBitmap::ONE: {
+ BitBlockCount block = unary_counter_.NextWord();
+ position_ += block.length;
+ return block;
+ }
+ case HasBitmap::NONE:
+ default: {
+ const int16_t block_size =
+ static_cast<int16_t>(std::min(kMaxBlockSize, length_ - position_));
+ position_ += block_size;
+ // All values are non-null
+ return {block_size, block_size};
+ }
+ }
+ }
+
BitBlockCount NextOrNotBlock() {
static constexpr int64_t kMaxBlockSize = std::numeric_limits<int16_t>::max();
switch (has_bitmap_) {
@@ -403,140 +403,140 @@ class ARROW_EXPORT OptionalBinaryBitBlockCounter {
}
}
- private:
- enum class HasBitmap : int { BOTH, ONE, NONE };
-
- const HasBitmap has_bitmap_;
- int64_t position_;
- int64_t length_;
- BitBlockCounter unary_counter_;
- BinaryBitBlockCounter binary_counter_;
-
- static HasBitmap HasBitmapFromBitmaps(bool has_left, bool has_right) {
- switch (static_cast<int>(has_left) + static_cast<int>(has_right)) {
- case 0:
- return HasBitmap::NONE;
- case 1:
- return HasBitmap::ONE;
- default: // 2
- return HasBitmap::BOTH;
- }
- }
-};
-
-// Functional-style bit block visitors.
-
-template <typename VisitNotNull, typename VisitNull>
+ private:
+ enum class HasBitmap : int { BOTH, ONE, NONE };
+
+ const HasBitmap has_bitmap_;
+ int64_t position_;
+ int64_t length_;
+ BitBlockCounter unary_counter_;
+ BinaryBitBlockCounter binary_counter_;
+
+ static HasBitmap HasBitmapFromBitmaps(bool has_left, bool has_right) {
+ switch (static_cast<int>(has_left) + static_cast<int>(has_right)) {
+ case 0:
+ return HasBitmap::NONE;
+ case 1:
+ return HasBitmap::ONE;
+ default: // 2
+ return HasBitmap::BOTH;
+ }
+ }
+};
+
+// Functional-style bit block visitors.
+
+template <typename VisitNotNull, typename VisitNull>
static Status VisitBitBlocks(const std::shared_ptr<Buffer>& bitmap_buf, int64_t offset,
int64_t length, VisitNotNull&& visit_not_null,
VisitNull&& visit_null) {
- const uint8_t* bitmap = NULLPTR;
- if (bitmap_buf != NULLPTR) {
- bitmap = bitmap_buf->data();
- }
- internal::OptionalBitBlockCounter bit_counter(bitmap, offset, length);
- int64_t position = 0;
- while (position < length) {
- internal::BitBlockCount block = bit_counter.NextBlock();
- if (block.AllSet()) {
- for (int64_t i = 0; i < block.length; ++i, ++position) {
- ARROW_RETURN_NOT_OK(visit_not_null(position));
- }
- } else if (block.NoneSet()) {
- for (int64_t i = 0; i < block.length; ++i, ++position) {
- ARROW_RETURN_NOT_OK(visit_null());
- }
- } else {
- for (int64_t i = 0; i < block.length; ++i, ++position) {
- if (BitUtil::GetBit(bitmap, offset + position)) {
- ARROW_RETURN_NOT_OK(visit_not_null(position));
- } else {
- ARROW_RETURN_NOT_OK(visit_null());
- }
- }
- }
- }
- return Status::OK();
-}
-
-template <typename VisitNotNull, typename VisitNull>
+ const uint8_t* bitmap = NULLPTR;
+ if (bitmap_buf != NULLPTR) {
+ bitmap = bitmap_buf->data();
+ }
+ internal::OptionalBitBlockCounter bit_counter(bitmap, offset, length);
+ int64_t position = 0;
+ while (position < length) {
+ internal::BitBlockCount block = bit_counter.NextBlock();
+ if (block.AllSet()) {
+ for (int64_t i = 0; i < block.length; ++i, ++position) {
+ ARROW_RETURN_NOT_OK(visit_not_null(position));
+ }
+ } else if (block.NoneSet()) {
+ for (int64_t i = 0; i < block.length; ++i, ++position) {
+ ARROW_RETURN_NOT_OK(visit_null());
+ }
+ } else {
+ for (int64_t i = 0; i < block.length; ++i, ++position) {
+ if (BitUtil::GetBit(bitmap, offset + position)) {
+ ARROW_RETURN_NOT_OK(visit_not_null(position));
+ } else {
+ ARROW_RETURN_NOT_OK(visit_null());
+ }
+ }
+ }
+ }
+ return Status::OK();
+}
+
+template <typename VisitNotNull, typename VisitNull>
static void VisitBitBlocksVoid(const std::shared_ptr<Buffer>& bitmap_buf, int64_t offset,
int64_t length, VisitNotNull&& visit_not_null,
VisitNull&& visit_null) {
- const uint8_t* bitmap = NULLPTR;
- if (bitmap_buf != NULLPTR) {
- bitmap = bitmap_buf->data();
- }
- internal::OptionalBitBlockCounter bit_counter(bitmap, offset, length);
- int64_t position = 0;
- while (position < length) {
- internal::BitBlockCount block = bit_counter.NextBlock();
- if (block.AllSet()) {
- for (int64_t i = 0; i < block.length; ++i, ++position) {
- visit_not_null(position);
- }
- } else if (block.NoneSet()) {
- for (int64_t i = 0; i < block.length; ++i, ++position) {
- visit_null();
- }
- } else {
- for (int64_t i = 0; i < block.length; ++i, ++position) {
- if (BitUtil::GetBit(bitmap, offset + position)) {
- visit_not_null(position);
- } else {
- visit_null();
- }
- }
- }
- }
-}
-
-template <typename VisitNotNull, typename VisitNull>
+ const uint8_t* bitmap = NULLPTR;
+ if (bitmap_buf != NULLPTR) {
+ bitmap = bitmap_buf->data();
+ }
+ internal::OptionalBitBlockCounter bit_counter(bitmap, offset, length);
+ int64_t position = 0;
+ while (position < length) {
+ internal::BitBlockCount block = bit_counter.NextBlock();
+ if (block.AllSet()) {
+ for (int64_t i = 0; i < block.length; ++i, ++position) {
+ visit_not_null(position);
+ }
+ } else if (block.NoneSet()) {
+ for (int64_t i = 0; i < block.length; ++i, ++position) {
+ visit_null();
+ }
+ } else {
+ for (int64_t i = 0; i < block.length; ++i, ++position) {
+ if (BitUtil::GetBit(bitmap, offset + position)) {
+ visit_not_null(position);
+ } else {
+ visit_null();
+ }
+ }
+ }
+ }
+}
+
+template <typename VisitNotNull, typename VisitNull>
static void VisitTwoBitBlocksVoid(const std::shared_ptr<Buffer>& left_bitmap_buf,
int64_t left_offset,
const std::shared_ptr<Buffer>& right_bitmap_buf,
int64_t right_offset, int64_t length,
VisitNotNull&& visit_not_null, VisitNull&& visit_null) {
- if (left_bitmap_buf == NULLPTR || right_bitmap_buf == NULLPTR) {
- // At most one bitmap is present
- if (left_bitmap_buf == NULLPTR) {
- return VisitBitBlocksVoid(right_bitmap_buf, right_offset, length,
- std::forward<VisitNotNull>(visit_not_null),
- std::forward<VisitNull>(visit_null));
- } else {
- return VisitBitBlocksVoid(left_bitmap_buf, left_offset, length,
- std::forward<VisitNotNull>(visit_not_null),
- std::forward<VisitNull>(visit_null));
- }
- }
- // Both bitmaps are present
- const uint8_t* left_bitmap = left_bitmap_buf->data();
- const uint8_t* right_bitmap = right_bitmap_buf->data();
- BinaryBitBlockCounter bit_counter(left_bitmap, left_offset, right_bitmap, right_offset,
- length);
- int64_t position = 0;
- while (position < length) {
- BitBlockCount block = bit_counter.NextAndWord();
- if (block.AllSet()) {
- for (int64_t i = 0; i < block.length; ++i, ++position) {
- visit_not_null(position);
- }
- } else if (block.NoneSet()) {
- for (int64_t i = 0; i < block.length; ++i, ++position) {
- visit_null();
- }
- } else {
- for (int64_t i = 0; i < block.length; ++i, ++position) {
- if (BitUtil::GetBit(left_bitmap, left_offset + position) &&
- BitUtil::GetBit(right_bitmap, right_offset + position)) {
- visit_not_null(position);
- } else {
- visit_null();
- }
- }
- }
- }
-}
-
-} // namespace internal
-} // namespace arrow
+ if (left_bitmap_buf == NULLPTR || right_bitmap_buf == NULLPTR) {
+ // At most one bitmap is present
+ if (left_bitmap_buf == NULLPTR) {
+ return VisitBitBlocksVoid(right_bitmap_buf, right_offset, length,
+ std::forward<VisitNotNull>(visit_not_null),
+ std::forward<VisitNull>(visit_null));
+ } else {
+ return VisitBitBlocksVoid(left_bitmap_buf, left_offset, length,
+ std::forward<VisitNotNull>(visit_not_null),
+ std::forward<VisitNull>(visit_null));
+ }
+ }
+ // Both bitmaps are present
+ const uint8_t* left_bitmap = left_bitmap_buf->data();
+ const uint8_t* right_bitmap = right_bitmap_buf->data();
+ BinaryBitBlockCounter bit_counter(left_bitmap, left_offset, right_bitmap, right_offset,
+ length);
+ int64_t position = 0;
+ while (position < length) {
+ BitBlockCount block = bit_counter.NextAndWord();
+ if (block.AllSet()) {
+ for (int64_t i = 0; i < block.length; ++i, ++position) {
+ visit_not_null(position);
+ }
+ } else if (block.NoneSet()) {
+ for (int64_t i = 0; i < block.length; ++i, ++position) {
+ visit_null();
+ }
+ } else {
+ for (int64_t i = 0; i < block.length; ++i, ++position) {
+ if (BitUtil::GetBit(left_bitmap, left_offset + position) &&
+ BitUtil::GetBit(right_bitmap, right_offset + position)) {
+ visit_not_null(position);
+ } else {
+ visit_null();
+ }
+ }
+ }
+ }
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_run_reader.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_run_reader.cc
index eda6088eb32..26411f49d7a 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_run_reader.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_run_reader.cc
@@ -1,54 +1,54 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/bit_run_reader.h"
-
-#include <cstdint>
-
-#include "arrow/util/bit_util.h"
-
-namespace arrow {
-namespace internal {
-
-#if ARROW_LITTLE_ENDIAN
-
-BitRunReader::BitRunReader(const uint8_t* bitmap, int64_t start_offset, int64_t length)
- : bitmap_(bitmap + (start_offset / 8)),
- position_(start_offset % 8),
- length_(position_ + length) {
- if (ARROW_PREDICT_FALSE(length == 0)) {
- word_ = 0;
- return;
- }
-
- // On the initial load if there is an offset we need to account for this when
- // loading bytes. Every other call to LoadWord() should only occur when
- // position_ is a multiple of 64.
- current_run_bit_set_ = !BitUtil::GetBit(bitmap, start_offset);
- int64_t bits_remaining = length + position_;
-
- LoadWord(bits_remaining);
-
- // Prepare for inversion in NextRun.
- // Clear out any preceding bits.
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/bit_run_reader.h"
+
+#include <cstdint>
+
+#include "arrow/util/bit_util.h"
+
+namespace arrow {
+namespace internal {
+
+#if ARROW_LITTLE_ENDIAN
+
+BitRunReader::BitRunReader(const uint8_t* bitmap, int64_t start_offset, int64_t length)
+ : bitmap_(bitmap + (start_offset / 8)),
+ position_(start_offset % 8),
+ length_(position_ + length) {
+ if (ARROW_PREDICT_FALSE(length == 0)) {
+ word_ = 0;
+ return;
+ }
+
+ // On the initial load if there is an offset we need to account for this when
+ // loading bytes. Every other call to LoadWord() should only occur when
+ // position_ is a multiple of 64.
+ current_run_bit_set_ = !BitUtil::GetBit(bitmap, start_offset);
+ int64_t bits_remaining = length + position_;
+
+ LoadWord(bits_remaining);
+
+ // Prepare for inversion in NextRun.
+ // Clear out any preceding bits.
word_ = word_ & ~BitUtil::LeastSignificantBitMask(position_);
-}
-
-#endif
-
-} // namespace internal
-} // namespace arrow
+}
+
+#endif
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_run_reader.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_run_reader.h
index 3e196628477..f5b91b641c4 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_run_reader.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_run_reader.h
@@ -1,173 +1,173 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
#include <cassert>
-#include <cstdint>
-#include <cstring>
-#include <string>
-
-#include "arrow/util/bit_util.h"
-#include "arrow/util/bitmap_reader.h"
+#include <cstdint>
+#include <cstring>
+#include <string>
+
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_reader.h"
#include "arrow/util/endian.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace internal {
-
-struct BitRun {
- int64_t length;
- // Whether bits are set at this point.
- bool set;
-
- std::string ToString() const {
- return std::string("{Length: ") + std::to_string(length) +
- ", set=" + std::to_string(set) + "}";
- }
-};
-
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace internal {
+
+struct BitRun {
+ int64_t length;
+ // Whether bits are set at this point.
+ bool set;
+
+ std::string ToString() const {
+ return std::string("{Length: ") + std::to_string(length) +
+ ", set=" + std::to_string(set) + "}";
+ }
+};
+
inline bool operator==(const BitRun& lhs, const BitRun& rhs) {
- return lhs.length == rhs.length && lhs.set == rhs.set;
-}
-
+ return lhs.length == rhs.length && lhs.set == rhs.set;
+}
+
inline bool operator!=(const BitRun& lhs, const BitRun& rhs) {
return lhs.length != rhs.length || lhs.set != rhs.set;
}
-class BitRunReaderLinear {
- public:
- BitRunReaderLinear(const uint8_t* bitmap, int64_t start_offset, int64_t length)
- : reader_(bitmap, start_offset, length) {}
-
- BitRun NextRun() {
- BitRun rl = {/*length=*/0, reader_.IsSet()};
- // Advance while the values are equal and not at the end of list.
- while (reader_.position() < reader_.length() && reader_.IsSet() == rl.set) {
- rl.length++;
- reader_.Next();
- }
- return rl;
- }
-
- private:
- BitmapReader reader_;
-};
-
-#if ARROW_LITTLE_ENDIAN
+class BitRunReaderLinear {
+ public:
+ BitRunReaderLinear(const uint8_t* bitmap, int64_t start_offset, int64_t length)
+ : reader_(bitmap, start_offset, length) {}
+
+ BitRun NextRun() {
+ BitRun rl = {/*length=*/0, reader_.IsSet()};
+ // Advance while the values are equal and not at the end of list.
+ while (reader_.position() < reader_.length() && reader_.IsSet() == rl.set) {
+ rl.length++;
+ reader_.Next();
+ }
+ return rl;
+ }
+
+ private:
+ BitmapReader reader_;
+};
+
+#if ARROW_LITTLE_ENDIAN
/// A convenience class for counting the number of contiguous set/unset bits
-/// in a bitmap.
-class ARROW_EXPORT BitRunReader {
- public:
- /// \brief Constructs new BitRunReader.
- ///
- /// \param[in] bitmap source data
- /// \param[in] start_offset bit offset into the source data
- /// \param[in] length number of bits to copy
- BitRunReader(const uint8_t* bitmap, int64_t start_offset, int64_t length);
-
- /// Returns a new BitRun containing the number of contiguous
- /// bits with the same value. length == 0 indicates the
- /// end of the bitmap.
- BitRun NextRun() {
- if (ARROW_PREDICT_FALSE(position_ >= length_)) {
- return {/*length=*/0, false};
- }
- // This implementation relies on a efficient implementations of
- // CountTrailingZeros and assumes that runs are more often then
- // not. The logic is to incrementally find the next bit change
- // from the current position. This is done by zeroing all
- // bits in word_ up to position_ and using the TrailingZeroCount
- // to find the index of the next set bit.
-
- // The runs alternate on each call, so flip the bit.
- current_run_bit_set_ = !current_run_bit_set_;
-
- int64_t start_position = position_;
- int64_t start_bit_offset = start_position & 63;
- // Invert the word for proper use of CountTrailingZeros and
- // clear bits so CountTrailingZeros can do it magic.
+/// in a bitmap.
+class ARROW_EXPORT BitRunReader {
+ public:
+ /// \brief Constructs new BitRunReader.
+ ///
+ /// \param[in] bitmap source data
+ /// \param[in] start_offset bit offset into the source data
+ /// \param[in] length number of bits to copy
+ BitRunReader(const uint8_t* bitmap, int64_t start_offset, int64_t length);
+
+ /// Returns a new BitRun containing the number of contiguous
+ /// bits with the same value. length == 0 indicates the
+ /// end of the bitmap.
+ BitRun NextRun() {
+ if (ARROW_PREDICT_FALSE(position_ >= length_)) {
+ return {/*length=*/0, false};
+ }
+ // This implementation relies on a efficient implementations of
+ // CountTrailingZeros and assumes that runs are more often then
+ // not. The logic is to incrementally find the next bit change
+ // from the current position. This is done by zeroing all
+ // bits in word_ up to position_ and using the TrailingZeroCount
+ // to find the index of the next set bit.
+
+ // The runs alternate on each call, so flip the bit.
+ current_run_bit_set_ = !current_run_bit_set_;
+
+ int64_t start_position = position_;
+ int64_t start_bit_offset = start_position & 63;
+ // Invert the word for proper use of CountTrailingZeros and
+ // clear bits so CountTrailingZeros can do it magic.
word_ = ~word_ & ~BitUtil::LeastSignificantBitMask(start_bit_offset);
-
- // Go forward until the next change from unset to set.
- int64_t new_bits = BitUtil::CountTrailingZeros(word_) - start_bit_offset;
- position_ += new_bits;
-
- if (ARROW_PREDICT_FALSE(BitUtil::IsMultipleOf64(position_)) &&
- ARROW_PREDICT_TRUE(position_ < length_)) {
- // Continue extending position while we can advance an entire word.
- // (updates position_ accordingly).
- AdvanceUntilChange();
- }
-
- return {/*length=*/position_ - start_position, current_run_bit_set_};
- }
-
- private:
- void AdvanceUntilChange() {
- int64_t new_bits = 0;
- do {
- // Advance the position of the bitmap for loading.
- bitmap_ += sizeof(uint64_t);
- LoadNextWord();
- new_bits = BitUtil::CountTrailingZeros(word_);
- // Continue calculating run length.
- position_ += new_bits;
- } while (ARROW_PREDICT_FALSE(BitUtil::IsMultipleOf64(position_)) &&
- ARROW_PREDICT_TRUE(position_ < length_) && new_bits > 0);
- }
-
- void LoadNextWord() { return LoadWord(length_ - position_); }
-
- // Helper method for Loading the next word.
- void LoadWord(int64_t bits_remaining) {
- word_ = 0;
- // we need at least an extra byte in this case.
- if (ARROW_PREDICT_TRUE(bits_remaining >= 64)) {
- std::memcpy(&word_, bitmap_, 8);
- } else {
- int64_t bytes_to_load = BitUtil::BytesForBits(bits_remaining);
- auto word_ptr = reinterpret_cast<uint8_t*>(&word_);
- std::memcpy(word_ptr, bitmap_, bytes_to_load);
- // Ensure stoppage at last bit in bitmap by reversing the next higher
- // order bit.
- BitUtil::SetBitTo(word_ptr, bits_remaining,
- !BitUtil::GetBit(word_ptr, bits_remaining - 1));
- }
-
- // Two cases:
+
+ // Go forward until the next change from unset to set.
+ int64_t new_bits = BitUtil::CountTrailingZeros(word_) - start_bit_offset;
+ position_ += new_bits;
+
+ if (ARROW_PREDICT_FALSE(BitUtil::IsMultipleOf64(position_)) &&
+ ARROW_PREDICT_TRUE(position_ < length_)) {
+ // Continue extending position while we can advance an entire word.
+ // (updates position_ accordingly).
+ AdvanceUntilChange();
+ }
+
+ return {/*length=*/position_ - start_position, current_run_bit_set_};
+ }
+
+ private:
+ void AdvanceUntilChange() {
+ int64_t new_bits = 0;
+ do {
+ // Advance the position of the bitmap for loading.
+ bitmap_ += sizeof(uint64_t);
+ LoadNextWord();
+ new_bits = BitUtil::CountTrailingZeros(word_);
+ // Continue calculating run length.
+ position_ += new_bits;
+ } while (ARROW_PREDICT_FALSE(BitUtil::IsMultipleOf64(position_)) &&
+ ARROW_PREDICT_TRUE(position_ < length_) && new_bits > 0);
+ }
+
+ void LoadNextWord() { return LoadWord(length_ - position_); }
+
+ // Helper method for Loading the next word.
+ void LoadWord(int64_t bits_remaining) {
+ word_ = 0;
+ // we need at least an extra byte in this case.
+ if (ARROW_PREDICT_TRUE(bits_remaining >= 64)) {
+ std::memcpy(&word_, bitmap_, 8);
+ } else {
+ int64_t bytes_to_load = BitUtil::BytesForBits(bits_remaining);
+ auto word_ptr = reinterpret_cast<uint8_t*>(&word_);
+ std::memcpy(word_ptr, bitmap_, bytes_to_load);
+ // Ensure stoppage at last bit in bitmap by reversing the next higher
+ // order bit.
+ BitUtil::SetBitTo(word_ptr, bits_remaining,
+ !BitUtil::GetBit(word_ptr, bits_remaining - 1));
+ }
+
+ // Two cases:
// 1. For unset, CountTrailingZeros works naturally so we don't
- // invert the word.
- // 2. Otherwise invert so we can use CountTrailingZeros.
- if (current_run_bit_set_) {
- word_ = ~word_;
- }
- }
- const uint8_t* bitmap_;
- int64_t position_;
- int64_t length_;
- uint64_t word_;
- bool current_run_bit_set_;
-};
-#else
-using BitRunReader = BitRunReaderLinear;
-#endif
-
+ // invert the word.
+ // 2. Otherwise invert so we can use CountTrailingZeros.
+ if (current_run_bit_set_) {
+ word_ = ~word_;
+ }
+ }
+ const uint8_t* bitmap_;
+ int64_t position_;
+ int64_t length_;
+ uint64_t word_;
+ bool current_run_bit_set_;
+};
+#else
+using BitRunReader = BitRunReaderLinear;
+#endif
+
struct SetBitRun {
int64_t position;
int64_t length;
@@ -511,5 +511,5 @@ inline void VisitSetBitRunsVoid(const std::shared_ptr<Buffer>& bitmap, int64_t o
std::forward<Visit>(visit));
}
-} // namespace internal
-} // namespace arrow
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_util.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_util.cc
index ee4bcde7713..35d3f683ff9 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_util.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_util.cc
@@ -1,74 +1,74 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/bit_util.h"
-
-#include <cstdint>
-#include <cstring>
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/bit_util.h"
+
+#include <cstdint>
+#include <cstring>
+
#include "arrow/util/logging.h"
-namespace arrow {
-namespace BitUtil {
-
-void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length, bool bits_are_set) {
- if (length == 0) {
- return;
- }
-
- const int64_t i_begin = start_offset;
- const int64_t i_end = start_offset + length;
- const uint8_t fill_byte = static_cast<uint8_t>(-static_cast<uint8_t>(bits_are_set));
-
- const int64_t bytes_begin = i_begin / 8;
- const int64_t bytes_end = i_end / 8 + 1;
-
- const uint8_t first_byte_mask = kPrecedingBitmask[i_begin % 8];
- const uint8_t last_byte_mask = kTrailingBitmask[i_end % 8];
-
- if (bytes_end == bytes_begin + 1) {
- // set bits within a single byte
- const uint8_t only_byte_mask =
- i_end % 8 == 0 ? first_byte_mask
- : static_cast<uint8_t>(first_byte_mask | last_byte_mask);
- bits[bytes_begin] &= only_byte_mask;
- bits[bytes_begin] |= static_cast<uint8_t>(fill_byte & ~only_byte_mask);
- return;
- }
-
- // set/clear trailing bits of first byte
- bits[bytes_begin] &= first_byte_mask;
- bits[bytes_begin] |= static_cast<uint8_t>(fill_byte & ~first_byte_mask);
-
- if (bytes_end - bytes_begin > 2) {
- // set/clear whole bytes
- std::memset(bits + bytes_begin + 1, fill_byte,
- static_cast<size_t>(bytes_end - bytes_begin - 2));
- }
-
- if (i_end % 8 == 0) {
- return;
- }
-
- // set/clear leading bits of last byte
- bits[bytes_end - 1] &= last_byte_mask;
- bits[bytes_end - 1] |= static_cast<uint8_t>(fill_byte & ~last_byte_mask);
-}
-
+namespace arrow {
+namespace BitUtil {
+
+void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length, bool bits_are_set) {
+ if (length == 0) {
+ return;
+ }
+
+ const int64_t i_begin = start_offset;
+ const int64_t i_end = start_offset + length;
+ const uint8_t fill_byte = static_cast<uint8_t>(-static_cast<uint8_t>(bits_are_set));
+
+ const int64_t bytes_begin = i_begin / 8;
+ const int64_t bytes_end = i_end / 8 + 1;
+
+ const uint8_t first_byte_mask = kPrecedingBitmask[i_begin % 8];
+ const uint8_t last_byte_mask = kTrailingBitmask[i_end % 8];
+
+ if (bytes_end == bytes_begin + 1) {
+ // set bits within a single byte
+ const uint8_t only_byte_mask =
+ i_end % 8 == 0 ? first_byte_mask
+ : static_cast<uint8_t>(first_byte_mask | last_byte_mask);
+ bits[bytes_begin] &= only_byte_mask;
+ bits[bytes_begin] |= static_cast<uint8_t>(fill_byte & ~only_byte_mask);
+ return;
+ }
+
+ // set/clear trailing bits of first byte
+ bits[bytes_begin] &= first_byte_mask;
+ bits[bytes_begin] |= static_cast<uint8_t>(fill_byte & ~first_byte_mask);
+
+ if (bytes_end - bytes_begin > 2) {
+ // set/clear whole bytes
+ std::memset(bits + bytes_begin + 1, fill_byte,
+ static_cast<size_t>(bytes_end - bytes_begin - 2));
+ }
+
+ if (i_end % 8 == 0) {
+ return;
+ }
+
+ // set/clear leading bits of last byte
+ bits[bytes_end - 1] &= last_byte_mask;
+ bits[bytes_end - 1] |= static_cast<uint8_t>(fill_byte & ~last_byte_mask);
+}
+
template <bool value>
void SetBitmapImpl(uint8_t* data, int64_t offset, int64_t length) {
// offset length
@@ -123,5 +123,5 @@ void ClearBitmap(uint8_t* data, int64_t offset, int64_t length) {
SetBitmapImpl<false>(data, offset, length);
}
-} // namespace BitUtil
-} // namespace arrow
+} // namespace BitUtil
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_util.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_util.h
index c306ce7821b..a0d139a332e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_util.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_util.h
@@ -1,323 +1,323 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#if defined(_MSC_VER)
-#include <intrin.h> // IWYU pragma: keep
-#include <nmmintrin.h>
-#pragma intrinsic(_BitScanReverse)
-#pragma intrinsic(_BitScanForward)
-#define ARROW_POPCOUNT64 __popcnt64
-#define ARROW_POPCOUNT32 __popcnt
-#else
-#define ARROW_POPCOUNT64 __builtin_popcountll
-#define ARROW_POPCOUNT32 __builtin_popcount
-#endif
-
-#include <cstdint>
-#include <type_traits>
-
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace detail {
-
-template <typename Integer>
-typename std::make_unsigned<Integer>::type as_unsigned(Integer x) {
- return static_cast<typename std::make_unsigned<Integer>::type>(x);
-}
-
-} // namespace detail
-
-namespace BitUtil {
-
-// The number of set bits in a given unsigned byte value, pre-computed
-//
-// Generated with the following Python code
-// output = 'static constexpr uint8_t kBytePopcount[] = {{{0}}};'
-// popcounts = [str(bin(i).count('1')) for i in range(0, 256)]
-// print(output.format(', '.join(popcounts)))
-static constexpr uint8_t kBytePopcount[] = {
- 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3,
- 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4,
- 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4,
- 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5,
- 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2,
- 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5,
- 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4,
- 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6,
- 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
-
-static inline uint64_t PopCount(uint64_t bitmap) { return ARROW_POPCOUNT64(bitmap); }
-static inline uint32_t PopCount(uint32_t bitmap) { return ARROW_POPCOUNT32(bitmap); }
-
-//
-// Bit-related computations on integer values
-//
-
-// Returns the ceil of value/divisor
-constexpr int64_t CeilDiv(int64_t value, int64_t divisor) {
- return (value == 0) ? 0 : 1 + (value - 1) / divisor;
-}
-
-// Return the number of bytes needed to fit the given number of bits
-constexpr int64_t BytesForBits(int64_t bits) {
- // This formula avoids integer overflow on very large `bits`
- return (bits >> 3) + ((bits & 7) != 0);
-}
-
-constexpr bool IsPowerOf2(int64_t value) {
- return value > 0 && (value & (value - 1)) == 0;
-}
-
-constexpr bool IsPowerOf2(uint64_t value) {
- return value > 0 && (value & (value - 1)) == 0;
-}
-
-// Returns the smallest power of two that contains v. If v is already a
-// power of two, it is returned as is.
-static inline int64_t NextPower2(int64_t n) {
- // Taken from
- // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
- n--;
- n |= n >> 1;
- n |= n >> 2;
- n |= n >> 4;
- n |= n >> 8;
- n |= n >> 16;
- n |= n >> 32;
- n++;
- return n;
-}
-
-constexpr bool IsMultipleOf64(int64_t n) { return (n & 63) == 0; }
-
-constexpr bool IsMultipleOf8(int64_t n) { return (n & 7) == 0; }
-
-// Returns a mask for the bit_index lower order bits.
-// Only valid for bit_index in the range [0, 64).
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#if defined(_MSC_VER)
+#include <intrin.h> // IWYU pragma: keep
+#include <nmmintrin.h>
+#pragma intrinsic(_BitScanReverse)
+#pragma intrinsic(_BitScanForward)
+#define ARROW_POPCOUNT64 __popcnt64
+#define ARROW_POPCOUNT32 __popcnt
+#else
+#define ARROW_POPCOUNT64 __builtin_popcountll
+#define ARROW_POPCOUNT32 __builtin_popcount
+#endif
+
+#include <cstdint>
+#include <type_traits>
+
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace detail {
+
+template <typename Integer>
+typename std::make_unsigned<Integer>::type as_unsigned(Integer x) {
+ return static_cast<typename std::make_unsigned<Integer>::type>(x);
+}
+
+} // namespace detail
+
+namespace BitUtil {
+
+// The number of set bits in a given unsigned byte value, pre-computed
+//
+// Generated with the following Python code
+// output = 'static constexpr uint8_t kBytePopcount[] = {{{0}}};'
+// popcounts = [str(bin(i).count('1')) for i in range(0, 256)]
+// print(output.format(', '.join(popcounts)))
+static constexpr uint8_t kBytePopcount[] = {
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3,
+ 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4,
+ 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4,
+ 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5,
+ 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2,
+ 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5,
+ 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4,
+ 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6,
+ 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
+
+static inline uint64_t PopCount(uint64_t bitmap) { return ARROW_POPCOUNT64(bitmap); }
+static inline uint32_t PopCount(uint32_t bitmap) { return ARROW_POPCOUNT32(bitmap); }
+
+//
+// Bit-related computations on integer values
+//
+
+// Returns the ceil of value/divisor
+constexpr int64_t CeilDiv(int64_t value, int64_t divisor) {
+ return (value == 0) ? 0 : 1 + (value - 1) / divisor;
+}
+
+// Return the number of bytes needed to fit the given number of bits
+constexpr int64_t BytesForBits(int64_t bits) {
+ // This formula avoids integer overflow on very large `bits`
+ return (bits >> 3) + ((bits & 7) != 0);
+}
+
+constexpr bool IsPowerOf2(int64_t value) {
+ return value > 0 && (value & (value - 1)) == 0;
+}
+
+constexpr bool IsPowerOf2(uint64_t value) {
+ return value > 0 && (value & (value - 1)) == 0;
+}
+
+// Returns the smallest power of two that contains v. If v is already a
+// power of two, it is returned as is.
+static inline int64_t NextPower2(int64_t n) {
+ // Taken from
+ // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
+ n--;
+ n |= n >> 1;
+ n |= n >> 2;
+ n |= n >> 4;
+ n |= n >> 8;
+ n |= n >> 16;
+ n |= n >> 32;
+ n++;
+ return n;
+}
+
+constexpr bool IsMultipleOf64(int64_t n) { return (n & 63) == 0; }
+
+constexpr bool IsMultipleOf8(int64_t n) { return (n & 7) == 0; }
+
+// Returns a mask for the bit_index lower order bits.
+// Only valid for bit_index in the range [0, 64).
constexpr uint64_t LeastSignificantBitMask(int64_t bit_index) {
- return (static_cast<uint64_t>(1) << bit_index) - 1;
-}
-
-// Returns 'value' rounded up to the nearest multiple of 'factor'
-constexpr int64_t RoundUp(int64_t value, int64_t factor) {
- return CeilDiv(value, factor) * factor;
-}
-
-// Returns 'value' rounded down to the nearest multiple of 'factor'
-constexpr int64_t RoundDown(int64_t value, int64_t factor) {
- return (value / factor) * factor;
-}
-
-// Returns 'value' rounded up to the nearest multiple of 'factor' when factor
-// is a power of two.
-// The result is undefined on overflow, i.e. if `value > 2**64 - factor`,
-// since we cannot return the correct result which would be 2**64.
-constexpr int64_t RoundUpToPowerOf2(int64_t value, int64_t factor) {
- // DCHECK(value >= 0);
- // DCHECK(IsPowerOf2(factor));
- return (value + (factor - 1)) & ~(factor - 1);
-}
-
-constexpr uint64_t RoundUpToPowerOf2(uint64_t value, uint64_t factor) {
- // DCHECK(IsPowerOf2(factor));
- return (value + (factor - 1)) & ~(factor - 1);
-}
-
-constexpr int64_t RoundUpToMultipleOf8(int64_t num) { return RoundUpToPowerOf2(num, 8); }
-
-constexpr int64_t RoundUpToMultipleOf64(int64_t num) {
- return RoundUpToPowerOf2(num, 64);
-}
-
-// Returns the number of bytes covering a sliced bitmap. Find the length
-// rounded to cover full bytes on both extremities.
-//
-// The following example represents a slice (offset=10, length=9)
-//
-// 0 8 16 24
-// |-------|-------|------|
-// [ ] (slice)
-// [ ] (same slice aligned to bytes bounds, length=16)
-//
-// The covering bytes is the length (in bytes) of this new aligned slice.
-constexpr int64_t CoveringBytes(int64_t offset, int64_t length) {
- return (BitUtil::RoundUp(length + offset, 8) - BitUtil::RoundDown(offset, 8)) / 8;
-}
-
-// Returns the 'num_bits' least-significant bits of 'v'.
-static inline uint64_t TrailingBits(uint64_t v, int num_bits) {
- if (ARROW_PREDICT_FALSE(num_bits == 0)) return 0;
- if (ARROW_PREDICT_FALSE(num_bits >= 64)) return v;
- int n = 64 - num_bits;
- return (v << n) >> n;
-}
-
-/// \brief Count the number of leading zeros in an unsigned integer.
-static inline int CountLeadingZeros(uint32_t value) {
-#if defined(__clang__) || defined(__GNUC__)
- if (value == 0) return 32;
- return static_cast<int>(__builtin_clz(value));
-#elif defined(_MSC_VER)
- unsigned long index; // NOLINT
- if (_BitScanReverse(&index, static_cast<unsigned long>(value))) { // NOLINT
- return 31 - static_cast<int>(index);
- } else {
- return 32;
- }
-#else
- int bitpos = 0;
- while (value != 0) {
- value >>= 1;
- ++bitpos;
- }
- return 32 - bitpos;
-#endif
-}
-
-static inline int CountLeadingZeros(uint64_t value) {
-#if defined(__clang__) || defined(__GNUC__)
- if (value == 0) return 64;
- return static_cast<int>(__builtin_clzll(value));
-#elif defined(_MSC_VER)
- unsigned long index; // NOLINT
- if (_BitScanReverse64(&index, value)) { // NOLINT
- return 63 - static_cast<int>(index);
- } else {
- return 64;
- }
-#else
- int bitpos = 0;
- while (value != 0) {
- value >>= 1;
- ++bitpos;
- }
- return 64 - bitpos;
-#endif
-}
-
-static inline int CountTrailingZeros(uint32_t value) {
-#if defined(__clang__) || defined(__GNUC__)
- if (value == 0) return 32;
- return static_cast<int>(__builtin_ctzl(value));
-#elif defined(_MSC_VER)
- unsigned long index; // NOLINT
- if (_BitScanForward(&index, value)) {
- return static_cast<int>(index);
- } else {
- return 32;
- }
-#else
- int bitpos = 0;
- if (value) {
- while (value & 1 == 0) {
- value >>= 1;
- ++bitpos;
- }
- } else {
- bitpos = 32;
- }
- return bitpos;
-#endif
-}
-
-static inline int CountTrailingZeros(uint64_t value) {
-#if defined(__clang__) || defined(__GNUC__)
- if (value == 0) return 64;
- return static_cast<int>(__builtin_ctzll(value));
-#elif defined(_MSC_VER)
- unsigned long index; // NOLINT
- if (_BitScanForward64(&index, value)) {
- return static_cast<int>(index);
- } else {
- return 64;
- }
-#else
- int bitpos = 0;
- if (value) {
- while (value & 1 == 0) {
- value >>= 1;
- ++bitpos;
- }
- } else {
- bitpos = 64;
- }
- return bitpos;
-#endif
-}
-
-// Returns the minimum number of bits needed to represent an unsigned value
-static inline int NumRequiredBits(uint64_t x) { return 64 - CountLeadingZeros(x); }
-
-// Returns ceil(log2(x)).
-static inline int Log2(uint64_t x) {
- // DCHECK_GT(x, 0);
- return NumRequiredBits(x - 1);
-}
-
-//
-// Utilities for reading and writing individual bits by their index
-// in a memory area.
-//
-
-// Bitmask selecting the k-th bit in a byte
-static constexpr uint8_t kBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128};
-
-// the bitwise complement version of kBitmask
-static constexpr uint8_t kFlippedBitmask[] = {254, 253, 251, 247, 239, 223, 191, 127};
-
-// Bitmask selecting the (k - 1) preceding bits in a byte
-static constexpr uint8_t kPrecedingBitmask[] = {0, 1, 3, 7, 15, 31, 63, 127};
-static constexpr uint8_t kPrecedingWrappingBitmask[] = {255, 1, 3, 7, 15, 31, 63, 127};
-
-// the bitwise complement version of kPrecedingBitmask
-static constexpr uint8_t kTrailingBitmask[] = {255, 254, 252, 248, 240, 224, 192, 128};
-
+ return (static_cast<uint64_t>(1) << bit_index) - 1;
+}
+
+// Returns 'value' rounded up to the nearest multiple of 'factor'
+constexpr int64_t RoundUp(int64_t value, int64_t factor) {
+ return CeilDiv(value, factor) * factor;
+}
+
+// Returns 'value' rounded down to the nearest multiple of 'factor'
+constexpr int64_t RoundDown(int64_t value, int64_t factor) {
+ return (value / factor) * factor;
+}
+
+// Returns 'value' rounded up to the nearest multiple of 'factor' when factor
+// is a power of two.
+// The result is undefined on overflow, i.e. if `value > 2**64 - factor`,
+// since we cannot return the correct result which would be 2**64.
+constexpr int64_t RoundUpToPowerOf2(int64_t value, int64_t factor) {
+ // DCHECK(value >= 0);
+ // DCHECK(IsPowerOf2(factor));
+ return (value + (factor - 1)) & ~(factor - 1);
+}
+
+constexpr uint64_t RoundUpToPowerOf2(uint64_t value, uint64_t factor) {
+ // DCHECK(IsPowerOf2(factor));
+ return (value + (factor - 1)) & ~(factor - 1);
+}
+
+constexpr int64_t RoundUpToMultipleOf8(int64_t num) { return RoundUpToPowerOf2(num, 8); }
+
+constexpr int64_t RoundUpToMultipleOf64(int64_t num) {
+ return RoundUpToPowerOf2(num, 64);
+}
+
+// Returns the number of bytes covering a sliced bitmap. Find the length
+// rounded to cover full bytes on both extremities.
+//
+// The following example represents a slice (offset=10, length=9)
+//
+// 0 8 16 24
+// |-------|-------|------|
+// [ ] (slice)
+// [ ] (same slice aligned to bytes bounds, length=16)
+//
+// The covering bytes is the length (in bytes) of this new aligned slice.
+constexpr int64_t CoveringBytes(int64_t offset, int64_t length) {
+ return (BitUtil::RoundUp(length + offset, 8) - BitUtil::RoundDown(offset, 8)) / 8;
+}
+
+// Returns the 'num_bits' least-significant bits of 'v'.
+static inline uint64_t TrailingBits(uint64_t v, int num_bits) {
+ if (ARROW_PREDICT_FALSE(num_bits == 0)) return 0;
+ if (ARROW_PREDICT_FALSE(num_bits >= 64)) return v;
+ int n = 64 - num_bits;
+ return (v << n) >> n;
+}
+
+/// \brief Count the number of leading zeros in an unsigned integer.
+static inline int CountLeadingZeros(uint32_t value) {
+#if defined(__clang__) || defined(__GNUC__)
+ if (value == 0) return 32;
+ return static_cast<int>(__builtin_clz(value));
+#elif defined(_MSC_VER)
+ unsigned long index; // NOLINT
+ if (_BitScanReverse(&index, static_cast<unsigned long>(value))) { // NOLINT
+ return 31 - static_cast<int>(index);
+ } else {
+ return 32;
+ }
+#else
+ int bitpos = 0;
+ while (value != 0) {
+ value >>= 1;
+ ++bitpos;
+ }
+ return 32 - bitpos;
+#endif
+}
+
+static inline int CountLeadingZeros(uint64_t value) {
+#if defined(__clang__) || defined(__GNUC__)
+ if (value == 0) return 64;
+ return static_cast<int>(__builtin_clzll(value));
+#elif defined(_MSC_VER)
+ unsigned long index; // NOLINT
+ if (_BitScanReverse64(&index, value)) { // NOLINT
+ return 63 - static_cast<int>(index);
+ } else {
+ return 64;
+ }
+#else
+ int bitpos = 0;
+ while (value != 0) {
+ value >>= 1;
+ ++bitpos;
+ }
+ return 64 - bitpos;
+#endif
+}
+
+static inline int CountTrailingZeros(uint32_t value) {
+#if defined(__clang__) || defined(__GNUC__)
+ if (value == 0) return 32;
+ return static_cast<int>(__builtin_ctzl(value));
+#elif defined(_MSC_VER)
+ unsigned long index; // NOLINT
+ if (_BitScanForward(&index, value)) {
+ return static_cast<int>(index);
+ } else {
+ return 32;
+ }
+#else
+ int bitpos = 0;
+ if (value) {
+ while (value & 1 == 0) {
+ value >>= 1;
+ ++bitpos;
+ }
+ } else {
+ bitpos = 32;
+ }
+ return bitpos;
+#endif
+}
+
+static inline int CountTrailingZeros(uint64_t value) {
+#if defined(__clang__) || defined(__GNUC__)
+ if (value == 0) return 64;
+ return static_cast<int>(__builtin_ctzll(value));
+#elif defined(_MSC_VER)
+ unsigned long index; // NOLINT
+ if (_BitScanForward64(&index, value)) {
+ return static_cast<int>(index);
+ } else {
+ return 64;
+ }
+#else
+ int bitpos = 0;
+ if (value) {
+ while (value & 1 == 0) {
+ value >>= 1;
+ ++bitpos;
+ }
+ } else {
+ bitpos = 64;
+ }
+ return bitpos;
+#endif
+}
+
+// Returns the minimum number of bits needed to represent an unsigned value
+static inline int NumRequiredBits(uint64_t x) { return 64 - CountLeadingZeros(x); }
+
+// Returns ceil(log2(x)).
+static inline int Log2(uint64_t x) {
+ // DCHECK_GT(x, 0);
+ return NumRequiredBits(x - 1);
+}
+
+//
+// Utilities for reading and writing individual bits by their index
+// in a memory area.
+//
+
+// Bitmask selecting the k-th bit in a byte
+static constexpr uint8_t kBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128};
+
+// the bitwise complement version of kBitmask
+static constexpr uint8_t kFlippedBitmask[] = {254, 253, 251, 247, 239, 223, 191, 127};
+
+// Bitmask selecting the (k - 1) preceding bits in a byte
+static constexpr uint8_t kPrecedingBitmask[] = {0, 1, 3, 7, 15, 31, 63, 127};
+static constexpr uint8_t kPrecedingWrappingBitmask[] = {255, 1, 3, 7, 15, 31, 63, 127};
+
+// the bitwise complement version of kPrecedingBitmask
+static constexpr uint8_t kTrailingBitmask[] = {255, 254, 252, 248, 240, 224, 192, 128};
+
static constexpr bool GetBit(const uint8_t* bits, uint64_t i) {
- return (bits[i >> 3] >> (i & 0x07)) & 1;
-}
-
-// Gets the i-th bit from a byte. Should only be used with i <= 7.
+ return (bits[i >> 3] >> (i & 0x07)) & 1;
+}
+
+// Gets the i-th bit from a byte. Should only be used with i <= 7.
static constexpr bool GetBitFromByte(uint8_t byte, uint8_t i) {
return byte & kBitmask[i];
}
-
-static inline void ClearBit(uint8_t* bits, int64_t i) {
- bits[i / 8] &= kFlippedBitmask[i % 8];
-}
-
-static inline void SetBit(uint8_t* bits, int64_t i) { bits[i / 8] |= kBitmask[i % 8]; }
-
-static inline void SetBitTo(uint8_t* bits, int64_t i, bool bit_is_set) {
- // https://graphics.stanford.edu/~seander/bithacks.html
- // "Conditionally set or clear bits without branching"
- // NOTE: this seems to confuse Valgrind as it reads from potentially
- // uninitialized memory
- bits[i / 8] ^= static_cast<uint8_t>(-static_cast<uint8_t>(bit_is_set) ^ bits[i / 8]) &
- kBitmask[i % 8];
-}
-
-/// \brief set or clear a range of bits quickly
-ARROW_EXPORT
-void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length, bool bits_are_set);
-
+
+static inline void ClearBit(uint8_t* bits, int64_t i) {
+ bits[i / 8] &= kFlippedBitmask[i % 8];
+}
+
+static inline void SetBit(uint8_t* bits, int64_t i) { bits[i / 8] |= kBitmask[i % 8]; }
+
+static inline void SetBitTo(uint8_t* bits, int64_t i, bool bit_is_set) {
+ // https://graphics.stanford.edu/~seander/bithacks.html
+ // "Conditionally set or clear bits without branching"
+ // NOTE: this seems to confuse Valgrind as it reads from potentially
+ // uninitialized memory
+ bits[i / 8] ^= static_cast<uint8_t>(-static_cast<uint8_t>(bit_is_set) ^ bits[i / 8]) &
+ kBitmask[i % 8];
+}
+
+/// \brief set or clear a range of bits quickly
+ARROW_EXPORT
+void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length, bool bits_are_set);
+
/// \brief Sets all bits in the bitmap to true
ARROW_EXPORT
void SetBitmap(uint8_t* data, int64_t offset, int64_t length);
@@ -350,5 +350,5 @@ constexpr Word SpliceWord(int n, Word low, Word high) {
return (high & ~PrecedingWordBitmask<Word>(n)) | (low & PrecedingWordBitmask<Word>(n));
}
-} // namespace BitUtil
-} // namespace arrow
+} // namespace BitUtil
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap.cc
index 33d1dee1957..ce0e27dd4f6 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap.cc
@@ -1,51 +1,51 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/bitmap.h"
-
-#include <cstdint>
-#include <cstring>
-#include <memory>
-#include <string>
-
-#include "arrow/array/array_primitive.h"
-#include "arrow/buffer.h"
-#include "arrow/util/bitmap_ops.h"
-#include "arrow/util/logging.h"
-
-namespace arrow {
-namespace internal {
-
-std::string Bitmap::ToString() const {
- std::string out(length_ + ((length_ - 1) / 8), ' ');
- for (int64_t i = 0; i < length_; ++i) {
- out[i + (i / 8)] = GetBit(i) ? '1' : '0';
- }
- return out;
-}
-
-std::shared_ptr<BooleanArray> Bitmap::ToArray() const {
- return std::make_shared<BooleanArray>(length_, buffer_, nullptr, 0, offset_);
-}
-
-std::string Bitmap::Diff(const Bitmap& other) const {
- return ToArray()->Diff(*other.ToArray());
-}
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/bitmap.h"
+
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+
+#include "arrow/array/array_primitive.h"
+#include "arrow/buffer.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+namespace internal {
+
+std::string Bitmap::ToString() const {
+ std::string out(length_ + ((length_ - 1) / 8), ' ');
+ for (int64_t i = 0; i < length_; ++i) {
+ out[i + (i / 8)] = GetBit(i) ? '1' : '0';
+ }
+ return out;
+}
+
+std::shared_ptr<BooleanArray> Bitmap::ToArray() const {
+ return std::make_shared<BooleanArray>(length_, buffer_, nullptr, 0, offset_);
+}
+
+std::string Bitmap::Diff(const Bitmap& other) const {
+ return ToArray()->Diff(*other.ToArray());
+}
+
void Bitmap::CopyFrom(const Bitmap& other) {
::arrow::internal::CopyBitmap(other.buffer_->data(), other.offset_, other.length_,
buffer_->mutable_data(), offset_);
@@ -56,20 +56,20 @@ void Bitmap::CopyFromInverted(const Bitmap& other) {
buffer_->mutable_data(), offset_);
}
-bool Bitmap::Equals(const Bitmap& other) const {
- if (length_ != other.length_) {
- return false;
- }
- return BitmapEquals(buffer_->data(), offset_, other.buffer_->data(), other.offset(),
- length_);
-}
-
-int64_t Bitmap::BitLength(const Bitmap* bitmaps, size_t N) {
- for (size_t i = 1; i < N; ++i) {
- DCHECK_EQ(bitmaps[i].length(), bitmaps[0].length());
- }
- return bitmaps[0].length();
-}
-
-} // namespace internal
-} // namespace arrow
+bool Bitmap::Equals(const Bitmap& other) const {
+ if (length_ != other.length_) {
+ return false;
+ }
+ return BitmapEquals(buffer_->data(), offset_, other.buffer_->data(), other.offset(),
+ length_);
+}
+
+int64_t Bitmap::BitLength(const Bitmap* bitmaps, size_t N) {
+ for (size_t i = 1; i < N; ++i) {
+ DCHECK_EQ(bitmaps[i].length(), bitmaps[0].length());
+ }
+ return bitmaps[0].length();
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap.h
index 141f863c0b8..bd68224d43b 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap.h
@@ -1,95 +1,95 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <algorithm>
-#include <array>
-#include <bitset>
-#include <cassert>
-#include <cstdint>
-#include <cstring>
-#include <memory>
-#include <string>
-#include <utility>
-
-#include "arrow/buffer.h"
-#include "arrow/util/bit_util.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <array>
+#include <bitset>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "arrow/buffer.h"
+#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_ops.h"
#include "arrow/util/bitmap_reader.h"
#include "arrow/util/bitmap_writer.h"
-#include "arrow/util/compare.h"
+#include "arrow/util/compare.h"
#include "arrow/util/endian.h"
-#include "arrow/util/functional.h"
-#include "arrow/util/string_builder.h"
-#include "arrow/util/string_view.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class BooleanArray;
-
-namespace internal {
-
-class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
- public util::EqualityComparable<Bitmap> {
- public:
- template <typename Word>
- using View = util::basic_string_view<Word>;
-
- Bitmap() = default;
-
- Bitmap(std::shared_ptr<Buffer> buffer, int64_t offset, int64_t length)
- : buffer_(std::move(buffer)), offset_(offset), length_(length) {}
-
- Bitmap(const void* data, int64_t offset, int64_t length)
- : buffer_(std::make_shared<Buffer>(static_cast<const uint8_t*>(data),
- BitUtil::BytesForBits(offset + length))),
- offset_(offset),
- length_(length) {}
-
- Bitmap(void* data, int64_t offset, int64_t length)
- : buffer_(std::make_shared<MutableBuffer>(static_cast<uint8_t*>(data),
- BitUtil::BytesForBits(offset + length))),
- offset_(offset),
- length_(length) {}
-
- Bitmap Slice(int64_t offset) const {
- return Bitmap(buffer_, offset_ + offset, length_ - offset);
- }
-
- Bitmap Slice(int64_t offset, int64_t length) const {
- return Bitmap(buffer_, offset_ + offset, length);
- }
-
- std::string ToString() const;
-
- bool Equals(const Bitmap& other) const;
-
- std::string Diff(const Bitmap& other) const;
-
- bool GetBit(int64_t i) const { return BitUtil::GetBit(buffer_->data(), i + offset_); }
-
- bool operator[](int64_t i) const { return GetBit(i); }
-
- void SetBitTo(int64_t i, bool v) const {
- BitUtil::SetBitTo(buffer_->mutable_data(), i + offset_, v);
- }
-
+#include "arrow/util/functional.h"
+#include "arrow/util/string_builder.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class BooleanArray;
+
+namespace internal {
+
+class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
+ public util::EqualityComparable<Bitmap> {
+ public:
+ template <typename Word>
+ using View = util::basic_string_view<Word>;
+
+ Bitmap() = default;
+
+ Bitmap(std::shared_ptr<Buffer> buffer, int64_t offset, int64_t length)
+ : buffer_(std::move(buffer)), offset_(offset), length_(length) {}
+
+ Bitmap(const void* data, int64_t offset, int64_t length)
+ : buffer_(std::make_shared<Buffer>(static_cast<const uint8_t*>(data),
+ BitUtil::BytesForBits(offset + length))),
+ offset_(offset),
+ length_(length) {}
+
+ Bitmap(void* data, int64_t offset, int64_t length)
+ : buffer_(std::make_shared<MutableBuffer>(static_cast<uint8_t*>(data),
+ BitUtil::BytesForBits(offset + length))),
+ offset_(offset),
+ length_(length) {}
+
+ Bitmap Slice(int64_t offset) const {
+ return Bitmap(buffer_, offset_ + offset, length_ - offset);
+ }
+
+ Bitmap Slice(int64_t offset, int64_t length) const {
+ return Bitmap(buffer_, offset_ + offset, length);
+ }
+
+ std::string ToString() const;
+
+ bool Equals(const Bitmap& other) const;
+
+ std::string Diff(const Bitmap& other) const;
+
+ bool GetBit(int64_t i) const { return BitUtil::GetBit(buffer_->data(), i + offset_); }
+
+ bool operator[](int64_t i) const { return GetBit(i); }
+
+ void SetBitTo(int64_t i, bool v) const {
+ BitUtil::SetBitTo(buffer_->mutable_data(), i + offset_, v);
+ }
+
void SetBitsTo(bool v) {
BitUtil::SetBitsTo(buffer_->mutable_data(), offset_, length_, v);
}
@@ -97,21 +97,21 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
void CopyFrom(const Bitmap& other);
void CopyFromInverted(const Bitmap& other);
- /// \brief Visit bits from each bitmap as bitset<N>
- ///
- /// All bitmaps must have identical length.
- template <size_t N, typename Visitor>
- static void VisitBits(const Bitmap (&bitmaps)[N], Visitor&& visitor) {
- int64_t bit_length = BitLength(bitmaps, N);
- std::bitset<N> bits;
- for (int64_t bit_i = 0; bit_i < bit_length; ++bit_i) {
- for (size_t i = 0; i < N; ++i) {
- bits[i] = bitmaps[i].GetBit(bit_i);
- }
- visitor(bits);
- }
- }
-
+ /// \brief Visit bits from each bitmap as bitset<N>
+ ///
+ /// All bitmaps must have identical length.
+ template <size_t N, typename Visitor>
+ static void VisitBits(const Bitmap (&bitmaps)[N], Visitor&& visitor) {
+ int64_t bit_length = BitLength(bitmaps, N);
+ std::bitset<N> bits;
+ for (int64_t bit_i = 0; bit_i < bit_length; ++bit_i) {
+ for (size_t i = 0; i < N; ++i) {
+ bits[i] = bitmaps[i].GetBit(bit_i);
+ }
+ visitor(bits);
+ }
+ }
+
/// \brief Visit bits from each bitmap as bitset<N>
///
/// All bitmaps must have identical length.
@@ -127,122 +127,122 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
}
}
- /// \brief Visit words of bits from each bitmap as array<Word, N>
- ///
- /// All bitmaps must have identical length. The first bit in a visited bitmap
- /// may be offset within the first visited word, but words will otherwise contain
- /// densely packed bits loaded from the bitmap. That offset within the first word is
- /// returned.
- ///
- /// TODO(bkietz) allow for early termination
+ /// \brief Visit words of bits from each bitmap as array<Word, N>
+ ///
+ /// All bitmaps must have identical length. The first bit in a visited bitmap
+ /// may be offset within the first visited word, but words will otherwise contain
+ /// densely packed bits loaded from the bitmap. That offset within the first word is
+ /// returned.
+ ///
+ /// TODO(bkietz) allow for early termination
// NOTE: this function is efficient on 3+ sufficiently large bitmaps.
// It also has a large prolog / epilog overhead and should be used
// carefully in other cases.
// For 2 bitmaps or less, and/or smaller bitmaps, see also VisitTwoBitBlocksVoid
// and BitmapUInt64Reader.
- template <size_t N, typename Visitor,
+ template <size_t N, typename Visitor,
typename Word = typename std::decay<
internal::call_traits::argument_type<0, Visitor&&>>::type::value_type>
- static int64_t VisitWords(const Bitmap (&bitmaps_arg)[N], Visitor&& visitor) {
- constexpr int64_t kBitWidth = sizeof(Word) * 8;
-
- // local, mutable variables which will be sliced/decremented to represent consumption:
- Bitmap bitmaps[N];
- int64_t offsets[N];
- int64_t bit_length = BitLength(bitmaps_arg, N);
- View<Word> words[N];
- for (size_t i = 0; i < N; ++i) {
- bitmaps[i] = bitmaps_arg[i];
- offsets[i] = bitmaps[i].template word_offset<Word>();
- assert(offsets[i] >= 0 && offsets[i] < kBitWidth);
- words[i] = bitmaps[i].template words<Word>();
- }
-
- auto consume = [&](int64_t consumed_bits) {
- for (size_t i = 0; i < N; ++i) {
- bitmaps[i] = bitmaps[i].Slice(consumed_bits, bit_length - consumed_bits);
- offsets[i] = bitmaps[i].template word_offset<Word>();
- assert(offsets[i] >= 0 && offsets[i] < kBitWidth);
- words[i] = bitmaps[i].template words<Word>();
- }
- bit_length -= consumed_bits;
- };
-
- std::array<Word, N> visited_words;
- visited_words.fill(0);
-
- if (bit_length <= kBitWidth * 2) {
- // bitmaps fit into one or two words so don't bother with optimization
- while (bit_length > 0) {
- auto leading_bits = std::min(bit_length, kBitWidth);
- SafeLoadWords(bitmaps, 0, leading_bits, false, &visited_words);
- visitor(visited_words);
- consume(leading_bits);
- }
- return 0;
- }
-
- int64_t max_offset = *std::max_element(offsets, offsets + N);
- int64_t min_offset = *std::min_element(offsets, offsets + N);
- if (max_offset > 0) {
- // consume leading bits
- auto leading_bits = kBitWidth - min_offset;
- SafeLoadWords(bitmaps, 0, leading_bits, true, &visited_words);
- visitor(visited_words);
- consume(leading_bits);
- }
- assert(*std::min_element(offsets, offsets + N) == 0);
-
- int64_t whole_word_count = bit_length / kBitWidth;
- assert(whole_word_count >= 1);
-
- if (min_offset == max_offset) {
- // all offsets were identical, all leading bits have been consumed
- assert(
- std::all_of(offsets, offsets + N, [](int64_t offset) { return offset == 0; }));
-
- for (int64_t word_i = 0; word_i < whole_word_count; ++word_i) {
- for (size_t i = 0; i < N; ++i) {
- visited_words[i] = words[i][word_i];
- }
- visitor(visited_words);
- }
- consume(whole_word_count * kBitWidth);
- } else {
- // leading bits from potentially incomplete words have been consumed
-
- // word_i such that words[i][word_i] and words[i][word_i + 1] are lie entirely
- // within the bitmap for all i
- for (int64_t word_i = 0; word_i < whole_word_count - 1; ++word_i) {
- for (size_t i = 0; i < N; ++i) {
- if (offsets[i] == 0) {
- visited_words[i] = words[i][word_i];
- } else {
- auto words0 = BitUtil::ToLittleEndian(words[i][word_i]);
- auto words1 = BitUtil::ToLittleEndian(words[i][word_i + 1]);
- visited_words[i] = BitUtil::FromLittleEndian(
- (words0 >> offsets[i]) | (words1 << (kBitWidth - offsets[i])));
- }
- }
- visitor(visited_words);
- }
- consume((whole_word_count - 1) * kBitWidth);
-
- SafeLoadWords(bitmaps, 0, kBitWidth, false, &visited_words);
-
- visitor(visited_words);
- consume(kBitWidth);
- }
-
- // load remaining bits
- if (bit_length > 0) {
- SafeLoadWords(bitmaps, 0, bit_length, false, &visited_words);
- visitor(visited_words);
- }
-
- return min_offset;
- }
-
+ static int64_t VisitWords(const Bitmap (&bitmaps_arg)[N], Visitor&& visitor) {
+ constexpr int64_t kBitWidth = sizeof(Word) * 8;
+
+ // local, mutable variables which will be sliced/decremented to represent consumption:
+ Bitmap bitmaps[N];
+ int64_t offsets[N];
+ int64_t bit_length = BitLength(bitmaps_arg, N);
+ View<Word> words[N];
+ for (size_t i = 0; i < N; ++i) {
+ bitmaps[i] = bitmaps_arg[i];
+ offsets[i] = bitmaps[i].template word_offset<Word>();
+ assert(offsets[i] >= 0 && offsets[i] < kBitWidth);
+ words[i] = bitmaps[i].template words<Word>();
+ }
+
+ auto consume = [&](int64_t consumed_bits) {
+ for (size_t i = 0; i < N; ++i) {
+ bitmaps[i] = bitmaps[i].Slice(consumed_bits, bit_length - consumed_bits);
+ offsets[i] = bitmaps[i].template word_offset<Word>();
+ assert(offsets[i] >= 0 && offsets[i] < kBitWidth);
+ words[i] = bitmaps[i].template words<Word>();
+ }
+ bit_length -= consumed_bits;
+ };
+
+ std::array<Word, N> visited_words;
+ visited_words.fill(0);
+
+ if (bit_length <= kBitWidth * 2) {
+ // bitmaps fit into one or two words so don't bother with optimization
+ while (bit_length > 0) {
+ auto leading_bits = std::min(bit_length, kBitWidth);
+ SafeLoadWords(bitmaps, 0, leading_bits, false, &visited_words);
+ visitor(visited_words);
+ consume(leading_bits);
+ }
+ return 0;
+ }
+
+ int64_t max_offset = *std::max_element(offsets, offsets + N);
+ int64_t min_offset = *std::min_element(offsets, offsets + N);
+ if (max_offset > 0) {
+ // consume leading bits
+ auto leading_bits = kBitWidth - min_offset;
+ SafeLoadWords(bitmaps, 0, leading_bits, true, &visited_words);
+ visitor(visited_words);
+ consume(leading_bits);
+ }
+ assert(*std::min_element(offsets, offsets + N) == 0);
+
+ int64_t whole_word_count = bit_length / kBitWidth;
+ assert(whole_word_count >= 1);
+
+ if (min_offset == max_offset) {
+ // all offsets were identical, all leading bits have been consumed
+ assert(
+ std::all_of(offsets, offsets + N, [](int64_t offset) { return offset == 0; }));
+
+ for (int64_t word_i = 0; word_i < whole_word_count; ++word_i) {
+ for (size_t i = 0; i < N; ++i) {
+ visited_words[i] = words[i][word_i];
+ }
+ visitor(visited_words);
+ }
+ consume(whole_word_count * kBitWidth);
+ } else {
+ // leading bits from potentially incomplete words have been consumed
+
+ // word_i such that words[i][word_i] and words[i][word_i + 1] lie entirely
+ // within the bitmap for all i
+ for (int64_t word_i = 0; word_i < whole_word_count - 1; ++word_i) {
+ for (size_t i = 0; i < N; ++i) {
+ if (offsets[i] == 0) {
+ visited_words[i] = words[i][word_i];
+ } else {
+ auto words0 = BitUtil::ToLittleEndian(words[i][word_i]);
+ auto words1 = BitUtil::ToLittleEndian(words[i][word_i + 1]);
+ visited_words[i] = BitUtil::FromLittleEndian(
+ (words0 >> offsets[i]) | (words1 << (kBitWidth - offsets[i])));
+ }
+ }
+ visitor(visited_words);
+ }
+ consume((whole_word_count - 1) * kBitWidth);
+
+ SafeLoadWords(bitmaps, 0, kBitWidth, false, &visited_words);
+
+ visitor(visited_words);
+ consume(kBitWidth);
+ }
+
+ // load remaining bits
+ if (bit_length > 0) {
+ SafeLoadWords(bitmaps, 0, bit_length, false, &visited_words);
+ visitor(visited_words);
+ }
+
+ return min_offset;
+ }
+
template <size_t N, size_t M, typename ReaderT, typename WriterT, typename Visitor,
typename Word = typename std::decay<
internal::call_traits::argument_type<0, Visitor&&>>::type::value_type>
@@ -369,82 +369,82 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
}
}
- const std::shared_ptr<Buffer>& buffer() const { return buffer_; }
-
- /// offset of first bit relative to buffer().data()
- int64_t offset() const { return offset_; }
-
- /// number of bits in this Bitmap
- int64_t length() const { return length_; }
-
- /// string_view of all bytes which contain any bit in this Bitmap
- util::bytes_view bytes() const {
- auto byte_offset = offset_ / 8;
- auto byte_count = BitUtil::CeilDiv(offset_ + length_, 8) - byte_offset;
- return util::bytes_view(buffer_->data() + byte_offset, byte_count);
- }
-
- private:
- /// string_view of all Words which contain any bit in this Bitmap
- ///
- /// For example, given Word=uint16_t and a bitmap spanning bits [20, 36)
- /// words() would span bits [16, 48).
- ///
- /// 0 16 32 48 64
- /// |-------|-------|------|------| (buffer)
- /// [ ] (bitmap)
- /// |-------|------| (returned words)
- ///
- /// \warning The words may contain bytes which lie outside the buffer or are
- /// uninitialized.
- template <typename Word>
- View<Word> words() const {
- auto bytes_addr = reinterpret_cast<intptr_t>(bytes().data());
- auto words_addr = bytes_addr - bytes_addr % sizeof(Word);
- auto word_byte_count =
- BitUtil::RoundUpToPowerOf2(static_cast<int64_t>(bytes_addr + bytes().size()),
- static_cast<int64_t>(sizeof(Word))) -
- words_addr;
- return View<Word>(reinterpret_cast<const Word*>(words_addr),
- word_byte_count / sizeof(Word));
- }
-
- /// offset of first bit relative to words<Word>().data()
- template <typename Word>
- int64_t word_offset() const {
- return offset_ + 8 * (reinterpret_cast<intptr_t>(buffer_->data()) -
- reinterpret_cast<intptr_t>(words<Word>().data()));
- }
-
- /// load words from bitmaps bitwise
- template <size_t N, typename Word>
- static void SafeLoadWords(const Bitmap (&bitmaps)[N], int64_t offset,
- int64_t out_length, bool set_trailing_bits,
- std::array<Word, N>* out) {
- out->fill(0);
-
- int64_t out_offset = set_trailing_bits ? sizeof(Word) * 8 - out_length : 0;
-
- Bitmap slices[N], out_bitmaps[N];
- for (size_t i = 0; i < N; ++i) {
- slices[i] = bitmaps[i].Slice(offset, out_length);
- out_bitmaps[i] = Bitmap(&out->at(i), out_offset, out_length);
- }
-
- int64_t bit_i = 0;
- Bitmap::VisitBits(slices, [&](std::bitset<N> bits) {
- for (size_t i = 0; i < N; ++i) {
- out_bitmaps[i].SetBitTo(bit_i, bits[i]);
- }
- ++bit_i;
- });
- }
-
- std::shared_ptr<BooleanArray> ToArray() const;
-
- /// assert bitmaps have identical length and return that length
- static int64_t BitLength(const Bitmap* bitmaps, size_t N);
-
+ const std::shared_ptr<Buffer>& buffer() const { return buffer_; }
+
+ /// offset of first bit relative to buffer().data()
+ int64_t offset() const { return offset_; }
+
+ /// number of bits in this Bitmap
+ int64_t length() const { return length_; }
+
+ /// string_view of all bytes which contain any bit in this Bitmap
+ util::bytes_view bytes() const {
+ auto byte_offset = offset_ / 8;
+ auto byte_count = BitUtil::CeilDiv(offset_ + length_, 8) - byte_offset;
+ return util::bytes_view(buffer_->data() + byte_offset, byte_count);
+ }
+
+ private:
+ /// string_view of all Words which contain any bit in this Bitmap
+ ///
+ /// For example, given Word=uint16_t and a bitmap spanning bits [20, 36)
+ /// words() would span bits [16, 48).
+ ///
+ /// 0       16      32     48     64
+ /// |-------|-------|------|------| (buffer)
+ ///           [       ]             (bitmap)
+ ///         |-------|------|        (returned words)
+ ///
+ /// \warning The words may contain bytes which lie outside the buffer or are
+ /// uninitialized.
+ template <typename Word>
+ View<Word> words() const {
+ auto bytes_addr = reinterpret_cast<intptr_t>(bytes().data());
+ auto words_addr = bytes_addr - bytes_addr % sizeof(Word);
+ auto word_byte_count =
+ BitUtil::RoundUpToPowerOf2(static_cast<int64_t>(bytes_addr + bytes().size()),
+ static_cast<int64_t>(sizeof(Word))) -
+ words_addr;
+ return View<Word>(reinterpret_cast<const Word*>(words_addr),
+ word_byte_count / sizeof(Word));
+ }
+
+ /// offset of first bit relative to words<Word>().data()
+ template <typename Word>
+ int64_t word_offset() const {
+ return offset_ + 8 * (reinterpret_cast<intptr_t>(buffer_->data()) -
+ reinterpret_cast<intptr_t>(words<Word>().data()));
+ }
+
+ /// load words from bitmaps bitwise
+ template <size_t N, typename Word>
+ static void SafeLoadWords(const Bitmap (&bitmaps)[N], int64_t offset,
+ int64_t out_length, bool set_trailing_bits,
+ std::array<Word, N>* out) {
+ out->fill(0);
+
+ int64_t out_offset = set_trailing_bits ? sizeof(Word) * 8 - out_length : 0;
+
+ Bitmap slices[N], out_bitmaps[N];
+ for (size_t i = 0; i < N; ++i) {
+ slices[i] = bitmaps[i].Slice(offset, out_length);
+ out_bitmaps[i] = Bitmap(&out->at(i), out_offset, out_length);
+ }
+
+ int64_t bit_i = 0;
+ Bitmap::VisitBits(slices, [&](std::bitset<N> bits) {
+ for (size_t i = 0; i < N; ++i) {
+ out_bitmaps[i].SetBitTo(bit_i, bits[i]);
+ }
+ ++bit_i;
+ });
+ }
+
+ std::shared_ptr<BooleanArray> ToArray() const;
+
+ /// assert bitmaps have identical length and return that length
+ static int64_t BitLength(const Bitmap* bitmaps, size_t N);
+
template <size_t N>
static int64_t BitLength(const std::array<Bitmap, N>& bitmaps) {
for (size_t i = 1; i < N; ++i) {
@@ -453,9 +453,9 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
return bitmaps[0].length();
}
- std::shared_ptr<Buffer> buffer_;
- int64_t offset_ = 0, length_ = 0;
-};
-
-} // namespace internal
-} // namespace arrow
+ std::shared_ptr<Buffer> buffer_;
+ int64_t offset_ = 0, length_ = 0;
+};
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_builders.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_builders.cc
index 9a91b7ac675..030b43a94ef 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_builders.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_builders.cc
@@ -1,72 +1,72 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/bitmap_builders.h"
-
-#include <cstdint>
-#include <cstring>
-#include <memory>
-#include <type_traits>
-#include <utility>
-
-#include "arrow/buffer.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/util/bit_util.h"
-
-namespace arrow {
-namespace internal {
-
-namespace {
-
-void FillBitsFromBytes(const std::vector<uint8_t>& bytes, uint8_t* bits) {
- for (size_t i = 0; i < bytes.size(); ++i) {
- if (bytes[i] > 0) {
- BitUtil::SetBit(bits, i);
- }
- }
-}
-
-} // namespace
-
-Result<std::shared_ptr<Buffer>> BytesToBits(const std::vector<uint8_t>& bytes,
- MemoryPool* pool) {
- int64_t bit_length = BitUtil::BytesForBits(bytes.size());
-
- ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateBuffer(bit_length, pool));
- uint8_t* out_buf = buffer->mutable_data();
- memset(out_buf, 0, static_cast<size_t>(buffer->capacity()));
- FillBitsFromBytes(bytes, out_buf);
- return std::move(buffer);
-}
-
-Result<std::shared_ptr<Buffer>> BitmapAllButOne(MemoryPool* pool, int64_t length,
- int64_t straggler_pos, bool value) {
- if (straggler_pos < 0 || straggler_pos >= length) {
- return Status::Invalid("invalid straggler_pos ", straggler_pos);
- }
-
- ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateBuffer(BitUtil::BytesForBits(length), pool));
-
- auto bitmap_data = buffer->mutable_data();
- BitUtil::SetBitsTo(bitmap_data, 0, length, value);
- BitUtil::SetBitTo(bitmap_data, straggler_pos, !value);
- return std::move(buffer);
-}
-
-} // namespace internal
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/bitmap_builders.h"
+
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <type_traits>
+#include <utility>
+
+#include "arrow/buffer.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/bit_util.h"
+
+namespace arrow {
+namespace internal {
+
+namespace {
+
+void FillBitsFromBytes(const std::vector<uint8_t>& bytes, uint8_t* bits) {
+ for (size_t i = 0; i < bytes.size(); ++i) {
+ if (bytes[i] > 0) {
+ BitUtil::SetBit(bits, i);
+ }
+ }
+}
+
+} // namespace
+
+Result<std::shared_ptr<Buffer>> BytesToBits(const std::vector<uint8_t>& bytes,
+ MemoryPool* pool) {
+ int64_t bit_length = BitUtil::BytesForBits(bytes.size());
+
+ ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateBuffer(bit_length, pool));
+ uint8_t* out_buf = buffer->mutable_data();
+ memset(out_buf, 0, static_cast<size_t>(buffer->capacity()));
+ FillBitsFromBytes(bytes, out_buf);
+ return std::move(buffer);
+}
+
+Result<std::shared_ptr<Buffer>> BitmapAllButOne(MemoryPool* pool, int64_t length,
+ int64_t straggler_pos, bool value) {
+ if (straggler_pos < 0 || straggler_pos >= length) {
+ return Status::Invalid("invalid straggler_pos ", straggler_pos);
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateBuffer(BitUtil::BytesForBits(length), pool));
+
+ auto bitmap_data = buffer->mutable_data();
+ BitUtil::SetBitsTo(bitmap_data, 0, length, value);
+ BitUtil::SetBitTo(bitmap_data, straggler_pos, !value);
+ return std::move(buffer);
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_builders.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_builders.h
index 5bd2ad44140..6a3542eb97f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_builders.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_builders.h
@@ -1,43 +1,43 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <vector>
-
-#include "arrow/result.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace internal {
-
-/// \brief Generate Bitmap with all position to `value` except for one found
-/// at `straggler_pos`.
-ARROW_EXPORT
-Result<std::shared_ptr<Buffer>> BitmapAllButOne(MemoryPool* pool, int64_t length,
- int64_t straggler_pos, bool value = true);
-
-/// \brief Convert vector of bytes to bitmap buffer
-ARROW_EXPORT
-Result<std::shared_ptr<Buffer>> BytesToBits(const std::vector<uint8_t>&,
- MemoryPool* pool = default_memory_pool());
-
-} // namespace internal
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "arrow/result.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace internal {
+
+/// \brief Generate a bitmap with all positions set to `value` except for the one
+/// at `straggler_pos`, which is set to `!value`.
+ARROW_EXPORT
+Result<std::shared_ptr<Buffer>> BitmapAllButOne(MemoryPool* pool, int64_t length,
+ int64_t straggler_pos, bool value = true);
+
+/// \brief Convert vector of bytes to bitmap buffer (bit i is set iff byte i is nonzero)
+ARROW_EXPORT
+Result<std::shared_ptr<Buffer>> BytesToBits(const std::vector<uint8_t>&,
+ MemoryPool* pool = default_memory_pool());
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_generate.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_generate.h
index 129fa913231..a670cdb1f7f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_generate.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_generate.h
@@ -1,111 +1,111 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-
-#include "arrow/buffer.h"
-#include "arrow/memory_pool.h"
-#include "arrow/result.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace internal {
-
-// A std::generate() like function to write sequential bits into a bitmap area.
-// Bits preceding the bitmap area are preserved, bits following the bitmap
-// area may be clobbered.
-
-template <class Generator>
-void GenerateBits(uint8_t* bitmap, int64_t start_offset, int64_t length, Generator&& g) {
- if (length == 0) {
- return;
- }
- uint8_t* cur = bitmap + start_offset / 8;
- uint8_t bit_mask = BitUtil::kBitmask[start_offset % 8];
- uint8_t current_byte = *cur & BitUtil::kPrecedingBitmask[start_offset % 8];
-
- for (int64_t index = 0; index < length; ++index) {
- const bool bit = g();
- current_byte = bit ? (current_byte | bit_mask) : current_byte;
- bit_mask = static_cast<uint8_t>(bit_mask << 1);
- if (bit_mask == 0) {
- bit_mask = 1;
- *cur++ = current_byte;
- current_byte = 0;
- }
- }
- if (bit_mask != 1) {
- *cur++ = current_byte;
- }
-}
-
-// Like GenerateBits(), but unrolls its main loop for higher performance.
-
-template <class Generator>
-void GenerateBitsUnrolled(uint8_t* bitmap, int64_t start_offset, int64_t length,
- Generator&& g) {
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+
+#include "arrow/buffer.h"
+#include "arrow/memory_pool.h"
+#include "arrow/result.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace internal {
+
+// A std::generate()-like function that writes sequential bits into a bitmap area.
+// Bits preceding the bitmap area are preserved; bits following the bitmap
+// area, up to the end of the last byte written, may be clobbered.
+
+template <class Generator>
+void GenerateBits(uint8_t* bitmap, int64_t start_offset, int64_t length, Generator&& g) {
+ if (length == 0) {
+ return;
+ }
+ uint8_t* cur = bitmap + start_offset / 8;
+ uint8_t bit_mask = BitUtil::kBitmask[start_offset % 8];
+ uint8_t current_byte = *cur & BitUtil::kPrecedingBitmask[start_offset % 8];
+
+ for (int64_t index = 0; index < length; ++index) {
+ const bool bit = g();
+ current_byte = bit ? (current_byte | bit_mask) : current_byte;
+ bit_mask = static_cast<uint8_t>(bit_mask << 1);
+ if (bit_mask == 0) {
+ bit_mask = 1;
+ *cur++ = current_byte;
+ current_byte = 0;
+ }
+ }
+ if (bit_mask != 1) {
+ *cur++ = current_byte;
+ }
+}
+
+// Like GenerateBits(), but unrolls its main loop for higher performance.
+
+template <class Generator>
+void GenerateBitsUnrolled(uint8_t* bitmap, int64_t start_offset, int64_t length,
+ Generator&& g) {
static_assert(std::is_same<typename std::result_of<Generator && ()>::type, bool>::value,
"Functor passed to GenerateBitsUnrolled must return bool");
- if (length == 0) {
- return;
- }
- uint8_t current_byte;
- uint8_t* cur = bitmap + start_offset / 8;
- const uint64_t start_bit_offset = start_offset % 8;
- uint8_t bit_mask = BitUtil::kBitmask[start_bit_offset];
- int64_t remaining = length;
-
- if (bit_mask != 0x01) {
- current_byte = *cur & BitUtil::kPrecedingBitmask[start_bit_offset];
- while (bit_mask != 0 && remaining > 0) {
+ if (length == 0) {
+ return;
+ }
+ uint8_t current_byte;
+ uint8_t* cur = bitmap + start_offset / 8;
+ const uint64_t start_bit_offset = start_offset % 8;
+ uint8_t bit_mask = BitUtil::kBitmask[start_bit_offset];
+ int64_t remaining = length;
+
+ if (bit_mask != 0x01) {
+ current_byte = *cur & BitUtil::kPrecedingBitmask[start_bit_offset];
+ while (bit_mask != 0 && remaining > 0) {
current_byte |= g() * bit_mask;
- bit_mask = static_cast<uint8_t>(bit_mask << 1);
- --remaining;
- }
- *cur++ = current_byte;
- }
-
- int64_t remaining_bytes = remaining / 8;
+ bit_mask = static_cast<uint8_t>(bit_mask << 1);
+ --remaining;
+ }
+ *cur++ = current_byte;
+ }
+
+ int64_t remaining_bytes = remaining / 8;
uint8_t out_results[8];
- while (remaining_bytes-- > 0) {
+ while (remaining_bytes-- > 0) {
for (int i = 0; i < 8; ++i) {
out_results[i] = g();
}
*cur++ = (out_results[0] | out_results[1] << 1 | out_results[2] << 2 |
out_results[3] << 3 | out_results[4] << 4 | out_results[5] << 5 |
out_results[6] << 6 | out_results[7] << 7);
- }
-
- int64_t remaining_bits = remaining % 8;
- if (remaining_bits) {
- current_byte = 0;
- bit_mask = 0x01;
- while (remaining_bits-- > 0) {
+ }
+
+ int64_t remaining_bits = remaining % 8;
+ if (remaining_bits) {
+ current_byte = 0;
+ bit_mask = 0x01;
+ while (remaining_bits-- > 0) {
current_byte |= g() * bit_mask;
- bit_mask = static_cast<uint8_t>(bit_mask << 1);
- }
- *cur++ = current_byte;
- }
-}
-
-} // namespace internal
-} // namespace arrow
+ bit_mask = static_cast<uint8_t>(bit_mask << 1);
+ }
+ *cur++ = current_byte;
+ }
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_ops.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_ops.cc
index 63c8b008f4a..241b06d2dfd 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_ops.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_ops.cc
@@ -1,220 +1,220 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/bitmap_ops.h"
-
-#include <cstdint>
-#include <cstring>
-#include <functional>
-#include <memory>
-
-#include "arrow/buffer.h"
-#include "arrow/result.h"
-#include "arrow/util/align_util.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/bitmap_reader.h"
-#include "arrow/util/bitmap_writer.h"
-#include "arrow/util/logging.h"
-
-namespace arrow {
-namespace internal {
-
-int64_t CountSetBits(const uint8_t* data, int64_t bit_offset, int64_t length) {
- constexpr int64_t pop_len = sizeof(uint64_t) * 8;
- DCHECK_GE(bit_offset, 0);
- int64_t count = 0;
-
- const auto p = BitmapWordAlign<pop_len / 8>(data, bit_offset, length);
- for (int64_t i = bit_offset; i < bit_offset + p.leading_bits; ++i) {
- if (BitUtil::GetBit(data, i)) {
- ++count;
- }
- }
-
- if (p.aligned_words > 0) {
- // popcount as much as possible with the widest possible count
- const uint64_t* u64_data = reinterpret_cast<const uint64_t*>(p.aligned_start);
- DCHECK_EQ(reinterpret_cast<size_t>(u64_data) & 7, 0);
- const uint64_t* end = u64_data + p.aligned_words;
-
- constexpr int64_t kCountUnrollFactor = 4;
- const int64_t words_rounded = BitUtil::RoundDown(p.aligned_words, kCountUnrollFactor);
- int64_t count_unroll[kCountUnrollFactor] = {0};
-
- // Unroll the loop for better performance
- for (int64_t i = 0; i < words_rounded; i += kCountUnrollFactor) {
- for (int64_t k = 0; k < kCountUnrollFactor; k++) {
- count_unroll[k] += BitUtil::PopCount(u64_data[k]);
- }
- u64_data += kCountUnrollFactor;
- }
- for (int64_t k = 0; k < kCountUnrollFactor; k++) {
- count += count_unroll[k];
- }
-
- // The trailing part
- for (; u64_data < end; ++u64_data) {
- count += BitUtil::PopCount(*u64_data);
- }
- }
-
- // Account for left over bits (in theory we could fall back to smaller
- // versions of popcount but the code complexity is likely not worth it)
- for (int64_t i = p.trailing_bit_offset; i < bit_offset + length; ++i) {
- if (BitUtil::GetBit(data, i)) {
- ++count;
- }
- }
-
- return count;
-}
-
-enum class TransferMode : bool { Copy, Invert };
-
-template <TransferMode mode>
-void TransferBitmap(const uint8_t* data, int64_t offset, int64_t length,
- int64_t dest_offset, uint8_t* dest) {
- int64_t bit_offset = offset % 8;
- int64_t dest_bit_offset = dest_offset % 8;
-
- if (bit_offset || dest_bit_offset) {
- auto reader = internal::BitmapWordReader<uint64_t>(data, offset, length);
- auto writer = internal::BitmapWordWriter<uint64_t>(dest, dest_offset, length);
-
- auto nwords = reader.words();
- while (nwords--) {
- auto word = reader.NextWord();
- writer.PutNextWord(mode == TransferMode::Invert ? ~word : word);
- }
- auto nbytes = reader.trailing_bytes();
- while (nbytes--) {
- int valid_bits;
- auto byte = reader.NextTrailingByte(valid_bits);
- writer.PutNextTrailingByte(mode == TransferMode::Invert ? ~byte : byte, valid_bits);
- }
- } else if (length) {
- int64_t num_bytes = BitUtil::BytesForBits(length);
-
- // Shift by its byte offset
- data += offset / 8;
- dest += dest_offset / 8;
-
- // Take care of the trailing bits in the last byte
- // E.g., if trailing_bits = 5, last byte should be
- // - low 3 bits: new bits from last byte of data buffer
- // - high 5 bits: old bits from last byte of dest buffer
- int64_t trailing_bits = num_bytes * 8 - length;
- uint8_t trail_mask = (1U << (8 - trailing_bits)) - 1;
- uint8_t last_data;
-
- if (mode == TransferMode::Invert) {
- for (int64_t i = 0; i < num_bytes - 1; i++) {
- dest[i] = static_cast<uint8_t>(~(data[i]));
- }
- last_data = ~data[num_bytes - 1];
- } else {
- std::memcpy(dest, data, static_cast<size_t>(num_bytes - 1));
- last_data = data[num_bytes - 1];
- }
-
- // Set last byte
- dest[num_bytes - 1] &= ~trail_mask;
- dest[num_bytes - 1] |= last_data & trail_mask;
- }
-}
-
-template <TransferMode mode>
-Result<std::shared_ptr<Buffer>> TransferBitmap(MemoryPool* pool, const uint8_t* data,
- int64_t offset, int64_t length) {
- ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateEmptyBitmap(length, pool));
- uint8_t* dest = buffer->mutable_data();
-
- TransferBitmap<mode>(data, offset, length, 0, dest);
-
- // As we have freshly allocated this bitmap, we should take care of zeroing the
- // remaining bits.
- int64_t num_bytes = BitUtil::BytesForBits(length);
- int64_t bits_to_zero = num_bytes * 8 - length;
- for (int64_t i = length; i < length + bits_to_zero; ++i) {
- // Both branches may copy extra bits - unsetting to match specification.
- BitUtil::ClearBit(dest, i);
- }
- return buffer;
-}
-
-void CopyBitmap(const uint8_t* data, int64_t offset, int64_t length, uint8_t* dest,
- int64_t dest_offset) {
- TransferBitmap<TransferMode::Copy>(data, offset, length, dest_offset, dest);
-}
-
-void InvertBitmap(const uint8_t* data, int64_t offset, int64_t length, uint8_t* dest,
- int64_t dest_offset) {
- TransferBitmap<TransferMode::Invert>(data, offset, length, dest_offset, dest);
-}
-
-Result<std::shared_ptr<Buffer>> CopyBitmap(MemoryPool* pool, const uint8_t* data,
- int64_t offset, int64_t length) {
- return TransferBitmap<TransferMode::Copy>(pool, data, offset, length);
-}
-
-Result<std::shared_ptr<Buffer>> InvertBitmap(MemoryPool* pool, const uint8_t* data,
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/bitmap_ops.h"
+
+#include <cstdint>
+#include <cstring>
+#include <functional>
+#include <memory>
+
+#include "arrow/buffer.h"
+#include "arrow/result.h"
+#include "arrow/util/align_util.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_reader.h"
+#include "arrow/util/bitmap_writer.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+namespace internal {
+
+int64_t CountSetBits(const uint8_t* data, int64_t bit_offset, int64_t length) {
+ constexpr int64_t pop_len = sizeof(uint64_t) * 8;
+ DCHECK_GE(bit_offset, 0);
+ int64_t count = 0;
+
+ const auto p = BitmapWordAlign<pop_len / 8>(data, bit_offset, length);
+ for (int64_t i = bit_offset; i < bit_offset + p.leading_bits; ++i) {
+ if (BitUtil::GetBit(data, i)) {
+ ++count;
+ }
+ }
+
+ if (p.aligned_words > 0) {
+ // popcount as much as possible with the widest possible count
+ const uint64_t* u64_data = reinterpret_cast<const uint64_t*>(p.aligned_start);
+ DCHECK_EQ(reinterpret_cast<size_t>(u64_data) & 7, 0);
+ const uint64_t* end = u64_data + p.aligned_words;
+
+ constexpr int64_t kCountUnrollFactor = 4;
+ const int64_t words_rounded = BitUtil::RoundDown(p.aligned_words, kCountUnrollFactor);
+ int64_t count_unroll[kCountUnrollFactor] = {0};
+
+ // Unroll the loop for better performance
+ for (int64_t i = 0; i < words_rounded; i += kCountUnrollFactor) {
+ for (int64_t k = 0; k < kCountUnrollFactor; k++) {
+ count_unroll[k] += BitUtil::PopCount(u64_data[k]);
+ }
+ u64_data += kCountUnrollFactor;
+ }
+ for (int64_t k = 0; k < kCountUnrollFactor; k++) {
+ count += count_unroll[k];
+ }
+
+ // The trailing part
+ for (; u64_data < end; ++u64_data) {
+ count += BitUtil::PopCount(*u64_data);
+ }
+ }
+
+ // Account for left over bits (in theory we could fall back to smaller
+ // versions of popcount but the code complexity is likely not worth it)
+ for (int64_t i = p.trailing_bit_offset; i < bit_offset + length; ++i) {
+ if (BitUtil::GetBit(data, i)) {
+ ++count;
+ }
+ }
+
+ return count;
+}
+
+enum class TransferMode : bool { Copy, Invert };
+
+template <TransferMode mode>
+void TransferBitmap(const uint8_t* data, int64_t offset, int64_t length,
+ int64_t dest_offset, uint8_t* dest) {
+ int64_t bit_offset = offset % 8;
+ int64_t dest_bit_offset = dest_offset % 8;
+
+ if (bit_offset || dest_bit_offset) {
+ auto reader = internal::BitmapWordReader<uint64_t>(data, offset, length);
+ auto writer = internal::BitmapWordWriter<uint64_t>(dest, dest_offset, length);
+
+ auto nwords = reader.words();
+ while (nwords--) {
+ auto word = reader.NextWord();
+ writer.PutNextWord(mode == TransferMode::Invert ? ~word : word);
+ }
+ auto nbytes = reader.trailing_bytes();
+ while (nbytes--) {
+ int valid_bits;
+ auto byte = reader.NextTrailingByte(valid_bits);
+ writer.PutNextTrailingByte(mode == TransferMode::Invert ? ~byte : byte, valid_bits);
+ }
+ } else if (length) {
+ int64_t num_bytes = BitUtil::BytesForBits(length);
+
+ // Shift by its byte offset
+ data += offset / 8;
+ dest += dest_offset / 8;
+
+ // Take care of the trailing bits in the last byte
+ // E.g., if trailing_bits = 5, last byte should be
+ // - low 3 bits: new bits from last byte of data buffer
+ // - high 5 bits: old bits from last byte of dest buffer
+ int64_t trailing_bits = num_bytes * 8 - length;
+ uint8_t trail_mask = (1U << (8 - trailing_bits)) - 1;
+ uint8_t last_data;
+
+ if (mode == TransferMode::Invert) {
+ for (int64_t i = 0; i < num_bytes - 1; i++) {
+ dest[i] = static_cast<uint8_t>(~(data[i]));
+ }
+ last_data = ~data[num_bytes - 1];
+ } else {
+ std::memcpy(dest, data, static_cast<size_t>(num_bytes - 1));
+ last_data = data[num_bytes - 1];
+ }
+
+ // Set last byte
+ dest[num_bytes - 1] &= ~trail_mask;
+ dest[num_bytes - 1] |= last_data & trail_mask;
+ }
+}
+
+template <TransferMode mode>
+Result<std::shared_ptr<Buffer>> TransferBitmap(MemoryPool* pool, const uint8_t* data,
+ int64_t offset, int64_t length) {
+ ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateEmptyBitmap(length, pool));
+ uint8_t* dest = buffer->mutable_data();
+
+ TransferBitmap<mode>(data, offset, length, 0, dest);
+
+ // As we have freshly allocated this bitmap, we should take care of zeroing the
+ // remaining bits.
+ int64_t num_bytes = BitUtil::BytesForBits(length);
+ int64_t bits_to_zero = num_bytes * 8 - length;
+ for (int64_t i = length; i < length + bits_to_zero; ++i) {
+ // Both branches may copy extra bits - unsetting to match specification.
+ BitUtil::ClearBit(dest, i);
+ }
+ return buffer;
+}
+
+void CopyBitmap(const uint8_t* data, int64_t offset, int64_t length, uint8_t* dest,
+ int64_t dest_offset) {
+ TransferBitmap<TransferMode::Copy>(data, offset, length, dest_offset, dest);
+}
+
+void InvertBitmap(const uint8_t* data, int64_t offset, int64_t length, uint8_t* dest,
+ int64_t dest_offset) {
+ TransferBitmap<TransferMode::Invert>(data, offset, length, dest_offset, dest);
+}
+
+Result<std::shared_ptr<Buffer>> CopyBitmap(MemoryPool* pool, const uint8_t* data,
+ int64_t offset, int64_t length) {
+ return TransferBitmap<TransferMode::Copy>(pool, data, offset, length);
+}
+
+Result<std::shared_ptr<Buffer>> InvertBitmap(MemoryPool* pool, const uint8_t* data,
int64_t offset, int64_t length) {
- return TransferBitmap<TransferMode::Invert>(pool, data, offset, length);
-}
-
-bool BitmapEquals(const uint8_t* left, int64_t left_offset, const uint8_t* right,
- int64_t right_offset, int64_t length) {
- if (left_offset % 8 == 0 && right_offset % 8 == 0) {
- // byte aligned, can use memcmp
- bool bytes_equal =
- std::memcmp(left + left_offset / 8, right + right_offset / 8, length / 8) == 0;
- if (!bytes_equal) {
- return false;
- }
- for (int64_t i = (length / 8) * 8; i < length; ++i) {
- if (BitUtil::GetBit(left, left_offset + i) !=
- BitUtil::GetBit(right, right_offset + i)) {
- return false;
- }
- }
- return true;
- }
-
- // Unaligned slow case
- auto left_reader = internal::BitmapWordReader<uint64_t>(left, left_offset, length);
- auto right_reader = internal::BitmapWordReader<uint64_t>(right, right_offset, length);
-
- auto nwords = left_reader.words();
- while (nwords--) {
- if (left_reader.NextWord() != right_reader.NextWord()) {
- return false;
- }
- }
- auto nbytes = left_reader.trailing_bytes();
- while (nbytes--) {
- int valid_bits;
- if (left_reader.NextTrailingByte(valid_bits) !=
- right_reader.NextTrailingByte(valid_bits)) {
- return false;
- }
- }
- return true;
-}
-
+ return TransferBitmap<TransferMode::Invert>(pool, data, offset, length);
+}
+
+bool BitmapEquals(const uint8_t* left, int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, int64_t length) {
+ if (left_offset % 8 == 0 && right_offset % 8 == 0) {
+ // byte aligned, can use memcmp
+ bool bytes_equal =
+ std::memcmp(left + left_offset / 8, right + right_offset / 8, length / 8) == 0;
+ if (!bytes_equal) {
+ return false;
+ }
+ for (int64_t i = (length / 8) * 8; i < length; ++i) {
+ if (BitUtil::GetBit(left, left_offset + i) !=
+ BitUtil::GetBit(right, right_offset + i)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // Unaligned slow case
+ auto left_reader = internal::BitmapWordReader<uint64_t>(left, left_offset, length);
+ auto right_reader = internal::BitmapWordReader<uint64_t>(right, right_offset, length);
+
+ auto nwords = left_reader.words();
+ while (nwords--) {
+ if (left_reader.NextWord() != right_reader.NextWord()) {
+ return false;
+ }
+ }
+ auto nbytes = left_reader.trailing_bytes();
+ while (nbytes--) {
+ int valid_bits;
+ if (left_reader.NextTrailingByte(valid_bits) !=
+ right_reader.NextTrailingByte(valid_bits)) {
+ return false;
+ }
+ }
+ return true;
+}
+
bool OptionalBitmapEquals(const uint8_t* left, int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length) {
if (left == nullptr && right == nullptr) {
@@ -235,117 +235,117 @@ bool OptionalBitmapEquals(const std::shared_ptr<Buffer>& left, int64_t left_offs
right ? right->data() : nullptr, right_offset, length);
}
-namespace {
-
-template <template <typename> class BitOp>
-void AlignedBitmapOp(const uint8_t* left, int64_t left_offset, const uint8_t* right,
- int64_t right_offset, uint8_t* out, int64_t out_offset,
- int64_t length) {
- BitOp<uint8_t> op;
- DCHECK_EQ(left_offset % 8, right_offset % 8);
- DCHECK_EQ(left_offset % 8, out_offset % 8);
-
- const int64_t nbytes = BitUtil::BytesForBits(length + left_offset % 8);
- left += left_offset / 8;
- right += right_offset / 8;
- out += out_offset / 8;
- for (int64_t i = 0; i < nbytes; ++i) {
- out[i] = op(left[i], right[i]);
- }
-}
-
-template <template <typename> class BitOp>
-void UnalignedBitmapOp(const uint8_t* left, int64_t left_offset, const uint8_t* right,
- int64_t right_offset, uint8_t* out, int64_t out_offset,
- int64_t length) {
- BitOp<uint64_t> op_word;
- BitOp<uint8_t> op_byte;
-
- auto left_reader = internal::BitmapWordReader<uint64_t>(left, left_offset, length);
- auto right_reader = internal::BitmapWordReader<uint64_t>(right, right_offset, length);
- auto writer = internal::BitmapWordWriter<uint64_t>(out, out_offset, length);
-
- auto nwords = left_reader.words();
- while (nwords--) {
- writer.PutNextWord(op_word(left_reader.NextWord(), right_reader.NextWord()));
- }
- auto nbytes = left_reader.trailing_bytes();
- while (nbytes--) {
- int left_valid_bits, right_valid_bits;
- uint8_t left_byte = left_reader.NextTrailingByte(left_valid_bits);
- uint8_t right_byte = right_reader.NextTrailingByte(right_valid_bits);
- DCHECK_EQ(left_valid_bits, right_valid_bits);
- writer.PutNextTrailingByte(op_byte(left_byte, right_byte), left_valid_bits);
- }
-}
-
-template <template <typename> class BitOp>
-void BitmapOp(const uint8_t* left, int64_t left_offset, const uint8_t* right,
- int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* dest) {
- if ((out_offset % 8 == left_offset % 8) && (out_offset % 8 == right_offset % 8)) {
- // Fast case: can use bytewise AND
- AlignedBitmapOp<BitOp>(left, left_offset, right, right_offset, dest, out_offset,
- length);
- } else {
- // Unaligned
- UnalignedBitmapOp<BitOp>(left, left_offset, right, right_offset, dest, out_offset,
- length);
- }
-}
-
-template <template <typename> class BitOp>
-Result<std::shared_ptr<Buffer>> BitmapOp(MemoryPool* pool, const uint8_t* left,
- int64_t left_offset, const uint8_t* right,
- int64_t right_offset, int64_t length,
- int64_t out_offset) {
- const int64_t phys_bits = length + out_offset;
- ARROW_ASSIGN_OR_RAISE(auto out_buffer, AllocateEmptyBitmap(phys_bits, pool));
- BitmapOp<BitOp>(left, left_offset, right, right_offset, length, out_offset,
- out_buffer->mutable_data());
- return out_buffer;
-}
-
-} // namespace
-
-Result<std::shared_ptr<Buffer>> BitmapAnd(MemoryPool* pool, const uint8_t* left,
- int64_t left_offset, const uint8_t* right,
- int64_t right_offset, int64_t length,
- int64_t out_offset) {
- return BitmapOp<std::bit_and>(pool, left, left_offset, right, right_offset, length,
- out_offset);
-}
-
-void BitmapAnd(const uint8_t* left, int64_t left_offset, const uint8_t* right,
- int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out) {
- BitmapOp<std::bit_and>(left, left_offset, right, right_offset, length, out_offset, out);
-}
-
-Result<std::shared_ptr<Buffer>> BitmapOr(MemoryPool* pool, const uint8_t* left,
- int64_t left_offset, const uint8_t* right,
- int64_t right_offset, int64_t length,
- int64_t out_offset) {
- return BitmapOp<std::bit_or>(pool, left, left_offset, right, right_offset, length,
- out_offset);
-}
-
-void BitmapOr(const uint8_t* left, int64_t left_offset, const uint8_t* right,
- int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out) {
- BitmapOp<std::bit_or>(left, left_offset, right, right_offset, length, out_offset, out);
-}
-
-Result<std::shared_ptr<Buffer>> BitmapXor(MemoryPool* pool, const uint8_t* left,
- int64_t left_offset, const uint8_t* right,
- int64_t right_offset, int64_t length,
- int64_t out_offset) {
- return BitmapOp<std::bit_xor>(pool, left, left_offset, right, right_offset, length,
- out_offset);
-}
-
-void BitmapXor(const uint8_t* left, int64_t left_offset, const uint8_t* right,
- int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out) {
- BitmapOp<std::bit_xor>(left, left_offset, right, right_offset, length, out_offset, out);
-}
-
+namespace {
+
+template <template <typename> class BitOp>
+void AlignedBitmapOp(const uint8_t* left, int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, uint8_t* out, int64_t out_offset,
+ int64_t length) {
+ BitOp<uint8_t> op;
+ DCHECK_EQ(left_offset % 8, right_offset % 8);
+ DCHECK_EQ(left_offset % 8, out_offset % 8);
+
+ const int64_t nbytes = BitUtil::BytesForBits(length + left_offset % 8);
+ left += left_offset / 8;
+ right += right_offset / 8;
+ out += out_offset / 8;
+ for (int64_t i = 0; i < nbytes; ++i) {
+ out[i] = op(left[i], right[i]);
+ }
+}
+
+template <template <typename> class BitOp>
+void UnalignedBitmapOp(const uint8_t* left, int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, uint8_t* out, int64_t out_offset,
+ int64_t length) {
+ BitOp<uint64_t> op_word;
+ BitOp<uint8_t> op_byte;
+
+ auto left_reader = internal::BitmapWordReader<uint64_t>(left, left_offset, length);
+ auto right_reader = internal::BitmapWordReader<uint64_t>(right, right_offset, length);
+ auto writer = internal::BitmapWordWriter<uint64_t>(out, out_offset, length);
+
+ auto nwords = left_reader.words();
+ while (nwords--) {
+ writer.PutNextWord(op_word(left_reader.NextWord(), right_reader.NextWord()));
+ }
+ auto nbytes = left_reader.trailing_bytes();
+ while (nbytes--) {
+ int left_valid_bits, right_valid_bits;
+ uint8_t left_byte = left_reader.NextTrailingByte(left_valid_bits);
+ uint8_t right_byte = right_reader.NextTrailingByte(right_valid_bits);
+ DCHECK_EQ(left_valid_bits, right_valid_bits);
+ writer.PutNextTrailingByte(op_byte(left_byte, right_byte), left_valid_bits);
+ }
+}
+
+template <template <typename> class BitOp>
+void BitmapOp(const uint8_t* left, int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* dest) {
+ if ((out_offset % 8 == left_offset % 8) && (out_offset % 8 == right_offset % 8)) {
+ // Fast case: can use bytewise AND
+ AlignedBitmapOp<BitOp>(left, left_offset, right, right_offset, dest, out_offset,
+ length);
+ } else {
+ // Unaligned
+ UnalignedBitmapOp<BitOp>(left, left_offset, right, right_offset, dest, out_offset,
+ length);
+ }
+}
+
+template <template <typename> class BitOp>
+Result<std::shared_ptr<Buffer>> BitmapOp(MemoryPool* pool, const uint8_t* left,
+ int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, int64_t length,
+ int64_t out_offset) {
+ const int64_t phys_bits = length + out_offset;
+ ARROW_ASSIGN_OR_RAISE(auto out_buffer, AllocateEmptyBitmap(phys_bits, pool));
+ BitmapOp<BitOp>(left, left_offset, right, right_offset, length, out_offset,
+ out_buffer->mutable_data());
+ return out_buffer;
+}
+
+} // namespace
+
+Result<std::shared_ptr<Buffer>> BitmapAnd(MemoryPool* pool, const uint8_t* left,
+ int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, int64_t length,
+ int64_t out_offset) {
+ return BitmapOp<std::bit_and>(pool, left, left_offset, right, right_offset, length,
+ out_offset);
+}
+
+void BitmapAnd(const uint8_t* left, int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out) {
+ BitmapOp<std::bit_and>(left, left_offset, right, right_offset, length, out_offset, out);
+}
+
+Result<std::shared_ptr<Buffer>> BitmapOr(MemoryPool* pool, const uint8_t* left,
+ int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, int64_t length,
+ int64_t out_offset) {
+ return BitmapOp<std::bit_or>(pool, left, left_offset, right, right_offset, length,
+ out_offset);
+}
+
+void BitmapOr(const uint8_t* left, int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out) {
+ BitmapOp<std::bit_or>(left, left_offset, right, right_offset, length, out_offset, out);
+}
+
+Result<std::shared_ptr<Buffer>> BitmapXor(MemoryPool* pool, const uint8_t* left,
+ int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, int64_t length,
+ int64_t out_offset) {
+ return BitmapOp<std::bit_xor>(pool, left, left_offset, right, right_offset, length,
+ out_offset);
+}
+
+void BitmapXor(const uint8_t* left, int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out) {
+ BitmapOp<std::bit_xor>(left, left_offset, right, right_offset, length, out_offset, out);
+}
+
template <typename T>
struct AndNotOp {
constexpr T operator()(const T& l, const T& r) const { return l & ~r; }
@@ -383,5 +383,5 @@ void BitmapOrNot(const uint8_t* left, int64_t left_offset, const uint8_t* right,
BitmapOp<OrNotOp>(left, left_offset, right, right_offset, length, out_offset, out);
}
-} // namespace internal
-} // namespace arrow
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_ops.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_ops.h
index 40a7797a239..a756653841a 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_ops.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_ops.h
@@ -1,101 +1,101 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-
-#include "arrow/result.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class Buffer;
-class MemoryPool;
-
-namespace internal {
-
-// ----------------------------------------------------------------------
-// Bitmap utilities
-
-/// Copy a bit range of an existing bitmap
-///
-/// \param[in] pool memory pool to allocate memory from
-/// \param[in] bitmap source data
-/// \param[in] offset bit offset into the source data
-/// \param[in] length number of bits to copy
-///
-/// \return Status message
-ARROW_EXPORT
-Result<std::shared_ptr<Buffer>> CopyBitmap(MemoryPool* pool, const uint8_t* bitmap,
- int64_t offset, int64_t length);
-
-/// Copy a bit range of an existing bitmap into an existing bitmap
-///
-/// \param[in] bitmap source data
-/// \param[in] offset bit offset into the source data
-/// \param[in] length number of bits to copy
-/// \param[in] dest_offset bit offset into the destination
-/// \param[out] dest the destination buffer, must have at least space for
-/// (offset + length) bits
-ARROW_EXPORT
-void CopyBitmap(const uint8_t* bitmap, int64_t offset, int64_t length, uint8_t* dest,
- int64_t dest_offset);
-
-/// Invert a bit range of an existing bitmap into an existing bitmap
-///
-/// \param[in] bitmap source data
-/// \param[in] offset bit offset into the source data
-/// \param[in] length number of bits to copy
-/// \param[in] dest_offset bit offset into the destination
-/// \param[out] dest the destination buffer, must have at least space for
-/// (offset + length) bits
-ARROW_EXPORT
-void InvertBitmap(const uint8_t* bitmap, int64_t offset, int64_t length, uint8_t* dest,
- int64_t dest_offset);
-
-/// Invert a bit range of an existing bitmap
-///
-/// \param[in] pool memory pool to allocate memory from
-/// \param[in] bitmap source data
-/// \param[in] offset bit offset into the source data
-/// \param[in] length number of bits to copy
-///
-/// \return Status message
-ARROW_EXPORT
-Result<std::shared_ptr<Buffer>> InvertBitmap(MemoryPool* pool, const uint8_t* bitmap,
- int64_t offset, int64_t length);
-
-/// Compute the number of 1's in the given data array
-///
-/// \param[in] data a packed LSB-ordered bitmap as a byte array
-/// \param[in] bit_offset a bitwise offset into the bitmap
-/// \param[in] length the number of bits to inspect in the bitmap relative to
-/// the offset
-///
-/// \return The number of set (1) bits in the range
-ARROW_EXPORT
-int64_t CountSetBits(const uint8_t* data, int64_t bit_offset, int64_t length);
-
-ARROW_EXPORT
-bool BitmapEquals(const uint8_t* left, int64_t left_offset, const uint8_t* right,
- int64_t right_offset, int64_t length);
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+
+#include "arrow/result.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Buffer;
+class MemoryPool;
+
+namespace internal {
+
+// ----------------------------------------------------------------------
+// Bitmap utilities
+
+/// Copy a bit range of an existing bitmap
+///
+/// \param[in] pool memory pool to allocate memory from
+/// \param[in] bitmap source data
+/// \param[in] offset bit offset into the source data
+/// \param[in] length number of bits to copy
+///
+/// \return Status message
+ARROW_EXPORT
+Result<std::shared_ptr<Buffer>> CopyBitmap(MemoryPool* pool, const uint8_t* bitmap,
+ int64_t offset, int64_t length);
+
+/// Copy a bit range of an existing bitmap into an existing bitmap
+///
+/// \param[in] bitmap source data
+/// \param[in] offset bit offset into the source data
+/// \param[in] length number of bits to copy
+/// \param[in] dest_offset bit offset into the destination
+/// \param[out] dest the destination buffer, must have at least space for
+/// (offset + length) bits
+ARROW_EXPORT
+void CopyBitmap(const uint8_t* bitmap, int64_t offset, int64_t length, uint8_t* dest,
+ int64_t dest_offset);
+
+/// Invert a bit range of an existing bitmap into an existing bitmap
+///
+/// \param[in] bitmap source data
+/// \param[in] offset bit offset into the source data
+/// \param[in] length number of bits to copy
+/// \param[in] dest_offset bit offset into the destination
+/// \param[out] dest the destination buffer, must have at least space for
+/// (offset + length) bits
+ARROW_EXPORT
+void InvertBitmap(const uint8_t* bitmap, int64_t offset, int64_t length, uint8_t* dest,
+ int64_t dest_offset);
+
+/// Invert a bit range of an existing bitmap
+///
+/// \param[in] pool memory pool to allocate memory from
+/// \param[in] bitmap source data
+/// \param[in] offset bit offset into the source data
+/// \param[in] length number of bits to copy
+///
+/// \return Status message
+ARROW_EXPORT
+Result<std::shared_ptr<Buffer>> InvertBitmap(MemoryPool* pool, const uint8_t* bitmap,
+ int64_t offset, int64_t length);
+
+/// Compute the number of 1's in the given data array
+///
+/// \param[in] data a packed LSB-ordered bitmap as a byte array
+/// \param[in] bit_offset a bitwise offset into the bitmap
+/// \param[in] length the number of bits to inspect in the bitmap relative to
+/// the offset
+///
+/// \return The number of set (1) bits in the range
+ARROW_EXPORT
+int64_t CountSetBits(const uint8_t* data, int64_t bit_offset, int64_t length);
+
+ARROW_EXPORT
+bool BitmapEquals(const uint8_t* left, int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, int64_t length);
+
// Same as BitmapEquals, but considers a NULL bitmap pointer the same as an
// all-ones bitmap.
ARROW_EXPORT
@@ -107,63 +107,63 @@ bool OptionalBitmapEquals(const std::shared_ptr<Buffer>& left, int64_t left_offs
const std::shared_ptr<Buffer>& right, int64_t right_offset,
int64_t length);
-/// \brief Do a "bitmap and" on right and left buffers starting at
-/// their respective bit-offsets for the given bit-length and put
-/// the results in out_buffer starting at the given bit-offset.
-///
-/// out_buffer will be allocated and initialized to zeros using pool before
-/// the operation.
-ARROW_EXPORT
-Result<std::shared_ptr<Buffer>> BitmapAnd(MemoryPool* pool, const uint8_t* left,
- int64_t left_offset, const uint8_t* right,
- int64_t right_offset, int64_t length,
- int64_t out_offset);
-
-/// \brief Do a "bitmap and" on right and left buffers starting at
-/// their respective bit-offsets for the given bit-length and put
-/// the results in out starting at the given bit-offset.
-ARROW_EXPORT
-void BitmapAnd(const uint8_t* left, int64_t left_offset, const uint8_t* right,
- int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out);
-
-/// \brief Do a "bitmap or" for the given bit length on right and left buffers
-/// starting at their respective bit-offsets and put the results in out_buffer
-/// starting at the given bit-offset.
-///
-/// out_buffer will be allocated and initialized to zeros using pool before
-/// the operation.
-ARROW_EXPORT
-Result<std::shared_ptr<Buffer>> BitmapOr(MemoryPool* pool, const uint8_t* left,
- int64_t left_offset, const uint8_t* right,
- int64_t right_offset, int64_t length,
- int64_t out_offset);
-
-/// \brief Do a "bitmap or" for the given bit length on right and left buffers
-/// starting at their respective bit-offsets and put the results in out
-/// starting at the given bit-offset.
-ARROW_EXPORT
-void BitmapOr(const uint8_t* left, int64_t left_offset, const uint8_t* right,
- int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out);
-
-/// \brief Do a "bitmap xor" for the given bit-length on right and left
-/// buffers starting at their respective bit-offsets and put the results in
-/// out_buffer starting at the given bit offset.
-///
-/// out_buffer will be allocated and initialized to zeros using pool before
-/// the operation.
-ARROW_EXPORT
-Result<std::shared_ptr<Buffer>> BitmapXor(MemoryPool* pool, const uint8_t* left,
- int64_t left_offset, const uint8_t* right,
- int64_t right_offset, int64_t length,
- int64_t out_offset);
-
-/// \brief Do a "bitmap xor" for the given bit-length on right and left
-/// buffers starting at their respective bit-offsets and put the results in
-/// out starting at the given bit offset.
-ARROW_EXPORT
-void BitmapXor(const uint8_t* left, int64_t left_offset, const uint8_t* right,
- int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out);
-
+/// \brief Do a "bitmap and" on right and left buffers starting at
+/// their respective bit-offsets for the given bit-length and put
+/// the results in out_buffer starting at the given bit-offset.
+///
+/// out_buffer will be allocated and initialized to zeros using pool before
+/// the operation.
+ARROW_EXPORT
+Result<std::shared_ptr<Buffer>> BitmapAnd(MemoryPool* pool, const uint8_t* left,
+ int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, int64_t length,
+ int64_t out_offset);
+
+/// \brief Do a "bitmap and" on right and left buffers starting at
+/// their respective bit-offsets for the given bit-length and put
+/// the results in out starting at the given bit-offset.
+ARROW_EXPORT
+void BitmapAnd(const uint8_t* left, int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out);
+
+/// \brief Do a "bitmap or" for the given bit length on right and left buffers
+/// starting at their respective bit-offsets and put the results in out_buffer
+/// starting at the given bit-offset.
+///
+/// out_buffer will be allocated and initialized to zeros using pool before
+/// the operation.
+ARROW_EXPORT
+Result<std::shared_ptr<Buffer>> BitmapOr(MemoryPool* pool, const uint8_t* left,
+ int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, int64_t length,
+ int64_t out_offset);
+
+/// \brief Do a "bitmap or" for the given bit length on right and left buffers
+/// starting at their respective bit-offsets and put the results in out
+/// starting at the given bit-offset.
+ARROW_EXPORT
+void BitmapOr(const uint8_t* left, int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out);
+
+/// \brief Do a "bitmap xor" for the given bit-length on right and left
+/// buffers starting at their respective bit-offsets and put the results in
+/// out_buffer starting at the given bit offset.
+///
+/// out_buffer will be allocated and initialized to zeros using pool before
+/// the operation.
+ARROW_EXPORT
+Result<std::shared_ptr<Buffer>> BitmapXor(MemoryPool* pool, const uint8_t* left,
+ int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, int64_t length,
+ int64_t out_offset);
+
+/// \brief Do a "bitmap xor" for the given bit-length on right and left
+/// buffers starting at their respective bit-offsets and put the results in
+/// out starting at the given bit offset.
+ARROW_EXPORT
+void BitmapXor(const uint8_t* left, int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out);
+
/// \brief Do a "bitmap and not" on right and left buffers starting at
/// their respective bit-offsets for the given bit-length and put
/// the results in out_buffer starting at the given bit-offset.
@@ -202,5 +202,5 @@ ARROW_EXPORT
void BitmapOrNot(const uint8_t* left, int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out);
-} // namespace internal
-} // namespace arrow
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_reader.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_reader.h
index 7c43747fafb..6b7ce3bfe61 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_reader.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_reader.h
@@ -1,75 +1,75 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <cstring>
-
-#include "arrow/buffer.h"
-#include "arrow/util/bit_util.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <cstring>
+
+#include "arrow/buffer.h"
+#include "arrow/util/bit_util.h"
#include "arrow/util/endian.h"
-#include "arrow/util/macros.h"
-
-namespace arrow {
-namespace internal {
-
-class BitmapReader {
- public:
- BitmapReader(const uint8_t* bitmap, int64_t start_offset, int64_t length)
- : bitmap_(bitmap), position_(0), length_(length) {
- current_byte_ = 0;
- byte_offset_ = start_offset / 8;
- bit_offset_ = start_offset % 8;
- if (length > 0) {
- current_byte_ = bitmap[byte_offset_];
- }
- }
-
- bool IsSet() const { return (current_byte_ & (1 << bit_offset_)) != 0; }
-
- bool IsNotSet() const { return (current_byte_ & (1 << bit_offset_)) == 0; }
-
- void Next() {
- ++bit_offset_;
- ++position_;
- if (ARROW_PREDICT_FALSE(bit_offset_ == 8)) {
- bit_offset_ = 0;
- ++byte_offset_;
- if (ARROW_PREDICT_TRUE(position_ < length_)) {
- current_byte_ = bitmap_[byte_offset_];
- }
- }
- }
-
- int64_t position() const { return position_; }
-
- int64_t length() const { return length_; }
-
- private:
- const uint8_t* bitmap_;
- int64_t position_;
- int64_t length_;
-
- uint8_t current_byte_;
- int64_t byte_offset_;
- int64_t bit_offset_;
-};
-
+#include "arrow/util/macros.h"
+
+namespace arrow {
+namespace internal {
+
+class BitmapReader {
+ public:
+ BitmapReader(const uint8_t* bitmap, int64_t start_offset, int64_t length)
+ : bitmap_(bitmap), position_(0), length_(length) {
+ current_byte_ = 0;
+ byte_offset_ = start_offset / 8;
+ bit_offset_ = start_offset % 8;
+ if (length > 0) {
+ current_byte_ = bitmap[byte_offset_];
+ }
+ }
+
+ bool IsSet() const { return (current_byte_ & (1 << bit_offset_)) != 0; }
+
+ bool IsNotSet() const { return (current_byte_ & (1 << bit_offset_)) == 0; }
+
+ void Next() {
+ ++bit_offset_;
+ ++position_;
+ if (ARROW_PREDICT_FALSE(bit_offset_ == 8)) {
+ bit_offset_ = 0;
+ ++byte_offset_;
+ if (ARROW_PREDICT_TRUE(position_ < length_)) {
+ current_byte_ = bitmap_[byte_offset_];
+ }
+ }
+ }
+
+ int64_t position() const { return position_; }
+
+ int64_t length() const { return length_; }
+
+ private:
+ const uint8_t* bitmap_;
+ int64_t position_;
+ int64_t length_;
+
+ uint8_t current_byte_;
+ int64_t byte_offset_;
+ int64_t bit_offset_;
+};
+
// XXX Cannot name it BitmapWordReader because the name is already used
// in bitmap_ops.cc
@@ -254,18 +254,18 @@ class BitmapWordReader {
}
};
-/// \brief Index into a possibly non-existent bitmap
-struct OptionalBitIndexer {
- const uint8_t* bitmap;
- const int64_t offset;
-
- explicit OptionalBitIndexer(const std::shared_ptr<Buffer>& buffer, int64_t offset = 0)
- : bitmap(buffer == NULLPTR ? NULLPTR : buffer->data()), offset(offset) {}
-
- bool operator[](int64_t i) const {
+/// \brief Index into a possibly non-existent bitmap
+struct OptionalBitIndexer {
+ const uint8_t* bitmap;
+ const int64_t offset;
+
+ explicit OptionalBitIndexer(const std::shared_ptr<Buffer>& buffer, int64_t offset = 0)
+ : bitmap(buffer == NULLPTR ? NULLPTR : buffer->data()), offset(offset) {}
+
+ bool operator[](int64_t i) const {
return bitmap == NULLPTR || BitUtil::GetBit(bitmap, offset + i);
- }
-};
-
-} // namespace internal
-} // namespace arrow
+ }
+};
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_writer.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_writer.h
index d5c6d909df0..50ff80e2e80 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_writer.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_writer.h
@@ -1,185 +1,185 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <cstring>
-
-#include "arrow/util/bit_util.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <cstring>
+
+#include "arrow/util/bit_util.h"
#include "arrow/util/endian.h"
-#include "arrow/util/macros.h"
-
-namespace arrow {
-namespace internal {
-
-class BitmapWriter {
- // A sequential bitwise writer that preserves surrounding bit values.
-
- public:
- BitmapWriter(uint8_t* bitmap, int64_t start_offset, int64_t length)
- : bitmap_(bitmap), position_(0), length_(length) {
- byte_offset_ = start_offset / 8;
- bit_mask_ = BitUtil::kBitmask[start_offset % 8];
- if (length > 0) {
- current_byte_ = bitmap[byte_offset_];
- } else {
- current_byte_ = 0;
- }
- }
-
- void Set() { current_byte_ |= bit_mask_; }
-
- void Clear() { current_byte_ &= bit_mask_ ^ 0xFF; }
-
- void Next() {
- bit_mask_ = static_cast<uint8_t>(bit_mask_ << 1);
- ++position_;
- if (bit_mask_ == 0) {
- // Finished this byte, need advancing
- bit_mask_ = 0x01;
- bitmap_[byte_offset_++] = current_byte_;
- if (ARROW_PREDICT_TRUE(position_ < length_)) {
- current_byte_ = bitmap_[byte_offset_];
- }
- }
- }
-
- void Finish() {
- // Store current byte if we didn't went past bitmap storage
- if (length_ > 0 && (bit_mask_ != 0x01 || position_ < length_)) {
- bitmap_[byte_offset_] = current_byte_;
- }
- }
-
- int64_t position() const { return position_; }
-
- private:
- uint8_t* bitmap_;
- int64_t position_;
- int64_t length_;
-
- uint8_t current_byte_;
- uint8_t bit_mask_;
- int64_t byte_offset_;
-};
-
-class FirstTimeBitmapWriter {
- // Like BitmapWriter, but any bit values *following* the bits written
- // might be clobbered. It is hence faster than BitmapWriter, and can
- // also avoid false positives with Valgrind.
-
- public:
- FirstTimeBitmapWriter(uint8_t* bitmap, int64_t start_offset, int64_t length)
- : bitmap_(bitmap), position_(0), length_(length) {
- current_byte_ = 0;
- byte_offset_ = start_offset / 8;
- bit_mask_ = BitUtil::kBitmask[start_offset % 8];
- if (length > 0) {
- current_byte_ = bitmap[byte_offset_] & BitUtil::kPrecedingBitmask[start_offset % 8];
- } else {
- current_byte_ = 0;
- }
- }
-
- /// Appends number_of_bits from word to valid_bits and valid_bits_offset.
- ///
- /// \param[in] word The LSB bitmap to append. Any bits past number_of_bits are assumed
- /// to be unset (i.e. 0).
- /// \param[in] number_of_bits The number of bits to append from word.
- void AppendWord(uint64_t word, int64_t number_of_bits) {
- if (ARROW_PREDICT_FALSE(number_of_bits == 0)) {
- return;
- }
-
- // Location that the first byte needs to be written to.
- uint8_t* append_position = bitmap_ + byte_offset_;
-
- // Update state variables except for current_byte_ here.
- position_ += number_of_bits;
- int64_t bit_offset = BitUtil::CountTrailingZeros(static_cast<uint32_t>(bit_mask_));
- bit_mask_ = BitUtil::kBitmask[(bit_offset + number_of_bits) % 8];
- byte_offset_ += (bit_offset + number_of_bits) / 8;
-
- if (bit_offset != 0) {
- // We are in the middle of the byte. This code updates the byte and shifts
- // bits appropriately within word so it can be memcpy'd below.
- int64_t bits_to_carry = 8 - bit_offset;
- // Carry over bits from word to current_byte_. We assume any extra bits in word
- // unset so no additional accounting is needed for when number_of_bits <
- // bits_to_carry.
- current_byte_ |= (word & BitUtil::kPrecedingBitmask[bits_to_carry]) << bit_offset;
- // Check if everything is transfered into current_byte_.
- if (ARROW_PREDICT_FALSE(number_of_bits < bits_to_carry)) {
- return;
- }
- *append_position = current_byte_;
- append_position++;
- // Move the carry bits off of word.
- word = word >> bits_to_carry;
- number_of_bits -= bits_to_carry;
- }
- word = BitUtil::ToLittleEndian(word);
- int64_t bytes_for_word = ::arrow::BitUtil::BytesForBits(number_of_bits);
- std::memcpy(append_position, &word, bytes_for_word);
- // At this point, the previous current_byte_ has been written to bitmap_.
- // The new current_byte_ is either the last relevant byte in 'word'
- // or cleared if the new position is byte aligned (i.e. a fresh byte).
- if (bit_mask_ == 0x1) {
- current_byte_ = 0;
- } else {
- current_byte_ = *(append_position + bytes_for_word - 1);
- }
- }
-
- void Set() { current_byte_ |= bit_mask_; }
-
- void Clear() {}
-
- void Next() {
- bit_mask_ = static_cast<uint8_t>(bit_mask_ << 1);
- ++position_;
- if (bit_mask_ == 0) {
- // Finished this byte, need advancing
- bit_mask_ = 0x01;
- bitmap_[byte_offset_++] = current_byte_;
- current_byte_ = 0;
- }
- }
-
- void Finish() {
- // Store current byte if we didn't went go bitmap storage
- if (length_ > 0 && (bit_mask_ != 0x01 || position_ < length_)) {
- bitmap_[byte_offset_] = current_byte_;
- }
- }
-
- int64_t position() const { return position_; }
-
- private:
- uint8_t* bitmap_;
- int64_t position_;
- int64_t length_;
-
- uint8_t current_byte_;
- uint8_t bit_mask_;
- int64_t byte_offset_;
-};
-
+#include "arrow/util/macros.h"
+
+namespace arrow {
+namespace internal {
+
+class BitmapWriter {
+ // A sequential bitwise writer that preserves surrounding bit values.
+
+ public:
+ BitmapWriter(uint8_t* bitmap, int64_t start_offset, int64_t length)
+ : bitmap_(bitmap), position_(0), length_(length) {
+ byte_offset_ = start_offset / 8;
+ bit_mask_ = BitUtil::kBitmask[start_offset % 8];
+ if (length > 0) {
+ current_byte_ = bitmap[byte_offset_];
+ } else {
+ current_byte_ = 0;
+ }
+ }
+
+ void Set() { current_byte_ |= bit_mask_; }
+
+ void Clear() { current_byte_ &= bit_mask_ ^ 0xFF; }
+
+ void Next() {
+ bit_mask_ = static_cast<uint8_t>(bit_mask_ << 1);
+ ++position_;
+ if (bit_mask_ == 0) {
+ // Finished this byte, need advancing
+ bit_mask_ = 0x01;
+ bitmap_[byte_offset_++] = current_byte_;
+ if (ARROW_PREDICT_TRUE(position_ < length_)) {
+ current_byte_ = bitmap_[byte_offset_];
+ }
+ }
+ }
+
+ void Finish() {
+ // Store the current byte if we didn't go past the bitmap storage
+ if (length_ > 0 && (bit_mask_ != 0x01 || position_ < length_)) {
+ bitmap_[byte_offset_] = current_byte_;
+ }
+ }
+
+ int64_t position() const { return position_; }
+
+ private:
+ uint8_t* bitmap_;
+ int64_t position_;
+ int64_t length_;
+
+ uint8_t current_byte_;
+ uint8_t bit_mask_;
+ int64_t byte_offset_;
+};
+
+class FirstTimeBitmapWriter {
+ // Like BitmapWriter, but any bit values *following* the bits written
+ // might be clobbered. It is hence faster than BitmapWriter, and can
+ // also avoid false positives with Valgrind.
+
+ public:
+ FirstTimeBitmapWriter(uint8_t* bitmap, int64_t start_offset, int64_t length)
+ : bitmap_(bitmap), position_(0), length_(length) {
+ current_byte_ = 0;
+ byte_offset_ = start_offset / 8;
+ bit_mask_ = BitUtil::kBitmask[start_offset % 8];
+ if (length > 0) {
+ current_byte_ = bitmap[byte_offset_] & BitUtil::kPrecedingBitmask[start_offset % 8];
+ } else {
+ current_byte_ = 0;
+ }
+ }
+
+ /// Appends the low number_of_bits bits of word to the bitmap at the current write position.
+ ///
+ /// \param[in] word The LSB bitmap to append. Any bits past number_of_bits are assumed
+ /// to be unset (i.e. 0).
+ /// \param[in] number_of_bits The number of bits to append from word.
+ void AppendWord(uint64_t word, int64_t number_of_bits) {
+ if (ARROW_PREDICT_FALSE(number_of_bits == 0)) {
+ return;
+ }
+
+ // Location that the first byte needs to be written to.
+ uint8_t* append_position = bitmap_ + byte_offset_;
+
+ // Update state variables except for current_byte_ here.
+ position_ += number_of_bits;
+ int64_t bit_offset = BitUtil::CountTrailingZeros(static_cast<uint32_t>(bit_mask_));
+ bit_mask_ = BitUtil::kBitmask[(bit_offset + number_of_bits) % 8];
+ byte_offset_ += (bit_offset + number_of_bits) / 8;
+
+ if (bit_offset != 0) {
+ // We are in the middle of the byte. This code updates the byte and shifts
+ // bits appropriately within word so it can be memcpy'd below.
+ int64_t bits_to_carry = 8 - bit_offset;
+ // Carry over bits from word to current_byte_. We assume any extra bits in word
+ // unset so no additional accounting is needed for when number_of_bits <
+ // bits_to_carry.
+ current_byte_ |= (word & BitUtil::kPrecedingBitmask[bits_to_carry]) << bit_offset;
+ // Check if everything is transferred into current_byte_.
+ if (ARROW_PREDICT_FALSE(number_of_bits < bits_to_carry)) {
+ return;
+ }
+ *append_position = current_byte_;
+ append_position++;
+ // Move the carry bits off of word.
+ word = word >> bits_to_carry;
+ number_of_bits -= bits_to_carry;
+ }
+ word = BitUtil::ToLittleEndian(word);
+ int64_t bytes_for_word = ::arrow::BitUtil::BytesForBits(number_of_bits);
+ std::memcpy(append_position, &word, bytes_for_word);
+ // At this point, the previous current_byte_ has been written to bitmap_.
+ // The new current_byte_ is either the last relevant byte in 'word'
+ // or cleared if the new position is byte aligned (i.e. a fresh byte).
+ if (bit_mask_ == 0x1) {
+ current_byte_ = 0;
+ } else {
+ current_byte_ = *(append_position + bytes_for_word - 1);
+ }
+ }
+
+ void Set() { current_byte_ |= bit_mask_; }
+
+ void Clear() {}
+
+ void Next() {
+ bit_mask_ = static_cast<uint8_t>(bit_mask_ << 1);
+ ++position_;
+ if (bit_mask_ == 0) {
+ // Finished this byte, need advancing
+ bit_mask_ = 0x01;
+ bitmap_[byte_offset_++] = current_byte_;
+ current_byte_ = 0;
+ }
+ }
+
+ void Finish() {
+ // Store the current byte if we didn't go past the bitmap storage
+ if (length_ > 0 && (bit_mask_ != 0x01 || position_ < length_)) {
+ bitmap_[byte_offset_] = current_byte_;
+ }
+ }
+
+ int64_t position() const { return position_; }
+
+ private:
+ uint8_t* bitmap_;
+ int64_t position_;
+ int64_t length_;
+
+ uint8_t current_byte_;
+ uint8_t bit_mask_;
+ int64_t byte_offset_;
+};
+
template <typename Word, bool may_have_byte_offset = true>
class BitmapWordWriter {
public:
@@ -281,5 +281,5 @@ class BitmapWordWriter {
}
};
-} // namespace internal
-} // namespace arrow
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bpacking.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/bpacking.cc
index d9cafd602a2..ac8abe17e21 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bpacking.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bpacking.cc
@@ -1,178 +1,178 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/bpacking.h"
-#include "arrow/util/bpacking_default.h"
-#include "arrow/util/cpu_info.h"
-#include "arrow/util/dispatch.h"
-#include "arrow/util/logging.h"
-
-#if defined(ARROW_HAVE_RUNTIME_AVX2)
-#error #include "arrow/util/bpacking_avx2.h"
-#endif
-#if defined(ARROW_HAVE_RUNTIME_AVX512)
-#error #include "arrow/util/bpacking_avx512.h"
-#endif
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/bpacking.h"
+#include "arrow/util/bpacking_default.h"
+#include "arrow/util/cpu_info.h"
+#include "arrow/util/dispatch.h"
+#include "arrow/util/logging.h"
+
+#if defined(ARROW_HAVE_RUNTIME_AVX2)
+#error #include "arrow/util/bpacking_avx2.h"
+#endif
+#if defined(ARROW_HAVE_RUNTIME_AVX512)
+#error #include "arrow/util/bpacking_avx512.h"
+#endif
#if defined(ARROW_HAVE_NEON)
#error #include "arrow/util/bpacking_neon.h"
#endif
-
-namespace arrow {
-namespace internal {
-
-namespace {
-
-int unpack32_default(const uint32_t* in, uint32_t* out, int batch_size, int num_bits) {
- batch_size = batch_size / 32 * 32;
- int num_loops = batch_size / 32;
-
- switch (num_bits) {
- case 0:
- for (int i = 0; i < num_loops; ++i) in = nullunpacker32(in, out + i * 32);
- break;
- case 1:
- for (int i = 0; i < num_loops; ++i) in = unpack1_32(in, out + i * 32);
- break;
- case 2:
- for (int i = 0; i < num_loops; ++i) in = unpack2_32(in, out + i * 32);
- break;
- case 3:
- for (int i = 0; i < num_loops; ++i) in = unpack3_32(in, out + i * 32);
- break;
- case 4:
- for (int i = 0; i < num_loops; ++i) in = unpack4_32(in, out + i * 32);
- break;
- case 5:
- for (int i = 0; i < num_loops; ++i) in = unpack5_32(in, out + i * 32);
- break;
- case 6:
- for (int i = 0; i < num_loops; ++i) in = unpack6_32(in, out + i * 32);
- break;
- case 7:
- for (int i = 0; i < num_loops; ++i) in = unpack7_32(in, out + i * 32);
- break;
- case 8:
- for (int i = 0; i < num_loops; ++i) in = unpack8_32(in, out + i * 32);
- break;
- case 9:
- for (int i = 0; i < num_loops; ++i) in = unpack9_32(in, out + i * 32);
- break;
- case 10:
- for (int i = 0; i < num_loops; ++i) in = unpack10_32(in, out + i * 32);
- break;
- case 11:
- for (int i = 0; i < num_loops; ++i) in = unpack11_32(in, out + i * 32);
- break;
- case 12:
- for (int i = 0; i < num_loops; ++i) in = unpack12_32(in, out + i * 32);
- break;
- case 13:
- for (int i = 0; i < num_loops; ++i) in = unpack13_32(in, out + i * 32);
- break;
- case 14:
- for (int i = 0; i < num_loops; ++i) in = unpack14_32(in, out + i * 32);
- break;
- case 15:
- for (int i = 0; i < num_loops; ++i) in = unpack15_32(in, out + i * 32);
- break;
- case 16:
- for (int i = 0; i < num_loops; ++i) in = unpack16_32(in, out + i * 32);
- break;
- case 17:
- for (int i = 0; i < num_loops; ++i) in = unpack17_32(in, out + i * 32);
- break;
- case 18:
- for (int i = 0; i < num_loops; ++i) in = unpack18_32(in, out + i * 32);
- break;
- case 19:
- for (int i = 0; i < num_loops; ++i) in = unpack19_32(in, out + i * 32);
- break;
- case 20:
- for (int i = 0; i < num_loops; ++i) in = unpack20_32(in, out + i * 32);
- break;
- case 21:
- for (int i = 0; i < num_loops; ++i) in = unpack21_32(in, out + i * 32);
- break;
- case 22:
- for (int i = 0; i < num_loops; ++i) in = unpack22_32(in, out + i * 32);
- break;
- case 23:
- for (int i = 0; i < num_loops; ++i) in = unpack23_32(in, out + i * 32);
- break;
- case 24:
- for (int i = 0; i < num_loops; ++i) in = unpack24_32(in, out + i * 32);
- break;
- case 25:
- for (int i = 0; i < num_loops; ++i) in = unpack25_32(in, out + i * 32);
- break;
- case 26:
- for (int i = 0; i < num_loops; ++i) in = unpack26_32(in, out + i * 32);
- break;
- case 27:
- for (int i = 0; i < num_loops; ++i) in = unpack27_32(in, out + i * 32);
- break;
- case 28:
- for (int i = 0; i < num_loops; ++i) in = unpack28_32(in, out + i * 32);
- break;
- case 29:
- for (int i = 0; i < num_loops; ++i) in = unpack29_32(in, out + i * 32);
- break;
- case 30:
- for (int i = 0; i < num_loops; ++i) in = unpack30_32(in, out + i * 32);
- break;
- case 31:
- for (int i = 0; i < num_loops; ++i) in = unpack31_32(in, out + i * 32);
- break;
- case 32:
- for (int i = 0; i < num_loops; ++i) in = unpack32_32(in, out + i * 32);
- break;
- default:
- DCHECK(false) << "Unsupported num_bits";
- }
-
- return batch_size;
-}
-
-struct Unpack32DynamicFunction {
- using FunctionType = decltype(&unpack32_default);
-
- static std::vector<std::pair<DispatchLevel, FunctionType>> implementations() {
- return {
- { DispatchLevel::NONE, unpack32_default }
-#if defined(ARROW_HAVE_RUNTIME_AVX2)
- , { DispatchLevel::AVX2, unpack32_avx2 }
-#endif
-#if defined(ARROW_HAVE_RUNTIME_AVX512)
- , { DispatchLevel::AVX512, unpack32_avx512 }
-#endif
- };
- }
-};
-
-} // namespace
-
-int unpack32(const uint32_t* in, uint32_t* out, int batch_size, int num_bits) {
+
+namespace arrow {
+namespace internal {
+
+namespace {
+
+int unpack32_default(const uint32_t* in, uint32_t* out, int batch_size, int num_bits) {
+ batch_size = batch_size / 32 * 32;
+ int num_loops = batch_size / 32;
+
+ switch (num_bits) {
+ case 0:
+ for (int i = 0; i < num_loops; ++i) in = nullunpacker32(in, out + i * 32);
+ break;
+ case 1:
+ for (int i = 0; i < num_loops; ++i) in = unpack1_32(in, out + i * 32);
+ break;
+ case 2:
+ for (int i = 0; i < num_loops; ++i) in = unpack2_32(in, out + i * 32);
+ break;
+ case 3:
+ for (int i = 0; i < num_loops; ++i) in = unpack3_32(in, out + i * 32);
+ break;
+ case 4:
+ for (int i = 0; i < num_loops; ++i) in = unpack4_32(in, out + i * 32);
+ break;
+ case 5:
+ for (int i = 0; i < num_loops; ++i) in = unpack5_32(in, out + i * 32);
+ break;
+ case 6:
+ for (int i = 0; i < num_loops; ++i) in = unpack6_32(in, out + i * 32);
+ break;
+ case 7:
+ for (int i = 0; i < num_loops; ++i) in = unpack7_32(in, out + i * 32);
+ break;
+ case 8:
+ for (int i = 0; i < num_loops; ++i) in = unpack8_32(in, out + i * 32);
+ break;
+ case 9:
+ for (int i = 0; i < num_loops; ++i) in = unpack9_32(in, out + i * 32);
+ break;
+ case 10:
+ for (int i = 0; i < num_loops; ++i) in = unpack10_32(in, out + i * 32);
+ break;
+ case 11:
+ for (int i = 0; i < num_loops; ++i) in = unpack11_32(in, out + i * 32);
+ break;
+ case 12:
+ for (int i = 0; i < num_loops; ++i) in = unpack12_32(in, out + i * 32);
+ break;
+ case 13:
+ for (int i = 0; i < num_loops; ++i) in = unpack13_32(in, out + i * 32);
+ break;
+ case 14:
+ for (int i = 0; i < num_loops; ++i) in = unpack14_32(in, out + i * 32);
+ break;
+ case 15:
+ for (int i = 0; i < num_loops; ++i) in = unpack15_32(in, out + i * 32);
+ break;
+ case 16:
+ for (int i = 0; i < num_loops; ++i) in = unpack16_32(in, out + i * 32);
+ break;
+ case 17:
+ for (int i = 0; i < num_loops; ++i) in = unpack17_32(in, out + i * 32);
+ break;
+ case 18:
+ for (int i = 0; i < num_loops; ++i) in = unpack18_32(in, out + i * 32);
+ break;
+ case 19:
+ for (int i = 0; i < num_loops; ++i) in = unpack19_32(in, out + i * 32);
+ break;
+ case 20:
+ for (int i = 0; i < num_loops; ++i) in = unpack20_32(in, out + i * 32);
+ break;
+ case 21:
+ for (int i = 0; i < num_loops; ++i) in = unpack21_32(in, out + i * 32);
+ break;
+ case 22:
+ for (int i = 0; i < num_loops; ++i) in = unpack22_32(in, out + i * 32);
+ break;
+ case 23:
+ for (int i = 0; i < num_loops; ++i) in = unpack23_32(in, out + i * 32);
+ break;
+ case 24:
+ for (int i = 0; i < num_loops; ++i) in = unpack24_32(in, out + i * 32);
+ break;
+ case 25:
+ for (int i = 0; i < num_loops; ++i) in = unpack25_32(in, out + i * 32);
+ break;
+ case 26:
+ for (int i = 0; i < num_loops; ++i) in = unpack26_32(in, out + i * 32);
+ break;
+ case 27:
+ for (int i = 0; i < num_loops; ++i) in = unpack27_32(in, out + i * 32);
+ break;
+ case 28:
+ for (int i = 0; i < num_loops; ++i) in = unpack28_32(in, out + i * 32);
+ break;
+ case 29:
+ for (int i = 0; i < num_loops; ++i) in = unpack29_32(in, out + i * 32);
+ break;
+ case 30:
+ for (int i = 0; i < num_loops; ++i) in = unpack30_32(in, out + i * 32);
+ break;
+ case 31:
+ for (int i = 0; i < num_loops; ++i) in = unpack31_32(in, out + i * 32);
+ break;
+ case 32:
+ for (int i = 0; i < num_loops; ++i) in = unpack32_32(in, out + i * 32);
+ break;
+ default:
+ DCHECK(false) << "Unsupported num_bits";
+ }
+
+ return batch_size;
+}
+
+struct Unpack32DynamicFunction {
+ using FunctionType = decltype(&unpack32_default);
+
+ static std::vector<std::pair<DispatchLevel, FunctionType>> implementations() {
+ return {
+ { DispatchLevel::NONE, unpack32_default }
+#if defined(ARROW_HAVE_RUNTIME_AVX2)
+ , { DispatchLevel::AVX2, unpack32_avx2 }
+#endif
+#if defined(ARROW_HAVE_RUNTIME_AVX512)
+ , { DispatchLevel::AVX512, unpack32_avx512 }
+#endif
+ };
+ }
+};
+
+} // namespace
+
+int unpack32(const uint32_t* in, uint32_t* out, int batch_size, int num_bits) {
#if defined(ARROW_HAVE_NEON)
return unpack32_neon(in, out, batch_size, num_bits);
#else
- static DynamicDispatch<Unpack32DynamicFunction> dispatch;
- return dispatch.func(in, out, batch_size, num_bits);
+ static DynamicDispatch<Unpack32DynamicFunction> dispatch;
+ return dispatch.func(in, out, batch_size, num_bits);
#endif
-}
-
-} // namespace internal
-} // namespace arrow
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bpacking.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/bpacking.h
index e5a4dbbed89..a7f27607395 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bpacking.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bpacking.h
@@ -1,32 +1,32 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
#include "arrow/util/endian.h"
-#include "arrow/util/visibility.h"
-
-#include <stdint.h>
-
-namespace arrow {
-namespace internal {
-
-ARROW_EXPORT
-int unpack32(const uint32_t* in, uint32_t* out, int batch_size, int num_bits);
-
-} // namespace internal
-} // namespace arrow
+#include "arrow/util/visibility.h"
+
+#include <stdint.h>
+
+namespace arrow {
+namespace internal {
+
+ARROW_EXPORT
+int unpack32(const uint32_t* in, uint32_t* out, int batch_size, int num_bits);
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bpacking_default.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/bpacking_default.h
index d2516effa4d..be8ec1444ec 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bpacking_default.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bpacking_default.h
@@ -1,4251 +1,4251 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// This file was modified from its original version for inclusion in parquet-cpp.
-// Original source:
-// https://github.com/lemire/FrameOfReference/blob/6ccaf9e97160f9a3b299e23a8ef739e711ef0c71/src/bpacking.cpp
-// The original copyright notice follows.
-
-// This code is released under the
-// Apache License Version 2.0 http://www.apache.org/licenses/.
-// (c) Daniel Lemire 2013
-
-#pragma once
-
-#include "arrow/util/bit_util.h"
-#include "arrow/util/ubsan.h"
-
-namespace arrow {
-namespace internal {
-
-inline const uint32_t* unpack1_32(const uint32_t* in, uint32_t* out) {
- uint32_t inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out = (inl >> 0) & 1;
- out++;
- *out = (inl >> 1) & 1;
- out++;
- *out = (inl >> 2) & 1;
- out++;
- *out = (inl >> 3) & 1;
- out++;
- *out = (inl >> 4) & 1;
- out++;
- *out = (inl >> 5) & 1;
- out++;
- *out = (inl >> 6) & 1;
- out++;
- *out = (inl >> 7) & 1;
- out++;
- *out = (inl >> 8) & 1;
- out++;
- *out = (inl >> 9) & 1;
- out++;
- *out = (inl >> 10) & 1;
- out++;
- *out = (inl >> 11) & 1;
- out++;
- *out = (inl >> 12) & 1;
- out++;
- *out = (inl >> 13) & 1;
- out++;
- *out = (inl >> 14) & 1;
- out++;
- *out = (inl >> 15) & 1;
- out++;
- *out = (inl >> 16) & 1;
- out++;
- *out = (inl >> 17) & 1;
- out++;
- *out = (inl >> 18) & 1;
- out++;
- *out = (inl >> 19) & 1;
- out++;
- *out = (inl >> 20) & 1;
- out++;
- *out = (inl >> 21) & 1;
- out++;
- *out = (inl >> 22) & 1;
- out++;
- *out = (inl >> 23) & 1;
- out++;
- *out = (inl >> 24) & 1;
- out++;
- *out = (inl >> 25) & 1;
- out++;
- *out = (inl >> 26) & 1;
- out++;
- *out = (inl >> 27) & 1;
- out++;
- *out = (inl >> 28) & 1;
- out++;
- *out = (inl >> 29) & 1;
- out++;
- *out = (inl >> 30) & 1;
- out++;
- *out = (inl >> 31);
- ++in;
- out++;
-
- return in;
-}
-
-inline const uint32_t* unpack2_32(const uint32_t* in, uint32_t* out) {
- uint32_t inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out = (inl >> 0) % (1U << 2);
- out++;
- *out = (inl >> 2) % (1U << 2);
- out++;
- *out = (inl >> 4) % (1U << 2);
- out++;
- *out = (inl >> 6) % (1U << 2);
- out++;
- *out = (inl >> 8) % (1U << 2);
- out++;
- *out = (inl >> 10) % (1U << 2);
- out++;
- *out = (inl >> 12) % (1U << 2);
- out++;
- *out = (inl >> 14) % (1U << 2);
- out++;
- *out = (inl >> 16) % (1U << 2);
- out++;
- *out = (inl >> 18) % (1U << 2);
- out++;
- *out = (inl >> 20) % (1U << 2);
- out++;
- *out = (inl >> 22) % (1U << 2);
- out++;
- *out = (inl >> 24) % (1U << 2);
- out++;
- *out = (inl >> 26) % (1U << 2);
- out++;
- *out = (inl >> 28) % (1U << 2);
- out++;
- *out = (inl >> 30);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 2);
- out++;
- *out = (inl >> 2) % (1U << 2);
- out++;
- *out = (inl >> 4) % (1U << 2);
- out++;
- *out = (inl >> 6) % (1U << 2);
- out++;
- *out = (inl >> 8) % (1U << 2);
- out++;
- *out = (inl >> 10) % (1U << 2);
- out++;
- *out = (inl >> 12) % (1U << 2);
- out++;
- *out = (inl >> 14) % (1U << 2);
- out++;
- *out = (inl >> 16) % (1U << 2);
- out++;
- *out = (inl >> 18) % (1U << 2);
- out++;
- *out = (inl >> 20) % (1U << 2);
- out++;
- *out = (inl >> 22) % (1U << 2);
- out++;
- *out = (inl >> 24) % (1U << 2);
- out++;
- *out = (inl >> 26) % (1U << 2);
- out++;
- *out = (inl >> 28) % (1U << 2);
- out++;
- *out = (inl >> 30);
- ++in;
- out++;
-
- return in;
-}
-
-inline const uint32_t* unpack3_32(const uint32_t* in, uint32_t* out) {
- uint32_t inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out = (inl >> 0) % (1U << 3);
- out++;
- *out = (inl >> 3) % (1U << 3);
- out++;
- *out = (inl >> 6) % (1U << 3);
- out++;
- *out = (inl >> 9) % (1U << 3);
- out++;
- *out = (inl >> 12) % (1U << 3);
- out++;
- *out = (inl >> 15) % (1U << 3);
- out++;
- *out = (inl >> 18) % (1U << 3);
- out++;
- *out = (inl >> 21) % (1U << 3);
- out++;
- *out = (inl >> 24) % (1U << 3);
- out++;
- *out = (inl >> 27) % (1U << 3);
- out++;
- *out = (inl >> 30);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 1)) << (3 - 1);
- out++;
- *out = (inl >> 1) % (1U << 3);
- out++;
- *out = (inl >> 4) % (1U << 3);
- out++;
- *out = (inl >> 7) % (1U << 3);
- out++;
- *out = (inl >> 10) % (1U << 3);
- out++;
- *out = (inl >> 13) % (1U << 3);
- out++;
- *out = (inl >> 16) % (1U << 3);
- out++;
- *out = (inl >> 19) % (1U << 3);
- out++;
- *out = (inl >> 22) % (1U << 3);
- out++;
- *out = (inl >> 25) % (1U << 3);
- out++;
- *out = (inl >> 28) % (1U << 3);
- out++;
- *out = (inl >> 31);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 2)) << (3 - 2);
- out++;
- *out = (inl >> 2) % (1U << 3);
- out++;
- *out = (inl >> 5) % (1U << 3);
- out++;
- *out = (inl >> 8) % (1U << 3);
- out++;
- *out = (inl >> 11) % (1U << 3);
- out++;
- *out = (inl >> 14) % (1U << 3);
- out++;
- *out = (inl >> 17) % (1U << 3);
- out++;
- *out = (inl >> 20) % (1U << 3);
- out++;
- *out = (inl >> 23) % (1U << 3);
- out++;
- *out = (inl >> 26) % (1U << 3);
- out++;
- *out = (inl >> 29);
- ++in;
- out++;
-
- return in;
-}
-
-inline const uint32_t* unpack4_32(const uint32_t* in, uint32_t* out) {
- uint32_t inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out = (inl >> 0) % (1U << 4);
- out++;
- *out = (inl >> 4) % (1U << 4);
- out++;
- *out = (inl >> 8) % (1U << 4);
- out++;
- *out = (inl >> 12) % (1U << 4);
- out++;
- *out = (inl >> 16) % (1U << 4);
- out++;
- *out = (inl >> 20) % (1U << 4);
- out++;
- *out = (inl >> 24) % (1U << 4);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 4);
- out++;
- *out = (inl >> 4) % (1U << 4);
- out++;
- *out = (inl >> 8) % (1U << 4);
- out++;
- *out = (inl >> 12) % (1U << 4);
- out++;
- *out = (inl >> 16) % (1U << 4);
- out++;
- *out = (inl >> 20) % (1U << 4);
- out++;
- *out = (inl >> 24) % (1U << 4);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 4);
- out++;
- *out = (inl >> 4) % (1U << 4);
- out++;
- *out = (inl >> 8) % (1U << 4);
- out++;
- *out = (inl >> 12) % (1U << 4);
- out++;
- *out = (inl >> 16) % (1U << 4);
- out++;
- *out = (inl >> 20) % (1U << 4);
- out++;
- *out = (inl >> 24) % (1U << 4);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 4);
- out++;
- *out = (inl >> 4) % (1U << 4);
- out++;
- *out = (inl >> 8) % (1U << 4);
- out++;
- *out = (inl >> 12) % (1U << 4);
- out++;
- *out = (inl >> 16) % (1U << 4);
- out++;
- *out = (inl >> 20) % (1U << 4);
- out++;
- *out = (inl >> 24) % (1U << 4);
- out++;
- *out = (inl >> 28);
- ++in;
- out++;
-
- return in;
-}
-
-inline const uint32_t* unpack5_32(const uint32_t* in, uint32_t* out) {
- uint32_t inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out = (inl >> 0) % (1U << 5);
- out++;
- *out = (inl >> 5) % (1U << 5);
- out++;
- *out = (inl >> 10) % (1U << 5);
- out++;
- *out = (inl >> 15) % (1U << 5);
- out++;
- *out = (inl >> 20) % (1U << 5);
- out++;
- *out = (inl >> 25) % (1U << 5);
- out++;
- *out = (inl >> 30);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 3)) << (5 - 3);
- out++;
- *out = (inl >> 3) % (1U << 5);
- out++;
- *out = (inl >> 8) % (1U << 5);
- out++;
- *out = (inl >> 13) % (1U << 5);
- out++;
- *out = (inl >> 18) % (1U << 5);
- out++;
- *out = (inl >> 23) % (1U << 5);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 1)) << (5 - 1);
- out++;
- *out = (inl >> 1) % (1U << 5);
- out++;
- *out = (inl >> 6) % (1U << 5);
- out++;
- *out = (inl >> 11) % (1U << 5);
- out++;
- *out = (inl >> 16) % (1U << 5);
- out++;
- *out = (inl >> 21) % (1U << 5);
- out++;
- *out = (inl >> 26) % (1U << 5);
- out++;
- *out = (inl >> 31);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (5 - 4);
- out++;
- *out = (inl >> 4) % (1U << 5);
- out++;
- *out = (inl >> 9) % (1U << 5);
- out++;
- *out = (inl >> 14) % (1U << 5);
- out++;
- *out = (inl >> 19) % (1U << 5);
- out++;
- *out = (inl >> 24) % (1U << 5);
- out++;
- *out = (inl >> 29);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 2)) << (5 - 2);
- out++;
- *out = (inl >> 2) % (1U << 5);
- out++;
- *out = (inl >> 7) % (1U << 5);
- out++;
- *out = (inl >> 12) % (1U << 5);
- out++;
- *out = (inl >> 17) % (1U << 5);
- out++;
- *out = (inl >> 22) % (1U << 5);
- out++;
- *out = (inl >> 27);
- ++in;
- out++;
-
- return in;
-}
-
-inline const uint32_t* unpack6_32(const uint32_t* in, uint32_t* out) {
- uint32_t inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out = (inl >> 0) % (1U << 6);
- out++;
- *out = (inl >> 6) % (1U << 6);
- out++;
- *out = (inl >> 12) % (1U << 6);
- out++;
- *out = (inl >> 18) % (1U << 6);
- out++;
- *out = (inl >> 24) % (1U << 6);
- out++;
- *out = (inl >> 30);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (6 - 4);
- out++;
- *out = (inl >> 4) % (1U << 6);
- out++;
- *out = (inl >> 10) % (1U << 6);
- out++;
- *out = (inl >> 16) % (1U << 6);
- out++;
- *out = (inl >> 22) % (1U << 6);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 2)) << (6 - 2);
- out++;
- *out = (inl >> 2) % (1U << 6);
- out++;
- *out = (inl >> 8) % (1U << 6);
- out++;
- *out = (inl >> 14) % (1U << 6);
- out++;
- *out = (inl >> 20) % (1U << 6);
- out++;
- *out = (inl >> 26);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 6);
- out++;
- *out = (inl >> 6) % (1U << 6);
- out++;
- *out = (inl >> 12) % (1U << 6);
- out++;
- *out = (inl >> 18) % (1U << 6);
- out++;
- *out = (inl >> 24) % (1U << 6);
- out++;
- *out = (inl >> 30);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (6 - 4);
- out++;
- *out = (inl >> 4) % (1U << 6);
- out++;
- *out = (inl >> 10) % (1U << 6);
- out++;
- *out = (inl >> 16) % (1U << 6);
- out++;
- *out = (inl >> 22) % (1U << 6);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 2)) << (6 - 2);
- out++;
- *out = (inl >> 2) % (1U << 6);
- out++;
- *out = (inl >> 8) % (1U << 6);
- out++;
- *out = (inl >> 14) % (1U << 6);
- out++;
- *out = (inl >> 20) % (1U << 6);
- out++;
- *out = (inl >> 26);
- ++in;
- out++;
-
- return in;
-}
-
-inline const uint32_t* unpack7_32(const uint32_t* in, uint32_t* out) {
- uint32_t inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out = (inl >> 0) % (1U << 7);
- out++;
- *out = (inl >> 7) % (1U << 7);
- out++;
- *out = (inl >> 14) % (1U << 7);
- out++;
- *out = (inl >> 21) % (1U << 7);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 3)) << (7 - 3);
- out++;
- *out = (inl >> 3) % (1U << 7);
- out++;
- *out = (inl >> 10) % (1U << 7);
- out++;
- *out = (inl >> 17) % (1U << 7);
- out++;
- *out = (inl >> 24) % (1U << 7);
- out++;
- *out = (inl >> 31);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 6)) << (7 - 6);
- out++;
- *out = (inl >> 6) % (1U << 7);
- out++;
- *out = (inl >> 13) % (1U << 7);
- out++;
- *out = (inl >> 20) % (1U << 7);
- out++;
- *out = (inl >> 27);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 2)) << (7 - 2);
- out++;
- *out = (inl >> 2) % (1U << 7);
- out++;
- *out = (inl >> 9) % (1U << 7);
- out++;
- *out = (inl >> 16) % (1U << 7);
- out++;
- *out = (inl >> 23) % (1U << 7);
- out++;
- *out = (inl >> 30);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 5)) << (7 - 5);
- out++;
- *out = (inl >> 5) % (1U << 7);
- out++;
- *out = (inl >> 12) % (1U << 7);
- out++;
- *out = (inl >> 19) % (1U << 7);
- out++;
- *out = (inl >> 26);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 1)) << (7 - 1);
- out++;
- *out = (inl >> 1) % (1U << 7);
- out++;
- *out = (inl >> 8) % (1U << 7);
- out++;
- *out = (inl >> 15) % (1U << 7);
- out++;
- *out = (inl >> 22) % (1U << 7);
- out++;
- *out = (inl >> 29);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (7 - 4);
- out++;
- *out = (inl >> 4) % (1U << 7);
- out++;
- *out = (inl >> 11) % (1U << 7);
- out++;
- *out = (inl >> 18) % (1U << 7);
- out++;
- *out = (inl >> 25);
- ++in;
- out++;
-
- return in;
-}
-
-inline const uint32_t* unpack8_32(const uint32_t* in, uint32_t* out) {
- uint32_t inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out = (inl >> 0) % (1U << 8);
- out++;
- *out = (inl >> 8) % (1U << 8);
- out++;
- *out = (inl >> 16) % (1U << 8);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 8);
- out++;
- *out = (inl >> 8) % (1U << 8);
- out++;
- *out = (inl >> 16) % (1U << 8);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 8);
- out++;
- *out = (inl >> 8) % (1U << 8);
- out++;
- *out = (inl >> 16) % (1U << 8);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 8);
- out++;
- *out = (inl >> 8) % (1U << 8);
- out++;
- *out = (inl >> 16) % (1U << 8);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 8);
- out++;
- *out = (inl >> 8) % (1U << 8);
- out++;
- *out = (inl >> 16) % (1U << 8);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 8);
- out++;
- *out = (inl >> 8) % (1U << 8);
- out++;
- *out = (inl >> 16) % (1U << 8);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 8);
- out++;
- *out = (inl >> 8) % (1U << 8);
- out++;
- *out = (inl >> 16) % (1U << 8);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 8);
- out++;
- *out = (inl >> 8) % (1U << 8);
- out++;
- *out = (inl >> 16) % (1U << 8);
- out++;
- *out = (inl >> 24);
- ++in;
- out++;
-
- return in;
-}
-
-inline const uint32_t* unpack9_32(const uint32_t* in, uint32_t* out) {
- uint32_t inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out = (inl >> 0) % (1U << 9);
- out++;
- *out = (inl >> 9) % (1U << 9);
- out++;
- *out = (inl >> 18) % (1U << 9);
- out++;
- *out = (inl >> 27);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (9 - 4);
- out++;
- *out = (inl >> 4) % (1U << 9);
- out++;
- *out = (inl >> 13) % (1U << 9);
- out++;
- *out = (inl >> 22) % (1U << 9);
- out++;
- *out = (inl >> 31);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 8)) << (9 - 8);
- out++;
- *out = (inl >> 8) % (1U << 9);
- out++;
- *out = (inl >> 17) % (1U << 9);
- out++;
- *out = (inl >> 26);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 3)) << (9 - 3);
- out++;
- *out = (inl >> 3) % (1U << 9);
- out++;
- *out = (inl >> 12) % (1U << 9);
- out++;
- *out = (inl >> 21) % (1U << 9);
- out++;
- *out = (inl >> 30);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 7)) << (9 - 7);
- out++;
- *out = (inl >> 7) % (1U << 9);
- out++;
- *out = (inl >> 16) % (1U << 9);
- out++;
- *out = (inl >> 25);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 2)) << (9 - 2);
- out++;
- *out = (inl >> 2) % (1U << 9);
- out++;
- *out = (inl >> 11) % (1U << 9);
- out++;
- *out = (inl >> 20) % (1U << 9);
- out++;
- *out = (inl >> 29);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 6)) << (9 - 6);
- out++;
- *out = (inl >> 6) % (1U << 9);
- out++;
- *out = (inl >> 15) % (1U << 9);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 1)) << (9 - 1);
- out++;
- *out = (inl >> 1) % (1U << 9);
- out++;
- *out = (inl >> 10) % (1U << 9);
- out++;
- *out = (inl >> 19) % (1U << 9);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 5)) << (9 - 5);
- out++;
- *out = (inl >> 5) % (1U << 9);
- out++;
- *out = (inl >> 14) % (1U << 9);
- out++;
- *out = (inl >> 23);
- ++in;
- out++;
-
- return in;
-}
-
-inline const uint32_t* unpack10_32(const uint32_t* in, uint32_t* out) {
- uint32_t inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out = (inl >> 0) % (1U << 10);
- out++;
- *out = (inl >> 10) % (1U << 10);
- out++;
- *out = (inl >> 20) % (1U << 10);
- out++;
- *out = (inl >> 30);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 8)) << (10 - 8);
- out++;
- *out = (inl >> 8) % (1U << 10);
- out++;
- *out = (inl >> 18) % (1U << 10);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 6)) << (10 - 6);
- out++;
- *out = (inl >> 6) % (1U << 10);
- out++;
- *out = (inl >> 16) % (1U << 10);
- out++;
- *out = (inl >> 26);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (10 - 4);
- out++;
- *out = (inl >> 4) % (1U << 10);
- out++;
- *out = (inl >> 14) % (1U << 10);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 2)) << (10 - 2);
- out++;
- *out = (inl >> 2) % (1U << 10);
- out++;
- *out = (inl >> 12) % (1U << 10);
- out++;
- *out = (inl >> 22);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 10);
- out++;
- *out = (inl >> 10) % (1U << 10);
- out++;
- *out = (inl >> 20) % (1U << 10);
- out++;
- *out = (inl >> 30);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 8)) << (10 - 8);
- out++;
- *out = (inl >> 8) % (1U << 10);
- out++;
- *out = (inl >> 18) % (1U << 10);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 6)) << (10 - 6);
- out++;
- *out = (inl >> 6) % (1U << 10);
- out++;
- *out = (inl >> 16) % (1U << 10);
- out++;
- *out = (inl >> 26);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (10 - 4);
- out++;
- *out = (inl >> 4) % (1U << 10);
- out++;
- *out = (inl >> 14) % (1U << 10);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 2)) << (10 - 2);
- out++;
- *out = (inl >> 2) % (1U << 10);
- out++;
- *out = (inl >> 12) % (1U << 10);
- out++;
- *out = (inl >> 22);
- ++in;
- out++;
-
- return in;
-}
-
-inline const uint32_t* unpack11_32(const uint32_t* in, uint32_t* out) {
- uint32_t inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out = (inl >> 0) % (1U << 11);
- out++;
- *out = (inl >> 11) % (1U << 11);
- out++;
- *out = (inl >> 22);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 1)) << (11 - 1);
- out++;
- *out = (inl >> 1) % (1U << 11);
- out++;
- *out = (inl >> 12) % (1U << 11);
- out++;
- *out = (inl >> 23);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 2)) << (11 - 2);
- out++;
- *out = (inl >> 2) % (1U << 11);
- out++;
- *out = (inl >> 13) % (1U << 11);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 3)) << (11 - 3);
- out++;
- *out = (inl >> 3) % (1U << 11);
- out++;
- *out = (inl >> 14) % (1U << 11);
- out++;
- *out = (inl >> 25);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (11 - 4);
- out++;
- *out = (inl >> 4) % (1U << 11);
- out++;
- *out = (inl >> 15) % (1U << 11);
- out++;
- *out = (inl >> 26);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 5)) << (11 - 5);
- out++;
- *out = (inl >> 5) % (1U << 11);
- out++;
- *out = (inl >> 16) % (1U << 11);
- out++;
- *out = (inl >> 27);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 6)) << (11 - 6);
- out++;
- *out = (inl >> 6) % (1U << 11);
- out++;
- *out = (inl >> 17) % (1U << 11);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 7)) << (11 - 7);
- out++;
- *out = (inl >> 7) % (1U << 11);
- out++;
- *out = (inl >> 18) % (1U << 11);
- out++;
- *out = (inl >> 29);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 8)) << (11 - 8);
- out++;
- *out = (inl >> 8) % (1U << 11);
- out++;
- *out = (inl >> 19) % (1U << 11);
- out++;
- *out = (inl >> 30);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 9)) << (11 - 9);
- out++;
- *out = (inl >> 9) % (1U << 11);
- out++;
- *out = (inl >> 20) % (1U << 11);
- out++;
- *out = (inl >> 31);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 10)) << (11 - 10);
- out++;
- *out = (inl >> 10) % (1U << 11);
- out++;
- *out = (inl >> 21);
- ++in;
- out++;
-
- return in;
-}
-
-inline const uint32_t* unpack12_32(const uint32_t* in, uint32_t* out) {
- uint32_t inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out = (inl >> 0) % (1U << 12);
- out++;
- *out = (inl >> 12) % (1U << 12);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (12 - 4);
- out++;
- *out = (inl >> 4) % (1U << 12);
- out++;
- *out = (inl >> 16) % (1U << 12);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 8)) << (12 - 8);
- out++;
- *out = (inl >> 8) % (1U << 12);
- out++;
- *out = (inl >> 20);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 12);
- out++;
- *out = (inl >> 12) % (1U << 12);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (12 - 4);
- out++;
- *out = (inl >> 4) % (1U << 12);
- out++;
- *out = (inl >> 16) % (1U << 12);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 8)) << (12 - 8);
- out++;
- *out = (inl >> 8) % (1U << 12);
- out++;
- *out = (inl >> 20);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 12);
- out++;
- *out = (inl >> 12) % (1U << 12);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (12 - 4);
- out++;
- *out = (inl >> 4) % (1U << 12);
- out++;
- *out = (inl >> 16) % (1U << 12);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 8)) << (12 - 8);
- out++;
- *out = (inl >> 8) % (1U << 12);
- out++;
- *out = (inl >> 20);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 12);
- out++;
- *out = (inl >> 12) % (1U << 12);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (12 - 4);
- out++;
- *out = (inl >> 4) % (1U << 12);
- out++;
- *out = (inl >> 16) % (1U << 12);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 8)) << (12 - 8);
- out++;
- *out = (inl >> 8) % (1U << 12);
- out++;
- *out = (inl >> 20);
- ++in;
- out++;
-
- return in;
-}
-
-inline const uint32_t* unpack13_32(const uint32_t* in, uint32_t* out) {
- uint32_t inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out = (inl >> 0) % (1U << 13);
- out++;
- *out = (inl >> 13) % (1U << 13);
- out++;
- *out = (inl >> 26);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 7)) << (13 - 7);
- out++;
- *out = (inl >> 7) % (1U << 13);
- out++;
- *out = (inl >> 20);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 1)) << (13 - 1);
- out++;
- *out = (inl >> 1) % (1U << 13);
- out++;
- *out = (inl >> 14) % (1U << 13);
- out++;
- *out = (inl >> 27);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 8)) << (13 - 8);
- out++;
- *out = (inl >> 8) % (1U << 13);
- out++;
- *out = (inl >> 21);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 2)) << (13 - 2);
- out++;
- *out = (inl >> 2) % (1U << 13);
- out++;
- *out = (inl >> 15) % (1U << 13);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 9)) << (13 - 9);
- out++;
- *out = (inl >> 9) % (1U << 13);
- out++;
- *out = (inl >> 22);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 3)) << (13 - 3);
- out++;
- *out = (inl >> 3) % (1U << 13);
- out++;
- *out = (inl >> 16) % (1U << 13);
- out++;
- *out = (inl >> 29);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 10)) << (13 - 10);
- out++;
- *out = (inl >> 10) % (1U << 13);
- out++;
- *out = (inl >> 23);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (13 - 4);
- out++;
- *out = (inl >> 4) % (1U << 13);
- out++;
- *out = (inl >> 17) % (1U << 13);
- out++;
- *out = (inl >> 30);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 11)) << (13 - 11);
- out++;
- *out = (inl >> 11) % (1U << 13);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 5)) << (13 - 5);
- out++;
- *out = (inl >> 5) % (1U << 13);
- out++;
- *out = (inl >> 18) % (1U << 13);
- out++;
- *out = (inl >> 31);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 12)) << (13 - 12);
- out++;
- *out = (inl >> 12) % (1U << 13);
- out++;
- *out = (inl >> 25);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 6)) << (13 - 6);
- out++;
- *out = (inl >> 6) % (1U << 13);
- out++;
- *out = (inl >> 19);
- ++in;
- out++;
-
- return in;
-}
-
-inline const uint32_t* unpack14_32(const uint32_t* in, uint32_t* out) {
- uint32_t inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out = (inl >> 0) % (1U << 14);
- out++;
- *out = (inl >> 14) % (1U << 14);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 10)) << (14 - 10);
- out++;
- *out = (inl >> 10) % (1U << 14);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 6)) << (14 - 6);
- out++;
- *out = (inl >> 6) % (1U << 14);
- out++;
- *out = (inl >> 20);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 2)) << (14 - 2);
- out++;
- *out = (inl >> 2) % (1U << 14);
- out++;
- *out = (inl >> 16) % (1U << 14);
- out++;
- *out = (inl >> 30);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 12)) << (14 - 12);
- out++;
- *out = (inl >> 12) % (1U << 14);
- out++;
- *out = (inl >> 26);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 8)) << (14 - 8);
- out++;
- *out = (inl >> 8) % (1U << 14);
- out++;
- *out = (inl >> 22);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (14 - 4);
- out++;
- *out = (inl >> 4) % (1U << 14);
- out++;
- *out = (inl >> 18);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 14);
- out++;
- *out = (inl >> 14) % (1U << 14);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 10)) << (14 - 10);
- out++;
- *out = (inl >> 10) % (1U << 14);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 6)) << (14 - 6);
- out++;
- *out = (inl >> 6) % (1U << 14);
- out++;
- *out = (inl >> 20);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 2)) << (14 - 2);
- out++;
- *out = (inl >> 2) % (1U << 14);
- out++;
- *out = (inl >> 16) % (1U << 14);
- out++;
- *out = (inl >> 30);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 12)) << (14 - 12);
- out++;
- *out = (inl >> 12) % (1U << 14);
- out++;
- *out = (inl >> 26);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 8)) << (14 - 8);
- out++;
- *out = (inl >> 8) % (1U << 14);
- out++;
- *out = (inl >> 22);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (14 - 4);
- out++;
- *out = (inl >> 4) % (1U << 14);
- out++;
- *out = (inl >> 18);
- ++in;
- out++;
-
- return in;
-}
-
-inline const uint32_t* unpack15_32(const uint32_t* in, uint32_t* out) {
- uint32_t inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out = (inl >> 0) % (1U << 15);
- out++;
- *out = (inl >> 15) % (1U << 15);
- out++;
- *out = (inl >> 30);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 13)) << (15 - 13);
- out++;
- *out = (inl >> 13) % (1U << 15);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 11)) << (15 - 11);
- out++;
- *out = (inl >> 11) % (1U << 15);
- out++;
- *out = (inl >> 26);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 9)) << (15 - 9);
- out++;
- *out = (inl >> 9) % (1U << 15);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 7)) << (15 - 7);
- out++;
- *out = (inl >> 7) % (1U << 15);
- out++;
- *out = (inl >> 22);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 5)) << (15 - 5);
- out++;
- *out = (inl >> 5) % (1U << 15);
- out++;
- *out = (inl >> 20);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 3)) << (15 - 3);
- out++;
- *out = (inl >> 3) % (1U << 15);
- out++;
- *out = (inl >> 18);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 1)) << (15 - 1);
- out++;
- *out = (inl >> 1) % (1U << 15);
- out++;
- *out = (inl >> 16) % (1U << 15);
- out++;
- *out = (inl >> 31);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 14)) << (15 - 14);
- out++;
- *out = (inl >> 14) % (1U << 15);
- out++;
- *out = (inl >> 29);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 12)) << (15 - 12);
- out++;
- *out = (inl >> 12) % (1U << 15);
- out++;
- *out = (inl >> 27);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 10)) << (15 - 10);
- out++;
- *out = (inl >> 10) % (1U << 15);
- out++;
- *out = (inl >> 25);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 8)) << (15 - 8);
- out++;
- *out = (inl >> 8) % (1U << 15);
- out++;
- *out = (inl >> 23);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 6)) << (15 - 6);
- out++;
- *out = (inl >> 6) % (1U << 15);
- out++;
- *out = (inl >> 21);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (15 - 4);
- out++;
- *out = (inl >> 4) % (1U << 15);
- out++;
- *out = (inl >> 19);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 2)) << (15 - 2);
- out++;
- *out = (inl >> 2) % (1U << 15);
- out++;
- *out = (inl >> 17);
- ++in;
- out++;
-
- return in;
-}
-
-inline const uint32_t* unpack16_32(const uint32_t* in, uint32_t* out) {
- uint32_t inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out = (inl >> 0) % (1U << 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 16);
- out++;
- *out = (inl >> 16);
- ++in;
- out++;
-
- return in;
-}
-
-inline const uint32_t* unpack17_32(const uint32_t* in, uint32_t* out) {
- uint32_t inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out = (inl >> 0) % (1U << 17);
- out++;
- *out = (inl >> 17);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 2)) << (17 - 2);
- out++;
- *out = (inl >> 2) % (1U << 17);
- out++;
- *out = (inl >> 19);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (17 - 4);
- out++;
- *out = (inl >> 4) % (1U << 17);
- out++;
- *out = (inl >> 21);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 6)) << (17 - 6);
- out++;
- *out = (inl >> 6) % (1U << 17);
- out++;
- *out = (inl >> 23);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 8)) << (17 - 8);
- out++;
- *out = (inl >> 8) % (1U << 17);
- out++;
- *out = (inl >> 25);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 10)) << (17 - 10);
- out++;
- *out = (inl >> 10) % (1U << 17);
- out++;
- *out = (inl >> 27);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 12)) << (17 - 12);
- out++;
- *out = (inl >> 12) % (1U << 17);
- out++;
- *out = (inl >> 29);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 14)) << (17 - 14);
- out++;
- *out = (inl >> 14) % (1U << 17);
- out++;
- *out = (inl >> 31);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 16)) << (17 - 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 1)) << (17 - 1);
- out++;
- *out = (inl >> 1) % (1U << 17);
- out++;
- *out = (inl >> 18);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 3)) << (17 - 3);
- out++;
- *out = (inl >> 3) % (1U << 17);
- out++;
- *out = (inl >> 20);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 5)) << (17 - 5);
- out++;
- *out = (inl >> 5) % (1U << 17);
- out++;
- *out = (inl >> 22);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 7)) << (17 - 7);
- out++;
- *out = (inl >> 7) % (1U << 17);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 9)) << (17 - 9);
- out++;
- *out = (inl >> 9) % (1U << 17);
- out++;
- *out = (inl >> 26);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 11)) << (17 - 11);
- out++;
- *out = (inl >> 11) % (1U << 17);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 13)) << (17 - 13);
- out++;
- *out = (inl >> 13) % (1U << 17);
- out++;
- *out = (inl >> 30);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 15)) << (17 - 15);
- out++;
- *out = (inl >> 15);
- ++in;
- out++;
-
- return in;
-}
-
-inline const uint32_t* unpack18_32(const uint32_t* in, uint32_t* out) {
- uint32_t inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out = (inl >> 0) % (1U << 18);
- out++;
- *out = (inl >> 18);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (18 - 4);
- out++;
- *out = (inl >> 4) % (1U << 18);
- out++;
- *out = (inl >> 22);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 8)) << (18 - 8);
- out++;
- *out = (inl >> 8) % (1U << 18);
- out++;
- *out = (inl >> 26);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 12)) << (18 - 12);
- out++;
- *out = (inl >> 12) % (1U << 18);
- out++;
- *out = (inl >> 30);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 16)) << (18 - 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 2)) << (18 - 2);
- out++;
- *out = (inl >> 2) % (1U << 18);
- out++;
- *out = (inl >> 20);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 6)) << (18 - 6);
- out++;
- *out = (inl >> 6) % (1U << 18);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 10)) << (18 - 10);
- out++;
- *out = (inl >> 10) % (1U << 18);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 14)) << (18 - 14);
- out++;
- *out = (inl >> 14);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 18);
- out++;
- *out = (inl >> 18);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (18 - 4);
- out++;
- *out = (inl >> 4) % (1U << 18);
- out++;
- *out = (inl >> 22);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 8)) << (18 - 8);
- out++;
- *out = (inl >> 8) % (1U << 18);
- out++;
- *out = (inl >> 26);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 12)) << (18 - 12);
- out++;
- *out = (inl >> 12) % (1U << 18);
- out++;
- *out = (inl >> 30);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 16)) << (18 - 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 2)) << (18 - 2);
- out++;
- *out = (inl >> 2) % (1U << 18);
- out++;
- *out = (inl >> 20);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 6)) << (18 - 6);
- out++;
- *out = (inl >> 6) % (1U << 18);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 10)) << (18 - 10);
- out++;
- *out = (inl >> 10) % (1U << 18);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 14)) << (18 - 14);
- out++;
- *out = (inl >> 14);
- ++in;
- out++;
-
- return in;
-}
-
-inline const uint32_t* unpack19_32(const uint32_t* in, uint32_t* out) {
- uint32_t inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out = (inl >> 0) % (1U << 19);
- out++;
- *out = (inl >> 19);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 6)) << (19 - 6);
- out++;
- *out = (inl >> 6) % (1U << 19);
- out++;
- *out = (inl >> 25);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 12)) << (19 - 12);
- out++;
- *out = (inl >> 12) % (1U << 19);
- out++;
- *out = (inl >> 31);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 18)) << (19 - 18);
- out++;
- *out = (inl >> 18);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 5)) << (19 - 5);
- out++;
- *out = (inl >> 5) % (1U << 19);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 11)) << (19 - 11);
- out++;
- *out = (inl >> 11) % (1U << 19);
- out++;
- *out = (inl >> 30);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 17)) << (19 - 17);
- out++;
- *out = (inl >> 17);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (19 - 4);
- out++;
- *out = (inl >> 4) % (1U << 19);
- out++;
- *out = (inl >> 23);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 10)) << (19 - 10);
- out++;
- *out = (inl >> 10) % (1U << 19);
- out++;
- *out = (inl >> 29);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 16)) << (19 - 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 3)) << (19 - 3);
- out++;
- *out = (inl >> 3) % (1U << 19);
- out++;
- *out = (inl >> 22);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 9)) << (19 - 9);
- out++;
- *out = (inl >> 9) % (1U << 19);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 15)) << (19 - 15);
- out++;
- *out = (inl >> 15);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 2)) << (19 - 2);
- out++;
- *out = (inl >> 2) % (1U << 19);
- out++;
- *out = (inl >> 21);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 8)) << (19 - 8);
- out++;
- *out = (inl >> 8) % (1U << 19);
- out++;
- *out = (inl >> 27);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 14)) << (19 - 14);
- out++;
- *out = (inl >> 14);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 1)) << (19 - 1);
- out++;
- *out = (inl >> 1) % (1U << 19);
- out++;
- *out = (inl >> 20);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 7)) << (19 - 7);
- out++;
- *out = (inl >> 7) % (1U << 19);
- out++;
- *out = (inl >> 26);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 13)) << (19 - 13);
- out++;
- *out = (inl >> 13);
- ++in;
- out++;
-
- return in;
-}
-
-inline const uint32_t* unpack20_32(const uint32_t* in, uint32_t* out) {
- uint32_t inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out = (inl >> 0) % (1U << 20);
- out++;
- *out = (inl >> 20);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 8)) << (20 - 8);
- out++;
- *out = (inl >> 8) % (1U << 20);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 16)) << (20 - 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (20 - 4);
- out++;
- *out = (inl >> 4) % (1U << 20);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 12)) << (20 - 12);
- out++;
- *out = (inl >> 12);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 20);
- out++;
- *out = (inl >> 20);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 8)) << (20 - 8);
- out++;
- *out = (inl >> 8) % (1U << 20);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 16)) << (20 - 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (20 - 4);
- out++;
- *out = (inl >> 4) % (1U << 20);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 12)) << (20 - 12);
- out++;
- *out = (inl >> 12);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 20);
- out++;
- *out = (inl >> 20);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 8)) << (20 - 8);
- out++;
- *out = (inl >> 8) % (1U << 20);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 16)) << (20 - 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (20 - 4);
- out++;
- *out = (inl >> 4) % (1U << 20);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 12)) << (20 - 12);
- out++;
- *out = (inl >> 12);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 20);
- out++;
- *out = (inl >> 20);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 8)) << (20 - 8);
- out++;
- *out = (inl >> 8) % (1U << 20);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 16)) << (20 - 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (20 - 4);
- out++;
- *out = (inl >> 4) % (1U << 20);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 12)) << (20 - 12);
- out++;
- *out = (inl >> 12);
- ++in;
- out++;
-
- return in;
-}
-
-inline const uint32_t* unpack21_32(const uint32_t* in, uint32_t* out) {
- uint32_t inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out = (inl >> 0) % (1U << 21);
- out++;
- *out = (inl >> 21);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 10)) << (21 - 10);
- out++;
- *out = (inl >> 10) % (1U << 21);
- out++;
- *out = (inl >> 31);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 20)) << (21 - 20);
- out++;
- *out = (inl >> 20);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 9)) << (21 - 9);
- out++;
- *out = (inl >> 9) % (1U << 21);
- out++;
- *out = (inl >> 30);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 19)) << (21 - 19);
- out++;
- *out = (inl >> 19);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 8)) << (21 - 8);
- out++;
- *out = (inl >> 8) % (1U << 21);
- out++;
- *out = (inl >> 29);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 18)) << (21 - 18);
- out++;
- *out = (inl >> 18);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 7)) << (21 - 7);
- out++;
- *out = (inl >> 7) % (1U << 21);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 17)) << (21 - 17);
- out++;
- *out = (inl >> 17);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 6)) << (21 - 6);
- out++;
- *out = (inl >> 6) % (1U << 21);
- out++;
- *out = (inl >> 27);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 16)) << (21 - 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 5)) << (21 - 5);
- out++;
- *out = (inl >> 5) % (1U << 21);
- out++;
- *out = (inl >> 26);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 15)) << (21 - 15);
- out++;
- *out = (inl >> 15);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (21 - 4);
- out++;
- *out = (inl >> 4) % (1U << 21);
- out++;
- *out = (inl >> 25);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 14)) << (21 - 14);
- out++;
- *out = (inl >> 14);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 3)) << (21 - 3);
- out++;
- *out = (inl >> 3) % (1U << 21);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 13)) << (21 - 13);
- out++;
- *out = (inl >> 13);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 2)) << (21 - 2);
- out++;
- *out = (inl >> 2) % (1U << 21);
- out++;
- *out = (inl >> 23);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 12)) << (21 - 12);
- out++;
- *out = (inl >> 12);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 1)) << (21 - 1);
- out++;
- *out = (inl >> 1) % (1U << 21);
- out++;
- *out = (inl >> 22);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 11)) << (21 - 11);
- out++;
- *out = (inl >> 11);
- ++in;
- out++;
-
- return in;
-}
-
-inline const uint32_t* unpack22_32(const uint32_t* in, uint32_t* out) {
- uint32_t inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out = (inl >> 0) % (1U << 22);
- out++;
- *out = (inl >> 22);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 12)) << (22 - 12);
- out++;
- *out = (inl >> 12);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 2)) << (22 - 2);
- out++;
- *out = (inl >> 2) % (1U << 22);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 14)) << (22 - 14);
- out++;
- *out = (inl >> 14);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (22 - 4);
- out++;
- *out = (inl >> 4) % (1U << 22);
- out++;
- *out = (inl >> 26);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 16)) << (22 - 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 6)) << (22 - 6);
- out++;
- *out = (inl >> 6) % (1U << 22);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 18)) << (22 - 18);
- out++;
- *out = (inl >> 18);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 8)) << (22 - 8);
- out++;
- *out = (inl >> 8) % (1U << 22);
- out++;
- *out = (inl >> 30);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 20)) << (22 - 20);
- out++;
- *out = (inl >> 20);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 10)) << (22 - 10);
- out++;
- *out = (inl >> 10);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0) % (1U << 22);
- out++;
- *out = (inl >> 22);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 12)) << (22 - 12);
- out++;
- *out = (inl >> 12);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 2)) << (22 - 2);
- out++;
- *out = (inl >> 2) % (1U << 22);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 14)) << (22 - 14);
- out++;
- *out = (inl >> 14);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (22 - 4);
- out++;
- *out = (inl >> 4) % (1U << 22);
- out++;
- *out = (inl >> 26);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 16)) << (22 - 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 6)) << (22 - 6);
- out++;
- *out = (inl >> 6) % (1U << 22);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 18)) << (22 - 18);
- out++;
- *out = (inl >> 18);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 8)) << (22 - 8);
- out++;
- *out = (inl >> 8) % (1U << 22);
- out++;
- *out = (inl >> 30);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 20)) << (22 - 20);
- out++;
- *out = (inl >> 20);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 10)) << (22 - 10);
- out++;
- *out = (inl >> 10);
- ++in;
- out++;
-
- return in;
-}
-
-// Unpacks 32 values of 23 bits each from the 23 consecutive 32-bit words at
-// `in` into out[0..31]. Values are laid out back to back starting at the
-// least significant bit of the first word. Each word is read with
-// util::SafeLoad and passed through arrow::BitUtil::FromLittleEndian.
-// Returns `in` advanced past the 23 words that were consumed.
-inline const uint32_t* unpack23_32(const uint32_t* in, uint32_t* out) {
-  const uint32_t kBits = 23;  // width of one packed value
-  uint32_t word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
-  uint32_t offset = 0;  // bit position in `word` where the next value begins
-  for (int i = 0; i < 32; ++i) {
-    const uint32_t left = 32 - offset;  // unread bits remaining in `word`
-    if (left > kBits) {
-      // The value lies strictly inside the current word.
-      out[i] = (word >> offset) % (1U << kBits);
-      offset += kBits;
-      continue;
-    }
-    // The value uses up the rest of the current word.
-    out[i] = word >> offset;
-    ++in;
-    offset = kBits - left;  // bits of this value still owed by the next word
-    if (i == 31) {
-      break;  // the last value ends on a word boundary; nothing more is read
-    }
-    word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
-    if (offset != 0) {
-      // Low bits of the new word supply the high bits of the value.
-      out[i] |= (word % (1U << offset)) << left;
-    }
-  }
-  return in;
-}
-
-// Unpacks 32 values of 24 bits each from the 24 consecutive 32-bit words at
-// `in` into out[0..31]. Values are laid out back to back starting at the
-// least significant bit of the first word. Each word is read with
-// util::SafeLoad and passed through arrow::BitUtil::FromLittleEndian.
-// Returns `in` advanced past the 24 words that were consumed.
-inline const uint32_t* unpack24_32(const uint32_t* in, uint32_t* out) {
-  const uint32_t kBits = 24;  // width of one packed value
-  uint32_t word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
-  uint32_t offset = 0;  // bit position in `word` where the next value begins
-  for (int i = 0; i < 32; ++i) {
-    const uint32_t left = 32 - offset;  // unread bits remaining in `word`
-    if (left > kBits) {
-      // The value lies strictly inside the current word.
-      out[i] = (word >> offset) % (1U << kBits);
-      offset += kBits;
-      continue;
-    }
-    // The value uses up the rest of the current word.
-    out[i] = word >> offset;
-    ++in;
-    offset = kBits - left;  // bits of this value still owed by the next word
-    if (i == 31) {
-      break;  // the last value ends on a word boundary; nothing more is read
-    }
-    word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
-    if (offset != 0) {
-      // Low bits of the new word supply the high bits of the value.
-      out[i] |= (word % (1U << offset)) << left;
-    }
-  }
-  return in;
-}
-
-// Unpacks 32 values of 25 bits each from the 25 consecutive 32-bit words at
-// `in` into out[0..31]. Values are laid out back to back starting at the
-// least significant bit of the first word. Each word is read with
-// util::SafeLoad and passed through arrow::BitUtil::FromLittleEndian.
-// Returns `in` advanced past the 25 words that were consumed.
-inline const uint32_t* unpack25_32(const uint32_t* in, uint32_t* out) {
-  const uint32_t kBits = 25;  // width of one packed value
-  uint32_t word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
-  uint32_t offset = 0;  // bit position in `word` where the next value begins
-  for (int i = 0; i < 32; ++i) {
-    const uint32_t left = 32 - offset;  // unread bits remaining in `word`
-    if (left > kBits) {
-      // The value lies strictly inside the current word.
-      out[i] = (word >> offset) % (1U << kBits);
-      offset += kBits;
-      continue;
-    }
-    // The value uses up the rest of the current word.
-    out[i] = word >> offset;
-    ++in;
-    offset = kBits - left;  // bits of this value still owed by the next word
-    if (i == 31) {
-      break;  // the last value ends on a word boundary; nothing more is read
-    }
-    word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
-    if (offset != 0) {
-      // Low bits of the new word supply the high bits of the value.
-      out[i] |= (word % (1U << offset)) << left;
-    }
-  }
-  return in;
-}
-
-// Unpacks 32 values of 26 bits each from the 26 consecutive 32-bit words at
-// `in` into out[0..31]. Values are laid out back to back starting at the
-// least significant bit of the first word. Each word is read with
-// util::SafeLoad and passed through arrow::BitUtil::FromLittleEndian.
-// Returns `in` advanced past the 26 words that were consumed.
-inline const uint32_t* unpack26_32(const uint32_t* in, uint32_t* out) {
-  const uint32_t kBits = 26;  // width of one packed value
-  uint32_t word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
-  uint32_t offset = 0;  // bit position in `word` where the next value begins
-  for (int i = 0; i < 32; ++i) {
-    const uint32_t left = 32 - offset;  // unread bits remaining in `word`
-    if (left > kBits) {
-      // The value lies strictly inside the current word.
-      out[i] = (word >> offset) % (1U << kBits);
-      offset += kBits;
-      continue;
-    }
-    // The value uses up the rest of the current word.
-    out[i] = word >> offset;
-    ++in;
-    offset = kBits - left;  // bits of this value still owed by the next word
-    if (i == 31) {
-      break;  // the last value ends on a word boundary; nothing more is read
-    }
-    word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
-    if (offset != 0) {
-      // Low bits of the new word supply the high bits of the value.
-      out[i] |= (word % (1U << offset)) << left;
-    }
-  }
-  return in;
-}
-
-// Unpacks 32 values of 27 bits each from the 27 consecutive 32-bit words at
-// `in` into out[0..31]. Values are laid out back to back starting at the
-// least significant bit of the first word. Each word is read with
-// util::SafeLoad and passed through arrow::BitUtil::FromLittleEndian.
-// Returns `in` advanced past the 27 words that were consumed.
-inline const uint32_t* unpack27_32(const uint32_t* in, uint32_t* out) {
-  const uint32_t kBits = 27;  // width of one packed value
-  uint32_t word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
-  uint32_t offset = 0;  // bit position in `word` where the next value begins
-  for (int i = 0; i < 32; ++i) {
-    const uint32_t left = 32 - offset;  // unread bits remaining in `word`
-    if (left > kBits) {
-      // The value lies strictly inside the current word.
-      out[i] = (word >> offset) % (1U << kBits);
-      offset += kBits;
-      continue;
-    }
-    // The value uses up the rest of the current word.
-    out[i] = word >> offset;
-    ++in;
-    offset = kBits - left;  // bits of this value still owed by the next word
-    if (i == 31) {
-      break;  // the last value ends on a word boundary; nothing more is read
-    }
-    word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
-    if (offset != 0) {
-      // Low bits of the new word supply the high bits of the value.
-      out[i] |= (word % (1U << offset)) << left;
-    }
-  }
-  return in;
-}
-
-// Unpacks 32 values of 28 bits each from the 28 consecutive 32-bit words at
-// `in` into out[0..31]. Values are laid out back to back starting at the
-// least significant bit of the first word. Each word is read with
-// util::SafeLoad and passed through arrow::BitUtil::FromLittleEndian.
-// Returns `in` advanced past the 28 words that were consumed.
-inline const uint32_t* unpack28_32(const uint32_t* in, uint32_t* out) {
-  const uint32_t kBits = 28;  // width of one packed value
-  uint32_t word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
-  uint32_t offset = 0;  // bit position in `word` where the next value begins
-  for (int i = 0; i < 32; ++i) {
-    const uint32_t left = 32 - offset;  // unread bits remaining in `word`
-    if (left > kBits) {
-      // The value lies strictly inside the current word.
-      out[i] = (word >> offset) % (1U << kBits);
-      offset += kBits;
-      continue;
-    }
-    // The value uses up the rest of the current word.
-    out[i] = word >> offset;
-    ++in;
-    offset = kBits - left;  // bits of this value still owed by the next word
-    if (i == 31) {
-      break;  // the last value ends on a word boundary; nothing more is read
-    }
-    word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
-    if (offset != 0) {
-      // Low bits of the new word supply the high bits of the value.
-      out[i] |= (word % (1U << offset)) << left;
-    }
-  }
-  return in;
-}
-
-// Unpacks 32 values of 29 bits each from the 29 consecutive 32-bit words at
-// `in` into out[0..31]. Values are laid out back to back starting at the
-// least significant bit of the first word. Each word is read with
-// util::SafeLoad and passed through arrow::BitUtil::FromLittleEndian.
-// Returns `in` advanced past the 29 words that were consumed.
-inline const uint32_t* unpack29_32(const uint32_t* in, uint32_t* out) {
-  const uint32_t kBits = 29;  // width of one packed value
-  uint32_t word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
-  uint32_t offset = 0;  // bit position in `word` where the next value begins
-  for (int i = 0; i < 32; ++i) {
-    const uint32_t left = 32 - offset;  // unread bits remaining in `word`
-    if (left > kBits) {
-      // The value lies strictly inside the current word.
-      out[i] = (word >> offset) % (1U << kBits);
-      offset += kBits;
-      continue;
-    }
-    // The value uses up the rest of the current word.
-    out[i] = word >> offset;
-    ++in;
-    offset = kBits - left;  // bits of this value still owed by the next word
-    if (i == 31) {
-      break;  // the last value ends on a word boundary; nothing more is read
-    }
-    word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
-    if (offset != 0) {
-      // Low bits of the new word supply the high bits of the value.
-      out[i] |= (word % (1U << offset)) << left;
-    }
-  }
-  return in;
-}
-
-// Unpacks 32 values of 30 bits each from the 30 consecutive 32-bit words at
-// `in` into out[0..31]. Values are laid out back to back starting at the
-// least significant bit of the first word. Each word is read with
-// util::SafeLoad and passed through arrow::BitUtil::FromLittleEndian.
-// Returns `in` advanced past the 30 words that were consumed.
-inline const uint32_t* unpack30_32(const uint32_t* in, uint32_t* out) {
-  const uint32_t kBits = 30;  // width of one packed value
-  uint32_t word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
-  uint32_t offset = 0;  // bit position in `word` where the next value begins
-  for (int i = 0; i < 32; ++i) {
-    const uint32_t left = 32 - offset;  // unread bits remaining in `word`
-    if (left > kBits) {
-      // The value lies strictly inside the current word.
-      out[i] = (word >> offset) % (1U << kBits);
-      offset += kBits;
-      continue;
-    }
-    // The value uses up the rest of the current word.
-    out[i] = word >> offset;
-    ++in;
-    offset = kBits - left;  // bits of this value still owed by the next word
-    if (i == 31) {
-      break;  // the last value ends on a word boundary; nothing more is read
-    }
-    word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
-    if (offset != 0) {
-      // Low bits of the new word supply the high bits of the value.
-      out[i] |= (word % (1U << offset)) << left;
-    }
-  }
-  return in;
-}
-
-inline const uint32_t* unpack31_32(const uint32_t* in, uint32_t* out) {
- uint32_t inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out = (inl >> 0) % (1U << 31);
- out++;
- *out = (inl >> 31);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 30)) << (31 - 30);
- out++;
- *out = (inl >> 30);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 29)) << (31 - 29);
- out++;
- *out = (inl >> 29);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 28)) << (31 - 28);
- out++;
- *out = (inl >> 28);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 27)) << (31 - 27);
- out++;
- *out = (inl >> 27);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 26)) << (31 - 26);
- out++;
- *out = (inl >> 26);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 25)) << (31 - 25);
- out++;
- *out = (inl >> 25);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 24)) << (31 - 24);
- out++;
- *out = (inl >> 24);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 23)) << (31 - 23);
- out++;
- *out = (inl >> 23);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 22)) << (31 - 22);
- out++;
- *out = (inl >> 22);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 21)) << (31 - 21);
- out++;
- *out = (inl >> 21);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 20)) << (31 - 20);
- out++;
- *out = (inl >> 20);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 19)) << (31 - 19);
- out++;
- *out = (inl >> 19);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 18)) << (31 - 18);
- out++;
- *out = (inl >> 18);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 17)) << (31 - 17);
- out++;
- *out = (inl >> 17);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 16)) << (31 - 16);
- out++;
- *out = (inl >> 16);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 15)) << (31 - 15);
- out++;
- *out = (inl >> 15);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 14)) << (31 - 14);
- out++;
- *out = (inl >> 14);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 13)) << (31 - 13);
- out++;
- *out = (inl >> 13);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 12)) << (31 - 12);
- out++;
- *out = (inl >> 12);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 11)) << (31 - 11);
- out++;
- *out = (inl >> 11);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 10)) << (31 - 10);
- out++;
- *out = (inl >> 10);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 9)) << (31 - 9);
- out++;
- *out = (inl >> 9);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 8)) << (31 - 8);
- out++;
- *out = (inl >> 8);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 7)) << (31 - 7);
- out++;
- *out = (inl >> 7);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 6)) << (31 - 6);
- out++;
- *out = (inl >> 6);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 5)) << (31 - 5);
- out++;
- *out = (inl >> 5);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 4)) << (31 - 4);
- out++;
- *out = (inl >> 4);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 3)) << (31 - 3);
- out++;
- *out = (inl >> 3);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 2)) << (31 - 2);
- out++;
- *out = (inl >> 2);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out |= (inl % (1U << 1)) << (31 - 1);
- out++;
- *out = (inl >> 1);
- ++in;
- out++;
-
- return in;
-}
-
-inline const uint32_t* unpack32_32(const uint32_t* in, uint32_t* out) {
- uint32_t inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- inl = util::SafeLoad(in);
- inl = arrow::BitUtil::FromLittleEndian(inl);
- out++;
- *out = (inl >> 0);
- ++in;
- out++;
-
- return in;
-}
-
-inline const uint32_t* nullunpacker32(const uint32_t* in, uint32_t* out) {
- for (int k = 0; k < 32; ++k) {
- out[k] = 0;
- }
- return in;
-}
-
-} // namespace internal
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// This file was modified from its original version for inclusion in parquet-cpp.
+// Original source:
+// https://github.com/lemire/FrameOfReference/blob/6ccaf9e97160f9a3b299e23a8ef739e711ef0c71/src/bpacking.cpp
+// The original copyright notice follows.
+
+// This code is released under the
+// Apache License Version 2.0 http://www.apache.org/licenses/.
+// (c) Daniel Lemire 2013
+
+#pragma once
+
+#include "arrow/util/bit_util.h"
+#include "arrow/util/ubsan.h"
+
+namespace arrow {
+namespace internal {
+
+inline const uint32_t* unpack1_32(const uint32_t* in, uint32_t* out) {
+ uint32_t inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out = (inl >> 0) & 1;
+ out++;
+ *out = (inl >> 1) & 1;
+ out++;
+ *out = (inl >> 2) & 1;
+ out++;
+ *out = (inl >> 3) & 1;
+ out++;
+ *out = (inl >> 4) & 1;
+ out++;
+ *out = (inl >> 5) & 1;
+ out++;
+ *out = (inl >> 6) & 1;
+ out++;
+ *out = (inl >> 7) & 1;
+ out++;
+ *out = (inl >> 8) & 1;
+ out++;
+ *out = (inl >> 9) & 1;
+ out++;
+ *out = (inl >> 10) & 1;
+ out++;
+ *out = (inl >> 11) & 1;
+ out++;
+ *out = (inl >> 12) & 1;
+ out++;
+ *out = (inl >> 13) & 1;
+ out++;
+ *out = (inl >> 14) & 1;
+ out++;
+ *out = (inl >> 15) & 1;
+ out++;
+ *out = (inl >> 16) & 1;
+ out++;
+ *out = (inl >> 17) & 1;
+ out++;
+ *out = (inl >> 18) & 1;
+ out++;
+ *out = (inl >> 19) & 1;
+ out++;
+ *out = (inl >> 20) & 1;
+ out++;
+ *out = (inl >> 21) & 1;
+ out++;
+ *out = (inl >> 22) & 1;
+ out++;
+ *out = (inl >> 23) & 1;
+ out++;
+ *out = (inl >> 24) & 1;
+ out++;
+ *out = (inl >> 25) & 1;
+ out++;
+ *out = (inl >> 26) & 1;
+ out++;
+ *out = (inl >> 27) & 1;
+ out++;
+ *out = (inl >> 28) & 1;
+ out++;
+ *out = (inl >> 29) & 1;
+ out++;
+ *out = (inl >> 30) & 1;
+ out++;
+ *out = (inl >> 31);
+ ++in;
+ out++;
+
+ return in;
+}
+
+inline const uint32_t* unpack2_32(const uint32_t* in, uint32_t* out) {
+ uint32_t inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out = (inl >> 0) % (1U << 2);
+ out++;
+ *out = (inl >> 2) % (1U << 2);
+ out++;
+ *out = (inl >> 4) % (1U << 2);
+ out++;
+ *out = (inl >> 6) % (1U << 2);
+ out++;
+ *out = (inl >> 8) % (1U << 2);
+ out++;
+ *out = (inl >> 10) % (1U << 2);
+ out++;
+ *out = (inl >> 12) % (1U << 2);
+ out++;
+ *out = (inl >> 14) % (1U << 2);
+ out++;
+ *out = (inl >> 16) % (1U << 2);
+ out++;
+ *out = (inl >> 18) % (1U << 2);
+ out++;
+ *out = (inl >> 20) % (1U << 2);
+ out++;
+ *out = (inl >> 22) % (1U << 2);
+ out++;
+ *out = (inl >> 24) % (1U << 2);
+ out++;
+ *out = (inl >> 26) % (1U << 2);
+ out++;
+ *out = (inl >> 28) % (1U << 2);
+ out++;
+ *out = (inl >> 30);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 2);
+ out++;
+ *out = (inl >> 2) % (1U << 2);
+ out++;
+ *out = (inl >> 4) % (1U << 2);
+ out++;
+ *out = (inl >> 6) % (1U << 2);
+ out++;
+ *out = (inl >> 8) % (1U << 2);
+ out++;
+ *out = (inl >> 10) % (1U << 2);
+ out++;
+ *out = (inl >> 12) % (1U << 2);
+ out++;
+ *out = (inl >> 14) % (1U << 2);
+ out++;
+ *out = (inl >> 16) % (1U << 2);
+ out++;
+ *out = (inl >> 18) % (1U << 2);
+ out++;
+ *out = (inl >> 20) % (1U << 2);
+ out++;
+ *out = (inl >> 22) % (1U << 2);
+ out++;
+ *out = (inl >> 24) % (1U << 2);
+ out++;
+ *out = (inl >> 26) % (1U << 2);
+ out++;
+ *out = (inl >> 28) % (1U << 2);
+ out++;
+ *out = (inl >> 30);
+ ++in;
+ out++;
+
+ return in;
+}
+
+inline const uint32_t* unpack3_32(const uint32_t* in, uint32_t* out) {
+ uint32_t inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out = (inl >> 0) % (1U << 3);
+ out++;
+ *out = (inl >> 3) % (1U << 3);
+ out++;
+ *out = (inl >> 6) % (1U << 3);
+ out++;
+ *out = (inl >> 9) % (1U << 3);
+ out++;
+ *out = (inl >> 12) % (1U << 3);
+ out++;
+ *out = (inl >> 15) % (1U << 3);
+ out++;
+ *out = (inl >> 18) % (1U << 3);
+ out++;
+ *out = (inl >> 21) % (1U << 3);
+ out++;
+ *out = (inl >> 24) % (1U << 3);
+ out++;
+ *out = (inl >> 27) % (1U << 3);
+ out++;
+ *out = (inl >> 30);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 1)) << (3 - 1);
+ out++;
+ *out = (inl >> 1) % (1U << 3);
+ out++;
+ *out = (inl >> 4) % (1U << 3);
+ out++;
+ *out = (inl >> 7) % (1U << 3);
+ out++;
+ *out = (inl >> 10) % (1U << 3);
+ out++;
+ *out = (inl >> 13) % (1U << 3);
+ out++;
+ *out = (inl >> 16) % (1U << 3);
+ out++;
+ *out = (inl >> 19) % (1U << 3);
+ out++;
+ *out = (inl >> 22) % (1U << 3);
+ out++;
+ *out = (inl >> 25) % (1U << 3);
+ out++;
+ *out = (inl >> 28) % (1U << 3);
+ out++;
+ *out = (inl >> 31);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 2)) << (3 - 2);
+ out++;
+ *out = (inl >> 2) % (1U << 3);
+ out++;
+ *out = (inl >> 5) % (1U << 3);
+ out++;
+ *out = (inl >> 8) % (1U << 3);
+ out++;
+ *out = (inl >> 11) % (1U << 3);
+ out++;
+ *out = (inl >> 14) % (1U << 3);
+ out++;
+ *out = (inl >> 17) % (1U << 3);
+ out++;
+ *out = (inl >> 20) % (1U << 3);
+ out++;
+ *out = (inl >> 23) % (1U << 3);
+ out++;
+ *out = (inl >> 26) % (1U << 3);
+ out++;
+ *out = (inl >> 29);
+ ++in;
+ out++;
+
+ return in;
+}
+
+inline const uint32_t* unpack4_32(const uint32_t* in, uint32_t* out) {
+ uint32_t inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out = (inl >> 0) % (1U << 4);
+ out++;
+ *out = (inl >> 4) % (1U << 4);
+ out++;
+ *out = (inl >> 8) % (1U << 4);
+ out++;
+ *out = (inl >> 12) % (1U << 4);
+ out++;
+ *out = (inl >> 16) % (1U << 4);
+ out++;
+ *out = (inl >> 20) % (1U << 4);
+ out++;
+ *out = (inl >> 24) % (1U << 4);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 4);
+ out++;
+ *out = (inl >> 4) % (1U << 4);
+ out++;
+ *out = (inl >> 8) % (1U << 4);
+ out++;
+ *out = (inl >> 12) % (1U << 4);
+ out++;
+ *out = (inl >> 16) % (1U << 4);
+ out++;
+ *out = (inl >> 20) % (1U << 4);
+ out++;
+ *out = (inl >> 24) % (1U << 4);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 4);
+ out++;
+ *out = (inl >> 4) % (1U << 4);
+ out++;
+ *out = (inl >> 8) % (1U << 4);
+ out++;
+ *out = (inl >> 12) % (1U << 4);
+ out++;
+ *out = (inl >> 16) % (1U << 4);
+ out++;
+ *out = (inl >> 20) % (1U << 4);
+ out++;
+ *out = (inl >> 24) % (1U << 4);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 4);
+ out++;
+ *out = (inl >> 4) % (1U << 4);
+ out++;
+ *out = (inl >> 8) % (1U << 4);
+ out++;
+ *out = (inl >> 12) % (1U << 4);
+ out++;
+ *out = (inl >> 16) % (1U << 4);
+ out++;
+ *out = (inl >> 20) % (1U << 4);
+ out++;
+ *out = (inl >> 24) % (1U << 4);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ out++;
+
+ return in;
+}
+
+inline const uint32_t* unpack5_32(const uint32_t* in, uint32_t* out) {
+ uint32_t inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out = (inl >> 0) % (1U << 5);
+ out++;
+ *out = (inl >> 5) % (1U << 5);
+ out++;
+ *out = (inl >> 10) % (1U << 5);
+ out++;
+ *out = (inl >> 15) % (1U << 5);
+ out++;
+ *out = (inl >> 20) % (1U << 5);
+ out++;
+ *out = (inl >> 25) % (1U << 5);
+ out++;
+ *out = (inl >> 30);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 3)) << (5 - 3);
+ out++;
+ *out = (inl >> 3) % (1U << 5);
+ out++;
+ *out = (inl >> 8) % (1U << 5);
+ out++;
+ *out = (inl >> 13) % (1U << 5);
+ out++;
+ *out = (inl >> 18) % (1U << 5);
+ out++;
+ *out = (inl >> 23) % (1U << 5);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 1)) << (5 - 1);
+ out++;
+ *out = (inl >> 1) % (1U << 5);
+ out++;
+ *out = (inl >> 6) % (1U << 5);
+ out++;
+ *out = (inl >> 11) % (1U << 5);
+ out++;
+ *out = (inl >> 16) % (1U << 5);
+ out++;
+ *out = (inl >> 21) % (1U << 5);
+ out++;
+ *out = (inl >> 26) % (1U << 5);
+ out++;
+ *out = (inl >> 31);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (5 - 4);
+ out++;
+ *out = (inl >> 4) % (1U << 5);
+ out++;
+ *out = (inl >> 9) % (1U << 5);
+ out++;
+ *out = (inl >> 14) % (1U << 5);
+ out++;
+ *out = (inl >> 19) % (1U << 5);
+ out++;
+ *out = (inl >> 24) % (1U << 5);
+ out++;
+ *out = (inl >> 29);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 2)) << (5 - 2);
+ out++;
+ *out = (inl >> 2) % (1U << 5);
+ out++;
+ *out = (inl >> 7) % (1U << 5);
+ out++;
+ *out = (inl >> 12) % (1U << 5);
+ out++;
+ *out = (inl >> 17) % (1U << 5);
+ out++;
+ *out = (inl >> 22) % (1U << 5);
+ out++;
+ *out = (inl >> 27);
+ ++in;
+ out++;
+
+ return in;
+}
+
+inline const uint32_t* unpack6_32(const uint32_t* in, uint32_t* out) {
+ uint32_t inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out = (inl >> 0) % (1U << 6);
+ out++;
+ *out = (inl >> 6) % (1U << 6);
+ out++;
+ *out = (inl >> 12) % (1U << 6);
+ out++;
+ *out = (inl >> 18) % (1U << 6);
+ out++;
+ *out = (inl >> 24) % (1U << 6);
+ out++;
+ *out = (inl >> 30);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (6 - 4);
+ out++;
+ *out = (inl >> 4) % (1U << 6);
+ out++;
+ *out = (inl >> 10) % (1U << 6);
+ out++;
+ *out = (inl >> 16) % (1U << 6);
+ out++;
+ *out = (inl >> 22) % (1U << 6);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 2)) << (6 - 2);
+ out++;
+ *out = (inl >> 2) % (1U << 6);
+ out++;
+ *out = (inl >> 8) % (1U << 6);
+ out++;
+ *out = (inl >> 14) % (1U << 6);
+ out++;
+ *out = (inl >> 20) % (1U << 6);
+ out++;
+ *out = (inl >> 26);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 6);
+ out++;
+ *out = (inl >> 6) % (1U << 6);
+ out++;
+ *out = (inl >> 12) % (1U << 6);
+ out++;
+ *out = (inl >> 18) % (1U << 6);
+ out++;
+ *out = (inl >> 24) % (1U << 6);
+ out++;
+ *out = (inl >> 30);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (6 - 4);
+ out++;
+ *out = (inl >> 4) % (1U << 6);
+ out++;
+ *out = (inl >> 10) % (1U << 6);
+ out++;
+ *out = (inl >> 16) % (1U << 6);
+ out++;
+ *out = (inl >> 22) % (1U << 6);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 2)) << (6 - 2);
+ out++;
+ *out = (inl >> 2) % (1U << 6);
+ out++;
+ *out = (inl >> 8) % (1U << 6);
+ out++;
+ *out = (inl >> 14) % (1U << 6);
+ out++;
+ *out = (inl >> 20) % (1U << 6);
+ out++;
+ *out = (inl >> 26);
+ ++in;
+ out++;
+
+ return in;
+}
+
+inline const uint32_t* unpack7_32(const uint32_t* in, uint32_t* out) {
+ uint32_t inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out = (inl >> 0) % (1U << 7);
+ out++;
+ *out = (inl >> 7) % (1U << 7);
+ out++;
+ *out = (inl >> 14) % (1U << 7);
+ out++;
+ *out = (inl >> 21) % (1U << 7);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 3)) << (7 - 3);
+ out++;
+ *out = (inl >> 3) % (1U << 7);
+ out++;
+ *out = (inl >> 10) % (1U << 7);
+ out++;
+ *out = (inl >> 17) % (1U << 7);
+ out++;
+ *out = (inl >> 24) % (1U << 7);
+ out++;
+ *out = (inl >> 31);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 6)) << (7 - 6);
+ out++;
+ *out = (inl >> 6) % (1U << 7);
+ out++;
+ *out = (inl >> 13) % (1U << 7);
+ out++;
+ *out = (inl >> 20) % (1U << 7);
+ out++;
+ *out = (inl >> 27);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 2)) << (7 - 2);
+ out++;
+ *out = (inl >> 2) % (1U << 7);
+ out++;
+ *out = (inl >> 9) % (1U << 7);
+ out++;
+ *out = (inl >> 16) % (1U << 7);
+ out++;
+ *out = (inl >> 23) % (1U << 7);
+ out++;
+ *out = (inl >> 30);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 5)) << (7 - 5);
+ out++;
+ *out = (inl >> 5) % (1U << 7);
+ out++;
+ *out = (inl >> 12) % (1U << 7);
+ out++;
+ *out = (inl >> 19) % (1U << 7);
+ out++;
+ *out = (inl >> 26);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 1)) << (7 - 1);
+ out++;
+ *out = (inl >> 1) % (1U << 7);
+ out++;
+ *out = (inl >> 8) % (1U << 7);
+ out++;
+ *out = (inl >> 15) % (1U << 7);
+ out++;
+ *out = (inl >> 22) % (1U << 7);
+ out++;
+ *out = (inl >> 29);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (7 - 4);
+ out++;
+ *out = (inl >> 4) % (1U << 7);
+ out++;
+ *out = (inl >> 11) % (1U << 7);
+ out++;
+ *out = (inl >> 18) % (1U << 7);
+ out++;
+ *out = (inl >> 25);
+ ++in;
+ out++;
+
+ return in;
+}
+
+inline const uint32_t* unpack8_32(const uint32_t* in, uint32_t* out) {
+ uint32_t inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out = (inl >> 0) % (1U << 8);
+ out++;
+ *out = (inl >> 8) % (1U << 8);
+ out++;
+ *out = (inl >> 16) % (1U << 8);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 8);
+ out++;
+ *out = (inl >> 8) % (1U << 8);
+ out++;
+ *out = (inl >> 16) % (1U << 8);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 8);
+ out++;
+ *out = (inl >> 8) % (1U << 8);
+ out++;
+ *out = (inl >> 16) % (1U << 8);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 8);
+ out++;
+ *out = (inl >> 8) % (1U << 8);
+ out++;
+ *out = (inl >> 16) % (1U << 8);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 8);
+ out++;
+ *out = (inl >> 8) % (1U << 8);
+ out++;
+ *out = (inl >> 16) % (1U << 8);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 8);
+ out++;
+ *out = (inl >> 8) % (1U << 8);
+ out++;
+ *out = (inl >> 16) % (1U << 8);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 8);
+ out++;
+ *out = (inl >> 8) % (1U << 8);
+ out++;
+ *out = (inl >> 16) % (1U << 8);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 8);
+ out++;
+ *out = (inl >> 8) % (1U << 8);
+ out++;
+ *out = (inl >> 16) % (1U << 8);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ out++;
+
+ return in;
+}
+
+inline const uint32_t* unpack9_32(const uint32_t* in, uint32_t* out) {
+ uint32_t inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out = (inl >> 0) % (1U << 9);
+ out++;
+ *out = (inl >> 9) % (1U << 9);
+ out++;
+ *out = (inl >> 18) % (1U << 9);
+ out++;
+ *out = (inl >> 27);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (9 - 4);
+ out++;
+ *out = (inl >> 4) % (1U << 9);
+ out++;
+ *out = (inl >> 13) % (1U << 9);
+ out++;
+ *out = (inl >> 22) % (1U << 9);
+ out++;
+ *out = (inl >> 31);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 8)) << (9 - 8);
+ out++;
+ *out = (inl >> 8) % (1U << 9);
+ out++;
+ *out = (inl >> 17) % (1U << 9);
+ out++;
+ *out = (inl >> 26);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 3)) << (9 - 3);
+ out++;
+ *out = (inl >> 3) % (1U << 9);
+ out++;
+ *out = (inl >> 12) % (1U << 9);
+ out++;
+ *out = (inl >> 21) % (1U << 9);
+ out++;
+ *out = (inl >> 30);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 7)) << (9 - 7);
+ out++;
+ *out = (inl >> 7) % (1U << 9);
+ out++;
+ *out = (inl >> 16) % (1U << 9);
+ out++;
+ *out = (inl >> 25);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 2)) << (9 - 2);
+ out++;
+ *out = (inl >> 2) % (1U << 9);
+ out++;
+ *out = (inl >> 11) % (1U << 9);
+ out++;
+ *out = (inl >> 20) % (1U << 9);
+ out++;
+ *out = (inl >> 29);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 6)) << (9 - 6);
+ out++;
+ *out = (inl >> 6) % (1U << 9);
+ out++;
+ *out = (inl >> 15) % (1U << 9);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 1)) << (9 - 1);
+ out++;
+ *out = (inl >> 1) % (1U << 9);
+ out++;
+ *out = (inl >> 10) % (1U << 9);
+ out++;
+ *out = (inl >> 19) % (1U << 9);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 5)) << (9 - 5);
+ out++;
+ *out = (inl >> 5) % (1U << 9);
+ out++;
+ *out = (inl >> 14) % (1U << 9);
+ out++;
+ *out = (inl >> 23);
+ ++in;
+ out++;
+
+ return in;
+}
+
+inline const uint32_t* unpack10_32(const uint32_t* in, uint32_t* out) {
+ uint32_t inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out = (inl >> 0) % (1U << 10);
+ out++;
+ *out = (inl >> 10) % (1U << 10);
+ out++;
+ *out = (inl >> 20) % (1U << 10);
+ out++;
+ *out = (inl >> 30);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 8)) << (10 - 8);
+ out++;
+ *out = (inl >> 8) % (1U << 10);
+ out++;
+ *out = (inl >> 18) % (1U << 10);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 6)) << (10 - 6);
+ out++;
+ *out = (inl >> 6) % (1U << 10);
+ out++;
+ *out = (inl >> 16) % (1U << 10);
+ out++;
+ *out = (inl >> 26);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (10 - 4);
+ out++;
+ *out = (inl >> 4) % (1U << 10);
+ out++;
+ *out = (inl >> 14) % (1U << 10);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 2)) << (10 - 2);
+ out++;
+ *out = (inl >> 2) % (1U << 10);
+ out++;
+ *out = (inl >> 12) % (1U << 10);
+ out++;
+ *out = (inl >> 22);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 10);
+ out++;
+ *out = (inl >> 10) % (1U << 10);
+ out++;
+ *out = (inl >> 20) % (1U << 10);
+ out++;
+ *out = (inl >> 30);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 8)) << (10 - 8);
+ out++;
+ *out = (inl >> 8) % (1U << 10);
+ out++;
+ *out = (inl >> 18) % (1U << 10);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 6)) << (10 - 6);
+ out++;
+ *out = (inl >> 6) % (1U << 10);
+ out++;
+ *out = (inl >> 16) % (1U << 10);
+ out++;
+ *out = (inl >> 26);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (10 - 4);
+ out++;
+ *out = (inl >> 4) % (1U << 10);
+ out++;
+ *out = (inl >> 14) % (1U << 10);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 2)) << (10 - 2);
+ out++;
+ *out = (inl >> 2) % (1U << 10);
+ out++;
+ *out = (inl >> 12) % (1U << 10);
+ out++;
+ *out = (inl >> 22);
+ ++in;
+ out++;
+
+ return in;
+}
+
+inline const uint32_t* unpack11_32(const uint32_t* in, uint32_t* out) {
+ uint32_t inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out = (inl >> 0) % (1U << 11);
+ out++;
+ *out = (inl >> 11) % (1U << 11);
+ out++;
+ *out = (inl >> 22);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 1)) << (11 - 1);
+ out++;
+ *out = (inl >> 1) % (1U << 11);
+ out++;
+ *out = (inl >> 12) % (1U << 11);
+ out++;
+ *out = (inl >> 23);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 2)) << (11 - 2);
+ out++;
+ *out = (inl >> 2) % (1U << 11);
+ out++;
+ *out = (inl >> 13) % (1U << 11);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 3)) << (11 - 3);
+ out++;
+ *out = (inl >> 3) % (1U << 11);
+ out++;
+ *out = (inl >> 14) % (1U << 11);
+ out++;
+ *out = (inl >> 25);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (11 - 4);
+ out++;
+ *out = (inl >> 4) % (1U << 11);
+ out++;
+ *out = (inl >> 15) % (1U << 11);
+ out++;
+ *out = (inl >> 26);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 5)) << (11 - 5);
+ out++;
+ *out = (inl >> 5) % (1U << 11);
+ out++;
+ *out = (inl >> 16) % (1U << 11);
+ out++;
+ *out = (inl >> 27);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 6)) << (11 - 6);
+ out++;
+ *out = (inl >> 6) % (1U << 11);
+ out++;
+ *out = (inl >> 17) % (1U << 11);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 7)) << (11 - 7);
+ out++;
+ *out = (inl >> 7) % (1U << 11);
+ out++;
+ *out = (inl >> 18) % (1U << 11);
+ out++;
+ *out = (inl >> 29);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 8)) << (11 - 8);
+ out++;
+ *out = (inl >> 8) % (1U << 11);
+ out++;
+ *out = (inl >> 19) % (1U << 11);
+ out++;
+ *out = (inl >> 30);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 9)) << (11 - 9);
+ out++;
+ *out = (inl >> 9) % (1U << 11);
+ out++;
+ *out = (inl >> 20) % (1U << 11);
+ out++;
+ *out = (inl >> 31);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 10)) << (11 - 10);
+ out++;
+ *out = (inl >> 10) % (1U << 11);
+ out++;
+ *out = (inl >> 21);
+ ++in;
+ out++;
+
+ return in;
+}
+
+inline const uint32_t* unpack12_32(const uint32_t* in, uint32_t* out) {
+ uint32_t inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out = (inl >> 0) % (1U << 12);
+ out++;
+ *out = (inl >> 12) % (1U << 12);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (12 - 4);
+ out++;
+ *out = (inl >> 4) % (1U << 12);
+ out++;
+ *out = (inl >> 16) % (1U << 12);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 8)) << (12 - 8);
+ out++;
+ *out = (inl >> 8) % (1U << 12);
+ out++;
+ *out = (inl >> 20);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 12);
+ out++;
+ *out = (inl >> 12) % (1U << 12);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (12 - 4);
+ out++;
+ *out = (inl >> 4) % (1U << 12);
+ out++;
+ *out = (inl >> 16) % (1U << 12);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 8)) << (12 - 8);
+ out++;
+ *out = (inl >> 8) % (1U << 12);
+ out++;
+ *out = (inl >> 20);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 12);
+ out++;
+ *out = (inl >> 12) % (1U << 12);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (12 - 4);
+ out++;
+ *out = (inl >> 4) % (1U << 12);
+ out++;
+ *out = (inl >> 16) % (1U << 12);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 8)) << (12 - 8);
+ out++;
+ *out = (inl >> 8) % (1U << 12);
+ out++;
+ *out = (inl >> 20);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 12);
+ out++;
+ *out = (inl >> 12) % (1U << 12);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (12 - 4);
+ out++;
+ *out = (inl >> 4) % (1U << 12);
+ out++;
+ *out = (inl >> 16) % (1U << 12);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 8)) << (12 - 8);
+ out++;
+ *out = (inl >> 8) % (1U << 12);
+ out++;
+ *out = (inl >> 20);
+ ++in;
+ out++;
+
+ return in;
+}
+
+inline const uint32_t* unpack13_32(const uint32_t* in, uint32_t* out) {
+ uint32_t inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out = (inl >> 0) % (1U << 13);
+ out++;
+ *out = (inl >> 13) % (1U << 13);
+ out++;
+ *out = (inl >> 26);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 7)) << (13 - 7);
+ out++;
+ *out = (inl >> 7) % (1U << 13);
+ out++;
+ *out = (inl >> 20);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 1)) << (13 - 1);
+ out++;
+ *out = (inl >> 1) % (1U << 13);
+ out++;
+ *out = (inl >> 14) % (1U << 13);
+ out++;
+ *out = (inl >> 27);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 8)) << (13 - 8);
+ out++;
+ *out = (inl >> 8) % (1U << 13);
+ out++;
+ *out = (inl >> 21);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 2)) << (13 - 2);
+ out++;
+ *out = (inl >> 2) % (1U << 13);
+ out++;
+ *out = (inl >> 15) % (1U << 13);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 9)) << (13 - 9);
+ out++;
+ *out = (inl >> 9) % (1U << 13);
+ out++;
+ *out = (inl >> 22);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 3)) << (13 - 3);
+ out++;
+ *out = (inl >> 3) % (1U << 13);
+ out++;
+ *out = (inl >> 16) % (1U << 13);
+ out++;
+ *out = (inl >> 29);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 10)) << (13 - 10);
+ out++;
+ *out = (inl >> 10) % (1U << 13);
+ out++;
+ *out = (inl >> 23);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (13 - 4);
+ out++;
+ *out = (inl >> 4) % (1U << 13);
+ out++;
+ *out = (inl >> 17) % (1U << 13);
+ out++;
+ *out = (inl >> 30);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 11)) << (13 - 11);
+ out++;
+ *out = (inl >> 11) % (1U << 13);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 5)) << (13 - 5);
+ out++;
+ *out = (inl >> 5) % (1U << 13);
+ out++;
+ *out = (inl >> 18) % (1U << 13);
+ out++;
+ *out = (inl >> 31);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 12)) << (13 - 12);
+ out++;
+ *out = (inl >> 12) % (1U << 13);
+ out++;
+ *out = (inl >> 25);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 6)) << (13 - 6);
+ out++;
+ *out = (inl >> 6) % (1U << 13);
+ out++;
+ *out = (inl >> 19);
+ ++in;
+ out++;
+
+ return in;
+}
+
+inline const uint32_t* unpack14_32(const uint32_t* in, uint32_t* out) {
+ uint32_t inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out = (inl >> 0) % (1U << 14);
+ out++;
+ *out = (inl >> 14) % (1U << 14);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 10)) << (14 - 10);
+ out++;
+ *out = (inl >> 10) % (1U << 14);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 6)) << (14 - 6);
+ out++;
+ *out = (inl >> 6) % (1U << 14);
+ out++;
+ *out = (inl >> 20);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 2)) << (14 - 2);
+ out++;
+ *out = (inl >> 2) % (1U << 14);
+ out++;
+ *out = (inl >> 16) % (1U << 14);
+ out++;
+ *out = (inl >> 30);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 12)) << (14 - 12);
+ out++;
+ *out = (inl >> 12) % (1U << 14);
+ out++;
+ *out = (inl >> 26);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 8)) << (14 - 8);
+ out++;
+ *out = (inl >> 8) % (1U << 14);
+ out++;
+ *out = (inl >> 22);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (14 - 4);
+ out++;
+ *out = (inl >> 4) % (1U << 14);
+ out++;
+ *out = (inl >> 18);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 14);
+ out++;
+ *out = (inl >> 14) % (1U << 14);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 10)) << (14 - 10);
+ out++;
+ *out = (inl >> 10) % (1U << 14);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 6)) << (14 - 6);
+ out++;
+ *out = (inl >> 6) % (1U << 14);
+ out++;
+ *out = (inl >> 20);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 2)) << (14 - 2);
+ out++;
+ *out = (inl >> 2) % (1U << 14);
+ out++;
+ *out = (inl >> 16) % (1U << 14);
+ out++;
+ *out = (inl >> 30);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 12)) << (14 - 12);
+ out++;
+ *out = (inl >> 12) % (1U << 14);
+ out++;
+ *out = (inl >> 26);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 8)) << (14 - 8);
+ out++;
+ *out = (inl >> 8) % (1U << 14);
+ out++;
+ *out = (inl >> 22);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (14 - 4);
+ out++;
+ *out = (inl >> 4) % (1U << 14);
+ out++;
+ *out = (inl >> 18);
+ ++in;
+ out++;
+
+ return in;
+}
+
+inline const uint32_t* unpack15_32(const uint32_t* in, uint32_t* out) {
+ uint32_t inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out = (inl >> 0) % (1U << 15);
+ out++;
+ *out = (inl >> 15) % (1U << 15);
+ out++;
+ *out = (inl >> 30);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 13)) << (15 - 13);
+ out++;
+ *out = (inl >> 13) % (1U << 15);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 11)) << (15 - 11);
+ out++;
+ *out = (inl >> 11) % (1U << 15);
+ out++;
+ *out = (inl >> 26);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 9)) << (15 - 9);
+ out++;
+ *out = (inl >> 9) % (1U << 15);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 7)) << (15 - 7);
+ out++;
+ *out = (inl >> 7) % (1U << 15);
+ out++;
+ *out = (inl >> 22);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 5)) << (15 - 5);
+ out++;
+ *out = (inl >> 5) % (1U << 15);
+ out++;
+ *out = (inl >> 20);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 3)) << (15 - 3);
+ out++;
+ *out = (inl >> 3) % (1U << 15);
+ out++;
+ *out = (inl >> 18);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 1)) << (15 - 1);
+ out++;
+ *out = (inl >> 1) % (1U << 15);
+ out++;
+ *out = (inl >> 16) % (1U << 15);
+ out++;
+ *out = (inl >> 31);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 14)) << (15 - 14);
+ out++;
+ *out = (inl >> 14) % (1U << 15);
+ out++;
+ *out = (inl >> 29);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 12)) << (15 - 12);
+ out++;
+ *out = (inl >> 12) % (1U << 15);
+ out++;
+ *out = (inl >> 27);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 10)) << (15 - 10);
+ out++;
+ *out = (inl >> 10) % (1U << 15);
+ out++;
+ *out = (inl >> 25);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 8)) << (15 - 8);
+ out++;
+ *out = (inl >> 8) % (1U << 15);
+ out++;
+ *out = (inl >> 23);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 6)) << (15 - 6);
+ out++;
+ *out = (inl >> 6) % (1U << 15);
+ out++;
+ *out = (inl >> 21);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (15 - 4);
+ out++;
+ *out = (inl >> 4) % (1U << 15);
+ out++;
+ *out = (inl >> 19);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 2)) << (15 - 2);
+ out++;
+ *out = (inl >> 2) % (1U << 15);
+ out++;
+ *out = (inl >> 17);
+ ++in;
+ out++;
+
+ return in;
+}
+
+inline const uint32_t* unpack16_32(const uint32_t* in, uint32_t* out) {
+ uint32_t inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out = (inl >> 0) % (1U << 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ out++;
+
+ return in;
+}
+
+inline const uint32_t* unpack17_32(const uint32_t* in, uint32_t* out) {
+ uint32_t inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out = (inl >> 0) % (1U << 17);
+ out++;
+ *out = (inl >> 17);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 2)) << (17 - 2);
+ out++;
+ *out = (inl >> 2) % (1U << 17);
+ out++;
+ *out = (inl >> 19);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (17 - 4);
+ out++;
+ *out = (inl >> 4) % (1U << 17);
+ out++;
+ *out = (inl >> 21);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 6)) << (17 - 6);
+ out++;
+ *out = (inl >> 6) % (1U << 17);
+ out++;
+ *out = (inl >> 23);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 8)) << (17 - 8);
+ out++;
+ *out = (inl >> 8) % (1U << 17);
+ out++;
+ *out = (inl >> 25);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 10)) << (17 - 10);
+ out++;
+ *out = (inl >> 10) % (1U << 17);
+ out++;
+ *out = (inl >> 27);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 12)) << (17 - 12);
+ out++;
+ *out = (inl >> 12) % (1U << 17);
+ out++;
+ *out = (inl >> 29);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 14)) << (17 - 14);
+ out++;
+ *out = (inl >> 14) % (1U << 17);
+ out++;
+ *out = (inl >> 31);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 16)) << (17 - 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 1)) << (17 - 1);
+ out++;
+ *out = (inl >> 1) % (1U << 17);
+ out++;
+ *out = (inl >> 18);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 3)) << (17 - 3);
+ out++;
+ *out = (inl >> 3) % (1U << 17);
+ out++;
+ *out = (inl >> 20);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 5)) << (17 - 5);
+ out++;
+ *out = (inl >> 5) % (1U << 17);
+ out++;
+ *out = (inl >> 22);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 7)) << (17 - 7);
+ out++;
+ *out = (inl >> 7) % (1U << 17);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 9)) << (17 - 9);
+ out++;
+ *out = (inl >> 9) % (1U << 17);
+ out++;
+ *out = (inl >> 26);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 11)) << (17 - 11);
+ out++;
+ *out = (inl >> 11) % (1U << 17);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 13)) << (17 - 13);
+ out++;
+ *out = (inl >> 13) % (1U << 17);
+ out++;
+ *out = (inl >> 30);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 15)) << (17 - 15);
+ out++;
+ *out = (inl >> 15);
+ ++in;
+ out++;
+
+ return in;
+}
+
+inline const uint32_t* unpack18_32(const uint32_t* in, uint32_t* out) {
+ uint32_t inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out = (inl >> 0) % (1U << 18);
+ out++;
+ *out = (inl >> 18);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (18 - 4);
+ out++;
+ *out = (inl >> 4) % (1U << 18);
+ out++;
+ *out = (inl >> 22);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 8)) << (18 - 8);
+ out++;
+ *out = (inl >> 8) % (1U << 18);
+ out++;
+ *out = (inl >> 26);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 12)) << (18 - 12);
+ out++;
+ *out = (inl >> 12) % (1U << 18);
+ out++;
+ *out = (inl >> 30);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 16)) << (18 - 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 2)) << (18 - 2);
+ out++;
+ *out = (inl >> 2) % (1U << 18);
+ out++;
+ *out = (inl >> 20);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 6)) << (18 - 6);
+ out++;
+ *out = (inl >> 6) % (1U << 18);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 10)) << (18 - 10);
+ out++;
+ *out = (inl >> 10) % (1U << 18);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 14)) << (18 - 14);
+ out++;
+ *out = (inl >> 14);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 18);
+ out++;
+ *out = (inl >> 18);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (18 - 4);
+ out++;
+ *out = (inl >> 4) % (1U << 18);
+ out++;
+ *out = (inl >> 22);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 8)) << (18 - 8);
+ out++;
+ *out = (inl >> 8) % (1U << 18);
+ out++;
+ *out = (inl >> 26);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 12)) << (18 - 12);
+ out++;
+ *out = (inl >> 12) % (1U << 18);
+ out++;
+ *out = (inl >> 30);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 16)) << (18 - 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 2)) << (18 - 2);
+ out++;
+ *out = (inl >> 2) % (1U << 18);
+ out++;
+ *out = (inl >> 20);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 6)) << (18 - 6);
+ out++;
+ *out = (inl >> 6) % (1U << 18);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 10)) << (18 - 10);
+ out++;
+ *out = (inl >> 10) % (1U << 18);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 14)) << (18 - 14);
+ out++;
+ *out = (inl >> 14);
+ ++in;
+ out++;
+
+ return in;
+}
+
+inline const uint32_t* unpack19_32(const uint32_t* in, uint32_t* out) {
+ uint32_t inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out = (inl >> 0) % (1U << 19);
+ out++;
+ *out = (inl >> 19);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 6)) << (19 - 6);
+ out++;
+ *out = (inl >> 6) % (1U << 19);
+ out++;
+ *out = (inl >> 25);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 12)) << (19 - 12);
+ out++;
+ *out = (inl >> 12) % (1U << 19);
+ out++;
+ *out = (inl >> 31);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 18)) << (19 - 18);
+ out++;
+ *out = (inl >> 18);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 5)) << (19 - 5);
+ out++;
+ *out = (inl >> 5) % (1U << 19);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 11)) << (19 - 11);
+ out++;
+ *out = (inl >> 11) % (1U << 19);
+ out++;
+ *out = (inl >> 30);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 17)) << (19 - 17);
+ out++;
+ *out = (inl >> 17);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (19 - 4);
+ out++;
+ *out = (inl >> 4) % (1U << 19);
+ out++;
+ *out = (inl >> 23);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 10)) << (19 - 10);
+ out++;
+ *out = (inl >> 10) % (1U << 19);
+ out++;
+ *out = (inl >> 29);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 16)) << (19 - 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 3)) << (19 - 3);
+ out++;
+ *out = (inl >> 3) % (1U << 19);
+ out++;
+ *out = (inl >> 22);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 9)) << (19 - 9);
+ out++;
+ *out = (inl >> 9) % (1U << 19);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 15)) << (19 - 15);
+ out++;
+ *out = (inl >> 15);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 2)) << (19 - 2);
+ out++;
+ *out = (inl >> 2) % (1U << 19);
+ out++;
+ *out = (inl >> 21);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 8)) << (19 - 8);
+ out++;
+ *out = (inl >> 8) % (1U << 19);
+ out++;
+ *out = (inl >> 27);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 14)) << (19 - 14);
+ out++;
+ *out = (inl >> 14);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 1)) << (19 - 1);
+ out++;
+ *out = (inl >> 1) % (1U << 19);
+ out++;
+ *out = (inl >> 20);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 7)) << (19 - 7);
+ out++;
+ *out = (inl >> 7) % (1U << 19);
+ out++;
+ *out = (inl >> 26);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 13)) << (19 - 13);
+ out++;
+ *out = (inl >> 13);
+ ++in;
+ out++;
+
+ return in;
+}
+
+inline const uint32_t* unpack20_32(const uint32_t* in, uint32_t* out) {
+ uint32_t inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out = (inl >> 0) % (1U << 20);
+ out++;
+ *out = (inl >> 20);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 8)) << (20 - 8);
+ out++;
+ *out = (inl >> 8) % (1U << 20);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 16)) << (20 - 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (20 - 4);
+ out++;
+ *out = (inl >> 4) % (1U << 20);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 12)) << (20 - 12);
+ out++;
+ *out = (inl >> 12);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 20);
+ out++;
+ *out = (inl >> 20);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 8)) << (20 - 8);
+ out++;
+ *out = (inl >> 8) % (1U << 20);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 16)) << (20 - 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (20 - 4);
+ out++;
+ *out = (inl >> 4) % (1U << 20);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 12)) << (20 - 12);
+ out++;
+ *out = (inl >> 12);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 20);
+ out++;
+ *out = (inl >> 20);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 8)) << (20 - 8);
+ out++;
+ *out = (inl >> 8) % (1U << 20);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 16)) << (20 - 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (20 - 4);
+ out++;
+ *out = (inl >> 4) % (1U << 20);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 12)) << (20 - 12);
+ out++;
+ *out = (inl >> 12);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 20);
+ out++;
+ *out = (inl >> 20);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 8)) << (20 - 8);
+ out++;
+ *out = (inl >> 8) % (1U << 20);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 16)) << (20 - 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (20 - 4);
+ out++;
+ *out = (inl >> 4) % (1U << 20);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 12)) << (20 - 12);
+ out++;
+ *out = (inl >> 12);
+ ++in;
+ out++;
+
+ return in;
+}
+
+// Unpacks 32 consecutive 21-bit unsigned values from the 21 32-bit words at
+// `in` into out[0..31] and returns `in + 21`.
+//
+// Values are laid out back to back, starting at the least significant bit of
+// the first word; a value that does not fit in the bits left in a word
+// continues in the low bits of the following word. Every word is fetched with
+// util::SafeLoad and passed through arrow::BitUtil::FromLittleEndian.
+inline const uint32_t* unpack21_32(const uint32_t* in, uint32_t* out) {
+  const uint32_t kWidth = 21;
+  const uint32_t kValueMask = (1U << kWidth) - 1U;
+  const int kLast = 31;
+
+  uint32_t word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
+  uint32_t bit_pos = 0;  // offset of the next value's lowest bit inside `word`
+
+  for (int idx = 0; idx <= kLast; ++idx) {
+    const uint32_t bit_end = bit_pos + kWidth;
+    if (bit_end < 32) {
+      // The value lies strictly inside the current word.
+      out[idx] = (word >> bit_pos) & kValueMask;
+      bit_pos = bit_end;
+      continue;
+    }
+
+    // The value uses up the current word: store the bits available here first.
+    out[idx] = word >> bit_pos;
+    ++in;
+    bit_pos = bit_end - 32;  // bits of this value that live in the next word
+    if (idx == kLast) {
+      break;  // the last value ends on a word boundary; nothing more to load
+    }
+
+    word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
+    if (bit_pos != 0) {
+      // Place the spilled-over low bits of the new word above the stored part.
+      out[idx] |= (word % (1U << bit_pos)) << (kWidth - bit_pos);
+    }
+  }
+
+  return in;
+}
+
+// Unpacks 32 consecutive 22-bit unsigned values from the 22 32-bit words at
+// `in` into out[0..31] and returns `in + 22`.
+//
+// Values are laid out back to back, starting at the least significant bit of
+// the first word; a value that does not fit in the bits left in a word
+// continues in the low bits of the following word. Every word is fetched with
+// util::SafeLoad and passed through arrow::BitUtil::FromLittleEndian.
+inline const uint32_t* unpack22_32(const uint32_t* in, uint32_t* out) {
+  const uint32_t kWidth = 22;
+  const uint32_t kValueMask = (1U << kWidth) - 1U;
+  const int kLast = 31;
+
+  uint32_t word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
+  uint32_t bit_pos = 0;  // offset of the next value's lowest bit inside `word`
+
+  for (int idx = 0; idx <= kLast; ++idx) {
+    const uint32_t bit_end = bit_pos + kWidth;
+    if (bit_end < 32) {
+      // The value lies strictly inside the current word.
+      out[idx] = (word >> bit_pos) & kValueMask;
+      bit_pos = bit_end;
+      continue;
+    }
+
+    // The value uses up the current word: store the bits available here first.
+    out[idx] = word >> bit_pos;
+    ++in;
+    bit_pos = bit_end - 32;  // bits of this value that live in the next word
+    if (idx == kLast) {
+      break;  // the last value ends on a word boundary; nothing more to load
+    }
+
+    word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
+    if (bit_pos != 0) {
+      // Place the spilled-over low bits of the new word above the stored part.
+      out[idx] |= (word % (1U << bit_pos)) << (kWidth - bit_pos);
+    }
+  }
+
+  return in;
+}
+
+// Unpacks 32 consecutive 23-bit unsigned values from the 23 32-bit words at
+// `in` into out[0..31] and returns `in + 23`.
+//
+// Values are laid out back to back, starting at the least significant bit of
+// the first word; a value that does not fit in the bits left in a word
+// continues in the low bits of the following word. Every word is fetched with
+// util::SafeLoad and passed through arrow::BitUtil::FromLittleEndian.
+inline const uint32_t* unpack23_32(const uint32_t* in, uint32_t* out) {
+  const uint32_t kWidth = 23;
+  const uint32_t kValueMask = (1U << kWidth) - 1U;
+  const int kLast = 31;
+
+  uint32_t word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
+  uint32_t bit_pos = 0;  // offset of the next value's lowest bit inside `word`
+
+  for (int idx = 0; idx <= kLast; ++idx) {
+    const uint32_t bit_end = bit_pos + kWidth;
+    if (bit_end < 32) {
+      // The value lies strictly inside the current word.
+      out[idx] = (word >> bit_pos) & kValueMask;
+      bit_pos = bit_end;
+      continue;
+    }
+
+    // The value uses up the current word: store the bits available here first.
+    out[idx] = word >> bit_pos;
+    ++in;
+    bit_pos = bit_end - 32;  // bits of this value that live in the next word
+    if (idx == kLast) {
+      break;  // the last value ends on a word boundary; nothing more to load
+    }
+
+    word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
+    if (bit_pos != 0) {
+      // Place the spilled-over low bits of the new word above the stored part.
+      out[idx] |= (word % (1U << bit_pos)) << (kWidth - bit_pos);
+    }
+  }
+
+  return in;
+}
+
+// Unpacks 32 consecutive 24-bit unsigned values from the 24 32-bit words at
+// `in` into out[0..31] and returns `in + 24`.
+//
+// Values are laid out back to back, starting at the least significant bit of
+// the first word; a value that does not fit in the bits left in a word
+// continues in the low bits of the following word. Every word is fetched with
+// util::SafeLoad and passed through arrow::BitUtil::FromLittleEndian.
+inline const uint32_t* unpack24_32(const uint32_t* in, uint32_t* out) {
+  const uint32_t kWidth = 24;
+  const uint32_t kValueMask = (1U << kWidth) - 1U;
+  const int kLast = 31;
+
+  uint32_t word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
+  uint32_t bit_pos = 0;  // offset of the next value's lowest bit inside `word`
+
+  for (int idx = 0; idx <= kLast; ++idx) {
+    const uint32_t bit_end = bit_pos + kWidth;
+    if (bit_end < 32) {
+      // The value lies strictly inside the current word.
+      out[idx] = (word >> bit_pos) & kValueMask;
+      bit_pos = bit_end;
+      continue;
+    }
+
+    // The value uses up the current word: store the bits available here first.
+    out[idx] = word >> bit_pos;
+    ++in;
+    bit_pos = bit_end - 32;  // bits of this value that live in the next word
+    if (idx == kLast) {
+      break;  // the last value ends on a word boundary; nothing more to load
+    }
+
+    word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
+    if (bit_pos != 0) {
+      // Place the spilled-over low bits of the new word above the stored part.
+      out[idx] |= (word % (1U << bit_pos)) << (kWidth - bit_pos);
+    }
+  }
+
+  return in;
+}
+
+// Unpacks 32 consecutive 25-bit unsigned values from the 25 32-bit words at
+// `in` into out[0..31] and returns `in + 25`.
+//
+// Values are laid out back to back, starting at the least significant bit of
+// the first word; a value that does not fit in the bits left in a word
+// continues in the low bits of the following word. Every word is fetched with
+// util::SafeLoad and passed through arrow::BitUtil::FromLittleEndian.
+inline const uint32_t* unpack25_32(const uint32_t* in, uint32_t* out) {
+  const uint32_t kWidth = 25;
+  const uint32_t kValueMask = (1U << kWidth) - 1U;
+  const int kLast = 31;
+
+  uint32_t word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
+  uint32_t bit_pos = 0;  // offset of the next value's lowest bit inside `word`
+
+  for (int idx = 0; idx <= kLast; ++idx) {
+    const uint32_t bit_end = bit_pos + kWidth;
+    if (bit_end < 32) {
+      // The value lies strictly inside the current word.
+      out[idx] = (word >> bit_pos) & kValueMask;
+      bit_pos = bit_end;
+      continue;
+    }
+
+    // The value uses up the current word: store the bits available here first.
+    out[idx] = word >> bit_pos;
+    ++in;
+    bit_pos = bit_end - 32;  // bits of this value that live in the next word
+    if (idx == kLast) {
+      break;  // the last value ends on a word boundary; nothing more to load
+    }
+
+    word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
+    if (bit_pos != 0) {
+      // Place the spilled-over low bits of the new word above the stored part.
+      out[idx] |= (word % (1U << bit_pos)) << (kWidth - bit_pos);
+    }
+  }
+
+  return in;
+}
+
+// Unpacks 32 consecutive 26-bit unsigned values from the 26 32-bit words at
+// `in` into out[0..31] and returns `in + 26`.
+//
+// Values are laid out back to back, starting at the least significant bit of
+// the first word; a value that does not fit in the bits left in a word
+// continues in the low bits of the following word. Every word is fetched with
+// util::SafeLoad and passed through arrow::BitUtil::FromLittleEndian.
+inline const uint32_t* unpack26_32(const uint32_t* in, uint32_t* out) {
+  const uint32_t kWidth = 26;
+  const uint32_t kValueMask = (1U << kWidth) - 1U;
+  const int kLast = 31;
+
+  uint32_t word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
+  uint32_t bit_pos = 0;  // offset of the next value's lowest bit inside `word`
+
+  for (int idx = 0; idx <= kLast; ++idx) {
+    const uint32_t bit_end = bit_pos + kWidth;
+    if (bit_end < 32) {
+      // The value lies strictly inside the current word.
+      out[idx] = (word >> bit_pos) & kValueMask;
+      bit_pos = bit_end;
+      continue;
+    }
+
+    // The value uses up the current word: store the bits available here first.
+    out[idx] = word >> bit_pos;
+    ++in;
+    bit_pos = bit_end - 32;  // bits of this value that live in the next word
+    if (idx == kLast) {
+      break;  // the last value ends on a word boundary; nothing more to load
+    }
+
+    word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
+    if (bit_pos != 0) {
+      // Place the spilled-over low bits of the new word above the stored part.
+      out[idx] |= (word % (1U << bit_pos)) << (kWidth - bit_pos);
+    }
+  }
+
+  return in;
+}
+
+// Unpacks 32 consecutive 27-bit unsigned values from the 27 32-bit words at
+// `in` into out[0..31] and returns `in + 27`.
+//
+// Values are laid out back to back, starting at the least significant bit of
+// the first word; a value that does not fit in the bits left in a word
+// continues in the low bits of the following word. Every word is fetched with
+// util::SafeLoad and passed through arrow::BitUtil::FromLittleEndian.
+inline const uint32_t* unpack27_32(const uint32_t* in, uint32_t* out) {
+  const uint32_t kWidth = 27;
+  const uint32_t kValueMask = (1U << kWidth) - 1U;
+  const int kLast = 31;
+
+  uint32_t word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
+  uint32_t bit_pos = 0;  // offset of the next value's lowest bit inside `word`
+
+  for (int idx = 0; idx <= kLast; ++idx) {
+    const uint32_t bit_end = bit_pos + kWidth;
+    if (bit_end < 32) {
+      // The value lies strictly inside the current word.
+      out[idx] = (word >> bit_pos) & kValueMask;
+      bit_pos = bit_end;
+      continue;
+    }
+
+    // The value uses up the current word: store the bits available here first.
+    out[idx] = word >> bit_pos;
+    ++in;
+    bit_pos = bit_end - 32;  // bits of this value that live in the next word
+    if (idx == kLast) {
+      break;  // the last value ends on a word boundary; nothing more to load
+    }
+
+    word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
+    if (bit_pos != 0) {
+      // Place the spilled-over low bits of the new word above the stored part.
+      out[idx] |= (word % (1U << bit_pos)) << (kWidth - bit_pos);
+    }
+  }
+
+  return in;
+}
+
+// Unpacks 32 consecutive 28-bit unsigned values from the 28 32-bit words at
+// `in` into out[0..31] and returns `in + 28`.
+//
+// Values are laid out back to back, starting at the least significant bit of
+// the first word; a value that does not fit in the bits left in a word
+// continues in the low bits of the following word. Every word is fetched with
+// util::SafeLoad and passed through arrow::BitUtil::FromLittleEndian.
+inline const uint32_t* unpack28_32(const uint32_t* in, uint32_t* out) {
+  const uint32_t kWidth = 28;
+  const uint32_t kValueMask = (1U << kWidth) - 1U;
+  const int kLast = 31;
+
+  uint32_t word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
+  uint32_t bit_pos = 0;  // offset of the next value's lowest bit inside `word`
+
+  for (int idx = 0; idx <= kLast; ++idx) {
+    const uint32_t bit_end = bit_pos + kWidth;
+    if (bit_end < 32) {
+      // The value lies strictly inside the current word.
+      out[idx] = (word >> bit_pos) & kValueMask;
+      bit_pos = bit_end;
+      continue;
+    }
+
+    // The value uses up the current word: store the bits available here first.
+    out[idx] = word >> bit_pos;
+    ++in;
+    bit_pos = bit_end - 32;  // bits of this value that live in the next word
+    if (idx == kLast) {
+      break;  // the last value ends on a word boundary; nothing more to load
+    }
+
+    word = arrow::BitUtil::FromLittleEndian(util::SafeLoad(in));
+    if (bit_pos != 0) {
+      // Place the spilled-over low bits of the new word above the stored part.
+      out[idx] |= (word % (1U << bit_pos)) << (kWidth - bit_pos);
+    }
+  }
+
+  return in;
+}
+
+inline const uint32_t* unpack29_32(const uint32_t* in, uint32_t* out) {
+ uint32_t inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out = (inl >> 0) % (1U << 29);
+ out++;
+ *out = (inl >> 29);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 26)) << (29 - 26);
+ out++;
+ *out = (inl >> 26);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 23)) << (29 - 23);
+ out++;
+ *out = (inl >> 23);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 20)) << (29 - 20);
+ out++;
+ *out = (inl >> 20);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 17)) << (29 - 17);
+ out++;
+ *out = (inl >> 17);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 14)) << (29 - 14);
+ out++;
+ *out = (inl >> 14);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 11)) << (29 - 11);
+ out++;
+ *out = (inl >> 11);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 8)) << (29 - 8);
+ out++;
+ *out = (inl >> 8);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 5)) << (29 - 5);
+ out++;
+ *out = (inl >> 5);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 2)) << (29 - 2);
+ out++;
+ *out = (inl >> 2) % (1U << 29);
+ out++;
+ *out = (inl >> 31);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 28)) << (29 - 28);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 25)) << (29 - 25);
+ out++;
+ *out = (inl >> 25);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 22)) << (29 - 22);
+ out++;
+ *out = (inl >> 22);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 19)) << (29 - 19);
+ out++;
+ *out = (inl >> 19);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 16)) << (29 - 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 13)) << (29 - 13);
+ out++;
+ *out = (inl >> 13);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 10)) << (29 - 10);
+ out++;
+ *out = (inl >> 10);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 7)) << (29 - 7);
+ out++;
+ *out = (inl >> 7);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (29 - 4);
+ out++;
+ *out = (inl >> 4);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 1)) << (29 - 1);
+ out++;
+ *out = (inl >> 1) % (1U << 29);
+ out++;
+ *out = (inl >> 30);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 27)) << (29 - 27);
+ out++;
+ *out = (inl >> 27);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 24)) << (29 - 24);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 21)) << (29 - 21);
+ out++;
+ *out = (inl >> 21);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 18)) << (29 - 18);
+ out++;
+ *out = (inl >> 18);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 15)) << (29 - 15);
+ out++;
+ *out = (inl >> 15);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 12)) << (29 - 12);
+ out++;
+ *out = (inl >> 12);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 9)) << (29 - 9);
+ out++;
+ *out = (inl >> 9);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 6)) << (29 - 6);
+ out++;
+ *out = (inl >> 6);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 3)) << (29 - 3);
+ out++;
+ *out = (inl >> 3);
+ ++in;
+ out++;
+
+ return in;
+}
+
+inline const uint32_t* unpack30_32(const uint32_t* in, uint32_t* out) {
+ uint32_t inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out = (inl >> 0) % (1U << 30);
+ out++;
+ *out = (inl >> 30);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 28)) << (30 - 28);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 26)) << (30 - 26);
+ out++;
+ *out = (inl >> 26);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 24)) << (30 - 24);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 22)) << (30 - 22);
+ out++;
+ *out = (inl >> 22);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 20)) << (30 - 20);
+ out++;
+ *out = (inl >> 20);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 18)) << (30 - 18);
+ out++;
+ *out = (inl >> 18);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 16)) << (30 - 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 14)) << (30 - 14);
+ out++;
+ *out = (inl >> 14);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 12)) << (30 - 12);
+ out++;
+ *out = (inl >> 12);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 10)) << (30 - 10);
+ out++;
+ *out = (inl >> 10);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 8)) << (30 - 8);
+ out++;
+ *out = (inl >> 8);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 6)) << (30 - 6);
+ out++;
+ *out = (inl >> 6);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (30 - 4);
+ out++;
+ *out = (inl >> 4);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 2)) << (30 - 2);
+ out++;
+ *out = (inl >> 2);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0) % (1U << 30);
+ out++;
+ *out = (inl >> 30);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 28)) << (30 - 28);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 26)) << (30 - 26);
+ out++;
+ *out = (inl >> 26);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 24)) << (30 - 24);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 22)) << (30 - 22);
+ out++;
+ *out = (inl >> 22);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 20)) << (30 - 20);
+ out++;
+ *out = (inl >> 20);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 18)) << (30 - 18);
+ out++;
+ *out = (inl >> 18);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 16)) << (30 - 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 14)) << (30 - 14);
+ out++;
+ *out = (inl >> 14);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 12)) << (30 - 12);
+ out++;
+ *out = (inl >> 12);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 10)) << (30 - 10);
+ out++;
+ *out = (inl >> 10);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 8)) << (30 - 8);
+ out++;
+ *out = (inl >> 8);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 6)) << (30 - 6);
+ out++;
+ *out = (inl >> 6);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (30 - 4);
+ out++;
+ *out = (inl >> 4);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 2)) << (30 - 2);
+ out++;
+ *out = (inl >> 2);
+ ++in;
+ out++;
+
+ return in;
+}
+
+inline const uint32_t* unpack31_32(const uint32_t* in, uint32_t* out) {
+ uint32_t inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out = (inl >> 0) % (1U << 31);
+ out++;
+ *out = (inl >> 31);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 30)) << (31 - 30);
+ out++;
+ *out = (inl >> 30);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 29)) << (31 - 29);
+ out++;
+ *out = (inl >> 29);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 28)) << (31 - 28);
+ out++;
+ *out = (inl >> 28);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 27)) << (31 - 27);
+ out++;
+ *out = (inl >> 27);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 26)) << (31 - 26);
+ out++;
+ *out = (inl >> 26);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 25)) << (31 - 25);
+ out++;
+ *out = (inl >> 25);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 24)) << (31 - 24);
+ out++;
+ *out = (inl >> 24);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 23)) << (31 - 23);
+ out++;
+ *out = (inl >> 23);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 22)) << (31 - 22);
+ out++;
+ *out = (inl >> 22);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 21)) << (31 - 21);
+ out++;
+ *out = (inl >> 21);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 20)) << (31 - 20);
+ out++;
+ *out = (inl >> 20);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 19)) << (31 - 19);
+ out++;
+ *out = (inl >> 19);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 18)) << (31 - 18);
+ out++;
+ *out = (inl >> 18);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 17)) << (31 - 17);
+ out++;
+ *out = (inl >> 17);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 16)) << (31 - 16);
+ out++;
+ *out = (inl >> 16);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 15)) << (31 - 15);
+ out++;
+ *out = (inl >> 15);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 14)) << (31 - 14);
+ out++;
+ *out = (inl >> 14);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 13)) << (31 - 13);
+ out++;
+ *out = (inl >> 13);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 12)) << (31 - 12);
+ out++;
+ *out = (inl >> 12);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 11)) << (31 - 11);
+ out++;
+ *out = (inl >> 11);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 10)) << (31 - 10);
+ out++;
+ *out = (inl >> 10);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 9)) << (31 - 9);
+ out++;
+ *out = (inl >> 9);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 8)) << (31 - 8);
+ out++;
+ *out = (inl >> 8);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 7)) << (31 - 7);
+ out++;
+ *out = (inl >> 7);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 6)) << (31 - 6);
+ out++;
+ *out = (inl >> 6);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 5)) << (31 - 5);
+ out++;
+ *out = (inl >> 5);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 4)) << (31 - 4);
+ out++;
+ *out = (inl >> 4);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 3)) << (31 - 3);
+ out++;
+ *out = (inl >> 3);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 2)) << (31 - 2);
+ out++;
+ *out = (inl >> 2);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out |= (inl % (1U << 1)) << (31 - 1);
+ out++;
+ *out = (inl >> 1);
+ ++in;
+ out++;
+
+ return in;
+}
+
+inline const uint32_t* unpack32_32(const uint32_t* in, uint32_t* out) {
+ uint32_t inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ inl = util::SafeLoad(in);
+ inl = arrow::BitUtil::FromLittleEndian(inl);
+ out++;
+ *out = (inl >> 0);
+ ++in;
+ out++;
+
+ return in;
+}
+
+inline const uint32_t* nullunpacker32(const uint32_t* in, uint32_t* out) {
+ for (int k = 0; k < 32; ++k) {
+ out[k] = 0;
+ }
+ return in;
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/checked_cast.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/checked_cast.h
index 97f6b61a1f8..d009f2c015d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/checked_cast.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/checked_cast.h
@@ -1,61 +1,61 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <memory>
-#include <type_traits>
-#include <utility>
-
-namespace arrow {
-namespace internal {
-
-template <typename OutputType, typename InputType>
-inline OutputType checked_cast(InputType&& value) {
- static_assert(std::is_class<typename std::remove_pointer<
- typename std::remove_reference<InputType>::type>::type>::value,
- "checked_cast input type must be a class");
- static_assert(std::is_class<typename std::remove_pointer<
- typename std::remove_reference<OutputType>::type>::type>::value,
- "checked_cast output type must be a class");
-#ifdef NDEBUG
- return static_cast<OutputType>(value);
-#else
- return dynamic_cast<OutputType>(value);
-#endif
-}
-
-template <class T, class U>
-std::shared_ptr<T> checked_pointer_cast(std::shared_ptr<U> r) noexcept {
-#ifdef NDEBUG
- return std::static_pointer_cast<T>(std::move(r));
-#else
- return std::dynamic_pointer_cast<T>(std::move(r));
-#endif
-}
-
-template <class T, class U>
-std::unique_ptr<T> checked_pointer_cast(std::unique_ptr<U> r) noexcept {
-#ifdef NDEBUG
- return std::unique_ptr<T>(static_cast<T*>(r.release()));
-#else
- return std::unique_ptr<T>(dynamic_cast<T*>(r.release()));
-#endif
-}
-
-} // namespace internal
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <type_traits>
+#include <utility>
+
+namespace arrow {
+namespace internal {
+
+template <typename OutputType, typename InputType>
+inline OutputType checked_cast(InputType&& value) {
+ static_assert(std::is_class<typename std::remove_pointer<
+ typename std::remove_reference<InputType>::type>::type>::value,
+ "checked_cast input type must be a class");
+ static_assert(std::is_class<typename std::remove_pointer<
+ typename std::remove_reference<OutputType>::type>::type>::value,
+ "checked_cast output type must be a class");
+#ifdef NDEBUG
+ return static_cast<OutputType>(value);
+#else
+ return dynamic_cast<OutputType>(value);
+#endif
+}
+
+template <class T, class U>
+std::shared_ptr<T> checked_pointer_cast(std::shared_ptr<U> r) noexcept {
+#ifdef NDEBUG
+ return std::static_pointer_cast<T>(std::move(r));
+#else
+ return std::dynamic_pointer_cast<T>(std::move(r));
+#endif
+}
+
+template <class T, class U>
+std::unique_ptr<T> checked_pointer_cast(std::unique_ptr<U> r) noexcept {
+#ifdef NDEBUG
+ return std::unique_ptr<T>(static_cast<T*>(r.release()));
+#else
+ return std::unique_ptr<T>(dynamic_cast<T*>(r.release()));
+#endif
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/compare.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/compare.h
index 6477bf139f5..34f511d71ab 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/compare.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/compare.h
@@ -1,62 +1,62 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <memory>
-#include <type_traits>
-#include <utility>
-
-#include "arrow/util/macros.h"
-
-namespace arrow {
-namespace util {
-
-/// CRTP helper for declaring equality comparison. Defines operator== and operator!=
-template <typename T>
-class EqualityComparable {
- public:
- ~EqualityComparable() {
- static_assert(
- std::is_same<decltype(std::declval<const T>().Equals(std::declval<const T>())),
- bool>::value,
- "EqualityComparable depends on the method T::Equals(const T&) const");
- }
-
- template <typename... Extra>
- bool Equals(const std::shared_ptr<T>& other, Extra&&... extra) const {
- if (other == NULLPTR) {
- return false;
- }
- return cast().Equals(*other, std::forward<Extra>(extra)...);
- }
-
- struct PtrsEqual {
- bool operator()(const std::shared_ptr<T>& l, const std::shared_ptr<T>& r) const {
- return l->Equals(r);
- }
- };
-
- bool operator==(const T& other) const { return cast().Equals(other); }
- bool operator!=(const T& other) const { return !(cast() == other); }
-
- private:
- const T& cast() const { return static_cast<const T&>(*this); }
-};
-
-} // namespace util
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <type_traits>
+#include <utility>
+
+#include "arrow/util/macros.h"
+
+namespace arrow {
+namespace util {
+
+/// CRTP helper for declaring equality comparison. Defines operator== and operator!=
+template <typename T>
+class EqualityComparable {
+ public:
+ ~EqualityComparable() {
+ static_assert(
+ std::is_same<decltype(std::declval<const T>().Equals(std::declval<const T>())),
+ bool>::value,
+ "EqualityComparable depends on the method T::Equals(const T&) const");
+ }
+
+ template <typename... Extra>
+ bool Equals(const std::shared_ptr<T>& other, Extra&&... extra) const {
+ if (other == NULLPTR) {
+ return false;
+ }
+ return cast().Equals(*other, std::forward<Extra>(extra)...);
+ }
+
+ struct PtrsEqual {
+ bool operator()(const std::shared_ptr<T>& l, const std::shared_ptr<T>& r) const {
+ return l->Equals(r);
+ }
+ };
+
+ bool operator==(const T& other) const { return cast().Equals(other); }
+ bool operator!=(const T& other) const { return !(cast() == other); }
+
+ private:
+ const T& cast() const { return static_cast<const T&>(*this); }
+};
+
+} // namespace util
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression.cc
index 8db199b4e76..66274f0cadf 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression.cc
@@ -1,34 +1,34 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/compression.h"
-
-#include <memory>
-#include <string>
-#include <utility>
-
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/util/compression_internal.h"
-#include "arrow/util/logging.h"
-
-namespace arrow {
-namespace util {
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/compression.h"
+
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/compression_internal.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+namespace util {
+
namespace {
Status CheckSupportsCompressionLevel(Compression::type type) {
@@ -41,80 +41,80 @@ Status CheckSupportsCompressionLevel(Compression::type type) {
} // namespace
-int Codec::UseDefaultCompressionLevel() { return kUseDefaultCompressionLevel; }
-
-Status Codec::Init() { return Status::OK(); }
-
-const std::string& Codec::GetCodecAsString(Compression::type t) {
- static const std::string uncompressed = "uncompressed", snappy = "snappy",
- gzip = "gzip", lzo = "lzo", brotli = "brotli",
- lz4_raw = "lz4_raw", lz4 = "lz4", lz4_hadoop = "lz4_hadoop",
- zstd = "zstd", bz2 = "bz2", unknown = "unknown";
-
- switch (t) {
- case Compression::UNCOMPRESSED:
- return uncompressed;
- case Compression::SNAPPY:
- return snappy;
- case Compression::GZIP:
- return gzip;
- case Compression::LZO:
- return lzo;
- case Compression::BROTLI:
- return brotli;
- case Compression::LZ4:
- return lz4_raw;
- case Compression::LZ4_FRAME:
- return lz4;
- case Compression::LZ4_HADOOP:
- return lz4_hadoop;
- case Compression::ZSTD:
- return zstd;
- case Compression::BZ2:
- return bz2;
- default:
- return unknown;
- }
-}
-
-Result<Compression::type> Codec::GetCompressionType(const std::string& name) {
- if (name == "uncompressed") {
- return Compression::UNCOMPRESSED;
- } else if (name == "gzip") {
- return Compression::GZIP;
- } else if (name == "snappy") {
- return Compression::SNAPPY;
- } else if (name == "lzo") {
- return Compression::LZO;
- } else if (name == "brotli") {
- return Compression::BROTLI;
- } else if (name == "lz4_raw") {
- return Compression::LZ4;
- } else if (name == "lz4") {
- return Compression::LZ4_FRAME;
- } else if (name == "lz4_hadoop") {
- return Compression::LZ4_HADOOP;
- } else if (name == "zstd") {
- return Compression::ZSTD;
- } else if (name == "bz2") {
- return Compression::BZ2;
- } else {
- return Status::Invalid("Unrecognized compression type: ", name);
- }
-}
-
-bool Codec::SupportsCompressionLevel(Compression::type codec) {
- switch (codec) {
- case Compression::GZIP:
- case Compression::BROTLI:
- case Compression::ZSTD:
- case Compression::BZ2:
- return true;
- default:
- return false;
- }
-}
-
+int Codec::UseDefaultCompressionLevel() { return kUseDefaultCompressionLevel; }
+
+Status Codec::Init() { return Status::OK(); }
+
+const std::string& Codec::GetCodecAsString(Compression::type t) {
+ static const std::string uncompressed = "uncompressed", snappy = "snappy",
+ gzip = "gzip", lzo = "lzo", brotli = "brotli",
+ lz4_raw = "lz4_raw", lz4 = "lz4", lz4_hadoop = "lz4_hadoop",
+ zstd = "zstd", bz2 = "bz2", unknown = "unknown";
+
+ switch (t) {
+ case Compression::UNCOMPRESSED:
+ return uncompressed;
+ case Compression::SNAPPY:
+ return snappy;
+ case Compression::GZIP:
+ return gzip;
+ case Compression::LZO:
+ return lzo;
+ case Compression::BROTLI:
+ return brotli;
+ case Compression::LZ4:
+ return lz4_raw;
+ case Compression::LZ4_FRAME:
+ return lz4;
+ case Compression::LZ4_HADOOP:
+ return lz4_hadoop;
+ case Compression::ZSTD:
+ return zstd;
+ case Compression::BZ2:
+ return bz2;
+ default:
+ return unknown;
+ }
+}
+
+Result<Compression::type> Codec::GetCompressionType(const std::string& name) {
+ if (name == "uncompressed") {
+ return Compression::UNCOMPRESSED;
+ } else if (name == "gzip") {
+ return Compression::GZIP;
+ } else if (name == "snappy") {
+ return Compression::SNAPPY;
+ } else if (name == "lzo") {
+ return Compression::LZO;
+ } else if (name == "brotli") {
+ return Compression::BROTLI;
+ } else if (name == "lz4_raw") {
+ return Compression::LZ4;
+ } else if (name == "lz4") {
+ return Compression::LZ4_FRAME;
+ } else if (name == "lz4_hadoop") {
+ return Compression::LZ4_HADOOP;
+ } else if (name == "zstd") {
+ return Compression::ZSTD;
+ } else if (name == "bz2") {
+ return Compression::BZ2;
+ } else {
+ return Status::Invalid("Unrecognized compression type: ", name);
+ }
+}
+
+bool Codec::SupportsCompressionLevel(Compression::type codec) {
+ switch (codec) {
+ case Compression::GZIP:
+ case Compression::BROTLI:
+ case Compression::ZSTD:
+ case Compression::BZ2:
+ return true;
+ default:
+ return false;
+ }
+}
+
Result<int> Codec::MaximumCompressionLevel(Compression::type codec_type) {
RETURN_NOT_OK(CheckSupportsCompressionLevel(codec_type));
ARROW_ASSIGN_OR_RAISE(auto codec, Codec::Create(codec_type));
@@ -133,129 +133,129 @@ Result<int> Codec::DefaultCompressionLevel(Compression::type codec_type) {
return codec->default_compression_level();
}
-Result<std::unique_ptr<Codec>> Codec::Create(Compression::type codec_type,
- int compression_level) {
- if (!IsAvailable(codec_type)) {
- if (codec_type == Compression::LZO) {
- return Status::NotImplemented("LZO codec not implemented");
- }
-
- auto name = GetCodecAsString(codec_type);
- if (name == "unknown") {
- return Status::Invalid("Unrecognized codec");
- }
-
- return Status::NotImplemented("Support for codec '", GetCodecAsString(codec_type),
- "' not built");
- }
-
- if (compression_level != kUseDefaultCompressionLevel &&
- !SupportsCompressionLevel(codec_type)) {
- return Status::Invalid("Codec '", GetCodecAsString(codec_type),
- "' doesn't support setting a compression level.");
- }
-
- std::unique_ptr<Codec> codec;
- switch (codec_type) {
- case Compression::UNCOMPRESSED:
- return nullptr;
- case Compression::SNAPPY:
-#ifdef ARROW_WITH_SNAPPY
- codec = internal::MakeSnappyCodec();
-#endif
- break;
- case Compression::GZIP:
-#ifdef ARROW_WITH_ZLIB
- codec = internal::MakeGZipCodec(compression_level);
-#endif
- break;
- case Compression::BROTLI:
-#ifdef ARROW_WITH_BROTLI
- codec = internal::MakeBrotliCodec(compression_level);
-#endif
- break;
- case Compression::LZ4:
-#ifdef ARROW_WITH_LZ4
- codec = internal::MakeLz4RawCodec();
-#endif
- break;
- case Compression::LZ4_FRAME:
-#ifdef ARROW_WITH_LZ4
- codec = internal::MakeLz4FrameCodec();
-#endif
- break;
- case Compression::LZ4_HADOOP:
-#ifdef ARROW_WITH_LZ4
- codec = internal::MakeLz4HadoopRawCodec();
-#endif
- break;
- case Compression::ZSTD:
-#ifdef ARROW_WITH_ZSTD
- codec = internal::MakeZSTDCodec(compression_level);
-#endif
- break;
- case Compression::BZ2:
-#ifdef ARROW_WITH_BZ2
- codec = internal::MakeBZ2Codec(compression_level);
-#endif
- break;
- default:
- break;
- }
-
- DCHECK_NE(codec, nullptr);
- RETURN_NOT_OK(codec->Init());
- return std::move(codec);
-}
-
-bool Codec::IsAvailable(Compression::type codec_type) {
- switch (codec_type) {
- case Compression::UNCOMPRESSED:
- return true;
- case Compression::SNAPPY:
-#ifdef ARROW_WITH_SNAPPY
- return true;
-#else
- return false;
-#endif
- case Compression::GZIP:
-#ifdef ARROW_WITH_ZLIB
- return true;
-#else
- return false;
-#endif
- case Compression::LZO:
- return false;
- case Compression::BROTLI:
-#ifdef ARROW_WITH_BROTLI
- return true;
-#else
- return false;
-#endif
- case Compression::LZ4:
- case Compression::LZ4_FRAME:
- case Compression::LZ4_HADOOP:
-#ifdef ARROW_WITH_LZ4
- return true;
-#else
- return false;
-#endif
- case Compression::ZSTD:
-#ifdef ARROW_WITH_ZSTD
- return true;
-#else
- return false;
-#endif
- case Compression::BZ2:
-#ifdef ARROW_WITH_BZ2
- return true;
-#else
- return false;
-#endif
- default:
- return false;
- }
-}
-
-} // namespace util
-} // namespace arrow
+Result<std::unique_ptr<Codec>> Codec::Create(Compression::type codec_type,
+ int compression_level) {
+ if (!IsAvailable(codec_type)) {
+ if (codec_type == Compression::LZO) {
+ return Status::NotImplemented("LZO codec not implemented");
+ }
+
+ auto name = GetCodecAsString(codec_type);
+ if (name == "unknown") {
+ return Status::Invalid("Unrecognized codec");
+ }
+
+ return Status::NotImplemented("Support for codec '", GetCodecAsString(codec_type),
+ "' not built");
+ }
+
+ if (compression_level != kUseDefaultCompressionLevel &&
+ !SupportsCompressionLevel(codec_type)) {
+ return Status::Invalid("Codec '", GetCodecAsString(codec_type),
+ "' doesn't support setting a compression level.");
+ }
+
+ std::unique_ptr<Codec> codec;
+ switch (codec_type) {
+ case Compression::UNCOMPRESSED:
+ return nullptr;
+ case Compression::SNAPPY:
+#ifdef ARROW_WITH_SNAPPY
+ codec = internal::MakeSnappyCodec();
+#endif
+ break;
+ case Compression::GZIP:
+#ifdef ARROW_WITH_ZLIB
+ codec = internal::MakeGZipCodec(compression_level);
+#endif
+ break;
+ case Compression::BROTLI:
+#ifdef ARROW_WITH_BROTLI
+ codec = internal::MakeBrotliCodec(compression_level);
+#endif
+ break;
+ case Compression::LZ4:
+#ifdef ARROW_WITH_LZ4
+ codec = internal::MakeLz4RawCodec();
+#endif
+ break;
+ case Compression::LZ4_FRAME:
+#ifdef ARROW_WITH_LZ4
+ codec = internal::MakeLz4FrameCodec();
+#endif
+ break;
+ case Compression::LZ4_HADOOP:
+#ifdef ARROW_WITH_LZ4
+ codec = internal::MakeLz4HadoopRawCodec();
+#endif
+ break;
+ case Compression::ZSTD:
+#ifdef ARROW_WITH_ZSTD
+ codec = internal::MakeZSTDCodec(compression_level);
+#endif
+ break;
+ case Compression::BZ2:
+#ifdef ARROW_WITH_BZ2
+ codec = internal::MakeBZ2Codec(compression_level);
+#endif
+ break;
+ default:
+ break;
+ }
+
+ DCHECK_NE(codec, nullptr);
+ RETURN_NOT_OK(codec->Init());
+ return std::move(codec);
+}
+
+bool Codec::IsAvailable(Compression::type codec_type) {
+ switch (codec_type) {
+ case Compression::UNCOMPRESSED:
+ return true;
+ case Compression::SNAPPY:
+#ifdef ARROW_WITH_SNAPPY
+ return true;
+#else
+ return false;
+#endif
+ case Compression::GZIP:
+#ifdef ARROW_WITH_ZLIB
+ return true;
+#else
+ return false;
+#endif
+ case Compression::LZO:
+ return false;
+ case Compression::BROTLI:
+#ifdef ARROW_WITH_BROTLI
+ return true;
+#else
+ return false;
+#endif
+ case Compression::LZ4:
+ case Compression::LZ4_FRAME:
+ case Compression::LZ4_HADOOP:
+#ifdef ARROW_WITH_LZ4
+ return true;
+#else
+ return false;
+#endif
+ case Compression::ZSTD:
+#ifdef ARROW_WITH_ZSTD
+ return true;
+#else
+ return false;
+#endif
+ case Compression::BZ2:
+#ifdef ARROW_WITH_BZ2
+ return true;
+#else
+ return false;
+#endif
+ default:
+ return false;
+ }
+}
+
+} // namespace util
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression.h
index 0832e82a606..b9673ef7a4a 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression.h
@@ -1,137 +1,137 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <limits>
-#include <memory>
-#include <string>
-
-#include "arrow/result.h"
-#include "arrow/status.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+
+#include "arrow/result.h"
+#include "arrow/status.h"
#include "arrow/util/type_fwd.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace util {
-
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace util {
+
constexpr int kUseDefaultCompressionLevel = std::numeric_limits<int>::min();
-
-/// \brief Streaming compressor interface
-///
-class ARROW_EXPORT Compressor {
- public:
- virtual ~Compressor() = default;
-
- struct CompressResult {
- int64_t bytes_read;
- int64_t bytes_written;
- };
- struct FlushResult {
- int64_t bytes_written;
- bool should_retry;
- };
- struct EndResult {
- int64_t bytes_written;
- bool should_retry;
- };
-
- /// \brief Compress some input.
- ///
- /// If bytes_read is 0 on return, then a larger output buffer should be supplied.
- virtual Result<CompressResult> Compress(int64_t input_len, const uint8_t* input,
- int64_t output_len, uint8_t* output) = 0;
-
- /// \brief Flush part of the compressed output.
- ///
- /// If should_retry is true on return, Flush() should be called again
- /// with a larger buffer.
- virtual Result<FlushResult> Flush(int64_t output_len, uint8_t* output) = 0;
-
- /// \brief End compressing, doing whatever is necessary to end the stream.
- ///
- /// If should_retry is true on return, End() should be called again
- /// with a larger buffer. Otherwise, the Compressor should not be used anymore.
- ///
- /// End() implies Flush().
- virtual Result<EndResult> End(int64_t output_len, uint8_t* output) = 0;
-
- // XXX add methods for buffer size heuristics?
-};
-
-/// \brief Streaming decompressor interface
-///
-class ARROW_EXPORT Decompressor {
- public:
- virtual ~Decompressor() = default;
-
- struct DecompressResult {
- // XXX is need_more_output necessary? (Brotli?)
- int64_t bytes_read;
- int64_t bytes_written;
- bool need_more_output;
- };
-
- /// \brief Decompress some input.
- ///
- /// If need_more_output is true on return, a larger output buffer needs
- /// to be supplied.
- virtual Result<DecompressResult> Decompress(int64_t input_len, const uint8_t* input,
- int64_t output_len, uint8_t* output) = 0;
-
- /// \brief Return whether the compressed stream is finished.
- ///
- /// This is a heuristic. If true is returned, then it is guaranteed
- /// that the stream is finished. If false is returned, however, it may
- /// simply be that the underlying library isn't able to provide the information.
- virtual bool IsFinished() = 0;
-
- /// \brief Reinitialize decompressor, making it ready for a new compressed stream.
- virtual Status Reset() = 0;
-
- // XXX add methods for buffer size heuristics?
-};
-
-/// \brief Compression codec
-class ARROW_EXPORT Codec {
- public:
- virtual ~Codec() = default;
-
- /// \brief Return special value to indicate that a codec implementation
- /// should use its default compression level
- static int UseDefaultCompressionLevel();
-
- /// \brief Return a string name for compression type
- static const std::string& GetCodecAsString(Compression::type t);
-
- /// \brief Return compression type for name (all upper case)
- static Result<Compression::type> GetCompressionType(const std::string& name);
-
- /// \brief Create a codec for the given compression algorithm
- static Result<std::unique_ptr<Codec>> Create(
- Compression::type codec, int compression_level = kUseDefaultCompressionLevel);
-
- /// \brief Return true if support for indicated codec has been enabled
- static bool IsAvailable(Compression::type codec);
-
- /// \brief Return true if indicated codec supports setting a compression level
- static bool SupportsCompressionLevel(Compression::type codec);
-
+
+/// \brief Streaming compressor interface
+///
+class ARROW_EXPORT Compressor {
+ public:
+ virtual ~Compressor() = default;
+
+ struct CompressResult {
+ int64_t bytes_read;
+ int64_t bytes_written;
+ };
+ struct FlushResult {
+ int64_t bytes_written;
+ bool should_retry;
+ };
+ struct EndResult {
+ int64_t bytes_written;
+ bool should_retry;
+ };
+
+ /// \brief Compress some input.
+ ///
+ /// If bytes_read is 0 on return, then a larger output buffer should be supplied.
+ virtual Result<CompressResult> Compress(int64_t input_len, const uint8_t* input,
+ int64_t output_len, uint8_t* output) = 0;
+
+ /// \brief Flush part of the compressed output.
+ ///
+ /// If should_retry is true on return, Flush() should be called again
+ /// with a larger buffer.
+ virtual Result<FlushResult> Flush(int64_t output_len, uint8_t* output) = 0;
+
+ /// \brief End compressing, doing whatever is necessary to end the stream.
+ ///
+ /// If should_retry is true on return, End() should be called again
+ /// with a larger buffer. Otherwise, the Compressor should not be used anymore.
+ ///
+ /// End() implies Flush().
+ virtual Result<EndResult> End(int64_t output_len, uint8_t* output) = 0;
+
+ // XXX add methods for buffer size heuristics?
+};
+
+/// \brief Streaming decompressor interface
+///
+class ARROW_EXPORT Decompressor {
+ public:
+ virtual ~Decompressor() = default;
+
+ struct DecompressResult {
+ // XXX is need_more_output necessary? (Brotli?)
+ int64_t bytes_read;
+ int64_t bytes_written;
+ bool need_more_output;
+ };
+
+ /// \brief Decompress some input.
+ ///
+ /// If need_more_output is true on return, a larger output buffer needs
+ /// to be supplied.
+ virtual Result<DecompressResult> Decompress(int64_t input_len, const uint8_t* input,
+ int64_t output_len, uint8_t* output) = 0;
+
+ /// \brief Return whether the compressed stream is finished.
+ ///
+ /// This is a heuristic. If true is returned, then it is guaranteed
+ /// that the stream is finished. If false is returned, however, it may
+ /// simply be that the underlying library isn't able to provide the information.
+ virtual bool IsFinished() = 0;
+
+ /// \brief Reinitialize decompressor, making it ready for a new compressed stream.
+ virtual Status Reset() = 0;
+
+ // XXX add methods for buffer size heuristics?
+};
+
+/// \brief Compression codec
+class ARROW_EXPORT Codec {
+ public:
+ virtual ~Codec() = default;
+
+ /// \brief Return special value to indicate that a codec implementation
+ /// should use its default compression level
+ static int UseDefaultCompressionLevel();
+
+ /// \brief Return a string name for compression type
+ static const std::string& GetCodecAsString(Compression::type t);
+
+ /// \brief Return compression type for name (all lower case)
+ static Result<Compression::type> GetCompressionType(const std::string& name);
+
+ /// \brief Create a codec for the given compression algorithm
+ static Result<std::unique_ptr<Codec>> Create(
+ Compression::type codec, int compression_level = kUseDefaultCompressionLevel);
+
+ /// \brief Return true if support for indicated codec has been enabled
+ static bool IsAvailable(Compression::type codec);
+
+ /// \brief Return true if indicated codec supports setting a compression level
+ static bool SupportsCompressionLevel(Compression::type codec);
+
/// \brief Return the smallest supported compression level for the codec
/// Note: This function creates a temporary Codec instance
static Result<int> MinimumCompressionLevel(Compression::type codec);
@@ -153,50 +153,50 @@ class ARROW_EXPORT Codec {
/// \brief Return the default compression level
virtual int default_compression_level() const = 0;
- /// \brief One-shot decompression function
- ///
- /// output_buffer_len must be correct and therefore be obtained in advance.
- /// The actual decompressed length is returned.
- ///
- /// \note One-shot decompression is not always compatible with streaming
- /// compression. Depending on the codec (e.g. LZ4), different formats may
- /// be used.
- virtual Result<int64_t> Decompress(int64_t input_len, const uint8_t* input,
- int64_t output_buffer_len,
- uint8_t* output_buffer) = 0;
-
- /// \brief One-shot compression function
- ///
- /// output_buffer_len must first have been computed using MaxCompressedLen().
- /// The actual compressed length is returned.
- ///
- /// \note One-shot compression is not always compatible with streaming
- /// decompression. Depending on the codec (e.g. LZ4), different formats may
- /// be used.
- virtual Result<int64_t> Compress(int64_t input_len, const uint8_t* input,
- int64_t output_buffer_len, uint8_t* output_buffer) = 0;
-
- virtual int64_t MaxCompressedLen(int64_t input_len, const uint8_t* input) = 0;
-
- /// \brief Create a streaming compressor instance
- virtual Result<std::shared_ptr<Compressor>> MakeCompressor() = 0;
-
- /// \brief Create a streaming compressor instance
- virtual Result<std::shared_ptr<Decompressor>> MakeDecompressor() = 0;
-
- /// \brief This Codec's compression type
- virtual Compression::type compression_type() const = 0;
-
- /// \brief The name of this Codec's compression type
- const std::string& name() const { return GetCodecAsString(compression_type()); }
-
- /// \brief This Codec's compression level, if applicable
- virtual int compression_level() const { return UseDefaultCompressionLevel(); }
-
- private:
- /// \brief Initializes the codec's resources.
- virtual Status Init();
-};
-
-} // namespace util
-} // namespace arrow
+ /// \brief One-shot decompression function
+ ///
+ /// output_buffer_len must be correct and therefore be obtained in advance.
+ /// The actual decompressed length is returned.
+ ///
+ /// \note One-shot decompression is not always compatible with streaming
+ /// compression. Depending on the codec (e.g. LZ4), different formats may
+ /// be used.
+ virtual Result<int64_t> Decompress(int64_t input_len, const uint8_t* input,
+ int64_t output_buffer_len,
+ uint8_t* output_buffer) = 0;
+
+ /// \brief One-shot compression function
+ ///
+ /// output_buffer_len must first have been computed using MaxCompressedLen().
+ /// The actual compressed length is returned.
+ ///
+ /// \note One-shot compression is not always compatible with streaming
+ /// decompression. Depending on the codec (e.g. LZ4), different formats may
+ /// be used.
+ virtual Result<int64_t> Compress(int64_t input_len, const uint8_t* input,
+ int64_t output_buffer_len, uint8_t* output_buffer) = 0;
+
+ virtual int64_t MaxCompressedLen(int64_t input_len, const uint8_t* input) = 0;
+
+ /// \brief Create a streaming compressor instance
+ virtual Result<std::shared_ptr<Compressor>> MakeCompressor() = 0;
+
+ /// \brief Create a streaming decompressor instance
+ virtual Result<std::shared_ptr<Decompressor>> MakeDecompressor() = 0;
+
+ /// \brief This Codec's compression type
+ virtual Compression::type compression_type() const = 0;
+
+ /// \brief The name of this Codec's compression type
+ const std::string& name() const { return GetCodecAsString(compression_type()); }
+
+ /// \brief This Codec's compression level, if applicable
+ virtual int compression_level() const { return UseDefaultCompressionLevel(); }
+
+ private:
+ /// \brief Initializes the codec's resources.
+ virtual Status Init();
+};
+
+} // namespace util
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_brotli.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_brotli.cc
index cb547c2c8cf..de1af442ce6 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_brotli.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_brotli.cc
@@ -1,245 +1,245 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/compression_internal.h"
-
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-
-#include <brotli/decode.h>
-#include <brotli/encode.h>
-#include <brotli/types.h>
-
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/macros.h"
-
-namespace arrow {
-namespace util {
-namespace internal {
-
-namespace {
-
-class BrotliDecompressor : public Decompressor {
- public:
- ~BrotliDecompressor() override {
- if (state_ != nullptr) {
- BrotliDecoderDestroyInstance(state_);
- }
- }
-
- Status Init() {
- state_ = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
- if (state_ == nullptr) {
- return BrotliError("Brotli init failed");
- }
- return Status::OK();
- }
-
- Status Reset() override {
- if (state_ != nullptr) {
- BrotliDecoderDestroyInstance(state_);
- }
- return Init();
- }
-
- Result<DecompressResult> Decompress(int64_t input_len, const uint8_t* input,
- int64_t output_len, uint8_t* output) override {
- auto avail_in = static_cast<size_t>(input_len);
- auto avail_out = static_cast<size_t>(output_len);
- BrotliDecoderResult ret;
-
- ret = BrotliDecoderDecompressStream(state_, &avail_in, &input, &avail_out, &output,
- nullptr /* total_out */);
- if (ret == BROTLI_DECODER_RESULT_ERROR) {
- return BrotliError(BrotliDecoderGetErrorCode(state_), "Brotli decompress failed: ");
- }
- return DecompressResult{static_cast<int64_t>(input_len - avail_in),
- static_cast<int64_t>(output_len - avail_out),
- (ret == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT)};
- }
-
- bool IsFinished() override { return BrotliDecoderIsFinished(state_); }
-
- protected:
- Status BrotliError(const char* msg) { return Status::IOError(msg); }
-
- Status BrotliError(BrotliDecoderErrorCode code, const char* prefix_msg) {
- return Status::IOError(prefix_msg, BrotliDecoderErrorString(code));
- }
-
- BrotliDecoderState* state_ = nullptr;
-};
-
-// ----------------------------------------------------------------------
-// Brotli compressor implementation
-
-class BrotliCompressor : public Compressor {
- public:
- explicit BrotliCompressor(int compression_level)
- : compression_level_(compression_level) {}
-
- ~BrotliCompressor() override {
- if (state_ != nullptr) {
- BrotliEncoderDestroyInstance(state_);
- }
- }
-
- Status Init() {
- state_ = BrotliEncoderCreateInstance(nullptr, nullptr, nullptr);
- if (state_ == nullptr) {
- return BrotliError("Brotli init failed");
- }
- if (!BrotliEncoderSetParameter(state_, BROTLI_PARAM_QUALITY, compression_level_)) {
- return BrotliError("Brotli set compression level failed");
- }
- return Status::OK();
- }
-
- Result<CompressResult> Compress(int64_t input_len, const uint8_t* input,
- int64_t output_len, uint8_t* output) override {
- auto avail_in = static_cast<size_t>(input_len);
- auto avail_out = static_cast<size_t>(output_len);
- BROTLI_BOOL ret;
-
- ret = BrotliEncoderCompressStream(state_, BROTLI_OPERATION_PROCESS, &avail_in, &input,
- &avail_out, &output, nullptr /* total_out */);
- if (!ret) {
- return BrotliError("Brotli compress failed");
- }
- return CompressResult{static_cast<int64_t>(input_len - avail_in),
- static_cast<int64_t>(output_len - avail_out)};
- }
-
- Result<FlushResult> Flush(int64_t output_len, uint8_t* output) override {
- size_t avail_in = 0;
- const uint8_t* next_in = nullptr;
- auto avail_out = static_cast<size_t>(output_len);
- BROTLI_BOOL ret;
-
- ret = BrotliEncoderCompressStream(state_, BROTLI_OPERATION_FLUSH, &avail_in, &next_in,
- &avail_out, &output, nullptr /* total_out */);
- if (!ret) {
- return BrotliError("Brotli flush failed");
- }
- return FlushResult{static_cast<int64_t>(output_len - avail_out),
- !!BrotliEncoderHasMoreOutput(state_)};
- }
-
- Result<EndResult> End(int64_t output_len, uint8_t* output) override {
- size_t avail_in = 0;
- const uint8_t* next_in = nullptr;
- auto avail_out = static_cast<size_t>(output_len);
- BROTLI_BOOL ret;
-
- ret =
- BrotliEncoderCompressStream(state_, BROTLI_OPERATION_FINISH, &avail_in, &next_in,
- &avail_out, &output, nullptr /* total_out */);
- if (!ret) {
- return BrotliError("Brotli end failed");
- }
- bool should_retry = !!BrotliEncoderHasMoreOutput(state_);
- DCHECK_EQ(should_retry, !BrotliEncoderIsFinished(state_));
- return EndResult{static_cast<int64_t>(output_len - avail_out), should_retry};
- }
-
- protected:
- Status BrotliError(const char* msg) { return Status::IOError(msg); }
-
- BrotliEncoderState* state_ = nullptr;
-
- private:
- const int compression_level_;
-};
-
-// ----------------------------------------------------------------------
-// Brotli codec implementation
-
-class BrotliCodec : public Codec {
- public:
- explicit BrotliCodec(int compression_level)
- : compression_level_(compression_level == kUseDefaultCompressionLevel
- ? kBrotliDefaultCompressionLevel
- : compression_level) {}
-
- Result<int64_t> Decompress(int64_t input_len, const uint8_t* input,
- int64_t output_buffer_len, uint8_t* output_buffer) override {
- DCHECK_GE(input_len, 0);
- DCHECK_GE(output_buffer_len, 0);
- std::size_t output_size = static_cast<size_t>(output_buffer_len);
- if (BrotliDecoderDecompress(static_cast<size_t>(input_len), input, &output_size,
- output_buffer) != BROTLI_DECODER_RESULT_SUCCESS) {
- return Status::IOError("Corrupt brotli compressed data.");
- }
- return output_size;
- }
-
- int64_t MaxCompressedLen(int64_t input_len,
- const uint8_t* ARROW_ARG_UNUSED(input)) override {
- DCHECK_GE(input_len, 0);
- return BrotliEncoderMaxCompressedSize(static_cast<size_t>(input_len));
- }
-
- Result<int64_t> Compress(int64_t input_len, const uint8_t* input,
- int64_t output_buffer_len, uint8_t* output_buffer) override {
- DCHECK_GE(input_len, 0);
- DCHECK_GE(output_buffer_len, 0);
- std::size_t output_size = static_cast<size_t>(output_buffer_len);
- if (BrotliEncoderCompress(compression_level_, BROTLI_DEFAULT_WINDOW,
- BROTLI_DEFAULT_MODE, static_cast<size_t>(input_len), input,
- &output_size, output_buffer) == BROTLI_FALSE) {
- return Status::IOError("Brotli compression failure.");
- }
- return output_size;
- }
-
- Result<std::shared_ptr<Compressor>> MakeCompressor() override {
- auto ptr = std::make_shared<BrotliCompressor>(compression_level_);
- RETURN_NOT_OK(ptr->Init());
- return ptr;
- }
-
- Result<std::shared_ptr<Decompressor>> MakeDecompressor() override {
- auto ptr = std::make_shared<BrotliDecompressor>();
- RETURN_NOT_OK(ptr->Init());
- return ptr;
- }
-
- Compression::type compression_type() const override { return Compression::BROTLI; }
-
- int compression_level() const override { return compression_level_; }
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/compression_internal.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+
+#include <brotli/decode.h>
+#include <brotli/encode.h>
+#include <brotli/types.h>
+
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/macros.h"
+
+namespace arrow {
+namespace util {
+namespace internal {
+
+namespace {
+
+class BrotliDecompressor : public Decompressor {
+ public:
+ ~BrotliDecompressor() override {
+ if (state_ != nullptr) {
+ BrotliDecoderDestroyInstance(state_);
+ }
+ }
+
+ Status Init() {
+ state_ = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
+ if (state_ == nullptr) {
+ return BrotliError("Brotli init failed");
+ }
+ return Status::OK();
+ }
+
+ Status Reset() override {
+ if (state_ != nullptr) {
+ BrotliDecoderDestroyInstance(state_);
+ }
+ return Init();
+ }
+
+ Result<DecompressResult> Decompress(int64_t input_len, const uint8_t* input,
+ int64_t output_len, uint8_t* output) override {
+ auto avail_in = static_cast<size_t>(input_len);
+ auto avail_out = static_cast<size_t>(output_len);
+ BrotliDecoderResult ret;
+
+ ret = BrotliDecoderDecompressStream(state_, &avail_in, &input, &avail_out, &output,
+ nullptr /* total_out */);
+ if (ret == BROTLI_DECODER_RESULT_ERROR) {
+ return BrotliError(BrotliDecoderGetErrorCode(state_), "Brotli decompress failed: ");
+ }
+ return DecompressResult{static_cast<int64_t>(input_len - avail_in),
+ static_cast<int64_t>(output_len - avail_out),
+ (ret == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT)};
+ }
+
+ bool IsFinished() override { return BrotliDecoderIsFinished(state_); }
+
+ protected:
+ Status BrotliError(const char* msg) { return Status::IOError(msg); }
+
+ Status BrotliError(BrotliDecoderErrorCode code, const char* prefix_msg) {
+ return Status::IOError(prefix_msg, BrotliDecoderErrorString(code));
+ }
+
+ BrotliDecoderState* state_ = nullptr;
+};
+
+// ----------------------------------------------------------------------
+// Brotli compressor implementation
+
+class BrotliCompressor : public Compressor {
+ public:
+ explicit BrotliCompressor(int compression_level)
+ : compression_level_(compression_level) {}
+
+ ~BrotliCompressor() override {
+ if (state_ != nullptr) {
+ BrotliEncoderDestroyInstance(state_);
+ }
+ }
+
+ Status Init() {
+ state_ = BrotliEncoderCreateInstance(nullptr, nullptr, nullptr);
+ if (state_ == nullptr) {
+ return BrotliError("Brotli init failed");
+ }
+ if (!BrotliEncoderSetParameter(state_, BROTLI_PARAM_QUALITY, compression_level_)) {
+ return BrotliError("Brotli set compression level failed");
+ }
+ return Status::OK();
+ }
+
+ Result<CompressResult> Compress(int64_t input_len, const uint8_t* input,
+ int64_t output_len, uint8_t* output) override {
+ auto avail_in = static_cast<size_t>(input_len);
+ auto avail_out = static_cast<size_t>(output_len);
+ BROTLI_BOOL ret;
+
+ ret = BrotliEncoderCompressStream(state_, BROTLI_OPERATION_PROCESS, &avail_in, &input,
+ &avail_out, &output, nullptr /* total_out */);
+ if (!ret) {
+ return BrotliError("Brotli compress failed");
+ }
+ return CompressResult{static_cast<int64_t>(input_len - avail_in),
+ static_cast<int64_t>(output_len - avail_out)};
+ }
+
+ Result<FlushResult> Flush(int64_t output_len, uint8_t* output) override {
+ size_t avail_in = 0;
+ const uint8_t* next_in = nullptr;
+ auto avail_out = static_cast<size_t>(output_len);
+ BROTLI_BOOL ret;
+
+ ret = BrotliEncoderCompressStream(state_, BROTLI_OPERATION_FLUSH, &avail_in, &next_in,
+ &avail_out, &output, nullptr /* total_out */);
+ if (!ret) {
+ return BrotliError("Brotli flush failed");
+ }
+ return FlushResult{static_cast<int64_t>(output_len - avail_out),
+ !!BrotliEncoderHasMoreOutput(state_)};
+ }
+
+ Result<EndResult> End(int64_t output_len, uint8_t* output) override {
+ size_t avail_in = 0;
+ const uint8_t* next_in = nullptr;
+ auto avail_out = static_cast<size_t>(output_len);
+ BROTLI_BOOL ret;
+
+ ret =
+ BrotliEncoderCompressStream(state_, BROTLI_OPERATION_FINISH, &avail_in, &next_in,
+ &avail_out, &output, nullptr /* total_out */);
+ if (!ret) {
+ return BrotliError("Brotli end failed");
+ }
+ bool should_retry = !!BrotliEncoderHasMoreOutput(state_);
+ DCHECK_EQ(should_retry, !BrotliEncoderIsFinished(state_));
+ return EndResult{static_cast<int64_t>(output_len - avail_out), should_retry};
+ }
+
+ protected:
+ Status BrotliError(const char* msg) { return Status::IOError(msg); }
+
+ BrotliEncoderState* state_ = nullptr;
+
+ private:
+ const int compression_level_;
+};
+
+// ----------------------------------------------------------------------
+// Brotli codec implementation
+
+class BrotliCodec : public Codec {
+ public:
+ explicit BrotliCodec(int compression_level)
+ : compression_level_(compression_level == kUseDefaultCompressionLevel
+ ? kBrotliDefaultCompressionLevel
+ : compression_level) {}
+
+ Result<int64_t> Decompress(int64_t input_len, const uint8_t* input,
+ int64_t output_buffer_len, uint8_t* output_buffer) override {
+ DCHECK_GE(input_len, 0);
+ DCHECK_GE(output_buffer_len, 0);
+ std::size_t output_size = static_cast<size_t>(output_buffer_len);
+ if (BrotliDecoderDecompress(static_cast<size_t>(input_len), input, &output_size,
+ output_buffer) != BROTLI_DECODER_RESULT_SUCCESS) {
+ return Status::IOError("Corrupt brotli compressed data.");
+ }
+ return output_size;
+ }
+
+ int64_t MaxCompressedLen(int64_t input_len,
+ const uint8_t* ARROW_ARG_UNUSED(input)) override {
+ DCHECK_GE(input_len, 0);
+ return BrotliEncoderMaxCompressedSize(static_cast<size_t>(input_len));
+ }
+
+ Result<int64_t> Compress(int64_t input_len, const uint8_t* input,
+ int64_t output_buffer_len, uint8_t* output_buffer) override {
+ DCHECK_GE(input_len, 0);
+ DCHECK_GE(output_buffer_len, 0);
+ std::size_t output_size = static_cast<size_t>(output_buffer_len);
+ if (BrotliEncoderCompress(compression_level_, BROTLI_DEFAULT_WINDOW,
+ BROTLI_DEFAULT_MODE, static_cast<size_t>(input_len), input,
+ &output_size, output_buffer) == BROTLI_FALSE) {
+ return Status::IOError("Brotli compression failure.");
+ }
+ return output_size;
+ }
+
+ Result<std::shared_ptr<Compressor>> MakeCompressor() override {
+ auto ptr = std::make_shared<BrotliCompressor>(compression_level_);
+ RETURN_NOT_OK(ptr->Init());
+ return ptr;
+ }
+
+ Result<std::shared_ptr<Decompressor>> MakeDecompressor() override {
+ auto ptr = std::make_shared<BrotliDecompressor>();
+ RETURN_NOT_OK(ptr->Init());
+ return ptr;
+ }
+
+ Compression::type compression_type() const override { return Compression::BROTLI; }
+
+ int compression_level() const override { return compression_level_; }
int minimum_compression_level() const override { return BROTLI_MIN_QUALITY; }
int maximum_compression_level() const override { return BROTLI_MAX_QUALITY; }
int default_compression_level() const override {
return kBrotliDefaultCompressionLevel;
}
-
- private:
- const int compression_level_;
-};
-
-} // namespace
-
-std::unique_ptr<Codec> MakeBrotliCodec(int compression_level) {
- return std::unique_ptr<Codec>(new BrotliCodec(compression_level));
-}
-
-} // namespace internal
-} // namespace util
-} // namespace arrow
+
+ private:
+ const int compression_level_;
+};
+
+} // namespace
+
+std::unique_ptr<Codec> MakeBrotliCodec(int compression_level) {
+ return std::unique_ptr<Codec>(new BrotliCodec(compression_level));
+}
+
+} // namespace internal
+} // namespace util
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_internal.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_internal.h
index 268672e14e2..3c93d4cd746 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_internal.h
@@ -1,80 +1,80 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <memory>
-
-#include "arrow/util/compression.h" // IWYU pragma: export
-
-namespace arrow {
-namespace util {
-
-// ----------------------------------------------------------------------
-// Internal Codec factories
-
-namespace internal {
-
-// Brotli compression quality is max (11) by default, which is slow.
-// We use 8 as a default as it is the best trade-off for Parquet workload.
-constexpr int kBrotliDefaultCompressionLevel = 8;
-
-// Brotli codec.
-std::unique_ptr<Codec> MakeBrotliCodec(
- int compression_level = kBrotliDefaultCompressionLevel);
-
-// BZ2 codec.
-constexpr int kBZ2DefaultCompressionLevel = 9;
-std::unique_ptr<Codec> MakeBZ2Codec(int compression_level = kBZ2DefaultCompressionLevel);
-
-// GZip
-constexpr int kGZipDefaultCompressionLevel = 9;
-
-struct GZipFormat {
- enum type {
- ZLIB,
- DEFLATE,
- GZIP,
- };
-};
-
-std::unique_ptr<Codec> MakeGZipCodec(int compression_level = kGZipDefaultCompressionLevel,
- GZipFormat::type format = GZipFormat::GZIP);
-
-// Snappy
-std::unique_ptr<Codec> MakeSnappyCodec();
-
-// Lz4 "raw" format codec.
-std::unique_ptr<Codec> MakeLz4RawCodec();
-
-// Lz4 "Hadoop" format codec (== Lz4 raw codec prefixed with lengths header)
-std::unique_ptr<Codec> MakeLz4HadoopRawCodec();
-
-// Lz4 frame format codec.
-std::unique_ptr<Codec> MakeLz4FrameCodec();
-
-// ZSTD codec.
-
-// XXX level = 1 probably doesn't compress very much
-constexpr int kZSTDDefaultCompressionLevel = 1;
-
-std::unique_ptr<Codec> MakeZSTDCodec(
- int compression_level = kZSTDDefaultCompressionLevel);
-
-} // namespace internal
-} // namespace util
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+
+#include "arrow/util/compression.h" // IWYU pragma: export
+
+namespace arrow {
+namespace util {
+
+// ----------------------------------------------------------------------
+// Internal Codec factories
+
+namespace internal {
+
+// Brotli compression quality is max (11) by default, which is slow.
+// We use 8 as a default as it is the best trade-off for Parquet workload.
+constexpr int kBrotliDefaultCompressionLevel = 8;
+
+// Brotli codec.
+std::unique_ptr<Codec> MakeBrotliCodec(
+ int compression_level = kBrotliDefaultCompressionLevel);
+
+// BZ2 codec.
+constexpr int kBZ2DefaultCompressionLevel = 9;
+std::unique_ptr<Codec> MakeBZ2Codec(int compression_level = kBZ2DefaultCompressionLevel);
+
+// GZip
+constexpr int kGZipDefaultCompressionLevel = 9;
+
+struct GZipFormat {
+ enum type {
+ ZLIB,
+ DEFLATE,
+ GZIP,
+ };
+};
+
+std::unique_ptr<Codec> MakeGZipCodec(int compression_level = kGZipDefaultCompressionLevel,
+ GZipFormat::type format = GZipFormat::GZIP);
+
+// Snappy
+std::unique_ptr<Codec> MakeSnappyCodec();
+
+// Lz4 "raw" format codec.
+std::unique_ptr<Codec> MakeLz4RawCodec();
+
+// Lz4 "Hadoop" format codec (== Lz4 raw codec prefixed with lengths header)
+std::unique_ptr<Codec> MakeLz4HadoopRawCodec();
+
+// Lz4 frame format codec.
+std::unique_ptr<Codec> MakeLz4FrameCodec();
+
+// ZSTD codec.
+
+// XXX level = 1 probably doesn't compress very much
+constexpr int kZSTDDefaultCompressionLevel = 1;
+
+std::unique_ptr<Codec> MakeZSTDCodec(
+ int compression_level = kZSTDDefaultCompressionLevel);
+
+} // namespace internal
+} // namespace util
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_lz4.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_lz4.cc
index c783e405590..c1c68beb095 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_lz4.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_lz4.cc
@@ -1,440 +1,440 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/compression.h"
-
-#include <cstdint>
-#include <cstring>
-#include <memory>
-
-#include <lz4.h>
-#include <lz4frame.h>
-
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/util/bit_util.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/compression.h"
+
+#include <cstdint>
+#include <cstring>
+#include <memory>
+
+#include <lz4.h>
+#include <lz4frame.h>
+
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/bit_util.h"
#include "arrow/util/endian.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/ubsan.h"
-
-#ifndef LZ4F_HEADER_SIZE_MAX
-#define LZ4F_HEADER_SIZE_MAX 19
-#endif
-
-namespace arrow {
-namespace util {
-
-namespace {
-
-static Status LZ4Error(LZ4F_errorCode_t ret, const char* prefix_msg) {
- return Status::IOError(prefix_msg, LZ4F_getErrorName(ret));
-}
-
-static LZ4F_preferences_t DefaultPreferences() {
- LZ4F_preferences_t prefs;
- memset(&prefs, 0, sizeof(prefs));
- return prefs;
-}
-
-// ----------------------------------------------------------------------
-// Lz4 frame decompressor implementation
-
-class LZ4Decompressor : public Decompressor {
- public:
- LZ4Decompressor() {}
-
- ~LZ4Decompressor() override {
- if (ctx_ != nullptr) {
- ARROW_UNUSED(LZ4F_freeDecompressionContext(ctx_));
- }
- }
-
- Status Init() {
- LZ4F_errorCode_t ret;
- finished_ = false;
-
- ret = LZ4F_createDecompressionContext(&ctx_, LZ4F_VERSION);
- if (LZ4F_isError(ret)) {
- return LZ4Error(ret, "LZ4 init failed: ");
- } else {
- return Status::OK();
- }
- }
-
- Status Reset() override {
-#if defined(LZ4_VERSION_NUMBER) && LZ4_VERSION_NUMBER >= 10800
- // LZ4F_resetDecompressionContext appeared in 1.8.0
- DCHECK_NE(ctx_, nullptr);
- LZ4F_resetDecompressionContext(ctx_);
- finished_ = false;
- return Status::OK();
-#else
- if (ctx_ != nullptr) {
- ARROW_UNUSED(LZ4F_freeDecompressionContext(ctx_));
- }
- return Init();
-#endif
- }
-
- Result<DecompressResult> Decompress(int64_t input_len, const uint8_t* input,
- int64_t output_len, uint8_t* output) override {
- auto src = input;
- auto dst = output;
- auto src_size = static_cast<size_t>(input_len);
- auto dst_capacity = static_cast<size_t>(output_len);
- size_t ret;
-
- ret =
- LZ4F_decompress(ctx_, dst, &dst_capacity, src, &src_size, nullptr /* options */);
- if (LZ4F_isError(ret)) {
- return LZ4Error(ret, "LZ4 decompress failed: ");
- }
- finished_ = (ret == 0);
- return DecompressResult{static_cast<int64_t>(src_size),
- static_cast<int64_t>(dst_capacity),
- (src_size == 0 && dst_capacity == 0)};
- }
-
- bool IsFinished() override { return finished_; }
-
- protected:
- LZ4F_decompressionContext_t ctx_ = nullptr;
- bool finished_;
-};
-
-// ----------------------------------------------------------------------
-// Lz4 frame compressor implementation
-
-class LZ4Compressor : public Compressor {
- public:
- LZ4Compressor() {}
-
- ~LZ4Compressor() override {
- if (ctx_ != nullptr) {
- ARROW_UNUSED(LZ4F_freeCompressionContext(ctx_));
- }
- }
-
- Status Init() {
- LZ4F_errorCode_t ret;
- prefs_ = DefaultPreferences();
- first_time_ = true;
-
- ret = LZ4F_createCompressionContext(&ctx_, LZ4F_VERSION);
- if (LZ4F_isError(ret)) {
- return LZ4Error(ret, "LZ4 init failed: ");
- } else {
- return Status::OK();
- }
- }
-
-#define BEGIN_COMPRESS(dst, dst_capacity, output_too_small) \
- if (first_time_) { \
- if (dst_capacity < LZ4F_HEADER_SIZE_MAX) { \
- /* Output too small to write LZ4F header */ \
- return (output_too_small); \
- } \
- ret = LZ4F_compressBegin(ctx_, dst, dst_capacity, &prefs_); \
- if (LZ4F_isError(ret)) { \
- return LZ4Error(ret, "LZ4 compress begin failed: "); \
- } \
- first_time_ = false; \
- dst += ret; \
- dst_capacity -= ret; \
- bytes_written += static_cast<int64_t>(ret); \
- }
-
- Result<CompressResult> Compress(int64_t input_len, const uint8_t* input,
- int64_t output_len, uint8_t* output) override {
- auto src = input;
- auto dst = output;
- auto src_size = static_cast<size_t>(input_len);
- auto dst_capacity = static_cast<size_t>(output_len);
- size_t ret;
- int64_t bytes_written = 0;
-
- BEGIN_COMPRESS(dst, dst_capacity, (CompressResult{0, 0}));
-
- if (dst_capacity < LZ4F_compressBound(src_size, &prefs_)) {
- // Output too small to compress into
- return CompressResult{0, bytes_written};
- }
- ret = LZ4F_compressUpdate(ctx_, dst, dst_capacity, src, src_size,
- nullptr /* options */);
- if (LZ4F_isError(ret)) {
- return LZ4Error(ret, "LZ4 compress update failed: ");
- }
- bytes_written += static_cast<int64_t>(ret);
- DCHECK_LE(bytes_written, output_len);
- return CompressResult{input_len, bytes_written};
- }
-
- Result<FlushResult> Flush(int64_t output_len, uint8_t* output) override {
- auto dst = output;
- auto dst_capacity = static_cast<size_t>(output_len);
- size_t ret;
- int64_t bytes_written = 0;
-
- BEGIN_COMPRESS(dst, dst_capacity, (FlushResult{0, true}));
-
- if (dst_capacity < LZ4F_compressBound(0, &prefs_)) {
- // Output too small to flush into
- return FlushResult{bytes_written, true};
- }
-
- ret = LZ4F_flush(ctx_, dst, dst_capacity, nullptr /* options */);
- if (LZ4F_isError(ret)) {
- return LZ4Error(ret, "LZ4 flush failed: ");
- }
- bytes_written += static_cast<int64_t>(ret);
- DCHECK_LE(bytes_written, output_len);
- return FlushResult{bytes_written, false};
- }
-
- Result<EndResult> End(int64_t output_len, uint8_t* output) override {
- auto dst = output;
- auto dst_capacity = static_cast<size_t>(output_len);
- size_t ret;
- int64_t bytes_written = 0;
-
- BEGIN_COMPRESS(dst, dst_capacity, (EndResult{0, true}));
-
- if (dst_capacity < LZ4F_compressBound(0, &prefs_)) {
- // Output too small to end frame into
- return EndResult{bytes_written, true};
- }
-
- ret = LZ4F_compressEnd(ctx_, dst, dst_capacity, nullptr /* options */);
- if (LZ4F_isError(ret)) {
- return LZ4Error(ret, "LZ4 end failed: ");
- }
- bytes_written += static_cast<int64_t>(ret);
- DCHECK_LE(bytes_written, output_len);
- return EndResult{bytes_written, false};
- }
-
-#undef BEGIN_COMPRESS
-
- protected:
- LZ4F_compressionContext_t ctx_ = nullptr;
- LZ4F_preferences_t prefs_;
- bool first_time_;
-};
-
-// ----------------------------------------------------------------------
-// Lz4 frame codec implementation
-
-class Lz4FrameCodec : public Codec {
- public:
- Lz4FrameCodec() : prefs_(DefaultPreferences()) {}
-
- int64_t MaxCompressedLen(int64_t input_len,
- const uint8_t* ARROW_ARG_UNUSED(input)) override {
- return static_cast<int64_t>(
- LZ4F_compressFrameBound(static_cast<size_t>(input_len), &prefs_));
- }
-
- Result<int64_t> Compress(int64_t input_len, const uint8_t* input,
- int64_t output_buffer_len, uint8_t* output_buffer) override {
- auto output_len =
- LZ4F_compressFrame(output_buffer, static_cast<size_t>(output_buffer_len), input,
- static_cast<size_t>(input_len), &prefs_);
- if (LZ4F_isError(output_len)) {
- return LZ4Error(output_len, "Lz4 compression failure: ");
- }
- return static_cast<int64_t>(output_len);
- }
-
- Result<int64_t> Decompress(int64_t input_len, const uint8_t* input,
- int64_t output_buffer_len, uint8_t* output_buffer) override {
- ARROW_ASSIGN_OR_RAISE(auto decomp, MakeDecompressor());
-
- int64_t total_bytes_written = 0;
- while (!decomp->IsFinished() && input_len != 0) {
- ARROW_ASSIGN_OR_RAISE(
- auto res,
- decomp->Decompress(input_len, input, output_buffer_len, output_buffer));
- input += res.bytes_read;
- input_len -= res.bytes_read;
- output_buffer += res.bytes_written;
- output_buffer_len -= res.bytes_written;
- total_bytes_written += res.bytes_written;
- if (res.need_more_output) {
- return Status::IOError("Lz4 decompression buffer too small");
- }
- }
- if (!decomp->IsFinished()) {
- return Status::IOError("Lz4 compressed input contains less than one frame");
- }
- if (input_len != 0) {
- return Status::IOError("Lz4 compressed input contains more than one frame");
- }
- return total_bytes_written;
- }
-
- Result<std::shared_ptr<Compressor>> MakeCompressor() override {
- auto ptr = std::make_shared<LZ4Compressor>();
- RETURN_NOT_OK(ptr->Init());
- return ptr;
- }
-
- Result<std::shared_ptr<Decompressor>> MakeDecompressor() override {
- auto ptr = std::make_shared<LZ4Decompressor>();
- RETURN_NOT_OK(ptr->Init());
- return ptr;
- }
-
- Compression::type compression_type() const override { return Compression::LZ4_FRAME; }
+#include "arrow/util/logging.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/ubsan.h"
+
+#ifndef LZ4F_HEADER_SIZE_MAX
+#define LZ4F_HEADER_SIZE_MAX 19
+#endif
+
+namespace arrow {
+namespace util {
+
+namespace {
+
+static Status LZ4Error(LZ4F_errorCode_t ret, const char* prefix_msg) {
+ return Status::IOError(prefix_msg, LZ4F_getErrorName(ret));
+}
+
+static LZ4F_preferences_t DefaultPreferences() {
+ LZ4F_preferences_t prefs;
+ memset(&prefs, 0, sizeof(prefs));
+ return prefs;
+}
+
+// ----------------------------------------------------------------------
+// Lz4 frame decompressor implementation
+
+class LZ4Decompressor : public Decompressor {
+ public:
+ LZ4Decompressor() {}
+
+ ~LZ4Decompressor() override {
+ if (ctx_ != nullptr) {
+ ARROW_UNUSED(LZ4F_freeDecompressionContext(ctx_));
+ }
+ }
+
+ Status Init() {
+ LZ4F_errorCode_t ret;
+ finished_ = false;
+
+ ret = LZ4F_createDecompressionContext(&ctx_, LZ4F_VERSION);
+ if (LZ4F_isError(ret)) {
+ return LZ4Error(ret, "LZ4 init failed: ");
+ } else {
+ return Status::OK();
+ }
+ }
+
+ Status Reset() override {
+#if defined(LZ4_VERSION_NUMBER) && LZ4_VERSION_NUMBER >= 10800
+ // LZ4F_resetDecompressionContext appeared in 1.8.0
+ DCHECK_NE(ctx_, nullptr);
+ LZ4F_resetDecompressionContext(ctx_);
+ finished_ = false;
+ return Status::OK();
+#else
+ if (ctx_ != nullptr) {
+ ARROW_UNUSED(LZ4F_freeDecompressionContext(ctx_));
+ }
+ return Init();
+#endif
+ }
+
+ Result<DecompressResult> Decompress(int64_t input_len, const uint8_t* input,
+ int64_t output_len, uint8_t* output) override {
+ auto src = input;
+ auto dst = output;
+ auto src_size = static_cast<size_t>(input_len);
+ auto dst_capacity = static_cast<size_t>(output_len);
+ size_t ret;
+
+ ret =
+ LZ4F_decompress(ctx_, dst, &dst_capacity, src, &src_size, nullptr /* options */);
+ if (LZ4F_isError(ret)) {
+ return LZ4Error(ret, "LZ4 decompress failed: ");
+ }
+ finished_ = (ret == 0);
+ return DecompressResult{static_cast<int64_t>(src_size),
+ static_cast<int64_t>(dst_capacity),
+ (src_size == 0 && dst_capacity == 0)};
+ }
+
+ bool IsFinished() override { return finished_; }
+
+ protected:
+ LZ4F_decompressionContext_t ctx_ = nullptr;
+ bool finished_;
+};
+
+// ----------------------------------------------------------------------
+// Lz4 frame compressor implementation
+
+class LZ4Compressor : public Compressor {
+ public:
+ LZ4Compressor() {}
+
+ ~LZ4Compressor() override {
+ if (ctx_ != nullptr) {
+ ARROW_UNUSED(LZ4F_freeCompressionContext(ctx_));
+ }
+ }
+
+ Status Init() {
+ LZ4F_errorCode_t ret;
+ prefs_ = DefaultPreferences();
+ first_time_ = true;
+
+ ret = LZ4F_createCompressionContext(&ctx_, LZ4F_VERSION);
+ if (LZ4F_isError(ret)) {
+ return LZ4Error(ret, "LZ4 init failed: ");
+ } else {
+ return Status::OK();
+ }
+ }
+
+#define BEGIN_COMPRESS(dst, dst_capacity, output_too_small) \
+ if (first_time_) { \
+ if (dst_capacity < LZ4F_HEADER_SIZE_MAX) { \
+ /* Output too small to write LZ4F header */ \
+ return (output_too_small); \
+ } \
+ ret = LZ4F_compressBegin(ctx_, dst, dst_capacity, &prefs_); \
+ if (LZ4F_isError(ret)) { \
+ return LZ4Error(ret, "LZ4 compress begin failed: "); \
+ } \
+ first_time_ = false; \
+ dst += ret; \
+ dst_capacity -= ret; \
+ bytes_written += static_cast<int64_t>(ret); \
+ }
+
+ Result<CompressResult> Compress(int64_t input_len, const uint8_t* input,
+ int64_t output_len, uint8_t* output) override {
+ auto src = input;
+ auto dst = output;
+ auto src_size = static_cast<size_t>(input_len);
+ auto dst_capacity = static_cast<size_t>(output_len);
+ size_t ret;
+ int64_t bytes_written = 0;
+
+ BEGIN_COMPRESS(dst, dst_capacity, (CompressResult{0, 0}));
+
+ if (dst_capacity < LZ4F_compressBound(src_size, &prefs_)) {
+ // Output too small to compress into
+ return CompressResult{0, bytes_written};
+ }
+ ret = LZ4F_compressUpdate(ctx_, dst, dst_capacity, src, src_size,
+ nullptr /* options */);
+ if (LZ4F_isError(ret)) {
+ return LZ4Error(ret, "LZ4 compress update failed: ");
+ }
+ bytes_written += static_cast<int64_t>(ret);
+ DCHECK_LE(bytes_written, output_len);
+ return CompressResult{input_len, bytes_written};
+ }
+
+ Result<FlushResult> Flush(int64_t output_len, uint8_t* output) override {
+ auto dst = output;
+ auto dst_capacity = static_cast<size_t>(output_len);
+ size_t ret;
+ int64_t bytes_written = 0;
+
+ BEGIN_COMPRESS(dst, dst_capacity, (FlushResult{0, true}));
+
+ if (dst_capacity < LZ4F_compressBound(0, &prefs_)) {
+ // Output too small to flush into
+ return FlushResult{bytes_written, true};
+ }
+
+ ret = LZ4F_flush(ctx_, dst, dst_capacity, nullptr /* options */);
+ if (LZ4F_isError(ret)) {
+ return LZ4Error(ret, "LZ4 flush failed: ");
+ }
+ bytes_written += static_cast<int64_t>(ret);
+ DCHECK_LE(bytes_written, output_len);
+ return FlushResult{bytes_written, false};
+ }
+
+ Result<EndResult> End(int64_t output_len, uint8_t* output) override {
+ auto dst = output;
+ auto dst_capacity = static_cast<size_t>(output_len);
+ size_t ret;
+ int64_t bytes_written = 0;
+
+ BEGIN_COMPRESS(dst, dst_capacity, (EndResult{0, true}));
+
+ if (dst_capacity < LZ4F_compressBound(0, &prefs_)) {
+ // Output too small to end frame into
+ return EndResult{bytes_written, true};
+ }
+
+ ret = LZ4F_compressEnd(ctx_, dst, dst_capacity, nullptr /* options */);
+ if (LZ4F_isError(ret)) {
+ return LZ4Error(ret, "LZ4 end failed: ");
+ }
+ bytes_written += static_cast<int64_t>(ret);
+ DCHECK_LE(bytes_written, output_len);
+ return EndResult{bytes_written, false};
+ }
+
+#undef BEGIN_COMPRESS
+
+ protected:
+ LZ4F_compressionContext_t ctx_ = nullptr;
+ LZ4F_preferences_t prefs_;
+ bool first_time_;
+};
+
+// ----------------------------------------------------------------------
+// Lz4 frame codec implementation
+
+class Lz4FrameCodec : public Codec {
+ public:
+ Lz4FrameCodec() : prefs_(DefaultPreferences()) {}
+
+ int64_t MaxCompressedLen(int64_t input_len,
+ const uint8_t* ARROW_ARG_UNUSED(input)) override {
+ return static_cast<int64_t>(
+ LZ4F_compressFrameBound(static_cast<size_t>(input_len), &prefs_));
+ }
+
+ Result<int64_t> Compress(int64_t input_len, const uint8_t* input,
+ int64_t output_buffer_len, uint8_t* output_buffer) override {
+ auto output_len =
+ LZ4F_compressFrame(output_buffer, static_cast<size_t>(output_buffer_len), input,
+ static_cast<size_t>(input_len), &prefs_);
+ if (LZ4F_isError(output_len)) {
+ return LZ4Error(output_len, "Lz4 compression failure: ");
+ }
+ return static_cast<int64_t>(output_len);
+ }
+
+ Result<int64_t> Decompress(int64_t input_len, const uint8_t* input,
+ int64_t output_buffer_len, uint8_t* output_buffer) override {
+ ARROW_ASSIGN_OR_RAISE(auto decomp, MakeDecompressor());
+
+ int64_t total_bytes_written = 0;
+ while (!decomp->IsFinished() && input_len != 0) {
+ ARROW_ASSIGN_OR_RAISE(
+ auto res,
+ decomp->Decompress(input_len, input, output_buffer_len, output_buffer));
+ input += res.bytes_read;
+ input_len -= res.bytes_read;
+ output_buffer += res.bytes_written;
+ output_buffer_len -= res.bytes_written;
+ total_bytes_written += res.bytes_written;
+ if (res.need_more_output) {
+ return Status::IOError("Lz4 decompression buffer too small");
+ }
+ }
+ if (!decomp->IsFinished()) {
+ return Status::IOError("Lz4 compressed input contains less than one frame");
+ }
+ if (input_len != 0) {
+ return Status::IOError("Lz4 compressed input contains more than one frame");
+ }
+ return total_bytes_written;
+ }
+
+ Result<std::shared_ptr<Compressor>> MakeCompressor() override {
+ auto ptr = std::make_shared<LZ4Compressor>();
+ RETURN_NOT_OK(ptr->Init());
+ return ptr;
+ }
+
+ Result<std::shared_ptr<Decompressor>> MakeDecompressor() override {
+ auto ptr = std::make_shared<LZ4Decompressor>();
+ RETURN_NOT_OK(ptr->Init());
+ return ptr;
+ }
+
+ Compression::type compression_type() const override { return Compression::LZ4_FRAME; }
int minimum_compression_level() const override { return kUseDefaultCompressionLevel; }
int maximum_compression_level() const override { return kUseDefaultCompressionLevel; }
int default_compression_level() const override { return kUseDefaultCompressionLevel; }
-
- protected:
- const LZ4F_preferences_t prefs_;
-};
-
-// ----------------------------------------------------------------------
-// Lz4 "raw" codec implementation
-
-class Lz4Codec : public Codec {
- public:
- Result<int64_t> Decompress(int64_t input_len, const uint8_t* input,
- int64_t output_buffer_len, uint8_t* output_buffer) override {
- int64_t decompressed_size = LZ4_decompress_safe(
- reinterpret_cast<const char*>(input), reinterpret_cast<char*>(output_buffer),
- static_cast<int>(input_len), static_cast<int>(output_buffer_len));
- if (decompressed_size < 0) {
- return Status::IOError("Corrupt Lz4 compressed data.");
- }
- return decompressed_size;
- }
-
- int64_t MaxCompressedLen(int64_t input_len,
- const uint8_t* ARROW_ARG_UNUSED(input)) override {
- return LZ4_compressBound(static_cast<int>(input_len));
- }
-
- Result<int64_t> Compress(int64_t input_len, const uint8_t* input,
- int64_t output_buffer_len, uint8_t* output_buffer) override {
- int64_t output_len = LZ4_compress_default(
- reinterpret_cast<const char*>(input), reinterpret_cast<char*>(output_buffer),
- static_cast<int>(input_len), static_cast<int>(output_buffer_len));
- if (output_len == 0) {
- return Status::IOError("Lz4 compression failure.");
- }
- return output_len;
- }
-
- Result<std::shared_ptr<Compressor>> MakeCompressor() override {
- return Status::NotImplemented(
- "Streaming compression unsupported with LZ4 raw format. "
- "Try using LZ4 frame format instead.");
- }
-
- Result<std::shared_ptr<Decompressor>> MakeDecompressor() override {
- return Status::NotImplemented(
- "Streaming decompression unsupported with LZ4 raw format. "
- "Try using LZ4 frame format instead.");
- }
-
- Compression::type compression_type() const override { return Compression::LZ4; }
+
+ protected:
+ const LZ4F_preferences_t prefs_;
+};
+
+// ----------------------------------------------------------------------
+// Lz4 "raw" codec implementation
+
+class Lz4Codec : public Codec {
+ public:
+ Result<int64_t> Decompress(int64_t input_len, const uint8_t* input,
+ int64_t output_buffer_len, uint8_t* output_buffer) override {
+ int64_t decompressed_size = LZ4_decompress_safe(
+ reinterpret_cast<const char*>(input), reinterpret_cast<char*>(output_buffer),
+ static_cast<int>(input_len), static_cast<int>(output_buffer_len));
+ if (decompressed_size < 0) {
+ return Status::IOError("Corrupt Lz4 compressed data.");
+ }
+ return decompressed_size;
+ }
+
+ int64_t MaxCompressedLen(int64_t input_len,
+ const uint8_t* ARROW_ARG_UNUSED(input)) override {
+ return LZ4_compressBound(static_cast<int>(input_len));
+ }
+
+ Result<int64_t> Compress(int64_t input_len, const uint8_t* input,
+ int64_t output_buffer_len, uint8_t* output_buffer) override {
+ int64_t output_len = LZ4_compress_default(
+ reinterpret_cast<const char*>(input), reinterpret_cast<char*>(output_buffer),
+ static_cast<int>(input_len), static_cast<int>(output_buffer_len));
+ if (output_len == 0) {
+ return Status::IOError("Lz4 compression failure.");
+ }
+ return output_len;
+ }
+
+ Result<std::shared_ptr<Compressor>> MakeCompressor() override {
+ return Status::NotImplemented(
+ "Streaming compression unsupported with LZ4 raw format. "
+ "Try using LZ4 frame format instead.");
+ }
+
+ Result<std::shared_ptr<Decompressor>> MakeDecompressor() override {
+ return Status::NotImplemented(
+ "Streaming decompression unsupported with LZ4 raw format. "
+ "Try using LZ4 frame format instead.");
+ }
+
+ Compression::type compression_type() const override { return Compression::LZ4; }
int minimum_compression_level() const override { return kUseDefaultCompressionLevel; }
int maximum_compression_level() const override { return kUseDefaultCompressionLevel; }
int default_compression_level() const override { return kUseDefaultCompressionLevel; }
-};
-
-// ----------------------------------------------------------------------
-// Lz4 Hadoop "raw" codec implementation
-
-class Lz4HadoopCodec : public Lz4Codec {
- public:
- Result<int64_t> Decompress(int64_t input_len, const uint8_t* input,
- int64_t output_buffer_len, uint8_t* output_buffer) override {
- const int64_t decompressed_size =
- TryDecompressHadoop(input_len, input, output_buffer_len, output_buffer);
- if (decompressed_size != kNotHadoop) {
- return decompressed_size;
- }
- // Fall back on raw LZ4 codec (for files produces by earlier versions of Parquet C++)
- return Lz4Codec::Decompress(input_len, input, output_buffer_len, output_buffer);
- }
-
- int64_t MaxCompressedLen(int64_t input_len,
- const uint8_t* ARROW_ARG_UNUSED(input)) override {
- return kPrefixLength + Lz4Codec::MaxCompressedLen(input_len, nullptr);
- }
-
- Result<int64_t> Compress(int64_t input_len, const uint8_t* input,
- int64_t output_buffer_len, uint8_t* output_buffer) override {
- if (output_buffer_len < kPrefixLength) {
- return Status::Invalid("Output buffer too small for Lz4HadoopCodec compression");
- }
-
- ARROW_ASSIGN_OR_RAISE(
- int64_t output_len,
- Lz4Codec::Compress(input_len, input, output_buffer_len - kPrefixLength,
- output_buffer + kPrefixLength));
-
- // Prepend decompressed size in bytes and compressed size in bytes
- // to be compatible with Hadoop Lz4Codec
- const uint32_t decompressed_size =
- BitUtil::ToBigEndian(static_cast<uint32_t>(input_len));
- const uint32_t compressed_size =
- BitUtil::ToBigEndian(static_cast<uint32_t>(output_len));
- SafeStore(output_buffer, decompressed_size);
- SafeStore(output_buffer + sizeof(uint32_t), compressed_size);
-
- return kPrefixLength + output_len;
- }
-
- Result<std::shared_ptr<Compressor>> MakeCompressor() override {
- return Status::NotImplemented(
- "Streaming compression unsupported with LZ4 Hadoop raw format. "
- "Try using LZ4 frame format instead.");
- }
-
- Result<std::shared_ptr<Decompressor>> MakeDecompressor() override {
- return Status::NotImplemented(
- "Streaming decompression unsupported with LZ4 Hadoop raw format. "
- "Try using LZ4 frame format instead.");
- }
-
- Compression::type compression_type() const override { return Compression::LZ4_HADOOP; }
-
- protected:
- // Offset starting at which page data can be read/written
- static const int64_t kPrefixLength = sizeof(uint32_t) * 2;
-
- static const int64_t kNotHadoop = -1;
-
- int64_t TryDecompressHadoop(int64_t input_len, const uint8_t* input,
- int64_t output_buffer_len, uint8_t* output_buffer) {
+};
+
+// ----------------------------------------------------------------------
+// Lz4 Hadoop "raw" codec implementation
+
+class Lz4HadoopCodec : public Lz4Codec {
+ public:
+ Result<int64_t> Decompress(int64_t input_len, const uint8_t* input,
+ int64_t output_buffer_len, uint8_t* output_buffer) override {
+ const int64_t decompressed_size =
+ TryDecompressHadoop(input_len, input, output_buffer_len, output_buffer);
+ if (decompressed_size != kNotHadoop) {
+ return decompressed_size;
+ }
+ // Fall back on raw LZ4 codec (for files produces by earlier versions of Parquet C++)
+ return Lz4Codec::Decompress(input_len, input, output_buffer_len, output_buffer);
+ }
+
+ int64_t MaxCompressedLen(int64_t input_len,
+ const uint8_t* ARROW_ARG_UNUSED(input)) override {
+ return kPrefixLength + Lz4Codec::MaxCompressedLen(input_len, nullptr);
+ }
+
+ Result<int64_t> Compress(int64_t input_len, const uint8_t* input,
+ int64_t output_buffer_len, uint8_t* output_buffer) override {
+ if (output_buffer_len < kPrefixLength) {
+ return Status::Invalid("Output buffer too small for Lz4HadoopCodec compression");
+ }
+
+ ARROW_ASSIGN_OR_RAISE(
+ int64_t output_len,
+ Lz4Codec::Compress(input_len, input, output_buffer_len - kPrefixLength,
+ output_buffer + kPrefixLength));
+
+ // Prepend decompressed size in bytes and compressed size in bytes
+ // to be compatible with Hadoop Lz4Codec
+ const uint32_t decompressed_size =
+ BitUtil::ToBigEndian(static_cast<uint32_t>(input_len));
+ const uint32_t compressed_size =
+ BitUtil::ToBigEndian(static_cast<uint32_t>(output_len));
+ SafeStore(output_buffer, decompressed_size);
+ SafeStore(output_buffer + sizeof(uint32_t), compressed_size);
+
+ return kPrefixLength + output_len;
+ }
+
+ Result<std::shared_ptr<Compressor>> MakeCompressor() override {
+ return Status::NotImplemented(
+ "Streaming compression unsupported with LZ4 Hadoop raw format. "
+ "Try using LZ4 frame format instead.");
+ }
+
+ Result<std::shared_ptr<Decompressor>> MakeDecompressor() override {
+ return Status::NotImplemented(
+ "Streaming decompression unsupported with LZ4 Hadoop raw format. "
+ "Try using LZ4 frame format instead.");
+ }
+
+ Compression::type compression_type() const override { return Compression::LZ4_HADOOP; }
+
+ protected:
+ // Offset starting at which page data can be read/written
+ static const int64_t kPrefixLength = sizeof(uint32_t) * 2;
+
+ static const int64_t kNotHadoop = -1;
+
+ int64_t TryDecompressHadoop(int64_t input_len, const uint8_t* input,
+ int64_t output_buffer_len, uint8_t* output_buffer) {
// Parquet files written with the Hadoop Lz4Codec use their own framing.
// The input buffer can contain an arbitrary number of "frames", each
// with the following structure:
// - bytes 0..3: big-endian uint32_t representing the frame decompressed size
// - bytes 4..7: big-endian uint32_t representing the frame compressed size
// - bytes 8...: frame compressed data
- //
- // The Hadoop Lz4Codec source code can be found here:
- // https://github.com/apache/hadoop/blob/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/src/codec/Lz4Codec.cc
+ //
+ // The Hadoop Lz4Codec source code can be found here:
+ // https://github.com/apache/hadoop/blob/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/src/codec/Lz4Codec.cc
int64_t total_decompressed_size = 0;
-
+
while (input_len >= kPrefixLength) {
const uint32_t expected_decompressed_size =
BitUtil::FromBigEndian(SafeLoadAs<uint32_t>(input));
@@ -442,11 +442,11 @@ class Lz4HadoopCodec : public Lz4Codec {
BitUtil::FromBigEndian(SafeLoadAs<uint32_t>(input + sizeof(uint32_t)));
input += kPrefixLength;
input_len -= kPrefixLength;
-
+
if (input_len < expected_compressed_size) {
// Not enough bytes for Hadoop "frame"
return kNotHadoop;
- }
+ }
if (output_buffer_len < expected_decompressed_size) {
// Not enough bytes to hold advertised output => probably not Hadoop
return kNotHadoop;
@@ -463,33 +463,33 @@ class Lz4HadoopCodec : public Lz4Codec {
output_buffer += expected_decompressed_size;
output_buffer_len -= expected_decompressed_size;
total_decompressed_size += expected_decompressed_size;
- }
-
+ }
+
if (input_len == 0) {
return total_decompressed_size;
} else {
return kNotHadoop;
}
- }
-};
-
-} // namespace
-
-namespace internal {
-
-std::unique_ptr<Codec> MakeLz4FrameCodec() {
- return std::unique_ptr<Codec>(new Lz4FrameCodec());
-}
-
-std::unique_ptr<Codec> MakeLz4HadoopRawCodec() {
- return std::unique_ptr<Codec>(new Lz4HadoopCodec());
-}
-
-std::unique_ptr<Codec> MakeLz4RawCodec() {
- return std::unique_ptr<Codec>(new Lz4Codec());
-}
-
-} // namespace internal
-
-} // namespace util
-} // namespace arrow
+ }
+};
+
+} // namespace
+
+namespace internal {
+
+std::unique_ptr<Codec> MakeLz4FrameCodec() {
+ return std::unique_ptr<Codec>(new Lz4FrameCodec());
+}
+
+std::unique_ptr<Codec> MakeLz4HadoopRawCodec() {
+ return std::unique_ptr<Codec>(new Lz4HadoopCodec());
+}
+
+std::unique_ptr<Codec> MakeLz4RawCodec() {
+ return std::unique_ptr<Codec>(new Lz4Codec());
+}
+
+} // namespace internal
+
+} // namespace util
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_snappy.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_snappy.cc
index 3756f957d04..276df7a0433 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_snappy.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_snappy.cc
@@ -1,102 +1,102 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/compression_internal.h"
-
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-
-#include <snappy.h>
-
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/macros.h"
-
-using std::size_t;
-
-namespace arrow {
-namespace util {
-namespace internal {
-
-namespace {
-
-// ----------------------------------------------------------------------
-// Snappy implementation
-
-class SnappyCodec : public Codec {
- public:
- Result<int64_t> Decompress(int64_t input_len, const uint8_t* input,
- int64_t output_buffer_len, uint8_t* output_buffer) override {
- size_t decompressed_size;
- if (!snappy::GetUncompressedLength(reinterpret_cast<const char*>(input),
- static_cast<size_t>(input_len),
- &decompressed_size)) {
- return Status::IOError("Corrupt snappy compressed data.");
- }
- if (output_buffer_len < static_cast<int64_t>(decompressed_size)) {
- return Status::Invalid("Output buffer size (", output_buffer_len, ") must be ",
- decompressed_size, " or larger.");
- }
- if (!snappy::RawUncompress(reinterpret_cast<const char*>(input),
- static_cast<size_t>(input_len),
- reinterpret_cast<char*>(output_buffer))) {
- return Status::IOError("Corrupt snappy compressed data.");
- }
- return static_cast<int64_t>(decompressed_size);
- }
-
- int64_t MaxCompressedLen(int64_t input_len,
- const uint8_t* ARROW_ARG_UNUSED(input)) override {
- DCHECK_GE(input_len, 0);
- return snappy::MaxCompressedLength(static_cast<size_t>(input_len));
- }
-
- Result<int64_t> Compress(int64_t input_len, const uint8_t* input,
- int64_t ARROW_ARG_UNUSED(output_buffer_len),
- uint8_t* output_buffer) override {
- size_t output_size;
- snappy::RawCompress(reinterpret_cast<const char*>(input),
- static_cast<size_t>(input_len),
- reinterpret_cast<char*>(output_buffer), &output_size);
- return static_cast<int64_t>(output_size);
- }
-
- Result<std::shared_ptr<Compressor>> MakeCompressor() override {
- return Status::NotImplemented("Streaming compression unsupported with Snappy");
- }
-
- Result<std::shared_ptr<Decompressor>> MakeDecompressor() override {
- return Status::NotImplemented("Streaming decompression unsupported with Snappy");
- }
-
- Compression::type compression_type() const override { return Compression::SNAPPY; }
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/compression_internal.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+
+#include <snappy.h>
+
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/macros.h"
+
+using std::size_t;
+
+namespace arrow {
+namespace util {
+namespace internal {
+
+namespace {
+
+// ----------------------------------------------------------------------
+// Snappy implementation
+
+class SnappyCodec : public Codec {
+ public:
+ Result<int64_t> Decompress(int64_t input_len, const uint8_t* input,
+ int64_t output_buffer_len, uint8_t* output_buffer) override {
+ size_t decompressed_size;
+ if (!snappy::GetUncompressedLength(reinterpret_cast<const char*>(input),
+ static_cast<size_t>(input_len),
+ &decompressed_size)) {
+ return Status::IOError("Corrupt snappy compressed data.");
+ }
+ if (output_buffer_len < static_cast<int64_t>(decompressed_size)) {
+ return Status::Invalid("Output buffer size (", output_buffer_len, ") must be ",
+ decompressed_size, " or larger.");
+ }
+ if (!snappy::RawUncompress(reinterpret_cast<const char*>(input),
+ static_cast<size_t>(input_len),
+ reinterpret_cast<char*>(output_buffer))) {
+ return Status::IOError("Corrupt snappy compressed data.");
+ }
+ return static_cast<int64_t>(decompressed_size);
+ }
+
+ int64_t MaxCompressedLen(int64_t input_len,
+ const uint8_t* ARROW_ARG_UNUSED(input)) override {
+ DCHECK_GE(input_len, 0);
+ return snappy::MaxCompressedLength(static_cast<size_t>(input_len));
+ }
+
+ Result<int64_t> Compress(int64_t input_len, const uint8_t* input,
+ int64_t ARROW_ARG_UNUSED(output_buffer_len),
+ uint8_t* output_buffer) override {
+ size_t output_size;
+ snappy::RawCompress(reinterpret_cast<const char*>(input),
+ static_cast<size_t>(input_len),
+ reinterpret_cast<char*>(output_buffer), &output_size);
+ return static_cast<int64_t>(output_size);
+ }
+
+ Result<std::shared_ptr<Compressor>> MakeCompressor() override {
+ return Status::NotImplemented("Streaming compression unsupported with Snappy");
+ }
+
+ Result<std::shared_ptr<Decompressor>> MakeDecompressor() override {
+ return Status::NotImplemented("Streaming decompression unsupported with Snappy");
+ }
+
+ Compression::type compression_type() const override { return Compression::SNAPPY; }
int minimum_compression_level() const override { return kUseDefaultCompressionLevel; }
int maximum_compression_level() const override { return kUseDefaultCompressionLevel; }
int default_compression_level() const override { return kUseDefaultCompressionLevel; }
-};
-
-} // namespace
-
-std::unique_ptr<Codec> MakeSnappyCodec() {
- return std::unique_ptr<Codec>(new SnappyCodec());
-}
-
-} // namespace internal
-} // namespace util
-} // namespace arrow
+};
+
+} // namespace
+
+std::unique_ptr<Codec> MakeSnappyCodec() {
+ return std::unique_ptr<Codec>(new SnappyCodec());
+}
+
+} // namespace internal
+} // namespace util
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_zlib.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_zlib.cc
index e9cb2470ee2..ed7f321d77c 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_zlib.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_zlib.cc
@@ -1,507 +1,507 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/compression_internal.h"
-
-#include <algorithm>
-#include <cstdint>
-#include <cstring>
-#include <limits>
-#include <memory>
-
-#include <zconf.h>
-#include <zlib.h>
-
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/macros.h"
-
-namespace arrow {
-namespace util {
-namespace internal {
-
-namespace {
-
-// ----------------------------------------------------------------------
-// gzip implementation
-
-// These are magic numbers from zlib.h. Not clear why they are not defined
-// there.
-
-// Maximum window size
-constexpr int WINDOW_BITS = 15;
-
-// Output Gzip.
-constexpr int GZIP_CODEC = 16;
-
-// Determine if this is libz or gzip from header.
-constexpr int DETECT_CODEC = 32;
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/compression_internal.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <limits>
+#include <memory>
+
+#include <zconf.h>
+#include <zlib.h>
+
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/macros.h"
+
+namespace arrow {
+namespace util {
+namespace internal {
+
+namespace {
+
+// ----------------------------------------------------------------------
+// gzip implementation
+
+// These are magic numbers from zlib.h. Not clear why they are not defined
+// there.
+
+// Maximum window size
+constexpr int WINDOW_BITS = 15;
+
+// Output Gzip.
+constexpr int GZIP_CODEC = 16;
+
+// Determine if this is libz or gzip from header.
+constexpr int DETECT_CODEC = 32;
+
constexpr int kGZipMinCompressionLevel = 1;
constexpr int kGZipMaxCompressionLevel = 9;
-int CompressionWindowBitsForFormat(GZipFormat::type format) {
- int window_bits = WINDOW_BITS;
- switch (format) {
- case GZipFormat::DEFLATE:
- window_bits = -window_bits;
- break;
- case GZipFormat::GZIP:
- window_bits += GZIP_CODEC;
- break;
- case GZipFormat::ZLIB:
- break;
- }
- return window_bits;
-}
-
-int DecompressionWindowBitsForFormat(GZipFormat::type format) {
- if (format == GZipFormat::DEFLATE) {
- return -WINDOW_BITS;
- } else {
- /* If not deflate, autodetect format from header */
- return WINDOW_BITS | DETECT_CODEC;
- }
-}
-
-Status ZlibErrorPrefix(const char* prefix_msg, const char* msg) {
- return Status::IOError(prefix_msg, (msg) ? msg : "(unknown error)");
-}
-
-// ----------------------------------------------------------------------
-// gzip decompressor implementation
-
-class GZipDecompressor : public Decompressor {
- public:
- explicit GZipDecompressor(GZipFormat::type format)
- : format_(format), initialized_(false), finished_(false) {}
-
- ~GZipDecompressor() override {
- if (initialized_) {
- inflateEnd(&stream_);
- }
- }
-
- Status Init() {
- DCHECK(!initialized_);
- memset(&stream_, 0, sizeof(stream_));
- finished_ = false;
-
- int ret;
- int window_bits = DecompressionWindowBitsForFormat(format_);
- if ((ret = inflateInit2(&stream_, window_bits)) != Z_OK) {
- return ZlibError("zlib inflateInit failed: ");
- } else {
- initialized_ = true;
- return Status::OK();
- }
- }
-
- Status Reset() override {
- DCHECK(initialized_);
- finished_ = false;
- int ret;
- if ((ret = inflateReset(&stream_)) != Z_OK) {
- return ZlibError("zlib inflateReset failed: ");
- } else {
- return Status::OK();
- }
- }
-
- Result<DecompressResult> Decompress(int64_t input_len, const uint8_t* input,
- int64_t output_len, uint8_t* output) override {
- static constexpr auto input_limit =
- static_cast<int64_t>(std::numeric_limits<uInt>::max());
- stream_.next_in = const_cast<Bytef*>(reinterpret_cast<const Bytef*>(input));
- stream_.avail_in = static_cast<uInt>(std::min(input_len, input_limit));
- stream_.next_out = reinterpret_cast<Bytef*>(output);
- stream_.avail_out = static_cast<uInt>(std::min(output_len, input_limit));
- int ret;
-
- ret = inflate(&stream_, Z_SYNC_FLUSH);
- if (ret == Z_DATA_ERROR || ret == Z_STREAM_ERROR || ret == Z_MEM_ERROR) {
- return ZlibError("zlib inflate failed: ");
- }
- if (ret == Z_NEED_DICT) {
- return ZlibError("zlib inflate failed (need preset dictionary): ");
- }
- finished_ = (ret == Z_STREAM_END);
- if (ret == Z_BUF_ERROR) {
- // No progress was possible
- return DecompressResult{0, 0, true};
- } else {
- ARROW_CHECK(ret == Z_OK || ret == Z_STREAM_END);
- // Some progress has been made
- return DecompressResult{input_len - stream_.avail_in,
- output_len - stream_.avail_out, false};
- }
- return Status::OK();
- }
-
- bool IsFinished() override { return finished_; }
-
- protected:
- Status ZlibError(const char* prefix_msg) {
- return ZlibErrorPrefix(prefix_msg, stream_.msg);
- }
-
- z_stream stream_;
- GZipFormat::type format_;
- bool initialized_;
- bool finished_;
-};
-
-// ----------------------------------------------------------------------
-// gzip compressor implementation
-
-class GZipCompressor : public Compressor {
- public:
- explicit GZipCompressor(int compression_level)
- : initialized_(false), compression_level_(compression_level) {}
-
- ~GZipCompressor() override {
- if (initialized_) {
- deflateEnd(&stream_);
- }
- }
-
- Status Init(GZipFormat::type format) {
- DCHECK(!initialized_);
- memset(&stream_, 0, sizeof(stream_));
-
- int ret;
- // Initialize to run specified format
- int window_bits = CompressionWindowBitsForFormat(format);
- if ((ret = deflateInit2(&stream_, Z_DEFAULT_COMPRESSION, Z_DEFLATED, window_bits,
- compression_level_, Z_DEFAULT_STRATEGY)) != Z_OK) {
- return ZlibError("zlib deflateInit failed: ");
- } else {
- initialized_ = true;
- return Status::OK();
- }
- }
-
- Result<CompressResult> Compress(int64_t input_len, const uint8_t* input,
- int64_t output_len, uint8_t* output) override {
- DCHECK(initialized_) << "Called on non-initialized stream";
-
- static constexpr auto input_limit =
- static_cast<int64_t>(std::numeric_limits<uInt>::max());
-
- stream_.next_in = const_cast<Bytef*>(reinterpret_cast<const Bytef*>(input));
- stream_.avail_in = static_cast<uInt>(std::min(input_len, input_limit));
- stream_.next_out = reinterpret_cast<Bytef*>(output);
- stream_.avail_out = static_cast<uInt>(std::min(output_len, input_limit));
-
- int64_t ret = 0;
- ret = deflate(&stream_, Z_NO_FLUSH);
- if (ret == Z_STREAM_ERROR) {
- return ZlibError("zlib compress failed: ");
- }
- if (ret == Z_OK) {
- // Some progress has been made
- return CompressResult{input_len - stream_.avail_in, output_len - stream_.avail_out};
- } else {
- // No progress was possible
- ARROW_CHECK_EQ(ret, Z_BUF_ERROR);
- return CompressResult{0, 0};
- }
- }
-
- Result<FlushResult> Flush(int64_t output_len, uint8_t* output) override {
- DCHECK(initialized_) << "Called on non-initialized stream";
-
- static constexpr auto input_limit =
- static_cast<int64_t>(std::numeric_limits<uInt>::max());
-
- stream_.avail_in = 0;
- stream_.next_out = reinterpret_cast<Bytef*>(output);
- stream_.avail_out = static_cast<uInt>(std::min(output_len, input_limit));
-
- int64_t ret = 0;
- ret = deflate(&stream_, Z_SYNC_FLUSH);
- if (ret == Z_STREAM_ERROR) {
- return ZlibError("zlib flush failed: ");
- }
- int64_t bytes_written;
- if (ret == Z_OK) {
- bytes_written = output_len - stream_.avail_out;
- } else {
- ARROW_CHECK_EQ(ret, Z_BUF_ERROR);
- bytes_written = 0;
- }
- // "If deflate returns with avail_out == 0, this function must be called
- // again with the same value of the flush parameter and more output space
- // (updated avail_out), until the flush is complete (deflate returns
- // with non-zero avail_out)."
+int CompressionWindowBitsForFormat(GZipFormat::type format) {
+ int window_bits = WINDOW_BITS;
+ switch (format) {
+ case GZipFormat::DEFLATE:
+ window_bits = -window_bits;
+ break;
+ case GZipFormat::GZIP:
+ window_bits += GZIP_CODEC;
+ break;
+ case GZipFormat::ZLIB:
+ break;
+ }
+ return window_bits;
+}
+
+int DecompressionWindowBitsForFormat(GZipFormat::type format) {
+ if (format == GZipFormat::DEFLATE) {
+ return -WINDOW_BITS;
+ } else {
+ /* If not deflate, autodetect format from header */
+ return WINDOW_BITS | DETECT_CODEC;
+ }
+}
+
+Status ZlibErrorPrefix(const char* prefix_msg, const char* msg) {
+ return Status::IOError(prefix_msg, (msg) ? msg : "(unknown error)");
+}
+
+// ----------------------------------------------------------------------
+// gzip decompressor implementation
+
+class GZipDecompressor : public Decompressor {
+ public:
+ explicit GZipDecompressor(GZipFormat::type format)
+ : format_(format), initialized_(false), finished_(false) {}
+
+ ~GZipDecompressor() override {
+ if (initialized_) {
+ inflateEnd(&stream_);
+ }
+ }
+
+ Status Init() {
+ DCHECK(!initialized_);
+ memset(&stream_, 0, sizeof(stream_));
+ finished_ = false;
+
+ int ret;
+ int window_bits = DecompressionWindowBitsForFormat(format_);
+ if ((ret = inflateInit2(&stream_, window_bits)) != Z_OK) {
+ return ZlibError("zlib inflateInit failed: ");
+ } else {
+ initialized_ = true;
+ return Status::OK();
+ }
+ }
+
+ Status Reset() override {
+ DCHECK(initialized_);
+ finished_ = false;
+ int ret;
+ if ((ret = inflateReset(&stream_)) != Z_OK) {
+ return ZlibError("zlib inflateReset failed: ");
+ } else {
+ return Status::OK();
+ }
+ }
+
+ Result<DecompressResult> Decompress(int64_t input_len, const uint8_t* input,
+ int64_t output_len, uint8_t* output) override {
+ static constexpr auto input_limit =
+ static_cast<int64_t>(std::numeric_limits<uInt>::max());
+ stream_.next_in = const_cast<Bytef*>(reinterpret_cast<const Bytef*>(input));
+ stream_.avail_in = static_cast<uInt>(std::min(input_len, input_limit));
+ stream_.next_out = reinterpret_cast<Bytef*>(output);
+ stream_.avail_out = static_cast<uInt>(std::min(output_len, input_limit));
+ int ret;
+
+ ret = inflate(&stream_, Z_SYNC_FLUSH);
+ if (ret == Z_DATA_ERROR || ret == Z_STREAM_ERROR || ret == Z_MEM_ERROR) {
+ return ZlibError("zlib inflate failed: ");
+ }
+ if (ret == Z_NEED_DICT) {
+ return ZlibError("zlib inflate failed (need preset dictionary): ");
+ }
+ finished_ = (ret == Z_STREAM_END);
+ if (ret == Z_BUF_ERROR) {
+ // No progress was possible
+ return DecompressResult{0, 0, true};
+ } else {
+ ARROW_CHECK(ret == Z_OK || ret == Z_STREAM_END);
+ // Some progress has been made
+ return DecompressResult{input_len - stream_.avail_in,
+ output_len - stream_.avail_out, false};
+ }
+ return Status::OK();
+ }
+
+ bool IsFinished() override { return finished_; }
+
+ protected:
+ Status ZlibError(const char* prefix_msg) {
+ return ZlibErrorPrefix(prefix_msg, stream_.msg);
+ }
+
+ z_stream stream_;
+ GZipFormat::type format_;
+ bool initialized_;
+ bool finished_;
+};
+
+// ----------------------------------------------------------------------
+// gzip compressor implementation
+
+class GZipCompressor : public Compressor {
+ public:
+ explicit GZipCompressor(int compression_level)
+ : initialized_(false), compression_level_(compression_level) {}
+
+ ~GZipCompressor() override {
+ if (initialized_) {
+ deflateEnd(&stream_);
+ }
+ }
+
+ Status Init(GZipFormat::type format) {
+ DCHECK(!initialized_);
+ memset(&stream_, 0, sizeof(stream_));
+
+ int ret;
+ // Initialize to run specified format
+ int window_bits = CompressionWindowBitsForFormat(format);
+ if ((ret = deflateInit2(&stream_, Z_DEFAULT_COMPRESSION, Z_DEFLATED, window_bits,
+ compression_level_, Z_DEFAULT_STRATEGY)) != Z_OK) {
+ return ZlibError("zlib deflateInit failed: ");
+ } else {
+ initialized_ = true;
+ return Status::OK();
+ }
+ }
+
+ Result<CompressResult> Compress(int64_t input_len, const uint8_t* input,
+ int64_t output_len, uint8_t* output) override {
+ DCHECK(initialized_) << "Called on non-initialized stream";
+
+ static constexpr auto input_limit =
+ static_cast<int64_t>(std::numeric_limits<uInt>::max());
+
+ stream_.next_in = const_cast<Bytef*>(reinterpret_cast<const Bytef*>(input));
+ stream_.avail_in = static_cast<uInt>(std::min(input_len, input_limit));
+ stream_.next_out = reinterpret_cast<Bytef*>(output);
+ stream_.avail_out = static_cast<uInt>(std::min(output_len, input_limit));
+
+ int64_t ret = 0;
+ ret = deflate(&stream_, Z_NO_FLUSH);
+ if (ret == Z_STREAM_ERROR) {
+ return ZlibError("zlib compress failed: ");
+ }
+ if (ret == Z_OK) {
+ // Some progress has been made
+ return CompressResult{input_len - stream_.avail_in, output_len - stream_.avail_out};
+ } else {
+ // No progress was possible
+ ARROW_CHECK_EQ(ret, Z_BUF_ERROR);
+ return CompressResult{0, 0};
+ }
+ }
+
+ Result<FlushResult> Flush(int64_t output_len, uint8_t* output) override {
+ DCHECK(initialized_) << "Called on non-initialized stream";
+
+ static constexpr auto input_limit =
+ static_cast<int64_t>(std::numeric_limits<uInt>::max());
+
+ stream_.avail_in = 0;
+ stream_.next_out = reinterpret_cast<Bytef*>(output);
+ stream_.avail_out = static_cast<uInt>(std::min(output_len, input_limit));
+
+ int64_t ret = 0;
+ ret = deflate(&stream_, Z_SYNC_FLUSH);
+ if (ret == Z_STREAM_ERROR) {
+ return ZlibError("zlib flush failed: ");
+ }
+ int64_t bytes_written;
+ if (ret == Z_OK) {
+ bytes_written = output_len - stream_.avail_out;
+ } else {
+ ARROW_CHECK_EQ(ret, Z_BUF_ERROR);
+ bytes_written = 0;
+ }
+ // "If deflate returns with avail_out == 0, this function must be called
+ // again with the same value of the flush parameter and more output space
+ // (updated avail_out), until the flush is complete (deflate returns
+ // with non-zero avail_out)."
// "Note that Z_BUF_ERROR is not fatal, and deflate() can be called again
// with more input and more output space to continue compressing."
return FlushResult{bytes_written, stream_.avail_out == 0};
- }
-
- Result<EndResult> End(int64_t output_len, uint8_t* output) override {
- DCHECK(initialized_) << "Called on non-initialized stream";
-
- static constexpr auto input_limit =
- static_cast<int64_t>(std::numeric_limits<uInt>::max());
-
- stream_.avail_in = 0;
- stream_.next_out = reinterpret_cast<Bytef*>(output);
- stream_.avail_out = static_cast<uInt>(std::min(output_len, input_limit));
-
- int64_t ret = 0;
- ret = deflate(&stream_, Z_FINISH);
- if (ret == Z_STREAM_ERROR) {
- return ZlibError("zlib flush failed: ");
- }
- int64_t bytes_written = output_len - stream_.avail_out;
- if (ret == Z_STREAM_END) {
- // Flush complete, we can now end the stream
- initialized_ = false;
- ret = deflateEnd(&stream_);
- if (ret == Z_OK) {
- return EndResult{bytes_written, false};
- } else {
- return ZlibError("zlib end failed: ");
- }
- } else {
- // Not everything could be flushed,
- return EndResult{bytes_written, true};
- }
- }
-
- protected:
- Status ZlibError(const char* prefix_msg) {
- return ZlibErrorPrefix(prefix_msg, stream_.msg);
- }
-
- z_stream stream_;
- bool initialized_;
- int compression_level_;
-};
-
-// ----------------------------------------------------------------------
-// gzip codec implementation
-
-class GZipCodec : public Codec {
- public:
- explicit GZipCodec(int compression_level, GZipFormat::type format)
- : format_(format),
- compressor_initialized_(false),
- decompressor_initialized_(false) {
- compression_level_ = compression_level == kUseDefaultCompressionLevel
- ? kGZipDefaultCompressionLevel
- : compression_level;
- }
-
- ~GZipCodec() override {
- EndCompressor();
- EndDecompressor();
- }
-
- Result<std::shared_ptr<Compressor>> MakeCompressor() override {
- auto ptr = std::make_shared<GZipCompressor>(compression_level_);
- RETURN_NOT_OK(ptr->Init(format_));
- return ptr;
- }
-
- Result<std::shared_ptr<Decompressor>> MakeDecompressor() override {
- auto ptr = std::make_shared<GZipDecompressor>(format_);
- RETURN_NOT_OK(ptr->Init());
- return ptr;
- }
-
- Status InitCompressor() {
- EndDecompressor();
- memset(&stream_, 0, sizeof(stream_));
-
- int ret;
- // Initialize to run specified format
- int window_bits = CompressionWindowBitsForFormat(format_);
- if ((ret = deflateInit2(&stream_, Z_DEFAULT_COMPRESSION, Z_DEFLATED, window_bits,
- compression_level_, Z_DEFAULT_STRATEGY)) != Z_OK) {
- return ZlibErrorPrefix("zlib deflateInit failed: ", stream_.msg);
- }
- compressor_initialized_ = true;
- return Status::OK();
- }
-
- void EndCompressor() {
- if (compressor_initialized_) {
- (void)deflateEnd(&stream_);
- }
- compressor_initialized_ = false;
- }
-
- Status InitDecompressor() {
- EndCompressor();
- memset(&stream_, 0, sizeof(stream_));
- int ret;
-
- // Initialize to run either deflate or zlib/gzip format
- int window_bits = DecompressionWindowBitsForFormat(format_);
- if ((ret = inflateInit2(&stream_, window_bits)) != Z_OK) {
- return ZlibErrorPrefix("zlib inflateInit failed: ", stream_.msg);
- }
- decompressor_initialized_ = true;
- return Status::OK();
- }
-
- void EndDecompressor() {
- if (decompressor_initialized_) {
- (void)inflateEnd(&stream_);
- }
- decompressor_initialized_ = false;
- }
-
- Result<int64_t> Decompress(int64_t input_length, const uint8_t* input,
- int64_t output_buffer_length, uint8_t* output) override {
- if (!decompressor_initialized_) {
- RETURN_NOT_OK(InitDecompressor());
- }
- if (output_buffer_length == 0) {
- // The zlib library does not allow *output to be NULL, even when
- // output_buffer_length is 0 (inflate() will return Z_STREAM_ERROR). We don't
- // consider this an error, so bail early if no output is expected. Note that we
- // don't signal an error if the input actually contains compressed data.
- return 0;
- }
-
- // Reset the stream for this block
- if (inflateReset(&stream_) != Z_OK) {
- return ZlibErrorPrefix("zlib inflateReset failed: ", stream_.msg);
- }
-
- int ret = 0;
- // gzip can run in streaming mode or non-streaming mode. We only
- // support the non-streaming use case where we present it the entire
- // compressed input and a buffer big enough to contain the entire
- // compressed output. In the case where we don't know the output,
- // we just make a bigger buffer and try the non-streaming mode
- // from the beginning again.
- while (ret != Z_STREAM_END) {
- stream_.next_in = const_cast<Bytef*>(reinterpret_cast<const Bytef*>(input));
- stream_.avail_in = static_cast<uInt>(input_length);
- stream_.next_out = reinterpret_cast<Bytef*>(output);
- stream_.avail_out = static_cast<uInt>(output_buffer_length);
-
- // We know the output size. In this case, we can use Z_FINISH
- // which is more efficient.
- ret = inflate(&stream_, Z_FINISH);
- if (ret == Z_STREAM_END || ret != Z_OK) break;
-
- // Failure, buffer was too small
- return Status::IOError("Too small a buffer passed to GZipCodec. InputLength=",
- input_length, " OutputLength=", output_buffer_length);
- }
-
- // Failure for some other reason
- if (ret != Z_STREAM_END) {
- return ZlibErrorPrefix("GZipCodec failed: ", stream_.msg);
- }
-
- return stream_.total_out;
- }
-
- int64_t MaxCompressedLen(int64_t input_length,
- const uint8_t* ARROW_ARG_UNUSED(input)) override {
- // Must be in compression mode
- if (!compressor_initialized_) {
- Status s = InitCompressor();
- ARROW_CHECK_OK(s);
- }
- int64_t max_len = deflateBound(&stream_, static_cast<uLong>(input_length));
- // ARROW-3514: return a more pessimistic estimate to account for bugs
- // in old zlib versions.
- return max_len + 12;
- }
-
- Result<int64_t> Compress(int64_t input_length, const uint8_t* input,
- int64_t output_buffer_len, uint8_t* output) override {
- if (!compressor_initialized_) {
- RETURN_NOT_OK(InitCompressor());
- }
- stream_.next_in = const_cast<Bytef*>(reinterpret_cast<const Bytef*>(input));
- stream_.avail_in = static_cast<uInt>(input_length);
- stream_.next_out = reinterpret_cast<Bytef*>(output);
- stream_.avail_out = static_cast<uInt>(output_buffer_len);
-
- int64_t ret = 0;
- if ((ret = deflate(&stream_, Z_FINISH)) != Z_STREAM_END) {
- if (ret == Z_OK) {
- // Will return Z_OK (and stream.msg NOT set) if stream.avail_out is too
- // small
- return Status::IOError("zlib deflate failed, output buffer too small");
- }
-
- return ZlibErrorPrefix("zlib deflate failed: ", stream_.msg);
- }
-
- if (deflateReset(&stream_) != Z_OK) {
- return ZlibErrorPrefix("zlib deflateReset failed: ", stream_.msg);
- }
-
- // Actual output length
- return output_buffer_len - stream_.avail_out;
- }
-
- Status Init() override {
- const Status init_compressor_status = InitCompressor();
- if (!init_compressor_status.ok()) {
- return init_compressor_status;
- }
- return InitDecompressor();
- }
-
- Compression::type compression_type() const override { return Compression::GZIP; }
-
- int compression_level() const override { return compression_level_; }
+ }
+
+  /// Finish the deflate stream, writing any remaining compressed bytes to `output`.
+  ///
+  /// \param output_len capacity of `output` in bytes
+  /// \param output destination buffer
+  /// \return an EndResult holding the number of bytes written and a flag that is
+  ///   true when zlib could not finish yet (End() must be called again with more
+  ///   output space). An error Status is returned when deflate() reports
+  ///   Z_STREAM_ERROR or deflateEnd() fails.
+  Result<EndResult> End(int64_t output_len, uint8_t* output) override {
+    DCHECK(initialized_) << "Called on non-initialized stream";
+
+    static constexpr auto input_limit =
+        static_cast<int64_t>(std::numeric_limits<uInt>::max());
+
+    // zlib tracks the available space in a uInt, so offer at most that much.
+    const int64_t offered_len = std::min(output_len, input_limit);
+
+    stream_.avail_in = 0;
+    stream_.next_out = reinterpret_cast<Bytef*>(output);
+    stream_.avail_out = static_cast<uInt>(offered_len);
+
+    int64_t ret = deflate(&stream_, Z_FINISH);
+    if (ret == Z_STREAM_ERROR) {
+      return ZlibError("zlib flush failed: ");
+    }
+    // Measure against the space actually handed to zlib. Subtracting from
+    // `output_len` over-reports the written size whenever `output_len` was
+    // clamped to the uInt range above.
+    const int64_t bytes_written = offered_len - stream_.avail_out;
+    if (ret != Z_STREAM_END) {
+      // Not everything could be flushed; the caller has to retry.
+      return EndResult{bytes_written, true};
+    }
+
+    // Flush complete, we can now end the stream
+    initialized_ = false;
+    ret = deflateEnd(&stream_);
+    if (ret != Z_OK) {
+      return ZlibError("zlib end failed: ");
+    }
+    return EndResult{bytes_written, false};
+  }
+
+ protected:
+  // Build an error Status from `prefix_msg` and the message currently stored
+  // in the zlib stream.
+  Status ZlibError(const char* prefix_msg) {
+    const char* zlib_detail = stream_.msg;
+    return ZlibErrorPrefix(prefix_msg, zlib_detail);
+  }
+
+ z_stream stream_;
+ bool initialized_;
+ int compression_level_;
+};
+
+// ----------------------------------------------------------------------
+// gzip codec implementation
+
+class GZipCodec : public Codec {
+ public:
+  // Creates a codec for `format`. Neither zlib mode is initialized yet.
+  // Passing kUseDefaultCompressionLevel selects kGZipDefaultCompressionLevel.
+  explicit GZipCodec(int compression_level, GZipFormat::type format)
+      : format_(format),
+        compressor_initialized_(false),
+        decompressor_initialized_(false) {
+    if (compression_level == kUseDefaultCompressionLevel) {
+      compression_level_ = kGZipDefaultCompressionLevel;
+    } else {
+      compression_level_ = compression_level;
+    }
+  }
+
+ // Releases whichever zlib state (deflate or inflate) is currently
+ // initialized; each End* helper only touches the stream when its own
+ // "initialized" flag is set.
+ ~GZipCodec() override {
+ EndCompressor();
+ EndDecompressor();
+ }
+
+  // Creates a streaming compressor using this codec's level and format.
+  // Returns the Init() error if the compressor cannot be initialized.
+  Result<std::shared_ptr<Compressor>> MakeCompressor() override {
+    std::shared_ptr<GZipCompressor> compressor =
+        std::make_shared<GZipCompressor>(compression_level_);
+    RETURN_NOT_OK(compressor->Init(format_));
+    return compressor;
+  }
+
+  // Creates a streaming decompressor for this codec's format.
+  // Returns the Init() error if the decompressor cannot be initialized.
+  Result<std::shared_ptr<Decompressor>> MakeDecompressor() override {
+    std::shared_ptr<GZipDecompressor> decompressor =
+        std::make_shared<GZipDecompressor>(format_);
+    RETURN_NOT_OK(decompressor->Init());
+    return decompressor;
+  }
+
+  /// Put the shared zlib stream into deflate (compression) mode.
+  ///
+  /// Any active inflate state is released first because both modes share
+  /// `stream_`. On success `compressor_initialized_` is set.
+  ///
+  /// \return Status::OK(), or an error carrying the zlib message when
+  ///   deflateInit2() fails.
+  Status InitCompressor() {
+    // zlib documents 8 as the default memLevel for deflateInit2().
+    constexpr int kDefaultMemLevel = 8;
+
+    EndDecompressor();
+    memset(&stream_, 0, sizeof(stream_));
+
+    // Initialize to run specified format
+    const int window_bits = CompressionWindowBitsForFormat(format_);
+    // Argument order is (strm, level, method, windowBits, memLevel, strategy).
+    // The configured compression level belongs in the `level` slot; passing it
+    // as `memLevel` left the actual level at Z_DEFAULT_COMPRESSION regardless
+    // of what the caller asked for.
+    const int ret = deflateInit2(&stream_, compression_level_, Z_DEFLATED, window_bits,
+                                 kDefaultMemLevel, Z_DEFAULT_STRATEGY);
+    if (ret != Z_OK) {
+      return ZlibErrorPrefix("zlib deflateInit failed: ", stream_.msg);
+    }
+    compressor_initialized_ = true;
+    return Status::OK();
+  }
+
+  // Releases the deflate state if it is active and clears the compressor flag.
+  // The result of deflateEnd() is deliberately ignored.
+  void EndCompressor() {
+    if (!compressor_initialized_) {
+      return;  // nothing to release; the flag is already false
+    }
+    (void)deflateEnd(&stream_);
+    compressor_initialized_ = false;
+  }
+
+  // Switches the shared zlib stream into inflate (decompression) mode.
+  // Any active deflate state is released first. Returns an error carrying the
+  // zlib message if inflateInit2() fails.
+  Status InitDecompressor() {
+    EndCompressor();
+    memset(&stream_, 0, sizeof(stream_));
+
+    // Initialize to run either deflate or zlib/gzip format
+    const int window_bits = DecompressionWindowBitsForFormat(format_);
+    const int init_status = inflateInit2(&stream_, window_bits);
+    if (init_status != Z_OK) {
+      return ZlibErrorPrefix("zlib inflateInit failed: ", stream_.msg);
+    }
+    decompressor_initialized_ = true;
+    return Status::OK();
+  }
+
+  // Releases the inflate state if it is active and clears the decompressor
+  // flag. The result of inflateEnd() is deliberately ignored.
+  void EndDecompressor() {
+    if (!decompressor_initialized_) {
+      return;  // nothing to release; the flag is already false
+    }
+    (void)inflateEnd(&stream_);
+    decompressor_initialized_ = false;
+  }
+
+  // One-shot decompression of `input` into `output`.
+  //
+  // Only the non-streaming case is supported: the whole compressed input and
+  // an output buffer large enough for the whole decompressed result must be
+  // supplied in a single call. Returns the stream's total_out on success, an
+  // IOError when inflate() stops with Z_OK (output buffer too small), or a
+  // zlib-derived error for any other non-Z_STREAM_END result.
+  Result<int64_t> Decompress(int64_t input_length, const uint8_t* input,
+                             int64_t output_buffer_length, uint8_t* output) override {
+    if (!decompressor_initialized_) {
+      RETURN_NOT_OK(InitDecompressor());
+    }
+    if (output_buffer_length == 0) {
+      // zlib rejects a NULL output pointer even for a zero-length buffer
+      // (inflate() returns Z_STREAM_ERROR). That is not treated as an error
+      // here: bail out early, without complaining even if the input actually
+      // holds compressed data.
+      return 0;
+    }
+
+    // Reset the stream for this block
+    if (inflateReset(&stream_) != Z_OK) {
+      return ZlibErrorPrefix("zlib inflateReset failed: ", stream_.msg);
+    }
+
+    stream_.next_in = const_cast<Bytef*>(reinterpret_cast<const Bytef*>(input));
+    stream_.avail_in = static_cast<uInt>(input_length);
+    stream_.next_out = reinterpret_cast<Bytef*>(output);
+    stream_.avail_out = static_cast<uInt>(output_buffer_length);
+
+    // The output size is known up front, so Z_FINISH can be used, which is
+    // more efficient. A single inflate() call decides the outcome.
+    const int inflate_status = inflate(&stream_, Z_FINISH);
+    if (inflate_status == Z_OK) {
+      // Failure, buffer was too small
+      return Status::IOError("Too small a buffer passed to GZipCodec. InputLength=",
+                             input_length, " OutputLength=", output_buffer_length);
+    }
+    if (inflate_status != Z_STREAM_END) {
+      // Failure for some other reason
+      return ZlibErrorPrefix("GZipCodec failed: ", stream_.msg);
+    }
+
+    return stream_.total_out;
+  }
+
+  // Upper bound on the compressed size of `input_length` bytes.
+  // Switches the codec to compression mode if needed (aborting via
+  // ARROW_CHECK_OK if that fails) and pads zlib's own bound by 12 bytes.
+  int64_t MaxCompressedLen(int64_t input_length,
+                           const uint8_t* ARROW_ARG_UNUSED(input)) override {
+    // Must be in compression mode
+    if (!compressor_initialized_) {
+      Status s = InitCompressor();
+      ARROW_CHECK_OK(s);
+    }
+    // ARROW-3514: return a more pessimistic estimate to account for bugs
+    // in old zlib versions.
+    constexpr int64_t kExtraSlack = 12;
+    const int64_t zlib_bound = deflateBound(&stream_, static_cast<uLong>(input_length));
+    return zlib_bound + kExtraSlack;
+  }
+
+  // One-shot compression of `input` into `output`.
+  // Returns the number of bytes written, an IOError when the output buffer is
+  // too small, or a zlib-derived error when deflate()/deflateReset() fail.
+  Result<int64_t> Compress(int64_t input_length, const uint8_t* input,
+                           int64_t output_buffer_len, uint8_t* output) override {
+    if (!compressor_initialized_) {
+      RETURN_NOT_OK(InitCompressor());
+    }
+    stream_.next_in = const_cast<Bytef*>(reinterpret_cast<const Bytef*>(input));
+    stream_.avail_in = static_cast<uInt>(input_length);
+    stream_.next_out = reinterpret_cast<Bytef*>(output);
+    stream_.avail_out = static_cast<uInt>(output_buffer_len);
+
+    switch (deflate(&stream_, Z_FINISH)) {
+      case Z_STREAM_END:
+        // Everything was compressed and flushed.
+        break;
+      case Z_OK:
+        // Will return Z_OK (and stream.msg NOT set) if stream.avail_out is too
+        // small
+        return Status::IOError("zlib deflate failed, output buffer too small");
+      default:
+        return ZlibErrorPrefix("zlib deflate failed: ", stream_.msg);
+    }
+
+    // Make the stream reusable for the next call.
+    if (deflateReset(&stream_) != Z_OK) {
+      return ZlibErrorPrefix("zlib deflateReset failed: ", stream_.msg);
+    }
+
+    // Actual output length
+    return output_buffer_len - stream_.avail_out;
+  }
+
+  // Checks that both zlib modes can be set up: first the compressor, then,
+  // only if that succeeded, the decompressor. Returns the first failure.
+  Status Init() override {
+    Status status = InitCompressor();
+    if (status.ok()) {
+      status = InitDecompressor();
+    }
+    return status;
+  }
+
+  // This codec always reports Compression::GZIP.
+  Compression::type compression_type() const override {
+    return Compression::GZIP;
+  }
+
+  // Level chosen at construction, after default substitution.
+  int compression_level() const override {
+    return compression_level_;
+  }
int minimum_compression_level() const override { return kGZipMinCompressionLevel; }
int maximum_compression_level() const override { return kGZipMaxCompressionLevel; }
int default_compression_level() const override { return kGZipDefaultCompressionLevel; }
-
- private:
- // zlib is stateful and the z_stream state variable must be initialized
- // before
- z_stream stream_;
-
- // Realistically, this will always be GZIP, but we leave the option open to
- // configure
- GZipFormat::type format_;
-
- // These variables are mutually exclusive. When the codec is in "compressor"
- // state, compressor_initialized_ is true while decompressor_initialized_ is
- // false. When it's decompressing, the opposite is true.
- //
- // Indeed, this is slightly hacky, but the alternative is having separate
- // Compressor and Decompressor classes. If this ever becomes an issue, we can
- // perform the refactoring then
- bool compressor_initialized_;
- bool decompressor_initialized_;
- int compression_level_;
-};
-
-} // namespace
-
-std::unique_ptr<Codec> MakeGZipCodec(int compression_level, GZipFormat::type format) {
- return std::unique_ptr<Codec>(new GZipCodec(compression_level, format));
-}
-
-} // namespace internal
-} // namespace util
-} // namespace arrow
+
+ private:
+ // zlib is stateful and the z_stream state variable must be initialized
+ // before use
+ z_stream stream_;
+
+ // Realistically, this will always be GZIP, but we leave the option open to
+ // configure
+ GZipFormat::type format_;
+
+ // These variables are mutually exclusive. When the codec is in "compressor"
+ // state, compressor_initialized_ is true while decompressor_initialized_ is
+ // false. When it's decompressing, the opposite is true.
+ //
+ // Indeed, this is slightly hacky, but the alternative is having separate
+ // Compressor and Decompressor classes. If this ever becomes an issue, we can
+ // perform the refactoring then
+ bool compressor_initialized_;
+ bool decompressor_initialized_;
+ int compression_level_;
+};
+
+} // namespace
+
+// Factory for a GZipCodec with the given level and format, returned through
+// the generic Codec interface.
+std::unique_ptr<Codec> MakeGZipCodec(int compression_level, GZipFormat::type format) {
+  std::unique_ptr<Codec> codec(new GZipCodec(compression_level, format));
+  return codec;
+}
+
+} // namespace internal
+} // namespace util
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_zstd.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_zstd.cc
index e15ecb4e1fe..9814fad9850 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_zstd.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_zstd.cc
@@ -1,249 +1,249 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/compression_internal.h"
-
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-
-#include <zstd.h>
-
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/macros.h"
-
-using std::size_t;
-
-namespace arrow {
-namespace util {
-namespace internal {
-
-namespace {
-
-Status ZSTDError(size_t ret, const char* prefix_msg) {
- return Status::IOError(prefix_msg, ZSTD_getErrorName(ret));
-}
-
-// ----------------------------------------------------------------------
-// ZSTD decompressor implementation
-
-class ZSTDDecompressor : public Decompressor {
- public:
- ZSTDDecompressor() : stream_(ZSTD_createDStream()) {}
-
- ~ZSTDDecompressor() override { ZSTD_freeDStream(stream_); }
-
- Status Init() {
- finished_ = false;
- size_t ret = ZSTD_initDStream(stream_);
- if (ZSTD_isError(ret)) {
- return ZSTDError(ret, "ZSTD init failed: ");
- } else {
- return Status::OK();
- }
- }
-
- Result<DecompressResult> Decompress(int64_t input_len, const uint8_t* input,
- int64_t output_len, uint8_t* output) override {
- ZSTD_inBuffer in_buf;
- ZSTD_outBuffer out_buf;
-
- in_buf.src = input;
- in_buf.size = static_cast<size_t>(input_len);
- in_buf.pos = 0;
- out_buf.dst = output;
- out_buf.size = static_cast<size_t>(output_len);
- out_buf.pos = 0;
-
- size_t ret;
- ret = ZSTD_decompressStream(stream_, &out_buf, &in_buf);
- if (ZSTD_isError(ret)) {
- return ZSTDError(ret, "ZSTD decompress failed: ");
- }
- finished_ = (ret == 0);
- return DecompressResult{static_cast<int64_t>(in_buf.pos),
- static_cast<int64_t>(out_buf.pos),
- in_buf.pos == 0 && out_buf.pos == 0};
- }
-
- Status Reset() override { return Init(); }
-
- bool IsFinished() override { return finished_; }
-
- protected:
- ZSTD_DStream* stream_;
- bool finished_;
-};
-
-// ----------------------------------------------------------------------
-// ZSTD compressor implementation
-
-class ZSTDCompressor : public Compressor {
- public:
- explicit ZSTDCompressor(int compression_level)
- : stream_(ZSTD_createCStream()), compression_level_(compression_level) {}
-
- ~ZSTDCompressor() override { ZSTD_freeCStream(stream_); }
-
- Status Init() {
- size_t ret = ZSTD_initCStream(stream_, compression_level_);
- if (ZSTD_isError(ret)) {
- return ZSTDError(ret, "ZSTD init failed: ");
- } else {
- return Status::OK();
- }
- }
-
- Result<CompressResult> Compress(int64_t input_len, const uint8_t* input,
- int64_t output_len, uint8_t* output) override {
- ZSTD_inBuffer in_buf;
- ZSTD_outBuffer out_buf;
-
- in_buf.src = input;
- in_buf.size = static_cast<size_t>(input_len);
- in_buf.pos = 0;
- out_buf.dst = output;
- out_buf.size = static_cast<size_t>(output_len);
- out_buf.pos = 0;
-
- size_t ret;
- ret = ZSTD_compressStream(stream_, &out_buf, &in_buf);
- if (ZSTD_isError(ret)) {
- return ZSTDError(ret, "ZSTD compress failed: ");
- }
- return CompressResult{static_cast<int64_t>(in_buf.pos),
- static_cast<int64_t>(out_buf.pos)};
- }
-
- Result<FlushResult> Flush(int64_t output_len, uint8_t* output) override {
- ZSTD_outBuffer out_buf;
-
- out_buf.dst = output;
- out_buf.size = static_cast<size_t>(output_len);
- out_buf.pos = 0;
-
- size_t ret;
- ret = ZSTD_flushStream(stream_, &out_buf);
- if (ZSTD_isError(ret)) {
- return ZSTDError(ret, "ZSTD flush failed: ");
- }
- return FlushResult{static_cast<int64_t>(out_buf.pos), ret > 0};
- }
-
- Result<EndResult> End(int64_t output_len, uint8_t* output) override {
- ZSTD_outBuffer out_buf;
-
- out_buf.dst = output;
- out_buf.size = static_cast<size_t>(output_len);
- out_buf.pos = 0;
-
- size_t ret;
- ret = ZSTD_endStream(stream_, &out_buf);
- if (ZSTD_isError(ret)) {
- return ZSTDError(ret, "ZSTD end failed: ");
- }
- return EndResult{static_cast<int64_t>(out_buf.pos), ret > 0};
- }
-
- protected:
- ZSTD_CStream* stream_;
-
- private:
- int compression_level_;
-};
-
-// ----------------------------------------------------------------------
-// ZSTD codec implementation
-
-class ZSTDCodec : public Codec {
- public:
- explicit ZSTDCodec(int compression_level)
- : compression_level_(compression_level == kUseDefaultCompressionLevel
- ? kZSTDDefaultCompressionLevel
- : compression_level) {}
-
- Result<int64_t> Decompress(int64_t input_len, const uint8_t* input,
- int64_t output_buffer_len, uint8_t* output_buffer) override {
- if (output_buffer == nullptr) {
- // We may pass a NULL 0-byte output buffer but some zstd versions demand
- // a valid pointer: https://github.com/facebook/zstd/issues/1385
- static uint8_t empty_buffer;
- DCHECK_EQ(output_buffer_len, 0);
- output_buffer = &empty_buffer;
- }
-
- size_t ret = ZSTD_decompress(output_buffer, static_cast<size_t>(output_buffer_len),
- input, static_cast<size_t>(input_len));
- if (ZSTD_isError(ret)) {
- return ZSTDError(ret, "ZSTD decompression failed: ");
- }
- if (static_cast<int64_t>(ret) != output_buffer_len) {
- return Status::IOError("Corrupt ZSTD compressed data.");
- }
- return static_cast<int64_t>(ret);
- }
-
- int64_t MaxCompressedLen(int64_t input_len,
- const uint8_t* ARROW_ARG_UNUSED(input)) override {
- DCHECK_GE(input_len, 0);
- return ZSTD_compressBound(static_cast<size_t>(input_len));
- }
-
- Result<int64_t> Compress(int64_t input_len, const uint8_t* input,
- int64_t output_buffer_len, uint8_t* output_buffer) override {
- size_t ret = ZSTD_compress(output_buffer, static_cast<size_t>(output_buffer_len),
- input, static_cast<size_t>(input_len), compression_level_);
- if (ZSTD_isError(ret)) {
- return ZSTDError(ret, "ZSTD compression failed: ");
- }
- return static_cast<int64_t>(ret);
- }
-
- Result<std::shared_ptr<Compressor>> MakeCompressor() override {
- auto ptr = std::make_shared<ZSTDCompressor>(compression_level_);
- RETURN_NOT_OK(ptr->Init());
- return ptr;
- }
-
- Result<std::shared_ptr<Decompressor>> MakeDecompressor() override {
- auto ptr = std::make_shared<ZSTDDecompressor>();
- RETURN_NOT_OK(ptr->Init());
- return ptr;
- }
-
- Compression::type compression_type() const override { return Compression::ZSTD; }
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/compression_internal.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+
+#include <zstd.h>
+
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/macros.h"
+
+using std::size_t;
+
+namespace arrow {
+namespace util {
+namespace internal {
+
+namespace {
+
+// Builds an IOError from `prefix_msg` followed by zstd's name for the error
+// code `ret`.
+Status ZSTDError(size_t ret, const char* prefix_msg) {
+  const char* error_name = ZSTD_getErrorName(ret);
+  return Status::IOError(prefix_msg, error_name);
+}
+
+// ----------------------------------------------------------------------
+// ZSTD decompressor implementation
+
+/// Streaming decompressor backed by a ZSTD_DStream.
+class ZSTDDecompressor : public Decompressor {
+ public:
+  // finished_ starts out false so that IsFinished() is well-defined even when
+  // it is queried before Init() has run.
+  ZSTDDecompressor() : stream_(ZSTD_createDStream()), finished_(false) {}
+
+  ~ZSTDDecompressor() override { ZSTD_freeDStream(stream_); }
+
+  /// (Re)initialize the stream for a new frame.
+  ///
+  /// \return Status::OK(), or an IOError if the stream could not be created
+  ///   or zstd reports an initialization error.
+  Status Init() {
+    finished_ = false;
+    if (stream_ == nullptr) {
+      // ZSTD_createDStream() failed in the constructor; do not hand a NULL
+      // stream to zstd.
+      return Status::IOError("ZSTD init failed: ", "could not create decompression stream");
+    }
+    size_t ret = ZSTD_initDStream(stream_);
+    if (ZSTD_isError(ret)) {
+      return ZSTDError(ret, "ZSTD init failed: ");
+    }
+    return Status::OK();
+  }
+
+  /// Decompress as much of `input` as fits into `output`.
+  ///
+  /// \return a DecompressResult holding the bytes consumed, the bytes produced,
+  ///   and a flag that is true when neither any input was consumed nor any
+  ///   output produced; or an IOError if zstd reports a failure.
+  Result<DecompressResult> Decompress(int64_t input_len, const uint8_t* input,
+                                      int64_t output_len, uint8_t* output) override {
+    ZSTD_inBuffer in_buf;
+    ZSTD_outBuffer out_buf;
+
+    in_buf.src = input;
+    in_buf.size = static_cast<size_t>(input_len);
+    in_buf.pos = 0;
+    out_buf.dst = output;
+    out_buf.size = static_cast<size_t>(output_len);
+    out_buf.pos = 0;
+
+    size_t ret;
+    ret = ZSTD_decompressStream(stream_, &out_buf, &in_buf);
+    if (ZSTD_isError(ret)) {
+      return ZSTDError(ret, "ZSTD decompress failed: ");
+    }
+    // A zero return from ZSTD_decompressStream is recorded as "finished".
+    finished_ = (ret == 0);
+    return DecompressResult{static_cast<int64_t>(in_buf.pos),
+                            static_cast<int64_t>(out_buf.pos),
+                            in_buf.pos == 0 && out_buf.pos == 0};
+  }
+
+  /// Resetting is the same as initializing again.
+  Status Reset() override { return Init(); }
+
+  /// True once the last Decompress() call returned 0 from zstd.
+  bool IsFinished() override { return finished_; }
+
+ protected:
+  ZSTD_DStream* stream_;
+  bool finished_;
+};
+
+// ----------------------------------------------------------------------
+// ZSTD compressor implementation
+
+/// Streaming compressor backed by a ZSTD_CStream.
+class ZSTDCompressor : public Compressor {
+ public:
+  explicit ZSTDCompressor(int compression_level)
+      : stream_(ZSTD_createCStream()), compression_level_(compression_level) {}
+
+  ~ZSTDCompressor() override { ZSTD_freeCStream(stream_); }
+
+  /// Initialize the stream with the configured compression level.
+  ///
+  /// \return Status::OK(), or an IOError if the stream could not be created
+  ///   or zstd reports an initialization error.
+  Status Init() {
+    if (stream_ == nullptr) {
+      // ZSTD_createCStream() failed in the constructor; do not hand a NULL
+      // stream to zstd.
+      return Status::IOError("ZSTD init failed: ", "could not create compression stream");
+    }
+    size_t ret = ZSTD_initCStream(stream_, compression_level_);
+    if (ZSTD_isError(ret)) {
+      return ZSTDError(ret, "ZSTD init failed: ");
+    }
+    return Status::OK();
+  }
+
+  /// Feed `input` to the stream, writing compressed bytes to `output`.
+  ///
+  /// \return a CompressResult holding the bytes consumed and the bytes written,
+  ///   or an IOError if zstd reports a failure.
+  Result<CompressResult> Compress(int64_t input_len, const uint8_t* input,
+                                  int64_t output_len, uint8_t* output) override {
+    ZSTD_inBuffer in_buf;
+    ZSTD_outBuffer out_buf;
+
+    in_buf.src = input;
+    in_buf.size = static_cast<size_t>(input_len);
+    in_buf.pos = 0;
+    out_buf.dst = output;
+    out_buf.size = static_cast<size_t>(output_len);
+    out_buf.pos = 0;
+
+    size_t ret;
+    ret = ZSTD_compressStream(stream_, &out_buf, &in_buf);
+    if (ZSTD_isError(ret)) {
+      return ZSTDError(ret, "ZSTD compress failed: ");
+    }
+    return CompressResult{static_cast<int64_t>(in_buf.pos),
+                          static_cast<int64_t>(out_buf.pos)};
+  }
+
+  /// Flush buffered data to `output`.
+  ///
+  /// \return a FlushResult holding the bytes written and a flag that is true
+  ///   when ZSTD_flushStream returned a non-zero value; or an IOError.
+  Result<FlushResult> Flush(int64_t output_len, uint8_t* output) override {
+    ZSTD_outBuffer out_buf;
+
+    out_buf.dst = output;
+    out_buf.size = static_cast<size_t>(output_len);
+    out_buf.pos = 0;
+
+    size_t ret;
+    ret = ZSTD_flushStream(stream_, &out_buf);
+    if (ZSTD_isError(ret)) {
+      return ZSTDError(ret, "ZSTD flush failed: ");
+    }
+    return FlushResult{static_cast<int64_t>(out_buf.pos), ret > 0};
+  }
+
+  /// End the frame, writing the remaining data to `output`.
+  ///
+  /// \return an EndResult holding the bytes written and a flag that is true
+  ///   when ZSTD_endStream returned a non-zero value; or an IOError.
+  Result<EndResult> End(int64_t output_len, uint8_t* output) override {
+    ZSTD_outBuffer out_buf;
+
+    out_buf.dst = output;
+    out_buf.size = static_cast<size_t>(output_len);
+    out_buf.pos = 0;
+
+    size_t ret;
+    ret = ZSTD_endStream(stream_, &out_buf);
+    if (ZSTD_isError(ret)) {
+      return ZSTDError(ret, "ZSTD end failed: ");
+    }
+    return EndResult{static_cast<int64_t>(out_buf.pos), ret > 0};
+  }
+
+ protected:
+  ZSTD_CStream* stream_;
+
+ private:
+  int compression_level_;
+};
+
+// ----------------------------------------------------------------------
+// ZSTD codec implementation
+
+class ZSTDCodec : public Codec {
+ public:
+ // Stores the compression level to use; kUseDefaultCompressionLevel is
+ // replaced by kZSTDDefaultCompressionLevel.
+ explicit ZSTDCodec(int compression_level)
+ : compression_level_(compression_level == kUseDefaultCompressionLevel
+ ? kZSTDDefaultCompressionLevel
+ : compression_level) {}
+
+  // One-shot decompression of `input` into `output_buffer`.
+  // Succeeds only when exactly `output_buffer_len` bytes are produced;
+  // otherwise returns an IOError (zstd failure or size mismatch).
+  Result<int64_t> Decompress(int64_t input_len, const uint8_t* input,
+                             int64_t output_buffer_len, uint8_t* output_buffer) override {
+    if (output_buffer == nullptr) {
+      // A NULL 0-byte output buffer is allowed here, but some zstd versions
+      // demand a valid pointer (https://github.com/facebook/zstd/issues/1385),
+      // so point at a dummy byte instead.
+      static uint8_t empty_buffer;
+      DCHECK_EQ(output_buffer_len, 0);
+      output_buffer = &empty_buffer;
+    }
+
+    const size_t dst_capacity = static_cast<size_t>(output_buffer_len);
+    const size_t src_size = static_cast<size_t>(input_len);
+    const size_t ret = ZSTD_decompress(output_buffer, dst_capacity, input, src_size);
+    if (ZSTD_isError(ret)) {
+      return ZSTDError(ret, "ZSTD decompression failed: ");
+    }
+
+    const int64_t decompressed_len = static_cast<int64_t>(ret);
+    if (decompressed_len != output_buffer_len) {
+      return Status::IOError("Corrupt ZSTD compressed data.");
+    }
+    return decompressed_len;
+  }
+
+  // Upper bound on the compressed size of `input_len` bytes, as reported by
+  // ZSTD_compressBound. `input_len` must not be negative.
+  int64_t MaxCompressedLen(int64_t input_len,
+                           const uint8_t* ARROW_ARG_UNUSED(input)) override {
+    DCHECK_GE(input_len, 0);
+    const size_t src_size = static_cast<size_t>(input_len);
+    return ZSTD_compressBound(src_size);
+  }
+
+  // One-shot compression of `input` into `output_buffer` at the configured
+  // level. Returns the compressed size, or an IOError if zstd fails.
+  Result<int64_t> Compress(int64_t input_len, const uint8_t* input,
+                           int64_t output_buffer_len, uint8_t* output_buffer) override {
+    const size_t dst_capacity = static_cast<size_t>(output_buffer_len);
+    const size_t src_size = static_cast<size_t>(input_len);
+    const size_t ret =
+        ZSTD_compress(output_buffer, dst_capacity, input, src_size, compression_level_);
+    if (ZSTD_isError(ret)) {
+      return ZSTDError(ret, "ZSTD compression failed: ");
+    }
+    return static_cast<int64_t>(ret);
+  }
+
+  // Creates a streaming compressor using this codec's compression level.
+  // Returns the Init() error if the compressor cannot be initialized.
+  Result<std::shared_ptr<Compressor>> MakeCompressor() override {
+    std::shared_ptr<ZSTDCompressor> compressor =
+        std::make_shared<ZSTDCompressor>(compression_level_);
+    RETURN_NOT_OK(compressor->Init());
+    return compressor;
+  }
+
+  // Creates a streaming decompressor.
+  // Returns the Init() error if the decompressor cannot be initialized.
+  Result<std::shared_ptr<Decompressor>> MakeDecompressor() override {
+    std::shared_ptr<ZSTDDecompressor> decompressor =
+        std::make_shared<ZSTDDecompressor>();
+    RETURN_NOT_OK(decompressor->Init());
+    return decompressor;
+  }
+
+  // This codec always reports Compression::ZSTD.
+  Compression::type compression_type() const override {
+    return Compression::ZSTD;
+  }
int minimum_compression_level() const override { return ZSTD_minCLevel(); }
int maximum_compression_level() const override { return ZSTD_maxCLevel(); }
int default_compression_level() const override { return kZSTDDefaultCompressionLevel; }
-
- int compression_level() const override { return compression_level_; }
-
- private:
- const int compression_level_;
-};
-
-} // namespace
-
-std::unique_ptr<Codec> MakeZSTDCodec(int compression_level) {
- return std::unique_ptr<Codec>(new ZSTDCodec(compression_level));
-}
-
-} // namespace internal
-} // namespace util
-} // namespace arrow
+
+  // Level chosen at construction, after default substitution.
+  int compression_level() const override {
+    return compression_level_;
+  }
+
+ private:
+ const int compression_level_;
+};
+
+} // namespace
+
+// Factory for a ZSTDCodec with the given level, returned through the generic
+// Codec interface.
+std::unique_ptr<Codec> MakeZSTDCodec(int compression_level) {
+  std::unique_ptr<Codec> codec(new ZSTDCodec(compression_level));
+  return codec;
+}
+
+} // namespace internal
+} // namespace util
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/cpu_info.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/cpu_info.cc
index d803521a2d9..2b255f5821f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/cpu_info.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/cpu_info.cc
@@ -1,85 +1,85 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// From Apache Impala (incubating) as of 2016-01-29.
-
-#include "arrow/util/cpu_info.h"
-
-#ifdef __APPLE__
-#include <sys/sysctl.h>
-#endif
-
-#include <stdlib.h>
-#include <string.h>
-
-#ifndef _MSC_VER
-#include <unistd.h>
-#endif
-
-#ifdef _WIN32
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// From Apache Impala (incubating) as of 2016-01-29.
+
+#include "arrow/util/cpu_info.h"
+
+#ifdef __APPLE__
+#include <sys/sysctl.h>
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef _MSC_VER
+#include <unistd.h>
+#endif
+
+#ifdef _WIN32
#include <immintrin.h>
-#include <intrin.h>
-#include <array>
-#include <bitset>
-
-#include "arrow/util/windows_compatibility.h"
-#endif
-
-#include <algorithm>
-#include <cctype>
-#include <cerrno>
-#include <cstdint>
-#include <fstream>
-#include <memory>
-#include <mutex>
-#include <string>
-
-#include "arrow/result.h"
-#include "arrow/util/io_util.h"
-#include "arrow/util/logging.h"
+#include <intrin.h>
+#include <array>
+#include <bitset>
+
+#include "arrow/util/windows_compatibility.h"
+#endif
+
+#include <algorithm>
+#include <cctype>
+#include <cerrno>
+#include <cstdint>
+#include <fstream>
+#include <memory>
+#include <mutex>
+#include <string>
+
+#include "arrow/result.h"
+#include "arrow/util/io_util.h"
+#include "arrow/util/logging.h"
#include "arrow/util/optional.h"
-#include "arrow/util/string.h"
-
+#include "arrow/util/string.h"
+
namespace arrow {
namespace internal {
namespace {
-using std::max;
-
+using std::max;
+
constexpr int64_t kDefaultL1CacheSize = 32 * 1024; // Level 1: 32k
constexpr int64_t kDefaultL2CacheSize = 256 * 1024; // Level 2: 256k
constexpr int64_t kDefaultL3CacheSize = 3072 * 1024; // Level 3: 3M
-
-#if defined(__MINGW64_VERSION_MAJOR) && __MINGW64_VERSION_MAJOR < 5
-void __cpuidex(int CPUInfo[4], int function_id, int subfunction_id) {
- __asm__ __volatile__("cpuid"
- : "=a"(CPUInfo[0]), "=b"(CPUInfo[1]), "=c"(CPUInfo[2]),
- "=d"(CPUInfo[3])
- : "a"(function_id), "c"(subfunction_id));
-}
+
+#if defined(__MINGW64_VERSION_MAJOR) && __MINGW64_VERSION_MAJOR < 5
+// Fallback __cpuidex for the MinGW-w64 versions selected by the enclosing #if.
+// Executes the CPUID instruction with function_id in EAX and subfunction_id in
+// ECX, and stores the resulting EAX, EBX, ECX and EDX values in CPUInfo[0..3].
+void __cpuidex(int CPUInfo[4], int function_id, int subfunction_id) {
+ __asm__ __volatile__("cpuid"
+ : "=a"(CPUInfo[0]), "=b"(CPUInfo[1]), "=c"(CPUInfo[2]),
+ "=d"(CPUInfo[3])
+ : "a"(function_id), "c"(subfunction_id));
+}
int64_t _xgetbv(int xcr) {
int out = 0;
__asm__ __volatile__("xgetbv" : "=a"(out) : "c"(xcr) : "%edx");
return out;
}
-#endif
-
+#endif
+
#ifdef __APPLE__
util::optional<int64_t> IntegerSysCtlByName(const char* name) {
size_t len = sizeof(int64_t);
@@ -97,183 +97,183 @@ util::optional<int64_t> IntegerSysCtlByName(const char* name) {
}
#endif
-#if defined(__GNUC__) && defined(__linux__) && defined(__aarch64__)
-// There is no direct instruction to get cache size on Arm64 like '__cpuid' on x86;
-// Get Arm64 cache size by reading '/sys/devices/system/cpu/cpu0/cache/index*/size';
-// index* :
-// index0: L1 Dcache
-// index1: L1 Icache
-// index2: L2 cache
-// index3: L3 cache
+#if defined(__GNUC__) && defined(__linux__) && defined(__aarch64__)
+// There is no direct instruction to get cache size on Arm64 like '__cpuid' on x86;
+// Get Arm64 cache size by reading '/sys/devices/system/cpu/cpu0/cache/index*/size';
+// index* :
+// index0: L1 Dcache
+// index1: L1 Icache
+// index2: L2 cache
+// index3: L3 cache
const char* kL1CacheSizeFile = "/sys/devices/system/cpu/cpu0/cache/index0/size";
const char* kL2CacheSizeFile = "/sys/devices/system/cpu/cpu0/cache/index2/size";
const char* kL3CacheSizeFile = "/sys/devices/system/cpu/cpu0/cache/index3/size";
-
+
+// Reads a cache size from the first line of `filename` (a number optionally
+// followed by a K/M/G suffix, in either case) and returns it in bytes.
+// Returns `default_size` when the file cannot be opened or read, when the
+// line does not start with a number, or when strtoull reports an error.
int64_t GetArm64CacheSize(const char* filename, int64_t default_size = -1) {
- char* content = nullptr;
- char* last_char = nullptr;
- size_t file_len = 0;
-
- // Read cache file to 'content' for getting cache size.
- FILE* cache_file = fopen(filename, "r");
- if (cache_file == nullptr) {
- return default_size;
- }
- int res = getline(&content, &file_len, cache_file);
- fclose(cache_file);
- if (res == -1) {
- return default_size;
- }
- std::unique_ptr<char, decltype(&free)> content_guard(content, &free);
-
- errno = 0;
- const auto cardinal_num = strtoull(content, &last_char, 0);
- if (errno != 0) {
- return default_size;
- }
- // kB, MB, or GB
- int64_t multip = 1;
- switch (*last_char) {
- case 'g':
- case 'G':
- multip *= 1024;
- case 'm':
- case 'M':
- multip *= 1024;
- case 'k':
- case 'K':
- multip *= 1024;
- }
- return cardinal_num * multip;
-}
-#endif
-
+  char* content = nullptr;
+  size_t file_len = 0;
+
+  // Read the first line of the cache file into 'content'.
+  FILE* cache_file = fopen(filename, "r");
+  if (cache_file == nullptr) {
+    return default_size;
+  }
+  const ssize_t res = getline(&content, &file_len, cache_file);
+  fclose(cache_file);
+  // getline() may allocate 'content' even when it fails, so take ownership
+  // before checking the result; otherwise the failure path leaks the buffer.
+  std::unique_ptr<char, decltype(&free)> content_guard(content, &free);
+  if (res == -1) {
+    return default_size;
+  }
+
+  errno = 0;
+  char* last_char = nullptr;
+  const auto cardinal_num = strtoull(content, &last_char, 0);
+  if (errno != 0 || last_char == content) {
+    // Conversion error, or the line did not start with a number.
+    return default_size;
+  }
+  // kB, MB, or GB: each larger unit falls through to the smaller ones so the
+  // multiplier accumulates one factor of 1024 per step.
+  int64_t multip = 1;
+  switch (*last_char) {
+    case 'g':
+    case 'G':
+      multip *= 1024;
+      // fall through
+    case 'm':
+    case 'M':
+      multip *= 1024;
+      // fall through
+    case 'k':
+    case 'K':
+      multip *= 1024;
+      break;
+    default:
+      // No recognized suffix: the value is already in bytes.
+      break;
+  }
+  return static_cast<int64_t>(cardinal_num * multip);
+}
+#endif
+
#if !defined(_WIN32) && !defined(__APPLE__)
struct {
- std::string name;
- int64_t flag;
-} flag_mappings[] = {
-#if (defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64))
- {"ssse3", CpuInfo::SSSE3}, {"sse4_1", CpuInfo::SSE4_1},
- {"sse4_2", CpuInfo::SSE4_2}, {"popcnt", CpuInfo::POPCNT},
- {"avx", CpuInfo::AVX}, {"avx2", CpuInfo::AVX2},
- {"avx512f", CpuInfo::AVX512F}, {"avx512cd", CpuInfo::AVX512CD},
- {"avx512vl", CpuInfo::AVX512VL}, {"avx512dq", CpuInfo::AVX512DQ},
- {"avx512bw", CpuInfo::AVX512BW}, {"bmi1", CpuInfo::BMI1},
- {"bmi2", CpuInfo::BMI2},
-#endif
-#if defined(__aarch64__)
- {"asimd", CpuInfo::ASIMD},
-#endif
-};
+ std::string name;
+ int64_t flag;
+} flag_mappings[] = {
+#if (defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64))
+ {"ssse3", CpuInfo::SSSE3}, {"sse4_1", CpuInfo::SSE4_1},
+ {"sse4_2", CpuInfo::SSE4_2}, {"popcnt", CpuInfo::POPCNT},
+ {"avx", CpuInfo::AVX}, {"avx2", CpuInfo::AVX2},
+ {"avx512f", CpuInfo::AVX512F}, {"avx512cd", CpuInfo::AVX512CD},
+ {"avx512vl", CpuInfo::AVX512VL}, {"avx512dq", CpuInfo::AVX512DQ},
+ {"avx512bw", CpuInfo::AVX512BW}, {"bmi1", CpuInfo::BMI1},
+ {"bmi2", CpuInfo::BMI2},
+#endif
+#if defined(__aarch64__)
+ {"asimd", CpuInfo::ASIMD},
+#endif
+};
const int64_t num_flags = sizeof(flag_mappings) / sizeof(flag_mappings[0]);
-
-// Helper function to parse for hardware flags.
-// values contains a list of space-separated flags. check to see if the flags we
-// care about are present.
-// Returns a bitmap of flags.
-int64_t ParseCPUFlags(const std::string& values) {
- int64_t flags = 0;
- for (int i = 0; i < num_flags; ++i) {
- if (values.find(flag_mappings[i].name) != std::string::npos) {
- flags |= flag_mappings[i].flag;
- }
- }
- return flags;
-}
-#endif
-
-#ifdef _WIN32
-bool RetrieveCacheSize(int64_t* cache_sizes) {
- if (!cache_sizes) {
- return false;
- }
- PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = nullptr;
- PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer_position = nullptr;
- DWORD buffer_size = 0;
- size_t offset = 0;
- typedef BOOL(WINAPI * GetLogicalProcessorInformationFuncPointer)(void*, void*);
- GetLogicalProcessorInformationFuncPointer func_pointer =
- (GetLogicalProcessorInformationFuncPointer)GetProcAddress(
- GetModuleHandle("kernel32"), "GetLogicalProcessorInformation");
-
- if (!func_pointer) {
- return false;
- }
-
- // Get buffer size
- if (func_pointer(buffer, &buffer_size) && GetLastError() != ERROR_INSUFFICIENT_BUFFER)
- return false;
-
- buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(buffer_size);
-
- if (!buffer || !func_pointer(buffer, &buffer_size)) {
- return false;
- }
-
- buffer_position = buffer;
- while (offset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= buffer_size) {
- if (RelationCache == buffer_position->Relationship) {
- PCACHE_DESCRIPTOR cache = &buffer_position->Cache;
- if (cache->Level >= 1 && cache->Level <= 3) {
- cache_sizes[cache->Level - 1] += cache->Size;
- }
- }
- offset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
- buffer_position++;
- }
-
- if (buffer) {
- free(buffer);
- }
- return true;
-}
-
-// Source: https://en.wikipedia.org/wiki/CPUID
-bool RetrieveCPUInfo(int64_t* hardware_flags, std::string* model_name,
- CpuInfo::Vendor* vendor) {
- if (!hardware_flags || !model_name || !vendor) {
- return false;
- }
- int register_EAX_id = 1;
- int highest_valid_id = 0;
- int highest_extended_valid_id = 0;
- std::bitset<32> features_ECX;
- std::array<int, 4> cpu_info;
-
- // Get highest valid id
- __cpuid(cpu_info.data(), 0);
- highest_valid_id = cpu_info[0];
- // HEX of "GenuineIntel": 47656E75 696E6549 6E74656C
- // HEX of "AuthenticAMD": 41757468 656E7469 63414D44
- if (cpu_info[1] == 0x756e6547 && cpu_info[2] == 0x49656e69 &&
- cpu_info[3] == 0x6c65746e) {
- *vendor = CpuInfo::Vendor::Intel;
- } else if (cpu_info[1] == 0x68747541 && cpu_info[2] == 0x69746e65 &&
- cpu_info[3] == 0x444d4163) {
- *vendor = CpuInfo::Vendor::AMD;
- }
-
- if (highest_valid_id <= register_EAX_id) return false;
-
- // EAX=1: Processor Info and Feature Bits
- __cpuidex(cpu_info.data(), register_EAX_id, 0);
- features_ECX = cpu_info[2];
-
- // Get highest extended id
- __cpuid(cpu_info.data(), 0x80000000);
- highest_extended_valid_id = cpu_info[0];
-
- // Retrieve CPU model name
- if (highest_extended_valid_id >= static_cast<int>(0x80000004)) {
- model_name->clear();
- for (int i = 0x80000002; i <= static_cast<int>(0x80000004); ++i) {
- __cpuidex(cpu_info.data(), i, 0);
- *model_name +=
- std::string(reinterpret_cast<char*>(cpu_info.data()), sizeof(cpu_info));
- }
- }
-
+
+// Helper function to parse for hardware flags.
+// values contains a list of space-separated flags. check to see if the flags we
+// care about are present.
+// Returns a bitmap of flags.
+int64_t ParseCPUFlags(const std::string& values) {
+ int64_t flags = 0;
+ for (int i = 0; i < num_flags; ++i) {
+ if (values.find(flag_mappings[i].name) != std::string::npos) {
+ flags |= flag_mappings[i].flag;
+ }
+ }
+ return flags;
+}
+#endif
+
+#ifdef _WIN32
+bool RetrieveCacheSize(int64_t* cache_sizes) {
+ if (!cache_sizes) {
+ return false;
+ }
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = nullptr;
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer_position = nullptr;
+ DWORD buffer_size = 0;
+ size_t offset = 0;
+ typedef BOOL(WINAPI * GetLogicalProcessorInformationFuncPointer)(void*, void*);
+ GetLogicalProcessorInformationFuncPointer func_pointer =
+ (GetLogicalProcessorInformationFuncPointer)GetProcAddress(
+ GetModuleHandle("kernel32"), "GetLogicalProcessorInformation");
+
+ if (!func_pointer) {
+ return false;
+ }
+
+ // Get buffer size
+ if (func_pointer(buffer, &buffer_size) && GetLastError() != ERROR_INSUFFICIENT_BUFFER)
+ return false;
+
+ buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(buffer_size);
+
+ if (!buffer || !func_pointer(buffer, &buffer_size)) {
+ return false;
+ }
+
+ buffer_position = buffer;
+ while (offset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= buffer_size) {
+ if (RelationCache == buffer_position->Relationship) {
+ PCACHE_DESCRIPTOR cache = &buffer_position->Cache;
+ if (cache->Level >= 1 && cache->Level <= 3) {
+ cache_sizes[cache->Level - 1] += cache->Size;
+ }
+ }
+ offset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
+ buffer_position++;
+ }
+
+ if (buffer) {
+ free(buffer);
+ }
+ return true;
+}
+
+// Source: https://en.wikipedia.org/wiki/CPUID
+bool RetrieveCPUInfo(int64_t* hardware_flags, std::string* model_name,
+ CpuInfo::Vendor* vendor) {
+ if (!hardware_flags || !model_name || !vendor) {
+ return false;
+ }
+ int register_EAX_id = 1;
+ int highest_valid_id = 0;
+ int highest_extended_valid_id = 0;
+ std::bitset<32> features_ECX;
+ std::array<int, 4> cpu_info;
+
+ // Get highest valid id
+ __cpuid(cpu_info.data(), 0);
+ highest_valid_id = cpu_info[0];
+ // HEX of "GenuineIntel": 47656E75 696E6549 6E74656C
+ // HEX of "AuthenticAMD": 41757468 656E7469 63414D44
+ if (cpu_info[1] == 0x756e6547 && cpu_info[2] == 0x49656e69 &&
+ cpu_info[3] == 0x6c65746e) {
+ *vendor = CpuInfo::Vendor::Intel;
+ } else if (cpu_info[1] == 0x68747541 && cpu_info[2] == 0x69746e65 &&
+ cpu_info[3] == 0x444d4163) {
+ *vendor = CpuInfo::Vendor::AMD;
+ }
+
+ if (highest_valid_id <= register_EAX_id) return false;
+
+ // EAX=1: Processor Info and Feature Bits
+ __cpuidex(cpu_info.data(), register_EAX_id, 0);
+ features_ECX = cpu_info[2];
+
+ // Get highest extended id
+ __cpuid(cpu_info.data(), 0x80000000);
+ highest_extended_valid_id = cpu_info[0];
+
+ // Retrieve CPU model name
+ if (highest_extended_valid_id >= static_cast<int>(0x80000004)) {
+ model_name->clear();
+ for (int i = 0x80000002; i <= static_cast<int>(0x80000004); ++i) {
+ __cpuidex(cpu_info.data(), i, 0);
+ *model_name +=
+ std::string(reinterpret_cast<char*>(cpu_info.data()), sizeof(cpu_info));
+ }
+ }
+
bool zmm_enabled = false;
if (features_ECX[27]) { // OSXSAVE
// Query if the OS supports saving ZMM registers when switching contexts
@@ -281,21 +281,21 @@ bool RetrieveCPUInfo(int64_t* hardware_flags, std::string* model_name,
zmm_enabled = (xcr0 & 0xE0) == 0xE0;
}
- if (features_ECX[9]) *hardware_flags |= CpuInfo::SSSE3;
- if (features_ECX[19]) *hardware_flags |= CpuInfo::SSE4_1;
- if (features_ECX[20]) *hardware_flags |= CpuInfo::SSE4_2;
- if (features_ECX[23]) *hardware_flags |= CpuInfo::POPCNT;
- if (features_ECX[23]) *hardware_flags |= CpuInfo::AVX;
-
- // cpuid with EAX=7, ECX=0: Extended Features
- register_EAX_id = 7;
- if (highest_valid_id > register_EAX_id) {
- __cpuidex(cpu_info.data(), register_EAX_id, 0);
- std::bitset<32> features_EBX = cpu_info[1];
-
- if (features_EBX[3]) *hardware_flags |= CpuInfo::BMI1;
- if (features_EBX[5]) *hardware_flags |= CpuInfo::AVX2;
- if (features_EBX[8]) *hardware_flags |= CpuInfo::BMI2;
+ if (features_ECX[9]) *hardware_flags |= CpuInfo::SSSE3;
+ if (features_ECX[19]) *hardware_flags |= CpuInfo::SSE4_1;
+ if (features_ECX[20]) *hardware_flags |= CpuInfo::SSE4_2;
+ if (features_ECX[23]) *hardware_flags |= CpuInfo::POPCNT;
+ if (features_ECX[23]) *hardware_flags |= CpuInfo::AVX;
+
+ // cpuid with EAX=7, ECX=0: Extended Features
+ register_EAX_id = 7;
+ if (highest_valid_id > register_EAX_id) {
+ __cpuidex(cpu_info.data(), register_EAX_id, 0);
+ std::bitset<32> features_EBX = cpu_info[1];
+
+ if (features_EBX[3]) *hardware_flags |= CpuInfo::BMI1;
+ if (features_EBX[5]) *hardware_flags |= CpuInfo::AVX2;
+ if (features_EBX[8]) *hardware_flags |= CpuInfo::BMI2;
// ARROW-11427: only use AVX512 if enabled by the OS
if (zmm_enabled) {
if (features_EBX[16]) *hardware_flags |= CpuInfo::AVX512F;
@@ -304,50 +304,50 @@ bool RetrieveCPUInfo(int64_t* hardware_flags, std::string* model_name,
if (features_EBX[30]) *hardware_flags |= CpuInfo::AVX512BW;
if (features_EBX[31]) *hardware_flags |= CpuInfo::AVX512VL;
}
- }
-
- return true;
-}
-#endif
-
+ }
+
+ return true;
+}
+#endif
+
} // namespace
-CpuInfo::CpuInfo()
- : hardware_flags_(0),
- num_cores_(1),
- model_name_("unknown"),
- vendor_(Vendor::Unknown) {}
-
-std::unique_ptr<CpuInfo> g_cpu_info;
-static std::once_flag cpuinfo_initialized;
-
-CpuInfo* CpuInfo::GetInstance() {
- std::call_once(cpuinfo_initialized, []() {
- g_cpu_info.reset(new CpuInfo);
- g_cpu_info->Init();
- });
- return g_cpu_info.get();
-}
-
-void CpuInfo::Init() {
- std::string line;
- std::string name;
- std::string value;
-
- float max_mhz = 0;
- int num_cores = 0;
-
- memset(&cache_sizes_, 0, sizeof(cache_sizes_));
-
-#ifdef _WIN32
- SYSTEM_INFO system_info;
- GetSystemInfo(&system_info);
- num_cores = system_info.dwNumberOfProcessors;
-
- LARGE_INTEGER performance_frequency;
- if (QueryPerformanceFrequency(&performance_frequency)) {
- max_mhz = static_cast<float>(performance_frequency.QuadPart);
- }
+CpuInfo::CpuInfo()
+ : hardware_flags_(0),
+ num_cores_(1),
+ model_name_("unknown"),
+ vendor_(Vendor::Unknown) {}
+
+std::unique_ptr<CpuInfo> g_cpu_info;
+static std::once_flag cpuinfo_initialized;
+
+CpuInfo* CpuInfo::GetInstance() {
+ std::call_once(cpuinfo_initialized, []() {
+ g_cpu_info.reset(new CpuInfo);
+ g_cpu_info->Init();
+ });
+ return g_cpu_info.get();
+}
+
+void CpuInfo::Init() {
+ std::string line;
+ std::string name;
+ std::string value;
+
+ float max_mhz = 0;
+ int num_cores = 0;
+
+ memset(&cache_sizes_, 0, sizeof(cache_sizes_));
+
+#ifdef _WIN32
+ SYSTEM_INFO system_info;
+ GetSystemInfo(&system_info);
+ num_cores = system_info.dwNumberOfProcessors;
+
+ LARGE_INTEGER performance_frequency;
+ if (QueryPerformanceFrequency(&performance_frequency)) {
+ max_mhz = static_cast<float>(performance_frequency.QuadPart);
+ }
#elif defined(__APPLE__)
// On macOS, get CPU information from system information base
struct SysCtlCpuFeature {
@@ -358,7 +358,7 @@ void CpuInfo::Init() {
#if defined(__aarch64__)
// ARM64 (note that this is exposed under Rosetta as well)
{"hw.optional.neon", ASIMD},
-#else
+#else
// x86
{"hw.optional.sse4_2", SSSE3 | SSE4_1 | SSE4_2 | POPCNT},
{"hw.optional.avx1_0", AVX},
@@ -379,40 +379,40 @@ void CpuInfo::Init() {
}
}
#else
- // Read from /proc/cpuinfo
- std::ifstream cpuinfo("/proc/cpuinfo", std::ios::in);
- while (cpuinfo) {
- std::getline(cpuinfo, line);
- size_t colon = line.find(':');
- if (colon != std::string::npos) {
- name = TrimString(line.substr(0, colon - 1));
- value = TrimString(line.substr(colon + 1, std::string::npos));
- if (name.compare("flags") == 0 || name.compare("Features") == 0) {
- hardware_flags_ |= ParseCPUFlags(value);
- } else if (name.compare("cpu MHz") == 0) {
- // Every core will report a different speed. We'll take the max, assuming
- // that when impala is running, the core will not be in a lower power state.
- // TODO: is there a more robust way to do this, such as
- // Window's QueryPerformanceFrequency()
- float mhz = static_cast<float>(atof(value.c_str()));
- max_mhz = max(mhz, max_mhz);
- } else if (name.compare("processor") == 0) {
- ++num_cores;
- } else if (name.compare("model name") == 0) {
- model_name_ = value;
- } else if (name.compare("vendor_id") == 0) {
- if (value.compare("GenuineIntel") == 0) {
- vendor_ = Vendor::Intel;
- } else if (value.compare("AuthenticAMD") == 0) {
- vendor_ = Vendor::AMD;
- }
- }
- }
- }
- if (cpuinfo.is_open()) cpuinfo.close();
-#endif
-
-#ifdef __APPLE__
+ // Read from /proc/cpuinfo
+ std::ifstream cpuinfo("/proc/cpuinfo", std::ios::in);
+ while (cpuinfo) {
+ std::getline(cpuinfo, line);
+ size_t colon = line.find(':');
+ if (colon != std::string::npos) {
+ name = TrimString(line.substr(0, colon - 1));
+ value = TrimString(line.substr(colon + 1, std::string::npos));
+ if (name.compare("flags") == 0 || name.compare("Features") == 0) {
+ hardware_flags_ |= ParseCPUFlags(value);
+ } else if (name.compare("cpu MHz") == 0) {
+ // Every core will report a different speed. We'll take the max, assuming
+ // that when impala is running, the core will not be in a lower power state.
+ // TODO: is there a more robust way to do this, such as
+ // Window's QueryPerformanceFrequency()
+ float mhz = static_cast<float>(atof(value.c_str()));
+ max_mhz = max(mhz, max_mhz);
+ } else if (name.compare("processor") == 0) {
+ ++num_cores;
+ } else if (name.compare("model name") == 0) {
+ model_name_ = value;
+ } else if (name.compare("vendor_id") == 0) {
+ if (value.compare("GenuineIntel") == 0) {
+ vendor_ = Vendor::Intel;
+ } else if (value.compare("AuthenticAMD") == 0) {
+ vendor_ = Vendor::AMD;
+ }
+ }
+ }
+ }
+ if (cpuinfo.is_open()) cpuinfo.close();
+#endif
+
+#ifdef __APPLE__
// On macOS, get cache size from system information base
SetDefaultCacheSize();
auto c = IntegerSysCtlByName("hw.l1dcachesize");
@@ -427,137 +427,137 @@ void CpuInfo::Init() {
if (c.has_value()) {
cache_sizes_[2] = *c;
}
-#elif _WIN32
- if (!RetrieveCacheSize(cache_sizes_)) {
- SetDefaultCacheSize();
- }
- RetrieveCPUInfo(&hardware_flags_, &model_name_, &vendor_);
-#else
- SetDefaultCacheSize();
-#endif
-
- if (max_mhz != 0) {
- cycles_per_ms_ = static_cast<int64_t>(max_mhz);
-#ifndef _WIN32
- cycles_per_ms_ *= 1000;
-#endif
- } else {
- cycles_per_ms_ = 1000000;
- }
- original_hardware_flags_ = hardware_flags_;
-
- if (num_cores > 0) {
- num_cores_ = num_cores;
- } else {
- num_cores_ = 1;
- }
-
- // Parse the user simd level
- ParseUserSimdLevel();
-}
-
-void CpuInfo::VerifyCpuRequirements() {
-#ifdef ARROW_HAVE_SSE4_2
- if (!IsSupported(CpuInfo::SSSE3)) {
- DCHECK(false) << "CPU does not support the Supplemental SSE3 instruction set";
- }
-#endif
-#if defined(ARROW_HAVE_NEON)
- if (!IsSupported(CpuInfo::ASIMD)) {
- DCHECK(false) << "CPU does not support the Armv8 Neon instruction set";
- }
-#endif
-}
-
-bool CpuInfo::CanUseSSE4_2() const {
-#if defined(ARROW_HAVE_SSE4_2)
- return IsSupported(CpuInfo::SSE4_2);
-#else
- return false;
-#endif
-}
-
-void CpuInfo::EnableFeature(int64_t flag, bool enable) {
- if (!enable) {
- hardware_flags_ &= ~flag;
- } else {
- // Can't turn something on that can't be supported
- DCHECK_NE(original_hardware_flags_ & flag, 0);
- hardware_flags_ |= flag;
- }
-}
-
-int64_t CpuInfo::hardware_flags() { return hardware_flags_; }
-
-int64_t CpuInfo::CacheSize(CacheLevel level) { return cache_sizes_[level]; }
-
-int64_t CpuInfo::cycles_per_ms() { return cycles_per_ms_; }
-
-int CpuInfo::num_cores() { return num_cores_; }
-
-std::string CpuInfo::model_name() { return model_name_; }
-
-void CpuInfo::SetDefaultCacheSize() {
-#if defined(_SC_LEVEL1_DCACHE_SIZE) && !defined(__aarch64__)
- // Call sysconf to query for the cache sizes
- cache_sizes_[0] = sysconf(_SC_LEVEL1_DCACHE_SIZE);
- cache_sizes_[1] = sysconf(_SC_LEVEL2_CACHE_SIZE);
- cache_sizes_[2] = sysconf(_SC_LEVEL3_CACHE_SIZE);
- ARROW_UNUSED(kDefaultL1CacheSize);
- ARROW_UNUSED(kDefaultL2CacheSize);
- ARROW_UNUSED(kDefaultL3CacheSize);
-#elif defined(__GNUC__) && defined(__linux__) && defined(__aarch64__)
- cache_sizes_[0] = GetArm64CacheSize(kL1CacheSizeFile, kDefaultL1CacheSize);
- cache_sizes_[1] = GetArm64CacheSize(kL2CacheSizeFile, kDefaultL2CacheSize);
- cache_sizes_[2] = GetArm64CacheSize(kL3CacheSizeFile, kDefaultL3CacheSize);
-#else
- // Provide reasonable default values if no info
- cache_sizes_[0] = kDefaultL1CacheSize;
- cache_sizes_[1] = kDefaultL2CacheSize;
- cache_sizes_[2] = kDefaultL3CacheSize;
-#endif
-}
-
-void CpuInfo::ParseUserSimdLevel() {
- auto maybe_env_var = GetEnvVar("ARROW_USER_SIMD_LEVEL");
- if (!maybe_env_var.ok()) {
- // No user settings
- return;
- }
- std::string s = *std::move(maybe_env_var);
- std::transform(s.begin(), s.end(), s.begin(),
- [](unsigned char c) { return std::toupper(c); });
-
- int level = USER_SIMD_MAX;
- // Parse the level
- if (s == "AVX512") {
- level = USER_SIMD_AVX512;
- } else if (s == "AVX2") {
- level = USER_SIMD_AVX2;
- } else if (s == "AVX") {
- level = USER_SIMD_AVX;
- } else if (s == "SSE4_2") {
- level = USER_SIMD_SSE4_2;
- } else if (s == "NONE") {
- level = USER_SIMD_NONE;
- } else if (!s.empty()) {
- ARROW_LOG(WARNING) << "Invalid value for ARROW_USER_SIMD_LEVEL: " << s;
- }
-
- // Disable feature as the level
- if (level < USER_SIMD_AVX512) { // Disable all AVX512 features
- EnableFeature(AVX512, false);
- }
- if (level < USER_SIMD_AVX2) { // Disable all AVX2 features
- EnableFeature(AVX2 | BMI2, false);
- }
- if (level < USER_SIMD_AVX) { // Disable all AVX features
- EnableFeature(AVX, false);
- }
- if (level < USER_SIMD_SSE4_2) { // Disable all SSE4_2 features
- EnableFeature(SSE4_2 | BMI1, false);
- }
-}
-
-} // namespace internal
-} // namespace arrow
+#elif _WIN32
+ if (!RetrieveCacheSize(cache_sizes_)) {
+ SetDefaultCacheSize();
+ }
+ RetrieveCPUInfo(&hardware_flags_, &model_name_, &vendor_);
+#else
+ SetDefaultCacheSize();
+#endif
+
+ if (max_mhz != 0) {
+ cycles_per_ms_ = static_cast<int64_t>(max_mhz);
+#ifndef _WIN32
+ cycles_per_ms_ *= 1000;
+#endif
+ } else {
+ cycles_per_ms_ = 1000000;
+ }
+ original_hardware_flags_ = hardware_flags_;
+
+ if (num_cores > 0) {
+ num_cores_ = num_cores;
+ } else {
+ num_cores_ = 1;
+ }
+
+ // Parse the user simd level
+ ParseUserSimdLevel();
+}
+
+void CpuInfo::VerifyCpuRequirements() {
+#ifdef ARROW_HAVE_SSE4_2
+ if (!IsSupported(CpuInfo::SSSE3)) {
+ DCHECK(false) << "CPU does not support the Supplemental SSE3 instruction set";
+ }
+#endif
+#if defined(ARROW_HAVE_NEON)
+ if (!IsSupported(CpuInfo::ASIMD)) {
+ DCHECK(false) << "CPU does not support the Armv8 Neon instruction set";
+ }
+#endif
+}
+
+bool CpuInfo::CanUseSSE4_2() const {
+#if defined(ARROW_HAVE_SSE4_2)
+ return IsSupported(CpuInfo::SSE4_2);
+#else
+ return false;
+#endif
+}
+
+void CpuInfo::EnableFeature(int64_t flag, bool enable) {
+ if (!enable) {
+ hardware_flags_ &= ~flag;
+ } else {
+ // Can't turn something on that can't be supported
+ DCHECK_NE(original_hardware_flags_ & flag, 0);
+ hardware_flags_ |= flag;
+ }
+}
+
+int64_t CpuInfo::hardware_flags() { return hardware_flags_; }
+
+int64_t CpuInfo::CacheSize(CacheLevel level) { return cache_sizes_[level]; }
+
+int64_t CpuInfo::cycles_per_ms() { return cycles_per_ms_; }
+
+int CpuInfo::num_cores() { return num_cores_; }
+
+std::string CpuInfo::model_name() { return model_name_; }
+
+void CpuInfo::SetDefaultCacheSize() {
+#if defined(_SC_LEVEL1_DCACHE_SIZE) && !defined(__aarch64__)
+ // Call sysconf to query for the cache sizes
+ cache_sizes_[0] = sysconf(_SC_LEVEL1_DCACHE_SIZE);
+ cache_sizes_[1] = sysconf(_SC_LEVEL2_CACHE_SIZE);
+ cache_sizes_[2] = sysconf(_SC_LEVEL3_CACHE_SIZE);
+ ARROW_UNUSED(kDefaultL1CacheSize);
+ ARROW_UNUSED(kDefaultL2CacheSize);
+ ARROW_UNUSED(kDefaultL3CacheSize);
+#elif defined(__GNUC__) && defined(__linux__) && defined(__aarch64__)
+ cache_sizes_[0] = GetArm64CacheSize(kL1CacheSizeFile, kDefaultL1CacheSize);
+ cache_sizes_[1] = GetArm64CacheSize(kL2CacheSizeFile, kDefaultL2CacheSize);
+ cache_sizes_[2] = GetArm64CacheSize(kL3CacheSizeFile, kDefaultL3CacheSize);
+#else
+ // Provide reasonable default values if no info
+ cache_sizes_[0] = kDefaultL1CacheSize;
+ cache_sizes_[1] = kDefaultL2CacheSize;
+ cache_sizes_[2] = kDefaultL3CacheSize;
+#endif
+}
+
+void CpuInfo::ParseUserSimdLevel() {
+ auto maybe_env_var = GetEnvVar("ARROW_USER_SIMD_LEVEL");
+ if (!maybe_env_var.ok()) {
+ // No user settings
+ return;
+ }
+ std::string s = *std::move(maybe_env_var);
+ std::transform(s.begin(), s.end(), s.begin(),
+ [](unsigned char c) { return std::toupper(c); });
+
+ int level = USER_SIMD_MAX;
+ // Parse the level
+ if (s == "AVX512") {
+ level = USER_SIMD_AVX512;
+ } else if (s == "AVX2") {
+ level = USER_SIMD_AVX2;
+ } else if (s == "AVX") {
+ level = USER_SIMD_AVX;
+ } else if (s == "SSE4_2") {
+ level = USER_SIMD_SSE4_2;
+ } else if (s == "NONE") {
+ level = USER_SIMD_NONE;
+ } else if (!s.empty()) {
+ ARROW_LOG(WARNING) << "Invalid value for ARROW_USER_SIMD_LEVEL: " << s;
+ }
+
+ // Disable feature as the level
+ if (level < USER_SIMD_AVX512) { // Disable all AVX512 features
+ EnableFeature(AVX512, false);
+ }
+ if (level < USER_SIMD_AVX2) { // Disable all AVX2 features
+ EnableFeature(AVX2 | BMI2, false);
+ }
+ if (level < USER_SIMD_AVX) { // Disable all AVX features
+ EnableFeature(AVX, false);
+ }
+ if (level < USER_SIMD_SSE4_2) { // Disable all SSE4_2 features
+ EnableFeature(SSE4_2 | BMI1, false);
+ }
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/cpu_info.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/cpu_info.h
index 83819c25519..c0840cecb28 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/cpu_info.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/cpu_info.h
@@ -1,118 +1,118 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// From Apache Impala (incubating) as of 2016-01-29. Pared down to a minimal
-// set of functions needed for Apache Arrow / Apache parquet-cpp
-
-#pragma once
-
-#include <cstdint>
-#include <string>
-
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace internal {
-
-/// CpuInfo is an interface to query for cpu information at runtime. The caller can
-/// ask for the sizes of the caches and what hardware features are supported.
-/// On Linux, this information is pulled from a couple of sys files (/proc/cpuinfo and
-/// /sys/devices)
-class ARROW_EXPORT CpuInfo {
- public:
- static constexpr int64_t SSSE3 = (1 << 1);
- static constexpr int64_t SSE4_1 = (1 << 2);
- static constexpr int64_t SSE4_2 = (1 << 3);
- static constexpr int64_t POPCNT = (1 << 4);
- static constexpr int64_t ASIMD = (1 << 5);
- static constexpr int64_t AVX = (1 << 6);
- static constexpr int64_t AVX2 = (1 << 7);
- static constexpr int64_t AVX512F = (1 << 8);
- static constexpr int64_t AVX512CD = (1 << 9);
- static constexpr int64_t AVX512VL = (1 << 10);
- static constexpr int64_t AVX512DQ = (1 << 11);
- static constexpr int64_t AVX512BW = (1 << 12);
- static constexpr int64_t BMI1 = (1 << 13);
- static constexpr int64_t BMI2 = (1 << 14);
-
- /// Typical AVX512 subsets consists of AVX512F,AVX512BW,AVX512VL,AVX512CD,AVX512DQ
- static constexpr int64_t AVX512 = AVX512F | AVX512CD | AVX512VL | AVX512DQ | AVX512BW;
-
- /// Cache enums for L1 (data), L2 and L3
- enum CacheLevel {
- L1_CACHE = 0,
- L2_CACHE = 1,
- L3_CACHE = 2,
- };
-
- enum class Vendor : int { Unknown = 0, Intel, AMD };
-
- static CpuInfo* GetInstance();
-
- /// Determine if the CPU meets the minimum CPU requirements and if not, issue an error
- /// and terminate.
- void VerifyCpuRequirements();
-
- /// Returns all the flags for this cpu
- int64_t hardware_flags();
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// From Apache Impala (incubating) as of 2016-01-29. Pared down to a minimal
+// set of functions needed for Apache Arrow / Apache parquet-cpp
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace internal {
+
+/// CpuInfo is an interface to query for cpu information at runtime. The caller can
+/// ask for the sizes of the caches and what hardware features are supported.
+/// On Linux, this information is pulled from a couple of sys files (/proc/cpuinfo and
+/// /sys/devices)
+class ARROW_EXPORT CpuInfo {
+ public:
+ static constexpr int64_t SSSE3 = (1 << 1);
+ static constexpr int64_t SSE4_1 = (1 << 2);
+ static constexpr int64_t SSE4_2 = (1 << 3);
+ static constexpr int64_t POPCNT = (1 << 4);
+ static constexpr int64_t ASIMD = (1 << 5);
+ static constexpr int64_t AVX = (1 << 6);
+ static constexpr int64_t AVX2 = (1 << 7);
+ static constexpr int64_t AVX512F = (1 << 8);
+ static constexpr int64_t AVX512CD = (1 << 9);
+ static constexpr int64_t AVX512VL = (1 << 10);
+ static constexpr int64_t AVX512DQ = (1 << 11);
+ static constexpr int64_t AVX512BW = (1 << 12);
+ static constexpr int64_t BMI1 = (1 << 13);
+ static constexpr int64_t BMI2 = (1 << 14);
+
+ /// Typical AVX512 subsets consists of AVX512F,AVX512BW,AVX512VL,AVX512CD,AVX512DQ
+ static constexpr int64_t AVX512 = AVX512F | AVX512CD | AVX512VL | AVX512DQ | AVX512BW;
+
+ /// Cache enums for L1 (data), L2 and L3
+ enum CacheLevel {
+ L1_CACHE = 0,
+ L2_CACHE = 1,
+ L3_CACHE = 2,
+ };
+
+ enum class Vendor : int { Unknown = 0, Intel, AMD };
+
+ static CpuInfo* GetInstance();
+
+ /// Determine if the CPU meets the minimum CPU requirements and if not, issue an error
+ /// and terminate.
+ void VerifyCpuRequirements();
+
+ /// Returns all the flags for this cpu
+ int64_t hardware_flags();
+
/// \brief Returns whether or not the given feature is enabled.
///
/// IsSupported() is true iff IsDetected() is also true and the feature
/// wasn't disabled by the user (for example by setting the ARROW_USER_SIMD_LEVEL
/// environment variable).
- bool IsSupported(int64_t flags) const { return (hardware_flags_ & flags) == flags; }
-
+ bool IsSupported(int64_t flags) const { return (hardware_flags_ & flags) == flags; }
+
/// Returns whether or not the given feature is available on the CPU.
bool IsDetected(int64_t flags) const {
return (original_hardware_flags_ & flags) == flags;
}
- /// \brief The processor supports SSE4.2 and the Arrow libraries are built
- /// with support for it
- bool CanUseSSE4_2() const;
-
- /// Toggle a hardware feature on and off. It is not valid to turn on a feature
- /// that the underlying hardware cannot support. This is useful for testing.
- void EnableFeature(int64_t flag, bool enable);
-
- /// Returns the size of the cache in KB at this cache level
- int64_t CacheSize(CacheLevel level);
-
- /// Returns the number of cpu cycles per millisecond
- int64_t cycles_per_ms();
-
- /// Returns the number of cores (including hyper-threaded) on this machine.
- int num_cores();
-
- /// Returns the model name of the cpu (e.g. Intel i7-2600)
- std::string model_name();
-
- /// Returns the vendor of the cpu.
- Vendor vendor() const { return vendor_; }
-
- bool HasEfficientBmi2() const {
- // BMI2 (pext, pdep) is only efficient on Intel X86 processors.
- return vendor() == Vendor::Intel && IsSupported(BMI2);
- }
-
- private:
- CpuInfo();
-
+ /// \brief The processor supports SSE4.2 and the Arrow libraries are built
+ /// with support for it
+ bool CanUseSSE4_2() const;
+
+ /// Toggle a hardware feature on and off. It is not valid to turn on a feature
+ /// that the underlying hardware cannot support. This is useful for testing.
+ void EnableFeature(int64_t flag, bool enable);
+
+ /// Returns the size of the cache in KB at this cache level
+ int64_t CacheSize(CacheLevel level);
+
+ /// Returns the number of cpu cycles per millisecond
+ int64_t cycles_per_ms();
+
+ /// Returns the number of cores (including hyper-threaded) on this machine.
+ int num_cores();
+
+ /// Returns the model name of the cpu (e.g. Intel i7-2600)
+ std::string model_name();
+
+ /// Returns the vendor of the cpu.
+ Vendor vendor() const { return vendor_; }
+
+ bool HasEfficientBmi2() const {
+ // BMI2 (pext, pdep) is only efficient on Intel X86 processors.
+ return vendor() == Vendor::Intel && IsSupported(BMI2);
+ }
+
+ private:
+ CpuInfo();
+
enum UserSimdLevel {
USER_SIMD_NONE = 0,
USER_SIMD_SSE4_2,
@@ -122,22 +122,22 @@ class ARROW_EXPORT CpuInfo {
USER_SIMD_MAX,
};
- void Init();
-
- /// Inits CPU cache size variables with default values
- void SetDefaultCacheSize();
-
- /// Parse the SIMD level by ARROW_USER_SIMD_LEVEL env
- void ParseUserSimdLevel();
-
- int64_t hardware_flags_;
- int64_t original_hardware_flags_;
- int64_t cache_sizes_[L3_CACHE + 1];
- int64_t cycles_per_ms_;
- int num_cores_;
- std::string model_name_;
- Vendor vendor_;
-};
-
-} // namespace internal
-} // namespace arrow
+ void Init();
+
+ /// Inits CPU cache size variables with default values
+ void SetDefaultCacheSize();
+
+ /// Parse the SIMD level by ARROW_USER_SIMD_LEVEL env
+ void ParseUserSimdLevel();
+
+ int64_t hardware_flags_;
+ int64_t original_hardware_flags_;
+ int64_t cache_sizes_[L3_CACHE + 1];
+ int64_t cycles_per_ms_;
+ int num_cores_;
+ std::string model_name_;
+ Vendor vendor_;
+};
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/decimal.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/decimal.cc
index 7aefd1ab9cd..b0b474f599f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/decimal.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/decimal.cc
@@ -1,99 +1,99 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <algorithm>
-#include <array>
-#include <climits>
-#include <cmath>
-#include <cstdint>
-#include <cstdlib>
-#include <cstring>
-#include <iomanip>
-#include <limits>
-#include <ostream>
-#include <sstream>
-#include <string>
-
-#include "arrow/status.h"
-#include "arrow/util/decimal.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <algorithm>
+#include <array>
+#include <climits>
+#include <cmath>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <iomanip>
+#include <limits>
+#include <ostream>
+#include <sstream>
+#include <string>
+
+#include "arrow/status.h"
+#include "arrow/util/decimal.h"
#include "arrow/util/endian.h"
-#include "arrow/util/formatting.h"
-#include "arrow/util/int128_internal.h"
-#include "arrow/util/int_util_internal.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/value_parsing.h"
-
-namespace arrow {
-
-using internal::SafeLeftShift;
-using internal::SafeSignedAdd;
-using internal::uint128_t;
-
-Decimal128::Decimal128(const std::string& str) : Decimal128() {
- *this = Decimal128::FromString(str).ValueOrDie();
-}
-
-static constexpr auto kInt64DecimalDigits =
- static_cast<size_t>(std::numeric_limits<int64_t>::digits10);
-
-static constexpr uint64_t kUInt64PowersOfTen[kInt64DecimalDigits + 1] = {
- // clang-format off
- 1ULL,
- 10ULL,
- 100ULL,
- 1000ULL,
- 10000ULL,
- 100000ULL,
- 1000000ULL,
- 10000000ULL,
- 100000000ULL,
- 1000000000ULL,
- 10000000000ULL,
- 100000000000ULL,
- 1000000000000ULL,
- 10000000000000ULL,
- 100000000000000ULL,
- 1000000000000000ULL,
- 10000000000000000ULL,
- 100000000000000000ULL,
- 1000000000000000000ULL
- // clang-format on
-};
-
-static constexpr float kFloatPowersOfTen[2 * 38 + 1] = {
- 1e-38f, 1e-37f, 1e-36f, 1e-35f, 1e-34f, 1e-33f, 1e-32f, 1e-31f, 1e-30f, 1e-29f,
- 1e-28f, 1e-27f, 1e-26f, 1e-25f, 1e-24f, 1e-23f, 1e-22f, 1e-21f, 1e-20f, 1e-19f,
- 1e-18f, 1e-17f, 1e-16f, 1e-15f, 1e-14f, 1e-13f, 1e-12f, 1e-11f, 1e-10f, 1e-9f,
- 1e-8f, 1e-7f, 1e-6f, 1e-5f, 1e-4f, 1e-3f, 1e-2f, 1e-1f, 1e0f, 1e1f,
- 1e2f, 1e3f, 1e4f, 1e5f, 1e6f, 1e7f, 1e8f, 1e9f, 1e10f, 1e11f,
- 1e12f, 1e13f, 1e14f, 1e15f, 1e16f, 1e17f, 1e18f, 1e19f, 1e20f, 1e21f,
- 1e22f, 1e23f, 1e24f, 1e25f, 1e26f, 1e27f, 1e28f, 1e29f, 1e30f, 1e31f,
- 1e32f, 1e33f, 1e34f, 1e35f, 1e36f, 1e37f, 1e38f};
-
-static constexpr double kDoublePowersOfTen[2 * 38 + 1] = {
- 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28,
- 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17,
- 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6,
- 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5,
- 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16,
- 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27,
- 1e28, 1e29, 1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38};
-
+#include "arrow/util/formatting.h"
+#include "arrow/util/int128_internal.h"
+#include "arrow/util/int_util_internal.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/value_parsing.h"
+
+namespace arrow {
+
+using internal::SafeLeftShift;
+using internal::SafeSignedAdd;
+using internal::uint128_t;
+
+Decimal128::Decimal128(const std::string& str) : Decimal128() {
+ *this = Decimal128::FromString(str).ValueOrDie();
+}
+
+static constexpr auto kInt64DecimalDigits =
+ static_cast<size_t>(std::numeric_limits<int64_t>::digits10);
+
+static constexpr uint64_t kUInt64PowersOfTen[kInt64DecimalDigits + 1] = {
+ // clang-format off
+ 1ULL,
+ 10ULL,
+ 100ULL,
+ 1000ULL,
+ 10000ULL,
+ 100000ULL,
+ 1000000ULL,
+ 10000000ULL,
+ 100000000ULL,
+ 1000000000ULL,
+ 10000000000ULL,
+ 100000000000ULL,
+ 1000000000000ULL,
+ 10000000000000ULL,
+ 100000000000000ULL,
+ 1000000000000000ULL,
+ 10000000000000000ULL,
+ 100000000000000000ULL,
+ 1000000000000000000ULL
+ // clang-format on
+};
+
+static constexpr float kFloatPowersOfTen[2 * 38 + 1] = {
+ 1e-38f, 1e-37f, 1e-36f, 1e-35f, 1e-34f, 1e-33f, 1e-32f, 1e-31f, 1e-30f, 1e-29f,
+ 1e-28f, 1e-27f, 1e-26f, 1e-25f, 1e-24f, 1e-23f, 1e-22f, 1e-21f, 1e-20f, 1e-19f,
+ 1e-18f, 1e-17f, 1e-16f, 1e-15f, 1e-14f, 1e-13f, 1e-12f, 1e-11f, 1e-10f, 1e-9f,
+ 1e-8f, 1e-7f, 1e-6f, 1e-5f, 1e-4f, 1e-3f, 1e-2f, 1e-1f, 1e0f, 1e1f,
+ 1e2f, 1e3f, 1e4f, 1e5f, 1e6f, 1e7f, 1e8f, 1e9f, 1e10f, 1e11f,
+ 1e12f, 1e13f, 1e14f, 1e15f, 1e16f, 1e17f, 1e18f, 1e19f, 1e20f, 1e21f,
+ 1e22f, 1e23f, 1e24f, 1e25f, 1e26f, 1e27f, 1e28f, 1e29f, 1e30f, 1e31f,
+ 1e32f, 1e33f, 1e34f, 1e35f, 1e36f, 1e37f, 1e38f};
+
+static constexpr double kDoublePowersOfTen[2 * 38 + 1] = {
+ 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28,
+ 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17,
+ 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6,
+ 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5,
+ 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16,
+ 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27,
+ 1e28, 1e29, 1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38};
+
// On the Windows R toolchain, INFINITY is double type instead of float
static constexpr float kFloatInf = std::numeric_limits<float>::infinity();
static constexpr float kFloatPowersOfTen76[2 * 76 + 1] = {
@@ -135,357 +135,357 @@ static constexpr double kDoublePowersOfTen76[2 * 76 + 1] = {
1e56, 1e57, 1e58, 1e59, 1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67,
1e68, 1e69, 1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76};
-namespace {
-
-template <typename Real, typename Derived>
-struct DecimalRealConversion {
- static Result<Decimal128> FromPositiveReal(Real real, int32_t precision,
- int32_t scale) {
- auto x = real;
- if (scale >= -38 && scale <= 38) {
- x *= Derived::powers_of_ten()[scale + 38];
- } else {
- x *= std::pow(static_cast<Real>(10), static_cast<Real>(scale));
- }
- x = std::nearbyint(x);
- const auto max_abs = Derived::powers_of_ten()[precision + 38];
- if (x <= -max_abs || x >= max_abs) {
- return Status::Invalid("Cannot convert ", real,
- " to Decimal128(precision = ", precision,
- ", scale = ", scale, "): overflow");
- }
- // Extract high and low bits
- const auto high = std::floor(std::ldexp(x, -64));
- const auto low = x - std::ldexp(high, 64);
-
- DCHECK_GE(high, -9.223372036854776e+18); // -2**63
- DCHECK_LT(high, 9.223372036854776e+18); // 2**63
- DCHECK_GE(low, 0);
- DCHECK_LT(low, 1.8446744073709552e+19); // 2**64
- return Decimal128(static_cast<int64_t>(high), static_cast<uint64_t>(low));
- }
-
- static Result<Decimal128> FromReal(Real x, int32_t precision, int32_t scale) {
- DCHECK_GT(precision, 0);
- DCHECK_LE(precision, 38);
-
- if (!std::isfinite(x)) {
- return Status::Invalid("Cannot convert ", x, " to Decimal128");
- }
- if (x < 0) {
- ARROW_ASSIGN_OR_RAISE(auto dec, FromPositiveReal(-x, precision, scale));
- return dec.Negate();
- } else {
- // Includes negative zero
- return FromPositiveReal(x, precision, scale);
- }
- }
-
- static Real ToRealPositive(const Decimal128& decimal, int32_t scale) {
- Real x = static_cast<Real>(decimal.high_bits()) * Derived::two_to_64();
- x += static_cast<Real>(decimal.low_bits());
- if (scale >= -38 && scale <= 38) {
- x *= Derived::powers_of_ten()[-scale + 38];
- } else {
- x *= std::pow(static_cast<Real>(10), static_cast<Real>(-scale));
- }
- return x;
- }
-
- static Real ToReal(Decimal128 decimal, int32_t scale) {
- if (decimal.high_bits() < 0) {
- // Convert the absolute value to avoid precision loss
- decimal.Negate();
- return -ToRealPositive(decimal, scale);
- } else {
- return ToRealPositive(decimal, scale);
- }
- }
-};
-
-struct DecimalFloatConversion
- : public DecimalRealConversion<float, DecimalFloatConversion> {
- static constexpr const float* powers_of_ten() { return kFloatPowersOfTen; }
-
- static constexpr float two_to_64() { return 1.8446744e+19f; }
-};
-
-struct DecimalDoubleConversion
- : public DecimalRealConversion<double, DecimalDoubleConversion> {
- static constexpr const double* powers_of_ten() { return kDoublePowersOfTen; }
-
- static constexpr double two_to_64() { return 1.8446744073709552e+19; }
-};
-
-} // namespace
-
-Result<Decimal128> Decimal128::FromReal(float x, int32_t precision, int32_t scale) {
- return DecimalFloatConversion::FromReal(x, precision, scale);
-}
-
-Result<Decimal128> Decimal128::FromReal(double x, int32_t precision, int32_t scale) {
- return DecimalDoubleConversion::FromReal(x, precision, scale);
-}
-
-float Decimal128::ToFloat(int32_t scale) const {
- return DecimalFloatConversion::ToReal(*this, scale);
-}
-
-double Decimal128::ToDouble(int32_t scale) const {
- return DecimalDoubleConversion::ToReal(*this, scale);
-}
-
-template <size_t n>
-static void AppendLittleEndianArrayToString(const std::array<uint64_t, n>& array,
- std::string* result) {
- const auto most_significant_non_zero =
- find_if(array.rbegin(), array.rend(), [](uint64_t v) { return v != 0; });
- if (most_significant_non_zero == array.rend()) {
- result->push_back('0');
- return;
- }
-
- size_t most_significant_elem_idx = &*most_significant_non_zero - array.data();
- std::array<uint64_t, n> copy = array;
- constexpr uint32_t k1e9 = 1000000000U;
- constexpr size_t kNumBits = n * 64;
- // Segments will contain the array split into groups that map to decimal digits,
- // in little endian order. Each segment will hold at most 9 decimal digits.
- // For example, if the input represents 9876543210123456789, then segments will be
- // [123456789, 876543210, 9].
- // The max number of segments needed = ceil(kNumBits * log(2) / log(1e9))
- // = ceil(kNumBits / 29.897352854) <= ceil(kNumBits / 29).
- std::array<uint32_t, (kNumBits + 28) / 29> segments;
- size_t num_segments = 0;
- uint64_t* most_significant_elem = &copy[most_significant_elem_idx];
- do {
- // Compute remainder = copy % 1e9 and copy = copy / 1e9.
- uint32_t remainder = 0;
- uint64_t* elem = most_significant_elem;
- do {
- // Compute dividend = (remainder << 32) | *elem (a virtual 96-bit integer);
- // *elem = dividend / 1e9;
- // remainder = dividend % 1e9.
- uint32_t hi = static_cast<uint32_t>(*elem >> 32);
+namespace {
+
+template <typename Real, typename Derived>
+struct DecimalRealConversion {
+ static Result<Decimal128> FromPositiveReal(Real real, int32_t precision,
+ int32_t scale) {
+ auto x = real;
+ if (scale >= -38 && scale <= 38) {
+ x *= Derived::powers_of_ten()[scale + 38];
+ } else {
+ x *= std::pow(static_cast<Real>(10), static_cast<Real>(scale));
+ }
+ x = std::nearbyint(x);
+ const auto max_abs = Derived::powers_of_ten()[precision + 38];
+ if (x <= -max_abs || x >= max_abs) {
+ return Status::Invalid("Cannot convert ", real,
+ " to Decimal128(precision = ", precision,
+ ", scale = ", scale, "): overflow");
+ }
+ // Extract high and low bits
+ const auto high = std::floor(std::ldexp(x, -64));
+ const auto low = x - std::ldexp(high, 64);
+
+ DCHECK_GE(high, -9.223372036854776e+18); // -2**63
+ DCHECK_LT(high, 9.223372036854776e+18); // 2**63
+ DCHECK_GE(low, 0);
+ DCHECK_LT(low, 1.8446744073709552e+19); // 2**64
+ return Decimal128(static_cast<int64_t>(high), static_cast<uint64_t>(low));
+ }
+
+ static Result<Decimal128> FromReal(Real x, int32_t precision, int32_t scale) {
+ DCHECK_GT(precision, 0);
+ DCHECK_LE(precision, 38);
+
+ if (!std::isfinite(x)) {
+ return Status::Invalid("Cannot convert ", x, " to Decimal128");
+ }
+ if (x < 0) {
+ ARROW_ASSIGN_OR_RAISE(auto dec, FromPositiveReal(-x, precision, scale));
+ return dec.Negate();
+ } else {
+ // Includes negative zero
+ return FromPositiveReal(x, precision, scale);
+ }
+ }
+
+ static Real ToRealPositive(const Decimal128& decimal, int32_t scale) {
+ Real x = static_cast<Real>(decimal.high_bits()) * Derived::two_to_64();
+ x += static_cast<Real>(decimal.low_bits());
+ if (scale >= -38 && scale <= 38) {
+ x *= Derived::powers_of_ten()[-scale + 38];
+ } else {
+ x *= std::pow(static_cast<Real>(10), static_cast<Real>(-scale));
+ }
+ return x;
+ }
+
+ static Real ToReal(Decimal128 decimal, int32_t scale) {
+ if (decimal.high_bits() < 0) {
+ // Convert the absolute value to avoid precision loss
+ decimal.Negate();
+ return -ToRealPositive(decimal, scale);
+ } else {
+ return ToRealPositive(decimal, scale);
+ }
+ }
+};
+
+struct DecimalFloatConversion
+ : public DecimalRealConversion<float, DecimalFloatConversion> {
+ static constexpr const float* powers_of_ten() { return kFloatPowersOfTen; }
+
+ static constexpr float two_to_64() { return 1.8446744e+19f; }
+};
+
+struct DecimalDoubleConversion
+ : public DecimalRealConversion<double, DecimalDoubleConversion> {
+ static constexpr const double* powers_of_ten() { return kDoublePowersOfTen; }
+
+ static constexpr double two_to_64() { return 1.8446744073709552e+19; }
+};
+
+} // namespace
+
+Result<Decimal128> Decimal128::FromReal(float x, int32_t precision, int32_t scale) {
+ return DecimalFloatConversion::FromReal(x, precision, scale);
+}
+
+Result<Decimal128> Decimal128::FromReal(double x, int32_t precision, int32_t scale) {
+ return DecimalDoubleConversion::FromReal(x, precision, scale);
+}
+
+float Decimal128::ToFloat(int32_t scale) const {
+ return DecimalFloatConversion::ToReal(*this, scale);
+}
+
+double Decimal128::ToDouble(int32_t scale) const {
+ return DecimalDoubleConversion::ToReal(*this, scale);
+}
+
+template <size_t n>
+static void AppendLittleEndianArrayToString(const std::array<uint64_t, n>& array,
+ std::string* result) {
+ const auto most_significant_non_zero =
+ find_if(array.rbegin(), array.rend(), [](uint64_t v) { return v != 0; });
+ if (most_significant_non_zero == array.rend()) {
+ result->push_back('0');
+ return;
+ }
+
+ size_t most_significant_elem_idx = &*most_significant_non_zero - array.data();
+ std::array<uint64_t, n> copy = array;
+ constexpr uint32_t k1e9 = 1000000000U;
+ constexpr size_t kNumBits = n * 64;
+ // Segments will contain the array split into groups that map to decimal digits,
+ // in little endian order. Each segment will hold at most 9 decimal digits.
+ // For example, if the input represents 9876543210123456789, then segments will be
+ // [123456789, 876543210, 9].
+ // The max number of segments needed = ceil(kNumBits * log(2) / log(1e9))
+ // = ceil(kNumBits / 29.897352854) <= ceil(kNumBits / 29).
+ std::array<uint32_t, (kNumBits + 28) / 29> segments;
+ size_t num_segments = 0;
+ uint64_t* most_significant_elem = &copy[most_significant_elem_idx];
+ do {
+ // Compute remainder = copy % 1e9 and copy = copy / 1e9.
+ uint32_t remainder = 0;
+ uint64_t* elem = most_significant_elem;
+ do {
+ // Compute dividend = (remainder << 32) | *elem (a virtual 96-bit integer);
+ // *elem = dividend / 1e9;
+ // remainder = dividend % 1e9.
+ uint32_t hi = static_cast<uint32_t>(*elem >> 32);
uint32_t lo = static_cast<uint32_t>(*elem & BitUtil::LeastSignificantBitMask(32));
- uint64_t dividend_hi = (static_cast<uint64_t>(remainder) << 32) | hi;
- uint64_t quotient_hi = dividend_hi / k1e9;
- remainder = static_cast<uint32_t>(dividend_hi % k1e9);
- uint64_t dividend_lo = (static_cast<uint64_t>(remainder) << 32) | lo;
- uint64_t quotient_lo = dividend_lo / k1e9;
- remainder = static_cast<uint32_t>(dividend_lo % k1e9);
- *elem = (quotient_hi << 32) | quotient_lo;
- } while (elem-- != copy.data());
-
- segments[num_segments++] = remainder;
- } while (*most_significant_elem != 0 || most_significant_elem-- != copy.data());
-
- size_t old_size = result->size();
- size_t new_size = old_size + num_segments * 9;
- result->resize(new_size, '0');
- char* output = &result->at(old_size);
- const uint32_t* segment = &segments[num_segments - 1];
- internal::StringFormatter<UInt32Type> format;
- // First segment is formatted as-is.
- format(*segment, [&output](util::string_view formatted) {
- memcpy(output, formatted.data(), formatted.size());
- output += formatted.size();
- });
- while (segment != segments.data()) {
- --segment;
- // Right-pad formatted segment such that e.g. 123 is formatted as "000000123".
- output += 9;
- format(*segment, [output](util::string_view formatted) {
- memcpy(output - formatted.size(), formatted.data(), formatted.size());
- });
- }
- result->resize(output - result->data());
-}
-
-std::string Decimal128::ToIntegerString() const {
- std::string result;
- if (high_bits() < 0) {
- result.push_back('-');
- Decimal128 abs = *this;
- abs.Negate();
- AppendLittleEndianArrayToString<2>(
- {abs.low_bits(), static_cast<uint64_t>(abs.high_bits())}, &result);
- } else {
- AppendLittleEndianArrayToString<2>({low_bits(), static_cast<uint64_t>(high_bits())},
- &result);
- }
- return result;
-}
-
-Decimal128::operator int64_t() const {
- DCHECK(high_bits() == 0 || high_bits() == -1)
- << "Trying to cast a Decimal128 greater than the value range of a "
- "int64_t. high_bits_ must be equal to 0 or -1, got: "
- << high_bits();
- return static_cast<int64_t>(low_bits());
-}
-
-static void AdjustIntegerStringWithScale(int32_t scale, std::string* str) {
- if (scale == 0) {
- return;
- }
- DCHECK(str != nullptr);
- DCHECK(!str->empty());
- const bool is_negative = str->front() == '-';
- const auto is_negative_offset = static_cast<int32_t>(is_negative);
- const auto len = static_cast<int32_t>(str->size());
- const int32_t num_digits = len - is_negative_offset;
- const int32_t adjusted_exponent = num_digits - 1 - scale;
-
- /// Note that the -6 is taken from the Java BigDecimal documentation.
- if (scale < 0 || adjusted_exponent < -6) {
- // Example 1:
- // Precondition: *str = "123", is_negative_offset = 0, num_digits = 3, scale = -2,
- // adjusted_exponent = 4
- // After inserting decimal point: *str = "1.23"
- // After appending exponent: *str = "1.23E+4"
- // Example 2:
- // Precondition: *str = "-123", is_negative_offset = 1, num_digits = 3, scale = 9,
- // adjusted_exponent = -7
- // After inserting decimal point: *str = "-1.23"
- // After appending exponent: *str = "-1.23E-7"
- str->insert(str->begin() + 1 + is_negative_offset, '.');
- str->push_back('E');
- if (adjusted_exponent >= 0) {
- str->push_back('+');
- }
- internal::StringFormatter<Int32Type> format;
- format(adjusted_exponent, [str](util::string_view formatted) {
- str->append(formatted.data(), formatted.size());
- });
- return;
- }
-
- if (num_digits > scale) {
- const auto n = static_cast<size_t>(len - scale);
- // Example 1:
- // Precondition: *str = "123", len = num_digits = 3, scale = 1, n = 2
- // After inserting decimal point: *str = "12.3"
- // Example 2:
- // Precondition: *str = "-123", len = 4, num_digits = 3, scale = 1, n = 3
- // After inserting decimal point: *str = "-12.3"
- str->insert(str->begin() + n, '.');
- return;
- }
-
- // Example 1:
- // Precondition: *str = "123", is_negative_offset = 0, num_digits = 3, scale = 4
- // After insert: *str = "000123"
- // After setting decimal point: *str = "0.0123"
- // Example 2:
- // Precondition: *str = "-123", is_negative_offset = 1, num_digits = 3, scale = 4
- // After insert: *str = "-000123"
- // After setting decimal point: *str = "-0.0123"
- str->insert(is_negative_offset, scale - num_digits + 2, '0');
- str->at(is_negative_offset + 1) = '.';
-}
-
-std::string Decimal128::ToString(int32_t scale) const {
- std::string str(ToIntegerString());
- AdjustIntegerStringWithScale(scale, &str);
- return str;
-}
-
-// Iterates over input and for each group of kInt64DecimalDigits multiple out by
-// the appropriate power of 10 necessary to add source parsed as uint64 and
-// then adds the parsed value of source.
-static inline void ShiftAndAdd(const util::string_view& input, uint64_t out[],
- size_t out_size) {
- for (size_t posn = 0; posn < input.size();) {
- const size_t group_size = std::min(kInt64DecimalDigits, input.size() - posn);
- const uint64_t multiple = kUInt64PowersOfTen[group_size];
- uint64_t chunk = 0;
- ARROW_CHECK(
- internal::ParseValue<UInt64Type>(input.data() + posn, group_size, &chunk));
-
- for (size_t i = 0; i < out_size; ++i) {
- uint128_t tmp = out[i];
- tmp *= multiple;
- tmp += chunk;
- out[i] = static_cast<uint64_t>(tmp & 0xFFFFFFFFFFFFFFFFULL);
- chunk = static_cast<uint64_t>(tmp >> 64);
- }
- posn += group_size;
- }
-}
-
-namespace {
-
-struct DecimalComponents {
- util::string_view whole_digits;
- util::string_view fractional_digits;
- int32_t exponent = 0;
- char sign = 0;
- bool has_exponent = false;
-};
-
-inline bool IsSign(char c) { return c == '-' || c == '+'; }
-
-inline bool IsDot(char c) { return c == '.'; }
-
-inline bool IsDigit(char c) { return c >= '0' && c <= '9'; }
-
-inline bool StartsExponent(char c) { return c == 'e' || c == 'E'; }
-
-inline size_t ParseDigitsRun(const char* s, size_t start, size_t size,
- util::string_view* out) {
- size_t pos;
- for (pos = start; pos < size; ++pos) {
- if (!IsDigit(s[pos])) {
- break;
- }
- }
- *out = util::string_view(s + start, pos - start);
- return pos;
-}
-
-bool ParseDecimalComponents(const char* s, size_t size, DecimalComponents* out) {
- size_t pos = 0;
-
- if (size == 0) {
- return false;
- }
- // Sign of the number
- if (IsSign(s[pos])) {
- out->sign = *(s + pos);
- ++pos;
- }
- // First run of digits
- pos = ParseDigitsRun(s, pos, size, &out->whole_digits);
- if (pos == size) {
- return !out->whole_digits.empty();
- }
- // Optional dot (if given in fractional form)
- bool has_dot = IsDot(s[pos]);
- if (has_dot) {
- // Second run of digits
- ++pos;
- pos = ParseDigitsRun(s, pos, size, &out->fractional_digits);
- }
- if (out->whole_digits.empty() && out->fractional_digits.empty()) {
- // Need at least some digits (whole or fractional)
- return false;
- }
- if (pos == size) {
- return true;
- }
- // Optional exponent
- if (StartsExponent(s[pos])) {
- ++pos;
- if (pos != size && s[pos] == '+') {
- ++pos;
- }
- out->has_exponent = true;
- return internal::ParseValue<Int32Type>(s + pos, size - pos, &(out->exponent));
- }
- return pos == size;
-}
-
+ uint64_t dividend_hi = (static_cast<uint64_t>(remainder) << 32) | hi;
+ uint64_t quotient_hi = dividend_hi / k1e9;
+ remainder = static_cast<uint32_t>(dividend_hi % k1e9);
+ uint64_t dividend_lo = (static_cast<uint64_t>(remainder) << 32) | lo;
+ uint64_t quotient_lo = dividend_lo / k1e9;
+ remainder = static_cast<uint32_t>(dividend_lo % k1e9);
+ *elem = (quotient_hi << 32) | quotient_lo;
+ } while (elem-- != copy.data());
+
+ segments[num_segments++] = remainder;
+ } while (*most_significant_elem != 0 || most_significant_elem-- != copy.data());
+
+ size_t old_size = result->size();
+ size_t new_size = old_size + num_segments * 9;
+ result->resize(new_size, '0');
+ char* output = &result->at(old_size);
+ const uint32_t* segment = &segments[num_segments - 1];
+ internal::StringFormatter<UInt32Type> format;
+ // First segment is formatted as-is.
+ format(*segment, [&output](util::string_view formatted) {
+ memcpy(output, formatted.data(), formatted.size());
+ output += formatted.size();
+ });
+ while (segment != segments.data()) {
+ --segment;
+ // Right-pad formatted segment such that e.g. 123 is formatted as "000000123".
+ output += 9;
+ format(*segment, [output](util::string_view formatted) {
+ memcpy(output - formatted.size(), formatted.data(), formatted.size());
+ });
+ }
+ result->resize(output - result->data());
+}
+
+std::string Decimal128::ToIntegerString() const {
+ std::string result;
+ if (high_bits() < 0) {
+ result.push_back('-');
+ Decimal128 abs = *this;
+ abs.Negate();
+ AppendLittleEndianArrayToString<2>(
+ {abs.low_bits(), static_cast<uint64_t>(abs.high_bits())}, &result);
+ } else {
+ AppendLittleEndianArrayToString<2>({low_bits(), static_cast<uint64_t>(high_bits())},
+ &result);
+ }
+ return result;
+}
+
+Decimal128::operator int64_t() const {
+ DCHECK(high_bits() == 0 || high_bits() == -1)
+ << "Trying to cast a Decimal128 greater than the value range of a "
+ "int64_t. high_bits_ must be equal to 0 or -1, got: "
+ << high_bits();
+ return static_cast<int64_t>(low_bits());
+}
+
+static void AdjustIntegerStringWithScale(int32_t scale, std::string* str) {
+ if (scale == 0) {
+ return;
+ }
+ DCHECK(str != nullptr);
+ DCHECK(!str->empty());
+ const bool is_negative = str->front() == '-';
+ const auto is_negative_offset = static_cast<int32_t>(is_negative);
+ const auto len = static_cast<int32_t>(str->size());
+ const int32_t num_digits = len - is_negative_offset;
+ const int32_t adjusted_exponent = num_digits - 1 - scale;
+
+ /// Note that the -6 is taken from the Java BigDecimal documentation.
+ if (scale < 0 || adjusted_exponent < -6) {
+ // Example 1:
+ // Precondition: *str = "123", is_negative_offset = 0, num_digits = 3, scale = -2,
+ // adjusted_exponent = 4
+ // After inserting decimal point: *str = "1.23"
+ // After appending exponent: *str = "1.23E+4"
+ // Example 2:
+ // Precondition: *str = "-123", is_negative_offset = 1, num_digits = 3, scale = 9,
+ // adjusted_exponent = -7
+ // After inserting decimal point: *str = "-1.23"
+ // After appending exponent: *str = "-1.23E-7"
+ str->insert(str->begin() + 1 + is_negative_offset, '.');
+ str->push_back('E');
+ if (adjusted_exponent >= 0) {
+ str->push_back('+');
+ }
+ internal::StringFormatter<Int32Type> format;
+ format(adjusted_exponent, [str](util::string_view formatted) {
+ str->append(formatted.data(), formatted.size());
+ });
+ return;
+ }
+
+ if (num_digits > scale) {
+ const auto n = static_cast<size_t>(len - scale);
+ // Example 1:
+ // Precondition: *str = "123", len = num_digits = 3, scale = 1, n = 2
+ // After inserting decimal point: *str = "12.3"
+ // Example 2:
+ // Precondition: *str = "-123", len = 4, num_digits = 3, scale = 1, n = 3
+ // After inserting decimal point: *str = "-12.3"
+ str->insert(str->begin() + n, '.');
+ return;
+ }
+
+ // Example 1:
+ // Precondition: *str = "123", is_negative_offset = 0, num_digits = 3, scale = 4
+ // After insert: *str = "000123"
+ // After setting decimal point: *str = "0.0123"
+ // Example 2:
+ // Precondition: *str = "-123", is_negative_offset = 1, num_digits = 3, scale = 4
+ // After insert: *str = "-000123"
+ // After setting decimal point: *str = "-0.0123"
+ str->insert(is_negative_offset, scale - num_digits + 2, '0');
+ str->at(is_negative_offset + 1) = '.';
+}
+
+std::string Decimal128::ToString(int32_t scale) const {
+ std::string str(ToIntegerString());
+ AdjustIntegerStringWithScale(scale, &str);
+ return str;
+}
+
+// Iterates over input and for each group of kInt64DecimalDigits multiple out by
+// the appropriate power of 10 necessary to add source parsed as uint64 and
+// then adds the parsed value of source.
+static inline void ShiftAndAdd(const util::string_view& input, uint64_t out[],
+ size_t out_size) {
+ for (size_t posn = 0; posn < input.size();) {
+ const size_t group_size = std::min(kInt64DecimalDigits, input.size() - posn);
+ const uint64_t multiple = kUInt64PowersOfTen[group_size];
+ uint64_t chunk = 0;
+ ARROW_CHECK(
+ internal::ParseValue<UInt64Type>(input.data() + posn, group_size, &chunk));
+
+ for (size_t i = 0; i < out_size; ++i) {
+ uint128_t tmp = out[i];
+ tmp *= multiple;
+ tmp += chunk;
+ out[i] = static_cast<uint64_t>(tmp & 0xFFFFFFFFFFFFFFFFULL);
+ chunk = static_cast<uint64_t>(tmp >> 64);
+ }
+ posn += group_size;
+ }
+}
+
+namespace {
+
+struct DecimalComponents {
+ util::string_view whole_digits;
+ util::string_view fractional_digits;
+ int32_t exponent = 0;
+ char sign = 0;
+ bool has_exponent = false;
+};
+
+inline bool IsSign(char c) { return c == '-' || c == '+'; }
+
+inline bool IsDot(char c) { return c == '.'; }
+
+inline bool IsDigit(char c) { return c >= '0' && c <= '9'; }
+
+inline bool StartsExponent(char c) { return c == 'e' || c == 'E'; }
+
+inline size_t ParseDigitsRun(const char* s, size_t start, size_t size,
+ util::string_view* out) {
+ size_t pos;
+ for (pos = start; pos < size; ++pos) {
+ if (!IsDigit(s[pos])) {
+ break;
+ }
+ }
+ *out = util::string_view(s + start, pos - start);
+ return pos;
+}
+
+bool ParseDecimalComponents(const char* s, size_t size, DecimalComponents* out) {
+ size_t pos = 0;
+
+ if (size == 0) {
+ return false;
+ }
+ // Sign of the number
+ if (IsSign(s[pos])) {
+ out->sign = *(s + pos);
+ ++pos;
+ }
+ // First run of digits
+ pos = ParseDigitsRun(s, pos, size, &out->whole_digits);
+ if (pos == size) {
+ return !out->whole_digits.empty();
+ }
+ // Optional dot (if given in fractional form)
+ bool has_dot = IsDot(s[pos]);
+ if (has_dot) {
+ // Second run of digits
+ ++pos;
+ pos = ParseDigitsRun(s, pos, size, &out->fractional_digits);
+ }
+ if (out->whole_digits.empty() && out->fractional_digits.empty()) {
+ // Need at least some digits (whole or fractional)
+ return false;
+ }
+ if (pos == size) {
+ return true;
+ }
+ // Optional exponent
+ if (StartsExponent(s[pos])) {
+ ++pos;
+ if (pos != size && s[pos] == '+') {
+ ++pos;
+ }
+ out->has_exponent = true;
+ return internal::ParseValue<Int32Type>(s + pos, size - pos, &(out->exponent));
+ }
+ return pos == size;
+}
+
inline Status ToArrowStatus(DecimalStatus dstatus, int num_bits) {
switch (dstatus) {
case DecimalStatus::kSuccess:
@@ -504,171 +504,171 @@ inline Status ToArrowStatus(DecimalStatus dstatus, int num_bits) {
return Status::OK();
}
-} // namespace
-
-Status Decimal128::FromString(const util::string_view& s, Decimal128* out,
- int32_t* precision, int32_t* scale) {
- if (s.empty()) {
- return Status::Invalid("Empty string cannot be converted to decimal");
- }
-
- DecimalComponents dec;
- if (!ParseDecimalComponents(s.data(), s.size(), &dec)) {
- return Status::Invalid("The string '", s, "' is not a valid decimal number");
- }
-
- // Count number of significant digits (without leading zeros)
- size_t first_non_zero = dec.whole_digits.find_first_not_of('0');
- size_t significant_digits = dec.fractional_digits.size();
- if (first_non_zero != std::string::npos) {
- significant_digits += dec.whole_digits.size() - first_non_zero;
- }
- int32_t parsed_precision = static_cast<int32_t>(significant_digits);
-
- int32_t parsed_scale = 0;
- if (dec.has_exponent) {
- auto adjusted_exponent = dec.exponent;
- auto len = static_cast<int32_t>(significant_digits);
- parsed_scale = -adjusted_exponent + len - 1;
- } else {
- parsed_scale = static_cast<int32_t>(dec.fractional_digits.size());
- }
-
- if (out != nullptr) {
- std::array<uint64_t, 2> little_endian_array = {0, 0};
- ShiftAndAdd(dec.whole_digits, little_endian_array.data(), little_endian_array.size());
- ShiftAndAdd(dec.fractional_digits, little_endian_array.data(),
- little_endian_array.size());
- *out =
- Decimal128(static_cast<int64_t>(little_endian_array[1]), little_endian_array[0]);
- if (parsed_scale < 0) {
- *out *= GetScaleMultiplier(-parsed_scale);
- }
-
- if (dec.sign == '-') {
- out->Negate();
- }
- }
-
- if (parsed_scale < 0) {
- parsed_precision -= parsed_scale;
- parsed_scale = 0;
- }
-
- if (precision != nullptr) {
- *precision = parsed_precision;
- }
- if (scale != nullptr) {
- *scale = parsed_scale;
- }
-
- return Status::OK();
-}
-
-Status Decimal128::FromString(const std::string& s, Decimal128* out, int32_t* precision,
- int32_t* scale) {
- return FromString(util::string_view(s), out, precision, scale);
-}
-
-Status Decimal128::FromString(const char* s, Decimal128* out, int32_t* precision,
- int32_t* scale) {
- return FromString(util::string_view(s), out, precision, scale);
-}
-
-Result<Decimal128> Decimal128::FromString(const util::string_view& s) {
- Decimal128 out;
- RETURN_NOT_OK(FromString(s, &out, nullptr, nullptr));
- return std::move(out);
-}
-
-Result<Decimal128> Decimal128::FromString(const std::string& s) {
- return FromString(util::string_view(s));
-}
-
-Result<Decimal128> Decimal128::FromString(const char* s) {
- return FromString(util::string_view(s));
-}
-
-// Helper function used by Decimal128::FromBigEndian
-static inline uint64_t UInt64FromBigEndian(const uint8_t* bytes, int32_t length) {
- // We don't bounds check the length here because this is called by
- // FromBigEndian that has a Decimal128 as its out parameters and
- // that function is already checking the length of the bytes and only
- // passes lengths between zero and eight.
- uint64_t result = 0;
- // Using memcpy instead of special casing for length
- // and doing the conversion in 16, 32 parts, which could
- // possibly create unaligned memory access on certain platforms
- memcpy(reinterpret_cast<uint8_t*>(&result) + 8 - length, bytes, length);
- return ::arrow::BitUtil::FromBigEndian(result);
-}
-
-Result<Decimal128> Decimal128::FromBigEndian(const uint8_t* bytes, int32_t length) {
- static constexpr int32_t kMinDecimalBytes = 1;
- static constexpr int32_t kMaxDecimalBytes = 16;
-
- int64_t high, low;
-
+} // namespace
+
+Status Decimal128::FromString(const util::string_view& s, Decimal128* out,
+ int32_t* precision, int32_t* scale) {
+ if (s.empty()) {
+ return Status::Invalid("Empty string cannot be converted to decimal");
+ }
+
+ DecimalComponents dec;
+ if (!ParseDecimalComponents(s.data(), s.size(), &dec)) {
+ return Status::Invalid("The string '", s, "' is not a valid decimal number");
+ }
+
+ // Count number of significant digits (without leading zeros)
+ size_t first_non_zero = dec.whole_digits.find_first_not_of('0');
+ size_t significant_digits = dec.fractional_digits.size();
+ if (first_non_zero != std::string::npos) {
+ significant_digits += dec.whole_digits.size() - first_non_zero;
+ }
+ int32_t parsed_precision = static_cast<int32_t>(significant_digits);
+
+ int32_t parsed_scale = 0;
+ if (dec.has_exponent) {
+ auto adjusted_exponent = dec.exponent;
+ auto len = static_cast<int32_t>(significant_digits);
+ parsed_scale = -adjusted_exponent + len - 1;
+ } else {
+ parsed_scale = static_cast<int32_t>(dec.fractional_digits.size());
+ }
+
+ if (out != nullptr) {
+ std::array<uint64_t, 2> little_endian_array = {0, 0};
+ ShiftAndAdd(dec.whole_digits, little_endian_array.data(), little_endian_array.size());
+ ShiftAndAdd(dec.fractional_digits, little_endian_array.data(),
+ little_endian_array.size());
+ *out =
+ Decimal128(static_cast<int64_t>(little_endian_array[1]), little_endian_array[0]);
+ if (parsed_scale < 0) {
+ *out *= GetScaleMultiplier(-parsed_scale);
+ }
+
+ if (dec.sign == '-') {
+ out->Negate();
+ }
+ }
+
+ if (parsed_scale < 0) {
+ parsed_precision -= parsed_scale;
+ parsed_scale = 0;
+ }
+
+ if (precision != nullptr) {
+ *precision = parsed_precision;
+ }
+ if (scale != nullptr) {
+ *scale = parsed_scale;
+ }
+
+ return Status::OK();
+}
+
+Status Decimal128::FromString(const std::string& s, Decimal128* out, int32_t* precision,
+ int32_t* scale) {
+ return FromString(util::string_view(s), out, precision, scale);
+}
+
+Status Decimal128::FromString(const char* s, Decimal128* out, int32_t* precision,
+ int32_t* scale) {
+ return FromString(util::string_view(s), out, precision, scale);
+}
+
+Result<Decimal128> Decimal128::FromString(const util::string_view& s) {
+ Decimal128 out;
+ RETURN_NOT_OK(FromString(s, &out, nullptr, nullptr));
+ return std::move(out);
+}
+
+Result<Decimal128> Decimal128::FromString(const std::string& s) {
+ return FromString(util::string_view(s));
+}
+
+Result<Decimal128> Decimal128::FromString(const char* s) {
+ return FromString(util::string_view(s));
+}
+
+// Helper function used by Decimal128::FromBigEndian
+static inline uint64_t UInt64FromBigEndian(const uint8_t* bytes, int32_t length) {
+ // We don't bounds check the length here because this is called by
+ // FromBigEndian that has a Decimal128 as its out parameters and
+ // that function is already checking the length of the bytes and only
+ // passes lengths between zero and eight.
+ uint64_t result = 0;
+ // Using memcpy instead of special casing for length
+ // and doing the conversion in 16, 32 parts, which could
+ // possibly create unaligned memory access on certain platforms
+ memcpy(reinterpret_cast<uint8_t*>(&result) + 8 - length, bytes, length);
+ return ::arrow::BitUtil::FromBigEndian(result);
+}
+
+Result<Decimal128> Decimal128::FromBigEndian(const uint8_t* bytes, int32_t length) {
+ static constexpr int32_t kMinDecimalBytes = 1;
+ static constexpr int32_t kMaxDecimalBytes = 16;
+
+ int64_t high, low;
+
if (ARROW_PREDICT_FALSE(length < kMinDecimalBytes || length > kMaxDecimalBytes)) {
- return Status::Invalid("Length of byte array passed to Decimal128::FromBigEndian ",
- "was ", length, ", but must be between ", kMinDecimalBytes,
- " and ", kMaxDecimalBytes);
- }
-
- // Bytes are coming in big-endian, so the first byte is the MSB and therefore holds the
- // sign bit.
- const bool is_negative = static_cast<int8_t>(bytes[0]) < 0;
-
- // 1. Extract the high bytes
- // Stop byte of the high bytes
- const int32_t high_bits_offset = std::max(0, length - 8);
- const auto high_bits = UInt64FromBigEndian(bytes, high_bits_offset);
-
- if (high_bits_offset == 8) {
- // Avoid undefined shift by 64 below
- high = high_bits;
- } else {
- high = -1 * (is_negative && length < kMaxDecimalBytes);
- // Shift left enough bits to make room for the incoming int64_t
- high = SafeLeftShift(high, high_bits_offset * CHAR_BIT);
- // Preserve the upper bits by inplace OR-ing the int64_t
- high |= high_bits;
- }
-
- // 2. Extract the low bytes
- // Stop byte of the low bytes
- const int32_t low_bits_offset = std::min(length, 8);
- const auto low_bits =
- UInt64FromBigEndian(bytes + high_bits_offset, length - high_bits_offset);
-
- if (low_bits_offset == 8) {
- // Avoid undefined shift by 64 below
- low = low_bits;
- } else {
- // Sign extend the low bits if necessary
- low = -1 * (is_negative && length < 8);
- // Shift left enough bits to make room for the incoming int64_t
- low = SafeLeftShift(low, low_bits_offset * CHAR_BIT);
- // Preserve the upper bits by inplace OR-ing the int64_t
- low |= low_bits;
- }
-
- return Decimal128(high, static_cast<uint64_t>(low));
-}
-
-Status Decimal128::ToArrowStatus(DecimalStatus dstatus) const {
+ return Status::Invalid("Length of byte array passed to Decimal128::FromBigEndian ",
+ "was ", length, ", but must be between ", kMinDecimalBytes,
+ " and ", kMaxDecimalBytes);
+ }
+
+ // Bytes are coming in big-endian, so the first byte is the MSB and therefore holds the
+ // sign bit.
+ const bool is_negative = static_cast<int8_t>(bytes[0]) < 0;
+
+ // 1. Extract the high bytes
+ // Stop byte of the high bytes
+ const int32_t high_bits_offset = std::max(0, length - 8);
+ const auto high_bits = UInt64FromBigEndian(bytes, high_bits_offset);
+
+ if (high_bits_offset == 8) {
+ // Avoid undefined shift by 64 below
+ high = high_bits;
+ } else {
+ high = -1 * (is_negative && length < kMaxDecimalBytes);
+ // Shift left enough bits to make room for the incoming int64_t
+ high = SafeLeftShift(high, high_bits_offset * CHAR_BIT);
+ // Preserve the upper bits by inplace OR-ing the int64_t
+ high |= high_bits;
+ }
+
+ // 2. Extract the low bytes
+ // Stop byte of the low bytes
+ const int32_t low_bits_offset = std::min(length, 8);
+ const auto low_bits =
+ UInt64FromBigEndian(bytes + high_bits_offset, length - high_bits_offset);
+
+ if (low_bits_offset == 8) {
+ // Avoid undefined shift by 64 below
+ low = low_bits;
+ } else {
+ // Sign extend the low bits if necessary
+ low = -1 * (is_negative && length < 8);
+ // Shift left enough bits to make room for the incoming int64_t
+ low = SafeLeftShift(low, low_bits_offset * CHAR_BIT);
+ // Preserve the upper bits by inplace OR-ing the int64_t
+ low |= low_bits;
+ }
+
+ return Decimal128(high, static_cast<uint64_t>(low));
+}
+
+Status Decimal128::ToArrowStatus(DecimalStatus dstatus) const {
return arrow::ToArrowStatus(dstatus, 128);
}
-
+
std::ostream& operator<<(std::ostream& os, const Decimal128& decimal) {
os << decimal.ToIntegerString();
return os;
}
-
+
Decimal256::Decimal256(const std::string& str) : Decimal256() {
*this = Decimal256::FromString(str).ValueOrDie();
}
-
+
std::string Decimal256::ToIntegerString() const {
std::string result;
if (static_cast<int64_t>(little_endian_array()[3]) < 0) {
@@ -681,7 +681,7 @@ std::string Decimal256::ToIntegerString() const {
}
return result;
}
-
+
std::string Decimal256::ToString(int32_t scale) const {
std::string str(ToIntegerString());
AdjustIntegerStringWithScale(scale, &str);
@@ -692,7 +692,7 @@ Status Decimal256::FromString(const util::string_view& s, Decimal256* out,
int32_t* precision, int32_t* scale) {
if (s.empty()) {
return Status::Invalid("Empty string cannot be converted to decimal");
- }
+ }
DecimalComponents dec;
if (!ParseDecimalComponents(s.data(), s.size(), &dec)) {
@@ -733,8 +733,8 @@ Status Decimal256::FromString(const util::string_view& s, Decimal256* out,
}
return Status::OK();
-}
-
+}
+
Status Decimal256::FromString(const std::string& s, Decimal256* out, int32_t* precision,
int32_t* scale) {
return FromString(util::string_view(s), out, precision, scale);
@@ -926,7 +926,7 @@ double Decimal256::ToDouble(int32_t scale) const {
}
std::ostream& operator<<(std::ostream& os, const Decimal256& decimal) {
- os << decimal.ToIntegerString();
- return os;
-}
-} // namespace arrow
+ os << decimal.ToIntegerString();
+ return os;
+}
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/decimal.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/decimal.h
index 4a158728833..7c83b812fe9 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/decimal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/decimal.h
@@ -1,178 +1,178 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <iosfwd>
-#include <limits>
-#include <string>
-#include <utility>
-
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/util/basic_decimal.h"
-#include "arrow/util/string_view.h"
-
-namespace arrow {
-
-/// Represents a signed 128-bit integer in two's complement.
-/// Calculations wrap around and overflow is ignored.
-/// The max decimal precision that can be safely represented is
-/// 38 significant digits.
-///
-/// For a discussion of the algorithms, look at Knuth's volume 2,
-/// Semi-numerical Algorithms section 4.3.1.
-///
-/// Adapted from the Apache ORC C++ implementation
-///
-/// The implementation is split into two parts :
-///
-/// 1. BasicDecimal128
-/// - can be safely compiled to IR without references to libstdc++.
-/// 2. Decimal128
-/// - has additional functionality on top of BasicDecimal128 to deal with
-/// strings and streams.
-class ARROW_EXPORT Decimal128 : public BasicDecimal128 {
- public:
- /// \cond FALSE
- // (need to avoid a duplicate definition in Sphinx)
- using BasicDecimal128::BasicDecimal128;
- /// \endcond
-
- /// \brief constructor creates a Decimal128 from a BasicDecimal128.
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <iosfwd>
+#include <limits>
+#include <string>
+#include <utility>
+
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/basic_decimal.h"
+#include "arrow/util/string_view.h"
+
+namespace arrow {
+
+/// Represents a signed 128-bit integer in two's complement.
+/// Calculations wrap around and overflow is ignored.
+/// The max decimal precision that can be safely represented is
+/// 38 significant digits.
+///
+/// For a discussion of the algorithms, look at Knuth's volume 2,
+/// Semi-numerical Algorithms section 4.3.1.
+///
+/// Adapted from the Apache ORC C++ implementation
+///
+/// The implementation is split into two parts :
+///
+/// 1. BasicDecimal128
+/// - can be safely compiled to IR without references to libstdc++.
+/// 2. Decimal128
+/// - has additional functionality on top of BasicDecimal128 to deal with
+/// strings and streams.
+class ARROW_EXPORT Decimal128 : public BasicDecimal128 {
+ public:
+ /// \cond FALSE
+ // (need to avoid a duplicate definition in Sphinx)
+ using BasicDecimal128::BasicDecimal128;
+ /// \endcond
+
+ /// \brief constructor creates a Decimal128 from a BasicDecimal128.
constexpr Decimal128(const BasicDecimal128& value) noexcept // NOLINT runtime/explicit
: BasicDecimal128(value) {}
-
- /// \brief Parse the number from a base 10 string representation.
- explicit Decimal128(const std::string& value);
-
- /// \brief Empty constructor creates a Decimal128 with a value of 0.
- // This is required on some older compilers.
- constexpr Decimal128() noexcept : BasicDecimal128() {}
-
- /// Divide this number by right and return the result.
- ///
- /// This operation is not destructive.
- /// The answer rounds to zero. Signs work like:
- /// 21 / 5 -> 4, 1
- /// -21 / 5 -> -4, -1
- /// 21 / -5 -> -4, 1
- /// -21 / -5 -> 4, -1
- /// \param[in] divisor the number to divide by
- /// \return the pair of the quotient and the remainder
- Result<std::pair<Decimal128, Decimal128>> Divide(const Decimal128& divisor) const {
- std::pair<Decimal128, Decimal128> result;
- auto dstatus = BasicDecimal128::Divide(divisor, &result.first, &result.second);
- ARROW_RETURN_NOT_OK(ToArrowStatus(dstatus));
- return std::move(result);
- }
-
- /// \brief Convert the Decimal128 value to a base 10 decimal string with the given
- /// scale.
- std::string ToString(int32_t scale) const;
-
- /// \brief Convert the value to an integer string
- std::string ToIntegerString() const;
-
- /// \brief Cast this value to an int64_t.
- explicit operator int64_t() const;
-
- /// \brief Convert a decimal string to a Decimal128 value, optionally including
- /// precision and scale if they're passed in and not null.
- static Status FromString(const util::string_view& s, Decimal128* out,
- int32_t* precision, int32_t* scale = NULLPTR);
- static Status FromString(const std::string& s, Decimal128* out, int32_t* precision,
- int32_t* scale = NULLPTR);
- static Status FromString(const char* s, Decimal128* out, int32_t* precision,
- int32_t* scale = NULLPTR);
- static Result<Decimal128> FromString(const util::string_view& s);
- static Result<Decimal128> FromString(const std::string& s);
- static Result<Decimal128> FromString(const char* s);
-
- static Result<Decimal128> FromReal(double real, int32_t precision, int32_t scale);
- static Result<Decimal128> FromReal(float real, int32_t precision, int32_t scale);
-
- /// \brief Convert from a big-endian byte representation. The length must be
- /// between 1 and 16.
- /// \return error status if the length is an invalid value
- static Result<Decimal128> FromBigEndian(const uint8_t* data, int32_t length);
-
- /// \brief Convert Decimal128 from one scale to another
- Result<Decimal128> Rescale(int32_t original_scale, int32_t new_scale) const {
- Decimal128 out;
- auto dstatus = BasicDecimal128::Rescale(original_scale, new_scale, &out);
- ARROW_RETURN_NOT_OK(ToArrowStatus(dstatus));
- return std::move(out);
- }
-
- /// \brief Convert to a signed integer
- template <typename T, typename = internal::EnableIfIsOneOf<T, int32_t, int64_t>>
- Result<T> ToInteger() const {
- constexpr auto min_value = std::numeric_limits<T>::min();
- constexpr auto max_value = std::numeric_limits<T>::max();
- const auto& self = *this;
- if (self < min_value || self > max_value) {
- return Status::Invalid("Invalid cast from Decimal128 to ", sizeof(T),
- " byte integer");
- }
- return static_cast<T>(low_bits());
- }
-
- /// \brief Convert to a signed integer
- template <typename T, typename = internal::EnableIfIsOneOf<T, int32_t, int64_t>>
- Status ToInteger(T* out) const {
- return ToInteger<T>().Value(out);
- }
-
- /// \brief Convert to a floating-point number (scaled)
- float ToFloat(int32_t scale) const;
- /// \brief Convert to a floating-point number (scaled)
- double ToDouble(int32_t scale) const;
-
- /// \brief Convert to a floating-point number (scaled)
- template <typename T>
- T ToReal(int32_t scale) const {
- return ToRealConversion<T>::ToReal(*this, scale);
- }
-
- friend ARROW_EXPORT std::ostream& operator<<(std::ostream& os,
- const Decimal128& decimal);
-
- private:
- /// Converts internal error code to Status
- Status ToArrowStatus(DecimalStatus dstatus) const;
-
- template <typename T>
- struct ToRealConversion {};
-};
-
-template <>
-struct Decimal128::ToRealConversion<float> {
- static float ToReal(const Decimal128& dec, int32_t scale) { return dec.ToFloat(scale); }
-};
-
-template <>
-struct Decimal128::ToRealConversion<double> {
- static double ToReal(const Decimal128& dec, int32_t scale) {
- return dec.ToDouble(scale);
- }
-};
-
+
+ /// \brief Parse the number from a base 10 string representation.
+ explicit Decimal128(const std::string& value);
+
+ /// \brief Empty constructor creates a Decimal128 with a value of 0.
+ // This is required on some older compilers.
+ constexpr Decimal128() noexcept : BasicDecimal128() {}
+
+ /// Divide this number by right and return the result.
+ ///
+ /// This operation is not destructive.
+ /// The answer rounds to zero. Signs work like:
+ /// 21 / 5 -> 4, 1
+ /// -21 / 5 -> -4, -1
+ /// 21 / -5 -> -4, 1
+ /// -21 / -5 -> 4, -1
+ /// \param[in] divisor the number to divide by
+ /// \return the pair of the quotient and the remainder
+ Result<std::pair<Decimal128, Decimal128>> Divide(const Decimal128& divisor) const {
+ std::pair<Decimal128, Decimal128> result;
+ auto dstatus = BasicDecimal128::Divide(divisor, &result.first, &result.second);
+ ARROW_RETURN_NOT_OK(ToArrowStatus(dstatus));
+ return std::move(result);
+ }
+
+ /// \brief Convert the Decimal128 value to a base 10 decimal string with the given
+ /// scale.
+ std::string ToString(int32_t scale) const;
+
+ /// \brief Convert the value to an integer string
+ std::string ToIntegerString() const;
+
+ /// \brief Cast this value to an int64_t.
+ explicit operator int64_t() const;
+
+ /// \brief Convert a decimal string to a Decimal128 value, optionally including
+ /// precision and scale if they're passed in and not null.
+ static Status FromString(const util::string_view& s, Decimal128* out,
+ int32_t* precision, int32_t* scale = NULLPTR);
+ static Status FromString(const std::string& s, Decimal128* out, int32_t* precision,
+ int32_t* scale = NULLPTR);
+ static Status FromString(const char* s, Decimal128* out, int32_t* precision,
+ int32_t* scale = NULLPTR);
+ static Result<Decimal128> FromString(const util::string_view& s);
+ static Result<Decimal128> FromString(const std::string& s);
+ static Result<Decimal128> FromString(const char* s);
+
+ static Result<Decimal128> FromReal(double real, int32_t precision, int32_t scale);
+ static Result<Decimal128> FromReal(float real, int32_t precision, int32_t scale);
+
+ /// \brief Convert from a big-endian byte representation. The length must be
+ /// between 1 and 16.
+ /// \return error status if the length is an invalid value
+ static Result<Decimal128> FromBigEndian(const uint8_t* data, int32_t length);
+
+ /// \brief Convert Decimal128 from one scale to another
+ Result<Decimal128> Rescale(int32_t original_scale, int32_t new_scale) const {
+ Decimal128 out;
+ auto dstatus = BasicDecimal128::Rescale(original_scale, new_scale, &out);
+ ARROW_RETURN_NOT_OK(ToArrowStatus(dstatus));
+ return std::move(out);
+ }
+
+ /// \brief Convert to a signed integer
+ template <typename T, typename = internal::EnableIfIsOneOf<T, int32_t, int64_t>>
+ Result<T> ToInteger() const {
+ constexpr auto min_value = std::numeric_limits<T>::min();
+ constexpr auto max_value = std::numeric_limits<T>::max();
+ const auto& self = *this;
+ if (self < min_value || self > max_value) {
+ return Status::Invalid("Invalid cast from Decimal128 to ", sizeof(T),
+ " byte integer");
+ }
+ return static_cast<T>(low_bits());
+ }
+
+ /// \brief Convert to a signed integer
+ template <typename T, typename = internal::EnableIfIsOneOf<T, int32_t, int64_t>>
+ Status ToInteger(T* out) const {
+ return ToInteger<T>().Value(out);
+ }
+
+ /// \brief Convert to a floating-point number (scaled)
+ float ToFloat(int32_t scale) const;
+ /// \brief Convert to a floating-point number (scaled)
+ double ToDouble(int32_t scale) const;
+
+ /// \brief Convert to a floating-point number (scaled)
+ template <typename T>
+ T ToReal(int32_t scale) const {
+ return ToRealConversion<T>::ToReal(*this, scale);
+ }
+
+ friend ARROW_EXPORT std::ostream& operator<<(std::ostream& os,
+ const Decimal128& decimal);
+
+ private:
+ /// Converts internal error code to Status
+ Status ToArrowStatus(DecimalStatus dstatus) const;
+
+ template <typename T>
+ struct ToRealConversion {};
+};
+
+template <>
+struct Decimal128::ToRealConversion<float> {
+ static float ToReal(const Decimal128& dec, int32_t scale) { return dec.ToFloat(scale); }
+};
+
+template <>
+struct Decimal128::ToRealConversion<double> {
+ static double ToReal(const Decimal128& dec, int32_t scale) {
+ return dec.ToDouble(scale);
+ }
+};
+
/// Represents a signed 256-bit integer in two's complement.
/// The max decimal precision that can be safely represented is
/// 76 significant digits.
@@ -288,4 +288,4 @@ struct Decimal256::ToRealConversion<double> {
}
};
-} // namespace arrow
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/delimiting.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/delimiting.cc
index fe1b6ea3126..a4d0ae6d68d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/delimiting.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/delimiting.cc
@@ -1,66 +1,66 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/delimiting.h"
-#include "arrow/buffer.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/delimiting.h"
+#include "arrow/buffer.h"
#include "arrow/util/logging.h"
-
-namespace arrow {
-
-BoundaryFinder::~BoundaryFinder() {}
-
-namespace {
-
-Status StraddlingTooLarge() {
- return Status::Invalid(
- "straddling object straddles two block boundaries (try to increase block size?)");
-}
-
-class NewlineBoundaryFinder : public BoundaryFinder {
- public:
- Status FindFirst(util::string_view partial, util::string_view block,
- int64_t* out_pos) override {
- auto pos = block.find_first_of(newline_delimiters);
- if (pos == util::string_view::npos) {
- *out_pos = kNoDelimiterFound;
- } else {
- auto end = block.find_first_not_of(newline_delimiters, pos);
- if (end == util::string_view::npos) {
- end = block.length();
- }
- *out_pos = static_cast<int64_t>(end);
- }
- return Status::OK();
- }
-
- Status FindLast(util::string_view block, int64_t* out_pos) override {
- auto pos = block.find_last_of(newline_delimiters);
- if (pos == util::string_view::npos) {
- *out_pos = kNoDelimiterFound;
- } else {
- auto end = block.find_first_not_of(newline_delimiters, pos);
- if (end == util::string_view::npos) {
- end = block.length();
- }
- *out_pos = static_cast<int64_t>(end);
- }
- return Status::OK();
- }
-
+
+namespace arrow {
+
+BoundaryFinder::~BoundaryFinder() {}
+
+namespace {
+
+Status StraddlingTooLarge() {
+ return Status::Invalid(
+ "straddling object straddles two block boundaries (try to increase block size?)");
+}
+
+class NewlineBoundaryFinder : public BoundaryFinder {
+ public:
+ Status FindFirst(util::string_view partial, util::string_view block,
+ int64_t* out_pos) override {
+ auto pos = block.find_first_of(newline_delimiters);
+ if (pos == util::string_view::npos) {
+ *out_pos = kNoDelimiterFound;
+ } else {
+ auto end = block.find_first_not_of(newline_delimiters, pos);
+ if (end == util::string_view::npos) {
+ end = block.length();
+ }
+ *out_pos = static_cast<int64_t>(end);
+ }
+ return Status::OK();
+ }
+
+ Status FindLast(util::string_view block, int64_t* out_pos) override {
+ auto pos = block.find_last_of(newline_delimiters);
+ if (pos == util::string_view::npos) {
+ *out_pos = kNoDelimiterFound;
+ } else {
+ auto end = block.find_first_not_of(newline_delimiters, pos);
+ if (end == util::string_view::npos) {
+ end = block.length();
+ }
+ *out_pos = static_cast<int64_t>(end);
+ }
+ return Status::OK();
+ }
+
Status FindNth(util::string_view partial, util::string_view block, int64_t count,
int64_t* out_pos, int64_t* num_found) override {
DCHECK(partial.find_first_of(newline_delimiters) == util::string_view::npos);
@@ -90,84 +90,84 @@ class NewlineBoundaryFinder : public BoundaryFinder {
return Status::OK();
}
- protected:
- static constexpr const char* newline_delimiters = "\r\n";
-};
-
-} // namespace
-
-std::shared_ptr<BoundaryFinder> MakeNewlineBoundaryFinder() {
- return std::make_shared<NewlineBoundaryFinder>();
-}
-
-Chunker::~Chunker() {}
-
-Chunker::Chunker(std::shared_ptr<BoundaryFinder> delimiter)
- : boundary_finder_(delimiter) {}
-
-Status Chunker::Process(std::shared_ptr<Buffer> block, std::shared_ptr<Buffer>* whole,
- std::shared_ptr<Buffer>* partial) {
- int64_t last_pos = -1;
- RETURN_NOT_OK(boundary_finder_->FindLast(util::string_view(*block), &last_pos));
- if (last_pos == BoundaryFinder::kNoDelimiterFound) {
- // No delimiter found
- *whole = SliceBuffer(block, 0, 0);
- *partial = block;
- return Status::OK();
- } else {
- *whole = SliceBuffer(block, 0, last_pos);
- *partial = SliceBuffer(block, last_pos);
- }
- return Status::OK();
-}
-
-Status Chunker::ProcessWithPartial(std::shared_ptr<Buffer> partial,
- std::shared_ptr<Buffer> block,
- std::shared_ptr<Buffer>* completion,
- std::shared_ptr<Buffer>* rest) {
- if (partial->size() == 0) {
- // If partial is empty, don't bother looking for completion
- *completion = SliceBuffer(block, 0, 0);
- *rest = block;
- return Status::OK();
- }
- int64_t first_pos = -1;
- RETURN_NOT_OK(boundary_finder_->FindFirst(util::string_view(*partial),
- util::string_view(*block), &first_pos));
- if (first_pos == BoundaryFinder::kNoDelimiterFound) {
- // No delimiter in block => the current object is too large for block size
- return StraddlingTooLarge();
- } else {
- *completion = SliceBuffer(block, 0, first_pos);
- *rest = SliceBuffer(block, first_pos);
- return Status::OK();
- }
-}
-
-Status Chunker::ProcessFinal(std::shared_ptr<Buffer> partial,
- std::shared_ptr<Buffer> block,
- std::shared_ptr<Buffer>* completion,
- std::shared_ptr<Buffer>* rest) {
- if (partial->size() == 0) {
- // If partial is empty, don't bother looking for completion
- *completion = SliceBuffer(block, 0, 0);
- *rest = block;
- return Status::OK();
- }
- int64_t first_pos = -1;
- RETURN_NOT_OK(boundary_finder_->FindFirst(util::string_view(*partial),
- util::string_view(*block), &first_pos));
- if (first_pos == BoundaryFinder::kNoDelimiterFound) {
- // No delimiter in block => it's entirely a completion of partial
- *completion = block;
- *rest = SliceBuffer(block, 0, 0);
- } else {
- *completion = SliceBuffer(block, 0, first_pos);
- *rest = SliceBuffer(block, first_pos);
- }
- return Status::OK();
-}
-
+ protected:
+ static constexpr const char* newline_delimiters = "\r\n";
+};
+
+} // namespace
+
+std::shared_ptr<BoundaryFinder> MakeNewlineBoundaryFinder() {
+ return std::make_shared<NewlineBoundaryFinder>();
+}
+
+Chunker::~Chunker() {}
+
+Chunker::Chunker(std::shared_ptr<BoundaryFinder> delimiter)
+ : boundary_finder_(delimiter) {}
+
+Status Chunker::Process(std::shared_ptr<Buffer> block, std::shared_ptr<Buffer>* whole,
+ std::shared_ptr<Buffer>* partial) {
+ int64_t last_pos = -1;
+ RETURN_NOT_OK(boundary_finder_->FindLast(util::string_view(*block), &last_pos));
+ if (last_pos == BoundaryFinder::kNoDelimiterFound) {
+ // No delimiter found
+ *whole = SliceBuffer(block, 0, 0);
+ *partial = block;
+ return Status::OK();
+ } else {
+ *whole = SliceBuffer(block, 0, last_pos);
+ *partial = SliceBuffer(block, last_pos);
+ }
+ return Status::OK();
+}
+
+Status Chunker::ProcessWithPartial(std::shared_ptr<Buffer> partial,
+ std::shared_ptr<Buffer> block,
+ std::shared_ptr<Buffer>* completion,
+ std::shared_ptr<Buffer>* rest) {
+ if (partial->size() == 0) {
+ // If partial is empty, don't bother looking for completion
+ *completion = SliceBuffer(block, 0, 0);
+ *rest = block;
+ return Status::OK();
+ }
+ int64_t first_pos = -1;
+ RETURN_NOT_OK(boundary_finder_->FindFirst(util::string_view(*partial),
+ util::string_view(*block), &first_pos));
+ if (first_pos == BoundaryFinder::kNoDelimiterFound) {
+ // No delimiter in block => the current object is too large for block size
+ return StraddlingTooLarge();
+ } else {
+ *completion = SliceBuffer(block, 0, first_pos);
+ *rest = SliceBuffer(block, first_pos);
+ return Status::OK();
+ }
+}
+
+Status Chunker::ProcessFinal(std::shared_ptr<Buffer> partial,
+ std::shared_ptr<Buffer> block,
+ std::shared_ptr<Buffer>* completion,
+ std::shared_ptr<Buffer>* rest) {
+ if (partial->size() == 0) {
+ // If partial is empty, don't bother looking for completion
+ *completion = SliceBuffer(block, 0, 0);
+ *rest = block;
+ return Status::OK();
+ }
+ int64_t first_pos = -1;
+ RETURN_NOT_OK(boundary_finder_->FindFirst(util::string_view(*partial),
+ util::string_view(*block), &first_pos));
+ if (first_pos == BoundaryFinder::kNoDelimiterFound) {
+ // No delimiter in block => it's entirely a completion of partial
+ *completion = block;
+ *rest = SliceBuffer(block, 0, 0);
+ } else {
+ *completion = SliceBuffer(block, 0, first_pos);
+ *rest = SliceBuffer(block, first_pos);
+ }
+ return Status::OK();
+}
+
Status Chunker::ProcessSkip(std::shared_ptr<Buffer> partial,
std::shared_ptr<Buffer> block, bool final, int64_t* count,
std::shared_ptr<Buffer>* rest) {
@@ -190,4 +190,4 @@ Status Chunker::ProcessSkip(std::shared_ptr<Buffer> partial,
return Status::OK();
}
-} // namespace arrow
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/delimiting.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/delimiting.h
index b4b868340db..220e25bbd1d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/delimiting.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/delimiting.h
@@ -1,58 +1,58 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-
-#include "arrow/status.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/string_view.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class Buffer;
-
-class ARROW_EXPORT BoundaryFinder {
- public:
- BoundaryFinder() = default;
-
- virtual ~BoundaryFinder();
-
- /// \brief Find the position of the first delimiter inside block
- ///
- /// `partial` is taken to be the beginning of the block, and `block`
- /// its continuation. Also, `partial` doesn't contain a delimiter.
- ///
- /// The returned `out_pos` is relative to `block`'s start and should point
- /// to the first character after the first delimiter.
- /// `out_pos` will be -1 if no delimiter is found.
- virtual Status FindFirst(util::string_view partial, util::string_view block,
- int64_t* out_pos) = 0;
-
- /// \brief Find the position of the last delimiter inside block
- ///
- /// The returned `out_pos` is relative to `block`'s start and should point
- /// to the first character after the last delimiter.
- /// `out_pos` will be -1 if no delimiter is found.
- virtual Status FindLast(util::string_view block, int64_t* out_pos) = 0;
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+
+#include "arrow/status.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Buffer;
+
+class ARROW_EXPORT BoundaryFinder {
+ public:
+ BoundaryFinder() = default;
+
+ virtual ~BoundaryFinder();
+
+ /// \brief Find the position of the first delimiter inside block
+ ///
+ /// `partial` is taken to be the beginning of the block, and `block`
+ /// its continuation. Also, `partial` doesn't contain a delimiter.
+ ///
+ /// The returned `out_pos` is relative to `block`'s start and should point
+ /// to the first character after the first delimiter.
+ /// `out_pos` will be -1 if no delimiter is found.
+ virtual Status FindFirst(util::string_view partial, util::string_view block,
+ int64_t* out_pos) = 0;
+
+ /// \brief Find the position of the last delimiter inside block
+ ///
+ /// The returned `out_pos` is relative to `block`'s start and should point
+ /// to the first character after the last delimiter.
+ /// `out_pos` will be -1 if no delimiter is found.
+ virtual Status FindLast(util::string_view block, int64_t* out_pos) = 0;
+
/// \brief Find the position of the Nth delimiter inside the block
///
/// `partial` is taken to be the beginning of the block, and `block`
@@ -66,91 +66,91 @@ class ARROW_EXPORT BoundaryFinder {
virtual Status FindNth(util::string_view partial, util::string_view block,
int64_t count, int64_t* out_pos, int64_t* num_found) = 0;
- static constexpr int64_t kNoDelimiterFound = -1;
-
- protected:
- ARROW_DISALLOW_COPY_AND_ASSIGN(BoundaryFinder);
-};
-
-ARROW_EXPORT
-std::shared_ptr<BoundaryFinder> MakeNewlineBoundaryFinder();
-
-/// \brief A reusable block-based chunker for delimited data
-///
-/// The chunker takes a block of delimited data and helps carve a sub-block
-/// which begins and ends on delimiters (suitable for consumption by parsers
-/// which can only parse whole objects).
-class ARROW_EXPORT Chunker {
- public:
- explicit Chunker(std::shared_ptr<BoundaryFinder> delimiter);
- ~Chunker();
-
- /// \brief Carve up a chunk in a block of data to contain only whole objects
- ///
- /// Pre-conditions:
- /// - `block` is the start of a valid block of delimited data
- /// (i.e. starts just after a delimiter)
- ///
- /// Post-conditions:
- /// - block == whole + partial
- /// - `whole` is a valid block of delimited data
- /// (i.e. starts just after a delimiter and ends with a delimiter)
- /// - `partial` doesn't contain an entire delimited object
- /// (IOW: `partial` is generally small)
- ///
- /// This method will look for the last delimiter in `block` and may
- /// therefore be costly.
- ///
- /// \param[in] block data to be chunked
- /// \param[out] whole subrange of block containing whole delimited objects
- /// \param[out] partial subrange of block starting with a partial delimited object
- Status Process(std::shared_ptr<Buffer> block, std::shared_ptr<Buffer>* whole,
- std::shared_ptr<Buffer>* partial);
-
- /// \brief Carve the completion of a partial object out of a block
- ///
- /// Pre-conditions:
- /// - `partial` is the start of a valid block of delimited data
- /// (i.e. starts just after a delimiter)
- /// - `block` follows `partial` in file order
- ///
- /// Post-conditions:
- /// - block == completion + rest
- /// - `partial + completion` is a valid block of delimited data
- /// (i.e. starts just after a delimiter and ends with a delimiter)
- /// - `completion` doesn't contain an entire delimited object
- /// (IOW: `completion` is generally small)
- ///
- /// This method will look for the first delimiter in `block` and should
- /// therefore be reasonably cheap.
- ///
- /// \param[in] partial incomplete delimited data
- /// \param[in] block delimited data following partial
- /// \param[out] completion subrange of block containing the completion of partial
- /// \param[out] rest subrange of block containing what completion does not cover
- Status ProcessWithPartial(std::shared_ptr<Buffer> partial,
- std::shared_ptr<Buffer> block,
- std::shared_ptr<Buffer>* completion,
- std::shared_ptr<Buffer>* rest);
-
- /// \brief Like ProcessWithPartial, but for the last block of a file
- ///
- /// This method allows for a final delimited object without a trailing delimiter
- /// (ProcessWithPartial would return an error in that case).
- ///
- /// Pre-conditions:
- /// - `partial` is the start of a valid block of delimited data
- /// - `block` follows `partial` in file order and is the last data block
- ///
- /// Post-conditions:
- /// - block == completion + rest
- /// - `partial + completion` is a valid block of delimited data
- /// - `completion` doesn't contain an entire delimited object
- /// (IOW: `completion` is generally small)
- ///
- Status ProcessFinal(std::shared_ptr<Buffer> partial, std::shared_ptr<Buffer> block,
- std::shared_ptr<Buffer>* completion, std::shared_ptr<Buffer>* rest);
-
+ static constexpr int64_t kNoDelimiterFound = -1;
+
+ protected:
+ ARROW_DISALLOW_COPY_AND_ASSIGN(BoundaryFinder);
+};
+
+ARROW_EXPORT
+std::shared_ptr<BoundaryFinder> MakeNewlineBoundaryFinder();
+
+/// \brief A reusable block-based chunker for delimited data
+///
+/// The chunker takes a block of delimited data and helps carve a sub-block
+/// which begins and ends on delimiters (suitable for consumption by parsers
+/// which can only parse whole objects).
+class ARROW_EXPORT Chunker {
+ public:
+ explicit Chunker(std::shared_ptr<BoundaryFinder> delimiter);
+ ~Chunker();
+
+ /// \brief Carve up a chunk in a block of data to contain only whole objects
+ ///
+ /// Pre-conditions:
+ /// - `block` is the start of a valid block of delimited data
+ /// (i.e. starts just after a delimiter)
+ ///
+ /// Post-conditions:
+ /// - block == whole + partial
+ /// - `whole` is a valid block of delimited data
+ /// (i.e. starts just after a delimiter and ends with a delimiter)
+ /// - `partial` doesn't contain an entire delimited object
+ /// (IOW: `partial` is generally small)
+ ///
+ /// This method will look for the last delimiter in `block` and may
+ /// therefore be costly.
+ ///
+ /// \param[in] block data to be chunked
+ /// \param[out] whole subrange of block containing whole delimited objects
+ /// \param[out] partial subrange of block starting with a partial delimited object
+ Status Process(std::shared_ptr<Buffer> block, std::shared_ptr<Buffer>* whole,
+ std::shared_ptr<Buffer>* partial);
+
+ /// \brief Carve the completion of a partial object out of a block
+ ///
+ /// Pre-conditions:
+ /// - `partial` is the start of a valid block of delimited data
+ /// (i.e. starts just after a delimiter)
+ /// - `block` follows `partial` in file order
+ ///
+ /// Post-conditions:
+ /// - block == completion + rest
+ /// - `partial + completion` is a valid block of delimited data
+ /// (i.e. starts just after a delimiter and ends with a delimiter)
+ /// - `completion` doesn't contain an entire delimited object
+ /// (IOW: `completion` is generally small)
+ ///
+ /// This method will look for the first delimiter in `block` and should
+ /// therefore be reasonably cheap.
+ ///
+ /// \param[in] partial incomplete delimited data
+ /// \param[in] block delimited data following partial
+ /// \param[out] completion subrange of block containing the completion of partial
+ /// \param[out] rest subrange of block containing what completion does not cover
+ Status ProcessWithPartial(std::shared_ptr<Buffer> partial,
+ std::shared_ptr<Buffer> block,
+ std::shared_ptr<Buffer>* completion,
+ std::shared_ptr<Buffer>* rest);
+
+ /// \brief Like ProcessWithPartial, but for the last block of a file
+ ///
+ /// This method allows for a final delimited object without a trailing delimiter
+ /// (ProcessWithPartial would return an error in that case).
+ ///
+ /// Pre-conditions:
+ /// - `partial` is the start of a valid block of delimited data
+ /// - `block` follows `partial` in file order and is the last data block
+ ///
+ /// Post-conditions:
+ /// - block == completion + rest
+ /// - `partial + completion` is a valid block of delimited data
+ /// - `completion` doesn't contain an entire delimited object
+ /// (IOW: `completion` is generally small)
+ ///
+ Status ProcessFinal(std::shared_ptr<Buffer> partial, std::shared_ptr<Buffer> block,
+ std::shared_ptr<Buffer>* completion, std::shared_ptr<Buffer>* rest);
+
/// \brief Skip count number of rows
/// Pre-conditions:
/// - `partial` is the start of a valid block of delimited data
@@ -172,10 +172,10 @@ class ARROW_EXPORT Chunker {
Status ProcessSkip(std::shared_ptr<Buffer> partial, std::shared_ptr<Buffer> block,
bool final, int64_t* count, std::shared_ptr<Buffer>* rest);
- protected:
- ARROW_DISALLOW_COPY_AND_ASSIGN(Chunker);
-
- std::shared_ptr<BoundaryFinder> boundary_finder_;
-};
-
-} // namespace arrow
+ protected:
+ ARROW_DISALLOW_COPY_AND_ASSIGN(Chunker);
+
+ std::shared_ptr<BoundaryFinder> boundary_finder_;
+};
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/dispatch.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/dispatch.h
index fae9293f9e7..4cdbecfdc77 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/dispatch.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/dispatch.h
@@ -1,115 +1,115 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <utility>
-#include <vector>
-
-#include "arrow/status.h"
-#include "arrow/util/cpu_info.h"
-
-namespace arrow {
-namespace internal {
-
-enum class DispatchLevel : int {
- // These dispatch levels, corresponding to instruction set features,
- // are sorted in increasing order of preference.
- NONE = 0,
- SSE4_2,
- AVX2,
- AVX512,
- NEON,
- MAX
-};
-
-/*
- A facility for dynamic dispatch according to available DispatchLevel.
-
- Typical use:
-
- static void my_function_default(...);
- static void my_function_avx2(...);
-
- struct MyDynamicFunction {
- using FunctionType = decltype(&my_function_default);
-
- static std::vector<std::pair<DispatchLevel, FunctionType>> implementations() {
- return {
- { DispatchLevel::NONE, my_function_default }
- #if defined(ARROW_HAVE_RUNTIME_AVX2)
- , { DispatchLevel::AVX2, my_function_avx2 }
- #endif
- };
- }
- };
-
- void my_function(...) {
- static DynamicDispatch<MyDynamicFunction> dispatch;
- return dispatch.func(...);
- }
-*/
-template <typename DynamicFunction>
-class DynamicDispatch {
- protected:
- using FunctionType = typename DynamicFunction::FunctionType;
- using Implementation = std::pair<DispatchLevel, FunctionType>;
-
- public:
- DynamicDispatch() { Resolve(DynamicFunction::implementations()); }
-
- FunctionType func = {};
-
- protected:
- // Use the Implementation with the highest DispatchLevel
- void Resolve(const std::vector<Implementation>& implementations) {
- Implementation cur{DispatchLevel::NONE, {}};
-
- for (const auto& impl : implementations) {
- if (impl.first >= cur.first && IsSupported(impl.first)) {
- // Higher (or same) level than current
- cur = impl;
- }
- }
-
- if (!cur.second) {
- Status::Invalid("No appropriate implementation found").Abort();
- }
- func = cur.second;
- }
-
- private:
- bool IsSupported(DispatchLevel level) const {
- static const auto cpu_info = arrow::internal::CpuInfo::GetInstance();
-
- switch (level) {
- case DispatchLevel::NONE:
- return true;
- case DispatchLevel::SSE4_2:
- return cpu_info->IsSupported(CpuInfo::SSE4_2);
- case DispatchLevel::AVX2:
- return cpu_info->IsSupported(CpuInfo::AVX2);
- case DispatchLevel::AVX512:
- return cpu_info->IsSupported(CpuInfo::AVX512);
- default:
- return false;
- }
- }
-};
-
-} // namespace internal
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <utility>
+#include <vector>
+
+#include "arrow/status.h"
+#include "arrow/util/cpu_info.h"
+
+namespace arrow {
+namespace internal {
+
+enum class DispatchLevel : int {
+ // These dispatch levels, corresponding to instruction set features,
+ // are sorted in increasing order of preference.
+ NONE = 0,
+ SSE4_2,
+ AVX2,
+ AVX512,
+ NEON,
+ MAX
+};
+
+/*
+ A facility for dynamic dispatch according to available DispatchLevel.
+
+ Typical use:
+
+ static void my_function_default(...);
+ static void my_function_avx2(...);
+
+ struct MyDynamicFunction {
+ using FunctionType = decltype(&my_function_default);
+
+ static std::vector<std::pair<DispatchLevel, FunctionType>> implementations() {
+ return {
+ { DispatchLevel::NONE, my_function_default }
+ #if defined(ARROW_HAVE_RUNTIME_AVX2)
+ , { DispatchLevel::AVX2, my_function_avx2 }
+ #endif
+ };
+ }
+ };
+
+ void my_function(...) {
+ static DynamicDispatch<MyDynamicFunction> dispatch;
+ return dispatch.func(...);
+ }
+*/
+template <typename DynamicFunction>
+class DynamicDispatch {
+ protected:
+ using FunctionType = typename DynamicFunction::FunctionType;
+ using Implementation = std::pair<DispatchLevel, FunctionType>;
+
+ public:
+ DynamicDispatch() { Resolve(DynamicFunction::implementations()); }
+
+ FunctionType func = {};
+
+ protected:
+ // Use the Implementation with the highest DispatchLevel
+ void Resolve(const std::vector<Implementation>& implementations) {
+ Implementation cur{DispatchLevel::NONE, {}};
+
+ for (const auto& impl : implementations) {
+ if (impl.first >= cur.first && IsSupported(impl.first)) {
+ // Higher (or same) level than current
+ cur = impl;
+ }
+ }
+
+ if (!cur.second) {
+ Status::Invalid("No appropriate implementation found").Abort();
+ }
+ func = cur.second;
+ }
+
+ private:
+ bool IsSupported(DispatchLevel level) const {
+ static const auto cpu_info = arrow::internal::CpuInfo::GetInstance();
+
+ switch (level) {
+ case DispatchLevel::NONE:
+ return true;
+ case DispatchLevel::SSE4_2:
+ return cpu_info->IsSupported(CpuInfo::SSE4_2);
+ case DispatchLevel::AVX2:
+ return cpu_info->IsSupported(CpuInfo::AVX2);
+ case DispatchLevel::AVX512:
+ return cpu_info->IsSupported(CpuInfo::AVX512);
+ default:
+ return false;
+ }
+ }
+};
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/double_conversion.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/double_conversion.h
index bd99c0618db..63880acf60c 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/double_conversion.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/double_conversion.h
@@ -1,32 +1,32 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "contrib/libs/double-conversion/double-conversion.h" // IWYU pragma: export
-
-namespace arrow {
-namespace util {
-namespace double_conversion {
-
-using ::double_conversion::DoubleToStringConverter;
-using ::double_conversion::StringBuilder;
-using ::double_conversion::StringToDoubleConverter;
-
-} // namespace double_conversion
-} // namespace util
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "contrib/libs/double-conversion/double-conversion.h" // IWYU pragma: export
+
+namespace arrow {
+namespace util {
+namespace double_conversion {
+
+using ::double_conversion::DoubleToStringConverter;
+using ::double_conversion::StringBuilder;
+using ::double_conversion::StringToDoubleConverter;
+
+} // namespace double_conversion
+} // namespace util
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/formatting.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/formatting.cc
index c16d42ce5cf..e1f6e86a4e6 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/formatting.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/formatting.cc
@@ -1,48 +1,48 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/formatting.h"
-#include "arrow/util/config.h"
-#include "arrow/util/double_conversion.h"
-#include "arrow/util/logging.h"
-
-namespace arrow {
-
-using util::double_conversion::DoubleToStringConverter;
-
-static constexpr int kMinBufferSize = DoubleToStringConverter::kBase10MaximalLength + 1;
-
-namespace internal {
-namespace detail {
-
-const char digit_pairs[] =
- "0001020304050607080910111213141516171819"
- "2021222324252627282930313233343536373839"
- "4041424344454647484950515253545556575859"
- "6061626364656667686970717273747576777879"
- "8081828384858687888990919293949596979899";
-
-} // namespace detail
-
-struct FloatToStringFormatter::Impl {
- Impl()
- : converter_(DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, "inf", "nan",
- 'e', -6, 10, 6, 0) {}
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/formatting.h"
+#include "arrow/util/config.h"
+#include "arrow/util/double_conversion.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+using util::double_conversion::DoubleToStringConverter;
+
+static constexpr int kMinBufferSize = DoubleToStringConverter::kBase10MaximalLength + 1;
+
+namespace internal {
+namespace detail {
+
+const char digit_pairs[] =
+ "0001020304050607080910111213141516171819"
+ "2021222324252627282930313233343536373839"
+ "4041424344454647484950515253545556575859"
+ "6061626364656667686970717273747576777879"
+ "8081828384858687888990919293949596979899";
+
+} // namespace detail
+
+struct FloatToStringFormatter::Impl {
+ Impl()
+ : converter_(DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, "inf", "nan",
+ 'e', -6, 10, 6, 0) {}
+
Impl(int flags, const char* inf_symbol, const char* nan_symbol, char exp_character,
int decimal_in_shortest_low, int decimal_in_shortest_high,
int max_leading_padding_zeroes_in_precision_mode,
@@ -51,11 +51,11 @@ struct FloatToStringFormatter::Impl {
decimal_in_shortest_high, max_leading_padding_zeroes_in_precision_mode,
max_trailing_padding_zeroes_in_precision_mode) {}
- DoubleToStringConverter converter_;
-};
-
-FloatToStringFormatter::FloatToStringFormatter() : impl_(new Impl()) {}
-
+ DoubleToStringConverter converter_;
+};
+
+FloatToStringFormatter::FloatToStringFormatter() : impl_(new Impl()) {}
+
FloatToStringFormatter::FloatToStringFormatter(
int flags, const char* inf_symbol, const char* nan_symbol, char exp_character,
int decimal_in_shortest_low, int decimal_in_shortest_high,
@@ -66,26 +66,26 @@ FloatToStringFormatter::FloatToStringFormatter(
max_leading_padding_zeroes_in_precision_mode,
max_trailing_padding_zeroes_in_precision_mode)) {}
-FloatToStringFormatter::~FloatToStringFormatter() {}
-
-int FloatToStringFormatter::FormatFloat(float v, char* out_buffer, int out_size) {
- DCHECK_GE(out_size, kMinBufferSize);
- // StringBuilder checks bounds in debug mode for us
- util::double_conversion::StringBuilder builder(out_buffer, out_size);
- bool result = impl_->converter_.ToShortestSingle(v, &builder);
- DCHECK(result);
- ARROW_UNUSED(result);
- return builder.position();
-}
-
-int FloatToStringFormatter::FormatFloat(double v, char* out_buffer, int out_size) {
- DCHECK_GE(out_size, kMinBufferSize);
- util::double_conversion::StringBuilder builder(out_buffer, out_size);
- bool result = impl_->converter_.ToShortest(v, &builder);
- DCHECK(result);
- ARROW_UNUSED(result);
- return builder.position();
-}
-
-} // namespace internal
-} // namespace arrow
+FloatToStringFormatter::~FloatToStringFormatter() {}
+
+int FloatToStringFormatter::FormatFloat(float v, char* out_buffer, int out_size) {
+ DCHECK_GE(out_size, kMinBufferSize);
+ // StringBuilder checks bounds in debug mode for us
+ util::double_conversion::StringBuilder builder(out_buffer, out_size);
+ bool result = impl_->converter_.ToShortestSingle(v, &builder);
+ DCHECK(result);
+ ARROW_UNUSED(result);
+ return builder.position();
+}
+
+int FloatToStringFormatter::FormatFloat(double v, char* out_buffer, int out_size) {
+ DCHECK_GE(out_size, kMinBufferSize);
+ util::double_conversion::StringBuilder builder(out_buffer, out_size);
+ bool result = impl_->converter_.ToShortest(v, &builder);
+ DCHECK(result);
+ ARROW_UNUSED(result);
+ return builder.position();
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/formatting.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/formatting.h
index 566c9795f83..8268b953de4 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/formatting.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/formatting.h
@@ -1,250 +1,250 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// This is a private header for number-to-string formatting utilities
-
-#pragma once
-
-#include <array>
-#include <cassert>
-#include <chrono>
-#include <limits>
-#include <memory>
-#include <string>
-#include <type_traits>
-#include <utility>
-
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// This is a private header for number-to-string formatting utilities
+
+#pragma once
+
+#include <array>
+#include <cassert>
+#include <chrono>
+#include <limits>
+#include <memory>
+#include <string>
+#include <type_traits>
+#include <utility>
+
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
#include "arrow/util/double_conversion.h"
-#include "arrow/util/string_view.h"
-#include "arrow/util/time.h"
-#include "arrow/util/visibility.h"
-#include "arrow/vendored/datetime.h"
-
-namespace arrow {
-namespace internal {
-
-/// \brief The entry point for conversion to strings.
-template <typename ARROW_TYPE, typename Enable = void>
-class StringFormatter;
-
-template <typename T>
-struct is_formattable {
- template <typename U, typename = typename StringFormatter<U>::value_type>
- static std::true_type Test(U*);
-
- template <typename U>
- static std::false_type Test(...);
-
- static constexpr bool value = decltype(Test<T>(NULLPTR))::value;
-};
-
-template <typename T, typename R = void>
-using enable_if_formattable = enable_if_t<is_formattable<T>::value, R>;
-
-template <typename Appender>
-using Return = decltype(std::declval<Appender>()(util::string_view{}));
-
-/////////////////////////////////////////////////////////////////////////
-// Boolean formatting
-
-template <>
-class StringFormatter<BooleanType> {
- public:
- explicit StringFormatter(const std::shared_ptr<DataType>& = NULLPTR) {}
-
- using value_type = bool;
-
- template <typename Appender>
- Return<Appender> operator()(bool value, Appender&& append) {
- if (value) {
- const char string[] = "true";
- return append(util::string_view(string));
- } else {
- const char string[] = "false";
- return append(util::string_view(string));
- }
- }
-};
-
-/////////////////////////////////////////////////////////////////////////
-// Integer formatting
-
-namespace detail {
-
-// A 2x100 direct table mapping integers in [0..99] to their decimal representations.
-ARROW_EXPORT extern const char digit_pairs[];
-
-// Based on fmtlib's format_int class:
-// Write digits from right to left into a stack allocated buffer
-inline void FormatOneChar(char c, char** cursor) { *--*cursor = c; }
-
-template <typename Int>
-void FormatOneDigit(Int value, char** cursor) {
- assert(value >= 0 && value <= 9);
- FormatOneChar(static_cast<char>('0' + value), cursor);
-}
-
-template <typename Int>
-void FormatTwoDigits(Int value, char** cursor) {
- assert(value >= 0 && value <= 99);
- auto digit_pair = &digit_pairs[value * 2];
- FormatOneChar(digit_pair[1], cursor);
- FormatOneChar(digit_pair[0], cursor);
-}
-
-template <typename Int>
-void FormatAllDigits(Int value, char** cursor) {
- assert(value >= 0);
- while (value >= 100) {
- FormatTwoDigits(value % 100, cursor);
- value /= 100;
- }
-
- if (value >= 10) {
- FormatTwoDigits(value, cursor);
- } else {
- FormatOneDigit(value, cursor);
- }
-}
-
-template <typename Int>
-void FormatAllDigitsLeftPadded(Int value, size_t pad, char pad_char, char** cursor) {
- auto end = *cursor - pad;
- FormatAllDigits(value, cursor);
- while (*cursor > end) {
- FormatOneChar(pad_char, cursor);
- }
-}
-
-template <size_t BUFFER_SIZE>
-util::string_view ViewDigitBuffer(const std::array<char, BUFFER_SIZE>& buffer,
- char* cursor) {
- auto buffer_end = buffer.data() + BUFFER_SIZE;
- return {cursor, static_cast<size_t>(buffer_end - cursor)};
-}
-
-template <typename Int, typename UInt = typename std::make_unsigned<Int>::type>
-constexpr UInt Abs(Int value) {
- return value < 0 ? ~static_cast<UInt>(value) + 1 : static_cast<UInt>(value);
-}
-
-template <typename Int>
-constexpr size_t Digits10(Int value) {
- return value <= 9 ? 1 : Digits10(value / 10) + 1;
-}
-
-} // namespace detail
-
-template <typename ARROW_TYPE>
-class IntToStringFormatterMixin {
- public:
- explicit IntToStringFormatterMixin(const std::shared_ptr<DataType>& = NULLPTR) {}
-
- using value_type = typename ARROW_TYPE::c_type;
-
- template <typename Appender>
- Return<Appender> operator()(value_type value, Appender&& append) {
- constexpr size_t buffer_size =
- detail::Digits10(std::numeric_limits<value_type>::max()) + 1;
-
- std::array<char, buffer_size> buffer;
- char* cursor = buffer.data() + buffer_size;
- detail::FormatAllDigits(detail::Abs(value), &cursor);
- if (value < 0) {
- detail::FormatOneChar('-', &cursor);
- }
- return append(detail::ViewDigitBuffer(buffer, cursor));
- }
-};
-
-template <>
-class StringFormatter<Int8Type> : public IntToStringFormatterMixin<Int8Type> {
- using IntToStringFormatterMixin::IntToStringFormatterMixin;
-};
-
-template <>
-class StringFormatter<Int16Type> : public IntToStringFormatterMixin<Int16Type> {
- using IntToStringFormatterMixin::IntToStringFormatterMixin;
-};
-
-template <>
-class StringFormatter<Int32Type> : public IntToStringFormatterMixin<Int32Type> {
- using IntToStringFormatterMixin::IntToStringFormatterMixin;
-};
-
-template <>
-class StringFormatter<Int64Type> : public IntToStringFormatterMixin<Int64Type> {
- using IntToStringFormatterMixin::IntToStringFormatterMixin;
-};
-
-template <>
-class StringFormatter<UInt8Type> : public IntToStringFormatterMixin<UInt8Type> {
- using IntToStringFormatterMixin::IntToStringFormatterMixin;
-};
-
-template <>
-class StringFormatter<UInt16Type> : public IntToStringFormatterMixin<UInt16Type> {
- using IntToStringFormatterMixin::IntToStringFormatterMixin;
-};
-
-template <>
-class StringFormatter<UInt32Type> : public IntToStringFormatterMixin<UInt32Type> {
- using IntToStringFormatterMixin::IntToStringFormatterMixin;
-};
-
-template <>
-class StringFormatter<UInt64Type> : public IntToStringFormatterMixin<UInt64Type> {
- using IntToStringFormatterMixin::IntToStringFormatterMixin;
-};
-
-/////////////////////////////////////////////////////////////////////////
-// Floating-point formatting
-
-class ARROW_EXPORT FloatToStringFormatter {
- public:
- FloatToStringFormatter();
+#include "arrow/util/string_view.h"
+#include "arrow/util/time.h"
+#include "arrow/util/visibility.h"
+#include "arrow/vendored/datetime.h"
+
+namespace arrow {
+namespace internal {
+
+/// \brief The entry point for conversion to strings.
+template <typename ARROW_TYPE, typename Enable = void>
+class StringFormatter;
+
+template <typename T>
+struct is_formattable {
+ template <typename U, typename = typename StringFormatter<U>::value_type>
+ static std::true_type Test(U*);
+
+ template <typename U>
+ static std::false_type Test(...);
+
+ static constexpr bool value = decltype(Test<T>(NULLPTR))::value;
+};
+
+template <typename T, typename R = void>
+using enable_if_formattable = enable_if_t<is_formattable<T>::value, R>;
+
+template <typename Appender>
+using Return = decltype(std::declval<Appender>()(util::string_view{}));
+
+/////////////////////////////////////////////////////////////////////////
+// Boolean formatting
+
+template <>
+class StringFormatter<BooleanType> {
+ public:
+ explicit StringFormatter(const std::shared_ptr<DataType>& = NULLPTR) {}
+
+ using value_type = bool;
+
+ template <typename Appender>
+ Return<Appender> operator()(bool value, Appender&& append) {
+ if (value) {
+ const char string[] = "true";
+ return append(util::string_view(string));
+ } else {
+ const char string[] = "false";
+ return append(util::string_view(string));
+ }
+ }
+};
+
+/////////////////////////////////////////////////////////////////////////
+// Integer formatting
+
+namespace detail {
+
+// A 2x100 direct table mapping integers in [0..99] to their decimal representations.
+ARROW_EXPORT extern const char digit_pairs[];
+
+// Based on fmtlib's format_int class:
+// Write digits from right to left into a stack allocated buffer
+inline void FormatOneChar(char c, char** cursor) { *--*cursor = c; }
+
+template <typename Int>
+void FormatOneDigit(Int value, char** cursor) {
+ assert(value >= 0 && value <= 9);
+ FormatOneChar(static_cast<char>('0' + value), cursor);
+}
+
+template <typename Int>
+void FormatTwoDigits(Int value, char** cursor) {
+ assert(value >= 0 && value <= 99);
+ auto digit_pair = &digit_pairs[value * 2];
+ FormatOneChar(digit_pair[1], cursor);
+ FormatOneChar(digit_pair[0], cursor);
+}
+
+template <typename Int>
+void FormatAllDigits(Int value, char** cursor) {
+ assert(value >= 0);
+ while (value >= 100) {
+ FormatTwoDigits(value % 100, cursor);
+ value /= 100;
+ }
+
+ if (value >= 10) {
+ FormatTwoDigits(value, cursor);
+ } else {
+ FormatOneDigit(value, cursor);
+ }
+}
+
+template <typename Int>
+void FormatAllDigitsLeftPadded(Int value, size_t pad, char pad_char, char** cursor) {
+ auto end = *cursor - pad;
+ FormatAllDigits(value, cursor);
+ while (*cursor > end) {
+ FormatOneChar(pad_char, cursor);
+ }
+}
+
+template <size_t BUFFER_SIZE>
+util::string_view ViewDigitBuffer(const std::array<char, BUFFER_SIZE>& buffer,
+ char* cursor) {
+ auto buffer_end = buffer.data() + BUFFER_SIZE;
+ return {cursor, static_cast<size_t>(buffer_end - cursor)};
+}
+
+template <typename Int, typename UInt = typename std::make_unsigned<Int>::type>
+constexpr UInt Abs(Int value) {
+ return value < 0 ? ~static_cast<UInt>(value) + 1 : static_cast<UInt>(value);
+}
+
+template <typename Int>
+constexpr size_t Digits10(Int value) {
+ return value <= 9 ? 1 : Digits10(value / 10) + 1;
+}
+
+} // namespace detail
+
+template <typename ARROW_TYPE>
+class IntToStringFormatterMixin {
+ public:
+ explicit IntToStringFormatterMixin(const std::shared_ptr<DataType>& = NULLPTR) {}
+
+ using value_type = typename ARROW_TYPE::c_type;
+
+ template <typename Appender>
+ Return<Appender> operator()(value_type value, Appender&& append) {
+ constexpr size_t buffer_size =
+ detail::Digits10(std::numeric_limits<value_type>::max()) + 1;
+
+ std::array<char, buffer_size> buffer;
+ char* cursor = buffer.data() + buffer_size;
+ detail::FormatAllDigits(detail::Abs(value), &cursor);
+ if (value < 0) {
+ detail::FormatOneChar('-', &cursor);
+ }
+ return append(detail::ViewDigitBuffer(buffer, cursor));
+ }
+};
+
+template <>
+class StringFormatter<Int8Type> : public IntToStringFormatterMixin<Int8Type> {
+ using IntToStringFormatterMixin::IntToStringFormatterMixin;
+};
+
+template <>
+class StringFormatter<Int16Type> : public IntToStringFormatterMixin<Int16Type> {
+ using IntToStringFormatterMixin::IntToStringFormatterMixin;
+};
+
+template <>
+class StringFormatter<Int32Type> : public IntToStringFormatterMixin<Int32Type> {
+ using IntToStringFormatterMixin::IntToStringFormatterMixin;
+};
+
+template <>
+class StringFormatter<Int64Type> : public IntToStringFormatterMixin<Int64Type> {
+ using IntToStringFormatterMixin::IntToStringFormatterMixin;
+};
+
+template <>
+class StringFormatter<UInt8Type> : public IntToStringFormatterMixin<UInt8Type> {
+ using IntToStringFormatterMixin::IntToStringFormatterMixin;
+};
+
+template <>
+class StringFormatter<UInt16Type> : public IntToStringFormatterMixin<UInt16Type> {
+ using IntToStringFormatterMixin::IntToStringFormatterMixin;
+};
+
+template <>
+class StringFormatter<UInt32Type> : public IntToStringFormatterMixin<UInt32Type> {
+ using IntToStringFormatterMixin::IntToStringFormatterMixin;
+};
+
+template <>
+class StringFormatter<UInt64Type> : public IntToStringFormatterMixin<UInt64Type> {
+ using IntToStringFormatterMixin::IntToStringFormatterMixin;
+};
+
+/////////////////////////////////////////////////////////////////////////
+// Floating-point formatting
+
+class ARROW_EXPORT FloatToStringFormatter {
+ public:
+ FloatToStringFormatter();
FloatToStringFormatter(int flags, const char* inf_symbol, const char* nan_symbol,
char exp_character, int decimal_in_shortest_low,
int decimal_in_shortest_high,
int max_leading_padding_zeroes_in_precision_mode,
int max_trailing_padding_zeroes_in_precision_mode);
- ~FloatToStringFormatter();
-
- // Returns the number of characters written
- int FormatFloat(float v, char* out_buffer, int out_size);
- int FormatFloat(double v, char* out_buffer, int out_size);
-
- protected:
- struct Impl;
- std::unique_ptr<Impl> impl_;
-};
-
-template <typename ARROW_TYPE>
-class FloatToStringFormatterMixin : public FloatToStringFormatter {
- public:
- using value_type = typename ARROW_TYPE::c_type;
-
- static constexpr int buffer_size = 50;
-
- explicit FloatToStringFormatterMixin(const std::shared_ptr<DataType>& = NULLPTR) {}
-
+ ~FloatToStringFormatter();
+
+ // Returns the number of characters written
+ int FormatFloat(float v, char* out_buffer, int out_size);
+ int FormatFloat(double v, char* out_buffer, int out_size);
+
+ protected:
+ struct Impl;
+ std::unique_ptr<Impl> impl_;
+};
+
+template <typename ARROW_TYPE>
+class FloatToStringFormatterMixin : public FloatToStringFormatter {
+ public:
+ using value_type = typename ARROW_TYPE::c_type;
+
+ static constexpr int buffer_size = 50;
+
+ explicit FloatToStringFormatterMixin(const std::shared_ptr<DataType>& = NULLPTR) {}
+
FloatToStringFormatterMixin(int flags, const char* inf_symbol, const char* nan_symbol,
char exp_character, int decimal_in_shortest_low,
int decimal_in_shortest_high,
@@ -255,172 +255,172 @@ class FloatToStringFormatterMixin : public FloatToStringFormatter {
max_leading_padding_zeroes_in_precision_mode,
max_trailing_padding_zeroes_in_precision_mode) {}
- template <typename Appender>
- Return<Appender> operator()(value_type value, Appender&& append) {
- char buffer[buffer_size];
- int size = FormatFloat(value, buffer, buffer_size);
- return append(util::string_view(buffer, size));
- }
-};
-
-template <>
-class StringFormatter<FloatType> : public FloatToStringFormatterMixin<FloatType> {
- public:
- using FloatToStringFormatterMixin::FloatToStringFormatterMixin;
-};
-
-template <>
-class StringFormatter<DoubleType> : public FloatToStringFormatterMixin<DoubleType> {
- public:
- using FloatToStringFormatterMixin::FloatToStringFormatterMixin;
-};
-
-/////////////////////////////////////////////////////////////////////////
-// Temporal formatting
-
-namespace detail {
-
-template <typename V>
-constexpr size_t BufferSizeYYYY_MM_DD() {
- return detail::Digits10(9999) + 1 + detail::Digits10(12) + 1 + detail::Digits10(31);
-}
-
-inline void FormatYYYY_MM_DD(arrow_vendored::date::year_month_day ymd, char** cursor) {
- FormatTwoDigits(static_cast<unsigned>(ymd.day()), cursor);
- FormatOneChar('-', cursor);
- FormatTwoDigits(static_cast<unsigned>(ymd.month()), cursor);
- FormatOneChar('-', cursor);
- auto year = static_cast<int>(ymd.year());
- assert(year <= 9999);
- FormatTwoDigits(year % 100, cursor);
- FormatTwoDigits(year / 100, cursor);
-}
-
-template <typename Duration>
-constexpr size_t BufferSizeHH_MM_SS() {
- return detail::Digits10(23) + 1 + detail::Digits10(59) + 1 + detail::Digits10(59) + 1 +
- detail::Digits10(Duration::period::den) - 1;
-}
-
-template <typename Duration>
-void FormatHH_MM_SS(arrow_vendored::date::hh_mm_ss<Duration> hms, char** cursor) {
- constexpr size_t subsecond_digits = Digits10(Duration::period::den) - 1;
- if (subsecond_digits != 0) {
- FormatAllDigitsLeftPadded(hms.subseconds().count(), subsecond_digits, '0', cursor);
- FormatOneChar('.', cursor);
- }
- FormatTwoDigits(hms.seconds().count(), cursor);
- FormatOneChar(':', cursor);
- FormatTwoDigits(hms.minutes().count(), cursor);
- FormatOneChar(':', cursor);
- FormatTwoDigits(hms.hours().count(), cursor);
-}
-
-} // namespace detail
-
-template <>
-class StringFormatter<DurationType> : public IntToStringFormatterMixin<DurationType> {
- using IntToStringFormatterMixin::IntToStringFormatterMixin;
-};
-
-template <typename T>
-class StringFormatter<T, enable_if_date<T>> {
- public:
- using value_type = typename T::c_type;
-
- explicit StringFormatter(const std::shared_ptr<DataType>& = NULLPTR) {}
-
- template <typename Appender>
- Return<Appender> operator()(value_type value, Appender&& append) {
- arrow_vendored::date::days since_epoch;
- if (T::type_id == Type::DATE32) {
- since_epoch = arrow_vendored::date::days{value};
- } else {
- since_epoch = std::chrono::duration_cast<arrow_vendored::date::days>(
- std::chrono::milliseconds{value});
- }
-
- arrow_vendored::date::sys_days timepoint_days{since_epoch};
-
- constexpr size_t buffer_size = detail::BufferSizeYYYY_MM_DD<value_type>();
-
- std::array<char, buffer_size> buffer;
- char* cursor = buffer.data() + buffer_size;
-
- detail::FormatYYYY_MM_DD(arrow_vendored::date::year_month_day{timepoint_days},
- &cursor);
- return append(detail::ViewDigitBuffer(buffer, cursor));
- }
-};
-
-template <typename T>
-class StringFormatter<T, enable_if_time<T>> {
- public:
- using value_type = typename T::c_type;
-
- explicit StringFormatter(const std::shared_ptr<DataType>& type)
- : unit_(checked_cast<const T&>(*type).unit()) {}
-
- template <typename Duration, typename Appender>
- Return<Appender> operator()(Duration, value_type count, Appender&& append) {
- Duration since_midnight{count};
-
- constexpr size_t buffer_size = detail::BufferSizeHH_MM_SS<Duration>();
-
- std::array<char, buffer_size> buffer;
- char* cursor = buffer.data() + buffer_size;
-
- detail::FormatHH_MM_SS(arrow_vendored::date::make_time(since_midnight), &cursor);
- return append(detail::ViewDigitBuffer(buffer, cursor));
- }
-
- template <typename Appender>
- Return<Appender> operator()(value_type value, Appender&& append) {
- return util::VisitDuration(unit_, *this, value, std::forward<Appender>(append));
- }
-
- private:
- TimeUnit::type unit_;
-};
-
-template <>
-class StringFormatter<TimestampType> {
- public:
- using value_type = int64_t;
-
- explicit StringFormatter(const std::shared_ptr<DataType>& type)
- : unit_(checked_cast<const TimestampType&>(*type).unit()) {}
-
- template <typename Duration, typename Appender>
- Return<Appender> operator()(Duration, value_type count, Appender&& append) {
- Duration since_epoch{count};
-
- arrow_vendored::date::sys_days timepoint_days{
- arrow_vendored::date::floor<arrow_vendored::date::days>(since_epoch)};
-
- Duration since_midnight = since_epoch - timepoint_days.time_since_epoch();
-
- constexpr size_t buffer_size = detail::BufferSizeYYYY_MM_DD<value_type>() + 1 +
- detail::BufferSizeHH_MM_SS<Duration>();
-
- std::array<char, buffer_size> buffer;
- char* cursor = buffer.data() + buffer_size;
-
- detail::FormatHH_MM_SS(arrow_vendored::date::make_time(since_midnight), &cursor);
- detail::FormatOneChar(' ', &cursor);
- detail::FormatYYYY_MM_DD(arrow_vendored::date::year_month_day{timepoint_days},
- &cursor);
- return append(detail::ViewDigitBuffer(buffer, cursor));
- }
-
- template <typename Appender>
- Return<Appender> operator()(value_type value, Appender&& append) {
- return util::VisitDuration(unit_, *this, value, std::forward<Appender>(append));
- }
-
- private:
- TimeUnit::type unit_;
-};
-
-} // namespace internal
-} // namespace arrow
+ template <typename Appender>
+ Return<Appender> operator()(value_type value, Appender&& append) {
+ char buffer[buffer_size];
+ int size = FormatFloat(value, buffer, buffer_size);
+ return append(util::string_view(buffer, size));
+ }
+};
+
+template <>
+class StringFormatter<FloatType> : public FloatToStringFormatterMixin<FloatType> {
+ public:
+ using FloatToStringFormatterMixin::FloatToStringFormatterMixin;
+};
+
+template <>
+class StringFormatter<DoubleType> : public FloatToStringFormatterMixin<DoubleType> {
+ public:
+ using FloatToStringFormatterMixin::FloatToStringFormatterMixin;
+};
+
+/////////////////////////////////////////////////////////////////////////
+// Temporal formatting
+
+namespace detail {
+
+template <typename V>
+constexpr size_t BufferSizeYYYY_MM_DD() {
+ return detail::Digits10(9999) + 1 + detail::Digits10(12) + 1 + detail::Digits10(31);
+}
+
+inline void FormatYYYY_MM_DD(arrow_vendored::date::year_month_day ymd, char** cursor) {
+ FormatTwoDigits(static_cast<unsigned>(ymd.day()), cursor);
+ FormatOneChar('-', cursor);
+ FormatTwoDigits(static_cast<unsigned>(ymd.month()), cursor);
+ FormatOneChar('-', cursor);
+ auto year = static_cast<int>(ymd.year());
+ assert(year <= 9999);
+ FormatTwoDigits(year % 100, cursor);
+ FormatTwoDigits(year / 100, cursor);
+}
+
+template <typename Duration>
+constexpr size_t BufferSizeHH_MM_SS() {
+ return detail::Digits10(23) + 1 + detail::Digits10(59) + 1 + detail::Digits10(59) + 1 +
+ detail::Digits10(Duration::period::den) - 1;
+}
+
+template <typename Duration>
+void FormatHH_MM_SS(arrow_vendored::date::hh_mm_ss<Duration> hms, char** cursor) {
+ constexpr size_t subsecond_digits = Digits10(Duration::period::den) - 1;
+ if (subsecond_digits != 0) {
+ FormatAllDigitsLeftPadded(hms.subseconds().count(), subsecond_digits, '0', cursor);
+ FormatOneChar('.', cursor);
+ }
+ FormatTwoDigits(hms.seconds().count(), cursor);
+ FormatOneChar(':', cursor);
+ FormatTwoDigits(hms.minutes().count(), cursor);
+ FormatOneChar(':', cursor);
+ FormatTwoDigits(hms.hours().count(), cursor);
+}
+
+} // namespace detail
+
+template <>
+class StringFormatter<DurationType> : public IntToStringFormatterMixin<DurationType> {
+ using IntToStringFormatterMixin::IntToStringFormatterMixin;
+};
+
+template <typename T>
+class StringFormatter<T, enable_if_date<T>> {
+ public:
+ using value_type = typename T::c_type;
+
+ explicit StringFormatter(const std::shared_ptr<DataType>& = NULLPTR) {}
+
+ template <typename Appender>
+ Return<Appender> operator()(value_type value, Appender&& append) {
+ arrow_vendored::date::days since_epoch;
+ if (T::type_id == Type::DATE32) {
+ since_epoch = arrow_vendored::date::days{value};
+ } else {
+ since_epoch = std::chrono::duration_cast<arrow_vendored::date::days>(
+ std::chrono::milliseconds{value});
+ }
+
+ arrow_vendored::date::sys_days timepoint_days{since_epoch};
+
+ constexpr size_t buffer_size = detail::BufferSizeYYYY_MM_DD<value_type>();
+
+ std::array<char, buffer_size> buffer;
+ char* cursor = buffer.data() + buffer_size;
+
+ detail::FormatYYYY_MM_DD(arrow_vendored::date::year_month_day{timepoint_days},
+ &cursor);
+ return append(detail::ViewDigitBuffer(buffer, cursor));
+ }
+};
+
+template <typename T>
+class StringFormatter<T, enable_if_time<T>> {
+ public:
+ using value_type = typename T::c_type;
+
+ explicit StringFormatter(const std::shared_ptr<DataType>& type)
+ : unit_(checked_cast<const T&>(*type).unit()) {}
+
+ template <typename Duration, typename Appender>
+ Return<Appender> operator()(Duration, value_type count, Appender&& append) {
+ Duration since_midnight{count};
+
+ constexpr size_t buffer_size = detail::BufferSizeHH_MM_SS<Duration>();
+
+ std::array<char, buffer_size> buffer;
+ char* cursor = buffer.data() + buffer_size;
+
+ detail::FormatHH_MM_SS(arrow_vendored::date::make_time(since_midnight), &cursor);
+ return append(detail::ViewDigitBuffer(buffer, cursor));
+ }
+
+ template <typename Appender>
+ Return<Appender> operator()(value_type value, Appender&& append) {
+ return util::VisitDuration(unit_, *this, value, std::forward<Appender>(append));
+ }
+
+ private:
+ TimeUnit::type unit_;
+};
+
+template <>
+class StringFormatter<TimestampType> {
+ public:
+ using value_type = int64_t;
+
+ explicit StringFormatter(const std::shared_ptr<DataType>& type)
+ : unit_(checked_cast<const TimestampType&>(*type).unit()) {}
+
+ template <typename Duration, typename Appender>
+ Return<Appender> operator()(Duration, value_type count, Appender&& append) {
+ Duration since_epoch{count};
+
+ arrow_vendored::date::sys_days timepoint_days{
+ arrow_vendored::date::floor<arrow_vendored::date::days>(since_epoch)};
+
+ Duration since_midnight = since_epoch - timepoint_days.time_since_epoch();
+
+ constexpr size_t buffer_size = detail::BufferSizeYYYY_MM_DD<value_type>() + 1 +
+ detail::BufferSizeHH_MM_SS<Duration>();
+
+ std::array<char, buffer_size> buffer;
+ char* cursor = buffer.data() + buffer_size;
+
+ detail::FormatHH_MM_SS(arrow_vendored::date::make_time(since_midnight), &cursor);
+ detail::FormatOneChar(' ', &cursor);
+ detail::FormatYYYY_MM_DD(arrow_vendored::date::year_month_day{timepoint_days},
+ &cursor);
+ return append(detail::ViewDigitBuffer(buffer, cursor));
+ }
+
+ template <typename Appender>
+ Return<Appender> operator()(value_type value, Appender&& append) {
+ return util::VisitDuration(unit_, *this, value, std::forward<Appender>(append));
+ }
+
+ private:
+ TimeUnit::type unit_;
+};
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/functional.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/functional.h
index 9da79046fec..7e51a264b37 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/functional.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/functional.h
@@ -1,32 +1,32 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
#include <memory>
-#include <tuple>
-#include <type_traits>
-
+#include <tuple>
+#include <type_traits>
+
#include "arrow/result.h"
-#include "arrow/util/macros.h"
-
-namespace arrow {
-namespace internal {
-
+#include "arrow/util/macros.h"
+
+namespace arrow {
+namespace internal {
+
struct Empty {
static Result<Empty> ToResult(Status s) {
if (ARROW_PREDICT_TRUE(s.ok())) {
@@ -36,33 +36,33 @@ struct Empty {
}
};
-/// Helper struct for examining lambdas and other callables.
+/// Helper struct for examining lambdas and other callables.
/// TODO(ARROW-12655) support function pointers
-struct call_traits {
- public:
- template <typename R, typename... A>
- static std::false_type is_overloaded_impl(R(A...));
-
- template <typename F>
- static std::false_type is_overloaded_impl(decltype(&F::operator())*);
-
- template <typename F>
- static std::true_type is_overloaded_impl(...);
-
- template <typename F, typename R, typename... A>
- static R return_type_impl(R (F::*)(A...));
-
- template <typename F, typename R, typename... A>
- static R return_type_impl(R (F::*)(A...) const);
-
- template <std::size_t I, typename F, typename R, typename... A>
- static typename std::tuple_element<I, std::tuple<A...>>::type argument_type_impl(
- R (F::*)(A...));
-
- template <std::size_t I, typename F, typename R, typename... A>
- static typename std::tuple_element<I, std::tuple<A...>>::type argument_type_impl(
- R (F::*)(A...) const);
-
+struct call_traits {
+ public:
+ template <typename R, typename... A>
+ static std::false_type is_overloaded_impl(R(A...));
+
+ template <typename F>
+ static std::false_type is_overloaded_impl(decltype(&F::operator())*);
+
+ template <typename F>
+ static std::true_type is_overloaded_impl(...);
+
+ template <typename F, typename R, typename... A>
+ static R return_type_impl(R (F::*)(A...));
+
+ template <typename F, typename R, typename... A>
+ static R return_type_impl(R (F::*)(A...) const);
+
+ template <std::size_t I, typename F, typename R, typename... A>
+ static typename std::tuple_element<I, std::tuple<A...>>::type argument_type_impl(
+ R (F::*)(A...));
+
+ template <std::size_t I, typename F, typename R, typename... A>
+ static typename std::tuple_element<I, std::tuple<A...>>::type argument_type_impl(
+ R (F::*)(A...) const);
+
template <std::size_t I, typename F, typename R, typename... A>
static typename std::tuple_element<I, std::tuple<A...>>::type argument_type_impl(
R (F::*)(A...) &&);
@@ -77,34 +77,34 @@ struct call_traits {
template <typename F, typename R, typename... A>
static std::integral_constant<int, sizeof...(A)> argument_count_impl(R (F::*)(A...) &&);
- /// bool constant indicating whether F is a callable with more than one possible
- /// signature. Will be true_type for objects which define multiple operator() or which
- /// define a template operator()
- template <typename F>
- using is_overloaded =
- decltype(is_overloaded_impl<typename std::decay<F>::type>(NULLPTR));
-
- template <typename F, typename T = void>
- using enable_if_overloaded = typename std::enable_if<is_overloaded<F>::value, T>::type;
-
- template <typename F, typename T = void>
- using disable_if_overloaded =
- typename std::enable_if<!is_overloaded<F>::value, T>::type;
-
- /// If F is not overloaded, the argument types of its call operator can be
- /// extracted via call_traits::argument_type<Index, F>
- template <std::size_t I, typename F>
- using argument_type = decltype(argument_type_impl<I>(&std::decay<F>::type::operator()));
-
- template <typename F>
+ /// bool constant indicating whether F is a callable with more than one possible
+ /// signature. Will be true_type for objects which define multiple operator() or which
+ /// define a template operator()
+ template <typename F>
+ using is_overloaded =
+ decltype(is_overloaded_impl<typename std::decay<F>::type>(NULLPTR));
+
+ template <typename F, typename T = void>
+ using enable_if_overloaded = typename std::enable_if<is_overloaded<F>::value, T>::type;
+
+ template <typename F, typename T = void>
+ using disable_if_overloaded =
+ typename std::enable_if<!is_overloaded<F>::value, T>::type;
+
+ /// If F is not overloaded, the argument types of its call operator can be
+ /// extracted via call_traits::argument_type<Index, F>
+ template <std::size_t I, typename F>
+ using argument_type = decltype(argument_type_impl<I>(&std::decay<F>::type::operator()));
+
+ template <typename F>
using argument_count = decltype(argument_count_impl(&std::decay<F>::type::operator()));
template <typename F>
- using return_type = decltype(return_type_impl(&std::decay<F>::type::operator()));
-
- template <typename F, typename T, typename RT = T>
- using enable_if_return =
- typename std::enable_if<std::is_same<return_type<F>, T>::value, RT>;
+ using return_type = decltype(return_type_impl(&std::decay<F>::type::operator()));
+
+ template <typename F, typename T, typename RT = T>
+ using enable_if_return =
+ typename std::enable_if<std::is_same<return_type<F>, T>::value, RT>;
template <typename T, typename R = void>
using enable_if_empty = typename std::enable_if<std::is_same<T, Empty>::value, R>::type;
@@ -112,8 +112,8 @@ struct call_traits {
template <typename T, typename R = void>
using enable_if_not_empty =
typename std::enable_if<!std::is_same<T, Empty>::value, R>::type;
-};
-
+};
+
/// A type erased callable object which may only be invoked once.
/// It can be constructed from any lambda which matches the provided call signature.
/// Invoking it results in destruction of the lambda, freeing any state/references
@@ -156,5 +156,5 @@ class FnOnce<R(A...)> {
std::unique_ptr<Impl> impl_;
};
-} // namespace internal
-} // namespace arrow
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/future.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/future.cc
index f288a15be3f..a1cdf63d04b 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/future.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/future.cc
@@ -1,237 +1,237 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/future.h"
-
-#include <algorithm>
-#include <atomic>
-#include <chrono>
-#include <condition_variable>
-#include <mutex>
-#include <numeric>
-
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/logging.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/future.h"
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <condition_variable>
+#include <mutex>
+#include <numeric>
+
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
#include "arrow/util/thread_pool.h"
-
-namespace arrow {
-
-using internal::checked_cast;
-
-// Shared mutex for all FutureWaiter instances.
-// This simplifies lock management compared to a per-waiter mutex.
-// The locking order is: global waiter mutex, then per-future mutex.
-//
-// It is unlikely that many waiter instances are alive at once, so this
-// should ideally not limit scalability.
-static std::mutex global_waiter_mutex;
-
+
+namespace arrow {
+
+using internal::checked_cast;
+
+// Shared mutex for all FutureWaiter instances.
+// This simplifies lock management compared to a per-waiter mutex.
+// The locking order is: global waiter mutex, then per-future mutex.
+//
+// It is unlikely that many waiter instances are alive at once, so this
+// should ideally not limit scalability.
+static std::mutex global_waiter_mutex;
+
const double FutureWaiter::kInfinity = HUGE_VAL;
-class FutureWaiterImpl : public FutureWaiter {
- public:
- FutureWaiterImpl(Kind kind, std::vector<FutureImpl*> futures)
- : signalled_(false),
- kind_(kind),
- futures_(std::move(futures)),
- one_failed_(-1),
- fetch_pos_(0) {
- finished_futures_.reserve(futures_.size());
-
- // Observe the current state of futures and add waiters to receive future
- // state changes, atomically per future.
- // We need to lock ourselves, because as soon as SetWaiter() is called,
- // a FutureImpl may call MarkFutureFinished() from another thread
- // before this constructor finishes.
- std::unique_lock<std::mutex> lock(global_waiter_mutex);
-
- for (int i = 0; i < static_cast<int>(futures_.size()); ++i) {
- const auto state = futures_[i]->SetWaiter(this, i);
- if (IsFutureFinished(state)) {
- finished_futures_.push_back(i);
- }
- if (state != FutureState::SUCCESS) {
- one_failed_ = i;
- }
- }
-
- // Maybe signal the waiter, if the ending condition is already satisfied
- if (ShouldSignal()) {
- // No need to notify non-existent Wait() calls
- signalled_ = true;
- }
- }
-
+class FutureWaiterImpl : public FutureWaiter {
+ public:
+ FutureWaiterImpl(Kind kind, std::vector<FutureImpl*> futures)
+ : signalled_(false),
+ kind_(kind),
+ futures_(std::move(futures)),
+ one_failed_(-1),
+ fetch_pos_(0) {
+ finished_futures_.reserve(futures_.size());
+
+ // Observe the current state of futures and add waiters to receive future
+ // state changes, atomically per future.
+ // We need to lock ourselves, because as soon as SetWaiter() is called,
+ // a FutureImpl may call MarkFutureFinished() from another thread
+ // before this constructor finishes.
+ std::unique_lock<std::mutex> lock(global_waiter_mutex);
+
+ for (int i = 0; i < static_cast<int>(futures_.size()); ++i) {
+ const auto state = futures_[i]->SetWaiter(this, i);
+ if (IsFutureFinished(state)) {
+ finished_futures_.push_back(i);
+ }
+ if (state != FutureState::SUCCESS) {
+ one_failed_ = i;
+ }
+ }
+
+ // Maybe signal the waiter, if the ending condition is already satisfied
+ if (ShouldSignal()) {
+ // No need to notify non-existent Wait() calls
+ signalled_ = true;
+ }
+ }
+
~FutureWaiterImpl() override {
- for (auto future : futures_) {
- future->RemoveWaiter(this);
- }
- }
-
- // Is the ending condition satisfied?
- bool ShouldSignal() {
- bool do_signal = false;
- switch (kind_) {
- case ANY:
- do_signal = (finished_futures_.size() > 0);
- break;
- case ALL:
- do_signal = (finished_futures_.size() == futures_.size());
- break;
- case ALL_OR_FIRST_FAILED:
- do_signal = (finished_futures_.size() == futures_.size()) || one_failed_ >= 0;
- break;
- case ITERATE:
- do_signal = (finished_futures_.size() > static_cast<size_t>(fetch_pos_));
- break;
- }
- return do_signal;
- }
-
- void Signal() {
- signalled_ = true;
- cv_.notify_one();
- }
-
- void DoWaitUnlocked(std::unique_lock<std::mutex>* lock) {
- cv_.wait(*lock, [this] { return signalled_.load(); });
- }
-
- bool DoWait() {
- if (signalled_) {
- return true;
- }
- std::unique_lock<std::mutex> lock(global_waiter_mutex);
- DoWaitUnlocked(&lock);
- return true;
- }
-
- template <class Rep, class Period>
- bool DoWait(const std::chrono::duration<Rep, Period>& duration) {
- if (signalled_) {
- return true;
- }
- std::unique_lock<std::mutex> lock(global_waiter_mutex);
- cv_.wait_for(lock, duration, [this] { return signalled_.load(); });
- return signalled_.load();
- }
-
- void DoMarkFutureFinishedUnlocked(int future_num, FutureState state) {
- finished_futures_.push_back(future_num);
- if (state != FutureState::SUCCESS) {
- one_failed_ = future_num;
- }
- if (!signalled_ && ShouldSignal()) {
- Signal();
- }
- }
-
- int DoWaitAndFetchOne() {
- std::unique_lock<std::mutex> lock(global_waiter_mutex);
-
- DCHECK_EQ(kind_, ITERATE);
- DoWaitUnlocked(&lock);
- DCHECK_LT(static_cast<size_t>(fetch_pos_), finished_futures_.size());
- if (static_cast<size_t>(fetch_pos_) == finished_futures_.size() - 1) {
- signalled_ = false;
- }
- return finished_futures_[fetch_pos_++];
- }
-
- std::vector<int> DoMoveFinishedFutures() {
- std::unique_lock<std::mutex> lock(global_waiter_mutex);
-
- return std::move(finished_futures_);
- }
-
- protected:
- std::condition_variable cv_;
- std::atomic<bool> signalled_;
-
- Kind kind_;
- std::vector<FutureImpl*> futures_;
- std::vector<int> finished_futures_;
- int one_failed_;
- int fetch_pos_;
-};
-
-namespace {
-
-FutureWaiterImpl* GetConcreteWaiter(FutureWaiter* waiter) {
- return checked_cast<FutureWaiterImpl*>(waiter);
-}
-
-} // namespace
-
+ for (auto future : futures_) {
+ future->RemoveWaiter(this);
+ }
+ }
+
+ // Is the ending condition satisfied?
+ bool ShouldSignal() {
+ bool do_signal = false;
+ switch (kind_) {
+ case ANY:
+ do_signal = (finished_futures_.size() > 0);
+ break;
+ case ALL:
+ do_signal = (finished_futures_.size() == futures_.size());
+ break;
+ case ALL_OR_FIRST_FAILED:
+ do_signal = (finished_futures_.size() == futures_.size()) || one_failed_ >= 0;
+ break;
+ case ITERATE:
+ do_signal = (finished_futures_.size() > static_cast<size_t>(fetch_pos_));
+ break;
+ }
+ return do_signal;
+ }
+
+ void Signal() {
+ signalled_ = true;
+ cv_.notify_one();
+ }
+
+ void DoWaitUnlocked(std::unique_lock<std::mutex>* lock) {
+ cv_.wait(*lock, [this] { return signalled_.load(); });
+ }
+
+ bool DoWait() {
+ if (signalled_) {
+ return true;
+ }
+ std::unique_lock<std::mutex> lock(global_waiter_mutex);
+ DoWaitUnlocked(&lock);
+ return true;
+ }
+
+ template <class Rep, class Period>
+ bool DoWait(const std::chrono::duration<Rep, Period>& duration) {
+ if (signalled_) {
+ return true;
+ }
+ std::unique_lock<std::mutex> lock(global_waiter_mutex);
+ cv_.wait_for(lock, duration, [this] { return signalled_.load(); });
+ return signalled_.load();
+ }
+
+ void DoMarkFutureFinishedUnlocked(int future_num, FutureState state) {
+ finished_futures_.push_back(future_num);
+ if (state != FutureState::SUCCESS) {
+ one_failed_ = future_num;
+ }
+ if (!signalled_ && ShouldSignal()) {
+ Signal();
+ }
+ }
+
+ int DoWaitAndFetchOne() {
+ std::unique_lock<std::mutex> lock(global_waiter_mutex);
+
+ DCHECK_EQ(kind_, ITERATE);
+ DoWaitUnlocked(&lock);
+ DCHECK_LT(static_cast<size_t>(fetch_pos_), finished_futures_.size());
+ if (static_cast<size_t>(fetch_pos_) == finished_futures_.size() - 1) {
+ signalled_ = false;
+ }
+ return finished_futures_[fetch_pos_++];
+ }
+
+ std::vector<int> DoMoveFinishedFutures() {
+ std::unique_lock<std::mutex> lock(global_waiter_mutex);
+
+ return std::move(finished_futures_);
+ }
+
+ protected:
+ std::condition_variable cv_;
+ std::atomic<bool> signalled_;
+
+ Kind kind_;
+ std::vector<FutureImpl*> futures_;
+ std::vector<int> finished_futures_;
+ int one_failed_;
+ int fetch_pos_;
+};
+
+namespace {
+
+FutureWaiterImpl* GetConcreteWaiter(FutureWaiter* waiter) {
+ return checked_cast<FutureWaiterImpl*>(waiter);
+}
+
+} // namespace
+
FutureWaiter::FutureWaiter() = default;
-
+
FutureWaiter::~FutureWaiter() = default;
-
-std::unique_ptr<FutureWaiter> FutureWaiter::Make(Kind kind,
- std::vector<FutureImpl*> futures) {
- return std::unique_ptr<FutureWaiter>(new FutureWaiterImpl(kind, std::move(futures)));
-}
-
-void FutureWaiter::MarkFutureFinishedUnlocked(int future_num, FutureState state) {
- // Called by FutureImpl on state changes
- GetConcreteWaiter(this)->DoMarkFutureFinishedUnlocked(future_num, state);
-}
-
-bool FutureWaiter::Wait(double seconds) {
- if (seconds == kInfinity) {
- return GetConcreteWaiter(this)->DoWait();
- } else {
- return GetConcreteWaiter(this)->DoWait(std::chrono::duration<double>(seconds));
- }
-}
-
-int FutureWaiter::WaitAndFetchOne() {
- return GetConcreteWaiter(this)->DoWaitAndFetchOne();
-}
-
-std::vector<int> FutureWaiter::MoveFinishedFutures() {
- return GetConcreteWaiter(this)->DoMoveFinishedFutures();
-}
-
-class ConcreteFutureImpl : public FutureImpl {
- public:
- FutureState DoSetWaiter(FutureWaiter* w, int future_num) {
- std::unique_lock<std::mutex> lock(mutex_);
-
- // Atomically load state at the time of adding the waiter, to avoid
- // missed or duplicate events in the caller
- ARROW_CHECK_EQ(waiter_, nullptr)
- << "Only one Waiter allowed per Future at any given time";
- waiter_ = w;
- waiter_arg_ = future_num;
- return state_.load();
- }
-
- void DoRemoveWaiter(FutureWaiter* w) {
- std::unique_lock<std::mutex> lock(mutex_);
-
- ARROW_CHECK_EQ(waiter_, w);
- waiter_ = nullptr;
- }
-
- void DoMarkFinished() { DoMarkFinishedOrFailed(FutureState::SUCCESS); }
-
- void DoMarkFailed() { DoMarkFinishedOrFailed(FutureState::FAILURE); }
-
+
+std::unique_ptr<FutureWaiter> FutureWaiter::Make(Kind kind,
+ std::vector<FutureImpl*> futures) {
+ return std::unique_ptr<FutureWaiter>(new FutureWaiterImpl(kind, std::move(futures)));
+}
+
+void FutureWaiter::MarkFutureFinishedUnlocked(int future_num, FutureState state) {
+ // Called by FutureImpl on state changes
+ GetConcreteWaiter(this)->DoMarkFutureFinishedUnlocked(future_num, state);
+}
+
+bool FutureWaiter::Wait(double seconds) {
+ if (seconds == kInfinity) {
+ return GetConcreteWaiter(this)->DoWait();
+ } else {
+ return GetConcreteWaiter(this)->DoWait(std::chrono::duration<double>(seconds));
+ }
+}
+
+int FutureWaiter::WaitAndFetchOne() {
+ return GetConcreteWaiter(this)->DoWaitAndFetchOne();
+}
+
+std::vector<int> FutureWaiter::MoveFinishedFutures() {
+ return GetConcreteWaiter(this)->DoMoveFinishedFutures();
+}
+
+class ConcreteFutureImpl : public FutureImpl {
+ public:
+ FutureState DoSetWaiter(FutureWaiter* w, int future_num) {
+ std::unique_lock<std::mutex> lock(mutex_);
+
+ // Atomically load state at the time of adding the waiter, to avoid
+ // missed or duplicate events in the caller
+ ARROW_CHECK_EQ(waiter_, nullptr)
+ << "Only one Waiter allowed per Future at any given time";
+ waiter_ = w;
+ waiter_arg_ = future_num;
+ return state_.load();
+ }
+
+ void DoRemoveWaiter(FutureWaiter* w) {
+ std::unique_lock<std::mutex> lock(mutex_);
+
+ ARROW_CHECK_EQ(waiter_, w);
+ waiter_ = nullptr;
+ }
+
+ void DoMarkFinished() { DoMarkFinishedOrFailed(FutureState::SUCCESS); }
+
+ void DoMarkFailed() { DoMarkFinishedOrFailed(FutureState::FAILURE); }
+
void CheckOptions(const CallbackOptions& opts) {
if (opts.should_schedule != ShouldSchedule::Never) {
DCHECK_NE(opts.executor, nullptr)
@@ -296,20 +296,20 @@ class ConcreteFutureImpl : public FutureImpl {
}
}
- void DoMarkFinishedOrFailed(FutureState state) {
- {
- // Lock the hypothetical waiter first, and the future after.
- // This matches the locking order done in FutureWaiter constructor.
- std::unique_lock<std::mutex> waiter_lock(global_waiter_mutex);
- std::unique_lock<std::mutex> lock(mutex_);
-
- DCHECK(!IsFutureFinished(state_)) << "Future already marked finished";
- state_ = state;
- if (waiter_ != nullptr) {
- waiter_->MarkFutureFinishedUnlocked(waiter_arg_, state);
- }
- }
- cv_.notify_all();
+ void DoMarkFinishedOrFailed(FutureState state) {
+ {
+ // Lock the hypothetical waiter first, and the future after.
+ // This matches the locking order done in FutureWaiter constructor.
+ std::unique_lock<std::mutex> waiter_lock(global_waiter_mutex);
+ std::unique_lock<std::mutex> lock(mutex_);
+
+ DCHECK(!IsFutureFinished(state_)) << "Future already marked finished";
+ state_ = state;
+ if (waiter_ != nullptr) {
+ waiter_->MarkFutureFinishedUnlocked(waiter_arg_, state);
+ }
+ }
+ cv_.notify_all();
// run callbacks, lock not needed since the future is finished by this
// point so nothing else can modify the callbacks list and it is safe
@@ -321,64 +321,64 @@ class ConcreteFutureImpl : public FutureImpl {
RunOrScheduleCallback(std::move(callback_record), /*in_add_callback=*/false);
}
callbacks_.clear();
- }
-
- void DoWait() {
- std::unique_lock<std::mutex> lock(mutex_);
-
- cv_.wait(lock, [this] { return IsFutureFinished(state_); });
- }
-
- bool DoWait(double seconds) {
- std::unique_lock<std::mutex> lock(mutex_);
-
- cv_.wait_for(lock, std::chrono::duration<double>(seconds),
- [this] { return IsFutureFinished(state_); });
- return IsFutureFinished(state_);
- }
-
- std::mutex mutex_;
- std::condition_variable cv_;
- FutureWaiter* waiter_ = nullptr;
- int waiter_arg_ = -1;
-};
-
-namespace {
-
-ConcreteFutureImpl* GetConcreteFuture(FutureImpl* future) {
- return checked_cast<ConcreteFutureImpl*>(future);
-}
-
-} // namespace
-
-std::unique_ptr<FutureImpl> FutureImpl::Make() {
- return std::unique_ptr<FutureImpl>(new ConcreteFutureImpl());
-}
-
+ }
+
+ void DoWait() {
+ std::unique_lock<std::mutex> lock(mutex_);
+
+ cv_.wait(lock, [this] { return IsFutureFinished(state_); });
+ }
+
+ bool DoWait(double seconds) {
+ std::unique_lock<std::mutex> lock(mutex_);
+
+ cv_.wait_for(lock, std::chrono::duration<double>(seconds),
+ [this] { return IsFutureFinished(state_); });
+ return IsFutureFinished(state_);
+ }
+
+ std::mutex mutex_;
+ std::condition_variable cv_;
+ FutureWaiter* waiter_ = nullptr;
+ int waiter_arg_ = -1;
+};
+
+namespace {
+
+ConcreteFutureImpl* GetConcreteFuture(FutureImpl* future) {
+ return checked_cast<ConcreteFutureImpl*>(future);
+}
+
+} // namespace
+
+std::unique_ptr<FutureImpl> FutureImpl::Make() {
+ return std::unique_ptr<FutureImpl>(new ConcreteFutureImpl());
+}
+
std::unique_ptr<FutureImpl> FutureImpl::MakeFinished(FutureState state) {
std::unique_ptr<ConcreteFutureImpl> ptr(new ConcreteFutureImpl());
ptr->state_ = state;
return std::move(ptr);
}
-FutureImpl::FutureImpl() : state_(FutureState::PENDING) {}
-
-FutureState FutureImpl::SetWaiter(FutureWaiter* w, int future_num) {
- return GetConcreteFuture(this)->DoSetWaiter(w, future_num);
-}
-
-void FutureImpl::RemoveWaiter(FutureWaiter* w) {
- GetConcreteFuture(this)->DoRemoveWaiter(w);
-}
-
-void FutureImpl::Wait() { GetConcreteFuture(this)->DoWait(); }
-
-bool FutureImpl::Wait(double seconds) { return GetConcreteFuture(this)->DoWait(seconds); }
-
-void FutureImpl::MarkFinished() { GetConcreteFuture(this)->DoMarkFinished(); }
-
-void FutureImpl::MarkFailed() { GetConcreteFuture(this)->DoMarkFailed(); }
-
+FutureImpl::FutureImpl() : state_(FutureState::PENDING) {}
+
+FutureState FutureImpl::SetWaiter(FutureWaiter* w, int future_num) {
+ return GetConcreteFuture(this)->DoSetWaiter(w, future_num);
+}
+
+void FutureImpl::RemoveWaiter(FutureWaiter* w) {
+ GetConcreteFuture(this)->DoRemoveWaiter(w);
+}
+
+void FutureImpl::Wait() { GetConcreteFuture(this)->DoWait(); }
+
+bool FutureImpl::Wait(double seconds) { return GetConcreteFuture(this)->DoWait(seconds); }
+
+void FutureImpl::MarkFinished() { GetConcreteFuture(this)->DoMarkFinished(); }
+
+void FutureImpl::MarkFailed() { GetConcreteFuture(this)->DoMarkFailed(); }
+
void FutureImpl::AddCallback(Callback callback, CallbackOptions opts) {
GetConcreteFuture(this)->AddCallback(std::move(callback), opts);
}
@@ -418,4 +418,4 @@ Future<> AllComplete(const std::vector<Future<>>& futures) {
return out;
}
-} // namespace arrow
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/future.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/future.h
index d9e0a939f25..32b44ca60fc 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/future.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/future.h
@@ -1,41 +1,41 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <atomic>
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <atomic>
#include <cmath>
#include <functional>
-#include <memory>
-#include <type_traits>
-#include <utility>
-#include <vector>
-
-#include "arrow/result.h"
-#include "arrow/status.h"
+#include <memory>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "arrow/result.h"
+#include "arrow/status.h"
#include "arrow/type_fwd.h"
#include "arrow/util/functional.h"
-#include "arrow/util/macros.h"
+#include "arrow/util/macros.h"
#include "arrow/util/optional.h"
#include "arrow/util/type_fwd.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
template <typename>
struct EnsureFuture;
@@ -212,11 +212,11 @@ struct ContinueFuture::ForReturnImpl<Future<T>> {
} // namespace detail
-/// A Future's execution or completion status
-enum class FutureState : int8_t { PENDING, SUCCESS, FAILURE };
-
-inline bool IsFutureFinished(FutureState state) { return state != FutureState::PENDING; }
-
+/// A Future's execution or completion status
+enum class FutureState : int8_t { PENDING, SUCCESS, FAILURE };
+
+inline bool IsFutureFinished(FutureState state) { return state != FutureState::PENDING; }
+
/// \brief Describe whether the callback should be scheduled or run synchronously
enum class ShouldSchedule {
/// Always run the callback synchronously (the default)
@@ -230,7 +230,7 @@ enum class ShouldSchedule {
/// the specified executor.
IfDifferentExecutor = 3,
};
-
+
/// \brief Options that control how a continuation is run
struct CallbackOptions {
/// Describe whether the callback should be run synchronously or scheduled
@@ -238,42 +238,42 @@ struct CallbackOptions {
/// If the callback is scheduled then this is the executor it should be scheduled
/// on. If this is NULL then should_schedule must be Never
internal::Executor* executor = NULLPTR;
-
+
static CallbackOptions Defaults() { return {}; }
};
// Untyped private implementation
class ARROW_EXPORT FutureImpl : public std::enable_shared_from_this<FutureImpl> {
- public:
+ public:
FutureImpl();
- virtual ~FutureImpl() = default;
-
- FutureState state() { return state_.load(); }
-
- static std::unique_ptr<FutureImpl> Make();
+ virtual ~FutureImpl() = default;
+
+ FutureState state() { return state_.load(); }
+
+ static std::unique_ptr<FutureImpl> Make();
static std::unique_ptr<FutureImpl> MakeFinished(FutureState state);
-
- // Future API
- void MarkFinished();
- void MarkFailed();
- void Wait();
- bool Wait(double seconds);
+
+ // Future API
+ void MarkFinished();
+ void MarkFailed();
+ void Wait();
+ bool Wait(double seconds);
template <typename ValueType>
Result<ValueType>* CastResult() const {
return static_cast<Result<ValueType>*>(result_.get());
}
-
+
using Callback = internal::FnOnce<void(const FutureImpl& impl)>;
void AddCallback(Callback callback, CallbackOptions opts);
bool TryAddCallback(const std::function<Callback()>& callback_factory,
CallbackOptions opts);
- // Waiter API
- inline FutureState SetWaiter(FutureWaiter* w, int future_num);
- inline void RemoveWaiter(FutureWaiter* w);
-
+ // Waiter API
+ inline FutureState SetWaiter(FutureWaiter* w, int future_num);
+ inline void RemoveWaiter(FutureWaiter* w);
+
std::atomic<FutureState> state_{FutureState::PENDING};
-
+
// Type erased storage for arbitrary results
// XXX small objects could be stored inline instead of boxed in a pointer
using Storage = std::unique_ptr<void, void (*)(void*)>;
@@ -284,118 +284,118 @@ class ARROW_EXPORT FutureImpl : public std::enable_shared_from_this<FutureImpl>
CallbackOptions options;
};
std::vector<CallbackRecord> callbacks_;
-};
-
-// An object that waits on multiple futures at once. Only one waiter
-// can be registered for each future at any time.
-class ARROW_EXPORT FutureWaiter {
- public:
- enum Kind : int8_t { ANY, ALL, ALL_OR_FIRST_FAILED, ITERATE };
-
+};
+
+// An object that waits on multiple futures at once. Only one waiter
+// can be registered for each future at any time.
+class ARROW_EXPORT FutureWaiter {
+ public:
+ enum Kind : int8_t { ANY, ALL, ALL_OR_FIRST_FAILED, ITERATE };
+
// HUGE_VAL isn't constexpr on Windows
// https://social.msdn.microsoft.com/Forums/vstudio/en-US/47e8b9ff-b205-4189-968e-ee3bc3e2719f/constexpr-compile-error?forum=vclanguage
static const double kInfinity;
-
- static std::unique_ptr<FutureWaiter> Make(Kind kind, std::vector<FutureImpl*> futures);
-
- template <typename FutureType>
- static std::unique_ptr<FutureWaiter> Make(Kind kind,
- const std::vector<FutureType>& futures) {
- return Make(kind, ExtractFutures(futures));
- }
-
- virtual ~FutureWaiter();
-
- bool Wait(double seconds = kInfinity);
- int WaitAndFetchOne();
-
- std::vector<int> MoveFinishedFutures();
-
- protected:
- // Extract FutureImpls from Futures
- template <typename FutureType,
- typename Enable = std::enable_if<!std::is_pointer<FutureType>::value>>
- static std::vector<FutureImpl*> ExtractFutures(const std::vector<FutureType>& futures) {
- std::vector<FutureImpl*> base_futures(futures.size());
- for (int i = 0; i < static_cast<int>(futures.size()); ++i) {
+
+ static std::unique_ptr<FutureWaiter> Make(Kind kind, std::vector<FutureImpl*> futures);
+
+ template <typename FutureType>
+ static std::unique_ptr<FutureWaiter> Make(Kind kind,
+ const std::vector<FutureType>& futures) {
+ return Make(kind, ExtractFutures(futures));
+ }
+
+ virtual ~FutureWaiter();
+
+ bool Wait(double seconds = kInfinity);
+ int WaitAndFetchOne();
+
+ std::vector<int> MoveFinishedFutures();
+
+ protected:
+ // Extract FutureImpls from Futures
+ template <typename FutureType,
+ typename Enable = std::enable_if<!std::is_pointer<FutureType>::value>>
+ static std::vector<FutureImpl*> ExtractFutures(const std::vector<FutureType>& futures) {
+ std::vector<FutureImpl*> base_futures(futures.size());
+ for (int i = 0; i < static_cast<int>(futures.size()); ++i) {
base_futures[i] = futures[i].impl_.get();
- }
- return base_futures;
- }
-
- // Extract FutureImpls from Future pointers
- template <typename FutureType>
- static std::vector<FutureImpl*> ExtractFutures(
- const std::vector<FutureType*>& futures) {
- std::vector<FutureImpl*> base_futures(futures.size());
- for (int i = 0; i < static_cast<int>(futures.size()); ++i) {
+ }
+ return base_futures;
+ }
+
+ // Extract FutureImpls from Future pointers
+ template <typename FutureType>
+ static std::vector<FutureImpl*> ExtractFutures(
+ const std::vector<FutureType*>& futures) {
+ std::vector<FutureImpl*> base_futures(futures.size());
+ for (int i = 0; i < static_cast<int>(futures.size()); ++i) {
base_futures[i] = futures[i]->impl_.get();
- }
- return base_futures;
- }
-
- FutureWaiter();
- ARROW_DISALLOW_COPY_AND_ASSIGN(FutureWaiter);
-
- inline void MarkFutureFinishedUnlocked(int future_num, FutureState state);
-
- friend class FutureImpl;
- friend class ConcreteFutureImpl;
-};
-
-// ---------------------------------------------------------------------
-// Public API
-
-/// \brief EXPERIMENTAL A std::future-like class with more functionality.
-///
-/// A Future represents the results of a past or future computation.
-/// The Future API has two sides: a producer side and a consumer side.
-///
-/// The producer API allows creating a Future and setting its result or
-/// status, possibly after running a computation function.
-///
-/// The consumer API allows querying a Future's current state, wait for it
-/// to complete, or wait on multiple Futures at once (using WaitForAll,
-/// WaitForAny or AsCompletedIterator).
-template <typename T>
+ }
+ return base_futures;
+ }
+
+ FutureWaiter();
+ ARROW_DISALLOW_COPY_AND_ASSIGN(FutureWaiter);
+
+ inline void MarkFutureFinishedUnlocked(int future_num, FutureState state);
+
+ friend class FutureImpl;
+ friend class ConcreteFutureImpl;
+};
+
+// ---------------------------------------------------------------------
+// Public API
+
+/// \brief EXPERIMENTAL A std::future-like class with more functionality.
+///
+/// A Future represents the results of a past or future computation.
+/// The Future API has two sides: a producer side and a consumer side.
+///
+/// The producer API allows creating a Future and setting its result or
+/// status, possibly after running a computation function.
+///
+/// The consumer API allows querying a Future's current state, wait for it
+/// to complete, or wait on multiple Futures at once (using WaitForAll,
+/// WaitForAny or AsCompletedIterator).
+template <typename T>
class ARROW_MUST_USE_TYPE Future {
- public:
+ public:
using ValueType = T;
using SyncType = typename detail::SyncType<T>::type;
static constexpr bool is_empty = std::is_same<T, internal::Empty>::value;
- // The default constructor creates an invalid Future. Use Future::Make()
- // for a valid Future. This constructor is mostly for the convenience
- // of being able to presize a vector of Futures.
- Future() = default;
-
- // Consumer API
-
+ // The default constructor creates an invalid Future. Use Future::Make()
+ // for a valid Future. This constructor is mostly for the convenience
+ // of being able to presize a vector of Futures.
+ Future() = default;
+
+ // Consumer API
+
bool is_valid() const { return impl_ != NULLPTR; }
-
- /// \brief Return the Future's current state
- ///
- /// A return value of PENDING is only indicative, as the Future can complete
- /// concurrently. A return value of FAILURE or SUCCESS is definitive, though.
- FutureState state() const {
- CheckValid();
- return impl_->state();
- }
-
+
+ /// \brief Return the Future's current state
+ ///
+ /// A return value of PENDING is only indicative, as the Future can complete
+ /// concurrently. A return value of FAILURE or SUCCESS is definitive, though.
+ FutureState state() const {
+ CheckValid();
+ return impl_->state();
+ }
+
/// \brief Whether the Future is finished
- ///
+ ///
/// A false return value is only indicative, as the Future can complete
/// concurrently. A true return value is definitive, though.
bool is_finished() const {
- CheckValid();
+ CheckValid();
return IsFutureFinished(impl_->state());
}
/// \brief Wait for the Future to complete and return its Result
const Result<ValueType>& result() const& {
- Wait();
+ Wait();
return *GetResult();
- }
-
+ }
+
/// \brief Returns an rvalue to the result. This method is potentially unsafe
///
/// The future is not the unique owner of the result, copies of a future will
@@ -403,11 +403,11 @@ class ARROW_MUST_USE_TYPE Future {
/// of the future exist. Attempts to add callbacks after you move the result
/// will result in undefined behavior.
Result<ValueType>&& MoveResult() {
- Wait();
+ Wait();
return std::move(*GetResult());
- }
-
- /// \brief Wait for the Future to complete and return its Status
+ }
+
+ /// \brief Wait for the Future to complete and return its Status
const Status& status() const { return result().status(); }
/// \brief Future<T> is convertible to Future<>, which views only the
@@ -416,62 +416,62 @@ class ARROW_MUST_USE_TYPE Future {
Future<> status_future;
status_future.impl_ = impl_;
return status_future;
- }
-
- /// \brief Wait for the Future to complete
- void Wait() const {
- CheckValid();
- if (!IsFutureFinished(impl_->state())) {
- impl_->Wait();
- }
- }
-
- /// \brief Wait for the Future to complete, or for the timeout to expire
- ///
- /// `true` is returned if the Future completed, `false` if the timeout expired.
- /// Note a `false` value is only indicative, as the Future can complete
- /// concurrently.
- bool Wait(double seconds) const {
- CheckValid();
- if (IsFutureFinished(impl_->state())) {
- return true;
- }
- return impl_->Wait(seconds);
- }
-
- // Producer API
-
+ }
+
+ /// \brief Wait for the Future to complete
+ void Wait() const {
+ CheckValid();
+ if (!IsFutureFinished(impl_->state())) {
+ impl_->Wait();
+ }
+ }
+
+ /// \brief Wait for the Future to complete, or for the timeout to expire
+ ///
+ /// `true` is returned if the Future completed, `false` if the timeout expired.
+ /// Note a `false` value is only indicative, as the Future can complete
+ /// concurrently.
+ bool Wait(double seconds) const {
+ CheckValid();
+ if (IsFutureFinished(impl_->state())) {
+ return true;
+ }
+ return impl_->Wait(seconds);
+ }
+
+ // Producer API
+
/// \brief Producer API: mark Future finished
- ///
+ ///
/// The Future's result is set to `res`.
void MarkFinished(Result<ValueType> res) { DoMarkFinished(std::move(res)); }
-
+
/// \brief Mark a Future<> completed with the provided Status.
template <typename E = ValueType, typename = typename std::enable_if<
std::is_same<E, internal::Empty>::value>::type>
void MarkFinished(Status s = Status::OK()) {
return DoMarkFinished(E::ToResult(std::move(s)));
- }
-
- /// \brief Producer API: instantiate a valid Future
- ///
+ }
+
+ /// \brief Producer API: instantiate a valid Future
+ ///
/// The Future's state is initialized with PENDING. If you are creating a future with
/// this method you must ensure that future is eventually completed (with success or
/// failure). Creating a future, returning it, and never completing the future can lead
/// to memory leaks (for example, see Loop).
- static Future Make() {
- Future fut;
+ static Future Make() {
+ Future fut;
fut.impl_ = FutureImpl::Make();
- return fut;
- }
-
- /// \brief Producer API: instantiate a finished Future
+ return fut;
+ }
+
+ /// \brief Producer API: instantiate a finished Future
static Future<ValueType> MakeFinished(Result<ValueType> res) {
Future<ValueType> fut;
fut.InitializeFromResult(std::move(res));
- return fut;
- }
-
+ return fut;
+ }
+
/// \brief Make a finished Future<> with the provided Status.
template <typename E = ValueType, typename = typename std::enable_if<
std::is_same<E, internal::Empty>::value>::type>
@@ -676,7 +676,7 @@ class ARROW_MUST_USE_TYPE Future {
Future(Status s) // NOLINT runtime/explicit
: Future(Result<ValueType>(std::move(s))) {}
- protected:
+ protected:
void InitializeFromResult(Result<ValueType> res) {
if (ARROW_PREDICT_TRUE(res.ok())) {
impl_ = FutureImpl::MakeFinished(FutureState::SUCCESS);
@@ -705,19 +705,19 @@ class ARROW_MUST_USE_TYPE Future {
}
}
- void CheckValid() const {
-#ifndef NDEBUG
- if (!is_valid()) {
- Status::Invalid("Invalid Future (default-initialized?)").Abort();
- }
-#endif
- }
-
+ void CheckValid() const {
+#ifndef NDEBUG
+ if (!is_valid()) {
+ Status::Invalid("Invalid Future (default-initialized?)").Abort();
+ }
+#endif
+ }
+
explicit Future(std::shared_ptr<FutureImpl> impl) : impl_(std::move(impl)) {}
-
+
std::shared_ptr<FutureImpl> impl_;
- friend class FutureWaiter;
+ friend class FutureWaiter;
friend struct detail::ContinueFuture;
template <typename U>
@@ -727,8 +727,8 @@ class ARROW_MUST_USE_TYPE Future {
FRIEND_TEST(FutureRefTest, ChainRemoved);
FRIEND_TEST(FutureRefTest, TailRemoved);
FRIEND_TEST(FutureRefTest, HeadRemoved);
-};
-
+};
+
template <typename T>
typename Future<T>::SyncType FutureToSync(const Future<T>& fut) {
return fut.result();
@@ -761,28 +761,28 @@ static Future<T> DeferNotOk(Result<Future<T>> maybe_future) {
return std::move(maybe_future).MoveValueUnsafe();
}
-/// \brief Wait for all the futures to end, or for the given timeout to expire.
-///
-/// `true` is returned if all the futures completed before the timeout was reached,
-/// `false` otherwise.
-template <typename T>
-inline bool WaitForAll(const std::vector<Future<T>>& futures,
- double seconds = FutureWaiter::kInfinity) {
- auto waiter = FutureWaiter::Make(FutureWaiter::ALL, futures);
- return waiter->Wait(seconds);
-}
-
-/// \brief Wait for all the futures to end, or for the given timeout to expire.
-///
-/// `true` is returned if all the futures completed before the timeout was reached,
-/// `false` otherwise.
-template <typename T>
-inline bool WaitForAll(const std::vector<Future<T>*>& futures,
- double seconds = FutureWaiter::kInfinity) {
- auto waiter = FutureWaiter::Make(FutureWaiter::ALL, futures);
- return waiter->Wait(seconds);
-}
-
+/// \brief Wait for all the futures to end, or for the given timeout to expire.
+///
+/// `true` is returned if all the futures completed before the timeout was reached,
+/// `false` otherwise.
+template <typename T>
+inline bool WaitForAll(const std::vector<Future<T>>& futures,
+ double seconds = FutureWaiter::kInfinity) {
+ auto waiter = FutureWaiter::Make(FutureWaiter::ALL, futures);
+ return waiter->Wait(seconds);
+}
+
+/// \brief Wait for all the futures to end, or for the given timeout to expire.
+///
+/// `true` is returned if all the futures completed before the timeout was reached,
+/// `false` otherwise.
+template <typename T>
+inline bool WaitForAll(const std::vector<Future<T>*>& futures,
+ double seconds = FutureWaiter::kInfinity) {
+ auto waiter = FutureWaiter::Make(FutureWaiter::ALL, futures);
+ return waiter->Wait(seconds);
+}
+
/// \brief Create a Future which completes when all of `futures` complete.
///
/// The future's result is a vector of the results of `futures`.
@@ -830,30 +830,30 @@ inline Future<>::Future(Status s) : Future(internal::Empty::ToResult(std::move(s
ARROW_EXPORT
Future<> AllComplete(const std::vector<Future<>>& futures);
-/// \brief Wait for one of the futures to end, or for the given timeout to expire.
-///
-/// The indices of all completed futures are returned. Note that some futures
-/// may not be in the returned set, but still complete concurrently.
-template <typename T>
-inline std::vector<int> WaitForAny(const std::vector<Future<T>>& futures,
- double seconds = FutureWaiter::kInfinity) {
- auto waiter = FutureWaiter::Make(FutureWaiter::ANY, futures);
- waiter->Wait(seconds);
- return waiter->MoveFinishedFutures();
-}
-
-/// \brief Wait for one of the futures to end, or for the given timeout to expire.
-///
-/// The indices of all completed futures are returned. Note that some futures
-/// may not be in the returned set, but still complete concurrently.
-template <typename T>
-inline std::vector<int> WaitForAny(const std::vector<Future<T>*>& futures,
- double seconds = FutureWaiter::kInfinity) {
- auto waiter = FutureWaiter::Make(FutureWaiter::ANY, futures);
- waiter->Wait(seconds);
- return waiter->MoveFinishedFutures();
-}
-
+/// \brief Wait for one of the futures to end, or for the given timeout to expire.
+///
+/// The indices of all completed futures are returned. Note that some futures
+/// may not be in the returned set, but still complete concurrently.
+template <typename T>
+inline std::vector<int> WaitForAny(const std::vector<Future<T>>& futures,
+ double seconds = FutureWaiter::kInfinity) {
+ auto waiter = FutureWaiter::Make(FutureWaiter::ANY, futures);
+ waiter->Wait(seconds);
+ return waiter->MoveFinishedFutures();
+}
+
+/// \brief Wait for one of the futures to end, or for the given timeout to expire.
+///
+/// The indices of all completed futures are returned. Note that some futures
+/// may not be in the returned set, but still complete concurrently.
+template <typename T>
+inline std::vector<int> WaitForAny(const std::vector<Future<T>*>& futures,
+ double seconds = FutureWaiter::kInfinity) {
+ auto waiter = FutureWaiter::Make(FutureWaiter::ANY, futures);
+ waiter->Wait(seconds);
+ return waiter->MoveFinishedFutures();
+}
+
struct Continue {
template <typename T>
operator util::optional<T>() && { // NOLINT explicit
@@ -954,4 +954,4 @@ struct EnsureFuture {
using type = decltype(ToFuture(std::declval<T>()));
};
-} // namespace arrow
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/hash_util.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/hash_util.h
index dd1c38a7821..52956e4380e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/hash_util.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/hash_util.h
@@ -1,66 +1,66 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-namespace arrow {
-namespace internal {
-
-// ----------------------------------------------------------------------
-// BEGIN Hash utilities from Boost
-
-namespace detail {
-
-#if defined(_MSC_VER)
-#define ARROW_HASH_ROTL32(x, r) _rotl(x, r)
-#else
-#define ARROW_HASH_ROTL32(x, r) (x << r) | (x >> (32 - r))
-#endif
-
-template <typename SizeT>
-inline void hash_combine_impl(SizeT& seed, SizeT value) {
- seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2);
-}
-
-inline void hash_combine_impl(uint32_t& h1, uint32_t k1) {
- const uint32_t c1 = 0xcc9e2d51;
- const uint32_t c2 = 0x1b873593;
-
- k1 *= c1;
- k1 = ARROW_HASH_ROTL32(k1, 15);
- k1 *= c2;
-
- h1 ^= k1;
- h1 = ARROW_HASH_ROTL32(h1, 13);
- h1 = h1 * 5 + 0xe6546b64;
-}
-
-#undef ARROW_HASH_ROTL32
-
-} // namespace detail
-
-template <class T>
-inline void hash_combine(std::size_t& seed, T const& v) {
- std::hash<T> hasher;
- return ::arrow::internal::detail::hash_combine_impl(seed, hasher(v));
-}
-
-// END Hash utilities from Boost
-// ----------------------------------------------------------------------
-
-} // namespace internal
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+namespace arrow {
+namespace internal {
+
+// ----------------------------------------------------------------------
+// BEGIN Hash utilities from Boost
+
+namespace detail {
+
+#if defined(_MSC_VER)
+#define ARROW_HASH_ROTL32(x, r) _rotl(x, r)
+#else
+#define ARROW_HASH_ROTL32(x, r) (x << r) | (x >> (32 - r))
+#endif
+
+template <typename SizeT>
+inline void hash_combine_impl(SizeT& seed, SizeT value) {
+ seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+}
+
+inline void hash_combine_impl(uint32_t& h1, uint32_t k1) {
+ const uint32_t c1 = 0xcc9e2d51;
+ const uint32_t c2 = 0x1b873593;
+
+ k1 *= c1;
+ k1 = ARROW_HASH_ROTL32(k1, 15);
+ k1 *= c2;
+
+ h1 ^= k1;
+ h1 = ARROW_HASH_ROTL32(h1, 13);
+ h1 = h1 * 5 + 0xe6546b64;
+}
+
+#undef ARROW_HASH_ROTL32
+
+} // namespace detail
+
+template <class T>
+inline void hash_combine(std::size_t& seed, T const& v) {
+ std::hash<T> hasher;
+ return ::arrow::internal::detail::hash_combine_impl(seed, hasher(v));
+}
+
+// END Hash utilities from Boost
+// ----------------------------------------------------------------------
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/hashing.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/hashing.h
index ac1adcfb13e..d38e3f30666 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/hashing.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/hashing.h
@@ -1,465 +1,465 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Private header, not to be exported
-
-#pragma once
-
-#include <algorithm>
-#include <cassert>
-#include <cmath>
-#include <cstdint>
-#include <cstring>
-#include <limits>
-#include <memory>
-#include <string>
-#include <type_traits>
-#include <utility>
-#include <vector>
-
-#include "arrow/array/builder_binary.h"
-#include "arrow/buffer_builder.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/type_fwd.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/bitmap_builders.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Private header, not to be exported
+
+#pragma once
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <cstring>
+#include <limits>
+#include <memory>
+#include <string>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "arrow/array/builder_binary.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/type_fwd.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_builders.h"
#include "arrow/util/endian.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/ubsan.h"
-
-#define XXH_INLINE_ALL
-
-#include "contrib/libs/xxhash/xxhash.h" // IWYU pragma: keep
-
-namespace arrow {
-namespace internal {
-
-// XXX would it help to have a 32-bit hash value on large datasets?
-typedef uint64_t hash_t;
-
-// Notes about the choice of a hash function.
-// - XXH3 is extremely fast on most data sizes, from small to huge;
-// faster even than HW CRC-based hashing schemes
-// - our custom hash function for tiny values (< 16 bytes) is still
-// significantly faster (~30%), at least on this machine and compiler
-
-template <uint64_t AlgNum>
-inline hash_t ComputeStringHash(const void* data, int64_t length);
-
-template <typename Scalar, uint64_t AlgNum>
-struct ScalarHelperBase {
- static bool CompareScalars(Scalar u, Scalar v) { return u == v; }
-
- static hash_t ComputeHash(const Scalar& value) {
- // Generic hash computation for scalars. Simply apply the string hash
- // to the bit representation of the value.
-
- // XXX in the case of FP values, we'd like equal values to have the same hash,
- // even if they have different bit representations...
- return ComputeStringHash<AlgNum>(&value, sizeof(value));
- }
-};
-
-template <typename Scalar, uint64_t AlgNum = 0, typename Enable = void>
-struct ScalarHelper : public ScalarHelperBase<Scalar, AlgNum> {};
-
-template <typename Scalar, uint64_t AlgNum>
-struct ScalarHelper<Scalar, AlgNum, enable_if_t<std::is_integral<Scalar>::value>>
- : public ScalarHelperBase<Scalar, AlgNum> {
- // ScalarHelper specialization for integers
-
- static hash_t ComputeHash(const Scalar& value) {
- // Faster hash computation for integers.
-
- // Two of xxhash's prime multipliers (which are chosen for their
- // bit dispersion properties)
- static constexpr uint64_t multipliers[] = {11400714785074694791ULL,
- 14029467366897019727ULL};
-
- // Multiplying by the prime number mixes the low bits into the high bits,
- // then byte-swapping (which is a single CPU instruction) allows the
- // combined high and low bits to participate in the initial hash table index.
- auto h = static_cast<hash_t>(value);
- return BitUtil::ByteSwap(multipliers[AlgNum] * h);
- }
-};
-
-template <typename Scalar, uint64_t AlgNum>
-struct ScalarHelper<Scalar, AlgNum,
- enable_if_t<std::is_same<util::string_view, Scalar>::value>>
- : public ScalarHelperBase<Scalar, AlgNum> {
- // ScalarHelper specialization for util::string_view
-
- static hash_t ComputeHash(const util::string_view& value) {
- return ComputeStringHash<AlgNum>(value.data(), static_cast<int64_t>(value.size()));
- }
-};
-
-template <typename Scalar, uint64_t AlgNum>
-struct ScalarHelper<Scalar, AlgNum, enable_if_t<std::is_floating_point<Scalar>::value>>
- : public ScalarHelperBase<Scalar, AlgNum> {
- // ScalarHelper specialization for reals
-
- static bool CompareScalars(Scalar u, Scalar v) {
- if (std::isnan(u)) {
- // XXX should we do a bit-precise comparison?
- return std::isnan(v);
- }
- return u == v;
- }
-};
-
-template <uint64_t AlgNum = 0>
-hash_t ComputeStringHash(const void* data, int64_t length) {
- if (ARROW_PREDICT_TRUE(length <= 16)) {
- // Specialize for small hash strings, as they are quite common as
- // hash table keys. Even XXH3 isn't quite as fast.
- auto p = reinterpret_cast<const uint8_t*>(data);
- auto n = static_cast<uint32_t>(length);
- if (n <= 8) {
- if (n <= 3) {
- if (n == 0) {
- return 1U;
- }
- uint32_t x = (n << 24) ^ (p[0] << 16) ^ (p[n / 2] << 8) ^ p[n - 1];
- return ScalarHelper<uint32_t, AlgNum>::ComputeHash(x);
- }
- // 4 <= length <= 8
- // We can read the string as two overlapping 32-bit ints, apply
- // different hash functions to each of them in parallel, then XOR
- // the results
- uint32_t x, y;
- hash_t hx, hy;
- x = util::SafeLoadAs<uint32_t>(p + n - 4);
- y = util::SafeLoadAs<uint32_t>(p);
- hx = ScalarHelper<uint32_t, AlgNum>::ComputeHash(x);
- hy = ScalarHelper<uint32_t, AlgNum ^ 1>::ComputeHash(y);
- return n ^ hx ^ hy;
- }
- // 8 <= length <= 16
- // Apply the same principle as above
- uint64_t x, y;
- hash_t hx, hy;
- x = util::SafeLoadAs<uint64_t>(p + n - 8);
- y = util::SafeLoadAs<uint64_t>(p);
- hx = ScalarHelper<uint64_t, AlgNum>::ComputeHash(x);
- hy = ScalarHelper<uint64_t, AlgNum ^ 1>::ComputeHash(y);
- return n ^ hx ^ hy;
- }
-
-#if XXH3_SECRET_SIZE_MIN != 136
-#error XXH3_SECRET_SIZE_MIN changed, please fix kXxh3Secrets
-#endif
-
- // XXH3_64bits_withSeed generates a secret based on the seed, which is too slow.
- // Instead, we use hard-coded random secrets. To maximize cache efficiency,
- // they reuse the same memory area.
- static constexpr unsigned char kXxh3Secrets[XXH3_SECRET_SIZE_MIN + 1] = {
- 0xe7, 0x8b, 0x13, 0xf9, 0xfc, 0xb5, 0x8e, 0xef, 0x81, 0x48, 0x2c, 0xbf, 0xf9, 0x9f,
- 0xc1, 0x1e, 0x43, 0x6d, 0xbf, 0xa6, 0x6d, 0xb5, 0x72, 0xbc, 0x97, 0xd8, 0x61, 0x24,
- 0x0f, 0x12, 0xe3, 0x05, 0x21, 0xf7, 0x5c, 0x66, 0x67, 0xa5, 0x65, 0x03, 0x96, 0x26,
- 0x69, 0xd8, 0x29, 0x20, 0xf8, 0xc7, 0xb0, 0x3d, 0xdd, 0x7d, 0x18, 0xa0, 0x60, 0x75,
- 0x92, 0xa4, 0xce, 0xba, 0xc0, 0x77, 0xf4, 0xac, 0xb7, 0x03, 0x53, 0xf0, 0x98, 0xce,
- 0xe6, 0x2b, 0x20, 0xc7, 0x82, 0x91, 0xab, 0xbf, 0x68, 0x5c, 0x62, 0x4d, 0x33, 0xa3,
- 0xe1, 0xb3, 0xff, 0x97, 0x54, 0x4c, 0x44, 0x34, 0xb5, 0xb9, 0x32, 0x4c, 0x75, 0x42,
- 0x89, 0x53, 0x94, 0xd4, 0x9f, 0x2b, 0x76, 0x4d, 0x4e, 0xe6, 0xfa, 0x15, 0x3e, 0xc1,
- 0xdb, 0x71, 0x4b, 0x2c, 0x94, 0xf5, 0xfc, 0x8c, 0x89, 0x4b, 0xfb, 0xc1, 0x82, 0xa5,
- 0x6a, 0x53, 0xf9, 0x4a, 0xba, 0xce, 0x1f, 0xc0, 0x97, 0x1a, 0x87};
-
- static_assert(AlgNum < 2, "AlgNum too large");
- static constexpr auto secret = kXxh3Secrets + AlgNum;
- return XXH3_64bits_withSecret(data, static_cast<size_t>(length), secret,
- XXH3_SECRET_SIZE_MIN);
-}
-
-// XXX add a HashEq<ArrowType> struct with both hash and compare functions?
-
-// ----------------------------------------------------------------------
-// An open-addressing insert-only hash table (no deletes)
-
-template <typename Payload>
-class HashTable {
- public:
- static constexpr hash_t kSentinel = 0ULL;
- static constexpr int64_t kLoadFactor = 2UL;
-
- struct Entry {
- hash_t h;
- Payload payload;
-
- // An entry is valid if the hash is different from the sentinel value
- operator bool() const { return h != kSentinel; }
- };
-
- HashTable(MemoryPool* pool, uint64_t capacity) : entries_builder_(pool) {
- DCHECK_NE(pool, nullptr);
- // Minimum of 32 elements
- capacity = std::max<uint64_t>(capacity, 32UL);
- capacity_ = BitUtil::NextPower2(capacity);
- capacity_mask_ = capacity_ - 1;
- size_ = 0;
-
- DCHECK_OK(UpsizeBuffer(capacity_));
- }
-
- // Lookup with non-linear probing
- // cmp_func should have signature bool(const Payload*).
- // Return a (Entry*, found) pair.
- template <typename CmpFunc>
- std::pair<Entry*, bool> Lookup(hash_t h, CmpFunc&& cmp_func) {
- auto p = Lookup<DoCompare, CmpFunc>(h, entries_, capacity_mask_,
- std::forward<CmpFunc>(cmp_func));
- return {&entries_[p.first], p.second};
- }
-
- template <typename CmpFunc>
- std::pair<const Entry*, bool> Lookup(hash_t h, CmpFunc&& cmp_func) const {
- auto p = Lookup<DoCompare, CmpFunc>(h, entries_, capacity_mask_,
- std::forward<CmpFunc>(cmp_func));
- return {&entries_[p.first], p.second};
- }
-
- Status Insert(Entry* entry, hash_t h, const Payload& payload) {
- // Ensure entry is empty before inserting
- assert(!*entry);
- entry->h = FixHash(h);
- entry->payload = payload;
- ++size_;
-
- if (ARROW_PREDICT_FALSE(NeedUpsizing())) {
- // Resize less frequently since it is expensive
- return Upsize(capacity_ * kLoadFactor * 2);
- }
- return Status::OK();
- }
-
- uint64_t size() const { return size_; }
-
- // Visit all non-empty entries in the table
- // The visit_func should have signature void(const Entry*)
- template <typename VisitFunc>
- void VisitEntries(VisitFunc&& visit_func) const {
- for (uint64_t i = 0; i < capacity_; i++) {
- const auto& entry = entries_[i];
- if (entry) {
- visit_func(&entry);
- }
- }
- }
-
- protected:
- // NoCompare is for when the value is known not to exist in the table
- enum CompareKind { DoCompare, NoCompare };
-
- // The workhorse lookup function
- template <CompareKind CKind, typename CmpFunc>
- std::pair<uint64_t, bool> Lookup(hash_t h, const Entry* entries, uint64_t size_mask,
- CmpFunc&& cmp_func) const {
- static constexpr uint8_t perturb_shift = 5;
-
- uint64_t index, perturb;
- const Entry* entry;
-
- h = FixHash(h);
- index = h & size_mask;
- perturb = (h >> perturb_shift) + 1U;
-
- while (true) {
- entry = &entries[index];
- if (CompareEntry<CKind, CmpFunc>(h, entry, std::forward<CmpFunc>(cmp_func))) {
- // Found
- return {index, true};
- }
- if (entry->h == kSentinel) {
- // Empty slot
- return {index, false};
- }
-
- // Perturbation logic inspired from CPython's set / dict object.
- // The goal is that all 64 bits of the unmasked hash value eventually
- // participate in the probing sequence, to minimize clustering.
- index = (index + perturb) & size_mask;
- perturb = (perturb >> perturb_shift) + 1U;
- }
- }
-
- template <CompareKind CKind, typename CmpFunc>
- bool CompareEntry(hash_t h, const Entry* entry, CmpFunc&& cmp_func) const {
- if (CKind == NoCompare) {
- return false;
- } else {
- return entry->h == h && cmp_func(&entry->payload);
- }
- }
-
- bool NeedUpsizing() const {
- // Keep the load factor <= 1/2
- return size_ * kLoadFactor >= capacity_;
- }
-
- Status UpsizeBuffer(uint64_t capacity) {
- RETURN_NOT_OK(entries_builder_.Resize(capacity));
- entries_ = entries_builder_.mutable_data();
- memset(static_cast<void*>(entries_), 0, capacity * sizeof(Entry));
-
- return Status::OK();
- }
-
- Status Upsize(uint64_t new_capacity) {
- assert(new_capacity > capacity_);
- uint64_t new_mask = new_capacity - 1;
- assert((new_capacity & new_mask) == 0); // it's a power of two
-
- // Stash old entries and seal builder, effectively resetting the Buffer
- const Entry* old_entries = entries_;
+#include "arrow/util/logging.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/ubsan.h"
+
+#define XXH_INLINE_ALL
+
+#include "contrib/libs/xxhash/xxhash.h" // IWYU pragma: keep
+
+namespace arrow {
+namespace internal {
+
+// XXX would it help to have a 32-bit hash value on large datasets?
+typedef uint64_t hash_t;
+
+// Notes about the choice of a hash function.
+// - XXH3 is extremely fast on most data sizes, from small to huge;
+// faster even than HW CRC-based hashing schemes
+// - our custom hash function for tiny values (< 16 bytes) is still
+// significantly faster (~30%), at least on this machine and compiler
+
+template <uint64_t AlgNum>
+inline hash_t ComputeStringHash(const void* data, int64_t length);
+
+template <typename Scalar, uint64_t AlgNum>
+struct ScalarHelperBase {
+ static bool CompareScalars(Scalar u, Scalar v) { return u == v; }
+
+ static hash_t ComputeHash(const Scalar& value) {
+ // Generic hash computation for scalars. Simply apply the string hash
+ // to the bit representation of the value.
+
+ // XXX in the case of FP values, we'd like equal values to have the same hash,
+ // even if they have different bit representations...
+ return ComputeStringHash<AlgNum>(&value, sizeof(value));
+ }
+};
+
+template <typename Scalar, uint64_t AlgNum = 0, typename Enable = void>
+struct ScalarHelper : public ScalarHelperBase<Scalar, AlgNum> {};
+
+template <typename Scalar, uint64_t AlgNum>
+struct ScalarHelper<Scalar, AlgNum, enable_if_t<std::is_integral<Scalar>::value>>
+ : public ScalarHelperBase<Scalar, AlgNum> {
+ // ScalarHelper specialization for integers
+
+ static hash_t ComputeHash(const Scalar& value) {
+ // Faster hash computation for integers.
+
+ // Two of xxhash's prime multipliers (which are chosen for their
+ // bit dispersion properties)
+ static constexpr uint64_t multipliers[] = {11400714785074694791ULL,
+ 14029467366897019727ULL};
+
+ // Multiplying by the prime number mixes the low bits into the high bits,
+ // then byte-swapping (which is a single CPU instruction) allows the
+ // combined high and low bits to participate in the initial hash table index.
+ auto h = static_cast<hash_t>(value);
+ return BitUtil::ByteSwap(multipliers[AlgNum] * h);
+ }
+};
+
+template <typename Scalar, uint64_t AlgNum>
+struct ScalarHelper<Scalar, AlgNum,
+ enable_if_t<std::is_same<util::string_view, Scalar>::value>>
+ : public ScalarHelperBase<Scalar, AlgNum> {
+ // ScalarHelper specialization for util::string_view
+
+ static hash_t ComputeHash(const util::string_view& value) {
+ return ComputeStringHash<AlgNum>(value.data(), static_cast<int64_t>(value.size()));
+ }
+};
+
+template <typename Scalar, uint64_t AlgNum>
+struct ScalarHelper<Scalar, AlgNum, enable_if_t<std::is_floating_point<Scalar>::value>>
+ : public ScalarHelperBase<Scalar, AlgNum> {
+ // ScalarHelper specialization for reals
+
+ static bool CompareScalars(Scalar u, Scalar v) {
+ if (std::isnan(u)) {
+ // XXX should we do a bit-precise comparison?
+ return std::isnan(v);
+ }
+ return u == v;
+ }
+};
+
+template <uint64_t AlgNum = 0>  // Hash `length` bytes at `data`; AlgNum (0 or 1, see the static_assert) selects the hash variant
+hash_t ComputeStringHash(const void* data, int64_t length) {
+ if (ARROW_PREDICT_TRUE(length <= 16)) {
+ // Specialize for small hash strings, as they are quite common as
+ // hash table keys. Even XXH3 isn't quite as fast.
+ auto p = reinterpret_cast<const uint8_t*>(data);
+ auto n = static_cast<uint32_t>(length);
+ if (n <= 8) {
+ if (n <= 3) {
+ if (n == 0) {
+ return 1U;  // fixed hash value for the empty string
+ }
+ uint32_t x = (n << 24) ^ (p[0] << 16) ^ (p[n / 2] << 8) ^ p[n - 1];  // pack the length with the first, middle and last byte
+ return ScalarHelper<uint32_t, AlgNum>::ComputeHash(x);
+ }
+ // 4 <= length <= 8
+ // We can read the string as two overlapping 32-bit ints, apply
+ // different hash functions to each of them in parallel, then XOR
+ // the results
+ uint32_t x, y;
+ hash_t hx, hy;
+ x = util::SafeLoadAs<uint32_t>(p + n - 4);
+ y = util::SafeLoadAs<uint32_t>(p);
+ hx = ScalarHelper<uint32_t, AlgNum>::ComputeHash(x);
+ hy = ScalarHelper<uint32_t, AlgNum ^ 1>::ComputeHash(y);
+ return n ^ hx ^ hy;
+ }
+ // 9 <= length <= 16 (lengths up to 8 were handled above)
+ // Apply the same principle as above, with two overlapping 64-bit loads
+ uint64_t x, y;
+ hash_t hx, hy;
+ x = util::SafeLoadAs<uint64_t>(p + n - 8);
+ y = util::SafeLoadAs<uint64_t>(p);
+ hx = ScalarHelper<uint64_t, AlgNum>::ComputeHash(x);
+ hy = ScalarHelper<uint64_t, AlgNum ^ 1>::ComputeHash(y);
+ return n ^ hx ^ hy;
+ }
+
+#if XXH3_SECRET_SIZE_MIN != 136
+#error XXH3_SECRET_SIZE_MIN changed, please fix kXxh3Secrets
+#endif
+
+ // XXH3_64bits_withSeed generates a secret based on the seed, which is too slow.
+ // Instead, we use hard-coded random secrets. To maximize cache efficiency,
+ // they reuse the same memory area (the table holds one extra byte for that).
+ static constexpr unsigned char kXxh3Secrets[XXH3_SECRET_SIZE_MIN + 1] = {
+ 0xe7, 0x8b, 0x13, 0xf9, 0xfc, 0xb5, 0x8e, 0xef, 0x81, 0x48, 0x2c, 0xbf, 0xf9, 0x9f,
+ 0xc1, 0x1e, 0x43, 0x6d, 0xbf, 0xa6, 0x6d, 0xb5, 0x72, 0xbc, 0x97, 0xd8, 0x61, 0x24,
+ 0x0f, 0x12, 0xe3, 0x05, 0x21, 0xf7, 0x5c, 0x66, 0x67, 0xa5, 0x65, 0x03, 0x96, 0x26,
+ 0x69, 0xd8, 0x29, 0x20, 0xf8, 0xc7, 0xb0, 0x3d, 0xdd, 0x7d, 0x18, 0xa0, 0x60, 0x75,
+ 0x92, 0xa4, 0xce, 0xba, 0xc0, 0x77, 0xf4, 0xac, 0xb7, 0x03, 0x53, 0xf0, 0x98, 0xce,
+ 0xe6, 0x2b, 0x20, 0xc7, 0x82, 0x91, 0xab, 0xbf, 0x68, 0x5c, 0x62, 0x4d, 0x33, 0xa3,
+ 0xe1, 0xb3, 0xff, 0x97, 0x54, 0x4c, 0x44, 0x34, 0xb5, 0xb9, 0x32, 0x4c, 0x75, 0x42,
+ 0x89, 0x53, 0x94, 0xd4, 0x9f, 0x2b, 0x76, 0x4d, 0x4e, 0xe6, 0xfa, 0x15, 0x3e, 0xc1,
+ 0xdb, 0x71, 0x4b, 0x2c, 0x94, 0xf5, 0xfc, 0x8c, 0x89, 0x4b, 0xfb, 0xc1, 0x82, 0xa5,
+ 0x6a, 0x53, 0xf9, 0x4a, 0xba, 0xce, 0x1f, 0xc0, 0x97, 0x1a, 0x87};
+
+ static_assert(AlgNum < 2, "AlgNum too large");
+ static constexpr auto secret = kXxh3Secrets + AlgNum;  // each AlgNum reads the shared table starting at a different byte
+ return XXH3_64bits_withSecret(data, static_cast<size_t>(length), secret,
+ XXH3_SECRET_SIZE_MIN);
+}
+
+// XXX add a HashEq<ArrowType> struct with both hash and compare functions?
+
+// ----------------------------------------------------------------------
+// An open-addressing insert-only hash table (no deletes)
+
+template <typename Payload>
+class HashTable {
+ public:
+ static constexpr hash_t kSentinel = 0ULL;
+ static constexpr int64_t kLoadFactor = 2UL;
+
+  // One slot of the table: the stored hash plus the caller's payload.
+  struct Entry {
+    hash_t h;
+    Payload payload;
+
+    // A slot counts as occupied when its hash is not the kSentinel marker.
+    operator bool() const { return !(h == kSentinel); }
+  };
+
+  // Build a table with room for at least `capacity` slots. The request is
+  // raised to a minimum of 32 and then passed through BitUtil::NextPower2, so
+  // that `capacity_ - 1` can serve as the index mask. The slot buffer is
+  // allocated and zeroed through UpsizeBuffer.
+  HashTable(MemoryPool* pool, uint64_t capacity) : entries_builder_(pool) {
+    DCHECK_NE(pool, nullptr);
+    const uint64_t requested = std::max<uint64_t>(capacity, 32UL);
+    capacity_ = BitUtil::NextPower2(requested);
+    capacity_mask_ = capacity_ - 1;
+    size_ = 0;
+
+    DCHECK_OK(UpsizeBuffer(capacity_));
+  }
+
+  // Look up hash `h` with non-linear probing.
+  // `cmp_func` must be callable as bool(const Payload*).
+  // Returns an (Entry*, found) pair; the pointer designates the slot at which
+  // the probe stopped.
+  template <typename CmpFunc>
+  std::pair<Entry*, bool> Lookup(hash_t h, CmpFunc&& cmp_func) {
+    const std::pair<uint64_t, bool> hit = Lookup<DoCompare, CmpFunc>(
+        h, entries_, capacity_mask_, std::forward<CmpFunc>(cmp_func));
+    Entry* slot = entries_ + hit.first;
+    return {slot, hit.second};
+  }
+
+  // Const overload of the lookup above: same probing, but the returned slot
+  // pointer is read-only.
+  template <typename CmpFunc>
+  std::pair<const Entry*, bool> Lookup(hash_t h, CmpFunc&& cmp_func) const {
+    const std::pair<uint64_t, bool> hit = Lookup<DoCompare, CmpFunc>(
+        h, entries_, capacity_mask_, std::forward<CmpFunc>(cmp_func));
+    const Entry* slot = entries_ + hit.first;
+    return {slot, hit.second};
+  }
+
+  // Store (h, payload) in `entry`, which must currently be an empty slot
+  // (checked by assert). The hash is passed through FixHash before being
+  // stored. Once NeedUpsizing() reports true the table is grown, and the
+  // Status of that growth is returned.
+  Status Insert(Entry* entry, hash_t h, const Payload& payload) {
+    assert(!*entry);
+    entry->h = FixHash(h);
+    entry->payload = payload;
+    ++size_;
+
+    if (ARROW_PREDICT_TRUE(!NeedUpsizing())) {
+      return Status::OK();
+    }
+    // Resize less frequently since it is expensive
+    return Upsize(capacity_ * kLoadFactor * 2);
+  }
+
+  // Number of occupied slots.
+  uint64_t size() const {
+    return size_;
+  }
+
+  // Call `visit_func` on every occupied slot, in slot order.
+  // `visit_func` should have signature void(const Entry*).
+  template <typename VisitFunc>
+  void VisitEntries(VisitFunc&& visit_func) const {
+    const Entry* const last = entries_ + capacity_;
+    for (const Entry* slot = entries_; slot != last; ++slot) {
+      if (!*slot) {
+        continue;
+      }
+      visit_func(slot);
+    }
+  }
+
+ protected:
+ // Lookup mode: DoCompare tests candidate entries; NoCompare is for when the value is known not to exist in the table
+ enum CompareKind { DoCompare, NoCompare };
+
+ // The workhorse lookup function: probes `entries` (indexed modulo `size_mask`) for hash `h` and returns {slot index, found}
+ template <CompareKind CKind, typename CmpFunc>
+ std::pair<uint64_t, bool> Lookup(hash_t h, const Entry* entries, uint64_t size_mask,
+ CmpFunc&& cmp_func) const {
+ static constexpr uint8_t perturb_shift = 5;
+
+ uint64_t index, perturb;
+ const Entry* entry;
+
+ h = FixHash(h);  // Insert() stores FixHash(h), so compare against the same normalized value
+ index = h & size_mask;
+ perturb = (h >> perturb_shift) + 1U;  // the +1 keeps the probe step non-zero
+
+ while (true) {
+ entry = &entries[index];
+ if (CompareEntry<CKind, CmpFunc>(h, entry, std::forward<CmpFunc>(cmp_func))) {
+ // Found
+ return {index, true};
+ }
+ if (entry->h == kSentinel) {
+ // Empty slot
+ return {index, false};
+ }
+
+ // Perturbation logic inspired from CPython's set / dict object.
+ // The goal is that all 64 bits of the unmasked hash value eventually
+ // participate in the probing sequence, to minimize clustering.
+ index = (index + perturb) & size_mask;
+ perturb = (perturb >> perturb_shift) + 1U;
+ }
+ }
+
+  // Decide whether `entry` matches hash `h`. In NoCompare mode nothing ever
+  // matches and `cmp_func` is not called; otherwise the stored hash must be
+  // equal to `h` and `cmp_func` must accept the payload.
+  template <CompareKind CKind, typename CmpFunc>
+  bool CompareEntry(hash_t h, const Entry* entry, CmpFunc&& cmp_func) const {
+    if (CKind != NoCompare) {
+      return entry->h == h && cmp_func(&entry->payload);
+    }
+    return false;
+  }
+
+  // True once size_ * kLoadFactor reaches capacity_, i.e. the table must grow
+  // to keep the load factor <= 1/2.
+  bool NeedUpsizing() const {
+    const auto scaled_size = size_ * kLoadFactor;
+    return scaled_size >= capacity_;
+  }
+
+  // Resize the backing builder to `capacity` entries, refresh `entries_` from
+  // it and zero all `capacity` slots. A failed Resize is returned to the
+  // caller.
+  Status UpsizeBuffer(uint64_t capacity) {
+    RETURN_NOT_OK(entries_builder_.Resize(capacity));
+
+    entries_ = entries_builder_.mutable_data();
+    void* raw_slots = static_cast<void*>(entries_);
+    memset(raw_slots, 0, capacity * sizeof(Entry));
+
+    return Status::OK();
+  }
+
+ Status Upsize(uint64_t new_capacity) {
+ assert(new_capacity > capacity_);
+ uint64_t new_mask = new_capacity - 1;
+ assert((new_capacity & new_mask) == 0); // it's a power of two
+
+ // Stash old entries and seal builder, effectively resetting the Buffer
+ const Entry* old_entries = entries_;
ARROW_ASSIGN_OR_RAISE(auto previous, entries_builder_.FinishWithLength(capacity_));
- // Allocate new buffer
- RETURN_NOT_OK(UpsizeBuffer(new_capacity));
-
- for (uint64_t i = 0; i < capacity_; i++) {
- const auto& entry = old_entries[i];
- if (entry) {
- // Dummy compare function will not be called
- auto p = Lookup<NoCompare>(entry.h, entries_, new_mask,
- [](const Payload*) { return false; });
- // Lookup<NoCompare> (and CompareEntry<NoCompare>) ensure that an
- // empty slots is always returned
- assert(!p.second);
- entries_[p.first] = entry;
- }
- }
- capacity_ = new_capacity;
- capacity_mask_ = new_mask;
-
- return Status::OK();
- }
-
- hash_t FixHash(hash_t h) const { return (h == kSentinel) ? 42U : h; }
-
- // The number of slots available in the hash table array.
- uint64_t capacity_;
- uint64_t capacity_mask_;
- // The number of used slots in the hash table array.
- uint64_t size_;
-
- Entry* entries_;
- TypedBufferBuilder<Entry> entries_builder_;
-};
-
-// XXX typedef memo_index_t int32_t ?
-
-constexpr int32_t kKeyNotFound = -1;
-
-// ----------------------------------------------------------------------
-// A base class for memoization table.
-
-class MemoTable {
- public:
- virtual ~MemoTable() = default;
-
- virtual int32_t size() const = 0;
-};
-
-// ----------------------------------------------------------------------
-// A memoization table for memory-cheap scalar values.
-
-// The memoization table remembers and allows to look up the insertion
-// index for each key.
-
-template <typename Scalar, template <class> class HashTableTemplateType = HashTable>
-class ScalarMemoTable : public MemoTable {
- public:
- explicit ScalarMemoTable(MemoryPool* pool, int64_t entries = 0)
- : hash_table_(pool, static_cast<uint64_t>(entries)) {}
-
- int32_t Get(const Scalar& value) const {
- auto cmp_func = [value](const Payload* payload) -> bool {
- return ScalarHelper<Scalar, 0>::CompareScalars(payload->value, value);
- };
- hash_t h = ComputeHash(value);
- auto p = hash_table_.Lookup(h, cmp_func);
- if (p.second) {
- return p.first->payload.memo_index;
- } else {
- return kKeyNotFound;
- }
- }
-
- template <typename Func1, typename Func2>
- Status GetOrInsert(const Scalar& value, Func1&& on_found, Func2&& on_not_found,
- int32_t* out_memo_index) {
- auto cmp_func = [value](const Payload* payload) -> bool {
- return ScalarHelper<Scalar, 0>::CompareScalars(value, payload->value);
- };
- hash_t h = ComputeHash(value);
- auto p = hash_table_.Lookup(h, cmp_func);
- int32_t memo_index;
- if (p.second) {
- memo_index = p.first->payload.memo_index;
- on_found(memo_index);
- } else {
- memo_index = size();
- RETURN_NOT_OK(hash_table_.Insert(p.first, h, {value, memo_index}));
- on_not_found(memo_index);
- }
- *out_memo_index = memo_index;
- return Status::OK();
- }
-
- Status GetOrInsert(const Scalar& value, int32_t* out_memo_index) {
- return GetOrInsert(
- value, [](int32_t i) {}, [](int32_t i) {}, out_memo_index);
- }
-
- int32_t GetNull() const { return null_index_; }
-
- template <typename Func1, typename Func2>
- int32_t GetOrInsertNull(Func1&& on_found, Func2&& on_not_found) {
- int32_t memo_index = GetNull();
- if (memo_index != kKeyNotFound) {
- on_found(memo_index);
- } else {
- null_index_ = memo_index = size();
- on_not_found(memo_index);
- }
- return memo_index;
- }
-
- int32_t GetOrInsertNull() {
- return GetOrInsertNull([](int32_t i) {}, [](int32_t i) {});
- }
-
- // The number of entries in the memo table +1 if null was added.
- // (which is also 1 + the largest memo index)
- int32_t size() const override {
- return static_cast<int32_t>(hash_table_.size()) + (GetNull() != kKeyNotFound);
- }
-
- // Copy values starting from index `start` into `out_data`
- void CopyValues(int32_t start, Scalar* out_data) const {
- hash_table_.VisitEntries([=](const HashTableEntry* entry) {
- int32_t index = entry->payload.memo_index - start;
- if (index >= 0) {
- out_data[index] = entry->payload.value;
- }
- });
+ // Allocate new buffer
+ RETURN_NOT_OK(UpsizeBuffer(new_capacity));
+
+ for (uint64_t i = 0; i < capacity_; i++) {
+ const auto& entry = old_entries[i];
+ if (entry) {
+ // Dummy compare function will not be called
+ auto p = Lookup<NoCompare>(entry.h, entries_, new_mask,
+ [](const Payload*) { return false; });
+ // Lookup<NoCompare> (and CompareEntry<NoCompare>) ensure that an
+ // empty slots is always returned
+ assert(!p.second);
+ entries_[p.first] = entry;
+ }
+ }
+ capacity_ = new_capacity;
+ capacity_mask_ = new_mask;
+
+ return Status::OK();
+ }
+
+  // Replace a hash equal to kSentinel by 42; every other hash is returned
+  // unchanged.
+  hash_t FixHash(hash_t h) const {
+    if (h == kSentinel) {
+      return 42U;
+    }
+    return h;
+  }
+
+ // The number of slots available in the hash table array.
+ uint64_t capacity_;
+ uint64_t capacity_mask_;
+ // The number of used slots in the hash table array.
+ uint64_t size_;
+
+ Entry* entries_;
+ TypedBufferBuilder<Entry> entries_builder_;
+};
+
+// XXX typedef memo_index_t int32_t ?
+
+constexpr int32_t kKeyNotFound = -1;
+
+// ----------------------------------------------------------------------
+// A base class for memoization table.
+
+class MemoTable {  // abstract base of the memo table classes defined in this file
+ public:
+ virtual ~MemoTable() = default;
+
+ virtual int32_t size() const = 0;  // number of entries; supplied by each concrete table
+};
+
+// ----------------------------------------------------------------------
+// A memoization table for memory-cheap scalar values.
+
+// The memoization table remembers and allows to look up the insertion
+// index for each key.
+
+template <typename Scalar, template <class> class HashTableTemplateType = HashTable>
+class ScalarMemoTable : public MemoTable {
+ public:
+  // Create an empty memo table. `pool` and `entries` (converted to an
+  // unsigned capacity) are forwarded to the underlying hash table.
+  explicit ScalarMemoTable(MemoryPool* pool, int64_t entries = 0)
+      : hash_table_(pool, static_cast<uint64_t>(entries)) {
+  }
+
+  // Return the memo index recorded for `value`, or kKeyNotFound when the
+  // value is not in the table.
+  int32_t Get(const Scalar& value) const {
+    auto matches = [value](const Payload* payload) -> bool {
+      return ScalarHelper<Scalar, 0>::CompareScalars(payload->value, value);
+    };
+    const hash_t h = ComputeHash(value);
+    const auto result = hash_table_.Lookup(h, matches);
+    return result.second ? result.first->payload.memo_index : kKeyNotFound;
+  }
+
+  // Look `value` up and insert it when missing.
+  // - If present: `on_found(memo_index)` is called.
+  // - If absent: the value is inserted with memo index size(), then
+  //   `on_not_found(memo_index)` is called. A failed insertion returns its
+  //   Status without calling the callback or writing `out_memo_index`.
+  // On success the memo index is written to `*out_memo_index`.
+  template <typename Func1, typename Func2>
+  Status GetOrInsert(const Scalar& value, Func1&& on_found, Func2&& on_not_found,
+                     int32_t* out_memo_index) {
+    auto matches = [value](const Payload* payload) -> bool {
+      return ScalarHelper<Scalar, 0>::CompareScalars(value, payload->value);
+    };
+    const hash_t h = ComputeHash(value);
+    auto result = hash_table_.Lookup(h, matches);
+
+    int32_t memo_index;
+    if (!result.second) {
+      memo_index = size();
+      RETURN_NOT_OK(hash_table_.Insert(result.first, h, {value, memo_index}));
+      on_not_found(memo_index);
+    } else {
+      memo_index = result.first->payload.memo_index;
+      on_found(memo_index);
+    }
+    *out_memo_index = memo_index;
+    return Status::OK();
+  }
+
+  // Convenience overload: get-or-insert with callbacks that do nothing.
+  Status GetOrInsert(const Scalar& value, int32_t* out_memo_index) {
+    auto noop = [](int32_t) {};
+    return GetOrInsert(value, noop, noop, out_memo_index);
+  }
+
+  // Memo index assigned to null, or kKeyNotFound if null was never inserted.
+  int32_t GetNull() const {
+    return null_index_;
+  }
+
+  // Return the memo index of null, assigning it the next index (size()) the
+  // first time. Calls `on_not_found(index)` when null is newly inserted and
+  // `on_found(index)` otherwise.
+  template <typename Func1, typename Func2>
+  int32_t GetOrInsertNull(Func1&& on_found, Func2&& on_not_found) {
+    int32_t memo_index = GetNull();
+    if (memo_index == kKeyNotFound) {
+      memo_index = size();
+      null_index_ = memo_index;
+      on_not_found(memo_index);
+    } else {
+      on_found(memo_index);
+    }
+    return memo_index;
+  }
+
+  // Convenience overload: get-or-insert null with callbacks that do nothing.
+  int32_t GetOrInsertNull() {
+    auto noop = [](int32_t) {};
+    return GetOrInsertNull(noop, noop);
+  }
+
+  // Number of entries in the hash table, plus one if null was inserted
+  // (which is also 1 + the largest memo index).
+  int32_t size() const override {
+    const int32_t null_count = (GetNull() != kKeyNotFound) ? 1 : 0;
+    return static_cast<int32_t>(hash_table_.size()) + null_count;
+  }
+
+ // Copy values starting from index `start` into `out_data`
+ void CopyValues(int32_t start, Scalar* out_data) const {
+ hash_table_.VisitEntries([=](const HashTableEntry* entry) {
+ int32_t index = entry->payload.memo_index - start;
+ if (index >= 0) {
+ out_data[index] = entry->payload.value;
+ }
+ });
// Zero-initialize the null entry
if (null_index_ != kKeyNotFound) {
int32_t index = null_index_ - start;
@@ -467,420 +467,420 @@ class ScalarMemoTable : public MemoTable {
out_data[index] = Scalar{};
}
}
- }
-
- void CopyValues(Scalar* out_data) const { CopyValues(0, out_data); }
-
- protected:
- struct Payload {
- Scalar value;
- int32_t memo_index;
- };
-
- using HashTableType = HashTableTemplateType<Payload>;
- using HashTableEntry = typename HashTableType::Entry;
- HashTableType hash_table_;
- int32_t null_index_ = kKeyNotFound;
-
- hash_t ComputeHash(const Scalar& value) const {
- return ScalarHelper<Scalar, 0>::ComputeHash(value);
- }
-};
-
-// ----------------------------------------------------------------------
-// A memoization table for small scalar values, using direct indexing
-
-template <typename Scalar, typename Enable = void>
-struct SmallScalarTraits {};
-
-template <>
-struct SmallScalarTraits<bool> {
- static constexpr int32_t cardinality = 2;
-
- static uint32_t AsIndex(bool value) { return value ? 1 : 0; }
-};
-
-template <typename Scalar>
-struct SmallScalarTraits<Scalar, enable_if_t<std::is_integral<Scalar>::value>> {
- using Unsigned = typename std::make_unsigned<Scalar>::type;
-
- static constexpr int32_t cardinality = 1U + std::numeric_limits<Unsigned>::max();
-
- static uint32_t AsIndex(Scalar value) { return static_cast<Unsigned>(value); }
-};
-
-template <typename Scalar, template <class> class HashTableTemplateType = HashTable>
-class SmallScalarMemoTable : public MemoTable {
- public:
- explicit SmallScalarMemoTable(MemoryPool* pool, int64_t entries = 0) {
- std::fill(value_to_index_, value_to_index_ + cardinality + 1, kKeyNotFound);
- index_to_value_.reserve(cardinality);
- }
-
- int32_t Get(const Scalar value) const {
- auto value_index = AsIndex(value);
- return value_to_index_[value_index];
- }
-
- template <typename Func1, typename Func2>
- Status GetOrInsert(const Scalar value, Func1&& on_found, Func2&& on_not_found,
- int32_t* out_memo_index) {
- auto value_index = AsIndex(value);
- auto memo_index = value_to_index_[value_index];
- if (memo_index == kKeyNotFound) {
- memo_index = static_cast<int32_t>(index_to_value_.size());
- index_to_value_.push_back(value);
- value_to_index_[value_index] = memo_index;
- DCHECK_LT(memo_index, cardinality + 1);
- on_not_found(memo_index);
- } else {
- on_found(memo_index);
- }
- *out_memo_index = memo_index;
- return Status::OK();
- }
-
- Status GetOrInsert(const Scalar value, int32_t* out_memo_index) {
- return GetOrInsert(
- value, [](int32_t i) {}, [](int32_t i) {}, out_memo_index);
- }
-
- int32_t GetNull() const { return value_to_index_[cardinality]; }
-
- template <typename Func1, typename Func2>
- int32_t GetOrInsertNull(Func1&& on_found, Func2&& on_not_found) {
- auto memo_index = GetNull();
- if (memo_index == kKeyNotFound) {
- memo_index = value_to_index_[cardinality] = size();
- index_to_value_.push_back(0);
- on_not_found(memo_index);
- } else {
- on_found(memo_index);
- }
- return memo_index;
- }
-
- int32_t GetOrInsertNull() {
- return GetOrInsertNull([](int32_t i) {}, [](int32_t i) {});
- }
-
- // The number of entries in the memo table
- // (which is also 1 + the largest memo index)
- int32_t size() const override { return static_cast<int32_t>(index_to_value_.size()); }
-
- // Copy values starting from index `start` into `out_data`
- void CopyValues(int32_t start, Scalar* out_data) const {
- DCHECK_GE(start, 0);
- DCHECK_LE(static_cast<size_t>(start), index_to_value_.size());
- int64_t offset = start * static_cast<int32_t>(sizeof(Scalar));
- memcpy(out_data, index_to_value_.data() + offset, (size() - start) * sizeof(Scalar));
- }
-
- void CopyValues(Scalar* out_data) const { CopyValues(0, out_data); }
-
- const std::vector<Scalar>& values() const { return index_to_value_; }
-
- protected:
- static constexpr auto cardinality = SmallScalarTraits<Scalar>::cardinality;
- static_assert(cardinality <= 256, "cardinality too large for direct-addressed table");
-
- uint32_t AsIndex(Scalar value) const {
- return SmallScalarTraits<Scalar>::AsIndex(value);
- }
-
- // The last index is reserved for the null element.
- int32_t value_to_index_[cardinality + 1];
- std::vector<Scalar> index_to_value_;
-};
-
-// ----------------------------------------------------------------------
-// A memoization table for variable-sized binary data.
-
-template <typename BinaryBuilderT>
-class BinaryMemoTable : public MemoTable {
- public:
- using builder_offset_type = typename BinaryBuilderT::offset_type;
- explicit BinaryMemoTable(MemoryPool* pool, int64_t entries = 0,
- int64_t values_size = -1)
- : hash_table_(pool, static_cast<uint64_t>(entries)), binary_builder_(pool) {
- const int64_t data_size = (values_size < 0) ? entries * 4 : values_size;
- DCHECK_OK(binary_builder_.Resize(entries));
- DCHECK_OK(binary_builder_.ReserveData(data_size));
- }
-
- int32_t Get(const void* data, builder_offset_type length) const {
- hash_t h = ComputeStringHash<0>(data, length);
- auto p = Lookup(h, data, length);
- if (p.second) {
- return p.first->payload.memo_index;
- } else {
- return kKeyNotFound;
- }
- }
-
- int32_t Get(const util::string_view& value) const {
- return Get(value.data(), static_cast<builder_offset_type>(value.length()));
- }
-
- template <typename Func1, typename Func2>
- Status GetOrInsert(const void* data, builder_offset_type length, Func1&& on_found,
- Func2&& on_not_found, int32_t* out_memo_index) {
- hash_t h = ComputeStringHash<0>(data, length);
- auto p = Lookup(h, data, length);
- int32_t memo_index;
- if (p.second) {
- memo_index = p.first->payload.memo_index;
- on_found(memo_index);
- } else {
- memo_index = size();
- // Insert string value
- RETURN_NOT_OK(binary_builder_.Append(static_cast<const char*>(data), length));
- // Insert hash entry
- RETURN_NOT_OK(
- hash_table_.Insert(const_cast<HashTableEntry*>(p.first), h, {memo_index}));
-
- on_not_found(memo_index);
- }
- *out_memo_index = memo_index;
- return Status::OK();
- }
-
- template <typename Func1, typename Func2>
- Status GetOrInsert(const util::string_view& value, Func1&& on_found,
- Func2&& on_not_found, int32_t* out_memo_index) {
- return GetOrInsert(value.data(), static_cast<builder_offset_type>(value.length()),
- std::forward<Func1>(on_found), std::forward<Func2>(on_not_found),
- out_memo_index);
- }
-
- Status GetOrInsert(const void* data, builder_offset_type length,
- int32_t* out_memo_index) {
- return GetOrInsert(
- data, length, [](int32_t i) {}, [](int32_t i) {}, out_memo_index);
- }
-
- Status GetOrInsert(const util::string_view& value, int32_t* out_memo_index) {
- return GetOrInsert(value.data(), static_cast<builder_offset_type>(value.length()),
- out_memo_index);
- }
-
- int32_t GetNull() const { return null_index_; }
-
- template <typename Func1, typename Func2>
- int32_t GetOrInsertNull(Func1&& on_found, Func2&& on_not_found) {
- int32_t memo_index = GetNull();
- if (memo_index == kKeyNotFound) {
- memo_index = null_index_ = size();
- DCHECK_OK(binary_builder_.AppendNull());
- on_not_found(memo_index);
- } else {
- on_found(memo_index);
- }
- return memo_index;
- }
-
- int32_t GetOrInsertNull() {
- return GetOrInsertNull([](int32_t i) {}, [](int32_t i) {});
- }
-
- // The number of entries in the memo table
- // (which is also 1 + the largest memo index)
- int32_t size() const override {
- return static_cast<int32_t>(hash_table_.size() + (GetNull() != kKeyNotFound));
- }
-
- int64_t values_size() const { return binary_builder_.value_data_length(); }
-
- // Copy (n + 1) offsets starting from index `start` into `out_data`
- template <class Offset>
- void CopyOffsets(int32_t start, Offset* out_data) const {
- DCHECK_LE(start, size());
-
- const builder_offset_type* offsets = binary_builder_.offsets_data();
+ }
+
+  // Copy all values, starting from memo index 0, into `out_data`.
+  void CopyValues(Scalar* out_data) const {
+    CopyValues(0, out_data);
+  }
+
+ protected:
+ struct Payload {  // what the hash table stores for each memoized value
+ Scalar value;  // the memoized scalar
+ int32_t memo_index;  // index handed back by Get / GetOrInsert
+ };
+
+ using HashTableType = HashTableTemplateType<Payload>;
+ using HashTableEntry = typename HashTableType::Entry;
+ HashTableType hash_table_;
+ int32_t null_index_ = kKeyNotFound;
+
+  // Hash `value` with the AlgNum == 0 flavour of ScalarHelper.
+  hash_t ComputeHash(const Scalar& value) const {
+    using Helper = ScalarHelper<Scalar, 0>;
+    return Helper::ComputeHash(value);
+  }
+};
+
+// ----------------------------------------------------------------------
+// A memoization table for small scalar values, using direct indexing
+
+template <typename Scalar, typename Enable = void>
+struct SmallScalarTraits {};  // primary template is empty; the specializations supply cardinality and AsIndex
+
+// Traits for bool: two possible values, false -> index 0 and true -> index 1.
+template <>
+struct SmallScalarTraits<bool> {
+  static constexpr int32_t cardinality = 2;
+
+  static uint32_t AsIndex(bool value) {
+    if (value) {
+      return 1;
+    }
+    return 0;
+  }
+};
+
+// Traits for integral scalars: a value is indexed by its unsigned
+// reinterpretation. `cardinality` is computed as 1U + max(Unsigned) in
+// unsigned arithmetic and stored in an int32_t, so it is only meaningful for
+// types narrower than `unsigned`.
+template <typename Scalar>
+struct SmallScalarTraits<Scalar, enable_if_t<std::is_integral<Scalar>::value>> {
+  using Unsigned = typename std::make_unsigned<Scalar>::type;
+
+  static constexpr int32_t cardinality = 1U + std::numeric_limits<Unsigned>::max();
+
+  static uint32_t AsIndex(Scalar value) {
+    const Unsigned as_unsigned = static_cast<Unsigned>(value);
+    return as_unsigned;
+  }
+};
+
+// Memo table for scalars with at most 256 distinct values. Instead of hashing,
+// each value is mapped to a slot of a fixed array through
+// SmallScalarTraits<Scalar>::AsIndex; one extra slot is reserved for null.
+// The HashTableTemplateType parameter is not used by this class.
+template <typename Scalar, template <class> class HashTableTemplateType = HashTable>
+class SmallScalarMemoTable : public MemoTable {
+ public:
+  // `pool` and `entries` are accepted for signature parity with the other
+  // memo tables; neither is used here.
+  explicit SmallScalarMemoTable(MemoryPool* pool, int64_t entries = 0) {
+    std::fill(value_to_index_, value_to_index_ + cardinality + 1, kKeyNotFound);
+    index_to_value_.reserve(cardinality);
+  }
+
+  // Return the memo index of `value`, or kKeyNotFound if it was never inserted.
+  int32_t Get(const Scalar value) const {
+    auto value_index = AsIndex(value);
+    return value_to_index_[value_index];
+  }
+
+  // Look `value` up and insert it when missing. Calls `on_not_found(index)`
+  // for a new value and `on_found(index)` for a known one, then stores the
+  // memo index in `*out_memo_index`. Always returns Status::OK().
+  template <typename Func1, typename Func2>
+  Status GetOrInsert(const Scalar value, Func1&& on_found, Func2&& on_not_found,
+                     int32_t* out_memo_index) {
+    auto value_index = AsIndex(value);
+    auto memo_index = value_to_index_[value_index];
+    if (memo_index == kKeyNotFound) {
+      memo_index = static_cast<int32_t>(index_to_value_.size());
+      index_to_value_.push_back(value);
+      value_to_index_[value_index] = memo_index;
+      DCHECK_LT(memo_index, cardinality + 1);
+      on_not_found(memo_index);
+    } else {
+      on_found(memo_index);
+    }
+    *out_memo_index = memo_index;
+    return Status::OK();
+  }
+
+  // Convenience overload with callbacks that do nothing.
+  Status GetOrInsert(const Scalar value, int32_t* out_memo_index) {
+    return GetOrInsert(
+        value, [](int32_t i) {}, [](int32_t i) {}, out_memo_index);
+  }
+
+  // Memo index of null, or kKeyNotFound if null was never inserted.
+  int32_t GetNull() const { return value_to_index_[cardinality]; }
+
+  // Return the memo index of null, inserting it on first use. A placeholder
+  // value of 0 is appended to the value list for the null entry.
+  template <typename Func1, typename Func2>
+  int32_t GetOrInsertNull(Func1&& on_found, Func2&& on_not_found) {
+    auto memo_index = GetNull();
+    if (memo_index == kKeyNotFound) {
+      memo_index = value_to_index_[cardinality] = size();
+      index_to_value_.push_back(0);
+      on_not_found(memo_index);
+    } else {
+      on_found(memo_index);
+    }
+    return memo_index;
+  }
+
+  // Convenience overload with callbacks that do nothing.
+  int32_t GetOrInsertNull() {
+    return GetOrInsertNull([](int32_t i) {}, [](int32_t i) {});
+  }
+
+  // The number of entries in the memo table
+  // (which is also 1 + the largest memo index)
+  int32_t size() const override { return static_cast<int32_t>(index_to_value_.size()); }
+
+  // Copy values starting from index `start` into `out_data`
+  void CopyValues(int32_t start, Scalar* out_data) const {
+    DCHECK_GE(start, 0);
+    DCHECK_LE(static_cast<size_t>(start), index_to_value_.size());
+    // data() is a Scalar*, so the source offset is counted in elements; only
+    // the byte count passed to memcpy is scaled by sizeof(Scalar).
+    memcpy(out_data, index_to_value_.data() + start, (size() - start) * sizeof(Scalar));
+  }
+
+  // Copy all values into `out_data`.
+  void CopyValues(Scalar* out_data) const { CopyValues(0, out_data); }
+
+  // Values in insertion (memo index) order.
+  const std::vector<Scalar>& values() const { return index_to_value_; }
+
+ protected:
+  static constexpr auto cardinality = SmallScalarTraits<Scalar>::cardinality;
+  // A cardinality that wrapped to zero or below (Scalar too wide for the
+  // traits' int32_t) must be rejected as well, not only one that is too large.
+  static_assert(cardinality > 0, "cardinality must be positive");
+  static_assert(cardinality <= 256, "cardinality too large for direct-addressed table");
+
+  uint32_t AsIndex(Scalar value) const {
+    return SmallScalarTraits<Scalar>::AsIndex(value);
+  }
+
+  // The last index is reserved for the null element.
+  int32_t value_to_index_[cardinality + 1];
+  std::vector<Scalar> index_to_value_;
+};
+
+// ----------------------------------------------------------------------
+// A memoization table for variable-sized binary data.
+
+template <typename BinaryBuilderT>
+class BinaryMemoTable : public MemoTable {
+ public:
+ using builder_offset_type = typename BinaryBuilderT::offset_type;
+  // Create an empty table. The hash table is sized for `entries` values and
+  // the binary builder is resized to `entries`. When `values_size` is
+  // negative, 4 bytes of value data per entry are reserved instead.
+  explicit BinaryMemoTable(MemoryPool* pool, int64_t entries = 0,
+                           int64_t values_size = -1)
+      : hash_table_(pool, static_cast<uint64_t>(entries)), binary_builder_(pool) {
+    int64_t data_size = values_size;
+    if (values_size < 0) {
+      data_size = entries * 4;
+    }
+    DCHECK_OK(binary_builder_.Resize(entries));
+    DCHECK_OK(binary_builder_.ReserveData(data_size));
+  }
+
+  // Return the memo index of the `length` bytes at `data`, or kKeyNotFound
+  // when that value is not in the table.
+  int32_t Get(const void* data, builder_offset_type length) const {
+    const hash_t h = ComputeStringHash<0>(data, length);
+    const auto result = Lookup(h, data, length);
+    return result.second ? result.first->payload.memo_index : kKeyNotFound;
+  }
+
+  // string_view overload: forwards the view's pointer and length.
+  int32_t Get(const util::string_view& value) const {
+    const auto length = static_cast<builder_offset_type>(value.length());
+    return Get(value.data(), length);
+  }
+
+  // Look up the `length` bytes at `data` and insert them when missing.
+  // - If present: `on_found(memo_index)` is called.
+  // - If absent: the bytes are appended to the binary builder, a hash entry
+  //   carrying memo index size() is inserted, then `on_not_found(memo_index)`
+  //   is called. A failure of either step is returned immediately.
+  // On success the memo index is written to `*out_memo_index`.
+  template <typename Func1, typename Func2>
+  Status GetOrInsert(const void* data, builder_offset_type length, Func1&& on_found,
+                     Func2&& on_not_found, int32_t* out_memo_index) {
+    const hash_t h = ComputeStringHash<0>(data, length);
+    auto result = Lookup(h, data, length);
+
+    int32_t memo_index;
+    if (!result.second) {
+      memo_index = size();
+      // Insert string value
+      RETURN_NOT_OK(binary_builder_.Append(static_cast<const char*>(data), length));
+      // Insert hash entry
+      RETURN_NOT_OK(hash_table_.Insert(const_cast<HashTableEntry*>(result.first), h,
+                                       {memo_index}));
+
+      on_not_found(memo_index);
+    } else {
+      memo_index = result.first->payload.memo_index;
+      on_found(memo_index);
+    }
+    *out_memo_index = memo_index;
+    return Status::OK();
+  }
+
+  // string_view overload of the callback-taking GetOrInsert: forwards the
+  // view's pointer and length together with both callbacks.
+  template <typename Func1, typename Func2>
+  Status GetOrInsert(const util::string_view& value, Func1&& on_found,
+                     Func2&& on_not_found, int32_t* out_memo_index) {
+    const auto length = static_cast<builder_offset_type>(value.length());
+    return GetOrInsert(value.data(), length, std::forward<Func1>(on_found),
+                       std::forward<Func2>(on_not_found), out_memo_index);
+  }
+
+  // Convenience overload: get-or-insert with callbacks that do nothing.
+  Status GetOrInsert(const void* data, builder_offset_type length,
+                     int32_t* out_memo_index) {
+    auto noop = [](int32_t) {};
+    return GetOrInsert(data, length, noop, noop, out_memo_index);
+  }
+
+  // string_view convenience overload without callbacks.
+  Status GetOrInsert(const util::string_view& value, int32_t* out_memo_index) {
+    const auto length = static_cast<builder_offset_type>(value.length());
+    return GetOrInsert(value.data(), length, out_memo_index);
+  }
+
+  // Memo index assigned to null, or kKeyNotFound if null was never inserted.
+  int32_t GetNull() const {
+    return null_index_;
+  }
+
+  // Return the memo index of null. On first use null gets the next index
+  // (size()), a null is appended to the binary builder and
+  // `on_not_found(index)` is called; afterwards `on_found(index)` is called.
+  template <typename Func1, typename Func2>
+  int32_t GetOrInsertNull(Func1&& on_found, Func2&& on_not_found) {
+    int32_t memo_index = GetNull();
+    if (memo_index != kKeyNotFound) {
+      on_found(memo_index);
+      return memo_index;
+    }
+    memo_index = size();
+    null_index_ = memo_index;
+    DCHECK_OK(binary_builder_.AppendNull());
+    on_not_found(memo_index);
+    return memo_index;
+  }
+
+  // Convenience overload: get-or-insert null with callbacks that do nothing.
+  int32_t GetOrInsertNull() {
+    auto noop = [](int32_t) {};
+    return GetOrInsertNull(noop, noop);
+  }
+
+  // Number of entries in the hash table, plus one if null was inserted
+  // (which is also 1 + the largest memo index).
+  int32_t size() const override {
+    const bool has_null = GetNull() != kKeyNotFound;
+    return static_cast<int32_t>(hash_table_.size() + has_null);
+  }
+
+  // Value data length as reported by the binary builder.
+  int64_t values_size() const {
+    return binary_builder_.value_data_length();
+  }
+
+ // Copy (n + 1) offsets starting from index `start` into `out_data`
+ template <class Offset>
+ void CopyOffsets(int32_t start, Offset* out_data) const {
+ DCHECK_LE(start, size());
+
+ const builder_offset_type* offsets = binary_builder_.offsets_data();
const builder_offset_type delta =
start < binary_builder_.length() ? offsets[start] : 0;
- for (int32_t i = start; i < size(); ++i) {
- const builder_offset_type adjusted_offset = offsets[i] - delta;
- Offset cast_offset = static_cast<Offset>(adjusted_offset);
- assert(static_cast<builder_offset_type>(cast_offset) ==
- adjusted_offset); // avoid truncation
- *out_data++ = cast_offset;
- }
-
- // Copy last value since BinaryBuilder only materializes it on in Finish()
- *out_data = static_cast<Offset>(binary_builder_.value_data_length() - delta);
- }
-
- template <class Offset>
- void CopyOffsets(Offset* out_data) const {
- CopyOffsets(0, out_data);
- }
-
- // Copy values starting from index `start` into `out_data`
- void CopyValues(int32_t start, uint8_t* out_data) const {
- CopyValues(start, -1, out_data);
- }
-
- // Same as above, but check output size in debug mode
- void CopyValues(int32_t start, int64_t out_size, uint8_t* out_data) const {
- DCHECK_LE(start, size());
-
- // The absolute byte offset of `start` value in the binary buffer.
- const builder_offset_type offset = binary_builder_.offset(start);
- const auto length = binary_builder_.value_data_length() - static_cast<size_t>(offset);
-
- if (out_size != -1) {
- assert(static_cast<int64_t>(length) <= out_size);
- }
-
- auto view = binary_builder_.GetView(start);
- memcpy(out_data, view.data(), length);
- }
-
- void CopyValues(uint8_t* out_data) const { CopyValues(0, -1, out_data); }
-
- void CopyValues(int64_t out_size, uint8_t* out_data) const {
- CopyValues(0, out_size, out_data);
- }
-
- void CopyFixedWidthValues(int32_t start, int32_t width_size, int64_t out_size,
- uint8_t* out_data) const {
- // This method exists to cope with the fact that the BinaryMemoTable does
- // not know the fixed width when inserting the null value. The data
- // buffer hold a zero length string for the null value (if found).
- //
- // Thus, the method will properly inject an empty value of the proper width
- // in the output buffer.
- //
- if (start >= size()) {
- return;
- }
-
- int32_t null_index = GetNull();
- if (null_index < start) {
- // Nothing to skip, proceed as usual.
- CopyValues(start, out_size, out_data);
- return;
- }
-
- builder_offset_type left_offset = binary_builder_.offset(start);
-
- // Ensure that the data length is exactly missing width_size bytes to fit
- // in the expected output (n_values * width_size).
-#ifndef NDEBUG
- int64_t data_length = values_size() - static_cast<size_t>(left_offset);
- assert(data_length + width_size == out_size);
- ARROW_UNUSED(data_length);
-#endif
-
- auto in_data = binary_builder_.value_data() + left_offset;
- // The null use 0-length in the data, slice the data in 2 and skip by
- // width_size in out_data. [part_1][width_size][part_2]
- auto null_data_offset = binary_builder_.offset(null_index);
- auto left_size = null_data_offset - left_offset;
- if (left_size > 0) {
- memcpy(out_data, in_data + left_offset, left_size);
- }
+ for (int32_t i = start; i < size(); ++i) {
+ const builder_offset_type adjusted_offset = offsets[i] - delta;
+ Offset cast_offset = static_cast<Offset>(adjusted_offset);
+ assert(static_cast<builder_offset_type>(cast_offset) ==
+ adjusted_offset); // avoid truncation
+ *out_data++ = cast_offset;
+ }
+
+ // Copy last value since BinaryBuilder only materializes it on in Finish()
+ *out_data = static_cast<Offset>(binary_builder_.value_data_length() - delta);
+ }
+
+ template <class Offset>
+ void CopyOffsets(Offset* out_data) const {
+ CopyOffsets(0, out_data);
+ }
+
+ // Copy values starting from index `start` into `out_data`
+ void CopyValues(int32_t start, uint8_t* out_data) const {
+ CopyValues(start, -1, out_data);
+ }
+
+ // Same as above, but check output size in debug mode
+ void CopyValues(int32_t start, int64_t out_size, uint8_t* out_data) const {
+ DCHECK_LE(start, size());
+
+ // The absolute byte offset of `start` value in the binary buffer.
+ const builder_offset_type offset = binary_builder_.offset(start);
+ const auto length = binary_builder_.value_data_length() - static_cast<size_t>(offset);
+
+ if (out_size != -1) {
+ assert(static_cast<int64_t>(length) <= out_size);
+ }
+
+ auto view = binary_builder_.GetView(start);
+ memcpy(out_data, view.data(), length);
+ }
+
+ void CopyValues(uint8_t* out_data) const { CopyValues(0, -1, out_data); }
+
+ void CopyValues(int64_t out_size, uint8_t* out_data) const {
+ CopyValues(0, out_size, out_data);
+ }
+
+ void CopyFixedWidthValues(int32_t start, int32_t width_size, int64_t out_size,
+ uint8_t* out_data) const {
+ // This method exists to cope with the fact that the BinaryMemoTable does
+ // not know the fixed width when inserting the null value. The data
+ // buffer hold a zero length string for the null value (if found).
+ //
+ // Thus, the method will properly inject an empty value of the proper width
+ // in the output buffer.
+ //
+ if (start >= size()) {
+ return;
+ }
+
+ int32_t null_index = GetNull();
+ if (null_index < start) {
+ // Nothing to skip, proceed as usual.
+ CopyValues(start, out_size, out_data);
+ return;
+ }
+
+ builder_offset_type left_offset = binary_builder_.offset(start);
+
+ // Ensure that the data length is exactly missing width_size bytes to fit
+ // in the expected output (n_values * width_size).
+#ifndef NDEBUG
+ int64_t data_length = values_size() - static_cast<size_t>(left_offset);
+ assert(data_length + width_size == out_size);
+ ARROW_UNUSED(data_length);
+#endif
+
+ auto in_data = binary_builder_.value_data() + left_offset;
+ // The null use 0-length in the data, slice the data in 2 and skip by
+ // width_size in out_data. [part_1][width_size][part_2]
+ auto null_data_offset = binary_builder_.offset(null_index);
+ auto left_size = null_data_offset - left_offset;
+ if (left_size > 0) {
+ memcpy(out_data, in_data + left_offset, left_size);
+ }
// Zero-initialize the null entry
memset(out_data + left_size, 0, width_size);
-
- auto right_size = values_size() - static_cast<size_t>(null_data_offset);
- if (right_size > 0) {
- // skip the null fixed size value.
- auto out_offset = left_size + width_size;
- assert(out_data + out_offset + right_size == out_data + out_size);
- memcpy(out_data + out_offset, in_data + null_data_offset, right_size);
- }
- }
-
- // Visit the stored values in insertion order.
- // The visitor function should have the signature `void(util::string_view)`
- // or `void(const util::string_view&)`.
- template <typename VisitFunc>
- void VisitValues(int32_t start, VisitFunc&& visit) const {
- for (int32_t i = start; i < size(); ++i) {
- visit(binary_builder_.GetView(i));
- }
- }
-
- protected:
- struct Payload {
- int32_t memo_index;
- };
-
- using HashTableType = HashTable<Payload>;
- using HashTableEntry = typename HashTable<Payload>::Entry;
- HashTableType hash_table_;
- BinaryBuilderT binary_builder_;
-
- int32_t null_index_ = kKeyNotFound;
-
- std::pair<const HashTableEntry*, bool> Lookup(hash_t h, const void* data,
- builder_offset_type length) const {
- auto cmp_func = [=](const Payload* payload) {
- util::string_view lhs = binary_builder_.GetView(payload->memo_index);
- util::string_view rhs(static_cast<const char*>(data), length);
- return lhs == rhs;
- };
- return hash_table_.Lookup(h, cmp_func);
- }
-};
-
-template <typename T, typename Enable = void>
-struct HashTraits {};
-
-template <>
-struct HashTraits<BooleanType> {
- using MemoTableType = SmallScalarMemoTable<bool>;
-};
-
-template <typename T>
-struct HashTraits<T, enable_if_8bit_int<T>> {
- using c_type = typename T::c_type;
- using MemoTableType = SmallScalarMemoTable<typename T::c_type>;
-};
-
-template <typename T>
-struct HashTraits<T, enable_if_t<has_c_type<T>::value && !is_8bit_int<T>::value>> {
- using c_type = typename T::c_type;
- using MemoTableType = ScalarMemoTable<c_type, HashTable>;
-};
-
-template <typename T>
-struct HashTraits<T, enable_if_t<has_string_view<T>::value &&
- !std::is_base_of<LargeBinaryType, T>::value>> {
- using MemoTableType = BinaryMemoTable<BinaryBuilder>;
-};
-
+
+ auto right_size = values_size() - static_cast<size_t>(null_data_offset);
+ if (right_size > 0) {
+ // skip the null fixed size value.
+ auto out_offset = left_size + width_size;
+ assert(out_data + out_offset + right_size == out_data + out_size);
+ memcpy(out_data + out_offset, in_data + null_data_offset, right_size);
+ }
+ }
+
+ // Visit the stored values in insertion order.
+ // The visitor function should have the signature `void(util::string_view)`
+ // or `void(const util::string_view&)`.
+ template <typename VisitFunc>
+ void VisitValues(int32_t start, VisitFunc&& visit) const {
+ for (int32_t i = start; i < size(); ++i) {
+ visit(binary_builder_.GetView(i));
+ }
+ }
+
+ protected:
+ struct Payload {
+ int32_t memo_index;
+ };
+
+ using HashTableType = HashTable<Payload>;
+ using HashTableEntry = typename HashTable<Payload>::Entry;
+ HashTableType hash_table_;
+ BinaryBuilderT binary_builder_;
+
+ int32_t null_index_ = kKeyNotFound;
+
+ std::pair<const HashTableEntry*, bool> Lookup(hash_t h, const void* data,
+ builder_offset_type length) const {
+ auto cmp_func = [=](const Payload* payload) {
+ util::string_view lhs = binary_builder_.GetView(payload->memo_index);
+ util::string_view rhs(static_cast<const char*>(data), length);
+ return lhs == rhs;
+ };
+ return hash_table_.Lookup(h, cmp_func);
+ }
+};
+
+template <typename T, typename Enable = void>
+struct HashTraits {};
+
+template <>
+struct HashTraits<BooleanType> {
+ using MemoTableType = SmallScalarMemoTable<bool>;
+};
+
+template <typename T>
+struct HashTraits<T, enable_if_8bit_int<T>> {
+ using c_type = typename T::c_type;
+ using MemoTableType = SmallScalarMemoTable<typename T::c_type>;
+};
+
+template <typename T>
+struct HashTraits<T, enable_if_t<has_c_type<T>::value && !is_8bit_int<T>::value>> {
+ using c_type = typename T::c_type;
+ using MemoTableType = ScalarMemoTable<c_type, HashTable>;
+};
+
+template <typename T>
+struct HashTraits<T, enable_if_t<has_string_view<T>::value &&
+ !std::is_base_of<LargeBinaryType, T>::value>> {
+ using MemoTableType = BinaryMemoTable<BinaryBuilder>;
+};
+
template <typename T>
struct HashTraits<T, enable_if_decimal<T>> {
- using MemoTableType = BinaryMemoTable<BinaryBuilder>;
-};
-
-template <typename T>
-struct HashTraits<T, enable_if_t<std::is_base_of<LargeBinaryType, T>::value>> {
- using MemoTableType = BinaryMemoTable<LargeBinaryBuilder>;
-};
-
-template <typename MemoTableType>
-static inline Status ComputeNullBitmap(MemoryPool* pool, const MemoTableType& memo_table,
- int64_t start_offset, int64_t* null_count,
- std::shared_ptr<Buffer>* null_bitmap) {
- int64_t dict_length = static_cast<int64_t>(memo_table.size()) - start_offset;
- int64_t null_index = memo_table.GetNull();
-
- *null_count = 0;
- *null_bitmap = nullptr;
-
- if (null_index != kKeyNotFound && null_index >= start_offset) {
- null_index -= start_offset;
- *null_count = 1;
- ARROW_ASSIGN_OR_RAISE(*null_bitmap,
- internal::BitmapAllButOne(pool, dict_length, null_index));
- }
-
- return Status::OK();
-}
-
-} // namespace internal
-} // namespace arrow
+ using MemoTableType = BinaryMemoTable<BinaryBuilder>;
+};
+
+template <typename T>
+struct HashTraits<T, enable_if_t<std::is_base_of<LargeBinaryType, T>::value>> {
+ using MemoTableType = BinaryMemoTable<LargeBinaryBuilder>;
+};
+
+template <typename MemoTableType>
+static inline Status ComputeNullBitmap(MemoryPool* pool, const MemoTableType& memo_table,
+ int64_t start_offset, int64_t* null_count,
+ std::shared_ptr<Buffer>* null_bitmap) {
+ int64_t dict_length = static_cast<int64_t>(memo_table.size()) - start_offset;
+ int64_t null_index = memo_table.GetNull();
+
+ *null_count = 0;
+ *null_bitmap = nullptr;
+
+ if (null_index != kKeyNotFound && null_index >= start_offset) {
+ null_index -= start_offset;
+ *null_count = 1;
+ ARROW_ASSIGN_OR_RAISE(*null_bitmap,
+ internal::BitmapAllButOne(pool, dict_length, null_index));
+ }
+
+ return Status::OK();
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/int128_internal.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/int128_internal.h
index 1d494671a9f..01d13351f91 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/int128_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/int128_internal.h
@@ -1,45 +1,45 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-#pragma once
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+
#include "arrow/util/config.h"
-#include "arrow/util/macros.h"
-
+#include "arrow/util/macros.h"
+
#ifndef ARROW_USE_NATIVE_INT128
#include <boost/multiprecision/cpp_int.hpp>
-#endif
-
-namespace arrow {
-namespace internal {
-
-// NOTE: __int128_t and boost::multiprecision::int128_t are not interchangeable.
-// For example, __int128_t does not have any member function, and does not have
-// operator<<(std::ostream, __int128_t). On the other hand, the behavior of
-// boost::multiprecision::int128_t might be surprising with some configs (e.g.,
-// static_cast<uint64_t>(boost::multiprecision::uint128_t) might return
-// ~uint64_t{0} instead of the lower 64 bits of the input).
-// Try to minimize the usage of int128_t and uint128_t.
-#ifdef ARROW_USE_NATIVE_INT128
-using int128_t = __int128_t;
-using uint128_t = __uint128_t;
-#else
-using boost::multiprecision::int128_t;
-using boost::multiprecision::uint128_t;
-#endif
-
-} // namespace internal
-} // namespace arrow
+#endif
+
+namespace arrow {
+namespace internal {
+
+// NOTE: __int128_t and boost::multiprecision::int128_t are not interchangeable.
+// For example, __int128_t does not have any member function, and does not have
+// operator<<(std::ostream, __int128_t). On the other hand, the behavior of
+// boost::multiprecision::int128_t might be surprising with some configs (e.g.,
+// static_cast<uint64_t>(boost::multiprecision::uint128_t) might return
+// ~uint64_t{0} instead of the lower 64 bits of the input).
+// Try to minimize the usage of int128_t and uint128_t.
+#ifdef ARROW_USE_NATIVE_INT128
+using int128_t = __int128_t;
+using uint128_t = __uint128_t;
+#else
+using boost::multiprecision::int128_t;
+using boost::multiprecision::uint128_t;
+#endif
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/int_util.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/int_util.cc
index 24c5fe56eff..8388b3e38d1 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/int_util.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/int_util.cc
@@ -1,471 +1,471 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/int_util.h"
-
-#include <algorithm>
-#include <cstring>
-#include <limits>
-
-#include "arrow/array/data.h"
-#include "arrow/datum.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/bit_block_counter.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/int_util.h"
+
+#include <algorithm>
+#include <cstring>
+#include <limits>
+
+#include "arrow/array/data.h"
+#include "arrow/datum.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit_block_counter.h"
#include "arrow/util/bit_run_reader.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/ubsan.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/ubsan.h"
#include "arrow/visitor_inline.h"
-
-namespace arrow {
-namespace internal {
-
-using internal::checked_cast;
-
-static constexpr uint64_t max_uint8 =
- static_cast<uint64_t>(std::numeric_limits<uint8_t>::max());
-static constexpr uint64_t max_uint16 =
- static_cast<uint64_t>(std::numeric_limits<uint16_t>::max());
-static constexpr uint64_t max_uint32 =
- static_cast<uint64_t>(std::numeric_limits<uint32_t>::max());
-static constexpr uint64_t max_uint64 = std::numeric_limits<uint64_t>::max();
-
-static constexpr uint64_t mask_uint8 = ~0xffULL;
-static constexpr uint64_t mask_uint16 = ~0xffffULL;
-static constexpr uint64_t mask_uint32 = ~0xffffffffULL;
-
-//
-// Unsigned integer width detection
-//
-
-static const uint64_t max_uints[] = {0, max_uint8, max_uint16, 0, max_uint32,
- 0, 0, 0, max_uint64};
-
-// Check if we would need to expand the underlying storage type
+
+namespace arrow {
+namespace internal {
+
+using internal::checked_cast;
+
+static constexpr uint64_t max_uint8 =
+ static_cast<uint64_t>(std::numeric_limits<uint8_t>::max());
+static constexpr uint64_t max_uint16 =
+ static_cast<uint64_t>(std::numeric_limits<uint16_t>::max());
+static constexpr uint64_t max_uint32 =
+ static_cast<uint64_t>(std::numeric_limits<uint32_t>::max());
+static constexpr uint64_t max_uint64 = std::numeric_limits<uint64_t>::max();
+
+static constexpr uint64_t mask_uint8 = ~0xffULL;
+static constexpr uint64_t mask_uint16 = ~0xffffULL;
+static constexpr uint64_t mask_uint32 = ~0xffffffffULL;
+
+//
+// Unsigned integer width detection
+//
+
+static const uint64_t max_uints[] = {0, max_uint8, max_uint16, 0, max_uint32,
+ 0, 0, 0, max_uint64};
+
+// Check if we would need to expand the underlying storage type
static inline uint8_t ExpandedUIntWidth(uint64_t val, uint8_t current_width) {
- // Optimize for the common case where width doesn't change
- if (ARROW_PREDICT_TRUE(val <= max_uints[current_width])) {
- return current_width;
- }
- if (current_width == 1 && val <= max_uint8) {
- return 1;
- } else if (current_width <= 2 && val <= max_uint16) {
- return 2;
- } else if (current_width <= 4 && val <= max_uint32) {
- return 4;
- } else {
- return 8;
- }
-}
-
-uint8_t DetectUIntWidth(const uint64_t* values, int64_t length, uint8_t min_width) {
- uint8_t width = min_width;
- if (min_width < 8) {
- auto p = values;
- const auto end = p + length;
- while (p <= end - 16) {
- // This is probably SIMD-izable
- auto u = p[0];
- auto v = p[1];
- auto w = p[2];
- auto x = p[3];
- u |= p[4];
- v |= p[5];
- w |= p[6];
- x |= p[7];
- u |= p[8];
- v |= p[9];
- w |= p[10];
- x |= p[11];
- u |= p[12];
- v |= p[13];
- w |= p[14];
- x |= p[15];
- p += 16;
- width = ExpandedUIntWidth(u | v | w | x, width);
- if (ARROW_PREDICT_FALSE(width == 8)) {
- break;
- }
- }
- if (p <= end - 8) {
- auto u = p[0];
- auto v = p[1];
- auto w = p[2];
- auto x = p[3];
- u |= p[4];
- v |= p[5];
- w |= p[6];
- x |= p[7];
- p += 8;
- width = ExpandedUIntWidth(u | v | w | x, width);
- }
- while (p < end) {
- width = ExpandedUIntWidth(*p++, width);
- }
- }
- return width;
-}
-
-uint8_t DetectUIntWidth(const uint64_t* values, const uint8_t* valid_bytes,
- int64_t length, uint8_t min_width) {
- if (valid_bytes == nullptr) {
- return DetectUIntWidth(values, length, min_width);
- }
- uint8_t width = min_width;
- if (min_width < 8) {
- auto p = values;
- const auto end = p + length;
- auto b = valid_bytes;
-
-#define MASK(p, b, i) p[i] * (b[i] != 0)
-
- while (p <= end - 8) {
- // This is probably be SIMD-izable
- auto u = MASK(p, b, 0);
- auto v = MASK(p, b, 1);
- auto w = MASK(p, b, 2);
- auto x = MASK(p, b, 3);
- u |= MASK(p, b, 4);
- v |= MASK(p, b, 5);
- w |= MASK(p, b, 6);
- x |= MASK(p, b, 7);
- b += 8;
- p += 8;
- width = ExpandedUIntWidth(u | v | w | x, width);
- if (ARROW_PREDICT_FALSE(width == 8)) {
- break;
- }
- }
- uint64_t mask = 0;
- while (p < end) {
- mask |= MASK(p, b, 0);
- ++b;
- ++p;
- }
- width = ExpandedUIntWidth(mask, width);
-
-#undef MASK
- }
- return width;
-}
-
-//
-// Signed integer width detection
-//
-
-uint8_t DetectIntWidth(const int64_t* values, int64_t length, uint8_t min_width) {
- if (min_width == 8) {
- return min_width;
- }
- uint8_t width = min_width;
-
- auto p = values;
- const auto end = p + length;
- // Strategy: to determine whether `x` is between -0x80 and 0x7f,
- // we determine whether `x + 0x80` is between 0x00 and 0xff. The
- // latter can be done with a simple AND mask with ~0xff and, more
- // importantly, can be computed in a single step over multiple ORed
- // values (so we can branch once every N items instead of once every item).
- // This strategy could probably lend itself to explicit SIMD-ization,
- // if more performance is needed.
- constexpr uint64_t addend8 = 0x80ULL;
- constexpr uint64_t addend16 = 0x8000ULL;
- constexpr uint64_t addend32 = 0x80000000ULL;
-
- auto test_one_item = [&](uint64_t addend, uint64_t test_mask) -> bool {
- auto v = *p++;
- if (ARROW_PREDICT_FALSE(((v + addend) & test_mask) != 0)) {
- --p;
- return false;
- } else {
- return true;
- }
- };
-
- auto test_four_items = [&](uint64_t addend, uint64_t test_mask) -> bool {
- auto mask = (p[0] + addend) | (p[1] + addend) | (p[2] + addend) | (p[3] + addend);
- p += 4;
- if (ARROW_PREDICT_FALSE((mask & test_mask) != 0)) {
- p -= 4;
- return false;
- } else {
- return true;
- }
- };
-
- if (width == 1) {
- while (p <= end - 4) {
- if (!test_four_items(addend8, mask_uint8)) {
- width = 2;
- goto width2;
- }
- }
- while (p < end) {
- if (!test_one_item(addend8, mask_uint8)) {
- width = 2;
- goto width2;
- }
- }
- return 1;
- }
-width2:
- if (width == 2) {
- while (p <= end - 4) {
- if (!test_four_items(addend16, mask_uint16)) {
- width = 4;
- goto width4;
- }
- }
- while (p < end) {
- if (!test_one_item(addend16, mask_uint16)) {
- width = 4;
- goto width4;
- }
- }
- return 2;
- }
-width4:
- if (width == 4) {
- while (p <= end - 4) {
- if (!test_four_items(addend32, mask_uint32)) {
- width = 8;
- goto width8;
- }
- }
- while (p < end) {
- if (!test_one_item(addend32, mask_uint32)) {
- width = 8;
- goto width8;
- }
- }
- return 4;
- }
-width8:
- return 8;
-}
-
-uint8_t DetectIntWidth(const int64_t* values, const uint8_t* valid_bytes, int64_t length,
- uint8_t min_width) {
- if (valid_bytes == nullptr) {
- return DetectIntWidth(values, length, min_width);
- }
-
- if (min_width == 8) {
- return min_width;
- }
- uint8_t width = min_width;
-
- auto p = values;
- const auto end = p + length;
- auto b = valid_bytes;
- // Strategy is similar to the no-nulls case above, but we also
- // have to zero any incoming items that have a zero validity byte.
- constexpr uint64_t addend8 = 0x80ULL;
- constexpr uint64_t addend16 = 0x8000ULL;
- constexpr uint64_t addend32 = 0x80000000ULL;
-
-#define MASK(p, b, addend, i) (p[i] + addend) * (b[i] != 0)
-
- auto test_one_item = [&](uint64_t addend, uint64_t test_mask) -> bool {
- auto v = MASK(p, b, addend, 0);
- ++b;
- ++p;
- if (ARROW_PREDICT_FALSE((v & test_mask) != 0)) {
- --b;
- --p;
- return false;
- } else {
- return true;
- }
- };
-
- auto test_eight_items = [&](uint64_t addend, uint64_t test_mask) -> bool {
- auto mask1 = MASK(p, b, addend, 0) | MASK(p, b, addend, 1) | MASK(p, b, addend, 2) |
- MASK(p, b, addend, 3);
- auto mask2 = MASK(p, b, addend, 4) | MASK(p, b, addend, 5) | MASK(p, b, addend, 6) |
- MASK(p, b, addend, 7);
- b += 8;
- p += 8;
- if (ARROW_PREDICT_FALSE(((mask1 | mask2) & test_mask) != 0)) {
- b -= 8;
- p -= 8;
- return false;
- } else {
- return true;
- }
- };
-
-#undef MASK
-
- if (width == 1) {
- while (p <= end - 8) {
- if (!test_eight_items(addend8, mask_uint8)) {
- width = 2;
- goto width2;
- }
- }
- while (p < end) {
- if (!test_one_item(addend8, mask_uint8)) {
- width = 2;
- goto width2;
- }
- }
- return 1;
- }
-width2:
- if (width == 2) {
- while (p <= end - 8) {
- if (!test_eight_items(addend16, mask_uint16)) {
- width = 4;
- goto width4;
- }
- }
- while (p < end) {
- if (!test_one_item(addend16, mask_uint16)) {
- width = 4;
- goto width4;
- }
- }
- return 2;
- }
-width4:
- if (width == 4) {
- while (p <= end - 8) {
- if (!test_eight_items(addend32, mask_uint32)) {
- width = 8;
- goto width8;
- }
- }
- while (p < end) {
- if (!test_one_item(addend32, mask_uint32)) {
- width = 8;
- goto width8;
- }
- }
- return 4;
- }
-width8:
- return 8;
-}
-
-template <typename Source, typename Dest>
+ // Optimize for the common case where width doesn't change
+ if (ARROW_PREDICT_TRUE(val <= max_uints[current_width])) {
+ return current_width;
+ }
+ if (current_width == 1 && val <= max_uint8) {
+ return 1;
+ } else if (current_width <= 2 && val <= max_uint16) {
+ return 2;
+ } else if (current_width <= 4 && val <= max_uint32) {
+ return 4;
+ } else {
+ return 8;
+ }
+}
+
+uint8_t DetectUIntWidth(const uint64_t* values, int64_t length, uint8_t min_width) {
+ uint8_t width = min_width;
+ if (min_width < 8) {
+ auto p = values;
+ const auto end = p + length;
+ while (p <= end - 16) {
+ // This is probably SIMD-izable
+ auto u = p[0];
+ auto v = p[1];
+ auto w = p[2];
+ auto x = p[3];
+ u |= p[4];
+ v |= p[5];
+ w |= p[6];
+ x |= p[7];
+ u |= p[8];
+ v |= p[9];
+ w |= p[10];
+ x |= p[11];
+ u |= p[12];
+ v |= p[13];
+ w |= p[14];
+ x |= p[15];
+ p += 16;
+ width = ExpandedUIntWidth(u | v | w | x, width);
+ if (ARROW_PREDICT_FALSE(width == 8)) {
+ break;
+ }
+ }
+ if (p <= end - 8) {
+ auto u = p[0];
+ auto v = p[1];
+ auto w = p[2];
+ auto x = p[3];
+ u |= p[4];
+ v |= p[5];
+ w |= p[6];
+ x |= p[7];
+ p += 8;
+ width = ExpandedUIntWidth(u | v | w | x, width);
+ }
+ while (p < end) {
+ width = ExpandedUIntWidth(*p++, width);
+ }
+ }
+ return width;
+}
+
+uint8_t DetectUIntWidth(const uint64_t* values, const uint8_t* valid_bytes,
+ int64_t length, uint8_t min_width) {
+ if (valid_bytes == nullptr) {
+ return DetectUIntWidth(values, length, min_width);
+ }
+ uint8_t width = min_width;
+ if (min_width < 8) {
+ auto p = values;
+ const auto end = p + length;
+ auto b = valid_bytes;
+
+#define MASK(p, b, i) p[i] * (b[i] != 0)
+
+ while (p <= end - 8) {
+ // This is probably be SIMD-izable
+ auto u = MASK(p, b, 0);
+ auto v = MASK(p, b, 1);
+ auto w = MASK(p, b, 2);
+ auto x = MASK(p, b, 3);
+ u |= MASK(p, b, 4);
+ v |= MASK(p, b, 5);
+ w |= MASK(p, b, 6);
+ x |= MASK(p, b, 7);
+ b += 8;
+ p += 8;
+ width = ExpandedUIntWidth(u | v | w | x, width);
+ if (ARROW_PREDICT_FALSE(width == 8)) {
+ break;
+ }
+ }
+ uint64_t mask = 0;
+ while (p < end) {
+ mask |= MASK(p, b, 0);
+ ++b;
+ ++p;
+ }
+ width = ExpandedUIntWidth(mask, width);
+
+#undef MASK
+ }
+ return width;
+}
+
+//
+// Signed integer width detection
+//
+
+uint8_t DetectIntWidth(const int64_t* values, int64_t length, uint8_t min_width) {
+ if (min_width == 8) {
+ return min_width;
+ }
+ uint8_t width = min_width;
+
+ auto p = values;
+ const auto end = p + length;
+ // Strategy: to determine whether `x` is between -0x80 and 0x7f,
+ // we determine whether `x + 0x80` is between 0x00 and 0xff. The
+ // latter can be done with a simple AND mask with ~0xff and, more
+ // importantly, can be computed in a single step over multiple ORed
+ // values (so we can branch once every N items instead of once every item).
+ // This strategy could probably lend itself to explicit SIMD-ization,
+ // if more performance is needed.
+ constexpr uint64_t addend8 = 0x80ULL;
+ constexpr uint64_t addend16 = 0x8000ULL;
+ constexpr uint64_t addend32 = 0x80000000ULL;
+
+ auto test_one_item = [&](uint64_t addend, uint64_t test_mask) -> bool {
+ auto v = *p++;
+ if (ARROW_PREDICT_FALSE(((v + addend) & test_mask) != 0)) {
+ --p;
+ return false;
+ } else {
+ return true;
+ }
+ };
+
+ auto test_four_items = [&](uint64_t addend, uint64_t test_mask) -> bool {
+ auto mask = (p[0] + addend) | (p[1] + addend) | (p[2] + addend) | (p[3] + addend);
+ p += 4;
+ if (ARROW_PREDICT_FALSE((mask & test_mask) != 0)) {
+ p -= 4;
+ return false;
+ } else {
+ return true;
+ }
+ };
+
+ if (width == 1) {
+ while (p <= end - 4) {
+ if (!test_four_items(addend8, mask_uint8)) {
+ width = 2;
+ goto width2;
+ }
+ }
+ while (p < end) {
+ if (!test_one_item(addend8, mask_uint8)) {
+ width = 2;
+ goto width2;
+ }
+ }
+ return 1;
+ }
+width2:
+ if (width == 2) {
+ while (p <= end - 4) {
+ if (!test_four_items(addend16, mask_uint16)) {
+ width = 4;
+ goto width4;
+ }
+ }
+ while (p < end) {
+ if (!test_one_item(addend16, mask_uint16)) {
+ width = 4;
+ goto width4;
+ }
+ }
+ return 2;
+ }
+width4:
+ if (width == 4) {
+ while (p <= end - 4) {
+ if (!test_four_items(addend32, mask_uint32)) {
+ width = 8;
+ goto width8;
+ }
+ }
+ while (p < end) {
+ if (!test_one_item(addend32, mask_uint32)) {
+ width = 8;
+ goto width8;
+ }
+ }
+ return 4;
+ }
+width8:
+ return 8;
+}
+
+uint8_t DetectIntWidth(const int64_t* values, const uint8_t* valid_bytes, int64_t length,
+ uint8_t min_width) {
+ if (valid_bytes == nullptr) {
+ return DetectIntWidth(values, length, min_width);
+ }
+
+ if (min_width == 8) {
+ return min_width;
+ }
+ uint8_t width = min_width;
+
+ auto p = values;
+ const auto end = p + length;
+ auto b = valid_bytes;
+ // Strategy is similar to the no-nulls case above, but we also
+ // have to zero any incoming items that have a zero validity byte.
+ constexpr uint64_t addend8 = 0x80ULL;
+ constexpr uint64_t addend16 = 0x8000ULL;
+ constexpr uint64_t addend32 = 0x80000000ULL;
+
+#define MASK(p, b, addend, i) (p[i] + addend) * (b[i] != 0)
+
+ auto test_one_item = [&](uint64_t addend, uint64_t test_mask) -> bool {
+ auto v = MASK(p, b, addend, 0);
+ ++b;
+ ++p;
+ if (ARROW_PREDICT_FALSE((v & test_mask) != 0)) {
+ --b;
+ --p;
+ return false;
+ } else {
+ return true;
+ }
+ };
+
+ auto test_eight_items = [&](uint64_t addend, uint64_t test_mask) -> bool {
+ auto mask1 = MASK(p, b, addend, 0) | MASK(p, b, addend, 1) | MASK(p, b, addend, 2) |
+ MASK(p, b, addend, 3);
+ auto mask2 = MASK(p, b, addend, 4) | MASK(p, b, addend, 5) | MASK(p, b, addend, 6) |
+ MASK(p, b, addend, 7);
+ b += 8;
+ p += 8;
+ if (ARROW_PREDICT_FALSE(((mask1 | mask2) & test_mask) != 0)) {
+ b -= 8;
+ p -= 8;
+ return false;
+ } else {
+ return true;
+ }
+ };
+
+#undef MASK
+
+ if (width == 1) {
+ while (p <= end - 8) {
+ if (!test_eight_items(addend8, mask_uint8)) {
+ width = 2;
+ goto width2;
+ }
+ }
+ while (p < end) {
+ if (!test_one_item(addend8, mask_uint8)) {
+ width = 2;
+ goto width2;
+ }
+ }
+ return 1;
+ }
+width2:
+ if (width == 2) {
+ while (p <= end - 8) {
+ if (!test_eight_items(addend16, mask_uint16)) {
+ width = 4;
+ goto width4;
+ }
+ }
+ while (p < end) {
+ if (!test_one_item(addend16, mask_uint16)) {
+ width = 4;
+ goto width4;
+ }
+ }
+ return 2;
+ }
+width4:
+ if (width == 4) {
+ while (p <= end - 8) {
+ if (!test_eight_items(addend32, mask_uint32)) {
+ width = 8;
+ goto width8;
+ }
+ }
+ while (p < end) {
+ if (!test_one_item(addend32, mask_uint32)) {
+ width = 8;
+ goto width8;
+ }
+ }
+ return 4;
+ }
+width8:
+ return 8;
+}
+
+template <typename Source, typename Dest>
static inline void CastIntsInternal(const Source* src, Dest* dest, int64_t length) {
- while (length >= 4) {
- dest[0] = static_cast<Dest>(src[0]);
- dest[1] = static_cast<Dest>(src[1]);
- dest[2] = static_cast<Dest>(src[2]);
- dest[3] = static_cast<Dest>(src[3]);
- length -= 4;
- src += 4;
- dest += 4;
- }
- while (length > 0) {
- *dest++ = static_cast<Dest>(*src++);
- --length;
- }
-}
-
-void DowncastInts(const int64_t* source, int8_t* dest, int64_t length) {
+ while (length >= 4) {
+ dest[0] = static_cast<Dest>(src[0]);
+ dest[1] = static_cast<Dest>(src[1]);
+ dest[2] = static_cast<Dest>(src[2]);
+ dest[3] = static_cast<Dest>(src[3]);
+ length -= 4;
+ src += 4;
+ dest += 4;
+ }
+ while (length > 0) {
+ *dest++ = static_cast<Dest>(*src++);
+ --length;
+ }
+}
+
+void DowncastInts(const int64_t* source, int8_t* dest, int64_t length) {
CastIntsInternal(source, dest, length);
-}
-
-void DowncastInts(const int64_t* source, int16_t* dest, int64_t length) {
+}
+
+void DowncastInts(const int64_t* source, int16_t* dest, int64_t length) {
CastIntsInternal(source, dest, length);
-}
-
-void DowncastInts(const int64_t* source, int32_t* dest, int64_t length) {
+}
+
+void DowncastInts(const int64_t* source, int32_t* dest, int64_t length) {
CastIntsInternal(source, dest, length);
-}
-
-void DowncastInts(const int64_t* source, int64_t* dest, int64_t length) {
- memcpy(dest, source, length * sizeof(int64_t));
-}
-
-void DowncastUInts(const uint64_t* source, uint8_t* dest, int64_t length) {
+}
+
+void DowncastInts(const int64_t* source, int64_t* dest, int64_t length) {
+ memcpy(dest, source, length * sizeof(int64_t));
+}
+
+void DowncastUInts(const uint64_t* source, uint8_t* dest, int64_t length) {
CastIntsInternal(source, dest, length);
-}
-
-void DowncastUInts(const uint64_t* source, uint16_t* dest, int64_t length) {
+}
+
+void DowncastUInts(const uint64_t* source, uint16_t* dest, int64_t length) {
CastIntsInternal(source, dest, length);
-}
-
-void DowncastUInts(const uint64_t* source, uint32_t* dest, int64_t length) {
+}
+
+void DowncastUInts(const uint64_t* source, uint32_t* dest, int64_t length) {
CastIntsInternal(source, dest, length);
-}
-
-void DowncastUInts(const uint64_t* source, uint64_t* dest, int64_t length) {
- memcpy(dest, source, length * sizeof(int64_t));
-}
-
+}
+
+void DowncastUInts(const uint64_t* source, uint64_t* dest, int64_t length) {
+ memcpy(dest, source, length * sizeof(int64_t));
+}
+
void UpcastInts(const int32_t* source, int64_t* dest, int64_t length) {
CastIntsInternal(source, dest, length);
}
-template <typename InputInt, typename OutputInt>
-void TransposeInts(const InputInt* src, OutputInt* dest, int64_t length,
- const int32_t* transpose_map) {
- while (length >= 4) {
- dest[0] = static_cast<OutputInt>(transpose_map[src[0]]);
- dest[1] = static_cast<OutputInt>(transpose_map[src[1]]);
- dest[2] = static_cast<OutputInt>(transpose_map[src[2]]);
- dest[3] = static_cast<OutputInt>(transpose_map[src[3]]);
- length -= 4;
- src += 4;
- dest += 4;
- }
- while (length > 0) {
- *dest++ = static_cast<OutputInt>(transpose_map[*src++]);
- --length;
- }
-}
-
-#define INSTANTIATE(SRC, DEST) \
- template ARROW_EXPORT void TransposeInts( \
- const SRC* source, DEST* dest, int64_t length, const int32_t* transpose_map);
-
-#define INSTANTIATE_ALL_DEST(DEST) \
- INSTANTIATE(uint8_t, DEST) \
- INSTANTIATE(int8_t, DEST) \
- INSTANTIATE(uint16_t, DEST) \
- INSTANTIATE(int16_t, DEST) \
- INSTANTIATE(uint32_t, DEST) \
- INSTANTIATE(int32_t, DEST) \
- INSTANTIATE(uint64_t, DEST) \
- INSTANTIATE(int64_t, DEST)
-
-#define INSTANTIATE_ALL() \
- INSTANTIATE_ALL_DEST(uint8_t) \
- INSTANTIATE_ALL_DEST(int8_t) \
- INSTANTIATE_ALL_DEST(uint16_t) \
- INSTANTIATE_ALL_DEST(int16_t) \
- INSTANTIATE_ALL_DEST(uint32_t) \
- INSTANTIATE_ALL_DEST(int32_t) \
- INSTANTIATE_ALL_DEST(uint64_t) \
- INSTANTIATE_ALL_DEST(int64_t)
-
-INSTANTIATE_ALL()
-
-#undef INSTANTIATE
-#undef INSTANTIATE_ALL
-#undef INSTANTIATE_ALL_DEST
-
+template <typename InputInt, typename OutputInt>
+void TransposeInts(const InputInt* src, OutputInt* dest, int64_t length,
+ const int32_t* transpose_map) {
+ while (length >= 4) {
+ dest[0] = static_cast<OutputInt>(transpose_map[src[0]]);
+ dest[1] = static_cast<OutputInt>(transpose_map[src[1]]);
+ dest[2] = static_cast<OutputInt>(transpose_map[src[2]]);
+ dest[3] = static_cast<OutputInt>(transpose_map[src[3]]);
+ length -= 4;
+ src += 4;
+ dest += 4;
+ }
+ while (length > 0) {
+ *dest++ = static_cast<OutputInt>(transpose_map[*src++]);
+ --length;
+ }
+}
+
+#define INSTANTIATE(SRC, DEST) \
+ template ARROW_EXPORT void TransposeInts( \
+ const SRC* source, DEST* dest, int64_t length, const int32_t* transpose_map);
+
+#define INSTANTIATE_ALL_DEST(DEST) \
+ INSTANTIATE(uint8_t, DEST) \
+ INSTANTIATE(int8_t, DEST) \
+ INSTANTIATE(uint16_t, DEST) \
+ INSTANTIATE(int16_t, DEST) \
+ INSTANTIATE(uint32_t, DEST) \
+ INSTANTIATE(int32_t, DEST) \
+ INSTANTIATE(uint64_t, DEST) \
+ INSTANTIATE(int64_t, DEST)
+
+#define INSTANTIATE_ALL() \
+ INSTANTIATE_ALL_DEST(uint8_t) \
+ INSTANTIATE_ALL_DEST(int8_t) \
+ INSTANTIATE_ALL_DEST(uint16_t) \
+ INSTANTIATE_ALL_DEST(int16_t) \
+ INSTANTIATE_ALL_DEST(uint32_t) \
+ INSTANTIATE_ALL_DEST(int32_t) \
+ INSTANTIATE_ALL_DEST(uint64_t) \
+ INSTANTIATE_ALL_DEST(int64_t)
+
+INSTANTIATE_ALL()
+
+#undef INSTANTIATE
+#undef INSTANTIATE_ALL
+#undef INSTANTIATE_ALL_DEST
+
namespace {
template <typename SrcType>
@@ -525,428 +525,428 @@ Status TransposeInts(const DataType& src_type, const DataType& dest_type,
return transposer(src_type);
}
-template <typename T>
+template <typename T>
static std::string FormatInt(T val) {
- return std::to_string(val);
-}
-
-template <typename IndexCType, bool IsSigned = std::is_signed<IndexCType>::value>
+ return std::to_string(val);
+}
+
+template <typename IndexCType, bool IsSigned = std::is_signed<IndexCType>::value>
static Status CheckIndexBoundsImpl(const ArrayData& indices, uint64_t upper_limit) {
- // For unsigned integers, if the values array is larger than the maximum
- // index value (e.g. especially for UINT8 / UINT16), then there is no need to
- // boundscheck.
- if (!IsSigned &&
- upper_limit > static_cast<uint64_t>(std::numeric_limits<IndexCType>::max())) {
- return Status::OK();
- }
-
- const IndexCType* indices_data = indices.GetValues<IndexCType>(1);
- const uint8_t* bitmap = nullptr;
- if (indices.buffers[0]) {
- bitmap = indices.buffers[0]->data();
- }
- auto IsOutOfBounds = [&](IndexCType val) -> bool {
- return ((IsSigned && val < 0) ||
- (val >= 0 && static_cast<uint64_t>(val) >= upper_limit));
- };
+ // For unsigned integers, if the values array is larger than the maximum
+ // index value (e.g. especially for UINT8 / UINT16), then there is no need to
+ // boundscheck.
+ if (!IsSigned &&
+ upper_limit > static_cast<uint64_t>(std::numeric_limits<IndexCType>::max())) {
+ return Status::OK();
+ }
+
+ const IndexCType* indices_data = indices.GetValues<IndexCType>(1);
+ const uint8_t* bitmap = nullptr;
+ if (indices.buffers[0]) {
+ bitmap = indices.buffers[0]->data();
+ }
+ auto IsOutOfBounds = [&](IndexCType val) -> bool {
+ return ((IsSigned && val < 0) ||
+ (val >= 0 && static_cast<uint64_t>(val) >= upper_limit));
+ };
return VisitSetBitRuns(
bitmap, indices.offset, indices.length, [&](int64_t offset, int64_t length) {
bool block_out_of_bounds = false;
for (int64_t i = 0; i < length; ++i) {
block_out_of_bounds |= IsOutOfBounds(indices_data[offset + i]);
- }
+ }
if (ARROW_PREDICT_FALSE(block_out_of_bounds)) {
for (int64_t i = 0; i < length; ++i) {
if (IsOutOfBounds(indices_data[offset + i])) {
return Status::IndexError("Index ", FormatInt(indices_data[offset + i]),
" out of bounds");
}
- }
- }
+ }
+ }
return Status::OK();
});
-}
-
-/// \brief Branchless boundschecking of the indices. Processes batches of
-/// indices at a time and shortcircuits when encountering an out-of-bounds
-/// index in a batch
-Status CheckIndexBounds(const ArrayData& indices, uint64_t upper_limit) {
- switch (indices.type->id()) {
- case Type::INT8:
- return CheckIndexBoundsImpl<int8_t>(indices, upper_limit);
- case Type::INT16:
- return CheckIndexBoundsImpl<int16_t>(indices, upper_limit);
- case Type::INT32:
- return CheckIndexBoundsImpl<int32_t>(indices, upper_limit);
- case Type::INT64:
- return CheckIndexBoundsImpl<int64_t>(indices, upper_limit);
- case Type::UINT8:
- return CheckIndexBoundsImpl<uint8_t>(indices, upper_limit);
- case Type::UINT16:
- return CheckIndexBoundsImpl<uint16_t>(indices, upper_limit);
- case Type::UINT32:
- return CheckIndexBoundsImpl<uint32_t>(indices, upper_limit);
- case Type::UINT64:
- return CheckIndexBoundsImpl<uint64_t>(indices, upper_limit);
- default:
- return Status::Invalid("Invalid index type for boundschecking");
- }
-}
-
-// ----------------------------------------------------------------------
-// Utilities for casting from one integer type to another
-
+}
+
+/// \brief Branchless boundschecking of the indices. Processes batches of
+/// indices at a time and shortcircuits when encountering an out-of-bounds
+/// index in a batch
+Status CheckIndexBounds(const ArrayData& indices, uint64_t upper_limit) {
+ switch (indices.type->id()) {
+ case Type::INT8:
+ return CheckIndexBoundsImpl<int8_t>(indices, upper_limit);
+ case Type::INT16:
+ return CheckIndexBoundsImpl<int16_t>(indices, upper_limit);
+ case Type::INT32:
+ return CheckIndexBoundsImpl<int32_t>(indices, upper_limit);
+ case Type::INT64:
+ return CheckIndexBoundsImpl<int64_t>(indices, upper_limit);
+ case Type::UINT8:
+ return CheckIndexBoundsImpl<uint8_t>(indices, upper_limit);
+ case Type::UINT16:
+ return CheckIndexBoundsImpl<uint16_t>(indices, upper_limit);
+ case Type::UINT32:
+ return CheckIndexBoundsImpl<uint32_t>(indices, upper_limit);
+ case Type::UINT64:
+ return CheckIndexBoundsImpl<uint64_t>(indices, upper_limit);
+ default:
+ return Status::Invalid("Invalid index type for boundschecking");
+ }
+}
+
+// ----------------------------------------------------------------------
+// Utilities for casting from one integer type to another
+
namespace {
-template <typename InType, typename CType = typename InType::c_type>
-Status IntegersInRange(const Datum& datum, CType bound_lower, CType bound_upper) {
- if (std::numeric_limits<CType>::lowest() >= bound_lower &&
- std::numeric_limits<CType>::max() <= bound_upper) {
- return Status::OK();
- }
-
- auto IsOutOfBounds = [&](CType val) -> bool {
- return val < bound_lower || val > bound_upper;
- };
- auto IsOutOfBoundsMaybeNull = [&](CType val, bool is_valid) -> bool {
- return is_valid && (val < bound_lower || val > bound_upper);
- };
- auto GetErrorMessage = [&](CType val) {
- return Status::Invalid("Integer value ", FormatInt(val),
- " not in range: ", FormatInt(bound_lower), " to ",
- FormatInt(bound_upper));
- };
-
- if (datum.kind() == Datum::SCALAR) {
- const auto& scalar = datum.scalar_as<typename TypeTraits<InType>::ScalarType>();
- if (IsOutOfBoundsMaybeNull(scalar.value, scalar.is_valid)) {
- return GetErrorMessage(scalar.value);
- }
- return Status::OK();
- }
-
- const ArrayData& indices = *datum.array();
- const CType* indices_data = indices.GetValues<CType>(1);
- const uint8_t* bitmap = nullptr;
- if (indices.buffers[0]) {
- bitmap = indices.buffers[0]->data();
- }
- OptionalBitBlockCounter indices_bit_counter(bitmap, indices.offset, indices.length);
- int64_t position = 0;
- int64_t offset_position = indices.offset;
- while (position < indices.length) {
- BitBlockCount block = indices_bit_counter.NextBlock();
- bool block_out_of_bounds = false;
- if (block.popcount == block.length) {
- // Fast path: branchless
- int64_t i = 0;
- for (int64_t chunk = 0; chunk < block.length / 8; ++chunk) {
- // Let the compiler unroll this
- for (int j = 0; j < 8; ++j) {
- block_out_of_bounds |= IsOutOfBounds(indices_data[i++]);
- }
- }
- for (; i < block.length; ++i) {
- block_out_of_bounds |= IsOutOfBounds(indices_data[i]);
- }
- } else if (block.popcount > 0) {
- // Indices have nulls, must only boundscheck non-null values
- int64_t i = 0;
- for (int64_t chunk = 0; chunk < block.length / 8; ++chunk) {
- // Let the compiler unroll this
- for (int j = 0; j < 8; ++j) {
- block_out_of_bounds |= IsOutOfBoundsMaybeNull(
- indices_data[i], BitUtil::GetBit(bitmap, offset_position + i));
- ++i;
- }
- }
- for (; i < block.length; ++i) {
- block_out_of_bounds |= IsOutOfBoundsMaybeNull(
- indices_data[i], BitUtil::GetBit(bitmap, offset_position + i));
- }
- }
- if (ARROW_PREDICT_FALSE(block_out_of_bounds)) {
- if (indices.GetNullCount() > 0) {
- for (int64_t i = 0; i < block.length; ++i) {
- if (IsOutOfBoundsMaybeNull(indices_data[i],
- BitUtil::GetBit(bitmap, offset_position + i))) {
- return GetErrorMessage(indices_data[i]);
- }
- }
- } else {
- for (int64_t i = 0; i < block.length; ++i) {
- if (IsOutOfBounds(indices_data[i])) {
- return GetErrorMessage(indices_data[i]);
- }
- }
- }
- }
- indices_data += block.length;
- position += block.length;
- offset_position += block.length;
- }
- return Status::OK();
-}
-
-template <typename Type>
-Status CheckIntegersInRangeImpl(const Datum& datum, const Scalar& bound_lower,
- const Scalar& bound_upper) {
- using ScalarType = typename TypeTraits<Type>::ScalarType;
- return IntegersInRange<Type>(datum, checked_cast<const ScalarType&>(bound_lower).value,
- checked_cast<const ScalarType&>(bound_upper).value);
-}
-
+template <typename InType, typename CType = typename InType::c_type>
+Status IntegersInRange(const Datum& datum, CType bound_lower, CType bound_upper) {
+ if (std::numeric_limits<CType>::lowest() >= bound_lower &&
+ std::numeric_limits<CType>::max() <= bound_upper) {
+ return Status::OK();
+ }
+
+ auto IsOutOfBounds = [&](CType val) -> bool {
+ return val < bound_lower || val > bound_upper;
+ };
+ auto IsOutOfBoundsMaybeNull = [&](CType val, bool is_valid) -> bool {
+ return is_valid && (val < bound_lower || val > bound_upper);
+ };
+ auto GetErrorMessage = [&](CType val) {
+ return Status::Invalid("Integer value ", FormatInt(val),
+ " not in range: ", FormatInt(bound_lower), " to ",
+ FormatInt(bound_upper));
+ };
+
+ if (datum.kind() == Datum::SCALAR) {
+ const auto& scalar = datum.scalar_as<typename TypeTraits<InType>::ScalarType>();
+ if (IsOutOfBoundsMaybeNull(scalar.value, scalar.is_valid)) {
+ return GetErrorMessage(scalar.value);
+ }
+ return Status::OK();
+ }
+
+ const ArrayData& indices = *datum.array();
+ const CType* indices_data = indices.GetValues<CType>(1);
+ const uint8_t* bitmap = nullptr;
+ if (indices.buffers[0]) {
+ bitmap = indices.buffers[0]->data();
+ }
+ OptionalBitBlockCounter indices_bit_counter(bitmap, indices.offset, indices.length);
+ int64_t position = 0;
+ int64_t offset_position = indices.offset;
+ while (position < indices.length) {
+ BitBlockCount block = indices_bit_counter.NextBlock();
+ bool block_out_of_bounds = false;
+ if (block.popcount == block.length) {
+ // Fast path: branchless
+ int64_t i = 0;
+ for (int64_t chunk = 0; chunk < block.length / 8; ++chunk) {
+ // Let the compiler unroll this
+ for (int j = 0; j < 8; ++j) {
+ block_out_of_bounds |= IsOutOfBounds(indices_data[i++]);
+ }
+ }
+ for (; i < block.length; ++i) {
+ block_out_of_bounds |= IsOutOfBounds(indices_data[i]);
+ }
+ } else if (block.popcount > 0) {
+ // Indices have nulls, must only boundscheck non-null values
+ int64_t i = 0;
+ for (int64_t chunk = 0; chunk < block.length / 8; ++chunk) {
+ // Let the compiler unroll this
+ for (int j = 0; j < 8; ++j) {
+ block_out_of_bounds |= IsOutOfBoundsMaybeNull(
+ indices_data[i], BitUtil::GetBit(bitmap, offset_position + i));
+ ++i;
+ }
+ }
+ for (; i < block.length; ++i) {
+ block_out_of_bounds |= IsOutOfBoundsMaybeNull(
+ indices_data[i], BitUtil::GetBit(bitmap, offset_position + i));
+ }
+ }
+ if (ARROW_PREDICT_FALSE(block_out_of_bounds)) {
+ if (indices.GetNullCount() > 0) {
+ for (int64_t i = 0; i < block.length; ++i) {
+ if (IsOutOfBoundsMaybeNull(indices_data[i],
+ BitUtil::GetBit(bitmap, offset_position + i))) {
+ return GetErrorMessage(indices_data[i]);
+ }
+ }
+ } else {
+ for (int64_t i = 0; i < block.length; ++i) {
+ if (IsOutOfBounds(indices_data[i])) {
+ return GetErrorMessage(indices_data[i]);
+ }
+ }
+ }
+ }
+ indices_data += block.length;
+ position += block.length;
+ offset_position += block.length;
+ }
+ return Status::OK();
+}
+
+template <typename Type>
+Status CheckIntegersInRangeImpl(const Datum& datum, const Scalar& bound_lower,
+ const Scalar& bound_upper) {
+ using ScalarType = typename TypeTraits<Type>::ScalarType;
+ return IntegersInRange<Type>(datum, checked_cast<const ScalarType&>(bound_lower).value,
+ checked_cast<const ScalarType&>(bound_upper).value);
+}
+
} // namespace
-Status CheckIntegersInRange(const Datum& datum, const Scalar& bound_lower,
- const Scalar& bound_upper) {
- Type::type type_id = datum.type()->id();
-
- if (bound_lower.type->id() != type_id || bound_upper.type->id() != type_id ||
- !bound_lower.is_valid || !bound_upper.is_valid) {
- return Status::Invalid("Scalar bound types must be non-null and same type as data");
- }
-
- switch (type_id) {
- case Type::INT8:
- return CheckIntegersInRangeImpl<Int8Type>(datum, bound_lower, bound_upper);
- case Type::INT16:
- return CheckIntegersInRangeImpl<Int16Type>(datum, bound_lower, bound_upper);
- case Type::INT32:
- return CheckIntegersInRangeImpl<Int32Type>(datum, bound_lower, bound_upper);
- case Type::INT64:
- return CheckIntegersInRangeImpl<Int64Type>(datum, bound_lower, bound_upper);
- case Type::UINT8:
- return CheckIntegersInRangeImpl<UInt8Type>(datum, bound_lower, bound_upper);
- case Type::UINT16:
- return CheckIntegersInRangeImpl<UInt16Type>(datum, bound_lower, bound_upper);
- case Type::UINT32:
- return CheckIntegersInRangeImpl<UInt32Type>(datum, bound_lower, bound_upper);
- case Type::UINT64:
- return CheckIntegersInRangeImpl<UInt64Type>(datum, bound_lower, bound_upper);
- default:
- return Status::TypeError("Invalid index type for boundschecking");
- }
-}
-
+Status CheckIntegersInRange(const Datum& datum, const Scalar& bound_lower,
+ const Scalar& bound_upper) {
+ Type::type type_id = datum.type()->id();
+
+ if (bound_lower.type->id() != type_id || bound_upper.type->id() != type_id ||
+ !bound_lower.is_valid || !bound_upper.is_valid) {
+ return Status::Invalid("Scalar bound types must be non-null and same type as data");
+ }
+
+ switch (type_id) {
+ case Type::INT8:
+ return CheckIntegersInRangeImpl<Int8Type>(datum, bound_lower, bound_upper);
+ case Type::INT16:
+ return CheckIntegersInRangeImpl<Int16Type>(datum, bound_lower, bound_upper);
+ case Type::INT32:
+ return CheckIntegersInRangeImpl<Int32Type>(datum, bound_lower, bound_upper);
+ case Type::INT64:
+ return CheckIntegersInRangeImpl<Int64Type>(datum, bound_lower, bound_upper);
+ case Type::UINT8:
+ return CheckIntegersInRangeImpl<UInt8Type>(datum, bound_lower, bound_upper);
+ case Type::UINT16:
+ return CheckIntegersInRangeImpl<UInt16Type>(datum, bound_lower, bound_upper);
+ case Type::UINT32:
+ return CheckIntegersInRangeImpl<UInt32Type>(datum, bound_lower, bound_upper);
+ case Type::UINT64:
+ return CheckIntegersInRangeImpl<UInt64Type>(datum, bound_lower, bound_upper);
+ default:
+ return Status::TypeError("Invalid index type for boundschecking");
+ }
+}
+
namespace {
-template <typename O, typename I, typename Enable = void>
-struct is_number_downcast {
- static constexpr bool value = false;
-};
-
-template <typename O, typename I>
-struct is_number_downcast<
- O, I, enable_if_t<is_number_type<O>::value && is_number_type<I>::value>> {
- using O_T = typename O::c_type;
- using I_T = typename I::c_type;
-
- static constexpr bool value =
- ((!std::is_same<O, I>::value) &&
- // Both types are of the same sign-ness.
- ((std::is_signed<O_T>::value == std::is_signed<I_T>::value) &&
- // Both types are of the same integral-ness.
- (std::is_floating_point<O_T>::value == std::is_floating_point<I_T>::value)) &&
- // Smaller output size
- (sizeof(O_T) < sizeof(I_T)));
-};
-
-template <typename O, typename I, typename Enable = void>
-struct is_number_upcast {
- static constexpr bool value = false;
-};
-
-template <typename O, typename I>
-struct is_number_upcast<
- O, I, enable_if_t<is_number_type<O>::value && is_number_type<I>::value>> {
- using O_T = typename O::c_type;
- using I_T = typename I::c_type;
-
- static constexpr bool value =
- ((!std::is_same<O, I>::value) &&
- // Both types are of the same sign-ness.
- ((std::is_signed<O_T>::value == std::is_signed<I_T>::value) &&
- // Both types are of the same integral-ness.
- (std::is_floating_point<O_T>::value == std::is_floating_point<I_T>::value)) &&
- // Larger output size
- (sizeof(O_T) > sizeof(I_T)));
-};
-
-template <typename O, typename I, typename Enable = void>
-struct is_integral_signed_to_unsigned {
- static constexpr bool value = false;
-};
-
-template <typename O, typename I>
-struct is_integral_signed_to_unsigned<
- O, I, enable_if_t<is_integer_type<O>::value && is_integer_type<I>::value>> {
- using O_T = typename O::c_type;
- using I_T = typename I::c_type;
-
- static constexpr bool value =
- ((!std::is_same<O, I>::value) &&
- ((std::is_unsigned<O_T>::value && std::is_signed<I_T>::value)));
-};
-
-template <typename O, typename I, typename Enable = void>
-struct is_integral_unsigned_to_signed {
- static constexpr bool value = false;
-};
-
-template <typename O, typename I>
-struct is_integral_unsigned_to_signed<
- O, I, enable_if_t<is_integer_type<O>::value && is_integer_type<I>::value>> {
- using O_T = typename O::c_type;
- using I_T = typename I::c_type;
-
- static constexpr bool value =
- ((!std::is_same<O, I>::value) &&
- ((std::is_signed<O_T>::value && std::is_unsigned<I_T>::value)));
-};
-
-// This set of functions SafeMinimum/SafeMaximum would be simplified with
-// C++17 and `if constexpr`.
-
-// clang-format doesn't handle this construct properly. Thus the macro, but it
-// also improves readability.
-//
-// The effective return type of the function is always `I::c_type`, this is
-// just how enable_if works with functions.
-#define RET_TYPE(TRAIT) enable_if_t<TRAIT<O, I>::value, typename I::c_type>
-
-template <typename O, typename I>
-constexpr RET_TYPE(std::is_same) SafeMinimum() {
- using out_type = typename O::c_type;
-
- return std::numeric_limits<out_type>::lowest();
-}
-
-template <typename O, typename I>
-constexpr RET_TYPE(std::is_same) SafeMaximum() {
- using out_type = typename O::c_type;
-
- return std::numeric_limits<out_type>::max();
-}
-
-template <typename O, typename I>
-constexpr RET_TYPE(is_number_downcast) SafeMinimum() {
- using out_type = typename O::c_type;
-
- return std::numeric_limits<out_type>::lowest();
-}
-
-template <typename O, typename I>
-constexpr RET_TYPE(is_number_downcast) SafeMaximum() {
- using out_type = typename O::c_type;
-
- return std::numeric_limits<out_type>::max();
-}
-
-template <typename O, typename I>
-constexpr RET_TYPE(is_number_upcast) SafeMinimum() {
- using in_type = typename I::c_type;
- return std::numeric_limits<in_type>::lowest();
-}
-
-template <typename O, typename I>
-constexpr RET_TYPE(is_number_upcast) SafeMaximum() {
- using in_type = typename I::c_type;
- return std::numeric_limits<in_type>::max();
-}
-
-template <typename O, typename I>
-constexpr RET_TYPE(is_integral_unsigned_to_signed) SafeMinimum() {
- return 0;
-}
-
-template <typename O, typename I>
-constexpr RET_TYPE(is_integral_unsigned_to_signed) SafeMaximum() {
- using in_type = typename I::c_type;
- using out_type = typename O::c_type;
-
- // Equality is missing because in_type::max() > out_type::max() when types
- // are of the same width.
- return static_cast<in_type>(sizeof(in_type) < sizeof(out_type)
- ? std::numeric_limits<in_type>::max()
- : std::numeric_limits<out_type>::max());
-}
-
-template <typename O, typename I>
-constexpr RET_TYPE(is_integral_signed_to_unsigned) SafeMinimum() {
- return 0;
-}
-
-template <typename O, typename I>
-constexpr RET_TYPE(is_integral_signed_to_unsigned) SafeMaximum() {
- using in_type = typename I::c_type;
- using out_type = typename O::c_type;
-
- return static_cast<in_type>(sizeof(in_type) <= sizeof(out_type)
- ? std::numeric_limits<in_type>::max()
- : std::numeric_limits<out_type>::max());
-}
-
-#undef RET_TYPE
-
-#define GET_MIN_MAX_CASE(TYPE, OUT_TYPE) \
- case Type::TYPE: \
- *min = SafeMinimum<OUT_TYPE, InType>(); \
- *max = SafeMaximum<OUT_TYPE, InType>(); \
- break
-
-template <typename InType, typename T = typename InType::c_type>
-void GetSafeMinMax(Type::type out_type, T* min, T* max) {
- switch (out_type) {
- GET_MIN_MAX_CASE(INT8, Int8Type);
- GET_MIN_MAX_CASE(INT16, Int16Type);
- GET_MIN_MAX_CASE(INT32, Int32Type);
- GET_MIN_MAX_CASE(INT64, Int64Type);
- GET_MIN_MAX_CASE(UINT8, UInt8Type);
- GET_MIN_MAX_CASE(UINT16, UInt16Type);
- GET_MIN_MAX_CASE(UINT32, UInt32Type);
- GET_MIN_MAX_CASE(UINT64, UInt64Type);
- default:
- break;
- }
-}
-
-template <typename Type, typename CType = typename Type::c_type,
- typename ScalarType = typename TypeTraits<Type>::ScalarType>
-Status IntegersCanFitImpl(const Datum& datum, const DataType& target_type) {
- CType bound_min{}, bound_max{};
- GetSafeMinMax<Type>(target_type.id(), &bound_min, &bound_max);
- return CheckIntegersInRange(datum, ScalarType(bound_min), ScalarType(bound_max));
-}
-
+template <typename O, typename I, typename Enable = void>
+struct is_number_downcast {
+ static constexpr bool value = false;
+};
+
+template <typename O, typename I>
+struct is_number_downcast<
+ O, I, enable_if_t<is_number_type<O>::value && is_number_type<I>::value>> {
+ using O_T = typename O::c_type;
+ using I_T = typename I::c_type;
+
+ static constexpr bool value =
+ ((!std::is_same<O, I>::value) &&
+ // Both types are of the same sign-ness.
+ ((std::is_signed<O_T>::value == std::is_signed<I_T>::value) &&
+ // Both types are of the same integral-ness.
+ (std::is_floating_point<O_T>::value == std::is_floating_point<I_T>::value)) &&
+ // Smaller output size
+ (sizeof(O_T) < sizeof(I_T)));
+};
+
+template <typename O, typename I, typename Enable = void>
+struct is_number_upcast {
+ static constexpr bool value = false;
+};
+
+template <typename O, typename I>
+struct is_number_upcast<
+ O, I, enable_if_t<is_number_type<O>::value && is_number_type<I>::value>> {
+ using O_T = typename O::c_type;
+ using I_T = typename I::c_type;
+
+ static constexpr bool value =
+ ((!std::is_same<O, I>::value) &&
+ // Both types are of the same sign-ness.
+ ((std::is_signed<O_T>::value == std::is_signed<I_T>::value) &&
+ // Both types are of the same integral-ness.
+ (std::is_floating_point<O_T>::value == std::is_floating_point<I_T>::value)) &&
+ // Larger output size
+ (sizeof(O_T) > sizeof(I_T)));
+};
+
+template <typename O, typename I, typename Enable = void>
+struct is_integral_signed_to_unsigned {
+ static constexpr bool value = false;
+};
+
+template <typename O, typename I>
+struct is_integral_signed_to_unsigned<
+ O, I, enable_if_t<is_integer_type<O>::value && is_integer_type<I>::value>> {
+ using O_T = typename O::c_type;
+ using I_T = typename I::c_type;
+
+ static constexpr bool value =
+ ((!std::is_same<O, I>::value) &&
+ ((std::is_unsigned<O_T>::value && std::is_signed<I_T>::value)));
+};
+
+template <typename O, typename I, typename Enable = void>
+struct is_integral_unsigned_to_signed {
+ static constexpr bool value = false;
+};
+
+template <typename O, typename I>
+struct is_integral_unsigned_to_signed<
+ O, I, enable_if_t<is_integer_type<O>::value && is_integer_type<I>::value>> {
+ using O_T = typename O::c_type;
+ using I_T = typename I::c_type;
+
+ static constexpr bool value =
+ ((!std::is_same<O, I>::value) &&
+ ((std::is_signed<O_T>::value && std::is_unsigned<I_T>::value)));
+};
+
+// This set of functions SafeMinimum/SafeMaximum would be simplified with
+// C++17 and `if constexpr`.
+
+// clang-format doesn't handle this construct properly. Thus the macro, but it
+// also improves readability.
+//
+// The effective return type of the function is always `I::c_type`, this is
+// just how enable_if works with functions.
+#define RET_TYPE(TRAIT) enable_if_t<TRAIT<O, I>::value, typename I::c_type>
+
+template <typename O, typename I>
+constexpr RET_TYPE(std::is_same) SafeMinimum() {
+ using out_type = typename O::c_type;
+
+ return std::numeric_limits<out_type>::lowest();
+}
+
+template <typename O, typename I>
+constexpr RET_TYPE(std::is_same) SafeMaximum() {
+ using out_type = typename O::c_type;
+
+ return std::numeric_limits<out_type>::max();
+}
+
+template <typename O, typename I>
+constexpr RET_TYPE(is_number_downcast) SafeMinimum() {
+ using out_type = typename O::c_type;
+
+ return std::numeric_limits<out_type>::lowest();
+}
+
+template <typename O, typename I>
+constexpr RET_TYPE(is_number_downcast) SafeMaximum() {
+ using out_type = typename O::c_type;
+
+ return std::numeric_limits<out_type>::max();
+}
+
+template <typename O, typename I>
+constexpr RET_TYPE(is_number_upcast) SafeMinimum() {
+ using in_type = typename I::c_type;
+ return std::numeric_limits<in_type>::lowest();
+}
+
+template <typename O, typename I>
+constexpr RET_TYPE(is_number_upcast) SafeMaximum() {
+ using in_type = typename I::c_type;
+ return std::numeric_limits<in_type>::max();
+}
+
+template <typename O, typename I>
+constexpr RET_TYPE(is_integral_unsigned_to_signed) SafeMinimum() {
+ return 0;
+}
+
+template <typename O, typename I>
+constexpr RET_TYPE(is_integral_unsigned_to_signed) SafeMaximum() {
+ using in_type = typename I::c_type;
+ using out_type = typename O::c_type;
+
+ // Equality is missing because in_type::max() > out_type::max() when types
+ // are of the same width.
+ return static_cast<in_type>(sizeof(in_type) < sizeof(out_type)
+ ? std::numeric_limits<in_type>::max()
+ : std::numeric_limits<out_type>::max());
+}
+
+template <typename O, typename I>
+constexpr RET_TYPE(is_integral_signed_to_unsigned) SafeMinimum() {
+ return 0;
+}
+
+template <typename O, typename I>
+constexpr RET_TYPE(is_integral_signed_to_unsigned) SafeMaximum() {
+ using in_type = typename I::c_type;
+ using out_type = typename O::c_type;
+
+ return static_cast<in_type>(sizeof(in_type) <= sizeof(out_type)
+ ? std::numeric_limits<in_type>::max()
+ : std::numeric_limits<out_type>::max());
+}
+
+#undef RET_TYPE
+
+#define GET_MIN_MAX_CASE(TYPE, OUT_TYPE) \
+ case Type::TYPE: \
+ *min = SafeMinimum<OUT_TYPE, InType>(); \
+ *max = SafeMaximum<OUT_TYPE, InType>(); \
+ break
+
+template <typename InType, typename T = typename InType::c_type>
+void GetSafeMinMax(Type::type out_type, T* min, T* max) {
+ switch (out_type) {
+ GET_MIN_MAX_CASE(INT8, Int8Type);
+ GET_MIN_MAX_CASE(INT16, Int16Type);
+ GET_MIN_MAX_CASE(INT32, Int32Type);
+ GET_MIN_MAX_CASE(INT64, Int64Type);
+ GET_MIN_MAX_CASE(UINT8, UInt8Type);
+ GET_MIN_MAX_CASE(UINT16, UInt16Type);
+ GET_MIN_MAX_CASE(UINT32, UInt32Type);
+ GET_MIN_MAX_CASE(UINT64, UInt64Type);
+ default:
+ break;
+ }
+}
+
+template <typename Type, typename CType = typename Type::c_type,
+ typename ScalarType = typename TypeTraits<Type>::ScalarType>
+Status IntegersCanFitImpl(const Datum& datum, const DataType& target_type) {
+ CType bound_min{}, bound_max{};
+ GetSafeMinMax<Type>(target_type.id(), &bound_min, &bound_max);
+ return CheckIntegersInRange(datum, ScalarType(bound_min), ScalarType(bound_max));
+}
+
} // namespace
-Status IntegersCanFit(const Datum& datum, const DataType& target_type) {
- if (!is_integer(target_type.id())) {
- return Status::Invalid("Target type is not an integer type: ", target_type);
- }
-
- switch (datum.type()->id()) {
- case Type::INT8:
- return IntegersCanFitImpl<Int8Type>(datum, target_type);
- case Type::INT16:
- return IntegersCanFitImpl<Int16Type>(datum, target_type);
- case Type::INT32:
- return IntegersCanFitImpl<Int32Type>(datum, target_type);
- case Type::INT64:
- return IntegersCanFitImpl<Int64Type>(datum, target_type);
- case Type::UINT8:
- return IntegersCanFitImpl<UInt8Type>(datum, target_type);
- case Type::UINT16:
- return IntegersCanFitImpl<UInt16Type>(datum, target_type);
- case Type::UINT32:
- return IntegersCanFitImpl<UInt32Type>(datum, target_type);
- case Type::UINT64:
- return IntegersCanFitImpl<UInt64Type>(datum, target_type);
- default:
- return Status::TypeError("Invalid index type for boundschecking");
- }
-}
-
-} // namespace internal
-} // namespace arrow
+Status IntegersCanFit(const Datum& datum, const DataType& target_type) {
+ if (!is_integer(target_type.id())) {
+ return Status::Invalid("Target type is not an integer type: ", target_type);
+ }
+
+ switch (datum.type()->id()) {
+ case Type::INT8:
+ return IntegersCanFitImpl<Int8Type>(datum, target_type);
+ case Type::INT16:
+ return IntegersCanFitImpl<Int16Type>(datum, target_type);
+ case Type::INT32:
+ return IntegersCanFitImpl<Int32Type>(datum, target_type);
+ case Type::INT64:
+ return IntegersCanFitImpl<Int64Type>(datum, target_type);
+ case Type::UINT8:
+ return IntegersCanFitImpl<UInt8Type>(datum, target_type);
+ case Type::UINT16:
+ return IntegersCanFitImpl<UInt16Type>(datum, target_type);
+ case Type::UINT32:
+ return IntegersCanFitImpl<UInt32Type>(datum, target_type);
+ case Type::UINT64:
+ return IntegersCanFitImpl<UInt64Type>(datum, target_type);
+ default:
+ return Status::TypeError("Invalid index type for boundschecking");
+ }
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/int_util.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/int_util.h
index bf9226cdf12..3b553e042ae 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/int_util.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/int_util.h
@@ -1,79 +1,79 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
#include <type_traits>
-
-#include "arrow/status.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class DataType;
-struct ArrayData;
-struct Datum;
-struct Scalar;
-
-namespace internal {
-
-ARROW_EXPORT
-uint8_t DetectUIntWidth(const uint64_t* values, int64_t length, uint8_t min_width = 1);
-
-ARROW_EXPORT
-uint8_t DetectUIntWidth(const uint64_t* values, const uint8_t* valid_bytes,
- int64_t length, uint8_t min_width = 1);
-
-ARROW_EXPORT
-uint8_t DetectIntWidth(const int64_t* values, int64_t length, uint8_t min_width = 1);
-
-ARROW_EXPORT
-uint8_t DetectIntWidth(const int64_t* values, const uint8_t* valid_bytes, int64_t length,
- uint8_t min_width = 1);
-
-ARROW_EXPORT
-void DowncastInts(const int64_t* source, int8_t* dest, int64_t length);
-
-ARROW_EXPORT
-void DowncastInts(const int64_t* source, int16_t* dest, int64_t length);
-
-ARROW_EXPORT
-void DowncastInts(const int64_t* source, int32_t* dest, int64_t length);
-
-ARROW_EXPORT
-void DowncastInts(const int64_t* source, int64_t* dest, int64_t length);
-
-ARROW_EXPORT
-void DowncastUInts(const uint64_t* source, uint8_t* dest, int64_t length);
-
-ARROW_EXPORT
-void DowncastUInts(const uint64_t* source, uint16_t* dest, int64_t length);
-
-ARROW_EXPORT
-void DowncastUInts(const uint64_t* source, uint32_t* dest, int64_t length);
-
-ARROW_EXPORT
-void DowncastUInts(const uint64_t* source, uint64_t* dest, int64_t length);
-
+
+#include "arrow/status.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class DataType;
+struct ArrayData;
+struct Datum;
+struct Scalar;
+
+namespace internal {
+
+ARROW_EXPORT
+uint8_t DetectUIntWidth(const uint64_t* values, int64_t length, uint8_t min_width = 1);
+
+ARROW_EXPORT
+uint8_t DetectUIntWidth(const uint64_t* values, const uint8_t* valid_bytes,
+ int64_t length, uint8_t min_width = 1);
+
+ARROW_EXPORT
+uint8_t DetectIntWidth(const int64_t* values, int64_t length, uint8_t min_width = 1);
+
+ARROW_EXPORT
+uint8_t DetectIntWidth(const int64_t* values, const uint8_t* valid_bytes, int64_t length,
+ uint8_t min_width = 1);
+
+ARROW_EXPORT
+void DowncastInts(const int64_t* source, int8_t* dest, int64_t length);
+
+ARROW_EXPORT
+void DowncastInts(const int64_t* source, int16_t* dest, int64_t length);
+
+ARROW_EXPORT
+void DowncastInts(const int64_t* source, int32_t* dest, int64_t length);
+
+ARROW_EXPORT
+void DowncastInts(const int64_t* source, int64_t* dest, int64_t length);
+
+ARROW_EXPORT
+void DowncastUInts(const uint64_t* source, uint8_t* dest, int64_t length);
+
+ARROW_EXPORT
+void DowncastUInts(const uint64_t* source, uint16_t* dest, int64_t length);
+
+ARROW_EXPORT
+void DowncastUInts(const uint64_t* source, uint32_t* dest, int64_t length);
+
+ARROW_EXPORT
+void DowncastUInts(const uint64_t* source, uint64_t* dest, int64_t length);
+
ARROW_EXPORT
void UpcastInts(const int32_t* source, int64_t* dest, int64_t length);
-template <typename InputInt, typename OutputInt>
+template <typename InputInt, typename OutputInt>
inline typename std::enable_if<(sizeof(InputInt) >= sizeof(OutputInt))>::type CastInts(
const InputInt* source, OutputInt* dest, int64_t length) {
DowncastInts(source, dest, length);
@@ -86,32 +86,32 @@ inline typename std::enable_if<(sizeof(InputInt) < sizeof(OutputInt))>::type Cas
}
template <typename InputInt, typename OutputInt>
-ARROW_EXPORT void TransposeInts(const InputInt* source, OutputInt* dest, int64_t length,
- const int32_t* transpose_map);
-
+ARROW_EXPORT void TransposeInts(const InputInt* source, OutputInt* dest, int64_t length,
+ const int32_t* transpose_map);
+
ARROW_EXPORT
Status TransposeInts(const DataType& src_type, const DataType& dest_type,
const uint8_t* src, uint8_t* dest, int64_t src_offset,
int64_t dest_offset, int64_t length, const int32_t* transpose_map);
-/// \brief Do vectorized boundschecking of integer-type array indices. The
-/// indices must be non-nonnegative and strictly less than the passed upper
-/// limit (which is usually the length of an array that is being indexed-into).
-ARROW_EXPORT
-Status CheckIndexBounds(const ArrayData& indices, uint64_t upper_limit);
-
-/// \brief Boundscheck integer values to determine if they are all between the
-/// passed upper and lower limits (inclusive). Upper and lower bounds must be
-/// the same type as the data and are not currently casted.
-ARROW_EXPORT
-Status CheckIntegersInRange(const Datum& datum, const Scalar& bound_lower,
- const Scalar& bound_upper);
-
-/// \brief Use CheckIntegersInRange to determine whether the passed integers
-/// can fit safely in the passed integer type. This helps quickly determine if
-/// integer narrowing (e.g. int64->int32) is safe to do.
-ARROW_EXPORT
-Status IntegersCanFit(const Datum& datum, const DataType& target_type);
-
-} // namespace internal
-} // namespace arrow
+/// \brief Do vectorized boundschecking of integer-type array indices. The
+/// indices must be non-nonnegative and strictly less than the passed upper
+/// limit (which is usually the length of an array that is being indexed-into).
+ARROW_EXPORT
+Status CheckIndexBounds(const ArrayData& indices, uint64_t upper_limit);
+
+/// \brief Boundscheck integer values to determine if they are all between the
+/// passed upper and lower limits (inclusive). Upper and lower bounds must be
+/// the same type as the data and are not currently casted.
+ARROW_EXPORT
+Status CheckIntegersInRange(const Datum& datum, const Scalar& bound_lower,
+ const Scalar& bound_upper);
+
+/// \brief Use CheckIntegersInRange to determine whether the passed integers
+/// can fit safely in the passed integer type. This helps quickly determine if
+/// integer narrowing (e.g. int64->int32) is safe to do.
+ARROW_EXPORT
+Status IntegersCanFit(const Datum& datum, const DataType& target_type);
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/int_util_internal.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/int_util_internal.h
index 4136706629f..8924bbeb478 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/int_util_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/int_util_internal.h
@@ -1,68 +1,68 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <limits>
-#include <type_traits>
-
-#include "arrow/status.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-// "safe-math.h" includes <intsafe.h> from the Windows headers.
-#include "arrow/util/windows_compatibility.h"
-#include "arrow/vendored/portable-snippets/safe-math.h"
-// clang-format off (avoid include reordering)
-#include "arrow/util/windows_fixup.h"
-// clang-format on
-
-namespace arrow {
-namespace internal {
-
-// Define functions AddWithOverflow, SubtractWithOverflow, MultiplyWithOverflow
-// with the signature `bool(T u, T v, T* out)` where T is an integer type.
-// On overflow, these functions return true. Otherwise, false is returned
-// and `out` is updated with the result of the operation.
-
-#define OP_WITH_OVERFLOW(_func_name, _psnip_op, _type, _psnip_type) \
- static inline bool _func_name(_type u, _type v, _type* out) { \
- return !psnip_safe_##_psnip_type##_##_psnip_op(out, u, v); \
- }
-
-#define OPS_WITH_OVERFLOW(_func_name, _psnip_op) \
- OP_WITH_OVERFLOW(_func_name, _psnip_op, int8_t, int8) \
- OP_WITH_OVERFLOW(_func_name, _psnip_op, int16_t, int16) \
- OP_WITH_OVERFLOW(_func_name, _psnip_op, int32_t, int32) \
- OP_WITH_OVERFLOW(_func_name, _psnip_op, int64_t, int64) \
- OP_WITH_OVERFLOW(_func_name, _psnip_op, uint8_t, uint8) \
- OP_WITH_OVERFLOW(_func_name, _psnip_op, uint16_t, uint16) \
- OP_WITH_OVERFLOW(_func_name, _psnip_op, uint32_t, uint32) \
- OP_WITH_OVERFLOW(_func_name, _psnip_op, uint64_t, uint64)
-
-OPS_WITH_OVERFLOW(AddWithOverflow, add)
-OPS_WITH_OVERFLOW(SubtractWithOverflow, sub)
-OPS_WITH_OVERFLOW(MultiplyWithOverflow, mul)
-OPS_WITH_OVERFLOW(DivideWithOverflow, div)
-
-#undef OP_WITH_OVERFLOW
-#undef OPS_WITH_OVERFLOW
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <limits>
+#include <type_traits>
+
+#include "arrow/status.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+// "safe-math.h" includes <intsafe.h> from the Windows headers.
+#include "arrow/util/windows_compatibility.h"
+#include "arrow/vendored/portable-snippets/safe-math.h"
+// clang-format off (avoid include reordering)
+#include "arrow/util/windows_fixup.h"
+// clang-format on
+
+namespace arrow {
+namespace internal {
+
+// Define functions AddWithOverflow, SubtractWithOverflow, MultiplyWithOverflow
+// with the signature `bool(T u, T v, T* out)` where T is an integer type.
+// On overflow, these functions return true. Otherwise, false is returned
+// and `out` is updated with the result of the operation.
+
+#define OP_WITH_OVERFLOW(_func_name, _psnip_op, _type, _psnip_type) \
+ static inline bool _func_name(_type u, _type v, _type* out) { \
+ return !psnip_safe_##_psnip_type##_##_psnip_op(out, u, v); \
+ }
+
+#define OPS_WITH_OVERFLOW(_func_name, _psnip_op) \
+ OP_WITH_OVERFLOW(_func_name, _psnip_op, int8_t, int8) \
+ OP_WITH_OVERFLOW(_func_name, _psnip_op, int16_t, int16) \
+ OP_WITH_OVERFLOW(_func_name, _psnip_op, int32_t, int32) \
+ OP_WITH_OVERFLOW(_func_name, _psnip_op, int64_t, int64) \
+ OP_WITH_OVERFLOW(_func_name, _psnip_op, uint8_t, uint8) \
+ OP_WITH_OVERFLOW(_func_name, _psnip_op, uint16_t, uint16) \
+ OP_WITH_OVERFLOW(_func_name, _psnip_op, uint32_t, uint32) \
+ OP_WITH_OVERFLOW(_func_name, _psnip_op, uint64_t, uint64)
+
+OPS_WITH_OVERFLOW(AddWithOverflow, add)
+OPS_WITH_OVERFLOW(SubtractWithOverflow, sub)
+OPS_WITH_OVERFLOW(MultiplyWithOverflow, mul)
+OPS_WITH_OVERFLOW(DivideWithOverflow, div)
+
+#undef OP_WITH_OVERFLOW
+#undef OPS_WITH_OVERFLOW
+
// Define function NegateWithOverflow with the signature `bool(T u, T* out)`
// where T is a signed integer type. On overflow, these functions return true.
// Otherwise, false is returned and `out` is updated with the result of the
@@ -84,22 +84,22 @@ SIGNED_UNARY_OPS_WITH_OVERFLOW(NegateWithOverflow, neg)
#undef UNARY_OP_WITH_OVERFLOW
#undef SIGNED_UNARY_OPS_WITH_OVERFLOW
-/// Signed addition with well-defined behaviour on overflow (as unsigned)
-template <typename SignedInt>
-SignedInt SafeSignedAdd(SignedInt u, SignedInt v) {
- using UnsignedInt = typename std::make_unsigned<SignedInt>::type;
- return static_cast<SignedInt>(static_cast<UnsignedInt>(u) +
- static_cast<UnsignedInt>(v));
-}
-
-/// Signed subtraction with well-defined behaviour on overflow (as unsigned)
-template <typename SignedInt>
-SignedInt SafeSignedSubtract(SignedInt u, SignedInt v) {
- using UnsignedInt = typename std::make_unsigned<SignedInt>::type;
- return static_cast<SignedInt>(static_cast<UnsignedInt>(u) -
- static_cast<UnsignedInt>(v));
-}
-
+/// Signed addition with well-defined behaviour on overflow (as unsigned)
+template <typename SignedInt>
+SignedInt SafeSignedAdd(SignedInt u, SignedInt v) {
+ using UnsignedInt = typename std::make_unsigned<SignedInt>::type;
+ return static_cast<SignedInt>(static_cast<UnsignedInt>(u) +
+ static_cast<UnsignedInt>(v));
+}
+
+/// Signed subtraction with well-defined behaviour on overflow (as unsigned)
+template <typename SignedInt>
+SignedInt SafeSignedSubtract(SignedInt u, SignedInt v) {
+ using UnsignedInt = typename std::make_unsigned<SignedInt>::type;
+ return static_cast<SignedInt>(static_cast<UnsignedInt>(u) -
+ static_cast<UnsignedInt>(v));
+}
+
/// Signed negation with well-defined behaviour on overflow (as unsigned)
template <typename SignedInt>
SignedInt SafeSignedNegate(SignedInt u) {
@@ -107,47 +107,47 @@ SignedInt SafeSignedNegate(SignedInt u) {
return static_cast<SignedInt>(~static_cast<UnsignedInt>(u) + 1);
}
-/// Signed left shift with well-defined behaviour on negative numbers or overflow
-template <typename SignedInt, typename Shift>
-SignedInt SafeLeftShift(SignedInt u, Shift shift) {
- using UnsignedInt = typename std::make_unsigned<SignedInt>::type;
- return static_cast<SignedInt>(static_cast<UnsignedInt>(u) << shift);
-}
-
-/// Upcast an integer to the largest possible width (currently 64 bits)
-
-template <typename Integer>
-typename std::enable_if<
- std::is_integral<Integer>::value && std::is_signed<Integer>::value, int64_t>::type
-UpcastInt(Integer v) {
- return v;
-}
-
-template <typename Integer>
-typename std::enable_if<
- std::is_integral<Integer>::value && std::is_unsigned<Integer>::value, uint64_t>::type
-UpcastInt(Integer v) {
- return v;
-}
-
-static inline Status CheckSliceParams(int64_t object_length, int64_t slice_offset,
- int64_t slice_length, const char* object_name) {
- if (ARROW_PREDICT_FALSE(slice_offset < 0)) {
- return Status::Invalid("Negative ", object_name, " slice offset");
- }
- if (ARROW_PREDICT_FALSE(slice_length < 0)) {
- return Status::Invalid("Negative ", object_name, " slice length");
- }
- int64_t offset_plus_length;
- if (ARROW_PREDICT_FALSE(
- internal::AddWithOverflow(slice_offset, slice_length, &offset_plus_length))) {
- return Status::Invalid(object_name, " slice would overflow");
- }
- if (ARROW_PREDICT_FALSE(slice_offset + slice_length > object_length)) {
- return Status::Invalid(object_name, " slice would exceed ", object_name, " length");
- }
- return Status::OK();
-}
-
-} // namespace internal
-} // namespace arrow
+/// Signed left shift with well-defined behaviour on negative numbers or overflow
+template <typename SignedInt, typename Shift>
+SignedInt SafeLeftShift(SignedInt u, Shift shift) {
+ using UnsignedInt = typename std::make_unsigned<SignedInt>::type;
+ return static_cast<SignedInt>(static_cast<UnsignedInt>(u) << shift);
+}
+
+/// Upcast an integer to the largest possible width (currently 64 bits)
+
+template <typename Integer>
+typename std::enable_if<
+ std::is_integral<Integer>::value && std::is_signed<Integer>::value, int64_t>::type
+UpcastInt(Integer v) {
+ return v;
+}
+
+template <typename Integer>
+typename std::enable_if<
+ std::is_integral<Integer>::value && std::is_unsigned<Integer>::value, uint64_t>::type
+UpcastInt(Integer v) {
+ return v;
+}
+
+static inline Status CheckSliceParams(int64_t object_length, int64_t slice_offset,
+ int64_t slice_length, const char* object_name) {
+ if (ARROW_PREDICT_FALSE(slice_offset < 0)) {
+ return Status::Invalid("Negative ", object_name, " slice offset");
+ }
+ if (ARROW_PREDICT_FALSE(slice_length < 0)) {
+ return Status::Invalid("Negative ", object_name, " slice length");
+ }
+ int64_t offset_plus_length;
+ if (ARROW_PREDICT_FALSE(
+ internal::AddWithOverflow(slice_offset, slice_length, &offset_plus_length))) {
+ return Status::Invalid(object_name, " slice would overflow");
+ }
+ if (ARROW_PREDICT_FALSE(slice_offset + slice_length > object_length)) {
+ return Status::Invalid(object_name, " slice would exceed ", object_name, " length");
+ }
+ return Status::OK();
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/io_util.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/io_util.cc
index f6566ea7e36..e420760a0e5 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/io_util.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/io_util.cc
@@ -1,27 +1,27 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Ensure 64-bit off_t for platforms where it matters
-#ifdef _FILE_OFFSET_BITS
-#undef _FILE_OFFSET_BITS
-#endif
-
-#define _FILE_OFFSET_BITS 64
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Ensure 64-bit off_t for platforms where it matters
+#ifdef _FILE_OFFSET_BITS
+#undef _FILE_OFFSET_BITS
+#endif
+
+#define _FILE_OFFSET_BITS 64
+
#if defined(sun) || defined(__sun)
// According to https://bugs.python.org/issue1759169#msg82201, __EXTENSIONS__
// is the best way to enable modern POSIX APIs, such as posix_madvise(), on Solaris.
@@ -31,219 +31,219 @@
#define __EXTENSIONS__
#endif
-#include "arrow/util/windows_compatibility.h" // IWYU pragma: keep
-
-#include <algorithm>
-#include <cerrno>
-#include <cstdint>
-#include <cstring>
-#include <iostream>
-#include <random>
-#include <sstream>
-#include <string>
+#include "arrow/util/windows_compatibility.h" // IWYU pragma: keep
+
+#include <algorithm>
+#include <cerrno>
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include <random>
+#include <sstream>
+#include <string>
#include <thread>
-#include <utility>
-#include <vector>
-
-#include <fcntl.h>
-#include <signal.h>
-#include <stdlib.h>
-#include <sys/stat.h>
-#include <sys/types.h> // IWYU pragma: keep
-
-// ----------------------------------------------------------------------
-// file compatibility stuff
-
-#ifdef _WIN32
-#include <io.h>
-#include <share.h>
-#else // POSIX-like platforms
-#include <dirent.h>
-#endif
-
-#ifdef _WIN32
-#include "arrow/io/mman.h"
-#undef Realloc
-#undef Free
-#else // POSIX-like platforms
-#include <sys/mman.h>
-#include <unistd.h>
-#endif
-
-// define max read/write count
-#ifdef _WIN32
-#define ARROW_MAX_IO_CHUNKSIZE INT32_MAX
-#else
-
-#ifdef __APPLE__
-// due to macOS bug, we need to set read/write max
-#define ARROW_MAX_IO_CHUNKSIZE INT32_MAX
-#else
-// see notes on Linux read/write manpage
-#define ARROW_MAX_IO_CHUNKSIZE 0x7ffff000
-#endif
-
-#endif
-
-#include "arrow/buffer.h"
-#include "arrow/result.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/io_util.h"
-#include "arrow/util/logging.h"
-
-// For filename conversion
-#if defined(_WIN32)
-#include "arrow/util/utf8.h"
-#endif
-
-namespace arrow {
-
-using internal::checked_cast;
-
-namespace internal {
-
-namespace {
-
-template <typename CharT>
-std::basic_string<CharT> ReplaceChars(std::basic_string<CharT> s, CharT find, CharT rep) {
- if (find != rep) {
- for (size_t i = 0; i < s.length(); ++i) {
- if (s[i] == find) {
- s[i] = rep;
- }
- }
- }
- return s;
-}
-
-Result<NativePathString> StringToNative(const std::string& s) {
-#if _WIN32
- return ::arrow::util::UTF8ToWideString(s);
-#else
- return s;
-#endif
-}
-
-#if _WIN32
-Result<std::string> NativeToString(const NativePathString& ws) {
- return ::arrow::util::WideStringToUTF8(ws);
-}
-#endif
-
-#if _WIN32
-const wchar_t kNativeSep = L'\\';
-const wchar_t kGenericSep = L'/';
-const wchar_t* kAllSeps = L"\\/";
-#else
-const char kNativeSep = '/';
-const char kGenericSep = '/';
-const char* kAllSeps = "/";
-#endif
-
-NativePathString NativeSlashes(NativePathString s) {
- return ReplaceChars(std::move(s), kGenericSep, kNativeSep);
-}
-
-NativePathString GenericSlashes(NativePathString s) {
- return ReplaceChars(std::move(s), kNativeSep, kGenericSep);
-}
-
-NativePathString NativeParent(const NativePathString& s) {
- auto last_sep = s.find_last_of(kAllSeps);
- if (last_sep == s.length() - 1) {
- // Last separator is a trailing separator, skip all trailing separators
- // and try again
- auto before_last_seps = s.find_last_not_of(kAllSeps);
- if (before_last_seps == NativePathString::npos) {
- // Only separators in path
- return s;
- }
- last_sep = s.find_last_of(kAllSeps, before_last_seps);
- }
- if (last_sep == NativePathString::npos) {
- // No (other) separator in path
- return s;
- }
- // There may be multiple contiguous separators, skip all of them
- auto before_last_seps = s.find_last_not_of(kAllSeps, last_sep);
- if (before_last_seps == NativePathString::npos) {
- // All separators are at start of string, keep them all
- return s.substr(0, last_sep + 1);
- } else {
- return s.substr(0, before_last_seps + 1);
- }
-}
-
-Status ValidatePath(const std::string& s) {
- if (s.find_first_of('\0') != std::string::npos) {
- return Status::Invalid("Embedded NUL char in path: '", s, "'");
- }
- return Status::OK();
-}
-
-} // namespace
-
-std::string ErrnoMessage(int errnum) { return std::strerror(errnum); }
-
-#if _WIN32
-std::string WinErrorMessage(int errnum) {
- char buf[1024];
- auto nchars = FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
- NULL, errnum, 0, buf, sizeof(buf), NULL);
- if (nchars == 0) {
- // Fallback
- std::stringstream ss;
- ss << "Windows error #" << errnum;
- return ss.str();
- }
- return std::string(buf, nchars);
-}
-#endif
-
-namespace {
-
-const char kErrnoDetailTypeId[] = "arrow::ErrnoDetail";
-
-class ErrnoDetail : public StatusDetail {
- public:
- explicit ErrnoDetail(int errnum) : errnum_(errnum) {}
-
- const char* type_id() const override { return kErrnoDetailTypeId; }
-
- std::string ToString() const override {
- std::stringstream ss;
- ss << "[errno " << errnum_ << "] " << ErrnoMessage(errnum_);
- return ss.str();
- }
-
- int errnum() const { return errnum_; }
-
- protected:
- int errnum_;
-};
-
-#if _WIN32
-const char kWinErrorDetailTypeId[] = "arrow::WinErrorDetail";
-
-class WinErrorDetail : public StatusDetail {
- public:
- explicit WinErrorDetail(int errnum) : errnum_(errnum) {}
-
- const char* type_id() const override { return kWinErrorDetailTypeId; }
-
- std::string ToString() const override {
- std::stringstream ss;
- ss << "[Windows error " << errnum_ << "] " << WinErrorMessage(errnum_);
- return ss.str();
- }
-
- int errnum() const { return errnum_; }
-
- protected:
- int errnum_;
-};
-#endif
-
+#include <utility>
+#include <vector>
+
+#include <fcntl.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/types.h> // IWYU pragma: keep
+
+// ----------------------------------------------------------------------
+// file compatibility stuff
+
+#ifdef _WIN32
+#include <io.h>
+#include <share.h>
+#else // POSIX-like platforms
+#include <dirent.h>
+#endif
+
+#ifdef _WIN32
+#include "arrow/io/mman.h"
+#undef Realloc
+#undef Free
+#else // POSIX-like platforms
+#include <sys/mman.h>
+#include <unistd.h>
+#endif
+
+// define max read/write count
+#ifdef _WIN32
+#define ARROW_MAX_IO_CHUNKSIZE INT32_MAX
+#else
+
+#ifdef __APPLE__
+// due to macOS bug, we need to set read/write max
+#define ARROW_MAX_IO_CHUNKSIZE INT32_MAX
+#else
+// see notes on Linux read/write manpage
+#define ARROW_MAX_IO_CHUNKSIZE 0x7ffff000
+#endif
+
+#endif
+
+#include "arrow/buffer.h"
+#include "arrow/result.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/io_util.h"
+#include "arrow/util/logging.h"
+
+// For filename conversion
+#if defined(_WIN32)
+#include "arrow/util/utf8.h"
+#endif
+
+namespace arrow {
+
+using internal::checked_cast;
+
+namespace internal {
+
+namespace {
+
+template <typename CharT>
+std::basic_string<CharT> ReplaceChars(std::basic_string<CharT> s, CharT find, CharT rep) {
+ if (find != rep) {
+ for (size_t i = 0; i < s.length(); ++i) {
+ if (s[i] == find) {
+ s[i] = rep;
+ }
+ }
+ }
+ return s;
+}
+
+Result<NativePathString> StringToNative(const std::string& s) {
+#if _WIN32
+ return ::arrow::util::UTF8ToWideString(s);
+#else
+ return s;
+#endif
+}
+
+#if _WIN32
+Result<std::string> NativeToString(const NativePathString& ws) {
+ return ::arrow::util::WideStringToUTF8(ws);
+}
+#endif
+
+#if _WIN32
+const wchar_t kNativeSep = L'\\';
+const wchar_t kGenericSep = L'/';
+const wchar_t* kAllSeps = L"\\/";
+#else
+const char kNativeSep = '/';
+const char kGenericSep = '/';
+const char* kAllSeps = "/";
+#endif
+
+NativePathString NativeSlashes(NativePathString s) {
+ return ReplaceChars(std::move(s), kGenericSep, kNativeSep);
+}
+
+NativePathString GenericSlashes(NativePathString s) {
+ return ReplaceChars(std::move(s), kNativeSep, kGenericSep);
+}
+
+NativePathString NativeParent(const NativePathString& s) {
+ auto last_sep = s.find_last_of(kAllSeps);
+ if (last_sep == s.length() - 1) {
+ // Last separator is a trailing separator, skip all trailing separators
+ // and try again
+ auto before_last_seps = s.find_last_not_of(kAllSeps);
+ if (before_last_seps == NativePathString::npos) {
+ // Only separators in path
+ return s;
+ }
+ last_sep = s.find_last_of(kAllSeps, before_last_seps);
+ }
+ if (last_sep == NativePathString::npos) {
+ // No (other) separator in path
+ return s;
+ }
+ // There may be multiple contiguous separators, skip all of them
+ auto before_last_seps = s.find_last_not_of(kAllSeps, last_sep);
+ if (before_last_seps == NativePathString::npos) {
+ // All separators are at start of string, keep them all
+ return s.substr(0, last_sep + 1);
+ } else {
+ return s.substr(0, before_last_seps + 1);
+ }
+}
+
+Status ValidatePath(const std::string& s) {
+ if (s.find_first_of('\0') != std::string::npos) {
+ return Status::Invalid("Embedded NUL char in path: '", s, "'");
+ }
+ return Status::OK();
+}
+
+} // namespace
+
+std::string ErrnoMessage(int errnum) { return std::strerror(errnum); }
+
+#if _WIN32
+std::string WinErrorMessage(int errnum) {
+ char buf[1024];
+ auto nchars = FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
+ NULL, errnum, 0, buf, sizeof(buf), NULL);
+ if (nchars == 0) {
+ // Fallback
+ std::stringstream ss;
+ ss << "Windows error #" << errnum;
+ return ss.str();
+ }
+ return std::string(buf, nchars);
+}
+#endif
+
+namespace {
+
+const char kErrnoDetailTypeId[] = "arrow::ErrnoDetail";
+
+class ErrnoDetail : public StatusDetail {
+ public:
+ explicit ErrnoDetail(int errnum) : errnum_(errnum) {}
+
+ const char* type_id() const override { return kErrnoDetailTypeId; }
+
+ std::string ToString() const override {
+ std::stringstream ss;
+ ss << "[errno " << errnum_ << "] " << ErrnoMessage(errnum_);
+ return ss.str();
+ }
+
+ int errnum() const { return errnum_; }
+
+ protected:
+ int errnum_;
+};
+
+#if _WIN32
+const char kWinErrorDetailTypeId[] = "arrow::WinErrorDetail";
+
+class WinErrorDetail : public StatusDetail {
+ public:
+ explicit WinErrorDetail(int errnum) : errnum_(errnum) {}
+
+ const char* type_id() const override { return kWinErrorDetailTypeId; }
+
+ std::string ToString() const override {
+ std::stringstream ss;
+ ss << "[Windows error " << errnum_ << "] " << WinErrorMessage(errnum_);
+ return ss.str();
+ }
+
+ int errnum() const { return errnum_; }
+
+ protected:
+ int errnum_;
+};
+#endif
+
const char kSignalDetailTypeId[] = "arrow::SignalDetail";
class SignalDetail : public StatusDetail {
@@ -264,40 +264,40 @@ class SignalDetail : public StatusDetail {
int signum_;
};
-} // namespace
-
-std::shared_ptr<StatusDetail> StatusDetailFromErrno(int errnum) {
- return std::make_shared<ErrnoDetail>(errnum);
-}
-
-#if _WIN32
-std::shared_ptr<StatusDetail> StatusDetailFromWinError(int errnum) {
- return std::make_shared<WinErrorDetail>(errnum);
-}
-#endif
-
+} // namespace
+
+std::shared_ptr<StatusDetail> StatusDetailFromErrno(int errnum) {
+ return std::make_shared<ErrnoDetail>(errnum);
+}
+
+#if _WIN32
+std::shared_ptr<StatusDetail> StatusDetailFromWinError(int errnum) {
+ return std::make_shared<WinErrorDetail>(errnum);
+}
+#endif
+
std::shared_ptr<StatusDetail> StatusDetailFromSignal(int signum) {
return std::make_shared<SignalDetail>(signum);
}
-int ErrnoFromStatus(const Status& status) {
- const auto detail = status.detail();
- if (detail != nullptr && detail->type_id() == kErrnoDetailTypeId) {
- return checked_cast<const ErrnoDetail&>(*detail).errnum();
- }
- return 0;
-}
-
-int WinErrorFromStatus(const Status& status) {
-#if _WIN32
- const auto detail = status.detail();
- if (detail != nullptr && detail->type_id() == kWinErrorDetailTypeId) {
- return checked_cast<const WinErrorDetail&>(*detail).errnum();
- }
-#endif
- return 0;
-}
-
+int ErrnoFromStatus(const Status& status) {
+ const auto detail = status.detail();
+ if (detail != nullptr && detail->type_id() == kErrnoDetailTypeId) {
+ return checked_cast<const ErrnoDetail&>(*detail).errnum();
+ }
+ return 0;
+}
+
+int WinErrorFromStatus(const Status& status) {
+#if _WIN32
+ const auto detail = status.detail();
+ if (detail != nullptr && detail->type_id() == kWinErrorDetailTypeId) {
+ return checked_cast<const WinErrorDetail&>(*detail).errnum();
+ }
+#endif
+ return 0;
+}
+
int SignalFromStatus(const Status& status) {
const auto detail = status.detail();
if (detail != nullptr && detail->type_id() == kSignalDetailTypeId) {
@@ -306,719 +306,719 @@ int SignalFromStatus(const Status& status) {
return 0;
}
-//
-// PlatformFilename implementation
-//
-
-struct PlatformFilename::Impl {
- Impl() = default;
- explicit Impl(NativePathString p) : native_(NativeSlashes(std::move(p))) {}
-
- NativePathString native_;
-
- // '/'-separated
- NativePathString generic() const { return GenericSlashes(native_); }
-};
-
-PlatformFilename::PlatformFilename() : impl_(new Impl{}) {}
-
-PlatformFilename::~PlatformFilename() {}
-
-PlatformFilename::PlatformFilename(Impl impl) : impl_(new Impl(std::move(impl))) {}
-
-PlatformFilename::PlatformFilename(const PlatformFilename& other)
- : PlatformFilename(Impl{other.impl_->native_}) {}
-
-PlatformFilename::PlatformFilename(PlatformFilename&& other)
- : impl_(std::move(other.impl_)) {}
-
-PlatformFilename& PlatformFilename::operator=(const PlatformFilename& other) {
- this->impl_.reset(new Impl{other.impl_->native_});
- return *this;
-}
-
-PlatformFilename& PlatformFilename::operator=(PlatformFilename&& other) {
- this->impl_ = std::move(other.impl_);
- return *this;
-}
-
-PlatformFilename::PlatformFilename(const NativePathString& path)
- : PlatformFilename(Impl{path}) {}
-
-PlatformFilename::PlatformFilename(const NativePathString::value_type* path)
- : PlatformFilename(NativePathString(path)) {}
-
-bool PlatformFilename::operator==(const PlatformFilename& other) const {
- return impl_->native_ == other.impl_->native_;
-}
-
-bool PlatformFilename::operator!=(const PlatformFilename& other) const {
- return impl_->native_ != other.impl_->native_;
-}
-
-const NativePathString& PlatformFilename::ToNative() const { return impl_->native_; }
-
-std::string PlatformFilename::ToString() const {
-#if _WIN32
- auto result = NativeToString(impl_->generic());
- if (!result.ok()) {
- std::stringstream ss;
- ss << "<Unrepresentable filename: " << result.status().ToString() << ">";
- return ss.str();
- }
- return *std::move(result);
-#else
- return impl_->generic();
-#endif
-}
-
-PlatformFilename PlatformFilename::Parent() const {
- return PlatformFilename(NativeParent(ToNative()));
-}
-
-Result<PlatformFilename> PlatformFilename::FromString(const std::string& file_name) {
- RETURN_NOT_OK(ValidatePath(file_name));
- ARROW_ASSIGN_OR_RAISE(auto ns, StringToNative(file_name));
- return PlatformFilename(std::move(ns));
-}
-
-PlatformFilename PlatformFilename::Join(const PlatformFilename& child) const {
- if (impl_->native_.empty() || impl_->native_.back() == kNativeSep) {
- return PlatformFilename(Impl{impl_->native_ + child.impl_->native_});
- } else {
- return PlatformFilename(Impl{impl_->native_ + kNativeSep + child.impl_->native_});
- }
-}
-
-Result<PlatformFilename> PlatformFilename::Join(const std::string& child_name) const {
- ARROW_ASSIGN_OR_RAISE(auto child, PlatformFilename::FromString(child_name));
- return Join(child);
-}
-
-//
-// Filesystem access routines
-//
-
-namespace {
-
-Result<bool> DoCreateDir(const PlatformFilename& dir_path, bool create_parents) {
-#ifdef _WIN32
+//
+// PlatformFilename implementation
+//
+
+struct PlatformFilename::Impl {
+ Impl() = default;
+ explicit Impl(NativePathString p) : native_(NativeSlashes(std::move(p))) {}
+
+ NativePathString native_;
+
+ // '/'-separated
+ NativePathString generic() const { return GenericSlashes(native_); }
+};
+
+PlatformFilename::PlatformFilename() : impl_(new Impl{}) {}
+
+PlatformFilename::~PlatformFilename() {}
+
+PlatformFilename::PlatformFilename(Impl impl) : impl_(new Impl(std::move(impl))) {}
+
+PlatformFilename::PlatformFilename(const PlatformFilename& other)
+ : PlatformFilename(Impl{other.impl_->native_}) {}
+
+PlatformFilename::PlatformFilename(PlatformFilename&& other)
+ : impl_(std::move(other.impl_)) {}
+
+PlatformFilename& PlatformFilename::operator=(const PlatformFilename& other) {
+ this->impl_.reset(new Impl{other.impl_->native_});
+ return *this;
+}
+
+PlatformFilename& PlatformFilename::operator=(PlatformFilename&& other) {
+ this->impl_ = std::move(other.impl_);
+ return *this;
+}
+
+PlatformFilename::PlatformFilename(const NativePathString& path)
+ : PlatformFilename(Impl{path}) {}
+
+PlatformFilename::PlatformFilename(const NativePathString::value_type* path)
+ : PlatformFilename(NativePathString(path)) {}
+
+bool PlatformFilename::operator==(const PlatformFilename& other) const {
+ return impl_->native_ == other.impl_->native_;
+}
+
+bool PlatformFilename::operator!=(const PlatformFilename& other) const {
+ return impl_->native_ != other.impl_->native_;
+}
+
+const NativePathString& PlatformFilename::ToNative() const { return impl_->native_; }
+
+std::string PlatformFilename::ToString() const {
+#if _WIN32
+ auto result = NativeToString(impl_->generic());
+ if (!result.ok()) {
+ std::stringstream ss;
+ ss << "<Unrepresentable filename: " << result.status().ToString() << ">";
+ return ss.str();
+ }
+ return *std::move(result);
+#else
+ return impl_->generic();
+#endif
+}
+
+PlatformFilename PlatformFilename::Parent() const {
+ return PlatformFilename(NativeParent(ToNative()));
+}
+
+Result<PlatformFilename> PlatformFilename::FromString(const std::string& file_name) {
+ RETURN_NOT_OK(ValidatePath(file_name));
+ ARROW_ASSIGN_OR_RAISE(auto ns, StringToNative(file_name));
+ return PlatformFilename(std::move(ns));
+}
+
+PlatformFilename PlatformFilename::Join(const PlatformFilename& child) const {
+ if (impl_->native_.empty() || impl_->native_.back() == kNativeSep) {
+ return PlatformFilename(Impl{impl_->native_ + child.impl_->native_});
+ } else {
+ return PlatformFilename(Impl{impl_->native_ + kNativeSep + child.impl_->native_});
+ }
+}
+
+Result<PlatformFilename> PlatformFilename::Join(const std::string& child_name) const {
+ ARROW_ASSIGN_OR_RAISE(auto child, PlatformFilename::FromString(child_name));
+ return Join(child);
+}
+
+//
+// Filesystem access routines
+//
+
+namespace {
+
+Result<bool> DoCreateDir(const PlatformFilename& dir_path, bool create_parents) {
+#ifdef _WIN32
const auto s = dir_path.ToNative().c_str();
if (CreateDirectoryW(s, nullptr)) {
- return true;
- }
- int errnum = GetLastError();
- if (errnum == ERROR_ALREADY_EXISTS) {
+ return true;
+ }
+ int errnum = GetLastError();
+ if (errnum == ERROR_ALREADY_EXISTS) {
const auto attrs = GetFileAttributesW(s);
if (attrs == INVALID_FILE_ATTRIBUTES || !(attrs & FILE_ATTRIBUTE_DIRECTORY)) {
// Note we propagate the original error, not the GetFileAttributesW() error
return IOErrorFromWinError(ERROR_ALREADY_EXISTS, "Cannot create directory '",
dir_path.ToString(), "': non-directory entry exists");
}
- return false;
- }
- if (create_parents && errnum == ERROR_PATH_NOT_FOUND) {
- auto parent_path = dir_path.Parent();
- if (parent_path != dir_path) {
- RETURN_NOT_OK(DoCreateDir(parent_path, create_parents));
- return DoCreateDir(dir_path, false); // Retry
- }
- }
- return IOErrorFromWinError(GetLastError(), "Cannot create directory '",
- dir_path.ToString(), "'");
-#else
+ return false;
+ }
+ if (create_parents && errnum == ERROR_PATH_NOT_FOUND) {
+ auto parent_path = dir_path.Parent();
+ if (parent_path != dir_path) {
+ RETURN_NOT_OK(DoCreateDir(parent_path, create_parents));
+ return DoCreateDir(dir_path, false); // Retry
+ }
+ }
+ return IOErrorFromWinError(GetLastError(), "Cannot create directory '",
+ dir_path.ToString(), "'");
+#else
const auto s = dir_path.ToNative().c_str();
if (mkdir(s, S_IRWXU | S_IRWXG | S_IRWXO) == 0) {
- return true;
- }
- if (errno == EEXIST) {
+ return true;
+ }
+ if (errno == EEXIST) {
struct stat st;
if (stat(s, &st) || !S_ISDIR(st.st_mode)) {
// Note we propagate the original errno, not the stat() errno
return IOErrorFromErrno(EEXIST, "Cannot create directory '", dir_path.ToString(),
"': non-directory entry exists");
}
- return false;
- }
- if (create_parents && errno == ENOENT) {
- auto parent_path = dir_path.Parent();
- if (parent_path != dir_path) {
- RETURN_NOT_OK(DoCreateDir(parent_path, create_parents));
- return DoCreateDir(dir_path, false); // Retry
- }
- }
- return IOErrorFromErrno(errno, "Cannot create directory '", dir_path.ToString(), "'");
-#endif
-}
-
-} // namespace
-
-Result<bool> CreateDir(const PlatformFilename& dir_path) {
- return DoCreateDir(dir_path, false);
-}
-
-Result<bool> CreateDirTree(const PlatformFilename& dir_path) {
- return DoCreateDir(dir_path, true);
-}
-
-#ifdef _WIN32
-
-namespace {
-
-void FindHandleDeleter(HANDLE* handle) {
- if (!FindClose(*handle)) {
- ARROW_LOG(WARNING) << "Cannot close directory handle: "
- << WinErrorMessage(GetLastError());
- }
-}
-
-std::wstring PathWithoutTrailingSlash(const PlatformFilename& fn) {
- std::wstring path = fn.ToNative();
- while (!path.empty() && path.back() == kNativeSep) {
- path.pop_back();
- }
- return path;
-}
-
-Result<std::vector<WIN32_FIND_DATAW>> ListDirInternal(const PlatformFilename& dir_path) {
- WIN32_FIND_DATAW find_data;
- std::wstring pattern = PathWithoutTrailingSlash(dir_path) + L"\\*.*";
- HANDLE handle = FindFirstFileW(pattern.c_str(), &find_data);
- if (handle == INVALID_HANDLE_VALUE) {
- return IOErrorFromWinError(GetLastError(), "Cannot list directory '",
- dir_path.ToString(), "'");
- }
-
- std::unique_ptr<HANDLE, decltype(&FindHandleDeleter)> handle_guard(&handle,
- FindHandleDeleter);
-
- std::vector<WIN32_FIND_DATAW> results;
- do {
- // Skip "." and ".."
- if (find_data.cFileName[0] == L'.') {
- if (find_data.cFileName[1] == L'\0' ||
- (find_data.cFileName[1] == L'.' && find_data.cFileName[2] == L'\0')) {
- continue;
- }
- }
- results.push_back(find_data);
- } while (FindNextFileW(handle, &find_data));
-
- int errnum = GetLastError();
- if (errnum != ERROR_NO_MORE_FILES) {
- return IOErrorFromWinError(GetLastError(), "Cannot list directory '",
- dir_path.ToString(), "'");
- }
- return results;
-}
-
-Status FindOneFile(const PlatformFilename& fn, WIN32_FIND_DATAW* find_data,
- bool* exists = nullptr) {
- HANDLE handle = FindFirstFileW(PathWithoutTrailingSlash(fn).c_str(), find_data);
- if (handle == INVALID_HANDLE_VALUE) {
- int errnum = GetLastError();
- if (exists == nullptr ||
- (errnum != ERROR_PATH_NOT_FOUND && errnum != ERROR_FILE_NOT_FOUND)) {
- return IOErrorFromWinError(GetLastError(), "Cannot get information for path '",
- fn.ToString(), "'");
- }
- *exists = false;
- } else {
- if (exists != nullptr) {
- *exists = true;
- }
- FindHandleDeleter(&handle);
- }
- return Status::OK();
-}
-
-} // namespace
-
-Result<std::vector<PlatformFilename>> ListDir(const PlatformFilename& dir_path) {
- ARROW_ASSIGN_OR_RAISE(auto entries, ListDirInternal(dir_path));
-
- std::vector<PlatformFilename> results;
- results.reserve(entries.size());
- for (const auto& entry : entries) {
- results.emplace_back(std::wstring(entry.cFileName));
- }
- return results;
-}
-
-#else
-
-Result<std::vector<PlatformFilename>> ListDir(const PlatformFilename& dir_path) {
- DIR* dir = opendir(dir_path.ToNative().c_str());
- if (dir == nullptr) {
- return IOErrorFromErrno(errno, "Cannot list directory '", dir_path.ToString(), "'");
- }
-
- auto dir_deleter = [](DIR* dir) -> void {
- if (closedir(dir) != 0) {
- ARROW_LOG(WARNING) << "Cannot close directory handle: " << ErrnoMessage(errno);
- }
- };
- std::unique_ptr<DIR, decltype(dir_deleter)> dir_guard(dir, dir_deleter);
-
- std::vector<PlatformFilename> results;
- errno = 0;
- struct dirent* entry = readdir(dir);
- while (entry != nullptr) {
- std::string path = entry->d_name;
- if (path != "." && path != "..") {
- results.emplace_back(std::move(path));
- }
- entry = readdir(dir);
- }
- if (errno != 0) {
- return IOErrorFromErrno(errno, "Cannot list directory '", dir_path.ToString(), "'");
- }
- return results;
-}
-
-#endif
-
-namespace {
-
-#ifdef _WIN32
-
-Status DeleteDirTreeInternal(const PlatformFilename& dir_path);
-
-// Remove a directory entry that's always a directory
-Status DeleteDirEntryDir(const PlatformFilename& path, const WIN32_FIND_DATAW& entry,
- bool remove_top_dir = true) {
- if ((entry.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) == 0) {
- // It's a directory that doesn't have a reparse point => recurse
- RETURN_NOT_OK(DeleteDirTreeInternal(path));
- }
- if (remove_top_dir) {
- // Remove now empty directory or reparse point (e.g. symlink to dir)
- if (!RemoveDirectoryW(path.ToNative().c_str())) {
- return IOErrorFromWinError(GetLastError(), "Cannot delete directory entry '",
- path.ToString(), "': ");
- }
- }
- return Status::OK();
-}
-
-Status DeleteDirEntry(const PlatformFilename& path, const WIN32_FIND_DATAW& entry) {
- if ((entry.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0) {
- return DeleteDirEntryDir(path, entry);
- }
- // It's a non-directory entry, most likely a regular file
- if (!DeleteFileW(path.ToNative().c_str())) {
- return IOErrorFromWinError(GetLastError(), "Cannot delete file '", path.ToString(),
- "': ");
- }
- return Status::OK();
-}
-
-Status DeleteDirTreeInternal(const PlatformFilename& dir_path) {
- ARROW_ASSIGN_OR_RAISE(auto entries, ListDirInternal(dir_path));
- for (const auto& entry : entries) {
- PlatformFilename path = dir_path.Join(PlatformFilename(entry.cFileName));
- RETURN_NOT_OK(DeleteDirEntry(path, entry));
- }
- return Status::OK();
-}
-
-Result<bool> DeleteDirContents(const PlatformFilename& dir_path, bool allow_not_found,
- bool remove_top_dir) {
- bool exists = true;
- WIN32_FIND_DATAW entry;
- if (allow_not_found) {
- RETURN_NOT_OK(FindOneFile(dir_path, &entry, &exists));
- } else {
- // Will raise if dir_path does not exist
- RETURN_NOT_OK(FindOneFile(dir_path, &entry));
- }
- if (exists) {
- RETURN_NOT_OK(DeleteDirEntryDir(dir_path, entry, remove_top_dir));
- }
- return exists;
-}
-
-#else // POSIX
-
-Status LinkStat(const PlatformFilename& path, struct stat* lst, bool* exists = nullptr) {
- if (lstat(path.ToNative().c_str(), lst) != 0) {
- if (exists == nullptr || (errno != ENOENT && errno != ENOTDIR && errno != ELOOP)) {
- return IOErrorFromErrno(errno, "Cannot get information for path '", path.ToString(),
- "'");
- }
- *exists = false;
- } else if (exists != nullptr) {
- *exists = true;
- }
- return Status::OK();
-}
-
-Status DeleteDirTreeInternal(const PlatformFilename& dir_path);
-
-Status DeleteDirEntryDir(const PlatformFilename& path, const struct stat& lst,
- bool remove_top_dir = true) {
- if (!S_ISLNK(lst.st_mode)) {
- // Not a symlink => delete contents recursively
- DCHECK(S_ISDIR(lst.st_mode));
- RETURN_NOT_OK(DeleteDirTreeInternal(path));
- if (remove_top_dir && rmdir(path.ToNative().c_str()) != 0) {
- return IOErrorFromErrno(errno, "Cannot delete directory entry '", path.ToString(),
- "'");
- }
- } else {
- // Remove symlink
- if (remove_top_dir && unlink(path.ToNative().c_str()) != 0) {
- return IOErrorFromErrno(errno, "Cannot delete directory entry '", path.ToString(),
- "'");
- }
- }
- return Status::OK();
-}
-
-Status DeleteDirEntry(const PlatformFilename& path, const struct stat& lst) {
- if (S_ISDIR(lst.st_mode)) {
- return DeleteDirEntryDir(path, lst);
- }
- if (unlink(path.ToNative().c_str()) != 0) {
- return IOErrorFromErrno(errno, "Cannot delete directory entry '", path.ToString(),
- "'");
- }
- return Status::OK();
-}
-
-Status DeleteDirTreeInternal(const PlatformFilename& dir_path) {
- ARROW_ASSIGN_OR_RAISE(auto children, ListDir(dir_path));
- for (const auto& child : children) {
- struct stat lst;
- PlatformFilename full_path = dir_path.Join(child);
- RETURN_NOT_OK(LinkStat(full_path, &lst));
- RETURN_NOT_OK(DeleteDirEntry(full_path, lst));
- }
- return Status::OK();
-}
-
-Result<bool> DeleteDirContents(const PlatformFilename& dir_path, bool allow_not_found,
- bool remove_top_dir) {
- bool exists = true;
- struct stat lst;
- if (allow_not_found) {
- RETURN_NOT_OK(LinkStat(dir_path, &lst, &exists));
- } else {
- // Will raise if dir_path does not exist
- RETURN_NOT_OK(LinkStat(dir_path, &lst));
- }
- if (exists) {
- if (!S_ISDIR(lst.st_mode) && !S_ISLNK(lst.st_mode)) {
- return Status::IOError("Cannot delete directory '", dir_path.ToString(),
- "': not a directory");
- }
- RETURN_NOT_OK(DeleteDirEntryDir(dir_path, lst, remove_top_dir));
- }
- return exists;
-}
-
-#endif
-
-} // namespace
-
-Result<bool> DeleteDirContents(const PlatformFilename& dir_path, bool allow_not_found) {
- return DeleteDirContents(dir_path, allow_not_found, /*remove_top_dir=*/false);
-}
-
-Result<bool> DeleteDirTree(const PlatformFilename& dir_path, bool allow_not_found) {
- return DeleteDirContents(dir_path, allow_not_found, /*remove_top_dir=*/true);
-}
-
-Result<bool> DeleteFile(const PlatformFilename& file_path, bool allow_not_found) {
-#ifdef _WIN32
- if (DeleteFileW(file_path.ToNative().c_str())) {
- return true;
- } else {
- int errnum = GetLastError();
- if (!allow_not_found || errnum != ERROR_FILE_NOT_FOUND) {
- return IOErrorFromWinError(GetLastError(), "Cannot delete file '",
- file_path.ToString(), "'");
- }
- }
-#else
- if (unlink(file_path.ToNative().c_str()) == 0) {
- return true;
- } else {
- if (!allow_not_found || errno != ENOENT) {
- return IOErrorFromErrno(errno, "Cannot delete file '", file_path.ToString(), "'");
- }
- }
-#endif
- return false;
-}
-
-Result<bool> FileExists(const PlatformFilename& path) {
-#ifdef _WIN32
- if (GetFileAttributesW(path.ToNative().c_str()) != INVALID_FILE_ATTRIBUTES) {
- return true;
- } else {
- int errnum = GetLastError();
- if (errnum != ERROR_PATH_NOT_FOUND && errnum != ERROR_FILE_NOT_FOUND) {
- return IOErrorFromWinError(GetLastError(), "Failed getting information for path '",
- path.ToString(), "'");
- }
- return false;
- }
-#else
- struct stat st;
- if (stat(path.ToNative().c_str(), &st) == 0) {
- return true;
- } else {
- if (errno != ENOENT && errno != ENOTDIR) {
- return IOErrorFromErrno(errno, "Failed getting information for path '",
- path.ToString(), "'");
- }
- return false;
- }
-#endif
-}
-
-//
-// Functions for creating file descriptors
-//
-
-#define CHECK_LSEEK(retval) \
- if ((retval) == -1) return Status::IOError("lseek failed");
-
-static inline int64_t lseek64_compat(int fd, int64_t pos, int whence) {
-#if defined(_WIN32)
- return _lseeki64(fd, pos, whence);
-#else
- return lseek(fd, pos, whence);
-#endif
-}
-
-static inline Result<int> CheckFileOpResult(int fd_ret, int errno_actual,
- const PlatformFilename& file_name,
- const char* opname) {
- if (fd_ret == -1) {
-#ifdef _WIN32
- int winerr = GetLastError();
- if (winerr != ERROR_SUCCESS) {
- return IOErrorFromWinError(GetLastError(), "Failed to ", opname, " file '",
- file_name.ToString(), "'");
- }
-#endif
- return IOErrorFromErrno(errno_actual, "Failed to ", opname, " file '",
- file_name.ToString(), "'");
- }
- return fd_ret;
-}
-
-Result<int> FileOpenReadable(const PlatformFilename& file_name) {
- int fd, errno_actual;
-#if defined(_WIN32)
- SetLastError(0);
- HANDLE file_handle = CreateFileW(file_name.ToNative().c_str(), GENERIC_READ,
- FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
- OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
-
- DWORD last_error = GetLastError();
- if (last_error == ERROR_SUCCESS) {
- errno_actual = 0;
- fd = _open_osfhandle(reinterpret_cast<intptr_t>(file_handle),
- _O_RDONLY | _O_BINARY | _O_NOINHERIT);
- } else {
- return IOErrorFromWinError(last_error, "Failed to open local file '",
- file_name.ToString(), "'");
- }
-#else
- fd = open(file_name.ToNative().c_str(), O_RDONLY);
- errno_actual = errno;
-
- if (fd >= 0) {
- // open(O_RDONLY) succeeds on directories, check for it
- struct stat st;
- int ret = fstat(fd, &st);
- if (ret == -1) {
- ARROW_UNUSED(FileClose(fd));
- // Will propagate error below
- } else if (S_ISDIR(st.st_mode)) {
- ARROW_UNUSED(FileClose(fd));
- return Status::IOError("Cannot open for reading: path '", file_name.ToString(),
- "' is a directory");
- }
- }
-#endif
-
- return CheckFileOpResult(fd, errno_actual, file_name, "open local");
-}
-
-Result<int> FileOpenWritable(const PlatformFilename& file_name, bool write_only,
- bool truncate, bool append) {
- int fd, errno_actual;
-
-#if defined(_WIN32)
- SetLastError(0);
- int oflag = _O_CREAT | _O_BINARY | _O_NOINHERIT;
- DWORD desired_access = GENERIC_WRITE;
- DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE;
- DWORD creation_disposition = OPEN_ALWAYS;
-
- if (append) {
- oflag |= _O_APPEND;
- }
-
- if (truncate) {
- oflag |= _O_TRUNC;
- creation_disposition = CREATE_ALWAYS;
- }
-
- if (write_only) {
- oflag |= _O_WRONLY;
- } else {
- oflag |= _O_RDWR;
- desired_access |= GENERIC_READ;
- }
-
- HANDLE file_handle =
- CreateFileW(file_name.ToNative().c_str(), desired_access, share_mode, NULL,
- creation_disposition, FILE_ATTRIBUTE_NORMAL, NULL);
-
- DWORD last_error = GetLastError();
- if (last_error == ERROR_SUCCESS || last_error == ERROR_ALREADY_EXISTS) {
- errno_actual = 0;
- fd = _open_osfhandle(reinterpret_cast<intptr_t>(file_handle), oflag);
- } else {
- return IOErrorFromWinError(last_error, "Failed to open local file '",
- file_name.ToString(), "'");
- }
-#else
- int oflag = O_CREAT;
-
- if (truncate) {
- oflag |= O_TRUNC;
- }
- if (append) {
- oflag |= O_APPEND;
- }
-
- if (write_only) {
- oflag |= O_WRONLY;
- } else {
- oflag |= O_RDWR;
- }
-
- fd = open(file_name.ToNative().c_str(), oflag, 0666);
- errno_actual = errno;
-#endif
-
- RETURN_NOT_OK(CheckFileOpResult(fd, errno_actual, file_name, "open local"));
- if (append) {
- // Seek to end, as O_APPEND does not necessarily do it
- auto ret = lseek64_compat(fd, 0, SEEK_END);
- if (ret == -1) {
- ARROW_UNUSED(FileClose(fd));
- return Status::IOError("lseek failed");
- }
- }
- return fd;
-}
-
-Result<int64_t> FileTell(int fd) {
- int64_t current_pos;
-#if defined(_WIN32)
- current_pos = _telli64(fd);
- if (current_pos == -1) {
- return Status::IOError("_telli64 failed");
- }
-#else
- current_pos = lseek64_compat(fd, 0, SEEK_CUR);
- CHECK_LSEEK(current_pos);
-#endif
- return current_pos;
-}
-
-Result<Pipe> CreatePipe() {
- int ret;
- int fd[2];
-#if defined(_WIN32)
- ret = _pipe(fd, 4096, _O_BINARY);
-#else
- ret = pipe(fd);
-#endif
-
- if (ret == -1) {
- return IOErrorFromErrno(errno, "Error creating pipe");
- }
- return Pipe{fd[0], fd[1]};
-}
-
-static Status StatusFromMmapErrno(const char* prefix) {
-#ifdef _WIN32
- errno = __map_mman_error(GetLastError(), EPERM);
-#endif
- return IOErrorFromErrno(errno, prefix);
-}
-
-namespace {
-
-int64_t GetPageSizeInternal() {
-#if defined(__APPLE__)
- return getpagesize();
-#elif defined(_WIN32)
- SYSTEM_INFO si;
- GetSystemInfo(&si);
- return si.dwPageSize;
-#else
- errno = 0;
- const auto ret = sysconf(_SC_PAGESIZE);
- if (ret == -1) {
- ARROW_LOG(FATAL) << "sysconf(_SC_PAGESIZE) failed: " << ErrnoMessage(errno);
- }
- return static_cast<int64_t>(ret);
-#endif
-}
-
-} // namespace
-
-int64_t GetPageSize() {
- static const int64_t kPageSize = GetPageSizeInternal(); // cache it
- return kPageSize;
-}
-
-//
-// Compatible way to remap a memory map
-//
-
-Status MemoryMapRemap(void* addr, size_t old_size, size_t new_size, int fildes,
- void** new_addr) {
- // should only be called with writable files
- *new_addr = MAP_FAILED;
-#ifdef _WIN32
- // flags are ignored on windows
- HANDLE fm, h;
-
- if (!UnmapViewOfFile(addr)) {
- return StatusFromMmapErrno("UnmapViewOfFile failed");
- }
-
- h = reinterpret_cast<HANDLE>(_get_osfhandle(fildes));
- if (h == INVALID_HANDLE_VALUE) {
- return StatusFromMmapErrno("Cannot get file handle");
- }
-
- uint64_t new_size64 = new_size;
- LONG new_size_low = static_cast<LONG>(new_size64 & 0xFFFFFFFFUL);
- LONG new_size_high = static_cast<LONG>((new_size64 >> 32) & 0xFFFFFFFFUL);
-
- SetFilePointer(h, new_size_low, &new_size_high, FILE_BEGIN);
- SetEndOfFile(h);
- fm = CreateFileMapping(h, NULL, PAGE_READWRITE, 0, 0, "");
- if (fm == NULL) {
- return StatusFromMmapErrno("CreateFileMapping failed");
- }
- *new_addr = MapViewOfFile(fm, FILE_MAP_WRITE, 0, 0, new_size);
- CloseHandle(fm);
- if (new_addr == NULL) {
- return StatusFromMmapErrno("MapViewOfFile failed");
- }
- return Status::OK();
+ return false;
+ }
+ if (create_parents && errno == ENOENT) {
+ auto parent_path = dir_path.Parent();
+ if (parent_path != dir_path) {
+ RETURN_NOT_OK(DoCreateDir(parent_path, create_parents));
+ return DoCreateDir(dir_path, false); // Retry
+ }
+ }
+ return IOErrorFromErrno(errno, "Cannot create directory '", dir_path.ToString(), "'");
+#endif
+}
+
+} // namespace
+
+Result<bool> CreateDir(const PlatformFilename& dir_path) {
+ return DoCreateDir(dir_path, false);
+}
+
+Result<bool> CreateDirTree(const PlatformFilename& dir_path) {
+ return DoCreateDir(dir_path, true);
+}
+
+#ifdef _WIN32
+
+namespace {
+
+void FindHandleDeleter(HANDLE* handle) {
+ if (!FindClose(*handle)) {
+ ARROW_LOG(WARNING) << "Cannot close directory handle: "
+ << WinErrorMessage(GetLastError());
+ }
+}
+
+std::wstring PathWithoutTrailingSlash(const PlatformFilename& fn) {
+ std::wstring path = fn.ToNative();
+ while (!path.empty() && path.back() == kNativeSep) {
+ path.pop_back();
+ }
+ return path;
+}
+
+Result<std::vector<WIN32_FIND_DATAW>> ListDirInternal(const PlatformFilename& dir_path) {
+ WIN32_FIND_DATAW find_data;
+ std::wstring pattern = PathWithoutTrailingSlash(dir_path) + L"\\*.*";
+ HANDLE handle = FindFirstFileW(pattern.c_str(), &find_data);
+ if (handle == INVALID_HANDLE_VALUE) {
+ return IOErrorFromWinError(GetLastError(), "Cannot list directory '",
+ dir_path.ToString(), "'");
+ }
+
+ std::unique_ptr<HANDLE, decltype(&FindHandleDeleter)> handle_guard(&handle,
+ FindHandleDeleter);
+
+ std::vector<WIN32_FIND_DATAW> results;
+ do {
+ // Skip "." and ".."
+ if (find_data.cFileName[0] == L'.') {
+ if (find_data.cFileName[1] == L'\0' ||
+ (find_data.cFileName[1] == L'.' && find_data.cFileName[2] == L'\0')) {
+ continue;
+ }
+ }
+ results.push_back(find_data);
+ } while (FindNextFileW(handle, &find_data));
+
+ int errnum = GetLastError();
+ if (errnum != ERROR_NO_MORE_FILES) {
+ return IOErrorFromWinError(GetLastError(), "Cannot list directory '",
+ dir_path.ToString(), "'");
+ }
+ return results;
+}
+
+Status FindOneFile(const PlatformFilename& fn, WIN32_FIND_DATAW* find_data,
+ bool* exists = nullptr) {
+ HANDLE handle = FindFirstFileW(PathWithoutTrailingSlash(fn).c_str(), find_data);
+ if (handle == INVALID_HANDLE_VALUE) {
+ int errnum = GetLastError();
+ if (exists == nullptr ||
+ (errnum != ERROR_PATH_NOT_FOUND && errnum != ERROR_FILE_NOT_FOUND)) {
+ return IOErrorFromWinError(GetLastError(), "Cannot get information for path '",
+ fn.ToString(), "'");
+ }
+ *exists = false;
+ } else {
+ if (exists != nullptr) {
+ *exists = true;
+ }
+ FindHandleDeleter(&handle);
+ }
+ return Status::OK();
+}
+
+} // namespace
+
+Result<std::vector<PlatformFilename>> ListDir(const PlatformFilename& dir_path) {
+ ARROW_ASSIGN_OR_RAISE(auto entries, ListDirInternal(dir_path));
+
+ std::vector<PlatformFilename> results;
+ results.reserve(entries.size());
+ for (const auto& entry : entries) {
+ results.emplace_back(std::wstring(entry.cFileName));
+ }
+ return results;
+}
+
+#else
+
+Result<std::vector<PlatformFilename>> ListDir(const PlatformFilename& dir_path) {
+ DIR* dir = opendir(dir_path.ToNative().c_str());
+ if (dir == nullptr) {
+ return IOErrorFromErrno(errno, "Cannot list directory '", dir_path.ToString(), "'");
+ }
+
+ auto dir_deleter = [](DIR* dir) -> void {
+ if (closedir(dir) != 0) {
+ ARROW_LOG(WARNING) << "Cannot close directory handle: " << ErrnoMessage(errno);
+ }
+ };
+ std::unique_ptr<DIR, decltype(dir_deleter)> dir_guard(dir, dir_deleter);
+
+ std::vector<PlatformFilename> results;
+ errno = 0;
+ struct dirent* entry = readdir(dir);
+ while (entry != nullptr) {
+ std::string path = entry->d_name;
+ if (path != "." && path != "..") {
+ results.emplace_back(std::move(path));
+ }
+ entry = readdir(dir);
+ }
+ if (errno != 0) {
+ return IOErrorFromErrno(errno, "Cannot list directory '", dir_path.ToString(), "'");
+ }
+ return results;
+}
+
+#endif
+
+namespace {
+
+#ifdef _WIN32
+
+Status DeleteDirTreeInternal(const PlatformFilename& dir_path);
+
+// Remove a directory entry that's always a directory
+Status DeleteDirEntryDir(const PlatformFilename& path, const WIN32_FIND_DATAW& entry,
+ bool remove_top_dir = true) {
+ if ((entry.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) == 0) {
+ // It's a directory that doesn't have a reparse point => recurse
+ RETURN_NOT_OK(DeleteDirTreeInternal(path));
+ }
+ if (remove_top_dir) {
+ // Remove now empty directory or reparse point (e.g. symlink to dir)
+ if (!RemoveDirectoryW(path.ToNative().c_str())) {
+ return IOErrorFromWinError(GetLastError(), "Cannot delete directory entry '",
+ path.ToString(), "': ");
+ }
+ }
+ return Status::OK();
+}
+
+Status DeleteDirEntry(const PlatformFilename& path, const WIN32_FIND_DATAW& entry) {
+ if ((entry.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0) {
+ return DeleteDirEntryDir(path, entry);
+ }
+ // It's a non-directory entry, most likely a regular file
+ if (!DeleteFileW(path.ToNative().c_str())) {
+ return IOErrorFromWinError(GetLastError(), "Cannot delete file '", path.ToString(),
+ "': ");
+ }
+ return Status::OK();
+}
+
+Status DeleteDirTreeInternal(const PlatformFilename& dir_path) {
+ ARROW_ASSIGN_OR_RAISE(auto entries, ListDirInternal(dir_path));
+ for (const auto& entry : entries) {
+ PlatformFilename path = dir_path.Join(PlatformFilename(entry.cFileName));
+ RETURN_NOT_OK(DeleteDirEntry(path, entry));
+ }
+ return Status::OK();
+}
+
+Result<bool> DeleteDirContents(const PlatformFilename& dir_path, bool allow_not_found,
+ bool remove_top_dir) {
+ bool exists = true;
+ WIN32_FIND_DATAW entry;
+ if (allow_not_found) {
+ RETURN_NOT_OK(FindOneFile(dir_path, &entry, &exists));
+ } else {
+ // Will raise if dir_path does not exist
+ RETURN_NOT_OK(FindOneFile(dir_path, &entry));
+ }
+ if (exists) {
+ RETURN_NOT_OK(DeleteDirEntryDir(dir_path, entry, remove_top_dir));
+ }
+ return exists;
+}
+
+#else // POSIX
+
+Status LinkStat(const PlatformFilename& path, struct stat* lst, bool* exists = nullptr) {
+ if (lstat(path.ToNative().c_str(), lst) != 0) {
+ if (exists == nullptr || (errno != ENOENT && errno != ENOTDIR && errno != ELOOP)) {
+ return IOErrorFromErrno(errno, "Cannot get information for path '", path.ToString(),
+ "'");
+ }
+ *exists = false;
+ } else if (exists != nullptr) {
+ *exists = true;
+ }
+ return Status::OK();
+}
+
+Status DeleteDirTreeInternal(const PlatformFilename& dir_path);
+
+Status DeleteDirEntryDir(const PlatformFilename& path, const struct stat& lst,
+ bool remove_top_dir = true) {
+ if (!S_ISLNK(lst.st_mode)) {
+ // Not a symlink => delete contents recursively
+ DCHECK(S_ISDIR(lst.st_mode));
+ RETURN_NOT_OK(DeleteDirTreeInternal(path));
+ if (remove_top_dir && rmdir(path.ToNative().c_str()) != 0) {
+ return IOErrorFromErrno(errno, "Cannot delete directory entry '", path.ToString(),
+ "'");
+ }
+ } else {
+ // Remove symlink
+ if (remove_top_dir && unlink(path.ToNative().c_str()) != 0) {
+ return IOErrorFromErrno(errno, "Cannot delete directory entry '", path.ToString(),
+ "'");
+ }
+ }
+ return Status::OK();
+}
+
+Status DeleteDirEntry(const PlatformFilename& path, const struct stat& lst) {
+ if (S_ISDIR(lst.st_mode)) {
+ return DeleteDirEntryDir(path, lst);
+ }
+ if (unlink(path.ToNative().c_str()) != 0) {
+ return IOErrorFromErrno(errno, "Cannot delete directory entry '", path.ToString(),
+ "'");
+ }
+ return Status::OK();
+}
+
+Status DeleteDirTreeInternal(const PlatformFilename& dir_path) {
+ ARROW_ASSIGN_OR_RAISE(auto children, ListDir(dir_path));
+ for (const auto& child : children) {
+ struct stat lst;
+ PlatformFilename full_path = dir_path.Join(child);
+ RETURN_NOT_OK(LinkStat(full_path, &lst));
+ RETURN_NOT_OK(DeleteDirEntry(full_path, lst));
+ }
+ return Status::OK();
+}
+
+// Shared implementation of DeleteDirContents() / DeleteDirTree().
+//
+// Returns whether `dir_path` existed.  When `allow_not_found` is false, a
+// missing path is reported as an error by LinkStat().  `remove_top_dir`
+// selects whether `dir_path` itself is removed after its contents.
+Result<bool> DeleteDirContents(const PlatformFilename& dir_path, bool allow_not_found,
+                               bool remove_top_dir) {
+  struct stat lst;
+  bool exists = true;
+  if (!allow_not_found) {
+    // Will raise if dir_path does not exist
+    RETURN_NOT_OK(LinkStat(dir_path, &lst));
+  } else {
+    RETURN_NOT_OK(LinkStat(dir_path, &lst, &exists));
+  }
+  if (!exists) {
+    return false;
+  }
+  const bool is_dir_or_link = S_ISDIR(lst.st_mode) || S_ISLNK(lst.st_mode);
+  if (!is_dir_or_link) {
+    return Status::IOError("Cannot delete directory '", dir_path.ToString(),
+                           "': not a directory");
+  }
+  RETURN_NOT_OK(DeleteDirEntryDir(dir_path, lst, remove_top_dir));
+  return true;
+}
+
+#endif
+
+} // namespace
+
+// Delete the contents of `dir_path` but keep the directory itself.
+// Returns whether the directory existed.
+Result<bool> DeleteDirContents(const PlatformFilename& dir_path, bool allow_not_found) {
+  constexpr bool kRemoveTopDir = false;
+  return DeleteDirContents(dir_path, allow_not_found, kRemoveTopDir);
+}
+
+// Delete `dir_path` together with all of its contents.
+// Returns whether the directory existed.
+Result<bool> DeleteDirTree(const PlatformFilename& dir_path, bool allow_not_found) {
+  constexpr bool kRemoveTopDir = true;
+  return DeleteDirContents(dir_path, allow_not_found, kRemoveTopDir);
+}
+
+// Delete the file at `file_path`.
+//
+// Returns true if the file was deleted, false if it did not exist and
+// `allow_not_found` is true.  Any other failure is returned as an IOError.
+Result<bool> DeleteFile(const PlatformFilename& file_path, bool allow_not_found) {
+#ifdef _WIN32
+  if (DeleteFileW(file_path.ToNative().c_str())) {
+    return true;
+  }
+  // Capture the error code once, right after the failing call, and report
+  // that same value.
+  const int errnum = GetLastError();
+  if (!allow_not_found || errnum != ERROR_FILE_NOT_FOUND) {
+    return IOErrorFromWinError(errnum, "Cannot delete file '", file_path.ToString(),
+                               "'");
+  }
+#else
+  if (unlink(file_path.ToNative().c_str()) == 0) {
+    return true;
+  }
+  // Save errno before building the message: ToString() may allocate and
+  // argument evaluation order is unspecified.
+  const int errnum = errno;
+  if (!allow_not_found || errnum != ENOENT) {
+    return IOErrorFromErrno(errnum, "Cannot delete file '", file_path.ToString(), "'");
+  }
+#endif
+  return false;
+}
+
+// Check whether `path` exists.
+//
+// Returns false when the path (or one of its components) is missing, and an
+// IOError for any other failure to query it.
+Result<bool> FileExists(const PlatformFilename& path) {
+#ifdef _WIN32
+  if (GetFileAttributesW(path.ToNative().c_str()) != INVALID_FILE_ATTRIBUTES) {
+    return true;
+  }
+  // Capture the error code once and report that same value
+  const int errnum = GetLastError();
+  if (errnum != ERROR_PATH_NOT_FOUND && errnum != ERROR_FILE_NOT_FOUND) {
+    return IOErrorFromWinError(errnum, "Failed getting information for path '",
+                               path.ToString(), "'");
+  }
+  return false;
+#else
+  struct stat st;
+  if (stat(path.ToNative().c_str(), &st) == 0) {
+    return true;
+  }
+  // Save errno before building the message: ToString() may allocate and
+  // argument evaluation order is unspecified.
+  const int errnum = errno;
+  if (errnum != ENOENT && errnum != ENOTDIR) {
+    return IOErrorFromErrno(errnum, "Failed getting information for path '",
+                            path.ToString(), "'");
+  }
+  return false;
+#endif
+}
+
+//
+// Functions for creating file descriptors
+//
+
+#define CHECK_LSEEK(retval) \
+ if ((retval) == -1) return Status::IOError("lseek failed");
+
+// Portable 64-bit seek: _lseeki64() on Windows, lseek() elsewhere.
+// Returns whatever the underlying call returns (-1 on failure).
+static inline int64_t lseek64_compat(int fd, int64_t pos, int whence) {
+#if defined(_WIN32)
+  const int64_t new_pos = _lseeki64(fd, pos, whence);
+#else
+  const int64_t new_pos = lseek(fd, pos, whence);
+#endif
+  return new_pos;
+}
+
+// Turn the result of a file-opening operation into a Result<int>.
+//
+// If `fd_ret` is -1, an IOError mentioning `opname` and `file_name` is
+// returned.  On Windows the last Win32 error is used when one is set;
+// otherwise `errno_actual` is used.  Any other `fd_ret` is returned as is.
+static inline Result<int> CheckFileOpResult(int fd_ret, int errno_actual,
+                                            const PlatformFilename& file_name,
+                                            const char* opname) {
+  if (fd_ret != -1) {
+    return fd_ret;
+  }
+#ifdef _WIN32
+  // Capture the error code once and report that same value
+  const int winerr = GetLastError();
+  if (winerr != ERROR_SUCCESS) {
+    return IOErrorFromWinError(winerr, "Failed to ", opname, " file '",
+                               file_name.ToString(), "'");
+  }
+#endif
+  return IOErrorFromErrno(errno_actual, "Failed to ", opname, " file '",
+                          file_name.ToString(), "'");
+}
+
+// Open `file_name` read-only and return its file descriptor.
+//
+// On non-Windows platforms, directories are rejected explicitly since
+// open(O_RDONLY) succeeds on them.  Failures are returned as IOError.
+Result<int> FileOpenReadable(const PlatformFilename& file_name) {
+  int fd, errno_actual;
+#if defined(_WIN32)
+  SetLastError(0);
+  HANDLE file_handle = CreateFileW(file_name.ToNative().c_str(), GENERIC_READ,
+                                   FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
+                                   OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+
+  DWORD last_error = GetLastError();
+  if (last_error == ERROR_SUCCESS) {
+    errno_actual = 0;
+    fd = _open_osfhandle(reinterpret_cast<intptr_t>(file_handle),
+                         _O_RDONLY | _O_BINARY | _O_NOINHERIT);
+  } else {
+    return IOErrorFromWinError(last_error, "Failed to open local file '",
+                               file_name.ToString(), "'");
+  }
+#else
+  fd = open(file_name.ToNative().c_str(), O_RDONLY);
+  errno_actual = errno;
+
+  if (fd >= 0) {
+    // open(O_RDONLY) succeeds on directories, check for it
+    struct stat st;
+    if (fstat(fd, &st) == -1) {
+      // Remember fstat()'s errno before FileClose() can overwrite it, and
+      // invalidate fd so that the error is actually propagated below instead
+      // of returning an already-closed descriptor.
+      errno_actual = errno;
+      ARROW_UNUSED(FileClose(fd));
+      fd = -1;
+    } else if (S_ISDIR(st.st_mode)) {
+      ARROW_UNUSED(FileClose(fd));
+      return Status::IOError("Cannot open for reading: path '", file_name.ToString(),
+                             "' is a directory");
+    }
+  }
+#endif
+
+  return CheckFileOpResult(fd, errno_actual, file_name, "open local");
+}
+
+// Open `file_name` for writing, creating it if needed, and return its file
+// descriptor.
+//
+// - `write_only`: open write-only; otherwise open for reading and writing.
+// - `truncate`: truncate the file on open.
+// - `append`: open in append mode and position the descriptor at the end.
+//
+// Failures are returned as IOError.
+Result<int> FileOpenWritable(const PlatformFilename& file_name, bool write_only,
+ bool truncate, bool append) {
+ int fd, errno_actual;
+
+#if defined(_WIN32)
+ // Reset the last error so that the GetLastError() check below reflects
+ // only the CreateFileW() call
+ SetLastError(0);
+ int oflag = _O_CREAT | _O_BINARY | _O_NOINHERIT;
+ DWORD desired_access = GENERIC_WRITE;
+ DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE;
+ DWORD creation_disposition = OPEN_ALWAYS;
+
+ if (append) {
+ oflag |= _O_APPEND;
+ }
+
+ if (truncate) {
+ oflag |= _O_TRUNC;
+ creation_disposition = CREATE_ALWAYS;
+ }
+
+ if (write_only) {
+ oflag |= _O_WRONLY;
+ } else {
+ oflag |= _O_RDWR;
+ desired_access |= GENERIC_READ;
+ }
+
+ HANDLE file_handle =
+ CreateFileW(file_name.ToNative().c_str(), desired_access, share_mode, NULL,
+ creation_disposition, FILE_ATTRIBUTE_NORMAL, NULL);
+
+ DWORD last_error = GetLastError();
+ // ERROR_ALREADY_EXISTS is accepted here alongside ERROR_SUCCESS
+ if (last_error == ERROR_SUCCESS || last_error == ERROR_ALREADY_EXISTS) {
+ errno_actual = 0;
+ fd = _open_osfhandle(reinterpret_cast<intptr_t>(file_handle), oflag);
+ } else {
+ return IOErrorFromWinError(last_error, "Failed to open local file '",
+ file_name.ToString(), "'");
+ }
+#else
+ int oflag = O_CREAT;
+
+ if (truncate) {
+ oflag |= O_TRUNC;
+ }
+ if (append) {
+ oflag |= O_APPEND;
+ }
+
+ if (write_only) {
+ oflag |= O_WRONLY;
+ } else {
+ oflag |= O_RDWR;
+ }
+
+ fd = open(file_name.ToNative().c_str(), oflag, 0666);
+ // Save errno right away, before any other call can overwrite it
+ errno_actual = errno;
+#endif
+
+ RETURN_NOT_OK(CheckFileOpResult(fd, errno_actual, file_name, "open local"));
+ if (append) {
+ // Seek to end, as O_APPEND does not necessarily do it
+ auto ret = lseek64_compat(fd, 0, SEEK_END);
+ if (ret == -1) {
+ // Do not leak the descriptor when the seek fails
+ ARROW_UNUSED(FileClose(fd));
+ return Status::IOError("lseek failed");
+ }
+ }
+ return fd;
+}
+
+// Return the current position of file descriptor `fd`, or an IOError if the
+// position cannot be queried.
+Result<int64_t> FileTell(int fd) {
+#if defined(_WIN32)
+  const int64_t offset = _telli64(fd);
+  if (offset == -1) {
+    return Status::IOError("_telli64 failed");
+  }
+#else
+  const int64_t offset = lseek64_compat(fd, 0, SEEK_CUR);
+  if (offset == -1) {
+    return Status::IOError("lseek failed");
+  }
+#endif
+  return offset;
+}
+
+// Create an anonymous pipe.  The returned Pipe is built from the two
+// descriptors in the order filled in by pipe() / _pipe().
+Result<Pipe> CreatePipe() {
+  int fds[2];
+#if defined(_WIN32)
+  const int ret = _pipe(fds, 4096, _O_BINARY);
+#else
+  const int ret = pipe(fds);
+#endif
+  if (ret == -1) {
+    return IOErrorFromErrno(errno, "Error creating pipe");
+  }
+  return Pipe{fds[0], fds[1]};
+}
+
+// Build an IOError with message `prefix` from the current errno.  On Windows,
+// errno is first set from GetLastError() via __map_mman_error().
+static Status StatusFromMmapErrno(const char* prefix) {
+#ifdef _WIN32
+  errno = __map_mman_error(GetLastError(), EPERM);
+#endif
+  const int errnum = errno;
+  return IOErrorFromErrno(errnum, prefix);
+}
+
+namespace {
+
+// Query the page size from the operating system (uncached).
+// On the sysconf() path, a failure is logged at FATAL level.
+int64_t GetPageSizeInternal() {
+#if defined(__APPLE__)
+  return getpagesize();
+#elif defined(_WIN32)
+  SYSTEM_INFO sys_info;
+  GetSystemInfo(&sys_info);
+  return sys_info.dwPageSize;
+#else
+  // Clear errno so that the message below reflects this sysconf() call
+  errno = 0;
+  const auto page_size = sysconf(_SC_PAGESIZE);
+  if (page_size == -1) {
+    ARROW_LOG(FATAL) << "sysconf(_SC_PAGESIZE) failed: " << ErrnoMessage(errno);
+  }
+  return static_cast<int64_t>(page_size);
+#endif
+}
+
+} // namespace
+
+// Return the page size, computed once by GetPageSizeInternal() on first call
+// and then reused.
+int64_t GetPageSize() {
+  static const int64_t cached_page_size = GetPageSizeInternal();
+  return cached_page_size;
+}
+
+//
+// Compatible way to remap a memory map
+//
+
+// Resize the file behind `fildes` to `new_size` bytes and remap it.
+//
+// `addr` / `old_size` describe the existing mapping.  On success the new
+// mapping address is stored in `*new_addr`; `*new_addr` is set to MAP_FAILED
+// on entry.
+Status MemoryMapRemap(void* addr, size_t old_size, size_t new_size, int fildes,
+                      void** new_addr) {
+  // should only be called with writable files
+  *new_addr = MAP_FAILED;
+#ifdef _WIN32
+  // flags are ignored on windows
+  HANDLE fm, h;
+
+  if (!UnmapViewOfFile(addr)) {
+    return StatusFromMmapErrno("UnmapViewOfFile failed");
+  }
+
+  h = reinterpret_cast<HANDLE>(_get_osfhandle(fildes));
+  if (h == INVALID_HANDLE_VALUE) {
+    return StatusFromMmapErrno("Cannot get file handle");
+  }
+
+  // Split the 64-bit size into the two 32-bit halves SetFilePointer() expects
+  uint64_t new_size64 = new_size;
+  LONG new_size_low = static_cast<LONG>(new_size64 & 0xFFFFFFFFUL);
+  LONG new_size_high = static_cast<LONG>((new_size64 >> 32) & 0xFFFFFFFFUL);
+
+  SetFilePointer(h, new_size_low, &new_size_high, FILE_BEGIN);
+  SetEndOfFile(h);
+  fm = CreateFileMapping(h, NULL, PAGE_READWRITE, 0, 0, "");
+  if (fm == NULL) {
+    return StatusFromMmapErrno("CreateFileMapping failed");
+  }
+  *new_addr = MapViewOfFile(fm, FILE_MAP_WRITE, 0, 0, new_size);
+  // Check the mapped address itself (not the out-parameter pointer), and
+  // build the error status before CloseHandle() can change the last error.
+  if (*new_addr == NULL) {
+    Status st = StatusFromMmapErrno("MapViewOfFile failed");
+    CloseHandle(fm);
+    return st;
+  }
+  CloseHandle(fm);
+  return Status::OK();
#elif defined(__linux__)
if (ftruncate(fildes, new_size) == -1) {
return StatusFromMmapErrno("ftruncate failed");
@@ -1028,450 +1028,450 @@ Status MemoryMapRemap(void* addr, size_t old_size, size_t new_size, int fildes,
return StatusFromMmapErrno("mremap failed");
}
return Status::OK();
-#else
- // we have to close the mmap first, truncate the file to the new size
- // and recreate the mmap
- if (munmap(addr, old_size) == -1) {
- return StatusFromMmapErrno("munmap failed");
- }
- if (ftruncate(fildes, new_size) == -1) {
- return StatusFromMmapErrno("ftruncate failed");
- }
- // we set READ / WRITE flags on the new map, since we could only have
- // unlarged a RW map in the first place
- *new_addr = mmap(NULL, new_size, PROT_READ | PROT_WRITE, MAP_SHARED, fildes, 0);
- if (*new_addr == MAP_FAILED) {
- return StatusFromMmapErrno("mmap failed");
- }
- return Status::OK();
-#endif
-}
-
-Status MemoryAdviseWillNeed(const std::vector<MemoryRegion>& regions) {
- const auto page_size = static_cast<size_t>(GetPageSize());
- DCHECK_GT(page_size, 0);
- const size_t page_mask = ~(page_size - 1);
- DCHECK_EQ(page_mask & page_size, page_size);
-
- auto align_region = [=](const MemoryRegion& region) -> MemoryRegion {
- const auto addr = reinterpret_cast<uintptr_t>(region.addr);
- const auto aligned_addr = addr & page_mask;
- DCHECK_LT(addr - aligned_addr, page_size);
- return {reinterpret_cast<void*>(aligned_addr),
- region.size + static_cast<size_t>(addr - aligned_addr)};
- };
-
-#ifdef _WIN32
- // PrefetchVirtualMemory() is available on Windows 8 or later
- struct PrefetchEntry { // Like WIN32_MEMORY_RANGE_ENTRY
- void* VirtualAddress;
- size_t NumberOfBytes;
-
- PrefetchEntry(const MemoryRegion& region) // NOLINT runtime/explicit
- : VirtualAddress(region.addr), NumberOfBytes(region.size) {}
- };
- using PrefetchVirtualMemoryFunc = BOOL (*)(HANDLE, ULONG_PTR, PrefetchEntry*, ULONG);
- static const auto prefetch_virtual_memory = reinterpret_cast<PrefetchVirtualMemoryFunc>(
- GetProcAddress(GetModuleHandleW(L"kernel32.dll"), "PrefetchVirtualMemory"));
- if (prefetch_virtual_memory != nullptr) {
- std::vector<PrefetchEntry> entries;
- entries.reserve(regions.size());
- for (const auto& region : regions) {
- if (region.size != 0) {
- entries.emplace_back(align_region(region));
- }
- }
- if (!entries.empty() &&
- !prefetch_virtual_memory(GetCurrentProcess(),
- static_cast<ULONG_PTR>(entries.size()), entries.data(),
- 0)) {
- return IOErrorFromWinError(GetLastError(), "PrefetchVirtualMemory failed");
- }
- }
- return Status::OK();
+#else
+ // we have to close the mmap first, truncate the file to the new size
+ // and recreate the mmap
+ if (munmap(addr, old_size) == -1) {
+ return StatusFromMmapErrno("munmap failed");
+ }
+ if (ftruncate(fildes, new_size) == -1) {
+ return StatusFromMmapErrno("ftruncate failed");
+ }
+ // we set READ / WRITE flags on the new map, since we could only have
+ // enlarged a RW map in the first place
+ *new_addr = mmap(NULL, new_size, PROT_READ | PROT_WRITE, MAP_SHARED, fildes, 0);
+ if (*new_addr == MAP_FAILED) {
+ return StatusFromMmapErrno("mmap failed");
+ }
+ return Status::OK();
+#endif
+}
+
+Status MemoryAdviseWillNeed(const std::vector<MemoryRegion>& regions) {
+ const auto page_size = static_cast<size_t>(GetPageSize());
+ DCHECK_GT(page_size, 0);
+ const size_t page_mask = ~(page_size - 1);
+ DCHECK_EQ(page_mask & page_size, page_size);
+
+ auto align_region = [=](const MemoryRegion& region) -> MemoryRegion {
+ const auto addr = reinterpret_cast<uintptr_t>(region.addr);
+ const auto aligned_addr = addr & page_mask;
+ DCHECK_LT(addr - aligned_addr, page_size);
+ return {reinterpret_cast<void*>(aligned_addr),
+ region.size + static_cast<size_t>(addr - aligned_addr)};
+ };
+
+#ifdef _WIN32
+ // PrefetchVirtualMemory() is available on Windows 8 or later
+ struct PrefetchEntry { // Like WIN32_MEMORY_RANGE_ENTRY
+ void* VirtualAddress;
+ size_t NumberOfBytes;
+
+ PrefetchEntry(const MemoryRegion& region) // NOLINT runtime/explicit
+ : VirtualAddress(region.addr), NumberOfBytes(region.size) {}
+ };
+ using PrefetchVirtualMemoryFunc = BOOL (*)(HANDLE, ULONG_PTR, PrefetchEntry*, ULONG);
+ static const auto prefetch_virtual_memory = reinterpret_cast<PrefetchVirtualMemoryFunc>(
+ GetProcAddress(GetModuleHandleW(L"kernel32.dll"), "PrefetchVirtualMemory"));
+ if (prefetch_virtual_memory != nullptr) {
+ std::vector<PrefetchEntry> entries;
+ entries.reserve(regions.size());
+ for (const auto& region : regions) {
+ if (region.size != 0) {
+ entries.emplace_back(align_region(region));
+ }
+ }
+ if (!entries.empty() &&
+ !prefetch_virtual_memory(GetCurrentProcess(),
+ static_cast<ULONG_PTR>(entries.size()), entries.data(),
+ 0)) {
+ return IOErrorFromWinError(GetLastError(), "PrefetchVirtualMemory failed");
+ }
+ }
+ return Status::OK();
#elif defined(POSIX_MADV_WILLNEED)
- for (const auto& region : regions) {
- if (region.size != 0) {
- const auto aligned = align_region(region);
- int err = posix_madvise(aligned.addr, aligned.size, POSIX_MADV_WILLNEED);
- // EBADF can be returned on Linux in the following cases:
- // - the kernel version is older than 3.9
- // - the kernel was compiled with CONFIG_SWAP disabled (ARROW-9577)
- if (err != 0 && err != EBADF) {
- return IOErrorFromErrno(err, "posix_madvise failed");
- }
- }
- }
- return Status::OK();
-#else
- return Status::OK();
-#endif
-}
-
-//
-// Closing files
-//
-
-Status FileClose(int fd) {
- int ret;
-
-#if defined(_WIN32)
- ret = static_cast<int>(_close(fd));
+ for (const auto& region : regions) {
+ if (region.size != 0) {
+ const auto aligned = align_region(region);
+ int err = posix_madvise(aligned.addr, aligned.size, POSIX_MADV_WILLNEED);
+ // EBADF can be returned on Linux in the following cases:
+ // - the kernel version is older than 3.9
+ // - the kernel was compiled with CONFIG_SWAP disabled (ARROW-9577)
+ if (err != 0 && err != EBADF) {
+ return IOErrorFromErrno(err, "posix_madvise failed");
+ }
+ }
+ }
+ return Status::OK();
#else
- ret = static_cast<int>(close(fd));
-#endif
-
- if (ret == -1) {
- return Status::IOError("error closing file");
- }
return Status::OK();
-}
-
-//
-// Seeking and telling
-//
-
-Status FileSeek(int fd, int64_t pos, int whence) {
- int64_t ret = lseek64_compat(fd, pos, whence);
- CHECK_LSEEK(ret);
- return Status::OK();
-}
-
-Status FileSeek(int fd, int64_t pos) { return FileSeek(fd, pos, SEEK_SET); }
-
-Result<int64_t> FileGetSize(int fd) {
-#if defined(_WIN32)
- struct __stat64 st;
-#else
- struct stat st;
-#endif
- st.st_size = -1;
-
-#if defined(_WIN32)
- int ret = _fstat64(fd, &st);
-#else
- int ret = fstat(fd, &st);
-#endif
-
- if (ret == -1) {
- return Status::IOError("error stat()ing file");
- }
- if (st.st_size == 0) {
- // Maybe the file doesn't support getting its size, double-check by
- // trying to tell() (seekable files usually have a size, while
- // non-seekable files don't)
- RETURN_NOT_OK(FileTell(fd));
- } else if (st.st_size < 0) {
- return Status::IOError("error getting file size");
- }
- return st.st_size;
-}
-
-//
-// Reading data
-//
-
-static inline int64_t pread_compat(int fd, void* buf, int64_t nbytes, int64_t pos) {
-#if defined(_WIN32)
- HANDLE handle = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
- DWORD dwBytesRead = 0;
- OVERLAPPED overlapped = {0};
- overlapped.Offset = static_cast<uint32_t>(pos);
- overlapped.OffsetHigh = static_cast<uint32_t>(pos >> 32);
-
- // Note: ReadFile() will update the file position
- BOOL bRet =
- ReadFile(handle, buf, static_cast<uint32_t>(nbytes), &dwBytesRead, &overlapped);
- if (bRet || GetLastError() == ERROR_HANDLE_EOF) {
- return dwBytesRead;
- } else {
- return -1;
- }
-#else
- return static_cast<int64_t>(
- pread(fd, buf, static_cast<size_t>(nbytes), static_cast<off_t>(pos)));
-#endif
-}
-
-Result<int64_t> FileRead(int fd, uint8_t* buffer, int64_t nbytes) {
- int64_t bytes_read = 0;
-
- while (bytes_read < nbytes) {
- int64_t chunksize =
- std::min(static_cast<int64_t>(ARROW_MAX_IO_CHUNKSIZE), nbytes - bytes_read);
-#if defined(_WIN32)
- int64_t ret =
- static_cast<int64_t>(_read(fd, buffer, static_cast<uint32_t>(chunksize)));
-#else
- int64_t ret = static_cast<int64_t>(read(fd, buffer, static_cast<size_t>(chunksize)));
-#endif
-
- if (ret == -1) {
- return IOErrorFromErrno(errno, "Error reading bytes from file");
- }
- if (ret == 0) {
- // EOF
- break;
- }
- buffer += ret;
- bytes_read += ret;
- }
- return bytes_read;
-}
-
-Result<int64_t> FileReadAt(int fd, uint8_t* buffer, int64_t position, int64_t nbytes) {
- int64_t bytes_read = 0;
-
- while (bytes_read < nbytes) {
- int64_t chunksize =
- std::min(static_cast<int64_t>(ARROW_MAX_IO_CHUNKSIZE), nbytes - bytes_read);
- int64_t ret = pread_compat(fd, buffer, chunksize, position);
-
- if (ret == -1) {
- return IOErrorFromErrno(errno, "Error reading bytes from file");
- }
- if (ret == 0) {
- // EOF
- break;
- }
- buffer += ret;
- position += ret;
- bytes_read += ret;
- }
- return bytes_read;
-}
-
-//
-// Writing data
-//
-
-Status FileWrite(int fd, const uint8_t* buffer, const int64_t nbytes) {
- int ret = 0;
- int64_t bytes_written = 0;
-
- while (ret != -1 && bytes_written < nbytes) {
- int64_t chunksize =
- std::min(static_cast<int64_t>(ARROW_MAX_IO_CHUNKSIZE), nbytes - bytes_written);
-#if defined(_WIN32)
- ret = static_cast<int>(
- _write(fd, buffer + bytes_written, static_cast<uint32_t>(chunksize)));
-#else
- ret = static_cast<int>(
- write(fd, buffer + bytes_written, static_cast<size_t>(chunksize)));
-#endif
-
- if (ret != -1) {
- bytes_written += ret;
- }
- }
-
- if (ret == -1) {
- return IOErrorFromErrno(errno, "Error writing bytes to file");
- }
- return Status::OK();
-}
-
-Status FileTruncate(int fd, const int64_t size) {
- int ret, errno_actual;
-
-#ifdef _WIN32
- errno_actual = _chsize_s(fd, static_cast<size_t>(size));
- ret = errno_actual == 0 ? 0 : -1;
-#else
- ret = ftruncate(fd, static_cast<size_t>(size));
- errno_actual = errno;
-#endif
-
- if (ret == -1) {
- return IOErrorFromErrno(errno_actual, "Error writing bytes to file");
- }
- return Status::OK();
-}
-
-//
-// Environment variables
-//
-
-Result<std::string> GetEnvVar(const char* name) {
-#ifdef _WIN32
- // On Windows, getenv() reads an early copy of the process' environment
- // which doesn't get updated when SetEnvironmentVariable() is called.
- constexpr int32_t bufsize = 2000;
- char c_str[bufsize];
- auto res = GetEnvironmentVariableA(name, c_str, bufsize);
- if (res >= bufsize) {
- return Status::CapacityError("environment variable value too long");
- } else if (res == 0) {
- return Status::KeyError("environment variable undefined");
- }
- return std::string(c_str);
-#else
- char* c_str = getenv(name);
- if (c_str == nullptr) {
- return Status::KeyError("environment variable undefined");
- }
- return std::string(c_str);
-#endif
-}
-
-Result<std::string> GetEnvVar(const std::string& name) { return GetEnvVar(name.c_str()); }
-
-#ifdef _WIN32
-Result<NativePathString> GetEnvVarNative(const std::string& name) {
- NativePathString w_name;
- constexpr int32_t bufsize = 2000;
- wchar_t w_str[bufsize];
-
- ARROW_ASSIGN_OR_RAISE(w_name, StringToNative(name));
- auto res = GetEnvironmentVariableW(w_name.c_str(), w_str, bufsize);
- if (res >= bufsize) {
- return Status::CapacityError("environment variable value too long");
- } else if (res == 0) {
- return Status::KeyError("environment variable undefined");
- }
- return NativePathString(w_str);
-}
-
-Result<NativePathString> GetEnvVarNative(const char* name) {
- return GetEnvVarNative(std::string(name));
-}
-
-#else
-
-Result<NativePathString> GetEnvVarNative(const std::string& name) {
- return GetEnvVar(name);
-}
-
-Result<NativePathString> GetEnvVarNative(const char* name) { return GetEnvVar(name); }
-#endif
-
-Status SetEnvVar(const char* name, const char* value) {
-#ifdef _WIN32
- if (SetEnvironmentVariableA(name, value)) {
- return Status::OK();
- } else {
- return Status::Invalid("failed setting environment variable");
- }
-#else
- if (setenv(name, value, 1) == 0) {
- return Status::OK();
- } else {
- return Status::Invalid("failed setting environment variable");
- }
-#endif
-}
-
-Status SetEnvVar(const std::string& name, const std::string& value) {
- return SetEnvVar(name.c_str(), value.c_str());
-}
-
-Status DelEnvVar(const char* name) {
-#ifdef _WIN32
- if (SetEnvironmentVariableA(name, nullptr)) {
- return Status::OK();
- } else {
- return Status::Invalid("failed deleting environment variable");
- }
-#else
- if (unsetenv(name) == 0) {
- return Status::OK();
- } else {
- return Status::Invalid("failed deleting environment variable");
- }
-#endif
-}
-
-Status DelEnvVar(const std::string& name) { return DelEnvVar(name.c_str()); }
-
-//
-// Temporary directories
-//
-
-namespace {
-
-#if _WIN32
-NativePathString GetWindowsDirectoryPath() {
- auto size = GetWindowsDirectoryW(nullptr, 0);
- ARROW_CHECK_GT(size, 0) << "GetWindowsDirectoryW failed";
- std::vector<wchar_t> w_str(size);
- size = GetWindowsDirectoryW(w_str.data(), size);
- ARROW_CHECK_GT(size, 0) << "GetWindowsDirectoryW failed";
- return {w_str.data(), size};
-}
-#endif
-
-// Return a list of preferred locations for temporary files
-std::vector<NativePathString> GetPlatformTemporaryDirs() {
- struct TempDirSelector {
- std::string env_var;
- NativePathString path_append;
- };
-
- std::vector<TempDirSelector> selectors;
- NativePathString fallback_tmp;
-
-#if _WIN32
- selectors = {
- {"TMP", L""}, {"TEMP", L""}, {"LOCALAPPDATA", L"Temp"}, {"USERPROFILE", L"Temp"}};
- fallback_tmp = GetWindowsDirectoryPath();
-
-#else
- selectors = {{"TMPDIR", ""}, {"TMP", ""}, {"TEMP", ""}, {"TEMPDIR", ""}};
-#ifdef __ANDROID__
- fallback_tmp = "/data/local/tmp";
-#else
- fallback_tmp = "/tmp";
-#endif
-#endif
-
- std::vector<NativePathString> temp_dirs;
- for (const auto& sel : selectors) {
- auto result = GetEnvVarNative(sel.env_var);
- if (result.status().IsKeyError()) {
- // Environment variable absent, skip
- continue;
- }
- if (!result.ok()) {
- ARROW_LOG(WARNING) << "Failed getting env var '" << sel.env_var
- << "': " << result.status().ToString();
- continue;
- }
- NativePathString p = *std::move(result);
- if (p.empty()) {
- // Environment variable set to empty string, skip
- continue;
- }
- if (sel.path_append.empty()) {
- temp_dirs.push_back(p);
- } else {
- temp_dirs.push_back(p + kNativeSep + sel.path_append);
- }
- }
- temp_dirs.push_back(fallback_tmp);
- return temp_dirs;
-}
-
-std::string MakeRandomName(int num_chars) {
- static const std::string chars = "0123456789abcdefghijklmnopqrstuvwxyz";
- std::default_random_engine gen(
- static_cast<std::default_random_engine::result_type>(GetRandomSeed()));
- std::uniform_int_distribution<int> dist(0, static_cast<int>(chars.length() - 1));
-
- std::string s;
- s.reserve(num_chars);
- for (int i = 0; i < num_chars; ++i) {
- s += chars[dist(gen)];
- }
- return s;
-}
-
-} // namespace
-
-Result<std::unique_ptr<TemporaryDir>> TemporaryDir::Make(const std::string& prefix) {
+#endif
+}
+
+//
+// Closing files
+//
+
+// Close file descriptor `fd`; returns an IOError if the close call fails.
+Status FileClose(int fd) {
+#if defined(_WIN32)
+  const int ret = static_cast<int>(_close(fd));
+#else
+  const int ret = static_cast<int>(close(fd));
+#endif
+  if (ret != -1) {
+    return Status::OK();
+  }
+  return Status::IOError("error closing file");
+}
+
+//
+// Seeking and telling
+//
+
+// Reposition file descriptor `fd` to `pos`, interpreted according to `whence`
+// (SEEK_SET, SEEK_CUR or SEEK_END).
+Status FileSeek(int fd, int64_t pos, int whence) {
+  const int64_t new_pos = lseek64_compat(fd, pos, whence);
+  if (new_pos == -1) {
+    return Status::IOError("lseek failed");
+  }
+  return Status::OK();
+}
+
+// Reposition file descriptor `fd` to absolute offset `pos`.
+Status FileSeek(int fd, int64_t pos) {
+  constexpr int kWhence = SEEK_SET;
+  return FileSeek(fd, pos, kWhence);
+}
+
+// Return the size in bytes of the file behind `fd`, as reported by fstat().
+//
+// A reported size of 0 is double-checked with FileTell(): if the descriptor
+// cannot report a position, that error is returned instead.
+Result<int64_t> FileGetSize(int fd) {
+#if defined(_WIN32)
+  struct __stat64 st;
+  st.st_size = -1;
+  const int ret = _fstat64(fd, &st);
+#else
+  struct stat st;
+  st.st_size = -1;
+  const int ret = fstat(fd, &st);
+#endif
+
+  if (ret == -1) {
+    return Status::IOError("error stat()ing file");
+  }
+  if (st.st_size < 0) {
+    return Status::IOError("error getting file size");
+  }
+  if (st.st_size == 0) {
+    // Maybe the file doesn't support getting its size, double-check by
+    // trying to tell() (seekable files usually have a size, while
+    // non-seekable files don't)
+    RETURN_NOT_OK(FileTell(fd));
+  }
+  return st.st_size;
+}
+
+//
+// Reading data
+//
+
+// Portable positional read: read up to `nbytes` bytes from `fd` at offset
+// `pos` into `buf`.
+//
+// Returns the number of bytes read, or -1 on failure.  On Windows,
+// ERROR_HANDLE_EOF is not treated as a failure: the byte count reported by
+// ReadFile() is returned.
+static inline int64_t pread_compat(int fd, void* buf, int64_t nbytes, int64_t pos) {
+#if defined(_WIN32)
+ HANDLE handle = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
+ DWORD dwBytesRead = 0;
+ // The read offset is passed through the OVERLAPPED structure, split into
+ // its low and high 32-bit halves
+ OVERLAPPED overlapped = {0};
+ overlapped.Offset = static_cast<uint32_t>(pos);
+ overlapped.OffsetHigh = static_cast<uint32_t>(pos >> 32);
+
+ // Note: ReadFile() will update the file position
+ // NOTE(review): nbytes is narrowed to 32 bits here; presumably callers cap
+ // it (e.g. with ARROW_MAX_IO_CHUNKSIZE) -- confirm.
+ BOOL bRet =
+ ReadFile(handle, buf, static_cast<uint32_t>(nbytes), &dwBytesRead, &overlapped);
+ if (bRet || GetLastError() == ERROR_HANDLE_EOF) {
+ return dwBytesRead;
+ } else {
+ return -1;
+ }
+#else
+ return static_cast<int64_t>(
+ pread(fd, buf, static_cast<size_t>(nbytes), static_cast<off_t>(pos)));
+#endif
+}
+
+// Read up to `nbytes` bytes from the current position of `fd` into `buffer`.
+//
+// Reads are issued in chunks of at most ARROW_MAX_IO_CHUNKSIZE bytes until
+// `nbytes` have been read or EOF is reached.  Returns the number of bytes
+// actually read, which is less than `nbytes` only on EOF.
+Result<int64_t> FileRead(int fd, uint8_t* buffer, int64_t nbytes) {
+  int64_t total_read = 0;
+
+  while (total_read < nbytes) {
+    const int64_t remaining = nbytes - total_read;
+    const int64_t chunksize =
+        std::min(static_cast<int64_t>(ARROW_MAX_IO_CHUNKSIZE), remaining);
+#if defined(_WIN32)
+    const int64_t ret =
+        static_cast<int64_t>(_read(fd, buffer, static_cast<uint32_t>(chunksize)));
+#else
+    const int64_t ret =
+        static_cast<int64_t>(read(fd, buffer, static_cast<size_t>(chunksize)));
+#endif
+
+    if (ret == -1) {
+      return IOErrorFromErrno(errno, "Error reading bytes from file");
+    }
+    if (ret == 0) {
+      // EOF
+      break;
+    }
+    total_read += ret;
+    buffer += ret;
+  }
+  return total_read;
+}
+
+// Read up to `nbytes` bytes from `fd` starting at offset `position` into
+// `buffer`, using pread_compat().
+//
+// Reads are issued in chunks of at most ARROW_MAX_IO_CHUNKSIZE bytes until
+// `nbytes` have been read or EOF is reached.  Returns the number of bytes
+// actually read.
+Result<int64_t> FileReadAt(int fd, uint8_t* buffer, int64_t position, int64_t nbytes) {
+  int64_t total_read = 0;
+
+  while (total_read < nbytes) {
+    const int64_t remaining = nbytes - total_read;
+    const int64_t chunksize =
+        std::min(static_cast<int64_t>(ARROW_MAX_IO_CHUNKSIZE), remaining);
+    const int64_t ret = pread_compat(fd, buffer, chunksize, position);
+
+    if (ret == -1) {
+      return IOErrorFromErrno(errno, "Error reading bytes from file");
+    }
+    if (ret == 0) {
+      // EOF
+      break;
+    }
+    total_read += ret;
+    position += ret;
+    buffer += ret;
+  }
+  return total_read;
+}
+
+//
+// Writing data
+//
+
+// Write all `nbytes` bytes of `buffer` to `fd`.
+//
+// Writes are issued in chunks of at most ARROW_MAX_IO_CHUNKSIZE bytes and
+// repeated until everything is written; the first failing write is returned
+// as an IOError.
+Status FileWrite(int fd, const uint8_t* buffer, const int64_t nbytes) {
+  int64_t total_written = 0;
+
+  while (total_written < nbytes) {
+    const int64_t chunksize =
+        std::min(static_cast<int64_t>(ARROW_MAX_IO_CHUNKSIZE), nbytes - total_written);
+#if defined(_WIN32)
+    const int ret = static_cast<int>(
+        _write(fd, buffer + total_written, static_cast<uint32_t>(chunksize)));
+#else
+    const int ret = static_cast<int>(
+        write(fd, buffer + total_written, static_cast<size_t>(chunksize)));
+#endif
+    if (ret == -1) {
+      return IOErrorFromErrno(errno, "Error writing bytes to file");
+    }
+    total_written += ret;
+  }
+  return Status::OK();
+}
+
+// Set the length of the file behind `fd` to `size` bytes.
+// Returns an IOError if the underlying truncate call fails.
+Status FileTruncate(int fd, const int64_t size) {
+  int ret, errno_actual;
+
+#ifdef _WIN32
+  // _chsize_s() returns the error code directly instead of setting errno
+  errno_actual = _chsize_s(fd, static_cast<size_t>(size));
+  ret = errno_actual == 0 ? 0 : -1;
+#else
+  // ftruncate() takes an off_t length
+  ret = ftruncate(fd, static_cast<off_t>(size));
+  errno_actual = errno;
+#endif
+
+  if (ret == -1) {
+    return IOErrorFromErrno(errno_actual, "Error truncating file");
+  }
+  return Status::OK();
+}
+
+//
+// Environment variables
+//
+
+// Return the value of environment variable `name`.
+//
+// Returns KeyError if the lookup reports the variable as undefined and, on
+// Windows, CapacityError if the value does not fit the fixed-size buffer.
+Result<std::string> GetEnvVar(const char* name) {
+#ifdef _WIN32
+  // On Windows, getenv() reads an early copy of the process' environment
+  // which doesn't get updated when SetEnvironmentVariable() is called.
+  constexpr int32_t bufsize = 2000;
+  char value_buf[bufsize];
+  auto res = GetEnvironmentVariableA(name, value_buf, bufsize);
+  if (res >= bufsize) {
+    return Status::CapacityError("environment variable value too long");
+  }
+  if (res == 0) {
+    return Status::KeyError("environment variable undefined");
+  }
+  return std::string(value_buf);
+#else
+  const char* value = getenv(name);
+  if (value != nullptr) {
+    return std::string(value);
+  }
+  return Status::KeyError("environment variable undefined");
+#endif
+}
+
+// Overload taking the variable name as a std::string.
+Result<std::string> GetEnvVar(const std::string& name) {
+  const char* c_name = name.c_str();
+  return GetEnvVar(c_name);
+}
+
+#ifdef _WIN32
+// Return the value of environment variable `name` as a native (wide) string.
+// Returns KeyError if the lookup reports the variable as undefined and
+// CapacityError if the value does not fit the fixed-size buffer.
+Result<NativePathString> GetEnvVarNative(const std::string& name) {
+  constexpr int32_t bufsize = 2000;
+  wchar_t value_buf[bufsize];
+  NativePathString native_name;
+
+  ARROW_ASSIGN_OR_RAISE(native_name, StringToNative(name));
+  auto res = GetEnvironmentVariableW(native_name.c_str(), value_buf, bufsize);
+  if (res >= bufsize) {
+    return Status::CapacityError("environment variable value too long");
+  }
+  if (res == 0) {
+    return Status::KeyError("environment variable undefined");
+  }
+  return NativePathString(value_buf);
+}
+
+// Overload taking the variable name as a C string.
+Result<NativePathString> GetEnvVarNative(const char* name) {
+  const std::string name_str(name);
+  return GetEnvVarNative(name_str);
+}
+
+#else
+
+// On non-Windows platforms this simply forwards to GetEnvVar().
+Result<NativePathString> GetEnvVarNative(const std::string& name) {
+  return GetEnvVar(name);
+}
+
+// Overload taking the variable name as a C string.
+Result<NativePathString> GetEnvVarNative(const char* name) {
+  return GetEnvVar(name);
+}
+#endif
+
+// Set environment variable `name` to `value`, overwriting any existing value.
+// Returns Status::Invalid if the underlying call fails.
+Status SetEnvVar(const char* name, const char* value) {
+#ifdef _WIN32
+  const bool ok = SetEnvironmentVariableA(name, value) != 0;
+#else
+  const bool ok = setenv(name, value, 1) == 0;
+#endif
+  if (!ok) {
+    return Status::Invalid("failed setting environment variable");
+  }
+  return Status::OK();
+}
+
+// Overload taking the name and value as std::string.
+Status SetEnvVar(const std::string& name, const std::string& value) {
+  const char* c_name = name.c_str();
+  const char* c_value = value.c_str();
+  return SetEnvVar(c_name, c_value);
+}
+
+// Remove environment variable `name`.
+// Returns Status::Invalid if the underlying call fails.
+Status DelEnvVar(const char* name) {
+#ifdef _WIN32
+  const bool ok = SetEnvironmentVariableA(name, nullptr) != 0;
+#else
+  const bool ok = unsetenv(name) == 0;
+#endif
+  if (!ok) {
+    return Status::Invalid("failed deleting environment variable");
+  }
+  return Status::OK();
+}
+
+// Overload taking the variable name as a std::string.
+Status DelEnvVar(const std::string& name) {
+  const char* c_name = name.c_str();
+  return DelEnvVar(c_name);
+}
+
+//
+// Temporary directories
+//
+
+namespace {
+
+#if _WIN32
+// Return the path reported by GetWindowsDirectoryW().
+// Aborts (ARROW_CHECK) if either call returns 0.
+NativePathString GetWindowsDirectoryPath() {
+  // The first call's return value is used as the buffer size for the second
+  auto size = GetWindowsDirectoryW(nullptr, 0);
+  ARROW_CHECK_GT(size, 0) << "GetWindowsDirectoryW failed";
+  std::vector<wchar_t> buffer(size);
+  size = GetWindowsDirectoryW(buffer.data(), size);
+  ARROW_CHECK_GT(size, 0) << "GetWindowsDirectoryW failed";
+  return NativePathString{buffer.data(), size};
+}
+#endif
+
+// Return a list of preferred locations for temporary files
+// Return candidate directories for temporary files, in order of preference.
+//
+// Candidates come from a platform-specific list of environment variables
+// (each optionally extended with a sub-path); variables that are undefined,
+// unreadable or empty are skipped.  A platform fallback directory is always
+// appended last, so the result is never empty.
+std::vector<NativePathString> GetPlatformTemporaryDirs() {
+  struct TempDirSelector {
+    std::string env_var;
+    NativePathString path_append;
+  };
+
+  std::vector<TempDirSelector> selectors;
+  NativePathString fallback_tmp;
+
+#if _WIN32
+  selectors = {
+      {"TMP", L""}, {"TEMP", L""}, {"LOCALAPPDATA", L"Temp"}, {"USERPROFILE", L"Temp"}};
+  fallback_tmp = GetWindowsDirectoryPath();
+
+#else
+  selectors = {{"TMPDIR", ""}, {"TMP", ""}, {"TEMP", ""}, {"TEMPDIR", ""}};
+#ifdef __ANDROID__
+  fallback_tmp = "/data/local/tmp";
+#else
+  fallback_tmp = "/tmp";
+#endif
+#endif
+
+  std::vector<NativePathString> temp_dirs;
+  for (const auto& sel : selectors) {
+    auto maybe_dir = GetEnvVarNative(sel.env_var);
+    if (!maybe_dir.ok()) {
+      // An absent variable (KeyError) is skipped silently, anything else is
+      // worth a warning
+      if (!maybe_dir.status().IsKeyError()) {
+        ARROW_LOG(WARNING) << "Failed getting env var '" << sel.env_var
+                           << "': " << maybe_dir.status().ToString();
+      }
+      continue;
+    }
+    NativePathString dir = *std::move(maybe_dir);
+    if (dir.empty()) {
+      // Environment variable set to empty string, skip
+      continue;
+    }
+    if (!sel.path_append.empty()) {
+      dir = dir + kNativeSep + sel.path_append;
+    }
+    temp_dirs.push_back(dir);
+  }
+  temp_dirs.push_back(fallback_tmp);
+  return temp_dirs;
+}
+
+// Build a random string of `num_chars` characters drawn from [0-9a-z], using
+// an engine seeded from GetRandomSeed().
+std::string MakeRandomName(int num_chars) {
+  static const std::string chars = "0123456789abcdefghijklmnopqrstuvwxyz";
+  using Engine = std::default_random_engine;
+
+  Engine engine(static_cast<Engine::result_type>(GetRandomSeed()));
+  std::uniform_int_distribution<int> pick_index(0, static_cast<int>(chars.length() - 1));
+
+  std::string name;
+  name.reserve(num_chars);
+  for (int remaining = num_chars; remaining > 0; --remaining) {
+    name.push_back(chars[pick_index(engine)]);
+  }
+  return name;
+}
+
+} // namespace
+
+Result<std::unique_ptr<TemporaryDir>> TemporaryDir::Make(const std::string& prefix) {
const int kNumChars = 8;
- NativePathString base_name;
-
+ NativePathString base_name;
+
auto MakeBaseName = [&]() {
std::string suffix = MakeRandomName(kNumChars);
return StringToNative(prefix + suffix);
@@ -1499,101 +1499,101 @@ Result<std::unique_ptr<TemporaryDir>> TemporaryDir::Make(const std::string& pref
ARROW_ASSIGN_OR_RAISE(base_name, MakeBaseName());
- auto base_dirs = GetPlatformTemporaryDirs();
- DCHECK_NE(base_dirs.size(), 0);
-
+ auto base_dirs = GetPlatformTemporaryDirs();
+ DCHECK_NE(base_dirs.size(), 0);
+
for (const auto& base_dir : base_dirs) {
ARROW_ASSIGN_OR_RAISE(auto ptr, TryCreatingDirectory(base_dir));
if (ptr) {
return std::move(ptr);
- }
+ }
// Cannot create in this directory, try the next one
- }
-
+ }
+
return Status::IOError(
"Cannot create temporary subdirectory in any "
"of the platform temporary directories");
-}
-
-TemporaryDir::TemporaryDir(PlatformFilename&& path) : path_(std::move(path)) {}
-
-TemporaryDir::~TemporaryDir() {
- Status st = DeleteDirTree(path_).status();
- if (!st.ok()) {
- ARROW_LOG(WARNING) << "When trying to delete temporary directory: " << st;
- }
-}
-
-SignalHandler::SignalHandler() : SignalHandler(static_cast<Callback>(nullptr)) {}
-
-SignalHandler::SignalHandler(Callback cb) {
-#if ARROW_HAVE_SIGACTION
- sa_.sa_handler = cb;
- sa_.sa_flags = 0;
- sigemptyset(&sa_.sa_mask);
-#else
- cb_ = cb;
-#endif
-}
-
-#if ARROW_HAVE_SIGACTION
-SignalHandler::SignalHandler(const struct sigaction& sa) {
- memcpy(&sa_, &sa, sizeof(sa));
-}
-#endif
-
-SignalHandler::Callback SignalHandler::callback() const {
-#if ARROW_HAVE_SIGACTION
- return sa_.sa_handler;
-#else
- return cb_;
-#endif
-}
-
-#if ARROW_HAVE_SIGACTION
-const struct sigaction& SignalHandler::action() const { return sa_; }
-#endif
-
-Result<SignalHandler> GetSignalHandler(int signum) {
-#if ARROW_HAVE_SIGACTION
- struct sigaction sa;
- int ret = sigaction(signum, nullptr, &sa);
- if (ret != 0) {
- // TODO more detailed message using errno
- return Status::IOError("sigaction call failed");
- }
- return SignalHandler(sa);
-#else
- // To read the old handler, set the signal handler to something else temporarily
- SignalHandler::Callback cb = signal(signum, SIG_IGN);
- if (cb == SIG_ERR || signal(signum, cb) == SIG_ERR) {
- // TODO more detailed message using errno
- return Status::IOError("signal call failed");
- }
- return SignalHandler(cb);
-#endif
-}
-
-Result<SignalHandler> SetSignalHandler(int signum, const SignalHandler& handler) {
-#if ARROW_HAVE_SIGACTION
- struct sigaction old_sa;
- int ret = sigaction(signum, &handler.action(), &old_sa);
- if (ret != 0) {
- // TODO more detailed message using errno
- return Status::IOError("sigaction call failed");
- }
- return SignalHandler(old_sa);
-#else
- SignalHandler::Callback cb = signal(signum, handler.callback());
- if (cb == SIG_ERR) {
- // TODO more detailed message using errno
- return Status::IOError("signal call failed");
- }
- return SignalHandler(cb);
-#endif
- return Status::OK();
-}
-
+}
+
+TemporaryDir::TemporaryDir(PlatformFilename&& path) : path_(std::move(path)) {}
+
+TemporaryDir::~TemporaryDir() {
+ Status st = DeleteDirTree(path_).status();
+ if (!st.ok()) {
+ ARROW_LOG(WARNING) << "When trying to delete temporary directory: " << st;
+ }
+}
+
+SignalHandler::SignalHandler() : SignalHandler(static_cast<Callback>(nullptr)) {}
+
+SignalHandler::SignalHandler(Callback cb) {
+#if ARROW_HAVE_SIGACTION
+ sa_.sa_handler = cb;
+ sa_.sa_flags = 0;
+ sigemptyset(&sa_.sa_mask);
+#else
+ cb_ = cb;
+#endif
+}
+
+#if ARROW_HAVE_SIGACTION
+SignalHandler::SignalHandler(const struct sigaction& sa) {
+ memcpy(&sa_, &sa, sizeof(sa));
+}
+#endif
+
+SignalHandler::Callback SignalHandler::callback() const {
+#if ARROW_HAVE_SIGACTION
+ return sa_.sa_handler;
+#else
+ return cb_;
+#endif
+}
+
+#if ARROW_HAVE_SIGACTION
+const struct sigaction& SignalHandler::action() const { return sa_; }
+#endif
+
+Result<SignalHandler> GetSignalHandler(int signum) {
+#if ARROW_HAVE_SIGACTION
+ struct sigaction sa;
+ int ret = sigaction(signum, nullptr, &sa);
+ if (ret != 0) {
+ // TODO more detailed message using errno
+ return Status::IOError("sigaction call failed");
+ }
+ return SignalHandler(sa);
+#else
+ // To read the old handler, set the signal handler to something else temporarily
+ SignalHandler::Callback cb = signal(signum, SIG_IGN);
+ if (cb == SIG_ERR || signal(signum, cb) == SIG_ERR) {
+ // TODO more detailed message using errno
+ return Status::IOError("signal call failed");
+ }
+ return SignalHandler(cb);
+#endif
+}
+
+Result<SignalHandler> SetSignalHandler(int signum, const SignalHandler& handler) {
+#if ARROW_HAVE_SIGACTION
+ struct sigaction old_sa;
+ int ret = sigaction(signum, &handler.action(), &old_sa);
+ if (ret != 0) {
+ // TODO more detailed message using errno
+ return Status::IOError("sigaction call failed");
+ }
+ return SignalHandler(old_sa);
+#else
+ SignalHandler::Callback cb = signal(signum, handler.callback());
+ if (cb == SIG_ERR) {
+ // TODO more detailed message using errno
+ return Status::IOError("signal call failed");
+ }
+ return SignalHandler(cb);
+#endif
+ return Status::OK();
+}
+
void ReinstateSignalHandler(int signum, SignalHandler::Callback handler) {
#if !ARROW_HAVE_SIGACTION
// Cannot report any errors from signal() (but there shouldn't be any)
@@ -1627,8 +1627,8 @@ Status SendSignalToThread(int signum, uint64_t thread_id) {
#endif
}
-namespace {
-
+namespace {
+
int64_t GetPid() {
#ifdef _WIN32
return GetCurrentProcessId();
@@ -1637,34 +1637,34 @@ int64_t GetPid() {
#endif
}
-std::mt19937_64 GetSeedGenerator() {
- // Initialize Mersenne Twister PRNG with a true random seed.
+std::mt19937_64 GetSeedGenerator() {
+ // Initialize Mersenne Twister PRNG with a true random seed.
// Make sure to mix in process id to minimize risks of clashes when parallel testing.
-#ifdef ARROW_VALGRIND
- // Valgrind can crash, hang or enter an infinite loop on std::random_device,
- // use a crude initializer instead.
- const uint8_t dummy = 0;
- ARROW_UNUSED(dummy);
- std::mt19937_64 seed_gen(reinterpret_cast<uintptr_t>(&dummy) ^
+#ifdef ARROW_VALGRIND
+ // Valgrind can crash, hang or enter an infinite loop on std::random_device,
+ // use a crude initializer instead.
+ const uint8_t dummy = 0;
+ ARROW_UNUSED(dummy);
+ std::mt19937_64 seed_gen(reinterpret_cast<uintptr_t>(&dummy) ^
static_cast<uintptr_t>(GetPid()));
-#else
- std::random_device true_random;
- std::mt19937_64 seed_gen(static_cast<uint64_t>(true_random()) ^
+#else
+ std::random_device true_random;
+ std::mt19937_64 seed_gen(static_cast<uint64_t>(true_random()) ^
(static_cast<uint64_t>(true_random()) << 32) ^
static_cast<uint64_t>(GetPid()));
-#endif
- return seed_gen;
-}
-
-} // namespace
-
-int64_t GetRandomSeed() {
- // The process-global seed generator to aims to avoid calling std::random_device
- // unless truly necessary (it can block on some systems, see ARROW-10287).
- static auto seed_gen = GetSeedGenerator();
- return static_cast<int64_t>(seed_gen());
-}
-
+#endif
+ return seed_gen;
+}
+
+} // namespace
+
+int64_t GetRandomSeed() {
+ // The process-global seed generator to aims to avoid calling std::random_device
+ // unless truly necessary (it can block on some systems, see ARROW-10287).
+ static auto seed_gen = GetSeedGenerator();
+ return static_cast<int64_t>(seed_gen());
+}
+
uint64_t GetThreadId() {
uint64_t equiv{0};
// std::thread::id is trivially copyable as per C++ spec,
@@ -1681,5 +1681,5 @@ uint64_t GetOptionalThreadId() {
return (tid == 0) ? tid - 1 : tid;
}
-} // namespace internal
-} // namespace arrow
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/io_util.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/io_util.h
index 4255dd37105..2627d5e7cbb 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/io_util.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/io_util.h
@@ -1,241 +1,241 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#ifndef _WIN32
-#define ARROW_HAVE_SIGACTION 1
-#endif
-
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#if ARROW_HAVE_SIGACTION
-#include <signal.h> // Needed for struct sigaction
-#endif
-
-#include "arrow/status.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/windows_fixup.h"
-
-namespace arrow {
-namespace internal {
-
-// NOTE: 8-bit path strings on Windows are encoded using UTF-8.
-// Using MBCS would fail encoding some paths.
-
-#if defined(_WIN32)
-using NativePathString = std::wstring;
-#else
-using NativePathString = std::string;
-#endif
-
-class ARROW_EXPORT PlatformFilename {
- public:
- struct Impl;
-
- ~PlatformFilename();
- PlatformFilename();
- PlatformFilename(const PlatformFilename&);
- PlatformFilename(PlatformFilename&&);
- PlatformFilename& operator=(const PlatformFilename&);
- PlatformFilename& operator=(PlatformFilename&&);
- explicit PlatformFilename(const NativePathString& path);
- explicit PlatformFilename(const NativePathString::value_type* path);
-
- const NativePathString& ToNative() const;
- std::string ToString() const;
-
- PlatformFilename Parent() const;
-
- // These functions can fail for character encoding reasons.
- static Result<PlatformFilename> FromString(const std::string& file_name);
- Result<PlatformFilename> Join(const std::string& child_name) const;
-
- PlatformFilename Join(const PlatformFilename& child_name) const;
-
- bool operator==(const PlatformFilename& other) const;
- bool operator!=(const PlatformFilename& other) const;
-
- // Made public to avoid the proliferation of friend declarations.
- const Impl* impl() const { return impl_.get(); }
-
- private:
- std::unique_ptr<Impl> impl_;
-
- explicit PlatformFilename(Impl impl);
-};
-
-/// Create a directory if it doesn't exist.
-///
-/// Return whether the directory was created.
-ARROW_EXPORT
-Result<bool> CreateDir(const PlatformFilename& dir_path);
-
-/// Create a directory and its parents if it doesn't exist.
-///
-/// Return whether the directory was created.
-ARROW_EXPORT
-Result<bool> CreateDirTree(const PlatformFilename& dir_path);
-
-/// Delete a directory's contents (but not the directory itself) if it exists.
-///
-/// Return whether the directory existed.
-ARROW_EXPORT
-Result<bool> DeleteDirContents(const PlatformFilename& dir_path,
- bool allow_not_found = true);
-
-/// Delete a directory tree if it exists.
-///
-/// Return whether the directory existed.
-ARROW_EXPORT
-Result<bool> DeleteDirTree(const PlatformFilename& dir_path, bool allow_not_found = true);
-
-// Non-recursively list the contents of the given directory.
-// The returned names are the children's base names, not including dir_path.
-ARROW_EXPORT
-Result<std::vector<PlatformFilename>> ListDir(const PlatformFilename& dir_path);
-
-/// Delete a file if it exists.
-///
-/// Return whether the file existed.
-ARROW_EXPORT
-Result<bool> DeleteFile(const PlatformFilename& file_path, bool allow_not_found = true);
-
-/// Return whether a file exists.
-ARROW_EXPORT
-Result<bool> FileExists(const PlatformFilename& path);
-
-/// Open a file for reading and return a file descriptor.
-ARROW_EXPORT
-Result<int> FileOpenReadable(const PlatformFilename& file_name);
-
-/// Open a file for writing and return a file descriptor.
-ARROW_EXPORT
-Result<int> FileOpenWritable(const PlatformFilename& file_name, bool write_only = true,
- bool truncate = true, bool append = false);
-
-/// Read from current file position. Return number of bytes read.
-ARROW_EXPORT
-Result<int64_t> FileRead(int fd, uint8_t* buffer, int64_t nbytes);
-/// Read from given file position. Return number of bytes read.
-ARROW_EXPORT
-Result<int64_t> FileReadAt(int fd, uint8_t* buffer, int64_t position, int64_t nbytes);
-
-ARROW_EXPORT
-Status FileWrite(int fd, const uint8_t* buffer, const int64_t nbytes);
-ARROW_EXPORT
-Status FileTruncate(int fd, const int64_t size);
-
-ARROW_EXPORT
-Status FileSeek(int fd, int64_t pos);
-ARROW_EXPORT
-Status FileSeek(int fd, int64_t pos, int whence);
-ARROW_EXPORT
-Result<int64_t> FileTell(int fd);
-ARROW_EXPORT
-Result<int64_t> FileGetSize(int fd);
-
-ARROW_EXPORT
-Status FileClose(int fd);
-
-struct Pipe {
- int rfd;
- int wfd;
-};
-
-ARROW_EXPORT
-Result<Pipe> CreatePipe();
-
-ARROW_EXPORT
-int64_t GetPageSize();
-
-struct MemoryRegion {
- void* addr;
- size_t size;
-};
-
-ARROW_EXPORT
-Status MemoryMapRemap(void* addr, size_t old_size, size_t new_size, int fildes,
- void** new_addr);
-ARROW_EXPORT
-Status MemoryAdviseWillNeed(const std::vector<MemoryRegion>& regions);
-
-ARROW_EXPORT
-Result<std::string> GetEnvVar(const char* name);
-ARROW_EXPORT
-Result<std::string> GetEnvVar(const std::string& name);
-ARROW_EXPORT
-Result<NativePathString> GetEnvVarNative(const char* name);
-ARROW_EXPORT
-Result<NativePathString> GetEnvVarNative(const std::string& name);
-
-ARROW_EXPORT
-Status SetEnvVar(const char* name, const char* value);
-ARROW_EXPORT
-Status SetEnvVar(const std::string& name, const std::string& value);
-ARROW_EXPORT
-Status DelEnvVar(const char* name);
-ARROW_EXPORT
-Status DelEnvVar(const std::string& name);
-
-ARROW_EXPORT
-std::string ErrnoMessage(int errnum);
-#if _WIN32
-ARROW_EXPORT
-std::string WinErrorMessage(int errnum);
-#endif
-
-ARROW_EXPORT
-std::shared_ptr<StatusDetail> StatusDetailFromErrno(int errnum);
-#if _WIN32
-ARROW_EXPORT
-std::shared_ptr<StatusDetail> StatusDetailFromWinError(int errnum);
-#endif
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#ifndef _WIN32
+#define ARROW_HAVE_SIGACTION 1
+#endif
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#if ARROW_HAVE_SIGACTION
+#include <signal.h> // Needed for struct sigaction
+#endif
+
+#include "arrow/status.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/windows_fixup.h"
+
+namespace arrow {
+namespace internal {
+
+// NOTE: 8-bit path strings on Windows are encoded using UTF-8.
+// Using MBCS would fail encoding some paths.
+
+#if defined(_WIN32)
+using NativePathString = std::wstring;
+#else
+using NativePathString = std::string;
+#endif
+
+class ARROW_EXPORT PlatformFilename {
+ public:
+ struct Impl;
+
+ ~PlatformFilename();
+ PlatformFilename();
+ PlatformFilename(const PlatformFilename&);
+ PlatformFilename(PlatformFilename&&);
+ PlatformFilename& operator=(const PlatformFilename&);
+ PlatformFilename& operator=(PlatformFilename&&);
+ explicit PlatformFilename(const NativePathString& path);
+ explicit PlatformFilename(const NativePathString::value_type* path);
+
+ const NativePathString& ToNative() const;
+ std::string ToString() const;
+
+ PlatformFilename Parent() const;
+
+ // These functions can fail for character encoding reasons.
+ static Result<PlatformFilename> FromString(const std::string& file_name);
+ Result<PlatformFilename> Join(const std::string& child_name) const;
+
+ PlatformFilename Join(const PlatformFilename& child_name) const;
+
+ bool operator==(const PlatformFilename& other) const;
+ bool operator!=(const PlatformFilename& other) const;
+
+ // Made public to avoid the proliferation of friend declarations.
+ const Impl* impl() const { return impl_.get(); }
+
+ private:
+ std::unique_ptr<Impl> impl_;
+
+ explicit PlatformFilename(Impl impl);
+};
+
+/// Create a directory if it doesn't exist.
+///
+/// Return whether the directory was created.
+ARROW_EXPORT
+Result<bool> CreateDir(const PlatformFilename& dir_path);
+
+/// Create a directory and its parents if it doesn't exist.
+///
+/// Return whether the directory was created.
+ARROW_EXPORT
+Result<bool> CreateDirTree(const PlatformFilename& dir_path);
+
+/// Delete a directory's contents (but not the directory itself) if it exists.
+///
+/// Return whether the directory existed.
+ARROW_EXPORT
+Result<bool> DeleteDirContents(const PlatformFilename& dir_path,
+ bool allow_not_found = true);
+
+/// Delete a directory tree if it exists.
+///
+/// Return whether the directory existed.
+ARROW_EXPORT
+Result<bool> DeleteDirTree(const PlatformFilename& dir_path, bool allow_not_found = true);
+
+// Non-recursively list the contents of the given directory.
+// The returned names are the children's base names, not including dir_path.
+ARROW_EXPORT
+Result<std::vector<PlatformFilename>> ListDir(const PlatformFilename& dir_path);
+
+/// Delete a file if it exists.
+///
+/// Return whether the file existed.
+ARROW_EXPORT
+Result<bool> DeleteFile(const PlatformFilename& file_path, bool allow_not_found = true);
+
+/// Return whether a file exists.
+ARROW_EXPORT
+Result<bool> FileExists(const PlatformFilename& path);
+
+/// Open a file for reading and return a file descriptor.
+ARROW_EXPORT
+Result<int> FileOpenReadable(const PlatformFilename& file_name);
+
+/// Open a file for writing and return a file descriptor.
+ARROW_EXPORT
+Result<int> FileOpenWritable(const PlatformFilename& file_name, bool write_only = true,
+ bool truncate = true, bool append = false);
+
+/// Read from current file position. Return number of bytes read.
+ARROW_EXPORT
+Result<int64_t> FileRead(int fd, uint8_t* buffer, int64_t nbytes);
+/// Read from given file position. Return number of bytes read.
+ARROW_EXPORT
+Result<int64_t> FileReadAt(int fd, uint8_t* buffer, int64_t position, int64_t nbytes);
+
+ARROW_EXPORT
+Status FileWrite(int fd, const uint8_t* buffer, const int64_t nbytes);
+ARROW_EXPORT
+Status FileTruncate(int fd, const int64_t size);
+
+ARROW_EXPORT
+Status FileSeek(int fd, int64_t pos);
+ARROW_EXPORT
+Status FileSeek(int fd, int64_t pos, int whence);
+ARROW_EXPORT
+Result<int64_t> FileTell(int fd);
+ARROW_EXPORT
+Result<int64_t> FileGetSize(int fd);
+
+ARROW_EXPORT
+Status FileClose(int fd);
+
+struct Pipe {
+ int rfd;
+ int wfd;
+};
+
+ARROW_EXPORT
+Result<Pipe> CreatePipe();
+
+ARROW_EXPORT
+int64_t GetPageSize();
+
+struct MemoryRegion {
+ void* addr;
+ size_t size;
+};
+
+ARROW_EXPORT
+Status MemoryMapRemap(void* addr, size_t old_size, size_t new_size, int fildes,
+ void** new_addr);
+ARROW_EXPORT
+Status MemoryAdviseWillNeed(const std::vector<MemoryRegion>& regions);
+
+ARROW_EXPORT
+Result<std::string> GetEnvVar(const char* name);
+ARROW_EXPORT
+Result<std::string> GetEnvVar(const std::string& name);
+ARROW_EXPORT
+Result<NativePathString> GetEnvVarNative(const char* name);
+ARROW_EXPORT
+Result<NativePathString> GetEnvVarNative(const std::string& name);
+
+ARROW_EXPORT
+Status SetEnvVar(const char* name, const char* value);
+ARROW_EXPORT
+Status SetEnvVar(const std::string& name, const std::string& value);
+ARROW_EXPORT
+Status DelEnvVar(const char* name);
+ARROW_EXPORT
+Status DelEnvVar(const std::string& name);
+
+ARROW_EXPORT
+std::string ErrnoMessage(int errnum);
+#if _WIN32
+ARROW_EXPORT
+std::string WinErrorMessage(int errnum);
+#endif
+
+ARROW_EXPORT
+std::shared_ptr<StatusDetail> StatusDetailFromErrno(int errnum);
+#if _WIN32
+ARROW_EXPORT
+std::shared_ptr<StatusDetail> StatusDetailFromWinError(int errnum);
+#endif
ARROW_EXPORT
std::shared_ptr<StatusDetail> StatusDetailFromSignal(int signum);
-
-template <typename... Args>
-Status StatusFromErrno(int errnum, StatusCode code, Args&&... args) {
- return Status::FromDetailAndArgs(code, StatusDetailFromErrno(errnum),
- std::forward<Args>(args)...);
-}
-
-template <typename... Args>
-Status IOErrorFromErrno(int errnum, Args&&... args) {
- return StatusFromErrno(errnum, StatusCode::IOError, std::forward<Args>(args)...);
-}
-
-#if _WIN32
-template <typename... Args>
-Status StatusFromWinError(int errnum, StatusCode code, Args&&... args) {
- return Status::FromDetailAndArgs(code, StatusDetailFromWinError(errnum),
- std::forward<Args>(args)...);
-}
-
-template <typename... Args>
-Status IOErrorFromWinError(int errnum, Args&&... args) {
- return StatusFromWinError(errnum, StatusCode::IOError, std::forward<Args>(args)...);
-}
-#endif
-
+
+template <typename... Args>
+Status StatusFromErrno(int errnum, StatusCode code, Args&&... args) {
+ return Status::FromDetailAndArgs(code, StatusDetailFromErrno(errnum),
+ std::forward<Args>(args)...);
+}
+
+template <typename... Args>
+Status IOErrorFromErrno(int errnum, Args&&... args) {
+ return StatusFromErrno(errnum, StatusCode::IOError, std::forward<Args>(args)...);
+}
+
+#if _WIN32
+template <typename... Args>
+Status StatusFromWinError(int errnum, StatusCode code, Args&&... args) {
+ return Status::FromDetailAndArgs(code, StatusDetailFromWinError(errnum),
+ std::forward<Args>(args)...);
+}
+
+template <typename... Args>
+Status IOErrorFromWinError(int errnum, Args&&... args) {
+ return StatusFromWinError(errnum, StatusCode::IOError, std::forward<Args>(args)...);
+}
+#endif
+
template <typename... Args>
Status StatusFromSignal(int signum, StatusCode code, Args&&... args) {
return Status::FromDetailAndArgs(code, StatusDetailFromSignal(signum),
@@ -247,68 +247,68 @@ Status CancelledFromSignal(int signum, Args&&... args) {
return StatusFromSignal(signum, StatusCode::Cancelled, std::forward<Args>(args)...);
}
-ARROW_EXPORT
-int ErrnoFromStatus(const Status&);
-
-// Always returns 0 on non-Windows platforms (for Python).
-ARROW_EXPORT
-int WinErrorFromStatus(const Status&);
-
+ARROW_EXPORT
+int ErrnoFromStatus(const Status&);
+
+// Always returns 0 on non-Windows platforms (for Python).
+ARROW_EXPORT
+int WinErrorFromStatus(const Status&);
+
ARROW_EXPORT
int SignalFromStatus(const Status&);
-class ARROW_EXPORT TemporaryDir {
- public:
- ~TemporaryDir();
-
- /// '/'-terminated path to the temporary dir
- const PlatformFilename& path() { return path_; }
-
- /// Create a temporary subdirectory in the system temporary dir,
- /// named starting with `prefix`.
- static Result<std::unique_ptr<TemporaryDir>> Make(const std::string& prefix);
-
- private:
- PlatformFilename path_;
-
- explicit TemporaryDir(PlatformFilename&&);
-};
-
-class ARROW_EXPORT SignalHandler {
- public:
- typedef void (*Callback)(int);
-
- SignalHandler();
- explicit SignalHandler(Callback cb);
-#if ARROW_HAVE_SIGACTION
- explicit SignalHandler(const struct sigaction& sa);
-#endif
-
- Callback callback() const;
-#if ARROW_HAVE_SIGACTION
- const struct sigaction& action() const;
-#endif
-
- protected:
-#if ARROW_HAVE_SIGACTION
- // Storing the full sigaction allows to restore the entire signal handling
- // configuration.
- struct sigaction sa_;
-#else
- Callback cb_;
-#endif
-};
-
-/// \brief Return the current handler for the given signal number.
-ARROW_EXPORT
-Result<SignalHandler> GetSignalHandler(int signum);
-
-/// \brief Set a new handler for the given signal number.
-///
-/// The old signal handler is returned.
-ARROW_EXPORT
-Result<SignalHandler> SetSignalHandler(int signum, const SignalHandler& handler);
-
+class ARROW_EXPORT TemporaryDir {
+ public:
+ ~TemporaryDir();
+
+ /// '/'-terminated path to the temporary dir
+ const PlatformFilename& path() { return path_; }
+
+ /// Create a temporary subdirectory in the system temporary dir,
+ /// named starting with `prefix`.
+ static Result<std::unique_ptr<TemporaryDir>> Make(const std::string& prefix);
+
+ private:
+ PlatformFilename path_;
+
+ explicit TemporaryDir(PlatformFilename&&);
+};
+
+class ARROW_EXPORT SignalHandler {
+ public:
+ typedef void (*Callback)(int);
+
+ SignalHandler();
+ explicit SignalHandler(Callback cb);
+#if ARROW_HAVE_SIGACTION
+ explicit SignalHandler(const struct sigaction& sa);
+#endif
+
+ Callback callback() const;
+#if ARROW_HAVE_SIGACTION
+ const struct sigaction& action() const;
+#endif
+
+ protected:
+#if ARROW_HAVE_SIGACTION
+ // Storing the full sigaction allows to restore the entire signal handling
+ // configuration.
+ struct sigaction sa_;
+#else
+ Callback cb_;
+#endif
+};
+
+/// \brief Return the current handler for the given signal number.
+ARROW_EXPORT
+Result<SignalHandler> GetSignalHandler(int signum);
+
+/// \brief Set a new handler for the given signal number.
+///
+/// The old signal handler is returned.
+ARROW_EXPORT
+Result<SignalHandler> SetSignalHandler(int signum, const SignalHandler& handler);
+
/// \brief Reinstate the signal handler
///
/// For use in signal handlers. This is needed on platforms without sigaction()
@@ -329,15 +329,15 @@ Status SendSignal(int signum);
ARROW_EXPORT
Status SendSignalToThread(int signum, uint64_t thread_id);
-/// \brief Get an unpredictable random seed
-///
-/// This function may be slightly costly, so should only be used to initialize
-/// a PRNG, not to generate a large amount of random numbers.
-/// It is better to use this function rather than std::random_device, unless
-/// absolutely necessary (e.g. to generate a cryptographic secret).
-ARROW_EXPORT
-int64_t GetRandomSeed();
-
+/// \brief Get an unpredictable random seed
+///
+/// This function may be slightly costly, so should only be used to initialize
+/// a PRNG, not to generate a large amount of random numbers.
+/// It is better to use this function rather than std::random_device, unless
+/// absolutely necessary (e.g. to generate a cryptographic secret).
+ARROW_EXPORT
+int64_t GetRandomSeed();
+
/// \brief Get the current thread id
///
/// In addition to having the same properties as std::thread, the returned value
@@ -345,5 +345,5 @@ int64_t GetRandomSeed();
ARROW_EXPORT
uint64_t GetThreadId();
-} // namespace internal
-} // namespace arrow
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/iterator.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/iterator.h
index 2f42803d26f..ee1af85b5e8 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/iterator.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/iterator.h
@@ -1,61 +1,61 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cassert>
-#include <functional>
-#include <memory>
-#include <tuple>
-#include <type_traits>
-#include <utility>
-#include <vector>
-
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/util/compare.h"
-#include "arrow/util/functional.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/optional.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-template <typename T>
-class Iterator;
-
-template <typename T>
-struct IterationTraits {
- /// \brief a reserved value which indicates the end of iteration. By
- /// default this is NULLPTR since most iterators yield pointer types.
- /// Specialize IterationTraits if different end semantics are required.
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cassert>
+#include <functional>
+#include <memory>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/compare.h"
+#include "arrow/util/functional.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/optional.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+template <typename T>
+class Iterator;
+
+template <typename T>
+struct IterationTraits {
+ /// \brief a reserved value which indicates the end of iteration. By
+ /// default this is NULLPTR since most iterators yield pointer types.
+ /// Specialize IterationTraits if different end semantics are required.
///
/// Note: This should not be used to determine if a given value is a
/// terminal value. Use IsIterationEnd (which uses IsEnd) instead. This
/// is only for returning terminal values.
- static T End() { return T(NULLPTR); }
+ static T End() { return T(NULLPTR); }
/// \brief Checks to see if the value is a terminal value.
/// A method is used here since T is not neccesarily comparable in many
/// cases even though it has a distinct final value
static bool IsEnd(const T& val) { return val == End(); }
-};
-
-template <typename T>
+};
+
+template <typename T>
T IterationEnd() {
return IterationTraits<T>::End();
}
@@ -66,150 +66,150 @@ bool IsIterationEnd(const T& val) {
}
template <typename T>
-struct IterationTraits<util::optional<T>> {
- /// \brief by default when iterating through a sequence of optional,
- /// nullopt indicates the end of iteration.
- /// Specialize IterationTraits if different end semantics are required.
- static util::optional<T> End() { return util::nullopt; }
-
+struct IterationTraits<util::optional<T>> {
+ /// \brief by default when iterating through a sequence of optional,
+ /// nullopt indicates the end of iteration.
+ /// Specialize IterationTraits if different end semantics are required.
+ static util::optional<T> End() { return util::nullopt; }
+
/// \brief by default when iterating through a sequence of optional,
/// nullopt (!has_value()) indicates the end of iteration.
/// Specialize IterationTraits if different end semantics are required.
static bool IsEnd(const util::optional<T>& val) { return !val.has_value(); }
- // TODO(bkietz) The range-for loop over Iterator<optional<T>> yields
- // Result<optional<T>> which is unnecessary (since only the unyielded end optional
- // is nullopt. Add IterationTraits::GetRangeElement() to handle this case
-};
-
-/// \brief A generic Iterator that can return errors
-template <typename T>
-class Iterator : public util::EqualityComparable<Iterator<T>> {
- public:
- /// \brief Iterator may be constructed from any type which has a member function
+ // TODO(bkietz) The range-for loop over Iterator<optional<T>> yields
+ // Result<optional<T>> which is unnecessary (since only the unyielded end optional
+ // is nullopt). Add IterationTraits::GetRangeElement() to handle this case
+};
+
+/// \brief A generic Iterator that can return errors
+template <typename T>
+class Iterator : public util::EqualityComparable<Iterator<T>> {
+ public:
+ /// \brief Iterator may be constructed from any type which has a member function
/// with signature Result<T> Next();
/// End of iterator is signalled by returning IterationTraits<T>::End();
- ///
- /// The argument is moved or copied to the heap and kept in a unique_ptr<void>. Only
- /// its destructor and its Next method (which are stored in function pointers) are
- /// referenced after construction.
- ///
- /// This approach is used to dodge MSVC linkage hell (ARROW-6244, ARROW-6558) when using
- /// an abstract template base class: instead of being inlined as usual for a template
- /// function the base's virtual destructor will be exported, leading to multiple
- /// definition errors when linking to any other TU where the base is instantiated.
- template <typename Wrapped>
- explicit Iterator(Wrapped has_next)
- : ptr_(new Wrapped(std::move(has_next)), Delete<Wrapped>), next_(Next<Wrapped>) {}
-
- Iterator() : ptr_(NULLPTR, [](void*) {}) {}
-
- /// \brief Return the next element of the sequence, IterationTraits<T>::End() when the
- /// iteration is completed. Calling this on a default constructed Iterator
- /// will result in undefined behavior.
- Result<T> Next() { return next_(ptr_.get()); }
-
- /// Pass each element of the sequence to a visitor. Will return any error status
- /// returned by the visitor, terminating iteration.
- template <typename Visitor>
- Status Visit(Visitor&& visitor) {
- for (;;) {
- ARROW_ASSIGN_OR_RAISE(auto value, Next());
-
+ ///
+ /// The argument is moved or copied to the heap and kept in a unique_ptr<void>. Only
+ /// its destructor and its Next method (which are stored in function pointers) are
+ /// referenced after construction.
+ ///
+ /// This approach is used to dodge MSVC linkage hell (ARROW-6244, ARROW-6558) when using
+ /// an abstract template base class: instead of being inlined as usual for a template
+ /// function the base's virtual destructor will be exported, leading to multiple
+ /// definition errors when linking to any other TU where the base is instantiated.
+ template <typename Wrapped>
+ explicit Iterator(Wrapped has_next)
+ : ptr_(new Wrapped(std::move(has_next)), Delete<Wrapped>), next_(Next<Wrapped>) {}
+
+ Iterator() : ptr_(NULLPTR, [](void*) {}) {}
+
+ /// \brief Return the next element of the sequence, IterationTraits<T>::End() when the
+ /// iteration is completed. Calling this on a default constructed Iterator
+ /// will result in undefined behavior.
+ Result<T> Next() { return next_(ptr_.get()); }
+
+ /// Pass each element of the sequence to a visitor. Will return any error status
+ /// returned by the visitor, terminating iteration.
+ template <typename Visitor>
+ Status Visit(Visitor&& visitor) {
+ for (;;) {
+ ARROW_ASSIGN_OR_RAISE(auto value, Next());
+
if (IsIterationEnd(value)) break;
-
- ARROW_RETURN_NOT_OK(visitor(std::move(value)));
- }
-
- return Status::OK();
- }
-
- /// Iterators will only compare equal if they are both null.
- /// Equality comparability is required to make an Iterator of Iterators
- /// (to check for the end condition).
- bool Equals(const Iterator& other) const { return ptr_ == other.ptr_; }
-
- explicit operator bool() const { return ptr_ != NULLPTR; }
-
- class RangeIterator {
- public:
- RangeIterator() : value_(IterationTraits<T>::End()) {}
-
- explicit RangeIterator(Iterator i)
- : value_(IterationTraits<T>::End()),
- iterator_(std::make_shared<Iterator>(std::move(i))) {
- Next();
- }
-
- bool operator!=(const RangeIterator& other) const { return value_ != other.value_; }
-
- RangeIterator& operator++() {
- Next();
- return *this;
- }
-
- Result<T> operator*() {
- ARROW_RETURN_NOT_OK(value_.status());
-
- auto value = std::move(value_);
- value_ = IterationTraits<T>::End();
- return value;
- }
-
- private:
- void Next() {
- if (!value_.ok()) {
- value_ = IterationTraits<T>::End();
- return;
- }
- value_ = iterator_->Next();
- }
-
- Result<T> value_;
- std::shared_ptr<Iterator> iterator_;
- };
-
- RangeIterator begin() { return RangeIterator(std::move(*this)); }
-
- RangeIterator end() { return RangeIterator(); }
-
- /// \brief Move every element of this iterator into a vector.
- Result<std::vector<T>> ToVector() {
- std::vector<T> out;
- for (auto maybe_element : *this) {
- ARROW_ASSIGN_OR_RAISE(auto element, maybe_element);
- out.push_back(std::move(element));
- }
- // ARROW-8193: On gcc-4.8 without the explicit move it tries to use the
- // copy constructor, which may be deleted on the elements of type T
- return std::move(out);
- }
-
- private:
- /// Implementation of deleter for ptr_: Casts from void* to the wrapped type and
- /// deletes that.
- template <typename HasNext>
- static void Delete(void* ptr) {
- delete static_cast<HasNext*>(ptr);
- }
-
- /// Implementation of Next: Casts from void* to the wrapped type and invokes that
- /// type's Next member function.
- template <typename HasNext>
- static Result<T> Next(void* ptr) {
- return static_cast<HasNext*>(ptr)->Next();
- }
-
- /// ptr_ is a unique_ptr to void with a custom deleter: a function pointer which first
- /// casts from void* to a pointer to the wrapped type then deletes that.
- std::unique_ptr<void, void (*)(void*)> ptr_;
-
- /// next_ is a function pointer which first casts from void* to a pointer to the wrapped
- /// type then invokes its Next member function.
- Result<T> (*next_)(void*) = NULLPTR;
-};
-
-template <typename T>
+
+ ARROW_RETURN_NOT_OK(visitor(std::move(value)));
+ }
+
+ return Status::OK();
+ }
+
+ /// Iterators will only compare equal if they are both null.
+ /// Equality comparability is required to make an Iterator of Iterators
+ /// (to check for the end condition).
+ bool Equals(const Iterator& other) const { return ptr_ == other.ptr_; }
+
+ explicit operator bool() const { return ptr_ != NULLPTR; }
+
+ class RangeIterator {
+ public:
+ RangeIterator() : value_(IterationTraits<T>::End()) {}
+
+ explicit RangeIterator(Iterator i)
+ : value_(IterationTraits<T>::End()),
+ iterator_(std::make_shared<Iterator>(std::move(i))) {
+ Next();
+ }
+
+ bool operator!=(const RangeIterator& other) const { return value_ != other.value_; }
+
+ RangeIterator& operator++() {
+ Next();
+ return *this;
+ }
+
+ Result<T> operator*() {
+ ARROW_RETURN_NOT_OK(value_.status());
+
+ auto value = std::move(value_);
+ value_ = IterationTraits<T>::End();
+ return value;
+ }
+
+ private:
+ void Next() {
+ if (!value_.ok()) {
+ value_ = IterationTraits<T>::End();
+ return;
+ }
+ value_ = iterator_->Next();
+ }
+
+ Result<T> value_;
+ std::shared_ptr<Iterator> iterator_;
+ };
+
+ RangeIterator begin() { return RangeIterator(std::move(*this)); }
+
+ RangeIterator end() { return RangeIterator(); }
+
+ /// \brief Move every element of this iterator into a vector.
+ Result<std::vector<T>> ToVector() {
+ std::vector<T> out;
+ for (auto maybe_element : *this) {
+ ARROW_ASSIGN_OR_RAISE(auto element, maybe_element);
+ out.push_back(std::move(element));
+ }
+ // ARROW-8193: On gcc-4.8 without the explicit move it tries to use the
+ // copy constructor, which may be deleted on the elements of type T
+ return std::move(out);
+ }
+
+ private:
+ /// Implementation of deleter for ptr_: Casts from void* to the wrapped type and
+ /// deletes that.
+ template <typename HasNext>
+ static void Delete(void* ptr) {
+ delete static_cast<HasNext*>(ptr);
+ }
+
+ /// Implementation of Next: Casts from void* to the wrapped type and invokes that
+ /// type's Next member function.
+ template <typename HasNext>
+ static Result<T> Next(void* ptr) {
+ return static_cast<HasNext*>(ptr)->Next();
+ }
+
+ /// ptr_ is a unique_ptr to void with a custom deleter: a function pointer which first
+ /// casts from void* to a pointer to the wrapped type then deletes that.
+ std::unique_ptr<void, void (*)(void*)> ptr_;
+
+ /// next_ is a function pointer which first casts from void* to a pointer to the wrapped
+ /// type then invokes its Next member function.
+ Result<T> (*next_)(void*) = NULLPTR;
+};
+
+template <typename T>
struct TransformFlow {
using YieldValueType = T;
@@ -331,238 +331,238 @@ Iterator<V> MakeTransformedIterator(Iterator<T> it, Transformer<T, V> op) {
}
template <typename T>
-struct IterationTraits<Iterator<T>> {
- // The end condition for an Iterator of Iterators is a default constructed (null)
- // Iterator.
- static Iterator<T> End() { return Iterator<T>(); }
+struct IterationTraits<Iterator<T>> {
+ // The end condition for an Iterator of Iterators is a default constructed (null)
+ // Iterator.
+ static Iterator<T> End() { return Iterator<T>(); }
static bool IsEnd(const Iterator<T>& val) { return !val; }
-};
-
-template <typename Fn, typename T>
-class FunctionIterator {
- public:
- explicit FunctionIterator(Fn fn) : fn_(std::move(fn)) {}
-
- Result<T> Next() { return fn_(); }
-
- private:
- Fn fn_;
-};
-
-/// \brief Construct an Iterator which invokes a callable on Next()
-template <typename Fn,
- typename Ret = typename internal::call_traits::return_type<Fn>::ValueType>
-Iterator<Ret> MakeFunctionIterator(Fn fn) {
- return Iterator<Ret>(FunctionIterator<Fn, Ret>(std::move(fn)));
-}
-
-template <typename T>
-Iterator<T> MakeEmptyIterator() {
- return MakeFunctionIterator([]() -> Result<T> { return IterationTraits<T>::End(); });
-}
-
-template <typename T>
-Iterator<T> MakeErrorIterator(Status s) {
- return MakeFunctionIterator([s]() -> Result<T> {
- ARROW_RETURN_NOT_OK(s);
- return IterationTraits<T>::End();
- });
-}
-
-/// \brief Simple iterator which yields the elements of a std::vector
-template <typename T>
-class VectorIterator {
- public:
- explicit VectorIterator(std::vector<T> v) : elements_(std::move(v)) {}
-
- Result<T> Next() {
- if (i_ == elements_.size()) {
- return IterationTraits<T>::End();
- }
- return std::move(elements_[i_++]);
- }
-
- private:
- std::vector<T> elements_;
- size_t i_ = 0;
-};
-
-template <typename T>
-Iterator<T> MakeVectorIterator(std::vector<T> v) {
- return Iterator<T>(VectorIterator<T>(std::move(v)));
-}
-
-/// \brief Simple iterator which yields *pointers* to the elements of a std::vector<T>.
-/// This is provided to support T where IterationTraits<T>::End is not specialized
-template <typename T>
-class VectorPointingIterator {
- public:
- explicit VectorPointingIterator(std::vector<T> v) : elements_(std::move(v)) {}
-
- Result<T*> Next() {
- if (i_ == elements_.size()) {
- return NULLPTR;
- }
- return &elements_[i_++];
- }
-
- private:
- std::vector<T> elements_;
- size_t i_ = 0;
-};
-
-template <typename T>
-Iterator<T*> MakeVectorPointingIterator(std::vector<T> v) {
- return Iterator<T*>(VectorPointingIterator<T>(std::move(v)));
-}
-
-/// \brief MapIterator takes ownership of an iterator and a function to apply
-/// on every element. The mapped function is not allowed to fail.
-template <typename Fn, typename I, typename O>
-class MapIterator {
- public:
- explicit MapIterator(Fn map, Iterator<I> it)
- : map_(std::move(map)), it_(std::move(it)) {}
-
- Result<O> Next() {
- ARROW_ASSIGN_OR_RAISE(I i, it_.Next());
-
+};
+
+/// \brief Adapter that exposes a nullary callable through a Next() member.
+///
+/// Every call to Next() invokes the stored callable once and returns whatever
+/// it produced, converted to Result<T>.
+template <typename Fn, typename T>
+class FunctionIterator {
+  /// The wrapped callable; owned by value.
+  Fn fn_;
+
+ public:
+  explicit FunctionIterator(Fn fn) : fn_(std::move(fn)) {}
+
+  /// Invoke the wrapped callable and forward its result.
+  Result<T> Next() {
+    return fn_();
+  }
+};
+
+/// \brief Construct an Iterator which invokes a callable on Next()
+///
+/// \tparam Fn callable type; it is invoked with no arguments
+/// \tparam Ret element type of the resulting Iterator. By default this is the
+/// ValueType member of the callable's return type (as reported by
+/// internal::call_traits::return_type).
+/// \param fn the callable, moved into a FunctionIterator owned by the Iterator
+/// \return an Iterator<Ret> wrapping FunctionIterator<Fn, Ret>
+template <typename Fn,
+ typename Ret = typename internal::call_traits::return_type<Fn>::ValueType>
+Iterator<Ret> MakeFunctionIterator(Fn fn) {
+ return Iterator<Ret>(FunctionIterator<Fn, Ret>(std::move(fn)));
+}
+
+/// \brief Construct an Iterator whose Next() always yields IterationTraits<T>::End().
+template <typename T>
+Iterator<T> MakeEmptyIterator() {
+  // A stateless callable that reports end-of-iteration on every invocation.
+  auto always_end = []() -> Result<T> { return IterationTraits<T>::End(); };
+  return MakeFunctionIterator(std::move(always_end));
+}
+
+/// \brief Construct an Iterator whose Next() reports the given status.
+///
+/// If `s` is an error, every Next() call returns it; if `s` is OK, Next()
+/// yields IterationTraits<T>::End().
+template <typename T>
+Iterator<T> MakeErrorIterator(Status s) {
+  // The status is captured by copy so the iterator does not depend on the caller.
+  auto raise_or_end = [s]() -> Result<T> {
+    ARROW_RETURN_NOT_OK(s);
+    return IterationTraits<T>::End();
+  };
+  return MakeFunctionIterator(std::move(raise_or_end));
+}
+
+/// \brief Iterator source which hands out the elements of an owned std::vector.
+///
+/// Elements are moved out in index order. Once the cursor reaches the end of
+/// the vector, every further call yields IterationTraits<T>::End().
+template <typename T>
+class VectorIterator {
+ public:
+  explicit VectorIterator(std::vector<T> v) : elements_(std::move(v)) {}
+
+  /// Move the next element out of the vector, or signal end of iteration.
+  Result<T> Next() {
+    const bool exhausted = (i_ == elements_.size());
+    if (!exhausted) {
+      return std::move(elements_[i_++]);
+    }
+    return IterationTraits<T>::End();
+  }
+
+ private:
+  /// Backing storage; slots before i_ have already been moved from.
+  std::vector<T> elements_;
+  /// Index of the next element to yield.
+  size_t i_ = 0;
+};
+
+/// \brief Construct an Iterator<T> that yields (by move) the elements of `v`.
+template <typename T>
+Iterator<T> MakeVectorIterator(std::vector<T> v) {
+  VectorIterator<T> source(std::move(v));
+  return Iterator<T>(std::move(source));
+}
+
+/// \brief Iterator source which yields *pointers* into an owned std::vector<T>.
+///
+/// Useful for element types T where IterationTraits<T>::End is not specialized:
+/// the end of iteration is signalled with a null pointer instead.
+template <typename T>
+class VectorPointingIterator {
+ public:
+  explicit VectorPointingIterator(std::vector<T> v) : elements_(std::move(v)) {}
+
+  /// Return the address of the next element, or NULLPTR once all were visited.
+  Result<T*> Next() {
+    const bool exhausted = (i_ == elements_.size());
+    if (!exhausted) {
+      return &elements_[i_++];
+    }
+    return NULLPTR;
+  }
+
+ private:
+  /// Backing storage that the yielded pointers refer to.
+  std::vector<T> elements_;
+  /// Index of the next element to yield.
+  size_t i_ = 0;
+};
+
+/// \brief Construct an Iterator<T*> that yields pointers to the elements of `v`.
+///
+/// The vector is moved into the iterator, which owns the pointed-to elements.
+template <typename T>
+Iterator<T*> MakeVectorPointingIterator(std::vector<T> v) {
+  VectorPointingIterator<T> source(std::move(v));
+  return Iterator<T*>(std::move(source));
+}
+
+/// \brief MapIterator takes ownership of an iterator and a function to apply
+/// on every element. The mapped function is not allowed to fail.
+template <typename Fn, typename I, typename O>
+class MapIterator {
+ public:
+ explicit MapIterator(Fn map, Iterator<I> it)
+ : map_(std::move(map)), it_(std::move(it)) {}
+
+ Result<O> Next() {
+ ARROW_ASSIGN_OR_RAISE(I i, it_.Next());
+
if (IsIterationEnd(i)) {
- return IterationTraits<O>::End();
- }
-
- return map_(std::move(i));
- }
-
- private:
- Fn map_;
- Iterator<I> it_;
-};
-
-/// \brief MapIterator takes ownership of an iterator and a function to apply
-/// on every element. The mapped function is not allowed to fail.
-template <typename Fn, typename From = internal::call_traits::argument_type<0, Fn>,
- typename To = internal::call_traits::return_type<Fn>>
-Iterator<To> MakeMapIterator(Fn map, Iterator<From> it) {
- return Iterator<To>(MapIterator<Fn, From, To>(std::move(map), std::move(it)));
-}
-
-/// \brief Like MapIterator, but where the function can fail.
-template <typename Fn, typename From = internal::call_traits::argument_type<0, Fn>,
- typename To = typename internal::call_traits::return_type<Fn>::ValueType>
-Iterator<To> MakeMaybeMapIterator(Fn map, Iterator<From> it) {
- return Iterator<To>(MapIterator<Fn, From, To>(std::move(map), std::move(it)));
-}
-
-struct FilterIterator {
- enum Action { ACCEPT, REJECT };
-
- template <typename To>
- static Result<std::pair<To, Action>> Reject() {
- return std::make_pair(IterationTraits<To>::End(), REJECT);
- }
-
- template <typename To>
- static Result<std::pair<To, Action>> Accept(To out) {
- return std::make_pair(std::move(out), ACCEPT);
- }
-
- template <typename To>
- static Result<std::pair<To, Action>> MaybeAccept(Result<To> maybe_out) {
- return std::move(maybe_out).Map(Accept<To>);
- }
-
- template <typename To>
- static Result<std::pair<To, Action>> Error(Status s) {
- return s;
- }
-
- template <typename Fn, typename From, typename To>
- class Impl {
- public:
- explicit Impl(Fn filter, Iterator<From> it) : filter_(filter), it_(std::move(it)) {}
-
- Result<To> Next() {
- To out = IterationTraits<To>::End();
- Action action;
-
- for (;;) {
- ARROW_ASSIGN_OR_RAISE(From i, it_.Next());
-
+ return IterationTraits<O>::End();
+ }
+
+ return map_(std::move(i));
+ }
+
+ private:
+ Fn map_;
+ Iterator<I> it_;
+};
+
+/// \brief Construct an Iterator which applies `map` to every element of `it`.
+///
+/// The resulting MapIterator takes ownership of both the source iterator and
+/// the function. The mapped function is not allowed to fail.
+///
+/// \tparam Fn callable type
+/// \tparam From element type of the source iterator; defaults to the type of
+/// Fn's first argument
+/// \tparam To element type of the resulting iterator; defaults to Fn's return type
+/// \param map the function applied to each element (moved into the MapIterator)
+/// \param it the source iterator (moved into the MapIterator)
+template <typename Fn, typename From = internal::call_traits::argument_type<0, Fn>,
+ typename To = internal::call_traits::return_type<Fn>>
+Iterator<To> MakeMapIterator(Fn map, Iterator<From> it) {
+ return Iterator<To>(MapIterator<Fn, From, To>(std::move(map), std::move(it)));
+}
+
+/// \brief Like MakeMapIterator, but where the function can fail.
+///
+/// \tparam Fn callable type
+/// \tparam From element type of the source iterator; defaults to the type of
+/// Fn's first argument
+/// \tparam To element type of the resulting iterator; defaults to the ValueType
+/// member of Fn's return type
+/// \param map the fallible function applied to each element
+/// \param it the source iterator (moved into the MapIterator)
+template <typename Fn, typename From = internal::call_traits::argument_type<0, Fn>,
+ typename To = typename internal::call_traits::return_type<Fn>::ValueType>
+Iterator<To> MakeMaybeMapIterator(Fn map, Iterator<From> it) {
+ return Iterator<To>(MapIterator<Fn, From, To>(std::move(map), std::move(it)));
+}
+
+struct FilterIterator {
+ enum Action { ACCEPT, REJECT };
+
+ template <typename To>
+ static Result<std::pair<To, Action>> Reject() {
+ return std::make_pair(IterationTraits<To>::End(), REJECT);
+ }
+
+ template <typename To>
+ static Result<std::pair<To, Action>> Accept(To out) {
+ return std::make_pair(std::move(out), ACCEPT);
+ }
+
+ template <typename To>
+ static Result<std::pair<To, Action>> MaybeAccept(Result<To> maybe_out) {
+ return std::move(maybe_out).Map(Accept<To>);
+ }
+
+ template <typename To>
+ static Result<std::pair<To, Action>> Error(Status s) {
+ return s;
+ }
+
+ template <typename Fn, typename From, typename To>
+ class Impl {
+ public:
+ explicit Impl(Fn filter, Iterator<From> it) : filter_(filter), it_(std::move(it)) {}
+
+ Result<To> Next() {
+ To out = IterationTraits<To>::End();
+ Action action;
+
+ for (;;) {
+ ARROW_ASSIGN_OR_RAISE(From i, it_.Next());
+
if (IsIterationEnd(i)) {
- return IterationTraits<To>::End();
- }
-
- ARROW_ASSIGN_OR_RAISE(std::tie(out, action), filter_(std::move(i)));
-
- if (action == ACCEPT) return out;
- }
- }
-
- private:
- Fn filter_;
- Iterator<From> it_;
- };
-};
-
-/// \brief Like MapIterator, but where the function can fail or reject elements.
-template <
- typename Fn, typename From = typename internal::call_traits::argument_type<0, Fn>,
- typename Ret = typename internal::call_traits::return_type<Fn>::ValueType,
- typename To = typename std::tuple_element<0, Ret>::type,
- typename Enable = typename std::enable_if<std::is_same<
- typename std::tuple_element<1, Ret>::type, FilterIterator::Action>::value>::type>
-Iterator<To> MakeFilterIterator(Fn filter, Iterator<From> it) {
- return Iterator<To>(
- FilterIterator::Impl<Fn, From, To>(std::move(filter), std::move(it)));
-}
-
-/// \brief FlattenIterator takes an iterator generating iterators and yields a
-/// unified iterator that flattens/concatenates in a single stream.
-template <typename T>
-class FlattenIterator {
- public:
- explicit FlattenIterator(Iterator<Iterator<T>> it) : parent_(std::move(it)) {}
-
- Result<T> Next() {
+ return IterationTraits<To>::End();
+ }
+
+ ARROW_ASSIGN_OR_RAISE(std::tie(out, action), filter_(std::move(i)));
+
+ if (action == ACCEPT) return out;
+ }
+ }
+
+ private:
+ Fn filter_;
+ Iterator<From> it_;
+ };
+};
+
+/// \brief Like MapIterator, but where the function can fail or reject elements.
+///
+/// \tparam Fn callable type
+/// \tparam From element type of the source iterator; defaults to the type of
+/// Fn's first argument
+/// \tparam Ret the ValueType member of Fn's return type; must be tuple-like
+/// \tparam To element type of the resulting iterator: element 0 of Ret
+/// \tparam Enable SFINAE guard: this overload only participates when element 1
+/// of Ret is FilterIterator::Action
+/// \param filter the filtering function (moved into the FilterIterator::Impl)
+/// \param it the source iterator (moved into the FilterIterator::Impl)
+template <
+ typename Fn, typename From = typename internal::call_traits::argument_type<0, Fn>,
+ typename Ret = typename internal::call_traits::return_type<Fn>::ValueType,
+ typename To = typename std::tuple_element<0, Ret>::type,
+ typename Enable = typename std::enable_if<std::is_same<
+ typename std::tuple_element<1, Ret>::type, FilterIterator::Action>::value>::type>
+Iterator<To> MakeFilterIterator(Fn filter, Iterator<From> it) {
+ return Iterator<To>(
+ FilterIterator::Impl<Fn, From, To>(std::move(filter), std::move(it)));
+}
+
+/// \brief FlattenIterator takes an iterator generating iterators and yields a
+/// unified iterator that flattens/concatenates in a single stream.
+template <typename T>
+class FlattenIterator {
+ public:
+ explicit FlattenIterator(Iterator<Iterator<T>> it) : parent_(std::move(it)) {}
+
+ Result<T> Next() {
if (IsIterationEnd(child_)) {
- // Pop from parent's iterator.
- ARROW_ASSIGN_OR_RAISE(child_, parent_.Next());
-
- // Check if final iteration reached.
+ // Pop from parent's iterator.
+ ARROW_ASSIGN_OR_RAISE(child_, parent_.Next());
+
+ // Check if final iteration reached.
if (IsIterationEnd(child_)) {
- return IterationTraits<T>::End();
- }
-
- return Next();
- }
-
- // Pop from child_ and check for depletion.
- ARROW_ASSIGN_OR_RAISE(T out, child_.Next());
+ return IterationTraits<T>::End();
+ }
+
+ return Next();
+ }
+
+ // Pop from child_ and check for depletion.
+ ARROW_ASSIGN_OR_RAISE(T out, child_.Next());
if (IsIterationEnd(out)) {
- // Reset state such that we pop from parent on the recursive call
- child_ = IterationTraits<Iterator<T>>::End();
-
- return Next();
- }
-
- return out;
- }
-
- private:
- Iterator<Iterator<T>> parent_;
- Iterator<T> child_ = IterationTraits<Iterator<T>>::End();
-};
-
-template <typename T>
-Iterator<T> MakeFlattenIterator(Iterator<Iterator<T>> it) {
- return Iterator<T>(FlattenIterator<T>(std::move(it)));
-}
-
+ // Reset state such that we pop from parent on the recursive call
+ child_ = IterationTraits<Iterator<T>>::End();
+
+ return Next();
+ }
+
+ return out;
+ }
+
+ private:
+ Iterator<Iterator<T>> parent_;
+ Iterator<T> child_ = IterationTraits<Iterator<T>>::End();
+};
+
+/// \brief Construct an Iterator<T> that concatenates the iterators yielded by `it`.
+template <typename T>
+Iterator<T> MakeFlattenIterator(Iterator<Iterator<T>> it) {
+  FlattenIterator<T> flattened(std::move(it));
+  return Iterator<T>(std::move(flattened));
+}
+
template <typename Reader>
Iterator<typename Reader::ValueType> MakeIteratorFromReader(
const std::shared_ptr<Reader>& reader) {
return MakeFunctionIterator([reader] { return reader->Next(); });
-}
-
-} // namespace arrow
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/key_value_metadata.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/key_value_metadata.cc
index ad3b686a9bd..d976c5d6623 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/key_value_metadata.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/key_value_metadata.cc
@@ -1,274 +1,274 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <algorithm>
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-#include <sstream>
-#include <string>
-#include <unordered_map>
-#include <unordered_set>
-#include <utility>
-#include <vector>
-
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/util/key_value_metadata.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/sort.h"
-
-using std::size_t;
-
-namespace arrow {
-
-static std::vector<std::string> UnorderedMapKeys(
- const std::unordered_map<std::string, std::string>& map) {
- std::vector<std::string> keys;
- keys.reserve(map.size());
- for (const auto& pair : map) {
- keys.push_back(pair.first);
- }
- return keys;
-}
-
-static std::vector<std::string> UnorderedMapValues(
- const std::unordered_map<std::string, std::string>& map) {
- std::vector<std::string> values;
- values.reserve(map.size());
- for (const auto& pair : map) {
- values.push_back(pair.second);
- }
- return values;
-}
-
-KeyValueMetadata::KeyValueMetadata() : keys_(), values_() {}
-
-KeyValueMetadata::KeyValueMetadata(
- const std::unordered_map<std::string, std::string>& map)
- : keys_(UnorderedMapKeys(map)), values_(UnorderedMapValues(map)) {
- ARROW_CHECK_EQ(keys_.size(), values_.size());
-}
-
-KeyValueMetadata::KeyValueMetadata(std::vector<std::string> keys,
- std::vector<std::string> values)
- : keys_(std::move(keys)), values_(std::move(values)) {
- ARROW_CHECK_EQ(keys.size(), values.size());
-}
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/sort.h"
+
+using std::size_t;
+
+namespace arrow {
+
+// Collect the keys of `map` into a vector, in the map's iteration order.
+static std::vector<std::string> UnorderedMapKeys(
+    const std::unordered_map<std::string, std::string>& map) {
+  std::vector<std::string> result;
+  result.reserve(map.size());
+  for (auto it = map.begin(); it != map.end(); ++it) {
+    result.push_back(it->first);
+  }
+  return result;
+}
+
+// Collect the mapped values of `map` into a vector, in the map's iteration order.
+static std::vector<std::string> UnorderedMapValues(
+    const std::unordered_map<std::string, std::string>& map) {
+  std::vector<std::string> result;
+  result.reserve(map.size());
+  for (auto it = map.begin(); it != map.end(); ++it) {
+    result.push_back(it->second);
+  }
+  return result;
+}
+
+// Construct empty metadata: both the key and the value vectors start out empty.
+KeyValueMetadata::KeyValueMetadata() : keys_(), values_() {}
+
+// Construct metadata from an unordered map. Keys and values are extracted by
+// two separate passes over the same unmodified map, and the check below
+// verifies that both passes produced the same number of entries.
+KeyValueMetadata::KeyValueMetadata(
+ const std::unordered_map<std::string, std::string>& map)
+ : keys_(UnorderedMapKeys(map)), values_(UnorderedMapValues(map)) {
+ ARROW_CHECK_EQ(keys_.size(), values_.size());
+}
+
+// Construct metadata from parallel vectors of keys and values, which must have
+// the same length (checked; a mismatch aborts via ARROW_CHECK_EQ).
+KeyValueMetadata::KeyValueMetadata(std::vector<std::string> keys,
+                                   std::vector<std::string> values)
+    : keys_(std::move(keys)), values_(std::move(values)) {
+  // Check the members, not the parameters: the parameters were moved from in
+  // the initializer list, so comparing their sizes would not validate anything.
+  ARROW_CHECK_EQ(keys_.size(), values_.size());
+}
+
std::shared_ptr<KeyValueMetadata> KeyValueMetadata::Make(
std::vector<std::string> keys, std::vector<std::string> values) {
return std::make_shared<KeyValueMetadata>(std::move(keys), std::move(values));
}
-void KeyValueMetadata::ToUnorderedMap(
- std::unordered_map<std::string, std::string>* out) const {
- DCHECK_NE(out, nullptr);
- const int64_t n = size();
- out->reserve(n);
- for (int64_t i = 0; i < n; ++i) {
- out->insert(std::make_pair(key(i), value(i)));
- }
-}
-
-void KeyValueMetadata::Append(const std::string& key, const std::string& value) {
- keys_.push_back(key);
- values_.push_back(value);
-}
-
-Result<std::string> KeyValueMetadata::Get(const std::string& key) const {
- auto index = FindKey(key);
- if (index < 0) {
- return Status::KeyError(key);
- } else {
- return value(index);
- }
-}
-
-Status KeyValueMetadata::Delete(int64_t index) {
- keys_.erase(keys_.begin() + index);
- values_.erase(values_.begin() + index);
- return Status::OK();
-}
-
-Status KeyValueMetadata::DeleteMany(std::vector<int64_t> indices) {
- std::sort(indices.begin(), indices.end());
- const int64_t size = static_cast<int64_t>(keys_.size());
- indices.push_back(size);
-
- int64_t shift = 0;
- for (int64_t i = 0; i < static_cast<int64_t>(indices.size() - 1); ++i) {
- ++shift;
- const auto start = indices[i] + 1;
- const auto stop = indices[i + 1];
- DCHECK_GE(start, 0);
- DCHECK_LE(start, size);
- DCHECK_GE(stop, 0);
- DCHECK_LE(stop, size);
- for (int64_t index = start; index < stop; ++index) {
- keys_[index - shift] = std::move(keys_[index]);
- values_[index - shift] = std::move(values_[index]);
- }
- }
- keys_.resize(size - shift);
- values_.resize(size - shift);
- return Status::OK();
-}
-
-Status KeyValueMetadata::Delete(const std::string& key) {
- auto index = FindKey(key);
- if (index < 0) {
- return Status::KeyError(key);
- } else {
- return Delete(index);
- }
-}
-
-Status KeyValueMetadata::Set(const std::string& key, const std::string& value) {
- auto index = FindKey(key);
- if (index < 0) {
- Append(key, value);
- } else {
- keys_[index] = key;
- values_[index] = value;
- }
- return Status::OK();
-}
-
-bool KeyValueMetadata::Contains(const std::string& key) const {
- return FindKey(key) >= 0;
-}
-
-void KeyValueMetadata::reserve(int64_t n) {
- DCHECK_GE(n, 0);
- const auto m = static_cast<size_t>(n);
- keys_.reserve(m);
- values_.reserve(m);
-}
-
-int64_t KeyValueMetadata::size() const {
- DCHECK_EQ(keys_.size(), values_.size());
- return static_cast<int64_t>(keys_.size());
-}
-
-const std::string& KeyValueMetadata::key(int64_t i) const {
- DCHECK_GE(i, 0);
- DCHECK_LT(static_cast<size_t>(i), keys_.size());
- return keys_[i];
-}
-
-const std::string& KeyValueMetadata::value(int64_t i) const {
- DCHECK_GE(i, 0);
- DCHECK_LT(static_cast<size_t>(i), values_.size());
- return values_[i];
-}
-
-std::vector<std::pair<std::string, std::string>> KeyValueMetadata::sorted_pairs() const {
- std::vector<std::pair<std::string, std::string>> pairs;
- pairs.reserve(size());
-
- auto indices = internal::ArgSort(keys_);
- for (const auto i : indices) {
- pairs.emplace_back(keys_[i], values_[i]);
- }
- return pairs;
-}
-
-int KeyValueMetadata::FindKey(const std::string& key) const {
- for (size_t i = 0; i < keys_.size(); ++i) {
- if (keys_[i] == key) {
- return static_cast<int>(i);
- }
- }
- return -1;
-}
-
-std::shared_ptr<KeyValueMetadata> KeyValueMetadata::Copy() const {
- return std::make_shared<KeyValueMetadata>(keys_, values_);
-}
-
-std::shared_ptr<KeyValueMetadata> KeyValueMetadata::Merge(
- const KeyValueMetadata& other) const {
- std::unordered_set<std::string> observed_keys;
- std::vector<std::string> result_keys;
- std::vector<std::string> result_values;
-
- result_keys.reserve(keys_.size());
- result_values.reserve(keys_.size());
-
- for (int64_t i = 0; i < other.size(); ++i) {
- const auto& key = other.key(i);
- auto it = observed_keys.find(key);
- if (it == observed_keys.end()) {
- result_keys.push_back(key);
- result_values.push_back(other.value(i));
- observed_keys.insert(key);
- }
- }
- for (size_t i = 0; i < keys_.size(); ++i) {
- auto it = observed_keys.find(keys_[i]);
- if (it == observed_keys.end()) {
- result_keys.push_back(keys_[i]);
- result_values.push_back(values_[i]);
- observed_keys.insert(keys_[i]);
- }
- }
-
- return std::make_shared<KeyValueMetadata>(std::move(result_keys),
- std::move(result_values));
-}
-
-bool KeyValueMetadata::Equals(const KeyValueMetadata& other) const {
- if (size() != other.size()) {
- return false;
- }
-
- auto indices = internal::ArgSort(keys_);
- auto other_indices = internal::ArgSort(other.keys_);
-
- for (int64_t i = 0; i < size(); ++i) {
- auto j = indices[i];
- auto k = other_indices[i];
- if (keys_[j] != other.keys_[k] || values_[j] != other.values_[k]) {
- return false;
- }
- }
- return true;
-}
-
-std::string KeyValueMetadata::ToString() const {
- std::stringstream buffer;
-
- buffer << "\n-- metadata --";
- for (int64_t i = 0; i < size(); ++i) {
- buffer << "\n" << keys_[i] << ": " << values_[i];
- }
-
- return buffer.str();
-}
-
-std::shared_ptr<KeyValueMetadata> key_value_metadata(
- const std::unordered_map<std::string, std::string>& pairs) {
- return std::make_shared<KeyValueMetadata>(pairs);
-}
-
-std::shared_ptr<KeyValueMetadata> key_value_metadata(std::vector<std::string> keys,
- std::vector<std::string> values) {
- return std::make_shared<KeyValueMetadata>(std::move(keys), std::move(values));
-}
-
-} // namespace arrow
+// Insert every key/value pair of this container into `*out`.
+// `out` must not be null. Keys already present in `*out` keep their current
+// value, because std::unordered_map::insert never overwrites.
+void KeyValueMetadata::ToUnorderedMap(
+    std::unordered_map<std::string, std::string>* out) const {
+  DCHECK_NE(out, nullptr);
+  const int64_t num_pairs = size();
+  out->reserve(num_pairs);
+  int64_t pos = 0;
+  while (pos < num_pairs) {
+    out->insert(std::make_pair(key(pos), value(pos)));
+    ++pos;
+  }
+}
+
+// Add a pair at the end of the container. No check is made for an existing
+// entry with the same key.
+void KeyValueMetadata::Append(const std::string& key, const std::string& value) {
+  keys_.emplace_back(key);
+  values_.emplace_back(value);
+}
+
+// Return the value stored under the first occurrence of `key`, or
+// Status::KeyError(key) when the key is absent.
+Result<std::string> KeyValueMetadata::Get(const std::string& key) const {
+  const auto position = FindKey(key);
+  if (position >= 0) {
+    return value(position);
+  }
+  return Status::KeyError(key);
+}
+
+// Remove the pair stored at position `index`.
+//
+// Returns Status::IndexError when `index` is outside [0, size()), and
+// Status::OK() once the pair has been erased. Later pairs move down by one,
+// so indices obtained before the call may no longer be valid.
+Status KeyValueMetadata::Delete(int64_t index) {
+  // vector::erase with an iterator outside [begin, end) is undefined
+  // behaviour, so reject bad indices instead of trusting the caller.
+  if (index < 0 || index >= size()) {
+    return Status::IndexError("Metadata index ", index, " is out of range [0, ", size(),
+                              ")");
+  }
+  keys_.erase(keys_.begin() + index);
+  values_.erase(values_.begin() + index);
+  return Status::OK();
+}
+
+// Remove the pairs at all positions listed in `indices` in one compaction pass.
+//
+// `indices` may be given in any order and may contain repeats; each listed
+// position is removed once. Returns Status::IndexError, leaving the container
+// untouched, when any index is outside [0, size()). Surviving pairs keep
+// their relative order.
+Status KeyValueMetadata::DeleteMany(std::vector<int64_t> indices) {
+  const int64_t num_pairs = static_cast<int64_t>(keys_.size());
+
+  std::sort(indices.begin(), indices.end());
+  // A repeated index would increment `shift` twice for a single removed slot
+  // and make later survivors overwrite the wrong positions.
+  indices.erase(std::unique(indices.begin(), indices.end()), indices.end());
+
+  // After sorting, only the two ends need to be checked.
+  if (!indices.empty() && (indices.front() < 0 || indices.back() >= num_pairs)) {
+    return Status::IndexError("Metadata index out of range [0, ", num_pairs, ")");
+  }
+
+  // Sentinel: lets the loop treat the tail after the last deleted index as
+  // one more run of survivors.
+  indices.push_back(num_pairs);
+
+  int64_t shift = 0;
+  for (size_t i = 0; i + 1 < indices.size(); ++i) {
+    // Everything between two consecutive deleted positions moves down by the
+    // number of deletions seen so far.
+    ++shift;
+    const int64_t start = indices[i] + 1;
+    const int64_t stop = indices[i + 1];
+    for (int64_t index = start; index < stop; ++index) {
+      keys_[index - shift] = std::move(keys_[index]);
+      values_[index - shift] = std::move(values_[index]);
+    }
+  }
+  keys_.resize(num_pairs - shift);
+  values_.resize(num_pairs - shift);
+  return Status::OK();
+}
+
+// Remove the first pair whose key equals `key`; Status::KeyError(key) when no
+// such pair exists.
+Status KeyValueMetadata::Delete(const std::string& key) {
+  const auto position = FindKey(key);
+  if (position >= 0) {
+    return Delete(position);
+  }
+  return Status::KeyError(key);
+}
+
+// Store `value` under `key`: overwrite the first existing entry for the key,
+// or append a new pair when the key is not present. Always returns OK.
+Status KeyValueMetadata::Set(const std::string& key, const std::string& value) {
+  const auto position = FindKey(key);
+  if (position >= 0) {
+    keys_[position] = key;
+    values_[position] = value;
+    return Status::OK();
+  }
+  Append(key, value);
+  return Status::OK();
+}
+
+// True when at least one pair has the given key (FindKey reports a
+// non-negative position).
+bool KeyValueMetadata::Contains(const std::string& key) const {
+  const auto position = FindKey(key);
+  return !(position < 0);
+}
+
+// Reserve storage for `n` pairs in both the key and the value vector.
+// `n` must be non-negative (checked in debug builds only).
+void KeyValueMetadata::reserve(int64_t n) {
+  DCHECK_GE(n, 0);
+  const size_t capacity = static_cast<size_t>(n);
+  keys_.reserve(capacity);
+  values_.reserve(capacity);
+}
+
+// Number of stored pairs. Keys and values are expected to have equal length
+// (checked in debug builds only).
+int64_t KeyValueMetadata::size() const {
+  const auto num_keys = keys_.size();
+  DCHECK_EQ(num_keys, values_.size());
+  return static_cast<int64_t>(num_keys);
+}
+
+// Key at position `i`. The index is validated only by debug-build checks.
+const std::string& KeyValueMetadata::key(int64_t i) const {
+  DCHECK_GE(i, 0);
+  const auto pos = static_cast<size_t>(i);
+  DCHECK_LT(pos, keys_.size());
+  return keys_[pos];
+}
+
+// Value at position `i`. The index is validated only by debug-build checks.
+const std::string& KeyValueMetadata::value(int64_t i) const {
+  DCHECK_GE(i, 0);
+  const auto pos = static_cast<size_t>(i);
+  DCHECK_LT(pos, values_.size());
+  return values_[pos];
+}
+
+// Return copies of all pairs, visited in the order that internal::ArgSort
+// produces for the keys.
+std::vector<std::pair<std::string, std::string>> KeyValueMetadata::sorted_pairs() const {
+  std::vector<std::pair<std::string, std::string>> result;
+  result.reserve(size());
+
+  const auto order = internal::ArgSort(keys_);
+  for (auto it = order.begin(); it != order.end(); ++it) {
+    result.emplace_back(keys_[*it], values_[*it]);
+  }
+  return result;
+}
+
+// Linear search: position of the first pair whose key equals `key`, or -1
+// when there is none.
+int KeyValueMetadata::FindKey(const std::string& key) const {
+  const auto match = std::find(keys_.begin(), keys_.end(), key);
+  if (match == keys_.end()) {
+    return -1;
+  }
+  return static_cast<int>(match - keys_.begin());
+}
+
+// Build a new, independently owned container holding copies of all keys and
+// values.
+std::shared_ptr<KeyValueMetadata> KeyValueMetadata::Copy() const {
+  auto duplicate = std::make_shared<KeyValueMetadata>(keys_, values_);
+  return duplicate;
+}
+
+// Combine `other` with this container into a new KeyValueMetadata.
+// Pairs from `other` are emitted first, so on a key collision the value from
+// `other` is the one kept; only the first occurrence of each key survives.
+std::shared_ptr<KeyValueMetadata> KeyValueMetadata::Merge(
+    const KeyValueMetadata& other) const {
+  std::unordered_set<std::string> seen;
+  std::vector<std::string> merged_keys;
+  std::vector<std::string> merged_values;
+
+  merged_keys.reserve(keys_.size());
+  merged_values.reserve(keys_.size());
+
+  // insert().second is true only the first time a key is seen.
+  for (int64_t i = 0; i < other.size(); ++i) {
+    const std::string& candidate = other.key(i);
+    if (seen.insert(candidate).second) {
+      merged_keys.push_back(candidate);
+      merged_values.push_back(other.value(i));
+    }
+  }
+  for (size_t i = 0; i < keys_.size(); ++i) {
+    if (seen.insert(keys_[i]).second) {
+      merged_keys.push_back(keys_[i]);
+      merged_values.push_back(values_[i]);
+    }
+  }
+
+  return std::make_shared<KeyValueMetadata>(std::move(merged_keys),
+                                            std::move(merged_values));
+}
+
+// Order-insensitive comparison: both containers are walked in the order given
+// by internal::ArgSort over their keys, and must match pair by pair.
+bool KeyValueMetadata::Equals(const KeyValueMetadata& other) const {
+  const int64_t num_pairs = size();
+  if (num_pairs != other.size()) {
+    return false;
+  }
+
+  const auto order = internal::ArgSort(keys_);
+  const auto other_order = internal::ArgSort(other.keys_);
+
+  for (int64_t rank = 0; rank < num_pairs; ++rank) {
+    const auto mine = order[rank];
+    const auto theirs = other_order[rank];
+    const bool same_pair =
+        keys_[mine] == other.keys_[theirs] && values_[mine] == other.values_[theirs];
+    if (!same_pair) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Render a header line followed by one "key: value" line per pair, in
+// storage order. Every line, including the header, is preceded by a newline.
+std::string KeyValueMetadata::ToString() const {
+  std::stringstream out;
+  out << "\n-- metadata --";
+
+  const int64_t num_pairs = size();
+  for (int64_t pos = 0; pos < num_pairs; ++pos) {
+    out << "\n" << keys_[pos] << ": " << values_[pos];
+  }
+  return out.str();
+}
+
+// Factory: build a shared KeyValueMetadata from a key -> value map.
+std::shared_ptr<KeyValueMetadata> key_value_metadata(
+    const std::unordered_map<std::string, std::string>& pairs) {
+  auto metadata = std::make_shared<KeyValueMetadata>(pairs);
+  return metadata;
+}
+
+// Factory: build a shared KeyValueMetadata from parallel key and value
+// vectors, which are moved into the new object.
+std::shared_ptr<KeyValueMetadata> key_value_metadata(std::vector<std::string> keys,
+                                                     std::vector<std::string> values) {
+  auto metadata = std::make_shared<KeyValueMetadata>(std::move(keys), std::move(values));
+  return metadata;
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/key_value_metadata.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/key_value_metadata.h
index d42ab78f667..4930cb08e0d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/key_value_metadata.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/key_value_metadata.h
@@ -1,99 +1,99 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <unordered_map>
-#include <utility>
-#include <vector>
-
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-/// \brief A container for key-value pair type metadata. Not thread-safe
-class ARROW_EXPORT KeyValueMetadata {
- public:
- KeyValueMetadata();
- KeyValueMetadata(std::vector<std::string> keys, std::vector<std::string> values);
- explicit KeyValueMetadata(const std::unordered_map<std::string, std::string>& map);
- virtual ~KeyValueMetadata() = default;
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+/// \brief A container for key-value pair type metadata. Not thread-safe
+class ARROW_EXPORT KeyValueMetadata {
+ public:
+ KeyValueMetadata();
+ KeyValueMetadata(std::vector<std::string> keys, std::vector<std::string> values);
+ explicit KeyValueMetadata(const std::unordered_map<std::string, std::string>& map);
+ virtual ~KeyValueMetadata() = default;
+
static std::shared_ptr<KeyValueMetadata> Make(std::vector<std::string> keys,
std::vector<std::string> values);
- void ToUnorderedMap(std::unordered_map<std::string, std::string>* out) const;
- void Append(const std::string& key, const std::string& value);
-
- Result<std::string> Get(const std::string& key) const;
- bool Contains(const std::string& key) const;
- // Note that deleting may invalidate known indices
- Status Delete(const std::string& key);
- Status Delete(int64_t index);
- Status DeleteMany(std::vector<int64_t> indices);
- Status Set(const std::string& key, const std::string& value);
-
- void reserve(int64_t n);
-
- int64_t size() const;
- const std::string& key(int64_t i) const;
- const std::string& value(int64_t i) const;
- const std::vector<std::string>& keys() const { return keys_; }
- const std::vector<std::string>& values() const { return values_; }
-
- std::vector<std::pair<std::string, std::string>> sorted_pairs() const;
-
- /// \brief Perform linear search for key, returning -1 if not found
- int FindKey(const std::string& key) const;
-
- std::shared_ptr<KeyValueMetadata> Copy() const;
-
- /// \brief Return a new KeyValueMetadata by combining the passed metadata
- /// with this KeyValueMetadata. Colliding keys will be overridden by the
- /// passed metadata. Assumes keys in both containers are unique
- std::shared_ptr<KeyValueMetadata> Merge(const KeyValueMetadata& other) const;
-
- bool Equals(const KeyValueMetadata& other) const;
- std::string ToString() const;
-
- private:
- std::vector<std::string> keys_;
- std::vector<std::string> values_;
-
- ARROW_DISALLOW_COPY_AND_ASSIGN(KeyValueMetadata);
-};
-
-/// \brief Create a KeyValueMetadata instance
-///
-/// \param pairs key-value mapping
-std::shared_ptr<KeyValueMetadata> ARROW_EXPORT
-key_value_metadata(const std::unordered_map<std::string, std::string>& pairs);
-
-/// \brief Create a KeyValueMetadata instance
-///
-/// \param keys sequence of metadata keys
-/// \param values sequence of corresponding metadata values
-std::shared_ptr<KeyValueMetadata> ARROW_EXPORT
-key_value_metadata(std::vector<std::string> keys, std::vector<std::string> values);
-
-} // namespace arrow
+ void ToUnorderedMap(std::unordered_map<std::string, std::string>* out) const;  // inserts every pair into *out; out must be non-null
+ void Append(const std::string& key, const std::string& value);  // adds a pair at the end; no duplicate-key check
+
+ Result<std::string> Get(const std::string& key) const;  // value of the first matching key, else KeyError
+ bool Contains(const std::string& key) const;  // true when FindKey(key) >= 0
+ // Note that deleting may invalidate known indices
+ Status Delete(const std::string& key);  // removes the first matching pair, else KeyError
+ Status Delete(int64_t index);  // removes the pair at `index`; index must lie in [0, size())
+ Status DeleteMany(std::vector<int64_t> indices);  // removes the pairs at the listed positions; order of `indices` is irrelevant
+ Status Set(const std::string& key, const std::string& value);  // overwrites the first matching pair or appends a new one
+
+ void reserve(int64_t n);  // reserves capacity for n pairs in both vectors; n must be >= 0
+
+ int64_t size() const;  // number of stored pairs
+ const std::string& key(int64_t i) const;  // key at position i (bounds checked by DCHECK only)
+ const std::string& value(int64_t i) const;  // value at position i (bounds checked by DCHECK only)
+ const std::vector<std::string>& keys() const { return keys_; }  // all keys in storage order
+ const std::vector<std::string>& values() const { return values_; }  // all values in storage order
+
+ std::vector<std::pair<std::string, std::string>> sorted_pairs() const;  // copies of all pairs, ordered via ArgSort over the keys
+
+ /// \brief Perform linear search for key, returning -1 if not found
+ int FindKey(const std::string& key) const;
+
+ std::shared_ptr<KeyValueMetadata> Copy() const;  // new instance holding copies of keys and values
+
+ /// \brief Return a new KeyValueMetadata by combining the passed metadata
+ /// with this KeyValueMetadata. Colliding keys will be overridden by the
+ /// passed metadata. Assumes keys in both containers are unique
+ std::shared_ptr<KeyValueMetadata> Merge(const KeyValueMetadata& other) const;
+
+ bool Equals(const KeyValueMetadata& other) const;  // pairwise comparison after ordering both sides by key, so storage order is ignored
+ std::string ToString() const;  // "\n-- metadata --" followed by one "\nkey: value" per pair
+
+ private:
+ std::vector<std::string> keys_;  // keys_[i] pairs with values_[i]
+ std::vector<std::string> values_;  // kept the same length as keys_
+
+ ARROW_DISALLOW_COPY_AND_ASSIGN(KeyValueMetadata);
+};
+
+/// \brief Create a KeyValueMetadata instance
+///
+/// \param pairs key-value mapping
+std::shared_ptr<KeyValueMetadata> ARROW_EXPORT
+key_value_metadata(const std::unordered_map<std::string, std::string>& pairs);
+
+/// \brief Create a KeyValueMetadata instance
+///
+/// \param keys sequence of metadata keys
+/// \param values sequence of corresponding metadata values
+std::shared_ptr<KeyValueMetadata> ARROW_EXPORT
+key_value_metadata(std::vector<std::string> keys, std::vector<std::string> values);
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/logging.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/logging.cc
index 65359b44081..3f2a10f41c9 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/logging.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/logging.cc
@@ -1,32 +1,32 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/logging.h"
+
+#ifdef ARROW_WITH_BACKTRACE
+#include <execinfo.h>
+#endif
+#include <cstdlib>
+#include <iostream>
+
+#ifdef ARROW_USE_GLOG
-#include "arrow/util/logging.h"
-
-#ifdef ARROW_WITH_BACKTRACE
-#include <execinfo.h>
-#endif
-#include <cstdlib>
-#include <iostream>
-
-#ifdef ARROW_USE_GLOG
-
-#include <signal.h>
-#include <vector>
+#include <signal.h>
+#include <vector>
#error #include "glog/logging.h"
@@ -49,208 +49,208 @@
#define DCHECK_GE ARROW_DCHECK_GE
#define DCHECK_GT ARROW_DCHECK_GT
-#endif
-
-namespace arrow {
-namespace util {
-
-// This code is adapted from
-// https://github.com/ray-project/ray/blob/master/src/ray/util/logging.cc.
-
-// This is the default implementation of arrow log,
-// which is independent of any libs.
-class CerrLog {
- public:
- explicit CerrLog(ArrowLogLevel severity) : severity_(severity), has_logged_(false) {}
-
- virtual ~CerrLog() {
- if (has_logged_) {
- std::cerr << std::endl;
- }
- if (severity_ == ArrowLogLevel::ARROW_FATAL) {
- PrintBackTrace();
- std::abort();
- }
- }
-
- std::ostream& Stream() {
- has_logged_ = true;
- return std::cerr;
- }
-
- template <class T>
- CerrLog& operator<<(const T& t) {
- if (severity_ != ArrowLogLevel::ARROW_DEBUG) {
- has_logged_ = true;
- std::cerr << t;
- }
- return *this;
- }
-
- protected:
- const ArrowLogLevel severity_;
- bool has_logged_;
-
- void PrintBackTrace() {
-#ifdef ARROW_WITH_BACKTRACE
- void* buffer[255];
- const int calls = backtrace(buffer, static_cast<int>(sizeof(buffer) / sizeof(void*)));
- backtrace_symbols_fd(buffer, calls, 1);
-#endif
- }
-};
-
-#ifdef ARROW_USE_GLOG
-typedef google::LogMessage LoggingProvider;
-#else
-typedef CerrLog LoggingProvider;
-#endif
-
-ArrowLogLevel ArrowLog::severity_threshold_ = ArrowLogLevel::ARROW_INFO;
-// Keep the log directory.
-static std::unique_ptr<std::string> log_dir_;
-
-#ifdef ARROW_USE_GLOG
-
-// Glog's severity map.
-static int GetMappedSeverity(ArrowLogLevel severity) {
- switch (severity) {
- case ArrowLogLevel::ARROW_DEBUG:
- return google::GLOG_INFO;
- case ArrowLogLevel::ARROW_INFO:
- return google::GLOG_INFO;
- case ArrowLogLevel::ARROW_WARNING:
- return google::GLOG_WARNING;
- case ArrowLogLevel::ARROW_ERROR:
- return google::GLOG_ERROR;
- case ArrowLogLevel::ARROW_FATAL:
- return google::GLOG_FATAL;
- default:
- ARROW_LOG(FATAL) << "Unsupported logging level: " << static_cast<int>(severity);
- // This return won't be hit but compiler needs it.
- return google::GLOG_FATAL;
- }
-}
-
-#endif
-
-void ArrowLog::StartArrowLog(const std::string& app_name,
- ArrowLogLevel severity_threshold,
- const std::string& log_dir) {
- severity_threshold_ = severity_threshold;
- // In InitGoogleLogging, it simply keeps the pointer.
- // We need to make sure the app name passed to InitGoogleLogging exist.
- // We should avoid using static string is a dynamic lib.
- static std::unique_ptr<std::string> app_name_;
- app_name_.reset(new std::string(app_name));
- log_dir_.reset(new std::string(log_dir));
-#ifdef ARROW_USE_GLOG
- int mapped_severity_threshold = GetMappedSeverity(severity_threshold_);
- google::SetStderrLogging(mapped_severity_threshold);
- // Enble log file if log_dir is not empty.
- if (!log_dir.empty()) {
- auto dir_ends_with_slash = log_dir;
- if (log_dir[log_dir.length() - 1] != '/') {
- dir_ends_with_slash += "/";
- }
- auto app_name_without_path = app_name;
- if (app_name.empty()) {
- app_name_without_path = "DefaultApp";
- } else {
- // Find the app name without the path.
- size_t pos = app_name.rfind('/');
- if (pos != app_name.npos && pos + 1 < app_name.length()) {
- app_name_without_path = app_name.substr(pos + 1);
- }
- }
- // If InitGoogleLogging is called but SetLogDestination is not called,
- // the log will be output to /tmp besides stderr. If log_dir is not
- // provided, we'd better not call InitGoogleLogging.
- google::InitGoogleLogging(app_name_->c_str());
- google::SetLogFilenameExtension(app_name_without_path.c_str());
- for (int i = static_cast<int>(severity_threshold_);
- i <= static_cast<int>(ArrowLogLevel::ARROW_FATAL); ++i) {
- int level = GetMappedSeverity(static_cast<ArrowLogLevel>(i));
- google::SetLogDestination(level, dir_ends_with_slash.c_str());
- }
- }
-#endif
-}
-
-void ArrowLog::UninstallSignalAction() {
-#ifdef ARROW_USE_GLOG
- ARROW_LOG(DEBUG) << "Uninstall signal handlers.";
- // This signal list comes from glog's signalhandler.cc.
- // https://github.com/google/glog/blob/master/src/signalhandler.cc#L58-L70
- std::vector<int> installed_signals({SIGSEGV, SIGILL, SIGFPE, SIGABRT, SIGTERM});
-#ifdef WIN32
- for (int signal_num : installed_signals) {
- ARROW_CHECK(signal(signal_num, SIG_DFL) != SIG_ERR);
- }
-#else
- struct sigaction sig_action;
- memset(&sig_action, 0, sizeof(sig_action));
- sigemptyset(&sig_action.sa_mask);
- sig_action.sa_handler = SIG_DFL;
- for (int signal_num : installed_signals) {
- ARROW_CHECK(sigaction(signal_num, &sig_action, NULL) == 0);
- }
-#endif
-#endif
-}
-
-void ArrowLog::ShutDownArrowLog() {
-#ifdef ARROW_USE_GLOG
- if (!log_dir_->empty()) {
- google::ShutdownGoogleLogging();
- }
-#endif
-}
-
-void ArrowLog::InstallFailureSignalHandler() {
-#ifdef ARROW_USE_GLOG
- google::InstallFailureSignalHandler();
-#endif
-}
-
-bool ArrowLog::IsLevelEnabled(ArrowLogLevel log_level) {
- return log_level >= severity_threshold_;
-}
-
-ArrowLog::ArrowLog(const char* file_name, int line_number, ArrowLogLevel severity)
- // glog does not have DEBUG level, we can handle it using is_enabled_.
- : logging_provider_(nullptr), is_enabled_(severity >= severity_threshold_) {
-#ifdef ARROW_USE_GLOG
- if (is_enabled_) {
- logging_provider_ =
- new google::LogMessage(file_name, line_number, GetMappedSeverity(severity));
- }
-#else
- auto logging_provider = new CerrLog(severity);
- *logging_provider << file_name << ":" << line_number << ": ";
- logging_provider_ = logging_provider;
-#endif
-}
-
-std::ostream& ArrowLog::Stream() {
- auto logging_provider = reinterpret_cast<LoggingProvider*>(logging_provider_);
-#ifdef ARROW_USE_GLOG
- // Before calling this function, user should check IsEnabled.
- // When IsEnabled == false, logging_provider_ will be empty.
- return logging_provider->stream();
-#else
- return logging_provider->Stream();
-#endif
-}
-
-bool ArrowLog::IsEnabled() const { return is_enabled_; }
-
-ArrowLog::~ArrowLog() {
- if (logging_provider_ != nullptr) {
- delete reinterpret_cast<LoggingProvider*>(logging_provider_);
- logging_provider_ = nullptr;
- }
-}
-
-} // namespace util
-} // namespace arrow
+#endif
+
+namespace arrow {
+namespace util {
+
+// This code is adapted from
+// https://github.com/ray-project/ray/blob/master/src/ray/util/logging.cc.
+
+// This is the default implementation of arrow log,
+// which is independent of any libs.
+class CerrLog {
+ public:
+  explicit CerrLog(ArrowLogLevel severity) : severity_(severity), has_logged_(false) {}
+
+  // Terminates the output line if anything was written. A FATAL message
+  // additionally prints a backtrace and aborts the process.
+  virtual ~CerrLog() {
+    if (has_logged_) {
+      std::cerr << std::endl;
+    }
+    if (severity_ != ArrowLogLevel::ARROW_FATAL) {
+      return;
+    }
+    PrintBackTrace();
+    std::abort();
+  }
+
+  // Direct access to std::cerr; marks the message as non-empty.
+  std::ostream& Stream() {
+    has_logged_ = true;
+    return std::cerr;
+  }
+
+  // Streams `t` to std::cerr unless this message has DEBUG severity, in
+  // which case it is dropped.
+  template <class T>
+  CerrLog& operator<<(const T& t) {
+    if (severity_ == ArrowLogLevel::ARROW_DEBUG) {
+      return *this;
+    }
+    has_logged_ = true;
+    std::cerr << t;
+    return *this;
+  }
+
+ protected:
+  const ArrowLogLevel severity_;
+  bool has_logged_;
+
+  // Dumps up to 255 stack frames to file descriptor 1 when backtrace support
+  // is compiled in; otherwise does nothing.
+  void PrintBackTrace() {
+#ifdef ARROW_WITH_BACKTRACE
+    constexpr int kMaxFrames = 255;
+    void* frames[kMaxFrames];
+    const int depth = backtrace(frames, kMaxFrames);
+    backtrace_symbols_fd(frames, depth, 1);
+#endif
+  }
+};
+
+// Concrete sink type that ArrowLog's logging_provider_ pointer is cast to.
+#ifdef ARROW_USE_GLOG
+using LoggingProvider = google::LogMessage;
+#else
+using LoggingProvider = CerrLog;
+#endif
+
+ArrowLogLevel ArrowLog::severity_threshold_ = ArrowLogLevel::ARROW_INFO;
+// Keep the log directory.
+static std::unique_ptr<std::string> log_dir_;
+
+#ifdef ARROW_USE_GLOG
+
+// Glog's severity map.
+// Translate an Arrow log level into the matching glog severity.
+// DEBUG shares glog's INFO level; any unknown level is a fatal error.
+static int GetMappedSeverity(ArrowLogLevel severity) {
+  switch (severity) {
+    case ArrowLogLevel::ARROW_DEBUG:
+    case ArrowLogLevel::ARROW_INFO:
+      return google::GLOG_INFO;
+    case ArrowLogLevel::ARROW_WARNING:
+      return google::GLOG_WARNING;
+    case ArrowLogLevel::ARROW_ERROR:
+      return google::GLOG_ERROR;
+    case ArrowLogLevel::ARROW_FATAL:
+      return google::GLOG_FATAL;
+    default:
+      break;
+  }
+  ARROW_LOG(FATAL) << "Unsupported logging level: " << static_cast<int>(severity);
+  // This return won't be hit but compiler needs it.
+  return google::GLOG_FATAL;
+}
+
+#endif
+
+// Configure logging: record the severity threshold and log directory and,
+// when built with glog, set up stderr and (optionally) file logging.
+void ArrowLog::StartArrowLog(const std::string& app_name,
+                             ArrowLogLevel severity_threshold,
+                             const std::string& log_dir) {
+  severity_threshold_ = severity_threshold;
+  // InitGoogleLogging only keeps the pointer it is given, so the application
+  // name must stay alive after this call. It is held through a static
+  // unique_ptr because a static std::string should be avoided in a dynamic
+  // library.
+  static std::unique_ptr<std::string> app_name_;
+  app_name_.reset(new std::string(app_name));
+  log_dir_.reset(new std::string(log_dir));
+#ifdef ARROW_USE_GLOG
+  google::SetStderrLogging(GetMappedSeverity(severity_threshold_));
+  // File logging is enabled only when a log directory was given. If
+  // InitGoogleLogging were called without SetLogDestination, glog would also
+  // write to /tmp, so it is not called at all in that case.
+  if (log_dir.empty()) {
+    return;
+  }
+
+  std::string log_prefix = log_dir;
+  if (log_prefix.back() != '/') {
+    log_prefix += "/";
+  }
+
+  // Strip any leading path from the application name.
+  std::string base_name = app_name;
+  if (app_name.empty()) {
+    base_name = "DefaultApp";
+  } else {
+    const size_t slash = app_name.rfind('/');
+    if (slash != std::string::npos && slash + 1 < app_name.length()) {
+      base_name = app_name.substr(slash + 1);
+    }
+  }
+
+  google::InitGoogleLogging(app_name_->c_str());
+  google::SetLogFilenameExtension(base_name.c_str());
+  const int first_level = static_cast<int>(severity_threshold_);
+  const int last_level = static_cast<int>(ArrowLogLevel::ARROW_FATAL);
+  for (int raw_level = first_level; raw_level <= last_level; ++raw_level) {
+    const int glog_level = GetMappedSeverity(static_cast<ArrowLogLevel>(raw_level));
+    google::SetLogDestination(glog_level, log_prefix.c_str());
+  }
+#endif
+}
+
+// Restore the default disposition for the signals listed below.
+// No-op when built without glog.
+void ArrowLog::UninstallSignalAction() {
+#ifdef ARROW_USE_GLOG
+  ARROW_LOG(DEBUG) << "Uninstall signal handlers.";
+  // This signal list comes from glog's signalhandler.cc.
+  // https://github.com/google/glog/blob/master/src/signalhandler.cc#L58-L70
+  const std::vector<int> signals_to_reset = {SIGSEGV, SIGILL, SIGFPE, SIGABRT, SIGTERM};
+#ifdef WIN32
+  for (int signal_num : signals_to_reset) {
+    ARROW_CHECK(signal(signal_num, SIG_DFL) != SIG_ERR);
+  }
+#else
+  // Names used inside ARROW_CHECK are kept as-is: the macro stringifies its
+  // condition into the failure message.
+  struct sigaction sig_action;
+  memset(&sig_action, 0, sizeof(sig_action));
+  sigemptyset(&sig_action.sa_mask);
+  sig_action.sa_handler = SIG_DFL;
+  for (int signal_num : signals_to_reset) {
+    ARROW_CHECK(sigaction(signal_num, &sig_action, NULL) == 0);
+  }
+#endif
+#endif
+}
+
+// Shut glog down if file logging was started by StartArrowLog.
+// No-op when built without glog, when StartArrowLog was never called, or when
+// it was called with an empty log directory.
+void ArrowLog::ShutDownArrowLog() {
+#ifdef ARROW_USE_GLOG
+  // log_dir_ stays null until StartArrowLog runs, so it must be tested before
+  // being dereferenced.
+  if (log_dir_ != nullptr && !log_dir_->empty()) {
+    google::ShutdownGoogleLogging();
+  }
+#endif
+}
+
+// Forward to glog's failure signal handler installation; does nothing when
+// built without glog.
+void ArrowLog::InstallFailureSignalHandler() {
+#if defined(ARROW_USE_GLOG)
+  google::InstallFailureSignalHandler();
+#endif
+}
+
+// A level is enabled when it is not below the configured severity threshold.
+bool ArrowLog::IsLevelEnabled(ArrowLogLevel log_level) {
+  const bool below_threshold = log_level < severity_threshold_;
+  return !below_threshold;
+}
+
+// Start a log message tagged with its source location.
+// With glog, a provider is created only when `severity` passes the threshold.
+// Without glog, a CerrLog is always created and receives "file:line: ".
+ArrowLog::ArrowLog(const char* file_name, int line_number, ArrowLogLevel severity)
+    // glog does not have DEBUG level, we can handle it using is_enabled_.
+    : logging_provider_(nullptr), is_enabled_(severity >= severity_threshold_) {
+#ifndef ARROW_USE_GLOG
+  CerrLog* cerr_log = new CerrLog(severity);
+  (*cerr_log) << file_name << ":" << line_number << ": ";
+  logging_provider_ = cerr_log;
+#else
+  if (!is_enabled_) {
+    return;
+  }
+  logging_provider_ =
+      new google::LogMessage(file_name, line_number, GetMappedSeverity(severity));
+#endif
+}
+
+// Return the output stream of the underlying provider.
+// With glog the provider is null when the message is disabled, so callers
+// should check IsEnabled() before calling this.
+std::ostream& ArrowLog::Stream() {
+  LoggingProvider* provider = reinterpret_cast<LoggingProvider*>(logging_provider_);
+#ifdef ARROW_USE_GLOG
+  return provider->stream();
+#else
+  return provider->Stream();
+#endif
+}
+
+// Whether this message's severity passed the threshold at construction time.
+bool ArrowLog::IsEnabled() const {
+  return is_enabled_;
+}
+
+// Destroy the provider, if any, through its concrete type so that its own
+// destructor runs.
+ArrowLog::~ArrowLog() {
+  if (logging_provider_ == nullptr) {
+    return;
+  }
+  delete reinterpret_cast<LoggingProvider*>(logging_provider_);
+  logging_provider_ = nullptr;
+}
+
+} // namespace util
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/logging.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/logging.h
index 15a0188ab76..877e8f464ae 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/logging.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/logging.h
@@ -1,131 +1,131 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#ifdef GANDIVA_IR
-
-// The LLVM IR code doesn't have an NDEBUG mode. And, it shouldn't include references to
-// streams or stdc++. So, making the DCHECK calls void in that case.
-
-#define ARROW_IGNORE_EXPR(expr) ((void)(expr))
-
-#define DCHECK(condition) ARROW_IGNORE_EXPR(condition)
-#define DCHECK_OK(status) ARROW_IGNORE_EXPR(status)
-#define DCHECK_EQ(val1, val2) ARROW_IGNORE_EXPR(val1)
-#define DCHECK_NE(val1, val2) ARROW_IGNORE_EXPR(val1)
-#define DCHECK_LE(val1, val2) ARROW_IGNORE_EXPR(val1)
-#define DCHECK_LT(val1, val2) ARROW_IGNORE_EXPR(val1)
-#define DCHECK_GE(val1, val2) ARROW_IGNORE_EXPR(val1)
-#define DCHECK_GT(val1, val2) ARROW_IGNORE_EXPR(val1)
-
-#else // !GANDIVA_IR
-
-#include <memory>
-#include <ostream>
-#include <string>
-
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace util {
-
-enum class ArrowLogLevel : int {
- ARROW_DEBUG = -1,
- ARROW_INFO = 0,
- ARROW_WARNING = 1,
- ARROW_ERROR = 2,
- ARROW_FATAL = 3
-};
-
-#define ARROW_LOG_INTERNAL(level) ::arrow::util::ArrowLog(__FILE__, __LINE__, level)
-#define ARROW_LOG(level) ARROW_LOG_INTERNAL(::arrow::util::ArrowLogLevel::ARROW_##level)
-
-#define ARROW_IGNORE_EXPR(expr) ((void)(expr))
-
-#define ARROW_CHECK(condition) \
- ARROW_PREDICT_TRUE(condition) \
- ? ARROW_IGNORE_EXPR(0) \
- : ::arrow::util::Voidify() & \
- ::arrow::util::ArrowLog(__FILE__, __LINE__, \
- ::arrow::util::ArrowLogLevel::ARROW_FATAL) \
- << " Check failed: " #condition " "
-
-// If 'to_call' returns a bad status, CHECK immediately with a logged message
-// of 'msg' followed by the status.
-#define ARROW_CHECK_OK_PREPEND(to_call, msg) \
- do { \
- ::arrow::Status _s = (to_call); \
- ARROW_CHECK(_s.ok()) << "Operation failed: " << ARROW_STRINGIFY(to_call) << "\n" \
- << (msg) << ": " << _s.ToString(); \
- } while (false)
-
-// If the status is bad, CHECK immediately, appending the status to the
-// logged message.
-#define ARROW_CHECK_OK(s) ARROW_CHECK_OK_PREPEND(s, "Bad status")
-
-#define ARROW_CHECK_EQ(val1, val2) ARROW_CHECK((val1) == (val2))
-#define ARROW_CHECK_NE(val1, val2) ARROW_CHECK((val1) != (val2))
-#define ARROW_CHECK_LE(val1, val2) ARROW_CHECK((val1) <= (val2))
-#define ARROW_CHECK_LT(val1, val2) ARROW_CHECK((val1) < (val2))
-#define ARROW_CHECK_GE(val1, val2) ARROW_CHECK((val1) >= (val2))
-#define ARROW_CHECK_GT(val1, val2) ARROW_CHECK((val1) > (val2))
-
-#ifdef NDEBUG
-#define ARROW_DFATAL ::arrow::util::ArrowLogLevel::ARROW_WARNING
-
-// CAUTION: DCHECK_OK() always evaluates its argument, but other DCHECK*() macros
-// only do so in debug mode.
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#ifdef GANDIVA_IR
+
+// The LLVM IR code doesn't have an NDEBUG mode. And, it shouldn't include references to
+// streams or stdc++. So, making the DCHECK calls void in that case.
+
+#define ARROW_IGNORE_EXPR(expr) ((void)(expr))
+
+#define DCHECK(condition) ARROW_IGNORE_EXPR(condition)
+#define DCHECK_OK(status) ARROW_IGNORE_EXPR(status)
+#define DCHECK_EQ(val1, val2) ARROW_IGNORE_EXPR(val1)
+#define DCHECK_NE(val1, val2) ARROW_IGNORE_EXPR(val1)
+#define DCHECK_LE(val1, val2) ARROW_IGNORE_EXPR(val1)
+#define DCHECK_LT(val1, val2) ARROW_IGNORE_EXPR(val1)
+#define DCHECK_GE(val1, val2) ARROW_IGNORE_EXPR(val1)
+#define DCHECK_GT(val1, val2) ARROW_IGNORE_EXPR(val1)
+
+#else // !GANDIVA_IR
+
+#include <memory>
+#include <ostream>
+#include <string>
+
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace util {
+
+enum class ArrowLogLevel : int {  // severities in ascending numeric order; ArrowLog compares them against its threshold
+ ARROW_DEBUG = -1,  // below the default threshold (ARROW_INFO); glog builds map it to GLOG_INFO
+ ARROW_INFO = 0,  // default value of ArrowLog::severity_threshold_
+ ARROW_WARNING = 1,
+ ARROW_ERROR = 2,
+ ARROW_FATAL = 3  // used by ARROW_CHECK; CerrLog aborts the process after a message of this level
+};
+
+#define ARROW_LOG_INTERNAL(level) ::arrow::util::ArrowLog(__FILE__, __LINE__, level)
+#define ARROW_LOG(level) ARROW_LOG_INTERNAL(::arrow::util::ArrowLogLevel::ARROW_##level)
+
+#define ARROW_IGNORE_EXPR(expr) ((void)(expr))
+
+#define ARROW_CHECK(condition) \
+ ARROW_PREDICT_TRUE(condition) \
+ ? ARROW_IGNORE_EXPR(0) \
+ : ::arrow::util::Voidify() & \
+ ::arrow::util::ArrowLog(__FILE__, __LINE__, \
+ ::arrow::util::ArrowLogLevel::ARROW_FATAL) \
+ << " Check failed: " #condition " "
+
+// If 'to_call' returns a bad status, CHECK immediately with a logged message
+// of 'msg' followed by the status.
+#define ARROW_CHECK_OK_PREPEND(to_call, msg) \
+ do { \
+ ::arrow::Status _s = (to_call); \
+ ARROW_CHECK(_s.ok()) << "Operation failed: " << ARROW_STRINGIFY(to_call) << "\n" \
+ << (msg) << ": " << _s.ToString(); \
+ } while (false)
+
+// If the status is bad, CHECK immediately, appending the status to the
+// logged message.
+#define ARROW_CHECK_OK(s) ARROW_CHECK_OK_PREPEND(s, "Bad status")
+
+#define ARROW_CHECK_EQ(val1, val2) ARROW_CHECK((val1) == (val2))
+#define ARROW_CHECK_NE(val1, val2) ARROW_CHECK((val1) != (val2))
+#define ARROW_CHECK_LE(val1, val2) ARROW_CHECK((val1) <= (val2))
+#define ARROW_CHECK_LT(val1, val2) ARROW_CHECK((val1) < (val2))
+#define ARROW_CHECK_GE(val1, val2) ARROW_CHECK((val1) >= (val2))
+#define ARROW_CHECK_GT(val1, val2) ARROW_CHECK((val1) > (val2))
+
+#ifdef NDEBUG
+#define ARROW_DFATAL ::arrow::util::ArrowLogLevel::ARROW_WARNING
+
+// CAUTION: DCHECK_OK() always evaluates its argument, but other DCHECK*() macros
+// only do so in debug mode.
+
#define ARROW_DCHECK(condition) \
- while (false) ARROW_IGNORE_EXPR(condition); \
- while (false) ::arrow::util::detail::NullLog()
+ while (false) ARROW_IGNORE_EXPR(condition); \
+ while (false) ::arrow::util::detail::NullLog()
#define ARROW_DCHECK_OK(s) \
ARROW_IGNORE_EXPR(s); \
- while (false) ::arrow::util::detail::NullLog()
+ while (false) ::arrow::util::detail::NullLog()
#define ARROW_DCHECK_EQ(val1, val2) \
- while (false) ARROW_IGNORE_EXPR(val1); \
- while (false) ARROW_IGNORE_EXPR(val2); \
- while (false) ::arrow::util::detail::NullLog()
+ while (false) ARROW_IGNORE_EXPR(val1); \
+ while (false) ARROW_IGNORE_EXPR(val2); \
+ while (false) ::arrow::util::detail::NullLog()
#define ARROW_DCHECK_NE(val1, val2) \
- while (false) ARROW_IGNORE_EXPR(val1); \
- while (false) ARROW_IGNORE_EXPR(val2); \
- while (false) ::arrow::util::detail::NullLog()
+ while (false) ARROW_IGNORE_EXPR(val1); \
+ while (false) ARROW_IGNORE_EXPR(val2); \
+ while (false) ::arrow::util::detail::NullLog()
#define ARROW_DCHECK_LE(val1, val2) \
- while (false) ARROW_IGNORE_EXPR(val1); \
- while (false) ARROW_IGNORE_EXPR(val2); \
- while (false) ::arrow::util::detail::NullLog()
+ while (false) ARROW_IGNORE_EXPR(val1); \
+ while (false) ARROW_IGNORE_EXPR(val2); \
+ while (false) ::arrow::util::detail::NullLog()
#define ARROW_DCHECK_LT(val1, val2) \
- while (false) ARROW_IGNORE_EXPR(val1); \
- while (false) ARROW_IGNORE_EXPR(val2); \
- while (false) ::arrow::util::detail::NullLog()
+ while (false) ARROW_IGNORE_EXPR(val1); \
+ while (false) ARROW_IGNORE_EXPR(val2); \
+ while (false) ::arrow::util::detail::NullLog()
#define ARROW_DCHECK_GE(val1, val2) \
- while (false) ARROW_IGNORE_EXPR(val1); \
- while (false) ARROW_IGNORE_EXPR(val2); \
- while (false) ::arrow::util::detail::NullLog()
+ while (false) ARROW_IGNORE_EXPR(val1); \
+ while (false) ARROW_IGNORE_EXPR(val2); \
+ while (false) ::arrow::util::detail::NullLog()
#define ARROW_DCHECK_GT(val1, val2) \
- while (false) ARROW_IGNORE_EXPR(val1); \
- while (false) ARROW_IGNORE_EXPR(val2); \
- while (false) ::arrow::util::detail::NullLog()
-
-#else
-#define ARROW_DFATAL ::arrow::util::ArrowLogLevel::ARROW_FATAL
-
+ while (false) ARROW_IGNORE_EXPR(val1); \
+ while (false) ARROW_IGNORE_EXPR(val2); \
+ while (false) ::arrow::util::detail::NullLog()
+
+#else
+#define ARROW_DFATAL ::arrow::util::ArrowLogLevel::ARROW_FATAL
+
#define ARROW_DCHECK ARROW_CHECK
#define ARROW_DCHECK_OK ARROW_CHECK_OK
#define ARROW_DCHECK_EQ ARROW_CHECK_EQ
@@ -134,9 +134,9 @@ enum class ArrowLogLevel : int {
#define ARROW_DCHECK_LT ARROW_CHECK_LT
#define ARROW_DCHECK_GE ARROW_CHECK_GE
#define ARROW_DCHECK_GT ARROW_CHECK_GT
-
-#endif // NDEBUG
-
+
+#endif // NDEBUG
+
#define DCHECK ARROW_DCHECK
#define DCHECK_OK ARROW_DCHECK_OK
#define DCHECK_EQ ARROW_DCHECK_EQ
@@ -146,114 +146,114 @@ enum class ArrowLogLevel : int {
#define DCHECK_GE ARROW_DCHECK_GE
#define DCHECK_GT ARROW_DCHECK_GT
-// This code is adapted from
-// https://github.com/ray-project/ray/blob/master/src/ray/util/logging.h.
-
-// To make the logging lib pluggable with other logging libs and make
-// the implementation unawared by the user, ArrowLog is only a declaration
-// which hide the implementation into logging.cc file.
-// In logging.cc, we can choose different log libs using different macros.
-
-// This is also a null log which does not output anything.
-class ARROW_EXPORT ArrowLogBase {
- public:
- virtual ~ArrowLogBase() {}
-
- virtual bool IsEnabled() const { return false; }
-
- template <typename T>
- ArrowLogBase& operator<<(const T& t) {
- if (IsEnabled()) {
- Stream() << t;
- }
- return *this;
- }
-
- protected:
- virtual std::ostream& Stream() = 0;
-};
-
-class ARROW_EXPORT ArrowLog : public ArrowLogBase {
- public:
- ArrowLog(const char* file_name, int line_number, ArrowLogLevel severity);
- ~ArrowLog() override;
-
- /// Return whether or not current logging instance is enabled.
- ///
- /// \return True if logging is enabled and false otherwise.
- bool IsEnabled() const override;
-
- /// The init function of arrow log for a program which should be called only once.
- ///
- /// \param appName The app name which starts the log.
- /// \param severity_threshold Logging threshold for the program.
- /// \param logDir Logging output file name. If empty, the log won't output to file.
- static void StartArrowLog(const std::string& appName,
- ArrowLogLevel severity_threshold = ArrowLogLevel::ARROW_INFO,
- const std::string& logDir = "");
-
- /// The shutdown function of arrow log, it should be used with StartArrowLog as a pair.
- static void ShutDownArrowLog();
-
- /// Install the failure signal handler to output call stack when crash.
- /// If glog is not installed, this function won't do anything.
- static void InstallFailureSignalHandler();
-
- /// Uninstall the signal actions installed by InstallFailureSignalHandler.
- static void UninstallSignalAction();
-
- /// Return whether or not the log level is enabled in current setting.
- ///
- /// \param log_level The input log level to test.
- /// \return True if input log level is not lower than the threshold.
- static bool IsLevelEnabled(ArrowLogLevel log_level);
-
- private:
- ARROW_DISALLOW_COPY_AND_ASSIGN(ArrowLog);
-
- // Hide the implementation of log provider by void *.
- // Otherwise, lib user may define the same macro to use the correct header file.
- void* logging_provider_;
- /// True if log messages should be logged and false if they should be ignored.
- bool is_enabled_;
-
- static ArrowLogLevel severity_threshold_;
-
- protected:
- std::ostream& Stream() override;
-};
-
-// This class make ARROW_CHECK compilation pass to change the << operator to void.
-// This class is copied from glog.
-class ARROW_EXPORT Voidify {
- public:
- Voidify() {}
- // This has to be an operator with a precedence lower than << but
- // higher than ?:
- void operator&(ArrowLogBase&) {}
-};
-
-namespace detail {
-
-/// @brief A helper for the nil log sink.
-///
-/// Using this helper is analogous to sending log messages to /dev/null:
-/// nothing gets logged.
-class NullLog {
- public:
- /// The no-op output operator.
- ///
- /// @param [in] t
- /// The object to send into the nil sink.
- /// @return Reference to the updated object.
- template <class T>
- NullLog& operator<<(const T& t) {
- return *this;
- }
-};
-
-} // namespace detail
-} // namespace util
-} // namespace arrow
-
-#endif // GANDIVA_IR
+// This code is adapted from
+// https://github.com/ray-project/ray/blob/master/src/ray/util/logging.h.
+
+// To make the logging lib pluggable with other logging libs and make
+// the implementation unawared by the user, ArrowLog is only a declaration
+// which hide the implementation into logging.cc file.
+// In logging.cc, we can choose different log libs using different macros.
+
+// This is also a null log which does not output anything.
+class ARROW_EXPORT ArrowLogBase {
+ public:
+ virtual ~ArrowLogBase() {}
+
+ virtual bool IsEnabled() const { return false; }
+
+ template <typename T>
+ ArrowLogBase& operator<<(const T& t) {
+ if (IsEnabled()) {
+ Stream() << t;
+ }
+ return *this;
+ }
+
+ protected:
+ virtual std::ostream& Stream() = 0;
+};
+
+class ARROW_EXPORT ArrowLog : public ArrowLogBase {
+ public:
+ ArrowLog(const char* file_name, int line_number, ArrowLogLevel severity);
+ ~ArrowLog() override;
+
+ /// Return whether or not current logging instance is enabled.
+ ///
+ /// \return True if logging is enabled and false otherwise.
+ bool IsEnabled() const override;
+
+ /// The init function of arrow log for a program which should be called only once.
+ ///
+ /// \param appName The app name which starts the log.
+ /// \param severity_threshold Logging threshold for the program.
+ /// \param logDir Logging output file name. If empty, the log won't output to file.
+ static void StartArrowLog(const std::string& appName,
+ ArrowLogLevel severity_threshold = ArrowLogLevel::ARROW_INFO,
+ const std::string& logDir = "");
+
+ /// The shutdown function of arrow log, it should be used with StartArrowLog as a pair.
+ static void ShutDownArrowLog();
+
+ /// Install the failure signal handler to output call stack when crash.
+ /// If glog is not installed, this function won't do anything.
+ static void InstallFailureSignalHandler();
+
+ /// Uninstall the signal actions installed by InstallFailureSignalHandler.
+ static void UninstallSignalAction();
+
+ /// Return whether or not the log level is enabled in current setting.
+ ///
+ /// \param log_level The input log level to test.
+ /// \return True if input log level is not lower than the threshold.
+ static bool IsLevelEnabled(ArrowLogLevel log_level);
+
+ private:
+ ARROW_DISALLOW_COPY_AND_ASSIGN(ArrowLog);
+
+ // Hide the implementation of log provider by void *.
+ // Otherwise, lib user may define the same macro to use the correct header file.
+ void* logging_provider_;
+ /// True if log messages should be logged and false if they should be ignored.
+ bool is_enabled_;
+
+ static ArrowLogLevel severity_threshold_;
+
+ protected:
+ std::ostream& Stream() override;
+};
+
+// This class make ARROW_CHECK compilation pass to change the << operator to void.
+// This class is copied from glog.
+class ARROW_EXPORT Voidify {
+ public:
+ Voidify() {}
+ // This has to be an operator with a precedence lower than << but
+ // higher than ?:
+ void operator&(ArrowLogBase&) {}
+};
+
+namespace detail {
+
+/// @brief A helper for the nil log sink.
+///
+/// Using this helper is analogous to sending log messages to /dev/null:
+/// nothing gets logged.
+class NullLog {
+ public:
+ /// The no-op output operator.
+ ///
+ /// @param [in] t
+ /// The object to send into the nil sink.
+ /// @return Reference to the updated object.
+ template <class T>
+ NullLog& operator<<(const T& t) {
+ return *this;
+ }
+};
+
+} // namespace detail
+} // namespace util
+} // namespace arrow
+
+#endif // GANDIVA_IR
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/macros.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/macros.h
index 548cc041ec8..49008aab24f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/macros.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/macros.h
@@ -1,185 +1,185 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-
-#define ARROW_EXPAND(x) x
-#define ARROW_STRINGIFY(x) #x
-#define ARROW_CONCAT(x, y) x##y
-
-// From Google gutil
-#ifndef ARROW_DISALLOW_COPY_AND_ASSIGN
-#define ARROW_DISALLOW_COPY_AND_ASSIGN(TypeName) \
- TypeName(const TypeName&) = delete; \
- void operator=(const TypeName&) = delete
-#endif
-
-#ifndef ARROW_DEFAULT_MOVE_AND_ASSIGN
-#define ARROW_DEFAULT_MOVE_AND_ASSIGN(TypeName) \
- TypeName(TypeName&&) = default; \
- TypeName& operator=(TypeName&&) = default
-#endif
-
-#define ARROW_UNUSED(x) (void)(x)
-#define ARROW_ARG_UNUSED(x)
-//
-// GCC can be told that a certain branch is not likely to be taken (for
-// instance, a CHECK failure), and use that information in static analysis.
-// Giving it this information can help it optimize for the common case in
-// the absence of better information (ie. -fprofile-arcs).
-//
-#if defined(__GNUC__)
-#define ARROW_PREDICT_FALSE(x) (__builtin_expect(!!(x), 0))
-#define ARROW_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
-#define ARROW_NORETURN __attribute__((noreturn))
-#define ARROW_NOINLINE __attribute__((noinline))
-#define ARROW_PREFETCH(addr) __builtin_prefetch(addr)
-#elif defined(_MSC_VER)
-#define ARROW_NORETURN __declspec(noreturn)
-#define ARROW_NOINLINE __declspec(noinline)
-#define ARROW_PREDICT_FALSE(x) (x)
-#define ARROW_PREDICT_TRUE(x) (x)
-#define ARROW_PREFETCH(addr)
-#else
-#define ARROW_NORETURN
-#define ARROW_PREDICT_FALSE(x) (x)
-#define ARROW_PREDICT_TRUE(x) (x)
-#define ARROW_PREFETCH(addr)
-#endif
-
-#if (defined(__GNUC__) || defined(__APPLE__))
-#define ARROW_MUST_USE_RESULT __attribute__((warn_unused_result))
-#elif defined(_MSC_VER)
-#define ARROW_MUST_USE_RESULT
-#else
-#define ARROW_MUST_USE_RESULT
-#endif
-
-#if defined(__clang__)
-// Only clang supports warn_unused_result as a type annotation.
-#define ARROW_MUST_USE_TYPE ARROW_MUST_USE_RESULT
-#else
-#define ARROW_MUST_USE_TYPE
-#endif
-
-// ----------------------------------------------------------------------
-// C++/CLI support macros (see ARROW-1134)
-
-#ifndef NULLPTR
-
-#ifdef __cplusplus_cli
-#define NULLPTR __nullptr
-#else
-#define NULLPTR nullptr
-#endif
-
-#endif // ifndef NULLPTR
-
-// ----------------------------------------------------------------------
-
-// clang-format off
-// [[deprecated]] is only available in C++14, use this for the time being
-// This macro takes an optional deprecation message
-#ifdef __COVERITY__
-# define ARROW_DEPRECATED(...)
-# define ARROW_DEPRECATED_USING(...)
-#elif __cplusplus > 201103L
-# define ARROW_DEPRECATED(...) [[deprecated(__VA_ARGS__)]]
-# define ARROW_DEPRECATED_USING(...) ARROW_DEPRECATED(__VA_ARGS__)
-#else
-# ifdef __GNUC__
-# define ARROW_DEPRECATED(...) __attribute__((deprecated(__VA_ARGS__)))
-# define ARROW_DEPRECATED_USING(...) ARROW_DEPRECATED(__VA_ARGS__)
-# elif defined(_MSC_VER)
-# define ARROW_DEPRECATED(...) __declspec(deprecated(__VA_ARGS__))
-# define ARROW_DEPRECATED_USING(...)
-# else
-# define ARROW_DEPRECATED(...)
-# define ARROW_DEPRECATED_USING(...)
-# endif
-#endif
-// clang-format on
-
-// ----------------------------------------------------------------------
-
-// macros to disable padding
-// these macros are portable across different compilers and platforms
-//[https://github.com/google/flatbuffers/blob/master/include/flatbuffers/flatbuffers.h#L1355]
-#if !defined(MANUALLY_ALIGNED_STRUCT)
-#if defined(_MSC_VER)
-#define MANUALLY_ALIGNED_STRUCT(alignment) \
- __pragma(pack(1)); \
- struct __declspec(align(alignment))
-#define STRUCT_END(name, size) \
- __pragma(pack()); \
- static_assert(sizeof(name) == size, "compiler breaks packing rules")
-#elif defined(__GNUC__) || defined(__clang__)
-#define MANUALLY_ALIGNED_STRUCT(alignment) \
- _Pragma("pack(1)") struct __attribute__((aligned(alignment)))
-#define STRUCT_END(name, size) \
- _Pragma("pack()") static_assert(sizeof(name) == size, "compiler breaks packing rules")
-#else
-#error Unknown compiler, please define structure alignment macros
-#endif
-#endif // !defined(MANUALLY_ALIGNED_STRUCT)
-
-// ----------------------------------------------------------------------
-// Convenience macro disabling a particular UBSan check in a function
-
-#if defined(__clang__)
-#define ARROW_DISABLE_UBSAN(feature) __attribute__((no_sanitize(feature)))
-#else
-#define ARROW_DISABLE_UBSAN(feature)
-#endif
-
-// ----------------------------------------------------------------------
-// Machine information
-
-#if INTPTR_MAX == INT64_MAX
-#define ARROW_BITNESS 64
-#elif INTPTR_MAX == INT32_MAX
-#define ARROW_BITNESS 32
-#else
-#error Unexpected INTPTR_MAX
-#endif
-
-// ----------------------------------------------------------------------
-// From googletest
-// (also in parquet-cpp)
-
-// When you need to test the private or protected members of a class,
-// use the FRIEND_TEST macro to declare your tests as friends of the
-// class. For example:
-//
-// class MyClass {
-// private:
-// void MyMethod();
-// FRIEND_TEST(MyClassTest, MyMethod);
-// };
-//
-// class MyClassTest : public testing::Test {
-// // ...
-// };
-//
-// TEST_F(MyClassTest, MyMethod) {
-// // Can call MyClass::MyMethod() here.
-// }
-
-#define FRIEND_TEST(test_case_name, test_name) \
- friend class test_case_name##_##test_name##_Test
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+
+#define ARROW_EXPAND(x) x
+#define ARROW_STRINGIFY(x) #x
+#define ARROW_CONCAT(x, y) x##y
+
+// From Google gutil
+#ifndef ARROW_DISALLOW_COPY_AND_ASSIGN
+#define ARROW_DISALLOW_COPY_AND_ASSIGN(TypeName) \
+ TypeName(const TypeName&) = delete; \
+ void operator=(const TypeName&) = delete
+#endif
+
+#ifndef ARROW_DEFAULT_MOVE_AND_ASSIGN
+#define ARROW_DEFAULT_MOVE_AND_ASSIGN(TypeName) \
+ TypeName(TypeName&&) = default; \
+ TypeName& operator=(TypeName&&) = default
+#endif
+
+#define ARROW_UNUSED(x) (void)(x)
+#define ARROW_ARG_UNUSED(x)
+//
+// GCC can be told that a certain branch is not likely to be taken (for
+// instance, a CHECK failure), and use that information in static analysis.
+// Giving it this information can help it optimize for the common case in
+// the absence of better information (ie. -fprofile-arcs).
+//
+#if defined(__GNUC__)
+#define ARROW_PREDICT_FALSE(x) (__builtin_expect(!!(x), 0))
+#define ARROW_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
+#define ARROW_NORETURN __attribute__((noreturn))
+#define ARROW_NOINLINE __attribute__((noinline))
+#define ARROW_PREFETCH(addr) __builtin_prefetch(addr)
+#elif defined(_MSC_VER)
+#define ARROW_NORETURN __declspec(noreturn)
+#define ARROW_NOINLINE __declspec(noinline)
+#define ARROW_PREDICT_FALSE(x) (x)
+#define ARROW_PREDICT_TRUE(x) (x)
+#define ARROW_PREFETCH(addr)
+#else
+#define ARROW_NORETURN
+#define ARROW_PREDICT_FALSE(x) (x)
+#define ARROW_PREDICT_TRUE(x) (x)
+#define ARROW_PREFETCH(addr)
+#endif
+
+#if (defined(__GNUC__) || defined(__APPLE__))
+#define ARROW_MUST_USE_RESULT __attribute__((warn_unused_result))
+#elif defined(_MSC_VER)
+#define ARROW_MUST_USE_RESULT
+#else
+#define ARROW_MUST_USE_RESULT
+#endif
+
+#if defined(__clang__)
+// Only clang supports warn_unused_result as a type annotation.
+#define ARROW_MUST_USE_TYPE ARROW_MUST_USE_RESULT
+#else
+#define ARROW_MUST_USE_TYPE
+#endif
+
+// ----------------------------------------------------------------------
+// C++/CLI support macros (see ARROW-1134)
+
+#ifndef NULLPTR
+
+#ifdef __cplusplus_cli
+#define NULLPTR __nullptr
+#else
+#define NULLPTR nullptr
+#endif
+
+#endif // ifndef NULLPTR
+
+// ----------------------------------------------------------------------
+
+// clang-format off
+// [[deprecated]] is only available in C++14, use this for the time being
+// This macro takes an optional deprecation message
+#ifdef __COVERITY__
+# define ARROW_DEPRECATED(...)
+# define ARROW_DEPRECATED_USING(...)
+#elif __cplusplus > 201103L
+# define ARROW_DEPRECATED(...) [[deprecated(__VA_ARGS__)]]
+# define ARROW_DEPRECATED_USING(...) ARROW_DEPRECATED(__VA_ARGS__)
+#else
+# ifdef __GNUC__
+# define ARROW_DEPRECATED(...) __attribute__((deprecated(__VA_ARGS__)))
+# define ARROW_DEPRECATED_USING(...) ARROW_DEPRECATED(__VA_ARGS__)
+# elif defined(_MSC_VER)
+# define ARROW_DEPRECATED(...) __declspec(deprecated(__VA_ARGS__))
+# define ARROW_DEPRECATED_USING(...)
+# else
+# define ARROW_DEPRECATED(...)
+# define ARROW_DEPRECATED_USING(...)
+# endif
+#endif
+// clang-format on
+
+// ----------------------------------------------------------------------
+
+// macros to disable padding
+// these macros are portable across different compilers and platforms
+//[https://github.com/google/flatbuffers/blob/master/include/flatbuffers/flatbuffers.h#L1355]
+#if !defined(MANUALLY_ALIGNED_STRUCT)
+#if defined(_MSC_VER)
+#define MANUALLY_ALIGNED_STRUCT(alignment) \
+ __pragma(pack(1)); \
+ struct __declspec(align(alignment))
+#define STRUCT_END(name, size) \
+ __pragma(pack()); \
+ static_assert(sizeof(name) == size, "compiler breaks packing rules")
+#elif defined(__GNUC__) || defined(__clang__)
+#define MANUALLY_ALIGNED_STRUCT(alignment) \
+ _Pragma("pack(1)") struct __attribute__((aligned(alignment)))
+#define STRUCT_END(name, size) \
+ _Pragma("pack()") static_assert(sizeof(name) == size, "compiler breaks packing rules")
+#else
+#error Unknown compiler, please define structure alignment macros
+#endif
+#endif // !defined(MANUALLY_ALIGNED_STRUCT)
+
+// ----------------------------------------------------------------------
+// Convenience macro disabling a particular UBSan check in a function
+
+#if defined(__clang__)
+#define ARROW_DISABLE_UBSAN(feature) __attribute__((no_sanitize(feature)))
+#else
+#define ARROW_DISABLE_UBSAN(feature)
+#endif
+
+// ----------------------------------------------------------------------
+// Machine information
+
+#if INTPTR_MAX == INT64_MAX
+#define ARROW_BITNESS 64
+#elif INTPTR_MAX == INT32_MAX
+#define ARROW_BITNESS 32
+#else
+#error Unexpected INTPTR_MAX
+#endif
+
+// ----------------------------------------------------------------------
+// From googletest
+// (also in parquet-cpp)
+
+// When you need to test the private or protected members of a class,
+// use the FRIEND_TEST macro to declare your tests as friends of the
+// class. For example:
+//
+// class MyClass {
+// private:
+// void MyMethod();
+// FRIEND_TEST(MyClassTest, MyMethod);
+// };
+//
+// class MyClassTest : public testing::Test {
+// // ...
+// };
+//
+// TEST_F(MyClassTest, MyMethod) {
+// // Can call MyClass::MyMethod() here.
+// }
+
+#define FRIEND_TEST(test_case_name, test_name) \
+ friend class test_case_name##_##test_name##_Test
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/make_unique.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/make_unique.h
index 850e20409b9..5fbed2df121 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/make_unique.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/make_unique.h
@@ -1,42 +1,42 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <memory>
-#include <type_traits>
-#include <utility>
-
-namespace arrow {
-namespace internal {
-
-template <typename T, typename... A>
-typename std::enable_if<!std::is_array<T>::value, std::unique_ptr<T>>::type make_unique(
- A&&... args) {
- return std::unique_ptr<T>(new T(std::forward<A>(args)...));
-}
-
-template <typename T>
-typename std::enable_if<std::is_array<T>::value && std::extent<T>::value == 0,
- std::unique_ptr<T>>::type
-make_unique(std::size_t n) {
- using value_type = typename std::remove_extent<T>::type;
- return std::unique_ptr<value_type[]>(new value_type[n]);
-}
-
-} // namespace internal
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <type_traits>
+#include <utility>
+
+namespace arrow {
+namespace internal {
+
+template <typename T, typename... A>
+typename std::enable_if<!std::is_array<T>::value, std::unique_ptr<T>>::type make_unique(
+ A&&... args) {
+ return std::unique_ptr<T>(new T(std::forward<A>(args)...));
+}
+
+template <typename T>
+typename std::enable_if<std::is_array<T>::value && std::extent<T>::value == 0,
+ std::unique_ptr<T>>::type
+make_unique(std::size_t n) {
+ using value_type = typename std::remove_extent<T>::type;
+ return std::unique_ptr<value_type[]>(new value_type[n]);
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/memory.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/memory.cc
index e91009d5860..2f23eca294a 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/memory.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/memory.cc
@@ -1,74 +1,74 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <vector>
-
-#include "arrow/util/logging.h"
-#include "arrow/util/memory.h"
-#include "arrow/util/thread_pool.h"
-
-namespace arrow {
-namespace internal {
-
-inline uint8_t* pointer_logical_and(const uint8_t* address, uintptr_t bits) {
- uintptr_t value = reinterpret_cast<uintptr_t>(address);
- return reinterpret_cast<uint8_t*>(value & bits);
-}
-
-// This function is just for avoiding MinGW-w64 32bit crash.
-// See also: https://sourceforge.net/p/mingw-w64/bugs/767/
-void* wrap_memcpy(void* dst, const void* src, size_t n) { return memcpy(dst, src, n); }
-
-void parallel_memcopy(uint8_t* dst, const uint8_t* src, int64_t nbytes,
- uintptr_t block_size, int num_threads) {
- // XXX This function is really using `num_threads + 1` threads.
- auto pool = GetCpuThreadPool();
-
- uint8_t* left = pointer_logical_and(src + block_size - 1, ~(block_size - 1));
- uint8_t* right = pointer_logical_and(src + nbytes, ~(block_size - 1));
- int64_t num_blocks = (right - left) / block_size;
-
- // Update right address
- right = right - (num_blocks % num_threads) * block_size;
-
- // Now we divide these blocks between available threads. The remainder is
- // handled separately.
- size_t chunk_size = (right - left) / num_threads;
- int64_t prefix = left - src;
- int64_t suffix = src + nbytes - right;
- // Now the data layout is | prefix | k * num_threads * block_size | suffix |.
- // We have chunk_size = k * block_size, therefore the data layout is
- // | prefix | num_threads * chunk_size | suffix |.
- // Each thread gets a "chunk" of k blocks.
-
- // Start all parallel memcpy tasks and handle leftovers while threads run.
- std::vector<Future<void*>> futures;
-
- for (int i = 0; i < num_threads; i++) {
- futures.push_back(*pool->Submit(wrap_memcpy, dst + prefix + i * chunk_size,
- left + i * chunk_size, chunk_size));
- }
- memcpy(dst, src, prefix);
- memcpy(dst + prefix + num_threads * chunk_size, right, suffix);
-
- for (auto& fut : futures) {
- ARROW_CHECK_OK(fut.status());
- }
-}
-
-} // namespace internal
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <vector>
+
+#include "arrow/util/logging.h"
+#include "arrow/util/memory.h"
+#include "arrow/util/thread_pool.h"
+
+namespace arrow {
+namespace internal {
+
+inline uint8_t* pointer_logical_and(const uint8_t* address, uintptr_t bits) {
+ uintptr_t value = reinterpret_cast<uintptr_t>(address);
+ return reinterpret_cast<uint8_t*>(value & bits);
+}
+
+// This function is just for avoiding MinGW-w64 32bit crash.
+// See also: https://sourceforge.net/p/mingw-w64/bugs/767/
+void* wrap_memcpy(void* dst, const void* src, size_t n) { return memcpy(dst, src, n); }
+
+void parallel_memcopy(uint8_t* dst, const uint8_t* src, int64_t nbytes,
+ uintptr_t block_size, int num_threads) {
+ // XXX This function is really using `num_threads + 1` threads.
+ auto pool = GetCpuThreadPool();
+
+ uint8_t* left = pointer_logical_and(src + block_size - 1, ~(block_size - 1));
+ uint8_t* right = pointer_logical_and(src + nbytes, ~(block_size - 1));
+ int64_t num_blocks = (right - left) / block_size;
+
+ // Update right address
+ right = right - (num_blocks % num_threads) * block_size;
+
+ // Now we divide these blocks between available threads. The remainder is
+ // handled separately.
+ size_t chunk_size = (right - left) / num_threads;
+ int64_t prefix = left - src;
+ int64_t suffix = src + nbytes - right;
+ // Now the data layout is | prefix | k * num_threads * block_size | suffix |.
+ // We have chunk_size = k * block_size, therefore the data layout is
+ // | prefix | num_threads * chunk_size | suffix |.
+ // Each thread gets a "chunk" of k blocks.
+
+ // Start all parallel memcpy tasks and handle leftovers while threads run.
+ std::vector<Future<void*>> futures;
+
+ for (int i = 0; i < num_threads; i++) {
+ futures.push_back(*pool->Submit(wrap_memcpy, dst + prefix + i * chunk_size,
+ left + i * chunk_size, chunk_size));
+ }
+ memcpy(dst, src, prefix);
+ memcpy(dst + prefix + num_threads * chunk_size, right, suffix);
+
+ for (auto& fut : futures) {
+ ARROW_CHECK_OK(fut.status());
+ }
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/memory.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/memory.h
index 4250d0694b7..fe6f959288c 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/memory.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/memory.h
@@ -1,43 +1,43 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-
-#include "arrow/util/macros.h"
-
-namespace arrow {
-namespace internal {
-
-// A helper function for doing memcpy with multiple threads. This is required
-// to saturate the memory bandwidth of modern cpus.
-void parallel_memcopy(uint8_t* dst, const uint8_t* src, int64_t nbytes,
- uintptr_t block_size, int num_threads);
-
-// A helper function for checking if two wrapped objects implementing `Equals`
-// are equal.
-template <typename T>
-bool SharedPtrEquals(const std::shared_ptr<T>& left, const std::shared_ptr<T>& right) {
- if (left == right) return true;
- if (left == NULLPTR || right == NULLPTR) return false;
- return left->Equals(*right);
-}
-
-} // namespace internal
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+
+#include "arrow/util/macros.h"
+
+namespace arrow {
+namespace internal {
+
+// Copies `nbytes` bytes from `src` to `dst` with memcpy, spreading the work over several
+// threads. This is required to saturate the memory bandwidth of modern CPUs.
+void parallel_memcopy(uint8_t* dst, const uint8_t* src, int64_t nbytes,
+ uintptr_t block_size, int num_threads);
+
+// Compares two shared_ptr-wrapped objects through their `Equals` method.
+// Pointers to the same object (including two null pointers) compare equal;
+// a null pointer never equals a non-null one.
+template <typename T>
+bool SharedPtrEquals(const std::shared_ptr<T>& left, const std::shared_ptr<T>& right) {
+  if (left != right) {
+    // Distinct pointers: only dereference when both sides are non-null.
+    if (left != NULLPTR && right != NULLPTR) {
+      return left->Equals(*right);
+    }
+    return false;
+  }
+  return true;
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/mutex.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/mutex.cc
index 7456d7889d8..fca7af13913 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/mutex.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/mutex.cc
@@ -1,54 +1,54 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/mutex.h"
-
-#include <mutex>
-
-#include "arrow/util/logging.h"
-
-namespace arrow {
-namespace util {
-
-struct Mutex::Impl {
- std::mutex mutex_;
-};
-
-Mutex::Guard::Guard(Mutex* locked)
- : locked_(locked, [](Mutex* locked) {
- DCHECK(!locked->impl_->mutex_.try_lock());
- locked->impl_->mutex_.unlock();
- }) {}
-
-Mutex::Guard Mutex::TryLock() {
- DCHECK_NE(impl_, nullptr);
- if (impl_->mutex_.try_lock()) {
- return Guard{this};
- }
- return Guard{};
-}
-
-Mutex::Guard Mutex::Lock() {
- DCHECK_NE(impl_, nullptr);
- impl_->mutex_.lock();
- return Guard{this};
-}
-
-Mutex::Mutex() : impl_(new Impl, [](Impl* impl) { delete impl; }) {}
-
-} // namespace util
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/mutex.h"
+
+#include <mutex>
+
+#include "arrow/util/logging.h"
+
+namespace arrow {
+namespace util {
+
+struct Mutex::Impl {
+ std::mutex mutex_;  // the wrapped standard mutex; defined here so the header only forward-declares Impl
+};
+
+// Wrap a mutex that the caller has already locked. The deleter installed on
+// `locked_` releases the lock, so the mutex is unlocked when the guard is
+// reset or destroyed.
+Mutex::Guard::Guard(Mutex* locked)
+    : locked_(locked, [](Mutex* held) {
+        auto& native = held->impl_->mutex_;
+        // Debug-only sanity check: the mutex must still be held at this point.
+        DCHECK(!native.try_lock());
+        native.unlock();
+      }) {}
+
+// Attempt to take the lock without blocking. Returns an empty (falsy) Guard
+// when the underlying try_lock() fails, otherwise a Guard owning the lock.
+Mutex::Guard Mutex::TryLock() {
+  DCHECK_NE(impl_, nullptr);
+  if (!impl_->mutex_.try_lock()) {
+    return Guard{};
+  }
+  return Guard{this};
+}
+
+// Take the lock, blocking until it is available, and hand back a Guard that
+// owns it.
+Mutex::Guard Mutex::Lock() {
+  DCHECK_NE(impl_, nullptr);
+  std::mutex& native = impl_->mutex_;
+  native.lock();
+  return Guard{this};
+}
+
+// Allocate the hidden implementation. The deleter is passed explicitly as a
+// function pointer, matching the `impl_` declaration in the header, where
+// `Impl` is only forward-declared.
+Mutex::Mutex()
+    : impl_(new Impl, [](Impl* owned) {
+        delete owned;
+      }) {}
+
+} // namespace util
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/mutex.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/mutex.h
index 6c80be380ae..ccc2e149811 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/mutex.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/mutex.h
@@ -1,64 +1,64 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <memory>
-
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace util {
-
-/// A wrapper around std::mutex since we can't use it directly in
-/// public headers due to C++/CLI.
-/// https://docs.microsoft.com/en-us/cpp/standard-library/mutex#remarks
-class ARROW_EXPORT Mutex {
- public:
- Mutex();
- Mutex(Mutex&&) = default;
- Mutex& operator=(Mutex&&) = default;
-
- /// A Guard is falsy if a lock could not be acquired.
- class ARROW_EXPORT Guard {
- public:
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace util {
+
+/// A wrapper around std::mutex since we can't use it directly in
+/// public headers due to C++/CLI.
+/// https://docs.microsoft.com/en-us/cpp/standard-library/mutex#remarks
+class ARROW_EXPORT Mutex {
+ public:
+ Mutex();
+ Mutex(Mutex&&) = default;
+ Mutex& operator=(Mutex&&) = default;
+
+ /// Holds the lock for as long as it is alive. A Guard is falsy if a lock could not be acquired.
+ class ARROW_EXPORT Guard {
+ public:
Guard() : locked_(NULLPTR, [](Mutex* /* mutex */) {}) {}
- Guard(Guard&&) = default;
- Guard& operator=(Guard&&) = default;
-
- explicit operator bool() const { return bool(locked_); }
-
- void Unlock() { locked_.reset(); }
-
- private:
- explicit Guard(Mutex* locked);
-
- std::unique_ptr<Mutex, void (*)(Mutex*)> locked_;
- friend Mutex;
- };
-
- Guard TryLock();
- Guard Lock();
-
- private:
- struct Impl;
- std::unique_ptr<Impl, void (*)(Impl*)> impl_;
-};
-
-} // namespace util
-} // namespace arrow
+ Guard(Guard&&) = default;
+ Guard& operator=(Guard&&) = default;
+
+ explicit operator bool() const { return bool(locked_); }  // true while this guard refers to a mutex
+
+ void Unlock() { locked_.reset(); }  // release early; resetting an empty guard does nothing
+
+ private:
+ explicit Guard(Mutex* locked);  // wraps an already-locked mutex; only Mutex may call this
+
+ std::unique_ptr<Mutex, void (*)(Mutex*)> locked_;  // the deleter unlocks the mutex
+ friend Mutex;
+ };
+
+ Guard TryLock();  // non-blocking; yields a falsy Guard when the mutex could not be locked
+ Guard Lock();  // blocks until the mutex is acquired
+
+ private:
+ struct Impl;  // defined in mutex.cc; holds the std::mutex
+ std::unique_ptr<Impl, void (*)(Impl*)> impl_;
+};
+
+} // namespace util
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/optional.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/optional.h
index b824b499bb8..a1625e5c7ff 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/optional.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/optional.h
@@ -1,33 +1,33 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
#include <optional>
-
-namespace arrow {
-namespace util {
-
-template <typename T>
+
+namespace arrow {
+namespace util {
+
+template <typename T>
using optional = std::optional<T>;
-
+
using std::bad_optional_access;
using std::make_optional;
using std::nullopt;
-
-} // namespace util
-} // namespace arrow
+
+} // namespace util
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/parallel.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/parallel.h
index 80f60fbdb36..b0b870cee9e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/parallel.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/parallel.h
@@ -1,51 +1,51 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <utility>
-#include <vector>
-
-#include "arrow/status.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <utility>
+#include <vector>
+
+#include "arrow/status.h"
#include "arrow/util/functional.h"
-#include "arrow/util/thread_pool.h"
+#include "arrow/util/thread_pool.h"
#include "arrow/util/vector.h"
-
-namespace arrow {
-namespace internal {
-
-// A parallelizer that takes a `Status(int)` function and calls it with
-// arguments between 0 and `num_tasks - 1`, on an arbitrary number of threads.
-
-template <class FUNCTION>
+
+namespace arrow {
+namespace internal {
+
+// A parallelizer that takes a `Status(int)` function and submits one call per index
+// between 0 and `num_tasks - 1` to `executor` (the CPU thread pool by default).
+
+template <class FUNCTION>
Status ParallelFor(int num_tasks, FUNCTION&& func,
Executor* executor = internal::GetCpuThreadPool()) {
std::vector<Future<>> futures(num_tasks);
-
- for (int i = 0; i < num_tasks; ++i) {
+
+ for (int i = 0; i < num_tasks; ++i) {
ARROW_ASSIGN_OR_RAISE(futures[i], executor->Submit(func, i));
- }
- auto st = Status::OK();
- for (auto& fut : futures) {
- st &= fut.status();
- }
- return st;
-}
-
+ }
+ auto st = Status::OK();
+ for (auto& fut : futures) {
+ st &= fut.status();  // fold each task's status into the result; NOTE(review): confirm Status::operator&= keeps the first error
+ }
+ return st;
+}
+
template <class FUNCTION, typename T,
typename R = typename internal::call_traits::return_type<FUNCTION>::ValueType>
Future<std::vector<R>> ParallelForAsync(
@@ -61,23 +61,23 @@ Future<std::vector<R>> ParallelForAsync(
});
}
-// A parallelizer that takes a `Status(int)` function and calls it with
-// arguments between 0 and `num_tasks - 1`, in sequence or in parallel,
-// depending on the input boolean.
-
-template <class FUNCTION>
+// A parallelizer that takes a `Status(int)` function and calls it with
+// arguments between 0 and `num_tasks - 1`, in parallel (via ParallelFor) when
+// `use_threads` is true and in sequence on the calling thread otherwise.
+
+template <class FUNCTION>
Status OptionalParallelFor(bool use_threads, int num_tasks, FUNCTION&& func,
Executor* executor = internal::GetCpuThreadPool()) {
- if (use_threads) {
+ if (use_threads) {
return ParallelFor(num_tasks, std::forward<FUNCTION>(func), executor);
- } else {
- for (int i = 0; i < num_tasks; ++i) {
- RETURN_NOT_OK(func(i));
- }
- return Status::OK();
- }
-}
-
+ } else {
+ for (int i = 0; i < num_tasks; ++i) {
+ RETURN_NOT_OK(func(i));  // sequential path: return at the first failing task
+ }
+ return Status::OK();
+ }
+}
+
// A parallelizer that takes a `Result<R>(int index, T item)` function and
// calls it with each item from the input array, in sequence or in parallel,
// depending on the input boolean.
@@ -98,5 +98,5 @@ Future<std::vector<R>> OptionalParallelForAsync(
}
}
-} // namespace internal
-} // namespace arrow
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/range.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/range.h
index ea0fb0eeaab..94e62869a91 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/range.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/range.h
@@ -1,155 +1,155 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstddef>
-#include <cstdint>
-#include <iterator>
-#include <numeric>
-#include <utility>
-#include <vector>
-
-namespace arrow {
-namespace internal {
-
-/// Create a vector containing the values from start up to stop
-template <typename T>
-std::vector<T> Iota(T start, T stop) {
- if (start > stop) {
- return {};
- }
- std::vector<T> result(static_cast<size_t>(stop - start));
- std::iota(result.begin(), result.end(), start);
- return result;
-}
-
-/// Create a vector containing the values from 0 up to length
-template <typename T>
-std::vector<T> Iota(T length) {
- return Iota(static_cast<T>(0), length);
-}
-
-/// Create a range from a callable which takes a single index parameter
-/// and returns the value of iterator on each call and a length.
-/// Only iterators obtained from the same range should be compared, the
-/// behaviour generally similar to other STL containers.
-template <typename Generator>
-class LazyRange {
- private:
- // callable which generates the values
- // has to be defined at the beginning of the class for type deduction
- const Generator gen_;
- // the length of the range
- int64_t length_;
-#ifdef _MSC_VER
- // workaround to VS2010 not supporting decltype properly
- // see https://stackoverflow.com/questions/21782846/decltype-for-class-member-function
- static Generator gen_static_;
-#endif
-
- public:
-#ifdef _MSC_VER
- using return_type = decltype(gen_static_(0));
-#else
- using return_type = decltype(gen_(0));
-#endif
-
- /// Construct a new range from a callable and length
- LazyRange(Generator gen, int64_t length) : gen_(gen), length_(length) {}
-
- // Class of the dependent iterator, created implicitly by begin and end
- class RangeIter {
- public:
- using difference_type = int64_t;
- using value_type = return_type;
- using reference = const value_type&;
- using pointer = const value_type*;
- using iterator_category = std::forward_iterator_tag;
-
-#ifdef _MSC_VER
- // msvc complains about unchecked iterators,
- // see https://stackoverflow.com/questions/21655496/error-c4996-checked-iterators
- using _Unchecked_type = typename LazyRange<Generator>::RangeIter;
-#endif
-
- RangeIter() = delete;
- RangeIter(const RangeIter& other) = default;
- RangeIter& operator=(const RangeIter& other) = default;
-
- RangeIter(const LazyRange<Generator>& range, int64_t index)
- : range_(&range), index_(index) {}
-
- const return_type operator*() const { return range_->gen_(index_); }
-
- RangeIter operator+(difference_type length) const {
- return RangeIter(*range_, index_ + length);
- }
-
- // pre-increment
- RangeIter& operator++() {
- ++index_;
- return *this;
- }
-
- // post-increment
- RangeIter operator++(int) {
- auto copy = RangeIter(*this);
- ++index_;
- return copy;
- }
-
- bool operator==(const typename LazyRange<Generator>::RangeIter& other) const {
- return this->index_ == other.index_ && this->range_ == other.range_;
- }
-
- bool operator!=(const typename LazyRange<Generator>::RangeIter& other) const {
- return this->index_ != other.index_ || this->range_ != other.range_;
- }
-
- int64_t operator-(const typename LazyRange<Generator>::RangeIter& other) const {
- return this->index_ - other.index_;
- }
-
- bool operator<(const typename LazyRange<Generator>::RangeIter& other) const {
- return this->index_ < other.index_;
- }
-
- private:
- // parent range reference
- const LazyRange* range_;
- // current index
- int64_t index_;
- };
-
- friend class RangeIter;
-
- // Create a new begin const iterator
- RangeIter begin() { return RangeIter(*this, 0); }
-
- // Create a new end const iterator
- RangeIter end() { return RangeIter(*this, length_); }
-};
-
-/// Helper function to create a lazy range from a callable (e.g. lambda) and length
-template <typename Generator>
-LazyRange<Generator> MakeLazyRange(Generator&& gen, int64_t length) {
- return LazyRange<Generator>(std::forward<Generator>(gen), length);
-}
-
-} // namespace internal
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <numeric>
+#include <utility>
+#include <vector>
+
+namespace arrow {
+namespace internal {
+
+/// Build a vector holding the consecutive values start, start + 1, ...,
+/// stop - 1. An empty vector is returned when start is greater than stop.
+template <typename T>
+std::vector<T> Iota(T start, T stop) {
+  if (start > stop) {
+    return std::vector<T>();
+  }
+  const auto count = static_cast<size_t>(stop - start);
+  std::vector<T> sequence(count);
+  std::iota(sequence.begin(), sequence.end(), start);
+  return sequence;
+}
+
+/// Build a vector holding the values 0, 1, ..., length - 1 by delegating to
+/// the two-argument overload.
+template <typename T>
+std::vector<T> Iota(T length) {
+  const T zero = static_cast<T>(0);
+  return Iota(zero, length);
+}
+
+/// A lazily evaluated range of `length` values: dereferencing an iterator at
+/// index i calls the generator with i, so no values are stored in the range.
+/// Only iterators obtained from the same range should be compared; otherwise the
+/// behaviour is generally similar to iterating an STL container.
+template <typename Generator>
+class LazyRange {
+ private:
+ // callable which generates the value for a given index
+ // has to be defined at the beginning of the class for type deduction
+ const Generator gen_;
+ // the length of the range
+ int64_t length_;
+#ifdef _MSC_VER
+ // workaround to VS2010 not supporting decltype properly
+ // see https://stackoverflow.com/questions/21782846/decltype-for-class-member-function
+ static Generator gen_static_;
+#endif
+
+ public:
+#ifdef _MSC_VER
+ using return_type = decltype(gen_static_(0));
+#else
+ using return_type = decltype(gen_(0));
+#endif
+
+ /// Construct a new range from a callable and length
+ LazyRange(Generator gen, int64_t length) : gen_(gen), length_(length) {}
+
+ // Iterator over the range, created by begin() and end(); it stores a pointer to its parent range and an index
+ class RangeIter {
+ public:
+ using difference_type = int64_t;
+ using value_type = return_type;
+ using reference = const value_type&;
+ using pointer = const value_type*;
+ using iterator_category = std::forward_iterator_tag;
+
+#ifdef _MSC_VER
+ // msvc complains about unchecked iterators,
+ // see https://stackoverflow.com/questions/21655496/error-c4996-checked-iterators
+ using _Unchecked_type = typename LazyRange<Generator>::RangeIter;
+#endif
+
+ RangeIter() = delete;
+ RangeIter(const RangeIter& other) = default;
+ RangeIter& operator=(const RangeIter& other) = default;
+
+ RangeIter(const LazyRange<Generator>& range, int64_t index)
+ : range_(&range), index_(index) {}
+
+ const return_type operator*() const { return range_->gen_(index_); }  // evaluates the generator on every dereference
+
+ RangeIter operator+(difference_type length) const {
+ return RangeIter(*range_, index_ + length);
+ }
+
+ // pre-increment
+ RangeIter& operator++() {
+ ++index_;
+ return *this;
+ }
+
+ // post-increment
+ RangeIter operator++(int) {
+ auto copy = RangeIter(*this);
+ ++index_;
+ return copy;
+ }
+
+ bool operator==(const typename LazyRange<Generator>::RangeIter& other) const {
+ return this->index_ == other.index_ && this->range_ == other.range_;
+ }
+
+ bool operator!=(const typename LazyRange<Generator>::RangeIter& other) const {
+ return this->index_ != other.index_ || this->range_ != other.range_;
+ }
+
+ int64_t operator-(const typename LazyRange<Generator>::RangeIter& other) const {
+ return this->index_ - other.index_;  // index distance only; the parent ranges are not compared
+ }
+
+ bool operator<(const typename LazyRange<Generator>::RangeIter& other) const {
+ return this->index_ < other.index_;  // index order only; the parent ranges are not compared
+ }
+
+ private:
+ // parent range (non-owning pointer, so the range has to outlive its iterators)
+ const LazyRange* range_;
+ // current index
+ int64_t index_;
+ };
+
+ friend class RangeIter;
+
+ // Create an iterator positioned at index 0
+ RangeIter begin() { return RangeIter(*this, 0); }
+
+ // Create an iterator positioned at index length_ (one past the last value)
+ RangeIter end() { return RangeIter(*this, length_); }
+};
+
+/// Convenience factory that deduces the generator type, so a LazyRange can be
+/// built from a callable (e.g. a lambda) and a length without naming the type.
+template <typename Generator>
+LazyRange<Generator> MakeLazyRange(Generator&& gen, int64_t length) {
+  using Range = LazyRange<Generator>;
+  return Range(std::forward<Generator>(gen), length);
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/simd.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/simd.h
index 259641dd456..3eb0cc2ca2a 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/simd.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/simd.h
@@ -1,50 +1,50 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#ifdef _MSC_VER
-// MSVC x86_64/arm64
-
-#if defined(_M_AMD64) || defined(_M_X64)
-#include <intrin.h>
-#elif defined(_M_ARM64)
-#include <arm64_neon.h>
-#endif
-
-#else
-// gcc/clang (possibly others)
-
-#if defined(ARROW_HAVE_BMI2)
-#include <x86intrin.h>
-#endif
-
-#if defined(ARROW_HAVE_AVX2) || defined(ARROW_HAVE_AVX512)
-#include <immintrin.h>
-#elif defined(ARROW_HAVE_SSE4_2)
-#include <nmmintrin.h>
-#endif
-
-#ifdef ARROW_HAVE_NEON
-#include <arm_neon.h>
-#endif
-
-#ifdef ARROW_HAVE_ARMV8_CRC
-#include <arm_acle.h>
-#endif
-
-#endif
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#ifdef _MSC_VER
+// MSVC x86_64/arm64
+
+#if defined(_M_AMD64) || defined(_M_X64)
+#include <intrin.h>
+#elif defined(_M_ARM64)
+#include <arm64_neon.h>
+#endif
+
+#else
+// gcc/clang (possibly others)
+
+#if defined(ARROW_HAVE_BMI2)
+#include <x86intrin.h>
+#endif
+
+#if defined(ARROW_HAVE_AVX2) || defined(ARROW_HAVE_AVX512)
+#include <immintrin.h>
+#elif defined(ARROW_HAVE_SSE4_2)
+#include <nmmintrin.h>
+#endif
+
+#ifdef ARROW_HAVE_NEON
+#include <arm_neon.h>
+#endif
+
+#ifdef ARROW_HAVE_ARMV8_CRC
+#include <arm_acle.h>
+#endif
+
+#endif
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/sort.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/sort.h
index cdffe0b2317..ca7b751f22f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/sort.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/sort.h
@@ -1,78 +1,78 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <algorithm>
-#include <cstdint>
-#include <functional>
-#include <numeric>
-#include <utility>
-#include <vector>
-
-namespace arrow {
-namespace internal {
-
-template <typename T, typename Cmp = std::less<T>>
-std::vector<int64_t> ArgSort(const std::vector<T>& values, Cmp&& cmp = {}) {
- std::vector<int64_t> indices(values.size());
- std::iota(indices.begin(), indices.end(), 0);
- std::sort(indices.begin(), indices.end(),
- [&](int64_t i, int64_t j) -> bool { return cmp(values[i], values[j]); });
- return indices;
-}
-
-template <typename T>
-size_t Permute(const std::vector<int64_t>& indices, std::vector<T>* values) {
- if (indices.size() <= 1) {
- return indices.size();
- }
-
- // mask indicating which of values are in the correct location
- std::vector<bool> sorted(indices.size(), false);
-
- size_t cycle_count = 0;
-
- for (auto cycle_start = sorted.begin(); cycle_start != sorted.end();
- cycle_start = std::find(cycle_start, sorted.end(), false)) {
- ++cycle_count;
-
- // position in which an element belongs WRT sort
- auto sort_into = static_cast<int64_t>(cycle_start - sorted.begin());
-
- if (indices[sort_into] == sort_into) {
- // trivial cycle
- sorted[sort_into] = true;
- continue;
- }
-
- // resolve this cycle
- const auto end = sort_into;
- for (int64_t take_from = indices[sort_into]; take_from != end;
- take_from = indices[sort_into]) {
- std::swap(values->at(sort_into), values->at(take_from));
- sorted[sort_into] = true;
- sort_into = take_from;
- }
- sorted[sort_into] = true;
- }
-
- return cycle_count;
-}
-
-} // namespace internal
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <cstdint>
+#include <functional>
+#include <numeric>
+#include <utility>
+#include <vector>
+
+namespace arrow {
+namespace internal {
+
+template <typename T, typename Cmp = std::less<T>>
+std::vector<int64_t> ArgSort(const std::vector<T>& values, Cmp&& cmp = {}) {
+ std::vector<int64_t> indices(values.size());
+ std::iota(indices.begin(), indices.end(), 0);
+ std::sort(indices.begin(), indices.end(),
+ [&](int64_t i, int64_t j) -> bool { return cmp(values[i], values[j]); });
+ return indices;
+}
+
+template <typename T>
+size_t Permute(const std::vector<int64_t>& indices, std::vector<T>* values) {
+ if (indices.size() <= 1) {
+ return indices.size();
+ }
+
+ // mask indicating which of values are in the correct location
+ std::vector<bool> sorted(indices.size(), false);
+
+ size_t cycle_count = 0;
+
+ for (auto cycle_start = sorted.begin(); cycle_start != sorted.end();
+ cycle_start = std::find(cycle_start, sorted.end(), false)) {
+ ++cycle_count;
+
+ // position in which an element belongs WRT sort
+ auto sort_into = static_cast<int64_t>(cycle_start - sorted.begin());
+
+ if (indices[sort_into] == sort_into) {
+ // trivial cycle
+ sorted[sort_into] = true;
+ continue;
+ }
+
+ // resolve this cycle
+ const auto end = sort_into;
+ for (int64_t take_from = indices[sort_into]; take_from != end;
+ take_from = indices[sort_into]) {
+ std::swap(values->at(sort_into), values->at(take_from));
+ sorted[sort_into] = true;
+ sort_into = take_from;
+ }
+ sorted[sort_into] = true;
+ }
+
+ return cycle_count;
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/string.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/string.cc
index d922311df1c..e1a673c1d7d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/string.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/string.cc
@@ -1,97 +1,97 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/string.h"
-
-#include <algorithm>
-#include <cctype>
-#include <memory>
-
-#include "arrow/status.h"
-
-namespace arrow {
-
-static const char* kAsciiTable = "0123456789ABCDEF";
-
-std::string HexEncode(const uint8_t* data, size_t length) {
- std::string hex_string;
- hex_string.reserve(length * 2);
- for (size_t j = 0; j < length; ++j) {
- // Convert to 2 base16 digits
- hex_string.push_back(kAsciiTable[data[j] >> 4]);
- hex_string.push_back(kAsciiTable[data[j] & 15]);
- }
- return hex_string;
-}
-
-std::string Escape(const char* data, size_t length) {
- std::string escaped_string;
- escaped_string.reserve(length);
- for (size_t j = 0; j < length; ++j) {
- switch (data[j]) {
- case '"':
- escaped_string += R"(\")";
- break;
- case '\\':
- escaped_string += R"(\\)";
- break;
- case '\t':
- escaped_string += R"(\t)";
- break;
- case '\r':
- escaped_string += R"(\r)";
- break;
- case '\n':
- escaped_string += R"(\n)";
- break;
- default:
- escaped_string.push_back(data[j]);
- }
- }
- return escaped_string;
-}
-
-std::string HexEncode(const char* data, size_t length) {
- return HexEncode(reinterpret_cast<const uint8_t*>(data), length);
-}
-
-std::string HexEncode(util::string_view str) { return HexEncode(str.data(), str.size()); }
-
-std::string Escape(util::string_view str) { return Escape(str.data(), str.size()); }
-
-Status ParseHexValue(const char* data, uint8_t* out) {
- char c1 = data[0];
- char c2 = data[1];
-
- const char* kAsciiTableEnd = kAsciiTable + 16;
- const char* pos1 = std::lower_bound(kAsciiTable, kAsciiTableEnd, c1);
- const char* pos2 = std::lower_bound(kAsciiTable, kAsciiTableEnd, c2);
-
- // Error checking
- if (pos1 == kAsciiTableEnd || pos2 == kAsciiTableEnd || *pos1 != c1 || *pos2 != c2) {
- return Status::Invalid("Encountered non-hex digit");
- }
-
- *out = static_cast<uint8_t>((pos1 - kAsciiTable) << 4 | (pos2 - kAsciiTable));
- return Status::OK();
-}
-
-namespace internal {
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/string.h"
+
+#include <algorithm>
+#include <cctype>
+#include <memory>
+
+#include "arrow/status.h"
+
+namespace arrow {
+
+static const char* kAsciiTable = "0123456789ABCDEF";
+
+std::string HexEncode(const uint8_t* data, size_t length) {
+ std::string hex_string;
+ hex_string.reserve(length * 2);
+ for (size_t j = 0; j < length; ++j) {
+ // Convert to 2 base16 digits
+ hex_string.push_back(kAsciiTable[data[j] >> 4]);
+ hex_string.push_back(kAsciiTable[data[j] & 15]);
+ }
+ return hex_string;
+}
+
+std::string Escape(const char* data, size_t length) {
+ std::string escaped_string;
+ escaped_string.reserve(length);
+ for (size_t j = 0; j < length; ++j) {
+ switch (data[j]) {
+ case '"':
+ escaped_string += R"(\")";
+ break;
+ case '\\':
+ escaped_string += R"(\\)";
+ break;
+ case '\t':
+ escaped_string += R"(\t)";
+ break;
+ case '\r':
+ escaped_string += R"(\r)";
+ break;
+ case '\n':
+ escaped_string += R"(\n)";
+ break;
+ default:
+ escaped_string.push_back(data[j]);
+ }
+ }
+ return escaped_string;
+}
+
+std::string HexEncode(const char* data, size_t length) {
+ return HexEncode(reinterpret_cast<const uint8_t*>(data), length);
+}
+
+std::string HexEncode(util::string_view str) { return HexEncode(str.data(), str.size()); }
+
+std::string Escape(util::string_view str) { return Escape(str.data(), str.size()); }
+
+Status ParseHexValue(const char* data, uint8_t* out) {
+ char c1 = data[0];
+ char c2 = data[1];
+
+ const char* kAsciiTableEnd = kAsciiTable + 16;
+ const char* pos1 = std::lower_bound(kAsciiTable, kAsciiTableEnd, c1);
+ const char* pos2 = std::lower_bound(kAsciiTable, kAsciiTableEnd, c2);
+
+ // Error checking
+ if (pos1 == kAsciiTableEnd || pos2 == kAsciiTableEnd || *pos1 != c1 || *pos2 != c2) {
+ return Status::Invalid("Encountered non-hex digit");
+ }
+
+ *out = static_cast<uint8_t>((pos1 - kAsciiTable) << 4 | (pos2 - kAsciiTable));
+ return Status::OK();
+}
+
+namespace internal {
+
std::vector<util::string_view> SplitString(util::string_view v, char delimiter) {
std::vector<util::string_view> parts;
size_t start = 0, end;
@@ -109,22 +109,22 @@ std::vector<util::string_view> SplitString(util::string_view v, char delimiter)
template <typename StringLike>
static std::string JoinStringLikes(const std::vector<StringLike>& strings,
util::string_view delimiter) {
- if (strings.size() == 0) {
- return "";
- }
- std::string out = std::string(strings.front());
- for (size_t i = 1; i < strings.size(); ++i) {
- out.append(delimiter.begin(), delimiter.end());
- out.append(strings[i].begin(), strings[i].end());
- }
- return out;
-}
-
+ if (strings.size() == 0) {
+ return "";
+ }
+ std::string out = std::string(strings.front());
+ for (size_t i = 1; i < strings.size(); ++i) {
+ out.append(delimiter.begin(), delimiter.end());
+ out.append(strings[i].begin(), strings[i].end());
+ }
+ return out;
+}
+
std::string JoinStrings(const std::vector<util::string_view>& strings,
util::string_view delimiter) {
return JoinStringLikes(strings, delimiter);
}
-
+
std::string JoinStrings(const std::vector<std::string>& strings,
util::string_view delimiter) {
return JoinStringLikes(strings, delimiter);
@@ -132,60 +132,60 @@ std::string JoinStrings(const std::vector<std::string>& strings,
static constexpr bool IsWhitespace(char c) { return c == ' ' || c == '\t'; }
-std::string TrimString(std::string value) {
- size_t ltrim_chars = 0;
- while (ltrim_chars < value.size() && IsWhitespace(value[ltrim_chars])) {
- ++ltrim_chars;
- }
- value.erase(0, ltrim_chars);
- size_t rtrim_chars = 0;
- while (rtrim_chars < value.size() &&
- IsWhitespace(value[value.size() - 1 - rtrim_chars])) {
- ++rtrim_chars;
- }
- value.erase(value.size() - rtrim_chars, rtrim_chars);
- return value;
-}
-
-bool AsciiEqualsCaseInsensitive(util::string_view left, util::string_view right) {
- // TODO: ASCII validation
- if (left.size() != right.size()) {
- return false;
- }
- for (size_t i = 0; i < left.size(); ++i) {
- if (std::tolower(static_cast<unsigned char>(left[i])) !=
- std::tolower(static_cast<unsigned char>(right[i]))) {
- return false;
- }
- }
- return true;
-}
-
-std::string AsciiToLower(util::string_view value) {
- // TODO: ASCII validation
- std::string result = std::string(value);
- std::transform(result.begin(), result.end(), result.begin(),
- [](unsigned char c) { return std::tolower(c); });
- return result;
-}
-
-std::string AsciiToUpper(util::string_view value) {
- // TODO: ASCII validation
- std::string result = std::string(value);
- std::transform(result.begin(), result.end(), result.begin(),
- [](unsigned char c) { return std::toupper(c); });
- return result;
-}
-
-util::optional<std::string> Replace(util::string_view s, util::string_view token,
- util::string_view replacement) {
- size_t token_start = s.find(token);
- if (token_start == std::string::npos) {
- return util::nullopt;
- }
- return s.substr(0, token_start).to_string() + replacement.to_string() +
- s.substr(token_start + token.size()).to_string();
-}
-
-} // namespace internal
-} // namespace arrow
+std::string TrimString(std::string value) {
+ size_t ltrim_chars = 0;
+ while (ltrim_chars < value.size() && IsWhitespace(value[ltrim_chars])) {
+ ++ltrim_chars;
+ }
+ value.erase(0, ltrim_chars);
+ size_t rtrim_chars = 0;
+ while (rtrim_chars < value.size() &&
+ IsWhitespace(value[value.size() - 1 - rtrim_chars])) {
+ ++rtrim_chars;
+ }
+ value.erase(value.size() - rtrim_chars, rtrim_chars);
+ return value;
+}
+
+bool AsciiEqualsCaseInsensitive(util::string_view left, util::string_view right) {
+ // TODO: ASCII validation
+ if (left.size() != right.size()) {
+ return false;
+ }
+ for (size_t i = 0; i < left.size(); ++i) {
+ if (std::tolower(static_cast<unsigned char>(left[i])) !=
+ std::tolower(static_cast<unsigned char>(right[i]))) {
+ return false;
+ }
+ }
+ return true;
+}
+
+std::string AsciiToLower(util::string_view value) {
+ // TODO: ASCII validation
+ std::string result = std::string(value);
+ std::transform(result.begin(), result.end(), result.begin(),
+ [](unsigned char c) { return std::tolower(c); });
+ return result;
+}
+
+std::string AsciiToUpper(util::string_view value) {
+ // TODO: ASCII validation
+ std::string result = std::string(value);
+ std::transform(result.begin(), result.end(), result.begin(),
+ [](unsigned char c) { return std::toupper(c); });
+ return result;
+}
+
+util::optional<std::string> Replace(util::string_view s, util::string_view token,
+ util::string_view replacement) {
+ size_t token_start = s.find(token);
+ if (token_start == std::string::npos) {
+ return util::nullopt;
+ }
+ return s.substr(0, token_start).to_string() + replacement.to_string() +
+ s.substr(token_start + token.size()).to_string();
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/string.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/string.h
index 68b8a54e313..2d43d5bb62f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/string.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/string.h
@@ -1,79 +1,79 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <string>
-#include <vector>
-
-#include "arrow/util/optional.h"
-#include "arrow/util/string_view.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class Status;
-
-ARROW_EXPORT std::string HexEncode(const uint8_t* data, size_t length);
-
-ARROW_EXPORT std::string Escape(const char* data, size_t length);
-
-ARROW_EXPORT std::string HexEncode(const char* data, size_t length);
-
-ARROW_EXPORT std::string HexEncode(util::string_view str);
-
-ARROW_EXPORT std::string Escape(util::string_view str);
-
-ARROW_EXPORT Status ParseHexValue(const char* data, uint8_t* out);
-
-namespace internal {
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "arrow/util/optional.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Status;
+
+ARROW_EXPORT std::string HexEncode(const uint8_t* data, size_t length);
+
+ARROW_EXPORT std::string Escape(const char* data, size_t length);
+
+ARROW_EXPORT std::string HexEncode(const char* data, size_t length);
+
+ARROW_EXPORT std::string HexEncode(util::string_view str);
+
+ARROW_EXPORT std::string Escape(util::string_view str);
+
+ARROW_EXPORT Status ParseHexValue(const char* data, uint8_t* out);
+
+namespace internal {
+
/// \brief Split a string with a delimiter
ARROW_EXPORT
std::vector<util::string_view> SplitString(util::string_view v, char delim);
-/// \brief Join strings with a delimiter
-ARROW_EXPORT
-std::string JoinStrings(const std::vector<util::string_view>& strings,
- util::string_view delimiter);
-
+/// \brief Join strings with a delimiter
+ARROW_EXPORT
+std::string JoinStrings(const std::vector<util::string_view>& strings,
+ util::string_view delimiter);
+
/// \brief Join strings with a delimiter
ARROW_EXPORT
std::string JoinStrings(const std::vector<std::string>& strings,
util::string_view delimiter);
-/// \brief Trim whitespace from left and right sides of string
-ARROW_EXPORT
-std::string TrimString(std::string value);
-
-ARROW_EXPORT
-bool AsciiEqualsCaseInsensitive(util::string_view left, util::string_view right);
-
-ARROW_EXPORT
-std::string AsciiToLower(util::string_view value);
-
-ARROW_EXPORT
-std::string AsciiToUpper(util::string_view value);
-
-/// \brief Search for the first instance of a token and replace it or return nullopt if
-/// the token is not found.
-ARROW_EXPORT
-util::optional<std::string> Replace(util::string_view s, util::string_view token,
- util::string_view replacement);
-
-} // namespace internal
-} // namespace arrow
+/// \brief Trim whitespace from left and right sides of string
+ARROW_EXPORT
+std::string TrimString(std::string value);
+
+ARROW_EXPORT
+bool AsciiEqualsCaseInsensitive(util::string_view left, util::string_view right);
+
+ARROW_EXPORT
+std::string AsciiToLower(util::string_view value);
+
+ARROW_EXPORT
+std::string AsciiToUpper(util::string_view value);
+
+/// \brief Search for the first instance of a token and replace it or return nullopt if
+/// the token is not found.
+ARROW_EXPORT
+util::optional<std::string> Replace(util::string_view s, util::string_view token,
+ util::string_view replacement);
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/string_builder.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/string_builder.cc
index 625ae007534..baea9803bbb 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/string_builder.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/string_builder.cc
@@ -1,40 +1,40 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/string_builder.h"
-
-#include <sstream>
-
-#include "arrow/util/make_unique.h"
-
-namespace arrow {
-
-using internal::make_unique;
-
-namespace util {
-namespace detail {
-
-StringStreamWrapper::StringStreamWrapper()
- : sstream_(make_unique<std::ostringstream>()), ostream_(*sstream_) {}
-
-StringStreamWrapper::~StringStreamWrapper() {}
-
-std::string StringStreamWrapper::str() { return sstream_->str(); }
-
-} // namespace detail
-} // namespace util
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/string_builder.h"
+
+#include <sstream>
+
+#include "arrow/util/make_unique.h"
+
+namespace arrow {
+
+using internal::make_unique;
+
+namespace util {
+namespace detail {
+
+StringStreamWrapper::StringStreamWrapper()
+ : sstream_(make_unique<std::ostringstream>()), ostream_(*sstream_) {}
+
+StringStreamWrapper::~StringStreamWrapper() {}
+
+std::string StringStreamWrapper::str() { return sstream_->str(); }
+
+} // namespace detail
+} // namespace util
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/string_builder.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/string_builder.h
index 7c05ccd51f7..43fb63efa19 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/string_builder.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/string_builder.h
@@ -1,84 +1,84 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License. template <typename T>
-
-#pragma once
-
-#include <memory>
-#include <ostream>
-#include <string>
-#include <utility>
-
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace util {
-
-namespace detail {
-
-class ARROW_EXPORT StringStreamWrapper {
- public:
- StringStreamWrapper();
- ~StringStreamWrapper();
-
- std::ostream& stream() { return ostream_; }
- std::string str();
-
- protected:
- std::unique_ptr<std::ostringstream> sstream_;
- std::ostream& ostream_;
-};
-
-} // namespace detail
-
-template <typename Head>
-void StringBuilderRecursive(std::ostream& stream, Head&& head) {
- stream << head;
-}
-
-template <typename Head, typename... Tail>
-void StringBuilderRecursive(std::ostream& stream, Head&& head, Tail&&... tail) {
- StringBuilderRecursive(stream, std::forward<Head>(head));
- StringBuilderRecursive(stream, std::forward<Tail>(tail)...);
-}
-
-template <typename... Args>
-std::string StringBuilder(Args&&... args) {
- detail::StringStreamWrapper ss;
- StringBuilderRecursive(ss.stream(), std::forward<Args>(args)...);
- return ss.str();
-}
-
-/// CRTP helper for declaring string representation. Defines operator<<
-template <typename T>
-class ToStringOstreamable {
- public:
- ~ToStringOstreamable() {
- static_assert(
- std::is_same<decltype(std::declval<const T>().ToString()), std::string>::value,
- "ToStringOstreamable depends on the method T::ToString() const");
- }
-
- private:
- const T& cast() const { return static_cast<const T&>(*this); }
-
- friend inline std::ostream& operator<<(std::ostream& os, const ToStringOstreamable& t) {
- return os << t.cast().ToString();
- }
-};
-
-} // namespace util
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License. template <typename T>
+
+#pragma once
+
+#include <memory>
+#include <ostream>
+#include <string>
+#include <utility>
+
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace util {
+
+namespace detail {
+
+class ARROW_EXPORT StringStreamWrapper {
+ public:
+ StringStreamWrapper();
+ ~StringStreamWrapper();
+
+ std::ostream& stream() { return ostream_; }
+ std::string str();
+
+ protected:
+ std::unique_ptr<std::ostringstream> sstream_;
+ std::ostream& ostream_;
+};
+
+} // namespace detail
+
+template <typename Head>
+void StringBuilderRecursive(std::ostream& stream, Head&& head) {
+ stream << head;
+}
+
+template <typename Head, typename... Tail>
+void StringBuilderRecursive(std::ostream& stream, Head&& head, Tail&&... tail) {
+ StringBuilderRecursive(stream, std::forward<Head>(head));
+ StringBuilderRecursive(stream, std::forward<Tail>(tail)...);
+}
+
+template <typename... Args>
+std::string StringBuilder(Args&&... args) {
+ detail::StringStreamWrapper ss;
+ StringBuilderRecursive(ss.stream(), std::forward<Args>(args)...);
+ return ss.str();
+}
+
+/// CRTP helper for declaring string representation. Defines operator<<
+template <typename T>
+class ToStringOstreamable {
+ public:
+ ~ToStringOstreamable() {
+ static_assert(
+ std::is_same<decltype(std::declval<const T>().ToString()), std::string>::value,
+ "ToStringOstreamable depends on the method T::ToString() const");
+ }
+
+ private:
+ const T& cast() const { return static_cast<const T&>(*this); }
+
+ friend inline std::ostream& operator<<(std::ostream& os, const ToStringOstreamable& t) {
+ return os << t.cast().ToString();
+ }
+};
+
+} // namespace util
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/string_view.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/string_view.h
index 4a51c2ebd9e..d15d0b8b657 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/string_view.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/string_view.h
@@ -1,38 +1,38 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#define nssv_CONFIG_SELECT_STRING_VIEW nssv_STRING_VIEW_NONSTD
-
-#include <cstdint>
-#include <string>
-
-#include "arrow/vendored/string_view.hpp" // IWYU pragma: export
-
-namespace arrow {
-namespace util {
-
-using nonstd::string_view;
-
-template <class Char, class Traits = std::char_traits<Char>>
-using basic_string_view = nonstd::basic_string_view<Char, Traits>;
-
-using bytes_view = basic_string_view<uint8_t>;
-
-} // namespace util
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#define nssv_CONFIG_SELECT_STRING_VIEW nssv_STRING_VIEW_NONSTD
+
+#include <cstdint>
+#include <string>
+
+#include "arrow/vendored/string_view.hpp" // IWYU pragma: export
+
+namespace arrow {
+namespace util {
+
+using nonstd::string_view;
+
+template <class Char, class Traits = std::char_traits<Char>>
+using basic_string_view = nonstd::basic_string_view<Char, Traits>;
+
+using bytes_view = basic_string_view<uint8_t>;
+
+} // namespace util
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/task_group.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/task_group.cc
index 7e8ab64b703..0f0626651b3 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/task_group.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/task_group.cc
@@ -1,92 +1,92 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/task_group.h"
-
-#include <atomic>
-#include <condition_variable>
-#include <cstdint>
-#include <mutex>
-#include <utility>
-
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/thread_pool.h"
-
-namespace arrow {
-namespace internal {
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/task_group.h"
+
+#include <atomic>
+#include <condition_variable>
+#include <cstdint>
+#include <mutex>
+#include <utility>
+
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/thread_pool.h"
+
+namespace arrow {
+namespace internal {
+
namespace {
-////////////////////////////////////////////////////////////////////////
-// Serial TaskGroup implementation
-
-class SerialTaskGroup : public TaskGroup {
- public:
+////////////////////////////////////////////////////////////////////////
+// Serial TaskGroup implementation
+
+class SerialTaskGroup : public TaskGroup {
+ public:
explicit SerialTaskGroup(StopToken stop_token) : stop_token_(std::move(stop_token)) {}
void AppendReal(FnOnce<Status()> task) override {
- DCHECK(!finished_);
+ DCHECK(!finished_);
if (stop_token_.IsStopRequested()) {
status_ &= stop_token_.Poll();
return;
}
- if (status_.ok()) {
+ if (status_.ok()) {
status_ &= std::move(task)();
- }
- }
-
- Status current_status() override { return status_; }
-
+ }
+ }
+
+ Status current_status() override { return status_; }
+
bool ok() const override { return status_.ok(); }
-
- Status Finish() override {
- if (!finished_) {
- finished_ = true;
- }
- return status_;
- }
-
+
+ Status Finish() override {
+ if (!finished_) {
+ finished_ = true;
+ }
+ return status_;
+ }
+
Future<> FinishAsync() override { return Future<>::MakeFinished(Finish()); }
- int parallelism() override { return 1; }
-
+ int parallelism() override { return 1; }
+
StopToken stop_token_;
- Status status_;
- bool finished_ = false;
-};
-
-////////////////////////////////////////////////////////////////////////
-// Threaded TaskGroup implementation
-
-class ThreadedTaskGroup : public TaskGroup {
- public:
+ Status status_;
+ bool finished_ = false;
+};
+
+////////////////////////////////////////////////////////////////////////
+// Threaded TaskGroup implementation
+
+class ThreadedTaskGroup : public TaskGroup {
+ public:
ThreadedTaskGroup(Executor* executor, StopToken stop_token)
: executor_(executor),
stop_token_(std::move(stop_token)),
nremaining_(0),
ok_(true) {}
-
- ~ThreadedTaskGroup() override {
- // Make sure all pending tasks are finished, so that dangling references
- // to this don't persist.
- ARROW_UNUSED(Finish());
- }
-
+
+ ~ThreadedTaskGroup() override {
+ // Make sure all pending tasks are finished, so that dangling references
+ // to this don't persist.
+ ARROW_UNUSED(Finish());
+ }
+
void AppendReal(FnOnce<Status()> task) override {
DCHECK(!finished_);
if (stop_token_.IsStopRequested()) {
@@ -94,12 +94,12 @@ class ThreadedTaskGroup : public TaskGroup {
return;
}
- // The hot path is unlocked thanks to atomics
- // Only if an error occurs is the lock taken
- if (ok_.load(std::memory_order_acquire)) {
- nremaining_.fetch_add(1, std::memory_order_acquire);
-
- auto self = checked_pointer_cast<ThreadedTaskGroup>(shared_from_this());
+ // The hot path is unlocked thanks to atomics
+ // Only if an error occurs is the lock taken
+ if (ok_.load(std::memory_order_acquire)) {
+ nremaining_.fetch_add(1, std::memory_order_acquire);
+
+ auto self = checked_pointer_cast<ThreadedTaskGroup>(shared_from_this());
struct Callable {
void operator()() {
@@ -114,7 +114,7 @@ class ThreadedTaskGroup : public TaskGroup {
self_->UpdateStatus(std::move(st));
}
self_->OneTaskDone();
- }
+ }
std::shared_ptr<ThreadedTaskGroup> self_;
FnOnce<Status()> task_;
@@ -123,29 +123,29 @@ class ThreadedTaskGroup : public TaskGroup {
Status st =
executor_->Spawn(Callable{std::move(self), std::move(task), stop_token_});
- UpdateStatus(std::move(st));
- }
- }
-
- Status current_status() override {
- std::lock_guard<std::mutex> lock(mutex_);
- return status_;
- }
-
+ UpdateStatus(std::move(st));
+ }
+ }
+
+ Status current_status() override {
+ std::lock_guard<std::mutex> lock(mutex_);
+ return status_;
+ }
+
bool ok() const override { return ok_.load(); }
-
- Status Finish() override {
- std::unique_lock<std::mutex> lock(mutex_);
- if (!finished_) {
- cv_.wait(lock, [&]() { return nremaining_.load() == 0; });
- // Current tasks may start other tasks, so only set this when done
- finished_ = true;
- }
- return status_;
- }
-
+
+ Status Finish() override {
+ std::unique_lock<std::mutex> lock(mutex_);
+ if (!finished_) {
+ cv_.wait(lock, [&]() { return nremaining_.load() == 0; });
+ // Current tasks may start other tasks, so only set this when done
+ finished_ = true;
+ }
+ return status_;
+ }
+
Future<> FinishAsync() override {
- std::lock_guard<std::mutex> lock(mutex_);
+ std::lock_guard<std::mutex> lock(mutex_);
if (!completion_future_.has_value()) {
if (nremaining_.load() == 0) {
completion_future_ = Future<>::MakeFinished(status_);
@@ -154,29 +154,29 @@ class ThreadedTaskGroup : public TaskGroup {
}
}
return *completion_future_;
- }
-
+ }
+
int parallelism() override { return executor_->GetCapacity(); }
- protected:
- void UpdateStatus(Status&& st) {
- // Must be called unlocked, only locks on error
- if (ARROW_PREDICT_FALSE(!st.ok())) {
- std::lock_guard<std::mutex> lock(mutex_);
- ok_.store(false, std::memory_order_release);
- status_ &= std::move(st);
- }
- }
-
- void OneTaskDone() {
- // Can be called unlocked thanks to atomics
- auto nremaining = nremaining_.fetch_sub(1, std::memory_order_release) - 1;
- DCHECK_GE(nremaining, 0);
- if (nremaining == 0) {
- // Take the lock so that ~ThreadedTaskGroup cannot destroy cv
- // before cv.notify_one() has returned
- std::unique_lock<std::mutex> lock(mutex_);
- cv_.notify_one();
+ protected:
+ void UpdateStatus(Status&& st) {
+ // Must be called unlocked, only locks on error
+ if (ARROW_PREDICT_FALSE(!st.ok())) {
+ std::lock_guard<std::mutex> lock(mutex_);
+ ok_.store(false, std::memory_order_release);
+ status_ &= std::move(st);
+ }
+ }
+
+ void OneTaskDone() {
+ // Can be called unlocked thanks to atomics
+ auto nremaining = nremaining_.fetch_sub(1, std::memory_order_release) - 1;
+ DCHECK_GE(nremaining, 0);
+ if (nremaining == 0) {
+ // Take the lock so that ~ThreadedTaskGroup cannot destroy cv
+ // before cv.notify_one() has returned
+ std::unique_lock<std::mutex> lock(mutex_);
+ cv_.notify_one();
if (completion_future_.has_value()) {
// MarkFinished could be slow. We don't want to call it while we are holding
// the lock.
@@ -192,33 +192,33 @@ class ThreadedTaskGroup : public TaskGroup {
lock.unlock();
}
}
- }
- }
-
- // These members are usable unlocked
- Executor* executor_;
+ }
+ }
+
+ // These members are usable unlocked
+ Executor* executor_;
StopToken stop_token_;
- std::atomic<int32_t> nremaining_;
- std::atomic<bool> ok_;
-
- // These members use locking
- std::mutex mutex_;
- std::condition_variable cv_;
- Status status_;
- bool finished_ = false;
+ std::atomic<int32_t> nremaining_;
+ std::atomic<bool> ok_;
+
+ // These members use locking
+ std::mutex mutex_;
+ std::condition_variable cv_;
+ Status status_;
+ bool finished_ = false;
util::optional<Future<>> completion_future_;
-};
-
+};
+
} // namespace
std::shared_ptr<TaskGroup> TaskGroup::MakeSerial(StopToken stop_token) {
return std::shared_ptr<TaskGroup>(new SerialTaskGroup{stop_token});
-}
-
+}
+
std::shared_ptr<TaskGroup> TaskGroup::MakeThreaded(Executor* thread_pool,
StopToken stop_token) {
return std::shared_ptr<TaskGroup>(new ThreadedTaskGroup{thread_pool, stop_token});
-}
-
-} // namespace internal
-} // namespace arrow
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/task_group.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/task_group.h
index 3bb72f0d9cb..5f0078becde 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/task_group.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/task_group.h
@@ -1,43 +1,43 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <memory>
-#include <utility>
-
-#include "arrow/status.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <utility>
+
+#include "arrow/status.h"
#include "arrow/type_fwd.h"
#include "arrow/util/cancel.h"
#include "arrow/util/functional.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/type_fwd.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace internal {
-
-/// \brief A group of related tasks
-///
-/// A TaskGroup executes tasks with the signature `Status()`.
-/// Execution can be serial or parallel, depending on the TaskGroup
-/// implementation. When Finish() returns, it is guaranteed that all
-/// tasks have finished, or at least one has errored.
-///
+#include "arrow/util/macros.h"
+#include "arrow/util/type_fwd.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace internal {
+
+/// \brief A group of related tasks
+///
+/// A TaskGroup executes tasks with the signature `Status()`.
+/// Execution can be serial or parallel, depending on the TaskGroup
+/// implementation. When Finish() returns, it is guaranteed that all
+/// tasks have finished, or at least one has errored.
+///
/// Once an error has occurred any tasks that are submitted to the task group
/// will not run. The call to Append will simply return without scheduling the
/// task.
@@ -50,21 +50,21 @@ namespace internal {
///
/// Once a task group has finished new tasks may not be added to it. If you need to start
/// a new batch of work then you should create a new task group.
-class ARROW_EXPORT TaskGroup : public std::enable_shared_from_this<TaskGroup> {
- public:
- /// Add a Status-returning function to execute. Execution order is
- /// undefined. The function may be executed immediately or later.
- template <typename Function>
- void Append(Function&& func) {
- return AppendReal(std::forward<Function>(func));
- }
-
- /// Wait for execution of all tasks (and subgroups) to be finished,
- /// or for at least one task (or subgroup) to error out.
- /// The returned Status propagates the error status of the first failing
- /// task (or subgroup).
- virtual Status Finish() = 0;
-
+class ARROW_EXPORT TaskGroup : public std::enable_shared_from_this<TaskGroup> {
+ public:
+ /// Add a Status-returning function to execute. Execution order is
+ /// undefined. The function may be executed immediately or later.
+ template <typename Function>
+ void Append(Function&& func) {
+ return AppendReal(std::forward<Function>(func));
+ }
+
+ /// Wait for execution of all tasks (and subgroups) to be finished,
+ /// or for at least one task (or subgroup) to error out.
+ /// The returned Status propagates the error status of the first failing
+ /// task (or subgroup).
+ virtual Status Finish() = 0;
+
/// Returns a future that will complete the first time all tasks are finished.
/// This should be called only after all top level tasks
/// have been added to the task group.
@@ -79,28 +79,28 @@ class ARROW_EXPORT TaskGroup : public std::enable_shared_from_this<TaskGroup> {
/// pass.
virtual Future<> FinishAsync() = 0;
- /// The current aggregate error Status. Non-blocking, useful for stopping early.
- virtual Status current_status() = 0;
-
+ /// The current aggregate error Status. Non-blocking, useful for stopping early.
+ virtual Status current_status() = 0;
+
/// Whether some tasks have already failed. Non-blocking, useful for stopping early.
virtual bool ok() const = 0;
-
- /// How many tasks can typically be executed in parallel.
- /// This is only a hint, useful for testing or debugging.
- virtual int parallelism() = 0;
-
+
+ /// How many tasks can typically be executed in parallel.
+ /// This is only a hint, useful for testing or debugging.
+ virtual int parallelism() = 0;
+
static std::shared_ptr<TaskGroup> MakeSerial(StopToken = StopToken::Unstoppable());
static std::shared_ptr<TaskGroup> MakeThreaded(internal::Executor*,
StopToken = StopToken::Unstoppable());
-
- virtual ~TaskGroup() = default;
-
- protected:
- TaskGroup() = default;
- ARROW_DISALLOW_COPY_AND_ASSIGN(TaskGroup);
-
+
+ virtual ~TaskGroup() = default;
+
+ protected:
+ TaskGroup() = default;
+ ARROW_DISALLOW_COPY_AND_ASSIGN(TaskGroup);
+
virtual void AppendReal(FnOnce<Status()> task) = 0;
-};
-
-} // namespace internal
-} // namespace arrow
+};
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/thread_pool.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/thread_pool.cc
index 758295d01ed..ad0571e9908 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/thread_pool.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/thread_pool.cc
@@ -1,39 +1,39 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/thread_pool.h"
-
-#include <algorithm>
-#include <condition_variable>
-#include <deque>
-#include <list>
-#include <mutex>
-#include <string>
-#include <thread>
-#include <vector>
-
-#include "arrow/util/io_util.h"
-#include "arrow/util/logging.h"
-
-namespace arrow {
-namespace internal {
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/thread_pool.h"
+
+#include <algorithm>
+#include <condition_variable>
+#include <deque>
+#include <list>
+#include <mutex>
+#include <string>
+#include <thread>
+#include <vector>
+
+#include "arrow/util/io_util.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+namespace internal {
+
Executor::~Executor() = default;
-
+
namespace {
struct Task {
@@ -112,66 +112,66 @@ void SerialExecutor::RunLoop() {
}
}
-struct ThreadPool::State {
+struct ThreadPool::State {
State() = default;
-
- // NOTE: in case locking becomes too expensive, we can investigate lock-free FIFOs
- // such as https://github.com/cameron314/concurrentqueue
-
- std::mutex mutex_;
- std::condition_variable cv_;
- std::condition_variable cv_shutdown_;
-
- std::list<std::thread> workers_;
- // Trashcan for finished threads
- std::vector<std::thread> finished_workers_;
+
+ // NOTE: in case locking becomes too expensive, we can investigate lock-free FIFOs
+ // such as https://github.com/cameron314/concurrentqueue
+
+ std::mutex mutex_;
+ std::condition_variable cv_;
+ std::condition_variable cv_shutdown_;
+
+ std::list<std::thread> workers_;
+ // Trashcan for finished threads
+ std::vector<std::thread> finished_workers_;
std::deque<Task> pending_tasks_;
-
- // Desired number of threads
+
+ // Desired number of threads
int desired_capacity_ = 0;
// Total number of tasks that are either queued or running
int tasks_queued_or_running_ = 0;
- // Are we shutting down?
+ // Are we shutting down?
bool please_shutdown_ = false;
bool quick_shutdown_ = false;
-};
-
-// The worker loop is an independent function so that it can keep running
-// after the ThreadPool is destroyed.
-static void WorkerLoop(std::shared_ptr<ThreadPool::State> state,
- std::list<std::thread>::iterator it) {
- std::unique_lock<std::mutex> lock(state->mutex_);
-
- // Since we hold the lock, `it` now points to the correct thread object
- // (LaunchWorkersUnlocked has exited)
- DCHECK_EQ(std::this_thread::get_id(), it->get_id());
-
- // If too many threads, we should secede from the pool
- const auto should_secede = [&]() -> bool {
- return state->workers_.size() > static_cast<size_t>(state->desired_capacity_);
- };
-
- while (true) {
- // By the time this thread is started, some tasks may have been pushed
- // or shutdown could even have been requested. So we only wait on the
- // condition variable at the end of the loop.
-
- // Execute pending tasks if any
- while (!state->pending_tasks_.empty() && !state->quick_shutdown_) {
- // We check this opportunistically at each loop iteration since
- // it releases the lock below.
- if (should_secede()) {
- break;
- }
+};
+
+// The worker loop is an independent function so that it can keep running
+// after the ThreadPool is destroyed.
+static void WorkerLoop(std::shared_ptr<ThreadPool::State> state,
+ std::list<std::thread>::iterator it) {
+ std::unique_lock<std::mutex> lock(state->mutex_);
+
+ // Since we hold the lock, `it` now points to the correct thread object
+ // (LaunchWorkersUnlocked has exited)
+ DCHECK_EQ(std::this_thread::get_id(), it->get_id());
+
+ // If too many threads, we should secede from the pool
+ const auto should_secede = [&]() -> bool {
+ return state->workers_.size() > static_cast<size_t>(state->desired_capacity_);
+ };
+
+ while (true) {
+ // By the time this thread is started, some tasks may have been pushed
+ // or shutdown could even have been requested. So we only wait on the
+ // condition variable at the end of the loop.
+
+ // Execute pending tasks if any
+ while (!state->pending_tasks_.empty() && !state->quick_shutdown_) {
+ // We check this opportunistically at each loop iteration since
+ // it releases the lock below.
+ if (should_secede()) {
+ break;
+ }
DCHECK_GE(state->tasks_queued_or_running_, 0);
- {
+ {
Task task = std::move(state->pending_tasks_.front());
- state->pending_tasks_.pop_front();
+ state->pending_tasks_.pop_front();
StopToken* stop_token = &task.stop_token;
- lock.unlock();
+ lock.unlock();
if (!stop_token->IsStopRequested()) {
std::move(task.callable)();
} else {
@@ -181,87 +181,87 @@ static void WorkerLoop(std::shared_ptr<ThreadPool::State> state,
}
ARROW_UNUSED(std::move(task)); // release resources before waiting for lock
lock.lock();
- }
+ }
state->tasks_queued_or_running_--;
- }
- // Now either the queue is empty *or* a quick shutdown was requested
- if (state->please_shutdown_ || should_secede()) {
- break;
- }
- // Wait for next wakeup
- state->cv_.wait(lock);
- }
+ }
+ // Now either the queue is empty *or* a quick shutdown was requested
+ if (state->please_shutdown_ || should_secede()) {
+ break;
+ }
+ // Wait for next wakeup
+ state->cv_.wait(lock);
+ }
DCHECK_GE(state->tasks_queued_or_running_, 0);
-
- // We're done. Move our thread object to the trashcan of finished
- // workers. This has two motivations:
- // 1) the thread object doesn't get destroyed before this function finishes
- // (but we could call thread::detach() instead)
- // 2) we can explicitly join() the trashcan threads to make sure all OS threads
- // are exited before the ThreadPool is destroyed. Otherwise subtle
- // timing conditions can lead to false positives with Valgrind.
- DCHECK_EQ(std::this_thread::get_id(), it->get_id());
- state->finished_workers_.push_back(std::move(*it));
- state->workers_.erase(it);
- if (state->please_shutdown_) {
- // Notify the function waiting in Shutdown().
- state->cv_shutdown_.notify_one();
- }
-}
-
-ThreadPool::ThreadPool()
- : sp_state_(std::make_shared<ThreadPool::State>()),
- state_(sp_state_.get()),
- shutdown_on_destroy_(true) {
-#ifndef _WIN32
- pid_ = getpid();
-#endif
-}
-
-ThreadPool::~ThreadPool() {
- if (shutdown_on_destroy_) {
- ARROW_UNUSED(Shutdown(false /* wait */));
- }
-}
-
-void ThreadPool::ProtectAgainstFork() {
-#ifndef _WIN32
- pid_t current_pid = getpid();
- if (pid_ != current_pid) {
- // Reinitialize internal state in child process after fork()
- // Ideally we would use pthread_at_fork(), but that doesn't allow
- // storing an argument, hence we'd need to maintain a list of all
- // existing ThreadPools.
- int capacity = state_->desired_capacity_;
-
- auto new_state = std::make_shared<ThreadPool::State>();
- new_state->please_shutdown_ = state_->please_shutdown_;
- new_state->quick_shutdown_ = state_->quick_shutdown_;
-
- pid_ = current_pid;
- sp_state_ = new_state;
- state_ = sp_state_.get();
-
- // Launch worker threads anew
- if (!state_->please_shutdown_) {
- ARROW_UNUSED(SetCapacity(capacity));
- }
- }
-#endif
-}
-
-Status ThreadPool::SetCapacity(int threads) {
- ProtectAgainstFork();
- std::unique_lock<std::mutex> lock(state_->mutex_);
- if (state_->please_shutdown_) {
- return Status::Invalid("operation forbidden during or after shutdown");
- }
- if (threads <= 0) {
- return Status::Invalid("ThreadPool capacity must be > 0");
- }
- CollectFinishedWorkersUnlocked();
-
- state_->desired_capacity_ = threads;
+
+ // We're done. Move our thread object to the trashcan of finished
+ // workers. This has two motivations:
+ // 1) the thread object doesn't get destroyed before this function finishes
+ // (but we could call thread::detach() instead)
+ // 2) we can explicitly join() the trashcan threads to make sure all OS threads
+ // are exited before the ThreadPool is destroyed. Otherwise subtle
+ // timing conditions can lead to false positives with Valgrind.
+ DCHECK_EQ(std::this_thread::get_id(), it->get_id());
+ state->finished_workers_.push_back(std::move(*it));
+ state->workers_.erase(it);
+ if (state->please_shutdown_) {
+ // Notify the function waiting in Shutdown().
+ state->cv_shutdown_.notify_one();
+ }
+}
+
+ThreadPool::ThreadPool()
+ : sp_state_(std::make_shared<ThreadPool::State>()),
+ state_(sp_state_.get()),
+ shutdown_on_destroy_(true) {
+#ifndef _WIN32
+ pid_ = getpid();
+#endif
+}
+
+ThreadPool::~ThreadPool() {
+ if (shutdown_on_destroy_) {
+ ARROW_UNUSED(Shutdown(false /* wait */));
+ }
+}
+
+void ThreadPool::ProtectAgainstFork() {
+#ifndef _WIN32
+ pid_t current_pid = getpid();
+ if (pid_ != current_pid) {
+ // Reinitialize internal state in child process after fork()
+ // Ideally we would use pthread_at_fork(), but that doesn't allow
+ // storing an argument, hence we'd need to maintain a list of all
+ // existing ThreadPools.
+ int capacity = state_->desired_capacity_;
+
+ auto new_state = std::make_shared<ThreadPool::State>();
+ new_state->please_shutdown_ = state_->please_shutdown_;
+ new_state->quick_shutdown_ = state_->quick_shutdown_;
+
+ pid_ = current_pid;
+ sp_state_ = new_state;
+ state_ = sp_state_.get();
+
+ // Launch worker threads anew
+ if (!state_->please_shutdown_) {
+ ARROW_UNUSED(SetCapacity(capacity));
+ }
+ }
+#endif
+}
+
+Status ThreadPool::SetCapacity(int threads) {
+ ProtectAgainstFork();
+ std::unique_lock<std::mutex> lock(state_->mutex_);
+ if (state_->please_shutdown_) {
+ return Status::Invalid("operation forbidden during or after shutdown");
+ }
+ if (threads <= 0) {
+ return Status::Invalid("ThreadPool capacity must be > 0");
+ }
+ CollectFinishedWorkersUnlocked();
+
+ state_->desired_capacity_ = threads;
// See if we need to increase or decrease the number of running threads
const int required = std::min(static_cast<int>(state_->pending_tasks_.size()),
threads - static_cast<int>(state_->workers_.size()));
@@ -270,83 +270,83 @@ Status ThreadPool::SetCapacity(int threads) {
LaunchWorkersUnlocked(required);
} else if (required < 0) {
// Excess threads are running, wake them so that they stop
- state_->cv_.notify_all();
- }
- return Status::OK();
-}
-
-int ThreadPool::GetCapacity() {
- ProtectAgainstFork();
- std::unique_lock<std::mutex> lock(state_->mutex_);
- return state_->desired_capacity_;
-}
-
+ state_->cv_.notify_all();
+ }
+ return Status::OK();
+}
+
+int ThreadPool::GetCapacity() {
+ ProtectAgainstFork();
+ std::unique_lock<std::mutex> lock(state_->mutex_);
+ return state_->desired_capacity_;
+}
+
int ThreadPool::GetNumTasks() {
ProtectAgainstFork();
std::unique_lock<std::mutex> lock(state_->mutex_);
return state_->tasks_queued_or_running_;
}
-int ThreadPool::GetActualCapacity() {
- ProtectAgainstFork();
- std::unique_lock<std::mutex> lock(state_->mutex_);
- return static_cast<int>(state_->workers_.size());
-}
-
-Status ThreadPool::Shutdown(bool wait) {
- ProtectAgainstFork();
- std::unique_lock<std::mutex> lock(state_->mutex_);
-
- if (state_->please_shutdown_) {
- return Status::Invalid("Shutdown() already called");
- }
- state_->please_shutdown_ = true;
- state_->quick_shutdown_ = !wait;
- state_->cv_.notify_all();
- state_->cv_shutdown_.wait(lock, [this] { return state_->workers_.empty(); });
- if (!state_->quick_shutdown_) {
- DCHECK_EQ(state_->pending_tasks_.size(), 0);
- } else {
- state_->pending_tasks_.clear();
- }
- CollectFinishedWorkersUnlocked();
- return Status::OK();
-}
-
-void ThreadPool::CollectFinishedWorkersUnlocked() {
- for (auto& thread : state_->finished_workers_) {
- // Make sure OS thread has exited
- thread.join();
- }
- state_->finished_workers_.clear();
-}
-
+int ThreadPool::GetActualCapacity() {
+ ProtectAgainstFork();
+ std::unique_lock<std::mutex> lock(state_->mutex_);
+ return static_cast<int>(state_->workers_.size());
+}
+
+Status ThreadPool::Shutdown(bool wait) {
+ ProtectAgainstFork();
+ std::unique_lock<std::mutex> lock(state_->mutex_);
+
+ if (state_->please_shutdown_) {
+ return Status::Invalid("Shutdown() already called");
+ }
+ state_->please_shutdown_ = true;
+ state_->quick_shutdown_ = !wait;
+ state_->cv_.notify_all();
+ state_->cv_shutdown_.wait(lock, [this] { return state_->workers_.empty(); });
+ if (!state_->quick_shutdown_) {
+ DCHECK_EQ(state_->pending_tasks_.size(), 0);
+ } else {
+ state_->pending_tasks_.clear();
+ }
+ CollectFinishedWorkersUnlocked();
+ return Status::OK();
+}
+
+void ThreadPool::CollectFinishedWorkersUnlocked() {
+ for (auto& thread : state_->finished_workers_) {
+ // Make sure OS thread has exited
+ thread.join();
+ }
+ state_->finished_workers_.clear();
+}
+
thread_local ThreadPool* current_thread_pool_ = nullptr;
bool ThreadPool::OwnsThisThread() { return current_thread_pool_ == this; }
-void ThreadPool::LaunchWorkersUnlocked(int threads) {
- std::shared_ptr<State> state = sp_state_;
-
- for (int i = 0; i < threads; i++) {
- state_->workers_.emplace_back();
- auto it = --(state_->workers_.end());
+void ThreadPool::LaunchWorkersUnlocked(int threads) {
+ std::shared_ptr<State> state = sp_state_;
+
+ for (int i = 0; i < threads; i++) {
+ state_->workers_.emplace_back();
+ auto it = --(state_->workers_.end());
*it = std::thread([this, state, it] {
current_thread_pool_ = this;
WorkerLoop(state, it);
});
- }
-}
-
+ }
+}
+
Status ThreadPool::SpawnReal(TaskHints hints, FnOnce<void()> task, StopToken stop_token,
StopCallback&& stop_callback) {
- {
- ProtectAgainstFork();
- std::lock_guard<std::mutex> lock(state_->mutex_);
- if (state_->please_shutdown_) {
- return Status::Invalid("operation forbidden during or after shutdown");
- }
- CollectFinishedWorkersUnlocked();
+ {
+ ProtectAgainstFork();
+ std::lock_guard<std::mutex> lock(state_->mutex_);
+ if (state_->please_shutdown_) {
+ return Status::Invalid("operation forbidden during or after shutdown");
+ }
+ CollectFinishedWorkersUnlocked();
state_->tasks_queued_or_running_++;
if (static_cast<int>(state_->workers_.size()) < state_->tasks_queued_or_running_ &&
state_->desired_capacity_ > static_cast<int>(state_->workers_.size())) {
@@ -355,88 +355,88 @@ Status ThreadPool::SpawnReal(TaskHints hints, FnOnce<void()> task, StopToken sto
}
state_->pending_tasks_.push_back(
{std::move(task), std::move(stop_token), std::move(stop_callback)});
- }
- state_->cv_.notify_one();
- return Status::OK();
-}
-
-Result<std::shared_ptr<ThreadPool>> ThreadPool::Make(int threads) {
- auto pool = std::shared_ptr<ThreadPool>(new ThreadPool());
- RETURN_NOT_OK(pool->SetCapacity(threads));
- return pool;
-}
-
-Result<std::shared_ptr<ThreadPool>> ThreadPool::MakeEternal(int threads) {
- ARROW_ASSIGN_OR_RAISE(auto pool, Make(threads));
- // On Windows, the ThreadPool destructor may be called after non-main threads
- // have been killed by the OS, and hang in a condition variable.
- // On Unix, we want to avoid leak reports by Valgrind.
-#ifdef _WIN32
- pool->shutdown_on_destroy_ = false;
-#endif
- return pool;
-}
-
-// ----------------------------------------------------------------------
-// Global thread pool
-
-static int ParseOMPEnvVar(const char* name) {
- // OMP_NUM_THREADS is a comma-separated list of positive integers.
- // We are only interested in the first (top-level) number.
- auto result = GetEnvVar(name);
- if (!result.ok()) {
- return 0;
- }
- auto str = *std::move(result);
- auto first_comma = str.find_first_of(',');
- if (first_comma != std::string::npos) {
- str = str.substr(0, first_comma);
- }
- try {
- return std::max(0, std::stoi(str));
- } catch (...) {
- return 0;
- }
-}
-
-int ThreadPool::DefaultCapacity() {
- int capacity, limit;
- capacity = ParseOMPEnvVar("OMP_NUM_THREADS");
- if (capacity == 0) {
- capacity = std::thread::hardware_concurrency();
- }
- limit = ParseOMPEnvVar("OMP_THREAD_LIMIT");
- if (limit > 0) {
- capacity = std::min(limit, capacity);
- }
- if (capacity == 0) {
- ARROW_LOG(WARNING) << "Failed to determine the number of available threads, "
- "using a hardcoded arbitrary value";
- capacity = 4;
- }
- return capacity;
-}
-
-// Helper for the singleton pattern
-std::shared_ptr<ThreadPool> ThreadPool::MakeCpuThreadPool() {
- auto maybe_pool = ThreadPool::MakeEternal(ThreadPool::DefaultCapacity());
- if (!maybe_pool.ok()) {
- maybe_pool.status().Abort("Failed to create global CPU thread pool");
- }
- return *std::move(maybe_pool);
-}
-
-ThreadPool* GetCpuThreadPool() {
- static std::shared_ptr<ThreadPool> singleton = ThreadPool::MakeCpuThreadPool();
- return singleton.get();
-}
-
-} // namespace internal
-
-int GetCpuThreadPoolCapacity() { return internal::GetCpuThreadPool()->GetCapacity(); }
-
-Status SetCpuThreadPoolCapacity(int threads) {
- return internal::GetCpuThreadPool()->SetCapacity(threads);
-}
-
-} // namespace arrow
+ }
+ state_->cv_.notify_one();
+ return Status::OK();
+}
+
+Result<std::shared_ptr<ThreadPool>> ThreadPool::Make(int threads) {
+ auto pool = std::shared_ptr<ThreadPool>(new ThreadPool());
+ RETURN_NOT_OK(pool->SetCapacity(threads));
+ return pool;
+}
+
+Result<std::shared_ptr<ThreadPool>> ThreadPool::MakeEternal(int threads) {
+ ARROW_ASSIGN_OR_RAISE(auto pool, Make(threads));
+ // On Windows, the ThreadPool destructor may be called after non-main threads
+ // have been killed by the OS, and hang in a condition variable.
+ // On Unix, we want to avoid leak reports by Valgrind.
+#ifdef _WIN32
+ pool->shutdown_on_destroy_ = false;
+#endif
+ return pool;
+}
+
+// ----------------------------------------------------------------------
+// Global thread pool
+
+static int ParseOMPEnvVar(const char* name) {
+ // OMP_NUM_THREADS is a comma-separated list of positive integers.
+ // We are only interested in the first (top-level) number.
+ auto result = GetEnvVar(name);
+ if (!result.ok()) {
+ return 0;
+ }
+ auto str = *std::move(result);
+ auto first_comma = str.find_first_of(',');
+ if (first_comma != std::string::npos) {
+ str = str.substr(0, first_comma);
+ }
+ try {
+ return std::max(0, std::stoi(str));
+ } catch (...) {
+ return 0;
+ }
+}
+
+int ThreadPool::DefaultCapacity() {
+ int capacity, limit;
+ capacity = ParseOMPEnvVar("OMP_NUM_THREADS");
+ if (capacity == 0) {
+ capacity = std::thread::hardware_concurrency();
+ }
+ limit = ParseOMPEnvVar("OMP_THREAD_LIMIT");
+ if (limit > 0) {
+ capacity = std::min(limit, capacity);
+ }
+ if (capacity == 0) {
+ ARROW_LOG(WARNING) << "Failed to determine the number of available threads, "
+ "using a hardcoded arbitrary value";
+ capacity = 4;
+ }
+ return capacity;
+}
+
+// Helper for the singleton pattern
+std::shared_ptr<ThreadPool> ThreadPool::MakeCpuThreadPool() {
+ auto maybe_pool = ThreadPool::MakeEternal(ThreadPool::DefaultCapacity());
+ if (!maybe_pool.ok()) {
+ maybe_pool.status().Abort("Failed to create global CPU thread pool");
+ }
+ return *std::move(maybe_pool);
+}
+
+ThreadPool* GetCpuThreadPool() {
+ static std::shared_ptr<ThreadPool> singleton = ThreadPool::MakeCpuThreadPool();
+ return singleton.get();
+}
+
+} // namespace internal
+
+int GetCpuThreadPoolCapacity() { return internal::GetCpuThreadPool()->GetCapacity(); }
+
+Status SetCpuThreadPoolCapacity(int threads) {
+ return internal::GetCpuThreadPool()->SetCapacity(threads);
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/thread_pool.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/thread_pool.h
index 9ac8e36a3d8..4c72979f5c4 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/thread_pool.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/thread_pool.h
@@ -1,101 +1,101 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#ifndef _WIN32
-#include <unistd.h>
-#endif
-
-#include <cstdint>
-#include <memory>
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#ifndef _WIN32
+#include <unistd.h>
+#endif
+
+#include <cstdint>
+#include <memory>
#include <queue>
-#include <type_traits>
-#include <utility>
-
-#include "arrow/result.h"
-#include "arrow/status.h"
+#include <type_traits>
+#include <utility>
+
+#include "arrow/result.h"
+#include "arrow/status.h"
#include "arrow/util/cancel.h"
#include "arrow/util/functional.h"
-#include "arrow/util/future.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-#if defined(_MSC_VER)
-// Disable harmless warning for decorated name length limit
-#pragma warning(disable : 4503)
-#endif
-
-namespace arrow {
-
-/// \brief Get the capacity of the global thread pool
-///
-/// Return the number of worker threads in the thread pool to which
-/// Arrow dispatches various CPU-bound tasks. This is an ideal number,
-/// not necessarily the exact number of threads at a given point in time.
-///
-/// You can change this number using SetCpuThreadPoolCapacity().
-ARROW_EXPORT int GetCpuThreadPoolCapacity();
-
-/// \brief Set the capacity of the global thread pool
-///
-/// Set the number of worker threads int the thread pool to which
-/// Arrow dispatches various CPU-bound tasks.
-///
-/// The current number is returned by GetCpuThreadPoolCapacity().
-ARROW_EXPORT Status SetCpuThreadPoolCapacity(int threads);
-
-namespace internal {
-
-// Hints about a task that may be used by an Executor.
-// They are ignored by the provided ThreadPool implementation.
-struct TaskHints {
- // The lower, the more urgent
- int32_t priority = 0;
- // The IO transfer size in bytes
- int64_t io_size = -1;
- // The approximate CPU cost in number of instructions
- int64_t cpu_cost = -1;
- // An application-specific ID
- int64_t external_id = -1;
-};
-
-class ARROW_EXPORT Executor {
- public:
+#include "arrow/util/future.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+#if defined(_MSC_VER)
+// Disable harmless warning for decorated name length limit
+#pragma warning(disable : 4503)
+#endif
+
+namespace arrow {
+
+/// \brief Get the capacity of the global thread pool
+///
+/// Return the number of worker threads in the thread pool to which
+/// Arrow dispatches various CPU-bound tasks. This is an ideal number,
+/// not necessarily the exact number of threads at a given point in time.
+///
+/// You can change this number using SetCpuThreadPoolCapacity().
+ARROW_EXPORT int GetCpuThreadPoolCapacity();
+
+/// \brief Set the capacity of the global thread pool
+///
+/// Set the number of worker threads int the thread pool to which
+/// Arrow dispatches various CPU-bound tasks.
+///
+/// The current number is returned by GetCpuThreadPoolCapacity().
+ARROW_EXPORT Status SetCpuThreadPoolCapacity(int threads);
+
+namespace internal {
+
+// Hints about a task that may be used by an Executor.
+// They are ignored by the provided ThreadPool implementation.
+struct TaskHints {
+ // The lower, the more urgent
+ int32_t priority = 0;
+ // The IO transfer size in bytes
+ int64_t io_size = -1;
+ // The approximate CPU cost in number of instructions
+ int64_t cpu_cost = -1;
+ // An application-specific ID
+ int64_t external_id = -1;
+};
+
+class ARROW_EXPORT Executor {
+ public:
using StopCallback = internal::FnOnce<void(const Status&)>;
- virtual ~Executor();
-
- // Spawn a fire-and-forget task.
- template <typename Function>
- Status Spawn(Function&& func) {
+ virtual ~Executor();
+
+ // Spawn a fire-and-forget task.
+ template <typename Function>
+ Status Spawn(Function&& func) {
return SpawnReal(TaskHints{}, std::forward<Function>(func), StopToken::Unstoppable(),
StopCallback{});
- }
- template <typename Function>
+ }
+ template <typename Function>
Status Spawn(Function&& func, StopToken stop_token) {
return SpawnReal(TaskHints{}, std::forward<Function>(func), std::move(stop_token),
StopCallback{});
}
template <typename Function>
- Status Spawn(TaskHints hints, Function&& func) {
+ Status Spawn(TaskHints hints, Function&& func) {
return SpawnReal(hints, std::forward<Function>(func), StopToken::Unstoppable(),
StopCallback{});
- }
+ }
template <typename Function>
Status Spawn(TaskHints hints, Function&& func, StopToken stop_token) {
return SpawnReal(hints, std::forward<Function>(func), std::move(stop_token),
@@ -107,7 +107,7 @@ class ARROW_EXPORT Executor {
return SpawnReal(hints, std::forward<Function>(func), std::move(stop_token),
std::move(stop_callback));
}
-
+
// Transfers a future to this executor. Any continuations added to the
// returned future will run in this executor. Otherwise they would run
// on the same thread that called MarkFinished.
@@ -136,22 +136,22 @@ class ARROW_EXPORT Executor {
return DoTransfer(std::move(future), true);
}
- // Submit a callable and arguments for execution. Return a future that
- // will return the callable's result value once.
- // The callable's arguments are copied before execution.
+ // Submit a callable and arguments for execution. Return a future that
+ // will return the callable's result value once.
+ // The callable's arguments are copied before execution.
template <typename Function, typename... Args,
typename FutureType = typename ::arrow::detail::ContinueFuture::ForSignature<
Function && (Args && ...)>>
Result<FutureType> Submit(TaskHints hints, StopToken stop_token, Function&& func,
Args&&... args) {
using ValueType = typename FutureType::ValueType;
-
+
auto future = FutureType::Make();
auto task = std::bind(::arrow::detail::ContinueFuture{}, future,
std::forward<Function>(func), std::forward<Args>(args)...);
struct {
WeakFuture<ValueType> weak_fut;
-
+
void operator()(const Status& st) {
auto fut = weak_fut.get();
if (fut.is_valid()) {
@@ -161,18 +161,18 @@ class ARROW_EXPORT Executor {
} stop_callback{WeakFuture<ValueType>(future)};
ARROW_RETURN_NOT_OK(SpawnReal(hints, std::move(task), std::move(stop_token),
std::move(stop_callback)));
-
- return future;
- }
-
+
+ return future;
+ }
+
template <typename Function, typename... Args,
typename FutureType = typename ::arrow::detail::ContinueFuture::ForSignature<
Function && (Args && ...)>>
Result<FutureType> Submit(StopToken stop_token, Function&& func, Args&&... args) {
return Submit(TaskHints{}, stop_token, std::forward<Function>(func),
std::forward<Args>(args)...);
- }
-
+ }
+
template <typename Function, typename... Args,
typename FutureType = typename ::arrow::detail::ContinueFuture::ForSignature<
Function && (Args && ...)>>
@@ -189,19 +189,19 @@ class ARROW_EXPORT Executor {
std::forward<Args>(args)...);
}
- // Return the level of parallelism (the number of tasks that may be executed
- // concurrently). This may be an approximate number.
- virtual int GetCapacity() = 0;
-
+ // Return the level of parallelism (the number of tasks that may be executed
+ // concurrently). This may be an approximate number.
+ virtual int GetCapacity() = 0;
+
// Return true if the thread from which this function is called is owned by this
// Executor. Returns false if this Executor does not support this property.
virtual bool OwnsThisThread() { return false; }
- protected:
- ARROW_DISALLOW_COPY_AND_ASSIGN(Executor);
-
- Executor() = default;
-
+ protected:
+ ARROW_DISALLOW_COPY_AND_ASSIGN(Executor);
+
+ Executor() = default;
+
template <typename T, typename FT = Future<T>, typename FTSync = typename FT::SyncType>
Future<T> DoTransfer(Future<T> future, bool always_transfer = false) {
auto transferred = Future<T>::Make();
@@ -235,11 +235,11 @@ class ARROW_EXPORT Executor {
return future;
}
- // Subclassing API
+ // Subclassing API
virtual Status SpawnReal(TaskHints hints, FnOnce<void()> task, StopToken,
StopCallback&&) = 0;
-};
-
+};
+
/// \brief An executor implementation that runs all tasks on a single thread using an
/// event loop.
///
@@ -299,82 +299,82 @@ class ARROW_EXPORT SerialExecutor : public Executor {
/// Note: Any sort of nested parallelism will deadlock this executor. Blocking waits are
/// fine but if one task needs to wait for another task it must be expressed as an
/// asynchronous continuation.
-class ARROW_EXPORT ThreadPool : public Executor {
- public:
- // Construct a thread pool with the given number of worker threads
- static Result<std::shared_ptr<ThreadPool>> Make(int threads);
-
- // Like Make(), but takes care that the returned ThreadPool is compatible
- // with destruction late at process exit.
- static Result<std::shared_ptr<ThreadPool>> MakeEternal(int threads);
-
- // Destroy thread pool; the pool will first be shut down
+class ARROW_EXPORT ThreadPool : public Executor {
+ public:
+ // Construct a thread pool with the given number of worker threads
+ static Result<std::shared_ptr<ThreadPool>> Make(int threads);
+
+ // Like Make(), but takes care that the returned ThreadPool is compatible
+ // with destruction late at process exit.
+ static Result<std::shared_ptr<ThreadPool>> MakeEternal(int threads);
+
+ // Destroy thread pool; the pool will first be shut down
~ThreadPool() override;
-
- // Return the desired number of worker threads.
- // The actual number of workers may lag a bit before being adjusted to
- // match this value.
- int GetCapacity() override;
-
+
+ // Return the desired number of worker threads.
+ // The actual number of workers may lag a bit before being adjusted to
+ // match this value.
+ int GetCapacity() override;
+
bool OwnsThisThread() override;
// Return the number of tasks either running or in the queue.
int GetNumTasks();
- // Dynamically change the number of worker threads.
+ // Dynamically change the number of worker threads.
//
// This function always returns immediately.
// If fewer threads are running than this number, new threads are spawned
// on-demand when needed for task execution.
// If more threads are running than this number, excess threads are reaped
// as soon as possible.
- Status SetCapacity(int threads);
-
- // Heuristic for the default capacity of a thread pool for CPU-bound tasks.
- // This is exposed as a static method to help with testing.
- static int DefaultCapacity();
-
- // Shutdown the pool. Once the pool starts shutting down, new tasks
- // cannot be submitted anymore.
- // If "wait" is true, shutdown waits for all pending tasks to be finished.
- // If "wait" is false, workers are stopped as soon as currently executing
- // tasks are finished.
- Status Shutdown(bool wait = true);
-
- struct State;
-
- protected:
- FRIEND_TEST(TestThreadPool, SetCapacity);
- FRIEND_TEST(TestGlobalThreadPool, Capacity);
- friend ARROW_EXPORT ThreadPool* GetCpuThreadPool();
-
- ThreadPool();
-
+ Status SetCapacity(int threads);
+
+ // Heuristic for the default capacity of a thread pool for CPU-bound tasks.
+ // This is exposed as a static method to help with testing.
+ static int DefaultCapacity();
+
+ // Shutdown the pool. Once the pool starts shutting down, new tasks
+ // cannot be submitted anymore.
+ // If "wait" is true, shutdown waits for all pending tasks to be finished.
+ // If "wait" is false, workers are stopped as soon as currently executing
+ // tasks are finished.
+ Status Shutdown(bool wait = true);
+
+ struct State;
+
+ protected:
+ FRIEND_TEST(TestThreadPool, SetCapacity);
+ FRIEND_TEST(TestGlobalThreadPool, Capacity);
+ friend ARROW_EXPORT ThreadPool* GetCpuThreadPool();
+
+ ThreadPool();
+
Status SpawnReal(TaskHints hints, FnOnce<void()> task, StopToken,
StopCallback&&) override;
-
- // Collect finished worker threads, making sure the OS threads have exited
- void CollectFinishedWorkersUnlocked();
- // Launch a given number of additional workers
- void LaunchWorkersUnlocked(int threads);
- // Get the current actual capacity
- int GetActualCapacity();
- // Reinitialize the thread pool if the pid changed
- void ProtectAgainstFork();
-
- static std::shared_ptr<ThreadPool> MakeCpuThreadPool();
-
- std::shared_ptr<State> sp_state_;
- State* state_;
- bool shutdown_on_destroy_;
-#ifndef _WIN32
- pid_t pid_;
-#endif
-};
-
-// Return the process-global thread pool for CPU-bound tasks.
-ARROW_EXPORT ThreadPool* GetCpuThreadPool();
-
+
+ // Collect finished worker threads, making sure the OS threads have exited
+ void CollectFinishedWorkersUnlocked();
+ // Launch a given number of additional workers
+ void LaunchWorkersUnlocked(int threads);
+ // Get the current actual capacity
+ int GetActualCapacity();
+ // Reinitialize the thread pool if the pid changed
+ void ProtectAgainstFork();
+
+ static std::shared_ptr<ThreadPool> MakeCpuThreadPool();
+
+ std::shared_ptr<State> sp_state_;
+ State* state_;
+ bool shutdown_on_destroy_;
+#ifndef _WIN32
+ pid_t pid_;
+#endif
+};
+
+// Return the process-global thread pool for CPU-bound tasks.
+ARROW_EXPORT ThreadPool* GetCpuThreadPool();
+
/// \brief Potentially run an async operation serially (if use_threads is false)
/// \see RunSerially
///
@@ -394,5 +394,5 @@ typename Fut::SyncType RunSynchronously(FnOnce<Fut(Executor*)> get_future,
}
}
-} // namespace internal
-} // namespace arrow
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/time.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/time.cc
index c285f075099..84a136d6a0f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/time.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/time.cc
@@ -1,68 +1,68 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <memory>
-
-#include "arrow/type.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/time.h"
-
-namespace arrow {
-
-using internal::checked_cast;
-
-namespace util {
-
-// TimestampType -> TimestampType
-static const std::pair<DivideOrMultiply, int64_t> kTimestampConversionTable[4][4] = {
- // TimestampType::SECOND
- {{MULTIPLY, 1}, {MULTIPLY, 1000}, {MULTIPLY, 1000000}, {MULTIPLY, 1000000000}},
- // TimestampType::MILLI
- {{DIVIDE, 1000}, {MULTIPLY, 1}, {MULTIPLY, 1000}, {MULTIPLY, 1000000}},
- // TimestampType::MICRO
- {{DIVIDE, 1000000}, {DIVIDE, 1000}, {MULTIPLY, 1}, {MULTIPLY, 1000}},
- // TimestampType::NANO
- {{DIVIDE, 1000000000}, {DIVIDE, 1000000}, {DIVIDE, 1000}, {MULTIPLY, 1}},
-};
-
-std::pair<DivideOrMultiply, int64_t> GetTimestampConversion(TimeUnit::type in_unit,
- TimeUnit::type out_unit) {
- return kTimestampConversionTable[static_cast<int>(in_unit)][static_cast<int>(out_unit)];
-}
-
-Result<int64_t> ConvertTimestampValue(const std::shared_ptr<DataType>& in,
- const std::shared_ptr<DataType>& out,
- int64_t value) {
- auto op_factor =
- GetTimestampConversion(checked_cast<const TimestampType&>(*in).unit(),
- checked_cast<const TimestampType&>(*out).unit());
-
- auto op = op_factor.first;
- auto factor = op_factor.second;
- switch (op) {
- case MULTIPLY:
- return value * factor;
- case DIVIDE:
- return value / factor;
- }
-
- // unreachable...
- return 0;
-}
-
-} // namespace util
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <memory>
+
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/time.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+
+namespace util {
+
+// TimestampType -> TimestampType
+static const std::pair<DivideOrMultiply, int64_t> kTimestampConversionTable[4][4] = {
+ // TimestampType::SECOND
+ {{MULTIPLY, 1}, {MULTIPLY, 1000}, {MULTIPLY, 1000000}, {MULTIPLY, 1000000000}},
+ // TimestampType::MILLI
+ {{DIVIDE, 1000}, {MULTIPLY, 1}, {MULTIPLY, 1000}, {MULTIPLY, 1000000}},
+ // TimestampType::MICRO
+ {{DIVIDE, 1000000}, {DIVIDE, 1000}, {MULTIPLY, 1}, {MULTIPLY, 1000}},
+ // TimestampType::NANO
+ {{DIVIDE, 1000000000}, {DIVIDE, 1000000}, {DIVIDE, 1000}, {MULTIPLY, 1}},
+};
+
+std::pair<DivideOrMultiply, int64_t> GetTimestampConversion(TimeUnit::type in_unit,
+ TimeUnit::type out_unit) {
+ return kTimestampConversionTable[static_cast<int>(in_unit)][static_cast<int>(out_unit)];
+}
+
+Result<int64_t> ConvertTimestampValue(const std::shared_ptr<DataType>& in,
+ const std::shared_ptr<DataType>& out,
+ int64_t value) {
+ auto op_factor =
+ GetTimestampConversion(checked_cast<const TimestampType&>(*in).unit(),
+ checked_cast<const TimestampType&>(*out).unit());
+
+ auto op = op_factor.first;
+ auto factor = op_factor.second;
+ switch (op) {
+ case MULTIPLY:
+ return value * factor;
+ case DIVIDE:
+ return value / factor;
+ }
+
+ // unreachable...
+ return 0;
+}
+
+} // namespace util
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/time.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/time.h
index 80b41f63c58..83be234da51 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/time.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/time.h
@@ -1,82 +1,82 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <chrono>
-#include <memory>
-#include <utility>
-
-#include "arrow/type_fwd.h"
-
-namespace arrow {
-namespace util {
-
-enum DivideOrMultiply {
- MULTIPLY,
- DIVIDE,
-};
-
-ARROW_EXPORT
-std::pair<DivideOrMultiply, int64_t> GetTimestampConversion(TimeUnit::type in_unit,
- TimeUnit::type out_unit);
-
-// Converts a Timestamp value into another Timestamp value.
-//
-// This function takes care of properly transforming from one unit to another.
-//
-// \param[in] in the input type. Must be TimestampType.
-// \param[in] out the output type. Must be TimestampType.
-// \param[in] value the input value.
-//
-// \return The converted value, or an error.
-ARROW_EXPORT Result<int64_t> ConvertTimestampValue(const std::shared_ptr<DataType>& in,
- const std::shared_ptr<DataType>& out,
- int64_t value);
-
-template <typename Visitor, typename... Args>
-decltype(std::declval<Visitor>()(std::chrono::seconds{}, std::declval<Args&&>()...))
-VisitDuration(TimeUnit::type unit, Visitor&& visitor, Args&&... args) {
- switch (unit) {
- default:
- case TimeUnit::SECOND:
- break;
- case TimeUnit::MILLI:
- return visitor(std::chrono::milliseconds{}, std::forward<Args>(args)...);
- case TimeUnit::MICRO:
- return visitor(std::chrono::microseconds{}, std::forward<Args>(args)...);
- case TimeUnit::NANO:
- return visitor(std::chrono::nanoseconds{}, std::forward<Args>(args)...);
- }
- return visitor(std::chrono::seconds{}, std::forward<Args>(args)...);
-}
-
-/// Convert a count of seconds to the corresponding count in a different TimeUnit
-struct CastSecondsToUnitImpl {
- template <typename Duration>
- int64_t operator()(Duration, int64_t seconds) {
- auto duration = std::chrono::duration_cast<Duration>(std::chrono::seconds{seconds});
- return static_cast<int64_t>(duration.count());
- }
-};
-
-inline int64_t CastSecondsToUnit(TimeUnit::type unit, int64_t seconds) {
- return VisitDuration(unit, CastSecondsToUnitImpl{}, seconds);
-}
-
-} // namespace util
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <chrono>
+#include <memory>
+#include <utility>
+
+#include "arrow/type_fwd.h"
+
+namespace arrow {
+namespace util {
+
+enum DivideOrMultiply {
+ MULTIPLY,
+ DIVIDE,
+};
+
+ARROW_EXPORT
+std::pair<DivideOrMultiply, int64_t> GetTimestampConversion(TimeUnit::type in_unit,
+ TimeUnit::type out_unit);
+
+// Converts a Timestamp value into another Timestamp value.
+//
+// This function takes care of properly transforming from one unit to another.
+//
+// \param[in] in the input type. Must be TimestampType.
+// \param[in] out the output type. Must be TimestampType.
+// \param[in] value the input value.
+//
+// \return The converted value, or an error.
+ARROW_EXPORT Result<int64_t> ConvertTimestampValue(const std::shared_ptr<DataType>& in,
+ const std::shared_ptr<DataType>& out,
+ int64_t value);
+
+template <typename Visitor, typename... Args>
+decltype(std::declval<Visitor>()(std::chrono::seconds{}, std::declval<Args&&>()...))
+VisitDuration(TimeUnit::type unit, Visitor&& visitor, Args&&... args) {
+ switch (unit) {
+ default:
+ case TimeUnit::SECOND:
+ break;
+ case TimeUnit::MILLI:
+ return visitor(std::chrono::milliseconds{}, std::forward<Args>(args)...);
+ case TimeUnit::MICRO:
+ return visitor(std::chrono::microseconds{}, std::forward<Args>(args)...);
+ case TimeUnit::NANO:
+ return visitor(std::chrono::nanoseconds{}, std::forward<Args>(args)...);
+ }
+ return visitor(std::chrono::seconds{}, std::forward<Args>(args)...);
+}
+
+/// Convert a count of seconds to the corresponding count in a different TimeUnit
+struct CastSecondsToUnitImpl {
+ template <typename Duration>
+ int64_t operator()(Duration, int64_t seconds) {
+ auto duration = std::chrono::duration_cast<Duration>(std::chrono::seconds{seconds});
+ return static_cast<int64_t>(duration.count());
+ }
+};
+
+inline int64_t CastSecondsToUnit(TimeUnit::type unit, int64_t seconds) {
+ return VisitDuration(unit, CastSecondsToUnitImpl{}, seconds);
+}
+
+} // namespace util
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/trie.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/trie.cc
index 7fa7f852eb4..3424b2e34c5 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/trie.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/trie.cc
@@ -1,211 +1,211 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/trie.h"
-
-#include <iostream>
-#include <utility>
-
-#include "arrow/util/logging.h"
-
-namespace arrow {
-namespace internal {
-
-Status Trie::Validate() const {
- const auto n_nodes = static_cast<fast_index_type>(nodes_.size());
- if (size_ > n_nodes) {
- return Status::Invalid("Number of entries larger than number of nodes");
- }
- for (const auto& node : nodes_) {
- if (node.found_index_ >= size_) {
- return Status::Invalid("Found index >= size");
- }
- if (node.child_lookup_ != -1 &&
- node.child_lookup_ * 256 >
- static_cast<fast_index_type>(lookup_table_.size() - 256)) {
- return Status::Invalid("Child lookup base doesn't point to 256 valid indices");
- }
- }
- for (const auto index : lookup_table_) {
- if (index >= n_nodes) {
- return Status::Invalid("Child lookup index out of bounds");
- }
- }
- return Status::OK();
-}
-
-void Trie::Dump(const Node* node, const std::string& indent) const {
- std::cerr << "[\"" << node->substring_ << "\"]";
- if (node->found_index_ >= 0) {
- std::cerr << " *";
- }
- std::cerr << "\n";
- if (node->child_lookup_ >= 0) {
- auto child_indent = indent + " ";
- std::cerr << child_indent << "|\n";
- for (fast_index_type i = 0; i < 256; ++i) {
- auto child_index = lookup_table_[node->child_lookup_ * 256 + i];
- if (child_index >= 0) {
- const Node* child = &nodes_[child_index];
- std::cerr << child_indent << "|-> '" << static_cast<char>(i) << "' (" << i
- << ") -> ";
- Dump(child, child_indent);
- }
- }
- }
-}
-
-void Trie::Dump() const { Dump(&nodes_[0], ""); }
-
-TrieBuilder::TrieBuilder() { trie_.nodes_.push_back(Trie::Node{-1, -1, ""}); }
-
-Status TrieBuilder::AppendChildNode(Trie::Node* parent, uint8_t ch, Trie::Node&& node) {
- if (parent->child_lookup_ == -1) {
- RETURN_NOT_OK(ExtendLookupTable(&parent->child_lookup_));
- }
- auto parent_lookup = parent->child_lookup_ * 256 + ch;
-
- DCHECK_EQ(trie_.lookup_table_[parent_lookup], -1);
- if (trie_.nodes_.size() >= static_cast<size_t>(kMaxIndex)) {
- auto max_capacity = kMaxIndex;
- return Status::CapacityError("TrieBuilder cannot contain more than ", max_capacity,
- " child nodes");
- }
- trie_.nodes_.push_back(std::move(node));
- trie_.lookup_table_[parent_lookup] = static_cast<index_type>(trie_.nodes_.size() - 1);
- return Status::OK();
-}
-
-Status TrieBuilder::CreateChildNode(Trie::Node* parent, uint8_t ch,
- util::string_view substring) {
- const auto kMaxSubstringLength = Trie::kMaxSubstringLength;
-
- while (substring.length() > kMaxSubstringLength) {
- // Substring doesn't fit in node => create intermediate node
- auto mid_node = Trie::Node{-1, -1, substring.substr(0, kMaxSubstringLength)};
- RETURN_NOT_OK(AppendChildNode(parent, ch, std::move(mid_node)));
- // Recurse
- parent = &trie_.nodes_.back();
- ch = static_cast<uint8_t>(substring[kMaxSubstringLength]);
- substring = substring.substr(kMaxSubstringLength + 1);
- }
-
- // Create final matching node
- auto child_node = Trie::Node{trie_.size_, -1, substring};
- RETURN_NOT_OK(AppendChildNode(parent, ch, std::move(child_node)));
- ++trie_.size_;
- return Status::OK();
-}
-
-Status TrieBuilder::CreateChildNode(Trie::Node* parent, char ch,
- util::string_view substring) {
- return CreateChildNode(parent, static_cast<uint8_t>(ch), substring);
-}
-
-Status TrieBuilder::ExtendLookupTable(index_type* out_index) {
- auto cur_size = trie_.lookup_table_.size();
- auto cur_index = cur_size / 256;
- if (cur_index > static_cast<size_t>(kMaxIndex)) {
- return Status::CapacityError("TrieBuilder cannot extend lookup table further");
- }
- trie_.lookup_table_.resize(cur_size + 256, -1);
- *out_index = static_cast<index_type>(cur_index);
- return Status::OK();
-}
-
-Status TrieBuilder::SplitNode(fast_index_type node_index, fast_index_type split_at) {
- Trie::Node* node = &trie_.nodes_[node_index];
-
- DCHECK_LT(split_at, node->substring_length());
-
- // Before:
- // {node} -> [...]
- // After:
- // {node} -> [c] -> {out_node} -> [...]
- auto child_node = Trie::Node{node->found_index_, node->child_lookup_,
- node->substring_.substr(split_at + 1)};
- auto ch = node->substring_[split_at];
- node->child_lookup_ = -1;
- node->found_index_ = -1;
- node->substring_ = node->substring_.substr(0, split_at);
- RETURN_NOT_OK(AppendChildNode(node, ch, std::move(child_node)));
-
- return Status::OK();
-}
-
-Status TrieBuilder::Append(util::string_view s, bool allow_duplicate) {
- // Find or create node for string
- fast_index_type node_index = 0;
- fast_index_type pos = 0;
- fast_index_type remaining = static_cast<fast_index_type>(s.length());
-
- while (true) {
- Trie::Node* node = &trie_.nodes_[node_index];
- const auto substring_length = node->substring_length();
- const auto substring_data = node->substring_data();
-
- for (fast_index_type i = 0; i < substring_length; ++i) {
- if (remaining == 0) {
- // New string too short => need to split node
- RETURN_NOT_OK(SplitNode(node_index, i));
- // Current node matches exactly
- node = &trie_.nodes_[node_index];
- node->found_index_ = trie_.size_++;
- return Status::OK();
- }
- if (s[pos] != substring_data[i]) {
- // Mismatching substring => need to split node
- RETURN_NOT_OK(SplitNode(node_index, i));
- // Create new node for mismatching char
- node = &trie_.nodes_[node_index];
- return CreateChildNode(node, s[pos], s.substr(pos + 1));
- }
- ++pos;
- --remaining;
- }
- if (remaining == 0) {
- // Node matches exactly
- if (node->found_index_ >= 0) {
- if (allow_duplicate) {
- return Status::OK();
- } else {
- return Status::Invalid("Duplicate entry in trie");
- }
- }
- node->found_index_ = trie_.size_++;
- return Status::OK();
- }
- // Lookup child using next input character
- if (node->child_lookup_ == -1) {
- // Need to extend lookup table for this node
- RETURN_NOT_OK(ExtendLookupTable(&node->child_lookup_));
- }
- auto c = static_cast<uint8_t>(s[pos++]);
- --remaining;
- node_index = trie_.lookup_table_[node->child_lookup_ * 256 + c];
- if (node_index == -1) {
- // Child not found => need to create child node
- return CreateChildNode(node, c, s.substr(pos));
- }
- node = &trie_.nodes_[node_index];
- }
-}
-
-Trie TrieBuilder::Finish() { return std::move(trie_); }
-
-} // namespace internal
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/trie.h"
+
+#include <iostream>
+#include <utility>
+
+#include "arrow/util/logging.h"
+
+namespace arrow {
+namespace internal {
+
+Status Trie::Validate() const {
+ const auto n_nodes = static_cast<fast_index_type>(nodes_.size());
+ if (size_ > n_nodes) {
+ return Status::Invalid("Number of entries larger than number of nodes");
+ }
+ for (const auto& node : nodes_) {
+ if (node.found_index_ >= size_) {
+ return Status::Invalid("Found index >= size");
+ }
+ if (node.child_lookup_ != -1 &&
+ node.child_lookup_ * 256 >
+ static_cast<fast_index_type>(lookup_table_.size() - 256)) {
+ return Status::Invalid("Child lookup base doesn't point to 256 valid indices");
+ }
+ }
+ for (const auto index : lookup_table_) {
+ if (index >= n_nodes) {
+ return Status::Invalid("Child lookup index out of bounds");
+ }
+ }
+ return Status::OK();
+}
+
+void Trie::Dump(const Node* node, const std::string& indent) const {
+ std::cerr << "[\"" << node->substring_ << "\"]";
+ if (node->found_index_ >= 0) {
+ std::cerr << " *";
+ }
+ std::cerr << "\n";
+ if (node->child_lookup_ >= 0) {
+ auto child_indent = indent + " ";
+ std::cerr << child_indent << "|\n";
+ for (fast_index_type i = 0; i < 256; ++i) {
+ auto child_index = lookup_table_[node->child_lookup_ * 256 + i];
+ if (child_index >= 0) {
+ const Node* child = &nodes_[child_index];
+ std::cerr << child_indent << "|-> '" << static_cast<char>(i) << "' (" << i
+ << ") -> ";
+ Dump(child, child_indent);
+ }
+ }
+ }
+}
+
+void Trie::Dump() const { Dump(&nodes_[0], ""); }
+
+TrieBuilder::TrieBuilder() { trie_.nodes_.push_back(Trie::Node{-1, -1, ""}); }
+
+Status TrieBuilder::AppendChildNode(Trie::Node* parent, uint8_t ch, Trie::Node&& node) {
+ if (parent->child_lookup_ == -1) {
+ RETURN_NOT_OK(ExtendLookupTable(&parent->child_lookup_));
+ }
+ auto parent_lookup = parent->child_lookup_ * 256 + ch;
+
+ DCHECK_EQ(trie_.lookup_table_[parent_lookup], -1);
+ if (trie_.nodes_.size() >= static_cast<size_t>(kMaxIndex)) {
+ auto max_capacity = kMaxIndex;
+ return Status::CapacityError("TrieBuilder cannot contain more than ", max_capacity,
+ " child nodes");
+ }
+ trie_.nodes_.push_back(std::move(node));
+ trie_.lookup_table_[parent_lookup] = static_cast<index_type>(trie_.nodes_.size() - 1);
+ return Status::OK();
+}
+
+Status TrieBuilder::CreateChildNode(Trie::Node* parent, uint8_t ch,
+ util::string_view substring) {
+ const auto kMaxSubstringLength = Trie::kMaxSubstringLength;
+
+ while (substring.length() > kMaxSubstringLength) {
+ // Substring doesn't fit in node => create intermediate node
+ auto mid_node = Trie::Node{-1, -1, substring.substr(0, kMaxSubstringLength)};
+ RETURN_NOT_OK(AppendChildNode(parent, ch, std::move(mid_node)));
+ // Recurse
+ parent = &trie_.nodes_.back();
+ ch = static_cast<uint8_t>(substring[kMaxSubstringLength]);
+ substring = substring.substr(kMaxSubstringLength + 1);
+ }
+
+ // Create final matching node
+ auto child_node = Trie::Node{trie_.size_, -1, substring};
+ RETURN_NOT_OK(AppendChildNode(parent, ch, std::move(child_node)));
+ ++trie_.size_;
+ return Status::OK();
+}
+
+Status TrieBuilder::CreateChildNode(Trie::Node* parent, char ch,
+ util::string_view substring) {
+ return CreateChildNode(parent, static_cast<uint8_t>(ch), substring);
+}
+
+Status TrieBuilder::ExtendLookupTable(index_type* out_index) {
+ auto cur_size = trie_.lookup_table_.size();
+ auto cur_index = cur_size / 256;
+ if (cur_index > static_cast<size_t>(kMaxIndex)) {
+ return Status::CapacityError("TrieBuilder cannot extend lookup table further");
+ }
+ trie_.lookup_table_.resize(cur_size + 256, -1);
+ *out_index = static_cast<index_type>(cur_index);
+ return Status::OK();
+}
+
+Status TrieBuilder::SplitNode(fast_index_type node_index, fast_index_type split_at) {
+ Trie::Node* node = &trie_.nodes_[node_index];
+
+ DCHECK_LT(split_at, node->substring_length());
+
+ // Before:
+ // {node} -> [...]
+ // After:
+ // {node} -> [c] -> {out_node} -> [...]
+ auto child_node = Trie::Node{node->found_index_, node->child_lookup_,
+ node->substring_.substr(split_at + 1)};
+ auto ch = node->substring_[split_at];
+ node->child_lookup_ = -1;
+ node->found_index_ = -1;
+ node->substring_ = node->substring_.substr(0, split_at);
+ RETURN_NOT_OK(AppendChildNode(node, ch, std::move(child_node)));
+
+ return Status::OK();
+}
+
+Status TrieBuilder::Append(util::string_view s, bool allow_duplicate) {
+ // Find or create node for string
+ fast_index_type node_index = 0;
+ fast_index_type pos = 0;
+ fast_index_type remaining = static_cast<fast_index_type>(s.length());
+
+ while (true) {
+ Trie::Node* node = &trie_.nodes_[node_index];
+ const auto substring_length = node->substring_length();
+ const auto substring_data = node->substring_data();
+
+ for (fast_index_type i = 0; i < substring_length; ++i) {
+ if (remaining == 0) {
+ // New string too short => need to split node
+ RETURN_NOT_OK(SplitNode(node_index, i));
+ // Current node matches exactly
+ node = &trie_.nodes_[node_index];
+ node->found_index_ = trie_.size_++;
+ return Status::OK();
+ }
+ if (s[pos] != substring_data[i]) {
+ // Mismatching substring => need to split node
+ RETURN_NOT_OK(SplitNode(node_index, i));
+ // Create new node for mismatching char
+ node = &trie_.nodes_[node_index];
+ return CreateChildNode(node, s[pos], s.substr(pos + 1));
+ }
+ ++pos;
+ --remaining;
+ }
+ if (remaining == 0) {
+ // Node matches exactly
+ if (node->found_index_ >= 0) {
+ if (allow_duplicate) {
+ return Status::OK();
+ } else {
+ return Status::Invalid("Duplicate entry in trie");
+ }
+ }
+ node->found_index_ = trie_.size_++;
+ return Status::OK();
+ }
+ // Lookup child using next input character
+ if (node->child_lookup_ == -1) {
+ // Need to extend lookup table for this node
+ RETURN_NOT_OK(ExtendLookupTable(&node->child_lookup_));
+ }
+ auto c = static_cast<uint8_t>(s[pos++]);
+ --remaining;
+ node_index = trie_.lookup_table_[node->child_lookup_ * 256 + c];
+ if (node_index == -1) {
+ // Child not found => need to create child node
+ return CreateChildNode(node, c, s.substr(pos));
+ }
+ node = &trie_.nodes_[node_index];
+ }
+}
+
+Trie TrieBuilder::Finish() { return std::move(trie_); }
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/trie.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/trie.h
index b250cca647d..0cc9d669ff0 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/trie.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/trie.h
@@ -1,245 +1,245 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cassert>
-#include <cstdint>
-#include <cstring>
-#include <iosfwd>
-#include <limits>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "arrow/status.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/string_view.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace internal {
-
-// A non-zero-terminated small string class.
-// std::string usually has a small string optimization
-// (see review at https://shaharmike.com/cpp/std-string/)
-// but this one allows tight control and optimization of memory layout.
-template <uint8_t N>
-class SmallString {
- public:
- SmallString() : length_(0) {}
-
- template <typename T>
- SmallString(const T& v) { // NOLINT implicit constructor
- *this = util::string_view(v);
- }
-
- SmallString& operator=(const util::string_view s) {
-#ifndef NDEBUG
- CheckSize(s.size());
-#endif
- length_ = static_cast<uint8_t>(s.size());
- std::memcpy(data_, s.data(), length_);
- return *this;
- }
-
- SmallString& operator=(const std::string& s) {
- *this = util::string_view(s);
- return *this;
- }
-
- SmallString& operator=(const char* s) {
- *this = util::string_view(s);
- return *this;
- }
-
- explicit operator util::string_view() const {
- return util::string_view(data_, length_);
- }
-
- const char* data() const { return data_; }
- size_t length() const { return length_; }
- bool empty() const { return length_ == 0; }
- char operator[](size_t pos) const {
-#ifdef NDEBUG
- assert(pos <= length_);
-#endif
- return data_[pos];
- }
-
- SmallString substr(size_t pos) const {
- return SmallString(util::string_view(*this).substr(pos));
- }
-
- SmallString substr(size_t pos, size_t count) const {
- return SmallString(util::string_view(*this).substr(pos, count));
- }
-
- template <typename T>
- bool operator==(T&& other) const {
- return util::string_view(*this) == util::string_view(std::forward<T>(other));
- }
-
- template <typename T>
- bool operator!=(T&& other) const {
- return util::string_view(*this) != util::string_view(std::forward<T>(other));
- }
-
- protected:
- uint8_t length_;
- char data_[N];
-
- void CheckSize(size_t n) { assert(n <= N); }
-};
-
-template <uint8_t N>
-std::ostream& operator<<(std::ostream& os, const SmallString<N>& str) {
- return os << util::string_view(str);
-}
-
-// A trie class for byte strings, optimized for small sets of short strings.
-// This class is immutable by design, use a TrieBuilder to construct it.
-class ARROW_EXPORT Trie {
- using index_type = int16_t;
- using fast_index_type = int_fast16_t;
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <iosfwd>
+#include <limits>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/status.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace internal {
+
+// A non-zero-terminated small string class.
+// std::string usually has a small string optimization
+// (see review at https://shaharmike.com/cpp/std-string/)
+// but this one allows tight control and optimization of memory layout.
+template <uint8_t N>
+class SmallString {
+ public:
+ SmallString() : length_(0) {}
+
+ template <typename T>
+ SmallString(const T& v) { // NOLINT implicit constructor
+ *this = util::string_view(v);
+ }
+
+ SmallString& operator=(const util::string_view s) {
+#ifndef NDEBUG
+ CheckSize(s.size());
+#endif
+ length_ = static_cast<uint8_t>(s.size());
+ std::memcpy(data_, s.data(), length_);
+ return *this;
+ }
+
+ SmallString& operator=(const std::string& s) {
+ *this = util::string_view(s);
+ return *this;
+ }
+
+ SmallString& operator=(const char* s) {
+ *this = util::string_view(s);
+ return *this;
+ }
+
+ explicit operator util::string_view() const {
+ return util::string_view(data_, length_);
+ }
+
+ const char* data() const { return data_; }
+ size_t length() const { return length_; }
+ bool empty() const { return length_ == 0; }
+ char operator[](size_t pos) const {
+#ifdef NDEBUG
+ assert(pos <= length_);
+#endif
+ return data_[pos];
+ }
+
+ SmallString substr(size_t pos) const {
+ return SmallString(util::string_view(*this).substr(pos));
+ }
+
+ SmallString substr(size_t pos, size_t count) const {
+ return SmallString(util::string_view(*this).substr(pos, count));
+ }
+
+ template <typename T>
+ bool operator==(T&& other) const {
+ return util::string_view(*this) == util::string_view(std::forward<T>(other));
+ }
+
+ template <typename T>
+ bool operator!=(T&& other) const {
+ return util::string_view(*this) != util::string_view(std::forward<T>(other));
+ }
+
+ protected:
+ uint8_t length_;
+ char data_[N];
+
+ void CheckSize(size_t n) { assert(n <= N); }
+};
+
+template <uint8_t N>
+std::ostream& operator<<(std::ostream& os, const SmallString<N>& str) {
+ return os << util::string_view(str);
+}
+
+// A trie class for byte strings, optimized for small sets of short strings.
+// This class is immutable by design, use a TrieBuilder to construct it.
+class ARROW_EXPORT Trie {
+ using index_type = int16_t;
+ using fast_index_type = int_fast16_t;
static constexpr auto kMaxIndex = std::numeric_limits<index_type>::max();
-
- public:
- Trie() : size_(0) {}
- Trie(Trie&&) = default;
- Trie& operator=(Trie&&) = default;
-
- int32_t Find(util::string_view s) const {
- const Node* node = &nodes_[0];
- fast_index_type pos = 0;
+
+ public:
+ Trie() : size_(0) {}
+ Trie(Trie&&) = default;
+ Trie& operator=(Trie&&) = default;
+
+ int32_t Find(util::string_view s) const {
+ const Node* node = &nodes_[0];
+ fast_index_type pos = 0;
if (s.length() > static_cast<size_t>(kMaxIndex)) {
return -1;
}
- fast_index_type remaining = static_cast<fast_index_type>(s.length());
-
- while (remaining > 0) {
- auto substring_length = node->substring_length();
- if (substring_length > 0) {
- auto substring_data = node->substring_data();
- if (remaining < substring_length) {
- // Input too short
- return -1;
- }
- for (fast_index_type i = 0; i < substring_length; ++i) {
- if (s[pos++] != substring_data[i]) {
- // Mismatching substring
- return -1;
- }
- --remaining;
- }
- if (remaining == 0) {
- // Matched node exactly
- return node->found_index_;
- }
- }
- // Lookup child using next input character
- if (node->child_lookup_ == -1) {
- // Input too long
- return -1;
- }
- auto c = static_cast<uint8_t>(s[pos++]);
- --remaining;
- auto child_index = lookup_table_[node->child_lookup_ * 256 + c];
- if (child_index == -1) {
- // Child not found
- return -1;
- }
- node = &nodes_[child_index];
- }
-
- // Input exhausted
- if (node->substring_.empty()) {
- // Matched node exactly
- return node->found_index_;
- } else {
- return -1;
- }
- }
-
- Status Validate() const;
-
- void Dump() const;
-
- protected:
- static constexpr size_t kNodeSize = 16;
- static constexpr auto kMaxSubstringLength =
- kNodeSize - 2 * sizeof(index_type) - sizeof(int8_t);
-
- struct Node {
- // If this node is a valid end of string, index of found string, otherwise -1
- index_type found_index_;
- // Base index for child lookup in lookup_table_ (-1 if no child nodes)
- index_type child_lookup_;
- // The substring for this node.
- SmallString<kMaxSubstringLength> substring_;
-
- fast_index_type substring_length() const {
- return static_cast<fast_index_type>(substring_.length());
- }
- const char* substring_data() const { return substring_.data(); }
- };
-
- static_assert(sizeof(Node) == kNodeSize, "Unexpected node size");
-
- ARROW_DISALLOW_COPY_AND_ASSIGN(Trie);
-
- void Dump(const Node* node, const std::string& indent) const;
-
- // Node table: entry 0 is the root node
- std::vector<Node> nodes_;
-
- // Indexed lookup structure: gives index in node table, or -1 if not found
- std::vector<index_type> lookup_table_;
-
- // Number of entries
- index_type size_;
-
- friend class TrieBuilder;
-};
-
-class ARROW_EXPORT TrieBuilder {
- using index_type = Trie::index_type;
- using fast_index_type = Trie::fast_index_type;
-
- public:
- TrieBuilder();
- Status Append(util::string_view s, bool allow_duplicate = false);
- Trie Finish();
-
- protected:
- // Extend the lookup table by 256 entries, return the index of the new span
- Status ExtendLookupTable(index_type* out_lookup_index);
- // Split the node given by the index at the substring index `split_at`
- Status SplitNode(fast_index_type node_index, fast_index_type split_at);
- // Append an already constructed child node to the parent
- Status AppendChildNode(Trie::Node* parent, uint8_t ch, Trie::Node&& node);
- // Create a matching child node from this parent
- Status CreateChildNode(Trie::Node* parent, uint8_t ch, util::string_view substring);
- Status CreateChildNode(Trie::Node* parent, char ch, util::string_view substring);
-
- Trie trie_;
-
- static constexpr auto kMaxIndex = std::numeric_limits<index_type>::max();
-};
-
-} // namespace internal
-} // namespace arrow
+ fast_index_type remaining = static_cast<fast_index_type>(s.length());
+
+ while (remaining > 0) {
+ auto substring_length = node->substring_length();
+ if (substring_length > 0) {
+ auto substring_data = node->substring_data();
+ if (remaining < substring_length) {
+ // Input too short
+ return -1;
+ }
+ for (fast_index_type i = 0; i < substring_length; ++i) {
+ if (s[pos++] != substring_data[i]) {
+ // Mismatching substring
+ return -1;
+ }
+ --remaining;
+ }
+ if (remaining == 0) {
+ // Matched node exactly
+ return node->found_index_;
+ }
+ }
+ // Lookup child using next input character
+ if (node->child_lookup_ == -1) {
+ // Input too long
+ return -1;
+ }
+ auto c = static_cast<uint8_t>(s[pos++]);
+ --remaining;
+ auto child_index = lookup_table_[node->child_lookup_ * 256 + c];
+ if (child_index == -1) {
+ // Child not found
+ return -1;
+ }
+ node = &nodes_[child_index];
+ }
+
+ // Input exhausted
+ if (node->substring_.empty()) {
+ // Matched node exactly
+ return node->found_index_;
+ } else {
+ return -1;
+ }
+ }
+
+ Status Validate() const;
+
+ void Dump() const;
+
+ protected:
+ static constexpr size_t kNodeSize = 16;
+ static constexpr auto kMaxSubstringLength =
+ kNodeSize - 2 * sizeof(index_type) - sizeof(int8_t);
+
+ struct Node {
+ // If this node is a valid end of string, index of found string, otherwise -1
+ index_type found_index_;
+ // Base index for child lookup in lookup_table_ (-1 if no child nodes)
+ index_type child_lookup_;
+ // The substring for this node.
+ SmallString<kMaxSubstringLength> substring_;
+
+ fast_index_type substring_length() const {
+ return static_cast<fast_index_type>(substring_.length());
+ }
+ const char* substring_data() const { return substring_.data(); }
+ };
+
+ static_assert(sizeof(Node) == kNodeSize, "Unexpected node size");
+
+ ARROW_DISALLOW_COPY_AND_ASSIGN(Trie);
+
+ void Dump(const Node* node, const std::string& indent) const;
+
+ // Node table: entry 0 is the root node
+ std::vector<Node> nodes_;
+
+ // Indexed lookup structure: gives index in node table, or -1 if not found
+ std::vector<index_type> lookup_table_;
+
+ // Number of entries
+ index_type size_;
+
+ friend class TrieBuilder;
+};
+
+class ARROW_EXPORT TrieBuilder {
+ using index_type = Trie::index_type;
+ using fast_index_type = Trie::fast_index_type;
+
+ public:
+ TrieBuilder();
+ Status Append(util::string_view s, bool allow_duplicate = false);
+ Trie Finish();
+
+ protected:
+ // Extend the lookup table by 256 entries, return the index of the new span
+ Status ExtendLookupTable(index_type* out_lookup_index);
+ // Split the node given by the index at the substring index `split_at`
+ Status SplitNode(fast_index_type node_index, fast_index_type split_at);
+ // Append an already constructed child node to the parent
+ Status AppendChildNode(Trie::Node* parent, uint8_t ch, Trie::Node&& node);
+ // Create a matching child node from this parent
+ Status CreateChildNode(Trie::Node* parent, uint8_t ch, util::string_view substring);
+ Status CreateChildNode(Trie::Node* parent, char ch, util::string_view substring);
+
+ Trie trie_;
+
+ static constexpr auto kMaxIndex = std::numeric_limits<index_type>::max();
+};
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/type_fwd.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/type_fwd.h
index ca107c2c69d..e82b189fb34 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/type_fwd.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/type_fwd.h
@@ -1,41 +1,41 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-namespace arrow {
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+namespace arrow {
+
namespace internal {
struct Empty;
} // namespace internal
-
+
template <typename T = internal::Empty>
class WeakFuture;
class FutureWaiter;
class TimestampParser;
-namespace internal {
-
-class Executor;
-class TaskGroup;
-class ThreadPool;
-
-} // namespace internal
+namespace internal {
+
+class Executor;
+class TaskGroup;
+class ThreadPool;
+
+} // namespace internal
struct Compression {
/// \brief Compression algorithm
@@ -59,4 +59,4 @@ class Decompressor;
class Codec;
} // namespace util
-} // namespace arrow
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/type_traits.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/type_traits.h
index 80cc6297e39..4ff47affc39 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/type_traits.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/type_traits.h
@@ -1,47 +1,47 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
#include <cstdint>
-#include <type_traits>
-
-namespace arrow {
-namespace internal {
-
-/// \brief Metafunction to allow checking if a type matches any of another set of types
-template <typename...>
-struct IsOneOf : std::false_type {}; /// Base case: nothing has matched
-
-template <typename T, typename U, typename... Args>
-struct IsOneOf<T, U, Args...> {
- /// Recursive case: T == U or T matches any other types provided (not including U).
- static constexpr bool value = std::is_same<T, U>::value || IsOneOf<T, Args...>::value;
-};
-
-/// \brief Shorthand for using IsOneOf + std::enable_if
-template <typename T, typename... Args>
-using EnableIfIsOneOf = typename std::enable_if<IsOneOf<T, Args...>::value, T>::type;
-
-/// \brief is_null_pointer from C++17
-template <typename T>
-struct is_null_pointer : std::is_same<std::nullptr_t, typename std::remove_cv<T>::type> {
-};
-
+#include <type_traits>
+
+namespace arrow {
+namespace internal {
+
+/// \brief Metafunction to allow checking if a type matches any of another set of types
+template <typename...>
+struct IsOneOf : std::false_type {}; /// Base case: nothing has matched
+
+template <typename T, typename U, typename... Args>
+struct IsOneOf<T, U, Args...> {
+ /// Recursive case: T == U or T matches any other types provided (not including U).
+ static constexpr bool value = std::is_same<T, U>::value || IsOneOf<T, Args...>::value;
+};
+
+/// \brief Shorthand for using IsOneOf + std::enable_if
+template <typename T, typename... Args>
+using EnableIfIsOneOf = typename std::enable_if<IsOneOf<T, Args...>::value, T>::type;
+
+/// \brief is_null_pointer from C++17
+template <typename T>
+struct is_null_pointer : std::is_same<std::nullptr_t, typename std::remove_cv<T>::type> {
+};
+
#ifdef __GLIBCXX__
// A aligned_union backport, because old libstdc++ versions don't include it.
@@ -82,5 +82,5 @@ using aligned_union = std::aligned_union<Len, T...>;
#endif
-} // namespace internal
-} // namespace arrow
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/ubsan.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/ubsan.h
index 2d4b513894b..a8cbaeadd8f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/ubsan.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/ubsan.h
@@ -1,88 +1,88 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Contains utilities for making UBSan happy.
-
-#pragma once
-
-#include <cstring>
-#include <memory>
-#include <type_traits>
-
-#include "arrow/util/macros.h"
-
-namespace arrow {
-namespace util {
-
-namespace internal {
-
-static uint8_t non_null_filler;
-
-} // namespace internal
-
-/// \brief Returns maybe_null if not null or a non-null pointer to an arbitrary memory
-/// that shouldn't be dereferenced.
-///
-/// Memset/Memcpy are undefined when a nullptr is passed as an argument use this utility
-/// method to wrap locations where this could happen.
-///
-/// Note: Flatbuffers has UBSan warnings if a zero length vector is passed.
-/// https://github.com/google/flatbuffers/pull/5355 is trying to resolve
-/// them.
-template <typename T>
-inline T* MakeNonNull(T* maybe_null) {
- if (ARROW_PREDICT_TRUE(maybe_null != NULLPTR)) {
- return maybe_null;
- }
-
- return reinterpret_cast<T*>(&internal::non_null_filler);
-}
-
-template <typename T>
-inline typename std::enable_if<std::is_trivial<T>::value, T>::type SafeLoadAs(
- const uint8_t* unaligned) {
- typename std::remove_const<T>::type ret;
- std::memcpy(&ret, unaligned, sizeof(T));
- return ret;
-}
-
-template <typename T>
-inline typename std::enable_if<std::is_trivial<T>::value, T>::type SafeLoad(
- const T* unaligned) {
- typename std::remove_const<T>::type ret;
- std::memcpy(&ret, unaligned, sizeof(T));
- return ret;
-}
-
-template <typename U, typename T>
-inline typename std::enable_if<std::is_trivial<T>::value && std::is_trivial<U>::value &&
- sizeof(T) == sizeof(U),
- U>::type
-SafeCopy(T value) {
- typename std::remove_const<U>::type ret;
- std::memcpy(&ret, &value, sizeof(T));
- return ret;
-}
-
-template <typename T>
-inline typename std::enable_if<std::is_trivial<T>::value, void>::type SafeStore(
- void* unaligned, T value) {
- std::memcpy(unaligned, &value, sizeof(T));
-}
-
-} // namespace util
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Contains utilities for making UBSan happy.
+
+#pragma once
+
+#include <cstring>
+#include <memory>
+#include <type_traits>
+
+#include "arrow/util/macros.h"
+
+namespace arrow {
+namespace util {
+
+namespace internal {
+
+static uint8_t non_null_filler;
+
+} // namespace internal
+
+/// \brief Returns maybe_null if not null or a non-null pointer to an arbitrary memory
+/// that shouldn't be dereferenced.
+///
+/// Memset/Memcpy are undefined when a nullptr is passed as an argument; use this utility
+/// method to wrap locations where this could happen.
+///
+/// Note: Flatbuffers has UBSan warnings if a zero length vector is passed.
+/// https://github.com/google/flatbuffers/pull/5355 is trying to resolve
+/// them.
+template <typename T>
+inline T* MakeNonNull(T* maybe_null) {
+ if (ARROW_PREDICT_TRUE(maybe_null != NULLPTR)) {
+ return maybe_null;
+ }
+
+ return reinterpret_cast<T*>(&internal::non_null_filler);
+}
+
+template <typename T>
+inline typename std::enable_if<std::is_trivial<T>::value, T>::type SafeLoadAs(
+ const uint8_t* unaligned) {
+ typename std::remove_const<T>::type ret;
+ std::memcpy(&ret, unaligned, sizeof(T));
+ return ret;
+}
+
+template <typename T>
+inline typename std::enable_if<std::is_trivial<T>::value, T>::type SafeLoad(
+ const T* unaligned) {
+ typename std::remove_const<T>::type ret;
+ std::memcpy(&ret, unaligned, sizeof(T));
+ return ret;
+}
+
+template <typename U, typename T>
+inline typename std::enable_if<std::is_trivial<T>::value && std::is_trivial<U>::value &&
+ sizeof(T) == sizeof(U),
+ U>::type
+SafeCopy(T value) {
+ typename std::remove_const<U>::type ret;
+ std::memcpy(&ret, &value, sizeof(T));
+ return ret;
+}
+
+template <typename T>
+inline typename std::enable_if<std::is_trivial<T>::value, void>::type SafeStore(
+ void* unaligned, T value) {
+ std::memcpy(unaligned, &value, sizeof(T));
+}
+
+} // namespace util
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/uri.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/uri.cc
index c19a7bc2eee..5e2641d1880 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/uri.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/uri.cc
@@ -1,76 +1,76 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/uri.h"
-
-#include <cstring>
-#include <sstream>
-#include <vector>
-
-#include "arrow/util/string_view.h"
-#include "arrow/util/value_parsing.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/uri.h"
+
+#include <cstring>
+#include <sstream>
+#include <vector>
+
+#include "arrow/util/string_view.h"
+#include "arrow/util/value_parsing.h"
#include "contrib/restricted/uriparser/include/uriparser/Uri.h"
-
-namespace arrow {
-namespace internal {
-
-namespace {
-
-util::string_view TextRangeToView(const UriTextRangeStructA& range) {
- if (range.first == nullptr) {
- return "";
- } else {
- return {range.first, static_cast<size_t>(range.afterLast - range.first)};
- }
-}
-
-std::string TextRangeToString(const UriTextRangeStructA& range) {
- return std::string(TextRangeToView(range));
-}
-
-// There can be a difference between an absent field and an empty field.
-// For example, in "unix:/tmp/foo", the host is absent, while in
-// "unix:///tmp/foo", the host is empty but present.
-// This function helps distinguish.
-bool IsTextRangeSet(const UriTextRangeStructA& range) { return range.first != nullptr; }
-
-#ifdef _WIN32
-bool IsDriveSpec(const util::string_view s) {
- return (s.length() >= 2 && s[1] == ':' &&
- ((s[0] >= 'A' && s[0] <= 'Z') || (s[0] >= 'a' && s[0] <= 'z')));
-}
-#endif
-
-} // namespace
-
-std::string UriEscape(const std::string& s) {
- if (s.empty()) {
- // Avoid passing null pointer to uriEscapeExA
- return s;
- }
- std::string escaped;
- escaped.resize(3 * s.length());
-
- auto end = uriEscapeExA(s.data(), s.data() + s.length(), &escaped[0],
- /*spaceToPlus=*/URI_FALSE, /*normalizeBreaks=*/URI_FALSE);
- escaped.resize(end - &escaped[0]);
- return escaped;
-}
-
+
+namespace arrow {
+namespace internal {
+
+namespace {
+
+util::string_view TextRangeToView(const UriTextRangeStructA& range) {
+ if (range.first == nullptr) {
+ return "";
+ } else {
+ return {range.first, static_cast<size_t>(range.afterLast - range.first)};
+ }
+}
+
+std::string TextRangeToString(const UriTextRangeStructA& range) {
+ return std::string(TextRangeToView(range));
+}
+
+// There can be a difference between an absent field and an empty field.
+// For example, in "unix:/tmp/foo", the host is absent, while in
+// "unix:///tmp/foo", the host is empty but present.
+// This function helps distinguish.
+bool IsTextRangeSet(const UriTextRangeStructA& range) { return range.first != nullptr; }
+
+#ifdef _WIN32
+bool IsDriveSpec(const util::string_view s) {
+ return (s.length() >= 2 && s[1] == ':' &&
+ ((s[0] >= 'A' && s[0] <= 'Z') || (s[0] >= 'a' && s[0] <= 'z')));
+}
+#endif
+
+} // namespace
+
+std::string UriEscape(const std::string& s) {
+ if (s.empty()) {
+ // Avoid passing null pointer to uriEscapeExA
+ return s;
+ }
+ std::string escaped;
+ escaped.resize(3 * s.length());
+
+ auto end = uriEscapeExA(s.data(), s.data() + s.length(), &escaped[0],
+ /*spaceToPlus=*/URI_FALSE, /*normalizeBreaks=*/URI_FALSE);
+ escaped.resize(end - &escaped[0]);
+ return escaped;
+}
+
std::string UriUnescape(const util::string_view s) {
std::string result(s);
if (!result.empty()) {
@@ -93,200 +93,200 @@ std::string UriEncodeHost(const std::string& host) {
}
}
-struct Uri::Impl {
- Impl() : string_rep_(""), port_(-1) { memset(&uri_, 0, sizeof(uri_)); }
-
- ~Impl() { uriFreeUriMembersA(&uri_); }
-
- void Reset() {
- uriFreeUriMembersA(&uri_);
- memset(&uri_, 0, sizeof(uri_));
- data_.clear();
- string_rep_.clear();
- path_segments_.clear();
- port_ = -1;
- }
-
- const std::string& KeepString(const std::string& s) {
- data_.push_back(s);
- return data_.back();
- }
-
- UriUriA uri_;
- // Keep alive strings that uriparser stores pointers to
- std::vector<std::string> data_;
- std::string string_rep_;
- int32_t port_;
- std::vector<util::string_view> path_segments_;
- bool is_file_uri_;
- bool is_absolute_path_;
-};
-
-Uri::Uri() : impl_(new Impl) {}
-
-Uri::~Uri() {}
-
-Uri::Uri(Uri&& u) : impl_(std::move(u.impl_)) {}
-
-Uri& Uri::operator=(Uri&& u) {
- impl_ = std::move(u.impl_);
- return *this;
-}
-
-std::string Uri::scheme() const { return TextRangeToString(impl_->uri_.scheme); }
-
-std::string Uri::host() const { return TextRangeToString(impl_->uri_.hostText); }
-
-bool Uri::has_host() const { return IsTextRangeSet(impl_->uri_.hostText); }
-
-std::string Uri::port_text() const { return TextRangeToString(impl_->uri_.portText); }
-
-int32_t Uri::port() const { return impl_->port_; }
-
-std::string Uri::username() const {
- auto userpass = TextRangeToView(impl_->uri_.userInfo);
- auto sep_pos = userpass.find_first_of(':');
- if (sep_pos == util::string_view::npos) {
- return UriUnescape(userpass);
- } else {
- return UriUnescape(userpass.substr(0, sep_pos));
- }
-}
-
-std::string Uri::password() const {
- auto userpass = TextRangeToView(impl_->uri_.userInfo);
- auto sep_pos = userpass.find_first_of(':');
- if (sep_pos == util::string_view::npos) {
- return std::string();
- } else {
- return UriUnescape(userpass.substr(sep_pos + 1));
- }
-}
-
-std::string Uri::path() const {
- const auto& segments = impl_->path_segments_;
-
- bool must_prepend_slash = impl_->is_absolute_path_;
-#ifdef _WIN32
- // On Windows, "file:///C:/foo" should have path "C:/foo", not "/C:/foo",
- // despite it being absolute.
- // (see https://tools.ietf.org/html/rfc8089#page-13)
- if (impl_->is_absolute_path_ && impl_->is_file_uri_ && segments.size() > 0 &&
- IsDriveSpec(segments[0])) {
- must_prepend_slash = false;
- }
-#endif
-
- std::stringstream ss;
- if (must_prepend_slash) {
- ss << "/";
- }
- bool first = true;
- for (const auto& seg : segments) {
- if (!first) {
- ss << "/";
- }
- first = false;
- ss << seg;
- }
- return std::move(ss).str();
-}
-
-std::string Uri::query_string() const { return TextRangeToString(impl_->uri_.query); }
-
-Result<std::vector<std::pair<std::string, std::string>>> Uri::query_items() const {
- const auto& query = impl_->uri_.query;
- UriQueryListA* query_list;
- int item_count;
- std::vector<std::pair<std::string, std::string>> items;
-
- if (query.first == nullptr) {
- return items;
- }
- if (uriDissectQueryMallocA(&query_list, &item_count, query.first, query.afterLast) !=
- URI_SUCCESS) {
- return Status::Invalid("Cannot parse query string: '", query_string(), "'");
- }
- std::unique_ptr<UriQueryListA, decltype(&uriFreeQueryListA)> query_guard(
- query_list, uriFreeQueryListA);
-
- items.reserve(item_count);
- while (query_list != nullptr) {
- if (query_list->value != nullptr) {
- items.emplace_back(query_list->key, query_list->value);
- } else {
- items.emplace_back(query_list->key, "");
- }
- query_list = query_list->next;
- }
- return items;
-}
-
-const std::string& Uri::ToString() const { return impl_->string_rep_; }
-
-Status Uri::Parse(const std::string& uri_string) {
- impl_->Reset();
-
- const auto& s = impl_->KeepString(uri_string);
- impl_->string_rep_ = s;
- const char* error_pos;
- if (uriParseSingleUriExA(&impl_->uri_, s.data(), s.data() + s.size(), &error_pos) !=
- URI_SUCCESS) {
- return Status::Invalid("Cannot parse URI: '", uri_string, "'");
- }
-
- const auto scheme = TextRangeToView(impl_->uri_.scheme);
- if (scheme.empty()) {
- return Status::Invalid("URI has empty scheme: '", uri_string, "'");
- }
- impl_->is_file_uri_ = (scheme == "file");
-
- // Gather path segments
- auto path_seg = impl_->uri_.pathHead;
- while (path_seg != nullptr) {
- impl_->path_segments_.push_back(TextRangeToView(path_seg->text));
- path_seg = path_seg->next;
- }
-
- // Decide whether URI path is absolute
- impl_->is_absolute_path_ = false;
- if (impl_->uri_.absolutePath == URI_TRUE) {
- impl_->is_absolute_path_ = true;
- } else if (has_host() && impl_->path_segments_.size() > 0) {
- // When there's a host (even empty), uriparser considers the path relative.
- // Several URI parsers for Python all consider it absolute, though.
- // For example, the path for "file:///tmp/foo" is "/tmp/foo", not "tmp/foo".
- // Similarly, the path for "file://localhost/" is "/".
- // However, the path for "file://localhost" is "".
- impl_->is_absolute_path_ = true;
- }
-#ifdef _WIN32
- // There's an exception on Windows: "file:/C:foo/bar" is relative.
- if (impl_->is_file_uri_ && impl_->path_segments_.size() > 0) {
- const auto& first_seg = impl_->path_segments_[0];
- if (IsDriveSpec(first_seg) && (first_seg.length() >= 3 && first_seg[2] != '/')) {
- impl_->is_absolute_path_ = false;
- }
- }
-#endif
-
- if (impl_->is_file_uri_ && !impl_->is_absolute_path_) {
- return Status::Invalid("File URI cannot be relative: '", uri_string, "'");
- }
-
- // Parse port number
- auto port_text = TextRangeToView(impl_->uri_.portText);
- if (port_text.size()) {
- uint16_t port_num;
- if (!ParseValue<UInt16Type>(port_text.data(), port_text.size(), &port_num)) {
- return Status::Invalid("Invalid port number '", port_text, "' in URI '", uri_string,
- "'");
- }
- impl_->port_ = port_num;
- }
-
- return Status::OK();
-}
-
-} // namespace internal
-} // namespace arrow
+struct Uri::Impl {
+ Impl() : string_rep_(""), port_(-1) { memset(&uri_, 0, sizeof(uri_)); }
+
+ ~Impl() { uriFreeUriMembersA(&uri_); }
+
+ void Reset() {
+ uriFreeUriMembersA(&uri_);
+ memset(&uri_, 0, sizeof(uri_));
+ data_.clear();
+ string_rep_.clear();
+ path_segments_.clear();
+ port_ = -1;
+ }
+
+ const std::string& KeepString(const std::string& s) {
+ data_.push_back(s);
+ return data_.back();
+ }
+
+ UriUriA uri_;
+ // Keep alive strings that uriparser stores pointers to
+ std::vector<std::string> data_;
+ std::string string_rep_;
+ int32_t port_;
+ std::vector<util::string_view> path_segments_;
+ bool is_file_uri_;
+ bool is_absolute_path_;
+};
+
+Uri::Uri() : impl_(new Impl) {}
+
+Uri::~Uri() {}
+
+Uri::Uri(Uri&& u) : impl_(std::move(u.impl_)) {}
+
+Uri& Uri::operator=(Uri&& u) {
+ impl_ = std::move(u.impl_);
+ return *this;
+}
+
+std::string Uri::scheme() const { return TextRangeToString(impl_->uri_.scheme); }
+
+std::string Uri::host() const { return TextRangeToString(impl_->uri_.hostText); }
+
+bool Uri::has_host() const { return IsTextRangeSet(impl_->uri_.hostText); }
+
+std::string Uri::port_text() const { return TextRangeToString(impl_->uri_.portText); }
+
+int32_t Uri::port() const { return impl_->port_; }
+
+std::string Uri::username() const {
+ auto userpass = TextRangeToView(impl_->uri_.userInfo);
+ auto sep_pos = userpass.find_first_of(':');
+ if (sep_pos == util::string_view::npos) {
+ return UriUnescape(userpass);
+ } else {
+ return UriUnescape(userpass.substr(0, sep_pos));
+ }
+}
+
+std::string Uri::password() const {
+ auto userpass = TextRangeToView(impl_->uri_.userInfo);
+ auto sep_pos = userpass.find_first_of(':');
+ if (sep_pos == util::string_view::npos) {
+ return std::string();
+ } else {
+ return UriUnescape(userpass.substr(sep_pos + 1));
+ }
+}
+
+std::string Uri::path() const {
+ const auto& segments = impl_->path_segments_;
+
+ bool must_prepend_slash = impl_->is_absolute_path_;
+#ifdef _WIN32
+ // On Windows, "file:///C:/foo" should have path "C:/foo", not "/C:/foo",
+ // despite it being absolute.
+ // (see https://tools.ietf.org/html/rfc8089#page-13)
+ if (impl_->is_absolute_path_ && impl_->is_file_uri_ && segments.size() > 0 &&
+ IsDriveSpec(segments[0])) {
+ must_prepend_slash = false;
+ }
+#endif
+
+ std::stringstream ss;
+ if (must_prepend_slash) {
+ ss << "/";
+ }
+ bool first = true;
+ for (const auto& seg : segments) {
+ if (!first) {
+ ss << "/";
+ }
+ first = false;
+ ss << seg;
+ }
+ return std::move(ss).str();
+}
+
+std::string Uri::query_string() const { return TextRangeToString(impl_->uri_.query); }
+
+Result<std::vector<std::pair<std::string, std::string>>> Uri::query_items() const {
+ const auto& query = impl_->uri_.query;
+ UriQueryListA* query_list;
+ int item_count;
+ std::vector<std::pair<std::string, std::string>> items;
+
+ if (query.first == nullptr) {
+ return items;
+ }
+ if (uriDissectQueryMallocA(&query_list, &item_count, query.first, query.afterLast) !=
+ URI_SUCCESS) {
+ return Status::Invalid("Cannot parse query string: '", query_string(), "'");
+ }
+ std::unique_ptr<UriQueryListA, decltype(&uriFreeQueryListA)> query_guard(
+ query_list, uriFreeQueryListA);
+
+ items.reserve(item_count);
+ while (query_list != nullptr) {
+ if (query_list->value != nullptr) {
+ items.emplace_back(query_list->key, query_list->value);
+ } else {
+ items.emplace_back(query_list->key, "");
+ }
+ query_list = query_list->next;
+ }
+ return items;
+}
+
+const std::string& Uri::ToString() const { return impl_->string_rep_; }
+
+Status Uri::Parse(const std::string& uri_string) {
+ impl_->Reset();
+
+ const auto& s = impl_->KeepString(uri_string);
+ impl_->string_rep_ = s;
+ const char* error_pos;
+ if (uriParseSingleUriExA(&impl_->uri_, s.data(), s.data() + s.size(), &error_pos) !=
+ URI_SUCCESS) {
+ return Status::Invalid("Cannot parse URI: '", uri_string, "'");
+ }
+
+ const auto scheme = TextRangeToView(impl_->uri_.scheme);
+ if (scheme.empty()) {
+ return Status::Invalid("URI has empty scheme: '", uri_string, "'");
+ }
+ impl_->is_file_uri_ = (scheme == "file");
+
+ // Gather path segments
+ auto path_seg = impl_->uri_.pathHead;
+ while (path_seg != nullptr) {
+ impl_->path_segments_.push_back(TextRangeToView(path_seg->text));
+ path_seg = path_seg->next;
+ }
+
+ // Decide whether URI path is absolute
+ impl_->is_absolute_path_ = false;
+ if (impl_->uri_.absolutePath == URI_TRUE) {
+ impl_->is_absolute_path_ = true;
+ } else if (has_host() && impl_->path_segments_.size() > 0) {
+ // When there's a host (even empty), uriparser considers the path relative.
+ // Several URI parsers for Python all consider it absolute, though.
+ // For example, the path for "file:///tmp/foo" is "/tmp/foo", not "tmp/foo".
+ // Similarly, the path for "file://localhost/" is "/".
+ // However, the path for "file://localhost" is "".
+ impl_->is_absolute_path_ = true;
+ }
+#ifdef _WIN32
+ // There's an exception on Windows: "file:/C:foo/bar" is relative.
+ if (impl_->is_file_uri_ && impl_->path_segments_.size() > 0) {
+ const auto& first_seg = impl_->path_segments_[0];
+ if (IsDriveSpec(first_seg) && (first_seg.length() >= 3 && first_seg[2] != '/')) {
+ impl_->is_absolute_path_ = false;
+ }
+ }
+#endif
+
+ if (impl_->is_file_uri_ && !impl_->is_absolute_path_) {
+ return Status::Invalid("File URI cannot be relative: '", uri_string, "'");
+ }
+
+ // Parse port number
+ auto port_text = TextRangeToView(impl_->uri_.portText);
+ if (port_text.size()) {
+ uint16_t port_num;
+ if (!ParseValue<UInt16Type>(port_text.data(), port_text.size(), &port_num)) {
+ return Status::Invalid("Invalid port number '", port_text, "' in URI '", uri_string,
+ "'");
+ }
+ impl_->port_ = port_num;
+ }
+
+ return Status::OK();
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/uri.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/uri.h
index b4ffbb04dec..c190d381601 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/uri.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/uri.h
@@ -1,97 +1,97 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "arrow/type_fwd.h"
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/type_fwd.h"
#include "arrow/util/string_view.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace internal {
-
-/// \brief A parsed URI
-class ARROW_EXPORT Uri {
- public:
- Uri();
- ~Uri();
- Uri(Uri&&);
- Uri& operator=(Uri&&);
-
- // XXX Should we use util::string_view instead? These functions are
- // not performance-critical.
-
- /// The URI scheme, such as "http", or the empty string if the URI has no
- /// explicit scheme.
- std::string scheme() const;
-
- /// Whether the URI has an explicit host name. This may return true if
- /// the URI has an empty host (e.g. "file:///tmp/foo"), while it returns
- /// false is the URI has not host component at all (e.g. "file:/tmp/foo").
- bool has_host() const;
- /// The URI host name, such as "localhost", "127.0.0.1" or "::1", or the empty
- /// string is the URI does not have a host component.
- std::string host() const;
-
- /// The URI port number, as a string such as "80", or the empty string is the URI
- /// does not have a port number component.
- std::string port_text() const;
- /// The URI port parsed as an integer, or -1 if the URI does not have a port
- /// number component.
- int32_t port() const;
-
- /// The username specified in the URI.
- std::string username() const;
- /// The password specified in the URI.
- std::string password() const;
-
- /// The URI path component.
- std::string path() const;
-
- /// The URI query string
- std::string query_string() const;
-
- /// The URI query items
- ///
- /// Note this API doesn't allow differentiating between an empty value
- /// and a missing value, such in "a&b=1" vs. "a=&b=1".
- Result<std::vector<std::pair<std::string, std::string>>> query_items() const;
-
- /// Get the string representation of this URI.
- const std::string& ToString() const;
-
- /// Factory function to parse a URI from its string representation.
- Status Parse(const std::string& uri_string);
-
- private:
- struct Impl;
- std::unique_ptr<Impl> impl_;
-};
-
-/// Percent-encode the input string, for use e.g. as a URI query parameter.
-ARROW_EXPORT
-std::string UriEscape(const std::string& s);
-
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace internal {
+
+/// \brief A parsed URI
+class ARROW_EXPORT Uri {
+ public:
+ Uri();
+ ~Uri();
+ Uri(Uri&&);
+ Uri& operator=(Uri&&);
+
+ // XXX Should we use util::string_view instead? These functions are
+ // not performance-critical.
+
+ /// The URI scheme, such as "http", or the empty string if the URI has no
+ /// explicit scheme.
+ std::string scheme() const;
+
+ /// Whether the URI has an explicit host name. This may return true if
+ /// the URI has an empty host (e.g. "file:///tmp/foo"), while it returns
+ /// false if the URI has no host component at all (e.g. "file:/tmp/foo").
+ bool has_host() const;
+ /// The URI host name, such as "localhost", "127.0.0.1" or "::1", or the empty
+ /// string if the URI does not have a host component.
+ std::string host() const;
+
+ /// The URI port number, as a string such as "80", or the empty string if the URI
+ /// does not have a port number component.
+ std::string port_text() const;
+ /// The URI port parsed as an integer, or -1 if the URI does not have a port
+ /// number component.
+ int32_t port() const;
+
+ /// The username specified in the URI.
+ std::string username() const;
+ /// The password specified in the URI.
+ std::string password() const;
+
+ /// The URI path component.
+ std::string path() const;
+
+ /// The URI query string
+ std::string query_string() const;
+
+ /// The URI query items
+ ///
+ /// Note this API doesn't allow differentiating between an empty value
+ /// and a missing value, such as in "a&b=1" vs. "a=&b=1".
+ Result<std::vector<std::pair<std::string, std::string>>> query_items() const;
+
+ /// Get the string representation of this URI.
+ const std::string& ToString() const;
+
+ /// Factory function to parse a URI from its string representation.
+ Status Parse(const std::string& uri_string);
+
+ private:
+ struct Impl;
+ std::unique_ptr<Impl> impl_;
+};
+
+/// Percent-encode the input string, for use e.g. as a URI query parameter.
+ARROW_EXPORT
+std::string UriEscape(const std::string& s);
+
ARROW_EXPORT
std::string UriUnescape(const arrow::util::string_view s);
@@ -100,5 +100,5 @@ std::string UriUnescape(const arrow::util::string_view s);
ARROW_EXPORT
std::string UriEncodeHost(const std::string& host);
-} // namespace internal
-} // namespace arrow
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/utf8.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/utf8.cc
index 11394d2e64c..58fc7918987 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/utf8.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/utf8.cc
@@ -1,160 +1,160 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <cstdint>
-#include <iterator>
-#include <mutex>
-#include <stdexcept>
-#include <utility>
-
-#include "arrow/result.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/utf8.h"
-#include "arrow/vendored/utfcpp/checked.h"
-
-// Can be defined by utfcpp
-#ifdef NOEXCEPT
-#undef NOEXCEPT
-#endif
-
-namespace arrow {
-namespace util {
-namespace internal {
-
-// Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
-// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
-
-// clang-format off
-const uint8_t utf8_small_table[] = { // NOLINT
- // The first part of the table maps bytes to character classes that
- // to reduce the size of the transition table and create bitmasks.
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // NOLINT
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // NOLINT
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // NOLINT
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // NOLINT
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // NOLINT
- 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // NOLINT
- 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // NOLINT
- 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, // NOLINT
-
- // The second part is a transition table that maps a combination
- // of a state of the automaton and a character class to a state.
- // Character classes are between 0 and 11, states are multiples of 12.
- 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, // NOLINT
- 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, // NOLINT
- 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, // NOLINT
- 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, // NOLINT
- 12,36,12,12,12,12,12,12,12,12,12,12, // NOLINT
-};
-// clang-format on
-
-uint16_t utf8_large_table[9 * 256] = {0xffff};
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <iterator>
+#include <mutex>
+#include <stdexcept>
+#include <utility>
+
+#include "arrow/result.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/utf8.h"
+#include "arrow/vendored/utfcpp/checked.h"
+
+// Can be defined by utfcpp
+#ifdef NOEXCEPT
+#undef NOEXCEPT
+#endif
+
+namespace arrow {
+namespace util {
+namespace internal {
+
+// Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
+
+// clang-format off
+const uint8_t utf8_small_table[] = { // NOLINT
+ // The first part of the table maps bytes to character classes, in order
+ // to reduce the size of the transition table and create bitmasks.
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // NOLINT
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // NOLINT
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // NOLINT
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // NOLINT
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // NOLINT
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // NOLINT
+ 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // NOLINT
+ 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, // NOLINT
+
+ // The second part is a transition table that maps a combination
+ // of a state of the automaton and a character class to a state.
+ // Character classes are between 0 and 11, states are multiples of 12.
+ 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, // NOLINT
+ 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, // NOLINT
+ 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, // NOLINT
+ 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, // NOLINT
+ 12,36,12,12,12,12,12,12,12,12,12,12, // NOLINT
+};
+// clang-format on
+
+uint16_t utf8_large_table[9 * 256] = {0xffff};  // entry 0 starts as 0xffff ("not initialized"); InitializeLargeTable() overwrites it with 0
+
const uint8_t utf8_byte_size_table[16] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4};
-static void InitializeLargeTable() {
- for (uint32_t state = 0; state < 9; ++state) {
- for (uint32_t byte = 0; byte < 256; ++byte) {
- uint32_t byte_class = utf8_small_table[byte];
- uint8_t next_state = utf8_small_table[256 + state * 12 + byte_class] / 12;
- DCHECK_LT(next_state, 9);
- utf8_large_table[state * 256 + byte] = static_cast<uint16_t>(next_state * 256);
- }
- }
-}
-
-ARROW_EXPORT void CheckUTF8Initialized() {
- DCHECK_EQ(utf8_large_table[0], 0)
- << "InitializeUTF8() must be called before calling UTF8 routines";
-}
-
-} // namespace internal
-
-static std::once_flag utf8_initialized;
-
-void InitializeUTF8() {
- std::call_once(utf8_initialized, internal::InitializeLargeTable);
-}
-
-static const uint8_t kBOM[] = {0xEF, 0xBB, 0xBF};
-
-Result<const uint8_t*> SkipUTF8BOM(const uint8_t* data, int64_t size) {
- int64_t i;
- for (i = 0; i < static_cast<int64_t>(sizeof(kBOM)); ++i) {
- if (size == 0) {
- if (i == 0) {
- // Empty string
- return data;
- } else {
- return Status::Invalid("UTF8 string too short (truncated byte order mark?)");
- }
- }
- if (data[i] != kBOM[i]) {
- // BOM not found
- return data;
- }
- --size;
- }
- // BOM found
- return data + i;
-}
-
-namespace {
-
-// Some platforms (such as old MinGWs) don't have the <codecvt> header,
-// so call into a vendored utf8 implementation instead.
-
-std::wstring UTF8ToWideStringInternal(const std::string& source) {
- std::wstring ws;
-#if WCHAR_MAX > 0xFFFF
- ::utf8::utf8to32(source.begin(), source.end(), std::back_inserter(ws));
-#else
- ::utf8::utf8to16(source.begin(), source.end(), std::back_inserter(ws));
-#endif
- return ws;
-}
-
-std::string WideStringToUTF8Internal(const std::wstring& source) {
- std::string s;
-#if WCHAR_MAX > 0xFFFF
- ::utf8::utf32to8(source.begin(), source.end(), std::back_inserter(s));
-#else
- ::utf8::utf16to8(source.begin(), source.end(), std::back_inserter(s));
-#endif
- return s;
-}
-
-} // namespace
-
-Result<std::wstring> UTF8ToWideString(const std::string& source) {
- try {
- return UTF8ToWideStringInternal(source);
- } catch (std::exception& e) {
- return Status::Invalid(e.what());
- }
-}
-
-ARROW_EXPORT Result<std::string> WideStringToUTF8(const std::wstring& source) {
- try {
- return WideStringToUTF8Internal(source);
- } catch (std::exception& e) {
- return Status::Invalid(e.what());
- }
-}
-
-} // namespace util
-} // namespace arrow
+static void InitializeLargeTable() {  // Expands utf8_small_table into utf8_large_table: one entry per (state, byte) pair.
+ for (uint32_t state = 0; state < 9; ++state) {
+ for (uint32_t byte = 0; byte < 256; ++byte) {
+ uint32_t byte_class = utf8_small_table[byte];  // first 256 entries map a byte to its character class
+ uint8_t next_state = utf8_small_table[256 + state * 12 + byte_class] / 12;  // small-table states are multiples of 12
+ DCHECK_LT(next_state, 9);
+ utf8_large_table[state * 256 + byte] = static_cast<uint16_t>(next_state * 256);  // large-table states are multiples of 256
+ }
+ }
+}
+
+ARROW_EXPORT void CheckUTF8Initialized() {  // Debug-checks that the large validation table has been built.
+ DCHECK_EQ(utf8_large_table[0], 0)  // entry 0 is 0xffff until InitializeLargeTable() has run
+ << "InitializeUTF8() must be called before calling UTF8 routines";
+}
+
+} // namespace internal
+
+static std::once_flag utf8_initialized;  // guards the one-time build of utf8_large_table
+
+void InitializeUTF8() {  // May be called repeatedly; std::call_once runs InitializeLargeTable at most once.
+ std::call_once(utf8_initialized, internal::InitializeLargeTable);
+}
+
+static const uint8_t kBOM[] = {0xEF, 0xBB, 0xBF};  // the three bytes of the UTF-8 byte order mark
+
+Result<const uint8_t*> SkipUTF8BOM(const uint8_t* data, int64_t size) {  // Returns a pointer just past a leading BOM, or `data` itself when there is none.
+ int64_t i;
+ for (i = 0; i < static_cast<int64_t>(sizeof(kBOM)); ++i) {
+ if (size == 0) {  // input exhausted before all BOM bytes were compared
+ if (i == 0) {
+ // Empty input: nothing to skip
+ return data;
+ } else {
+ return Status::Invalid("UTF8 string too short (truncated byte order mark?)");
+ }
+ }
+ if (data[i] != kBOM[i]) {
+ // Mismatch: the input does not start with a BOM
+ return data;
+ }
+ --size;  // one more input byte has been matched
+ }
+ // All BOM bytes matched; i == sizeof(kBOM) here
+ return data + i;
+}
+
+namespace {
+
+// Some platforms (such as old MinGWs) don't have the <codecvt> header,
+// so call into a vendored utf8 implementation instead.
+
+std::wstring UTF8ToWideStringInternal(const std::string& source) {  // Unchecked helper; UTF8ToWideString() wraps it in try/catch, so utfcpp presumably throws on bad input.
+ std::wstring ws;
+#if WCHAR_MAX > 0xFFFF  // wchar_t wider than 16 bits: produce UTF-32 code units
+ ::utf8::utf8to32(source.begin(), source.end(), std::back_inserter(ws));
+#else  // 16-bit wchar_t: produce UTF-16 code units
+ ::utf8::utf8to16(source.begin(), source.end(), std::back_inserter(ws));
+#endif
+ return ws;
+}
+
+std::string WideStringToUTF8Internal(const std::wstring& source) {  // Unchecked helper; WideStringToUTF8() wraps it in try/catch, so utfcpp presumably throws on bad input.
+ std::string s;
+#if WCHAR_MAX > 0xFFFF  // wchar_t wider than 16 bits: input is treated as UTF-32
+ ::utf8::utf32to8(source.begin(), source.end(), std::back_inserter(s));
+#else  // 16-bit wchar_t: input is treated as UTF-16
+ ::utf8::utf16to8(source.begin(), source.end(), std::back_inserter(s));
+#endif
+ return s;
+}
+
+} // namespace
+
+Result<std::wstring> UTF8ToWideString(const std::string& source) {  // Returns the converted string, or Status::Invalid carrying the exception message.
+ try {
+ return UTF8ToWideStringInternal(source);
+ } catch (std::exception& e) {  // NOTE(review): could catch by const reference
+ return Status::Invalid(e.what());
+ }
+}
+
+ARROW_EXPORT Result<std::string> WideStringToUTF8(const std::wstring& source) {  // Returns the converted string, or Status::Invalid carrying the exception message.
+ try {
+ return WideStringToUTF8Internal(source);
+ } catch (std::exception& e) {  // NOTE(review): could catch by const reference
+ return Status::Invalid(e.what());
+ }
+}
+
+} // namespace util
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/utf8.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/utf8.h
index 0ec3538b95c..17dcd473e95 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/utf8.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/utf8.h
@@ -1,167 +1,167 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cassert>
-#include <cstdint>
-#include <cstring>
-#include <memory>
-#include <string>
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+
#if defined(ARROW_HAVE_NEON) || defined(ARROW_HAVE_SSE4_2)
#error #include <xsimd/xsimd.hpp>
#endif
-#include "arrow/type_fwd.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/simd.h"
-#include "arrow/util/string_view.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/simd.h"
+#include "arrow/util/string_view.h"
#include "arrow/util/ubsan.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace util {
-
-// Convert a UTF8 string to a wstring (either UTF16 or UTF32, depending
-// on the wchar_t width).
-ARROW_EXPORT Result<std::wstring> UTF8ToWideString(const std::string& source);
-
-// Similarly, convert a wstring to a UTF8 string.
-ARROW_EXPORT Result<std::string> WideStringToUTF8(const std::wstring& source);
-
-namespace internal {
-
-// Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
-// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
-
-// A compact state table allowing UTF8 decoding using two dependent
-// lookups per byte. The first lookup determines the character class
-// and the second lookup reads the next state.
-// In this table states are multiples of 12.
-ARROW_EXPORT extern const uint8_t utf8_small_table[256 + 9 * 12];
-
-// Success / reject states when looked up in the small table
-static constexpr uint8_t kUTF8DecodeAccept = 0;
-static constexpr uint8_t kUTF8DecodeReject = 12;
-
-// An expanded state table allowing transitions using a single lookup
-// at the expense of a larger memory footprint (but on non-random data,
-// not all the table will end up accessed and cached).
-// In this table states are multiples of 256.
-ARROW_EXPORT extern uint16_t utf8_large_table[9 * 256];
-
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace util {
+
+// Convert a UTF8 string to a wstring (either UTF16 or UTF32, depending
+// on the wchar_t width).
+ARROW_EXPORT Result<std::wstring> UTF8ToWideString(const std::string& source);
+
+// Similarly, convert a wstring to a UTF8 string.
+ARROW_EXPORT Result<std::string> WideStringToUTF8(const std::wstring& source);
+
+namespace internal {
+
+// Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
+
+// A compact state table allowing UTF8 decoding using two dependent
+// lookups per byte. The first lookup determines the character class
+// and the second lookup reads the next state.
+// In this table states are multiples of 12.
+ARROW_EXPORT extern const uint8_t utf8_small_table[256 + 9 * 12];
+
+// Success / reject states when looked up in the small table
+static constexpr uint8_t kUTF8DecodeAccept = 0;
+static constexpr uint8_t kUTF8DecodeReject = 12;
+
+// An expanded state table allowing transitions using a single lookup
+// at the expense of a larger memory footprint (but on non-random data,
+// not all the table will end up accessed and cached).
+// In this table states are multiples of 256.
+ARROW_EXPORT extern uint16_t utf8_large_table[9 * 256];
+
ARROW_EXPORT extern const uint8_t utf8_byte_size_table[16];
-// Success / reject states when looked up in the large table
-static constexpr uint16_t kUTF8ValidateAccept = 0;
-static constexpr uint16_t kUTF8ValidateReject = 256;
-
-static inline uint8_t DecodeOneUTF8Byte(uint8_t byte, uint8_t state, uint32_t* codep) {
- uint8_t type = utf8_small_table[byte];
-
- *codep = (state != kUTF8DecodeAccept) ? (byte & 0x3fu) | (*codep << 6)
- : (0xff >> type) & (byte);
-
- state = utf8_small_table[256 + state + type];
- return state;
-}
-
-static inline uint16_t ValidateOneUTF8Byte(uint8_t byte, uint16_t state) {
- return utf8_large_table[state + byte];
-}
-
-ARROW_EXPORT void CheckUTF8Initialized();
-
-} // namespace internal
-
-// This function needs to be called before doing UTF8 validation.
-ARROW_EXPORT void InitializeUTF8();
-
-inline bool ValidateUTF8(const uint8_t* data, int64_t size) {
- static constexpr uint64_t high_bits_64 = 0x8080808080808080ULL;
+// Success / reject states when looked up in the large table
+static constexpr uint16_t kUTF8ValidateAccept = 0;
+static constexpr uint16_t kUTF8ValidateReject = 256;
+
+static inline uint8_t DecodeOneUTF8Byte(uint8_t byte, uint8_t state, uint32_t* codep) {  // One small-table DFA step: updates *codep and returns the next state.
+ uint8_t type = utf8_small_table[byte];  // character class of this byte
+
+ *codep = (state != kUTF8DecodeAccept) ? (byte & 0x3fu) | (*codep << 6)  // mid-sequence: shift in the low 6 bits
+ : (0xff >> type) & (byte);  // starting a new character: keep only the bits left by the class-dependent mask
+
+ state = utf8_small_table[256 + state + type];  // the transition table starts at offset 256; states are multiples of 12
+ return state;
+}
+
+static inline uint16_t ValidateOneUTF8Byte(uint8_t byte, uint16_t state) {  // One large-table DFA step; `state` is expected to be a multiple of 256.
+ return utf8_large_table[state + byte];
+}
+
+ARROW_EXPORT void CheckUTF8Initialized();
+
+} // namespace internal
+
+// This function needs to be called before doing UTF8 validation.
+ARROW_EXPORT void InitializeUTF8();
+
+inline bool ValidateUTF8(const uint8_t* data, int64_t size) {
+ static constexpr uint64_t high_bits_64 = 0x8080808080808080ULL;
static constexpr uint32_t high_bits_32 = 0x80808080UL;
static constexpr uint16_t high_bits_16 = 0x8080U;
static constexpr uint8_t high_bits_8 = 0x80U;
-
-#ifndef NDEBUG
- internal::CheckUTF8Initialized();
-#endif
-
- while (size >= 8) {
- // XXX This is doing an unaligned access. Contemporary architectures
- // (x86-64, AArch64, PPC64) support it natively and often have good
- // performance nevertheless.
+
+#ifndef NDEBUG
+ internal::CheckUTF8Initialized();
+#endif
+
+ while (size >= 8) {
+ // XXX This is doing an unaligned access. Contemporary architectures
+ // (x86-64, AArch64, PPC64) support it natively and often have good
+ // performance nevertheless.
uint64_t mask64 = SafeLoadAs<uint64_t>(data);
if (ARROW_PREDICT_TRUE((mask64 & high_bits_64) == 0)) {
- // 8 bytes of pure ASCII, move forward
- size -= 8;
- data += 8;
- continue;
- }
- // Non-ASCII run detected.
- // We process at least 4 bytes, to avoid too many spurious 64-bit reads
- // in case the non-ASCII bytes are at the end of the tested 64-bit word.
- // We also only check for rejection at the end since that state is stable
- // (once in reject state, we always remain in reject state).
- // It is guaranteed that size >= 8 when arriving here, which allows
- // us to avoid size checks.
- uint16_t state = internal::kUTF8ValidateAccept;
- // Byte 0
- state = internal::ValidateOneUTF8Byte(*data++, state);
- --size;
- // Byte 1
- state = internal::ValidateOneUTF8Byte(*data++, state);
- --size;
- // Byte 2
- state = internal::ValidateOneUTF8Byte(*data++, state);
- --size;
- // Byte 3
- state = internal::ValidateOneUTF8Byte(*data++, state);
- --size;
- // Byte 4
- state = internal::ValidateOneUTF8Byte(*data++, state);
- --size;
- if (state == internal::kUTF8ValidateAccept) {
- continue; // Got full char, switch back to ASCII detection
- }
- // Byte 5
- state = internal::ValidateOneUTF8Byte(*data++, state);
- --size;
- if (state == internal::kUTF8ValidateAccept) {
- continue; // Got full char, switch back to ASCII detection
- }
- // Byte 6
- state = internal::ValidateOneUTF8Byte(*data++, state);
- --size;
- if (state == internal::kUTF8ValidateAccept) {
- continue; // Got full char, switch back to ASCII detection
- }
- // Byte 7
- state = internal::ValidateOneUTF8Byte(*data++, state);
- --size;
- if (state == internal::kUTF8ValidateAccept) {
- continue; // Got full char, switch back to ASCII detection
- }
- // kUTF8ValidateAccept not reached along 4 transitions has to mean a rejection
- assert(state == internal::kUTF8ValidateReject);
- return false;
- }
-
+ // 8 bytes of pure ASCII, move forward
+ size -= 8;
+ data += 8;
+ continue;
+ }
+ // Non-ASCII run detected.
+ // We process at least 4 bytes, to avoid too many spurious 64-bit reads
+ // in case the non-ASCII bytes are at the end of the tested 64-bit word.
+ // We also only check for rejection at the end since that state is stable
+ // (once in reject state, we always remain in reject state).
+ // It is guaranteed that size >= 8 when arriving here, which allows
+ // us to avoid size checks.
+ uint16_t state = internal::kUTF8ValidateAccept;
+ // Byte 0
+ state = internal::ValidateOneUTF8Byte(*data++, state);
+ --size;
+ // Byte 1
+ state = internal::ValidateOneUTF8Byte(*data++, state);
+ --size;
+ // Byte 2
+ state = internal::ValidateOneUTF8Byte(*data++, state);
+ --size;
+ // Byte 3
+ state = internal::ValidateOneUTF8Byte(*data++, state);
+ --size;
+ // Byte 4
+ state = internal::ValidateOneUTF8Byte(*data++, state);
+ --size;
+ if (state == internal::kUTF8ValidateAccept) {
+ continue; // Got full char, switch back to ASCII detection
+ }
+ // Byte 5
+ state = internal::ValidateOneUTF8Byte(*data++, state);
+ --size;
+ if (state == internal::kUTF8ValidateAccept) {
+ continue; // Got full char, switch back to ASCII detection
+ }
+ // Byte 6
+ state = internal::ValidateOneUTF8Byte(*data++, state);
+ --size;
+ if (state == internal::kUTF8ValidateAccept) {
+ continue; // Got full char, switch back to ASCII detection
+ }
+ // Byte 7
+ state = internal::ValidateOneUTF8Byte(*data++, state);
+ --size;
+ if (state == internal::kUTF8ValidateAccept) {
+ continue; // Got full char, switch back to ASCII detection
+ }
+ // kUTF8ValidateAccept not reached along 4 transitions has to mean a rejection
+ assert(state == internal::kUTF8ValidateReject);
+ return false;
+ }
+
// Check if string tail is full ASCII (common case, fast)
if (size >= 4) {
uint32_t tail_mask = SafeLoadAs<uint32_t>(data + size - 4);
@@ -185,10 +185,10 @@ inline bool ValidateUTF8(const uint8_t* data, int64_t size) {
}
// Fall back to UTF8 validation of tail string.
- // Note the state table is designed so that, once in the reject state,
- // we remain in that state until the end. So we needn't check for
- // rejection at each char (we don't gain much by short-circuiting here).
- uint16_t state = internal::kUTF8ValidateAccept;
+ // Note the state table is designed so that, once in the reject state,
+ // we remain in that state until the end. So we needn't check for
+ // rejection at each char (we don't gain much by short-circuiting here).
+ uint16_t state = internal::kUTF8ValidateAccept;
switch (size) {
case 7:
state = internal::ValidateOneUTF8Byte(data[size - 7], state);
@@ -206,95 +206,95 @@ inline bool ValidateUTF8(const uint8_t* data, int64_t size) {
state = internal::ValidateOneUTF8Byte(data[size - 1], state);
default:
break;
- }
- return ARROW_PREDICT_TRUE(state == internal::kUTF8ValidateAccept);
-}
-
-inline bool ValidateUTF8(const util::string_view& str) {
- const uint8_t* data = reinterpret_cast<const uint8_t*>(str.data());
- const size_t length = str.size();
-
- return ValidateUTF8(data, length);
-}
-
-inline bool ValidateAsciiSw(const uint8_t* data, int64_t len) {
- uint8_t orall = 0;
-
- if (len >= 16) {
- uint64_t or1 = 0, or2 = 0;
- const uint8_t* data2 = data + 8;
-
- do {
- or1 |= *(const uint64_t*)data;
- or2 |= *(const uint64_t*)data2;
- data += 16;
- data2 += 16;
- len -= 16;
- } while (len >= 16);
-
- orall = !((or1 | or2) & 0x8080808080808080ULL) - 1;
- }
-
- while (len--) {
- orall |= *data++;
- }
-
- if (orall < 0x80) {
- return true;
- } else {
- return false;
- }
-}
-
+ }
+ return ARROW_PREDICT_TRUE(state == internal::kUTF8ValidateAccept);
+}
+
+inline bool ValidateUTF8(const util::string_view& str) {  // Convenience overload forwarding to the (pointer, size) version.
+ const uint8_t* data = reinterpret_cast<const uint8_t*>(str.data());
+ const size_t length = str.size();
+
+ return ValidateUTF8(data, length);
+}
+
+inline bool ValidateAsciiSw(const uint8_t* data, int64_t len) {  // Returns true iff none of the `len` bytes has its high bit set.
+ uint8_t orall = 0;  // OR of everything examined so far
+
+ if (len >= 16) {
+ uint64_t or1 = 0, or2 = 0;
+ const uint8_t* data2 = data + 8;
+
+ do {
+ or1 |= *(const uint64_t*)data;  // NOTE(review): type-punned, possibly unaligned load; ValidateUTF8 uses SafeLoadAs<uint64_t>() instead
+ or2 |= *(const uint64_t*)data2;  // NOTE(review): same concern as the load above
+ data += 16;
+ data2 += 16;
+ len -= 16;
+ } while (len >= 16);
+
+ orall = !((or1 | or2) & 0x8080808080808080ULL) - 1;  // 0 when no high bit was seen, 0xFF otherwise
+ }
+
+ while (len--) {  // remaining tail, one byte at a time
+ orall |= *data++;
+ }
+
+ if (orall < 0x80) {  // high bit clear in every byte examined
+ return true;
+ } else {
+ return false;
+ }
+}
+
#if defined(ARROW_HAVE_NEON) || defined(ARROW_HAVE_SSE4_2)
-inline bool ValidateAsciiSimd(const uint8_t* data, int64_t len) {
+inline bool ValidateAsciiSimd(const uint8_t* data, int64_t len) {
using simd_batch = xsimd::batch<int8_t, 16>;
-
- if (len >= 32) {
+
+ if (len >= 32) {
const simd_batch zero(static_cast<int8_t>(0));
- const uint8_t* data2 = data + 16;
+ const uint8_t* data2 = data + 16;
simd_batch or1 = zero, or2 = zero;
-
- while (len >= 32) {
+
+ while (len >= 32) {
or1 |= simd_batch(reinterpret_cast<const int8_t*>(data), xsimd::unaligned_mode{});
or2 |= simd_batch(reinterpret_cast<const int8_t*>(data2), xsimd::unaligned_mode{});
- data += 32;
- data2 += 32;
- len -= 32;
- }
-
+ data += 32;
+ data2 += 32;
+ len -= 32;
+ }
+
// To test for upper bit in all bytes, test whether any of them is negative
or1 |= or2;
if (xsimd::any(or1 < zero)) {
- return false;
- }
- }
-
- return ValidateAsciiSw(data, len);
-}
-#endif // ARROW_HAVE_SSE4_2
-
-inline bool ValidateAscii(const uint8_t* data, int64_t len) {
-#if defined(ARROW_HAVE_NEON) || defined(ARROW_HAVE_SSE4_2)
- return ValidateAsciiSimd(data, len);
-#else
- return ValidateAsciiSw(data, len);
-#endif
-}
-
-inline bool ValidateAscii(const util::string_view& str) {
- const uint8_t* data = reinterpret_cast<const uint8_t*>(str.data());
- const size_t length = str.size();
-
- return ValidateAscii(data, length);
-}
-
-// Skip UTF8 byte order mark, if any.
-ARROW_EXPORT
-Result<const uint8_t*> SkipUTF8BOM(const uint8_t* data, int64_t size);
-
-static constexpr uint32_t kMaxUnicodeCodepoint = 0x110000;
-
+ return false;
+ }
+ }
+
+ return ValidateAsciiSw(data, len);
+}
+#endif // ARROW_HAVE_SSE4_2
+
+inline bool ValidateAscii(const uint8_t* data, int64_t len) {  // Dispatches to the SIMD or scalar implementation at compile time.
+#if defined(ARROW_HAVE_NEON) || defined(ARROW_HAVE_SSE4_2)
+ return ValidateAsciiSimd(data, len);
+#else
+ return ValidateAsciiSw(data, len);
+#endif
+}
+
+inline bool ValidateAscii(const util::string_view& str) {  // Convenience overload forwarding to the (pointer, len) version.
+ const uint8_t* data = reinterpret_cast<const uint8_t*>(str.data());
+ const size_t length = str.size();
+
+ return ValidateAscii(data, length);
+}
+
+// Skip UTF8 byte order mark, if any.
+ARROW_EXPORT
+Result<const uint8_t*> SkipUTF8BOM(const uint8_t* data, int64_t size);
+
+static constexpr uint32_t kMaxUnicodeCodepoint = 0x110000;  // exclusive bound: UTF8Encode asserts codepoint < this value
+
// size of a valid UTF8 can be determined by looking at leading 4 bits of BYTE1
// utf8_byte_size_table[0..7] --> pure ascii chars --> 1B length
// utf8_byte_size_table[8..11] --> internal bytes --> 1B length
@@ -307,10 +307,10 @@ static inline uint8_t ValidUtf8CodepointByteSize(const uint8_t* codeunit) {
return internal::utf8_byte_size_table[*codeunit >> 4];
}
-static inline bool Utf8IsContinuation(const uint8_t codeunit) {
- return (codeunit & 0xC0) == 0x80; // upper two bits should be 10
-}
-
+static inline bool Utf8IsContinuation(const uint8_t codeunit) {  // True for bytes in the range 0x80..0xBF.
+ return (codeunit & 0xC0) == 0x80; // upper two bits should be 10
+}
+
static inline bool Utf8Is2ByteStart(const uint8_t codeunit) {
return (codeunit & 0xE0) == 0xC0; // upper three bits should be 110
}
@@ -323,74 +323,74 @@ static inline bool Utf8Is4ByteStart(const uint8_t codeunit) {
return (codeunit & 0xF8) == 0xF0; // upper five bits should be 11110
}
-static inline uint8_t* UTF8Encode(uint8_t* str, uint32_t codepoint) {
- if (codepoint < 0x80) {
- *str++ = codepoint;
- } else if (codepoint < 0x800) {
- *str++ = 0xC0 + (codepoint >> 6);
- *str++ = 0x80 + (codepoint & 0x3F);
- } else if (codepoint < 0x10000) {
- *str++ = 0xE0 + (codepoint >> 12);
- *str++ = 0x80 + ((codepoint >> 6) & 0x3F);
- *str++ = 0x80 + (codepoint & 0x3F);
- } else {
- // Assume proper codepoints are always passed
- assert(codepoint < kMaxUnicodeCodepoint);
- *str++ = 0xF0 + (codepoint >> 18);
- *str++ = 0x80 + ((codepoint >> 12) & 0x3F);
- *str++ = 0x80 + ((codepoint >> 6) & 0x3F);
- *str++ = 0x80 + (codepoint & 0x3F);
- }
- return str;
-}
-
-static inline bool UTF8Decode(const uint8_t** data, uint32_t* codepoint) {
- const uint8_t* str = *data;
+static inline uint8_t* UTF8Encode(uint8_t* str, uint32_t codepoint) {  // Writes 1 to 4 bytes at `str` and returns the pointer just past them.
+ if (codepoint < 0x80) {  // 1 byte
+ *str++ = codepoint;
+ } else if (codepoint < 0x800) {  // 2 bytes
+ *str++ = 0xC0 + (codepoint >> 6);
+ *str++ = 0x80 + (codepoint & 0x3F);
+ } else if (codepoint < 0x10000) {  // 3 bytes
+ *str++ = 0xE0 + (codepoint >> 12);
+ *str++ = 0x80 + ((codepoint >> 6) & 0x3F);
+ *str++ = 0x80 + (codepoint & 0x3F);
+ } else {  // 4 bytes
+ // Out-of-range codepoints are only caught by the assert (debug builds)
+ assert(codepoint < kMaxUnicodeCodepoint);
+ *str++ = 0xF0 + (codepoint >> 18);
+ *str++ = 0x80 + ((codepoint >> 12) & 0x3F);
+ *str++ = 0x80 + ((codepoint >> 6) & 0x3F);
+ *str++ = 0x80 + (codepoint & 0x3F);
+ }
+ return str;
+}
+
+static inline bool UTF8Decode(const uint8_t** data, uint32_t* codepoint) {
+ const uint8_t* str = *data;
if (*str < 0x80) { // ascii
- *codepoint = *str++;
- } else if (ARROW_PREDICT_FALSE(*str < 0xC0)) { // invalid non-ascii char
- return false;
- } else if (*str < 0xE0) {
- uint8_t code_unit_1 = (*str++) & 0x1F; // take last 5 bits
- if (ARROW_PREDICT_FALSE(!Utf8IsContinuation(*str))) {
- return false;
- }
- uint8_t code_unit_2 = (*str++) & 0x3F; // take last 6 bits
- *codepoint = (code_unit_1 << 6) + code_unit_2;
- } else if (*str < 0xF0) {
- uint8_t code_unit_1 = (*str++) & 0x0F; // take last 4 bits
- if (ARROW_PREDICT_FALSE(!Utf8IsContinuation(*str))) {
- return false;
- }
- uint8_t code_unit_2 = (*str++) & 0x3F; // take last 6 bits
- if (ARROW_PREDICT_FALSE(!Utf8IsContinuation(*str))) {
- return false;
- }
- uint8_t code_unit_3 = (*str++) & 0x3F; // take last 6 bits
- *codepoint = (code_unit_1 << 12) + (code_unit_2 << 6) + code_unit_3;
- } else if (*str < 0xF8) {
- uint8_t code_unit_1 = (*str++) & 0x07; // take last 3 bits
- if (ARROW_PREDICT_FALSE(!Utf8IsContinuation(*str))) {
- return false;
- }
- uint8_t code_unit_2 = (*str++) & 0x3F; // take last 6 bits
- if (ARROW_PREDICT_FALSE(!Utf8IsContinuation(*str))) {
- return false;
- }
- uint8_t code_unit_3 = (*str++) & 0x3F; // take last 6 bits
- if (ARROW_PREDICT_FALSE(!Utf8IsContinuation(*str))) {
- return false;
- }
- uint8_t code_unit_4 = (*str++) & 0x3F; // take last 6 bits
- *codepoint =
- (code_unit_1 << 18) + (code_unit_2 << 12) + (code_unit_3 << 6) + code_unit_4;
- } else { // invalid non-ascii char
- return false;
- }
- *data = str;
- return true;
-}
-
+ *codepoint = *str++;
+ } else if (ARROW_PREDICT_FALSE(*str < 0xC0)) { // invalid non-ascii char
+ return false;
+ } else if (*str < 0xE0) {
+ uint8_t code_unit_1 = (*str++) & 0x1F; // take last 5 bits
+ if (ARROW_PREDICT_FALSE(!Utf8IsContinuation(*str))) {
+ return false;
+ }
+ uint8_t code_unit_2 = (*str++) & 0x3F; // take last 6 bits
+ *codepoint = (code_unit_1 << 6) + code_unit_2;
+ } else if (*str < 0xF0) {
+ uint8_t code_unit_1 = (*str++) & 0x0F; // take last 4 bits
+ if (ARROW_PREDICT_FALSE(!Utf8IsContinuation(*str))) {
+ return false;
+ }
+ uint8_t code_unit_2 = (*str++) & 0x3F; // take last 6 bits
+ if (ARROW_PREDICT_FALSE(!Utf8IsContinuation(*str))) {
+ return false;
+ }
+ uint8_t code_unit_3 = (*str++) & 0x3F; // take last 6 bits
+ *codepoint = (code_unit_1 << 12) + (code_unit_2 << 6) + code_unit_3;
+ } else if (*str < 0xF8) {
+ uint8_t code_unit_1 = (*str++) & 0x07; // take last 3 bits
+ if (ARROW_PREDICT_FALSE(!Utf8IsContinuation(*str))) {
+ return false;
+ }
+ uint8_t code_unit_2 = (*str++) & 0x3F; // take last 6 bits
+ if (ARROW_PREDICT_FALSE(!Utf8IsContinuation(*str))) {
+ return false;
+ }
+ uint8_t code_unit_3 = (*str++) & 0x3F; // take last 6 bits
+ if (ARROW_PREDICT_FALSE(!Utf8IsContinuation(*str))) {
+ return false;
+ }
+ uint8_t code_unit_4 = (*str++) & 0x3F; // take last 6 bits
+ *codepoint =
+ (code_unit_1 << 18) + (code_unit_2 << 12) + (code_unit_3 << 6) + code_unit_4;
+ } else { // invalid non-ascii char
+ return false;
+ }
+ *data = str;
+ return true;
+}
+
static inline bool UTF8DecodeReverse(const uint8_t** data, uint32_t* codepoint) {
const uint8_t* str = *data;
if (*str < 0x80) { // ascii
@@ -430,22 +430,22 @@ static inline bool UTF8DecodeReverse(const uint8_t** data, uint32_t* codepoint)
return true;
}
-template <class UnaryOperation>
-static inline bool UTF8Transform(const uint8_t* first, const uint8_t* last,
- uint8_t** destination, UnaryOperation&& unary_op) {
- const uint8_t* i = first;
- uint8_t* out = *destination;
- while (i < last) {
- uint32_t codepoint = 0;
- if (ARROW_PREDICT_FALSE(!UTF8Decode(&i, &codepoint))) {
- return false;
- }
- out = UTF8Encode(out, unary_op(codepoint));
- }
- *destination = out;
- return true;
-}
-
+/// Decode every codepoint in [first, last), pass it through `unary_op`, and
+/// encode the mapped codepoint at the current output position.
+///
+/// Returns false as soon as a codepoint fails to decode; in that case
+/// `*destination` is not updated.  On success `*destination` is set to the
+/// pointer returned by the last UTF8Encode call (or left as-is for an empty
+/// range) and true is returned.
+template <class UnaryOperation>
+static inline bool UTF8Transform(const uint8_t* first, const uint8_t* last,
+                                 uint8_t** destination, UnaryOperation&& unary_op) {
+  uint8_t* write_pos = *destination;
+  // UTF8Decode advances read_pos itself, hence the empty increment clause.
+  for (const uint8_t* read_pos = first; read_pos < last;) {
+    uint32_t decoded = 0;
+    const bool decoded_ok = UTF8Decode(&read_pos, &decoded);
+    if (ARROW_PREDICT_FALSE(!decoded_ok)) {
+      return false;
+    }
+    write_pos = UTF8Encode(write_pos, unary_op(decoded));
+  }
+  *destination = write_pos;
+  return true;
+}
+
template <class Predicate>
static inline bool UTF8FindIf(const uint8_t* first, const uint8_t* last,
Predicate&& predicate, const uint8_t** position) {
@@ -537,25 +537,25 @@ static inline bool UTF8ForEach(const std::string& s, UnaryFunction&& f) {
std::forward<UnaryFunction>(f));
}
-template <class UnaryPredicate>
-static inline bool UTF8AllOf(const uint8_t* first, const uint8_t* last, bool* result,
- UnaryPredicate&& predicate) {
- const uint8_t* i = first;
- while (i < last) {
- uint32_t codepoint = 0;
- if (ARROW_PREDICT_FALSE(!UTF8Decode(&i, &codepoint))) {
- return false;
- }
-
- if (!predicate(codepoint)) {
- *result = false;
- return true;
- }
- }
- *result = true;
- return true;
-}
-
+/// Check whether `predicate` accepts every codepoint decoded from [first, last).
+///
+/// The return value only reports decoding success: false means a codepoint
+/// could not be decoded, and `*result` is then left unwritten.  Otherwise
+/// `*result` is true if no codepoint was rejected (this includes the empty
+/// range) and false once the first rejection is seen; scanning stops there.
+template <class UnaryPredicate>
+static inline bool UTF8AllOf(const uint8_t* first, const uint8_t* last, bool* result,
+                             UnaryPredicate&& predicate) {
+  bool all_accepted = true;
+  const uint8_t* cursor = first;
+  while (all_accepted && cursor < last) {
+    uint32_t decoded = 0;
+    if (ARROW_PREDICT_FALSE(!UTF8Decode(&cursor, &decoded))) {
+      return false;
+    }
+    if (!predicate(decoded)) {
+      all_accepted = false;
+    }
+  }
+  *result = all_accepted;
+  return true;
+}
+
/// Count the number of codepoints in the given string (assuming it is valid UTF8).
static inline int64_t UTF8Length(const uint8_t* first, const uint8_t* last) {
int64_t length = 0;
@@ -566,5 +566,5 @@ static inline int64_t UTF8Length(const uint8_t* first, const uint8_t* last) {
return length;
}
-} // namespace util
-} // namespace arrow
+} // namespace util
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/value_parsing.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/value_parsing.cc
index 3b147366636..231aba18b60 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/value_parsing.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/value_parsing.cc
@@ -1,87 +1,87 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/value_parsing.h"
-
-#include <string>
-#include <utility>
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/value_parsing.h"
+
+#include <string>
+#include <utility>
+
#include "contrib/restricted/fast_float/include/fast_float/fast_float.h"
-
-namespace arrow {
-namespace internal {
-
-bool StringToFloat(const char* s, size_t length, float* out) {
+
+namespace arrow {
+namespace internal {
+
+bool StringToFloat(const char* s, size_t length, float* out) {
const auto res = fast_float::from_chars(s, s + length, *out);
return res.ec == std::errc() && res.ptr == s + length;
-}
-
-bool StringToFloat(const char* s, size_t length, double* out) {
+}
+
+bool StringToFloat(const char* s, size_t length, double* out) {
const auto res = fast_float::from_chars(s, s + length, *out);
return res.ec == std::errc() && res.ptr == s + length;
-}
-
-// ----------------------------------------------------------------------
-// strptime-like parsing
-
-namespace {
-
-class StrptimeTimestampParser : public TimestampParser {
- public:
- explicit StrptimeTimestampParser(std::string format) : format_(std::move(format)) {}
-
- bool operator()(const char* s, size_t length, TimeUnit::type out_unit,
- int64_t* out) const override {
- return ParseTimestampStrptime(s, length, format_.c_str(),
- /*ignore_time_in_day=*/false,
- /*allow_trailing_chars=*/false, out_unit, out);
- }
-
- const char* kind() const override { return "strptime"; }
-
- const char* format() const override { return format_.c_str(); }
-
- private:
- std::string format_;
-};
-
-class ISO8601Parser : public TimestampParser {
- public:
- ISO8601Parser() {}
-
- bool operator()(const char* s, size_t length, TimeUnit::type out_unit,
- int64_t* out) const override {
- return ParseTimestampISO8601(s, length, out_unit, out);
- }
-
- const char* kind() const override { return "iso8601"; }
-};
-
-} // namespace
-} // namespace internal
-
-const char* TimestampParser::format() const { return ""; }
-
-std::shared_ptr<TimestampParser> TimestampParser::MakeStrptime(std::string format) {
- return std::make_shared<internal::StrptimeTimestampParser>(std::move(format));
-}
-
-std::shared_ptr<TimestampParser> TimestampParser::MakeISO8601() {
- return std::make_shared<internal::ISO8601Parser>();
-}
-
-} // namespace arrow
+}
+
+// ----------------------------------------------------------------------
+// strptime-like parsing
+
+namespace {
+
+// TimestampParser implementation driven by a strptime-like format string that
+// is supplied at construction time and stored in `format_`.
+class StrptimeTimestampParser : public TimestampParser {
+ public:
+ // Takes ownership of `format` (moved into the member).
+ explicit StrptimeTimestampParser(std::string format) : format_(std::move(format)) {}
+
+ // Forwards to ParseTimestampStrptime with the stored format.  Both option
+ // flags are passed as false: the time of day is not ignored and trailing
+ // characters are not allowed.
+ bool operator()(const char* s, size_t length, TimeUnit::type out_unit,
+ int64_t* out) const override {
+ return ParseTimestampStrptime(s, length, format_.c_str(),
+ /*ignore_time_in_day=*/false,
+ /*allow_trailing_chars=*/false, out_unit, out);
+ }
+
+ const char* kind() const override { return "strptime"; }
+
+ // Returns the stored format; the pointer refers to `format_`'s buffer.
+ const char* format() const override { return format_.c_str(); }
+
+ private:
+ // strptime-like format string given to the constructor.
+ std::string format_;
+};
+
+// TimestampParser implementation that delegates to ParseTimestampISO8601.
+// It does not override format(), so the base-class format() is used.
+class ISO8601Parser : public TimestampParser {
+ public:
+ ISO8601Parser() {}
+
+ // Forwards all arguments unchanged to ParseTimestampISO8601.
+ bool operator()(const char* s, size_t length, TimeUnit::type out_unit,
+ int64_t* out) const override {
+ return ParseTimestampISO8601(s, length, out_unit, out);
+ }
+
+ const char* kind() const override { return "iso8601"; }
+};
+
+} // namespace
+} // namespace internal
+
+// Base-class default: a parser that does not override format() reports an
+// empty format string.
+const char* TimestampParser::format() const { return ""; }
+
+// Factory: builds an internal::StrptimeTimestampParser, moving `format` into it.
+std::shared_ptr<TimestampParser> TimestampParser::MakeStrptime(std::string format) {
+ return std::make_shared<internal::StrptimeTimestampParser>(std::move(format));
+}
+
+// Factory: builds a default-constructed internal::ISO8601Parser.
+std::shared_ptr<TimestampParser> TimestampParser::MakeISO8601() {
+ return std::make_shared<internal::ISO8601Parser>();
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/value_parsing.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/value_parsing.h
index 00295d1b51f..8924dd39b9f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/value_parsing.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/value_parsing.h
@@ -1,491 +1,491 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// This is a private header for string-to-number parsing utilities
-
-#pragma once
-
-#include <cassert>
-#include <chrono>
-#include <cstddef>
-#include <cstdint>
-#include <limits>
-#include <memory>
-#include <string>
-#include <type_traits>
-
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/time.h"
-#include "arrow/util/visibility.h"
-#include "arrow/vendored/datetime.h"
-#include "arrow/vendored/strptime.h"
-
-namespace arrow {
-
-/// \brief A virtual string to timestamp parser
-class ARROW_EXPORT TimestampParser {
- public:
- virtual ~TimestampParser() = default;
-
- virtual bool operator()(const char* s, size_t length, TimeUnit::type out_unit,
- int64_t* out) const = 0;
-
- virtual const char* kind() const = 0;
-
- virtual const char* format() const;
-
- /// \brief Create a TimestampParser that recognizes strptime-like format strings
- static std::shared_ptr<TimestampParser> MakeStrptime(std::string format);
-
- /// \brief Create a TimestampParser that recognizes (locale-agnostic) ISO8601
- /// timestamps
- static std::shared_ptr<TimestampParser> MakeISO8601();
-};
-
-namespace internal {
-
-/// \brief The entry point for conversion from strings.
-///
-/// Specializations of StringConverter for `ARROW_TYPE` must define:
-/// - A default constructible member type `value_type` which will be yielded on a
-/// successful parse.
-/// - The static member function `Convert`, callable with signature
-/// `(const ARROW_TYPE& t, const char* s, size_t length, value_type* out)`.
-/// `Convert` returns truthy for successful parses and assigns the parsed values to
-/// `*out`. Parameters required for parsing (for example a timestamp's TimeUnit)
-/// are acquired from the type parameter `t`.
-template <typename ARROW_TYPE, typename Enable = void>
-struct StringConverter;
-
-template <typename T>
-struct is_parseable {
- template <typename U, typename = typename StringConverter<U>::value_type>
- static std::true_type Test(U*);
-
- template <typename U>
- static std::false_type Test(...);
-
- static constexpr bool value = decltype(Test<T>(NULLPTR))::value;
-};
-
-template <typename T, typename R = void>
-using enable_if_parseable = enable_if_t<is_parseable<T>::value, R>;
-
-template <>
-struct StringConverter<BooleanType> {
- using value_type = bool;
-
- static bool Convert(const BooleanType&, const char* s, size_t length, value_type* out) {
- if (length == 1) {
- // "0" or "1"?
- if (s[0] == '0') {
- *out = false;
- return true;
- }
- if (s[0] == '1') {
- *out = true;
- return true;
- }
- return false;
- }
- if (length == 4) {
- // "true"?
- *out = true;
- return ((s[0] == 't' || s[0] == 'T') && (s[1] == 'r' || s[1] == 'R') &&
- (s[2] == 'u' || s[2] == 'U') && (s[3] == 'e' || s[3] == 'E'));
- }
- if (length == 5) {
- // "false"?
- *out = false;
- return ((s[0] == 'f' || s[0] == 'F') && (s[1] == 'a' || s[1] == 'A') &&
- (s[2] == 'l' || s[2] == 'L') && (s[3] == 's' || s[3] == 'S') &&
- (s[4] == 'e' || s[4] == 'E'));
- }
- return false;
- }
-};
-
-// Ideas for faster float parsing:
-// - http://rapidjson.org/md_doc_internals.html#ParsingDouble
-// - https://github.com/google/double-conversion [used here]
-// - https://github.com/achan001/dtoa-fast
-
-ARROW_EXPORT
-bool StringToFloat(const char* s, size_t length, float* out);
-
-ARROW_EXPORT
-bool StringToFloat(const char* s, size_t length, double* out);
-
-template <>
-struct StringConverter<FloatType> {
- using value_type = float;
-
- static bool Convert(const FloatType&, const char* s, size_t length, value_type* out) {
- return ARROW_PREDICT_TRUE(StringToFloat(s, length, out));
- }
-};
-
-template <>
-struct StringConverter<DoubleType> {
- using value_type = double;
-
- static bool Convert(const DoubleType&, const char* s, size_t length, value_type* out) {
- return ARROW_PREDICT_TRUE(StringToFloat(s, length, out));
- }
-};
-
-// NOTE: HalfFloatType would require a half<->float conversion library
-
-inline uint8_t ParseDecimalDigit(char c) { return static_cast<uint8_t>(c - '0'); }
-
-#define PARSE_UNSIGNED_ITERATION(C_TYPE) \
- if (length > 0) { \
- uint8_t digit = ParseDecimalDigit(*s++); \
- result = static_cast<C_TYPE>(result * 10U); \
- length--; \
- if (ARROW_PREDICT_FALSE(digit > 9U)) { \
- /* Non-digit */ \
- return false; \
- } \
- result = static_cast<C_TYPE>(result + digit); \
- } else { \
- break; \
- }
-
-#define PARSE_UNSIGNED_ITERATION_LAST(C_TYPE) \
- if (length > 0) { \
- if (ARROW_PREDICT_FALSE(result > std::numeric_limits<C_TYPE>::max() / 10U)) { \
- /* Overflow */ \
- return false; \
- } \
- uint8_t digit = ParseDecimalDigit(*s++); \
- result = static_cast<C_TYPE>(result * 10U); \
- C_TYPE new_result = static_cast<C_TYPE>(result + digit); \
- if (ARROW_PREDICT_FALSE(--length > 0)) { \
- /* Too many digits */ \
- return false; \
- } \
- if (ARROW_PREDICT_FALSE(digit > 9U)) { \
- /* Non-digit */ \
- return false; \
- } \
- if (ARROW_PREDICT_FALSE(new_result < result)) { \
- /* Overflow */ \
- return false; \
- } \
- result = new_result; \
- }
-
-inline bool ParseUnsigned(const char* s, size_t length, uint8_t* out) {
- uint8_t result = 0;
-
- do {
- PARSE_UNSIGNED_ITERATION(uint8_t);
- PARSE_UNSIGNED_ITERATION(uint8_t);
- PARSE_UNSIGNED_ITERATION_LAST(uint8_t);
- } while (false);
- *out = result;
- return true;
-}
-
-inline bool ParseUnsigned(const char* s, size_t length, uint16_t* out) {
- uint16_t result = 0;
- do {
- PARSE_UNSIGNED_ITERATION(uint16_t);
- PARSE_UNSIGNED_ITERATION(uint16_t);
- PARSE_UNSIGNED_ITERATION(uint16_t);
- PARSE_UNSIGNED_ITERATION(uint16_t);
- PARSE_UNSIGNED_ITERATION_LAST(uint16_t);
- } while (false);
- *out = result;
- return true;
-}
-
-inline bool ParseUnsigned(const char* s, size_t length, uint32_t* out) {
- uint32_t result = 0;
- do {
- PARSE_UNSIGNED_ITERATION(uint32_t);
- PARSE_UNSIGNED_ITERATION(uint32_t);
- PARSE_UNSIGNED_ITERATION(uint32_t);
- PARSE_UNSIGNED_ITERATION(uint32_t);
- PARSE_UNSIGNED_ITERATION(uint32_t);
-
- PARSE_UNSIGNED_ITERATION(uint32_t);
- PARSE_UNSIGNED_ITERATION(uint32_t);
- PARSE_UNSIGNED_ITERATION(uint32_t);
- PARSE_UNSIGNED_ITERATION(uint32_t);
-
- PARSE_UNSIGNED_ITERATION_LAST(uint32_t);
- } while (false);
- *out = result;
- return true;
-}
-
-inline bool ParseUnsigned(const char* s, size_t length, uint64_t* out) {
- uint64_t result = 0;
- do {
- PARSE_UNSIGNED_ITERATION(uint64_t);
- PARSE_UNSIGNED_ITERATION(uint64_t);
- PARSE_UNSIGNED_ITERATION(uint64_t);
- PARSE_UNSIGNED_ITERATION(uint64_t);
- PARSE_UNSIGNED_ITERATION(uint64_t);
-
- PARSE_UNSIGNED_ITERATION(uint64_t);
- PARSE_UNSIGNED_ITERATION(uint64_t);
- PARSE_UNSIGNED_ITERATION(uint64_t);
- PARSE_UNSIGNED_ITERATION(uint64_t);
- PARSE_UNSIGNED_ITERATION(uint64_t);
-
- PARSE_UNSIGNED_ITERATION(uint64_t);
- PARSE_UNSIGNED_ITERATION(uint64_t);
- PARSE_UNSIGNED_ITERATION(uint64_t);
- PARSE_UNSIGNED_ITERATION(uint64_t);
- PARSE_UNSIGNED_ITERATION(uint64_t);
-
- PARSE_UNSIGNED_ITERATION(uint64_t);
- PARSE_UNSIGNED_ITERATION(uint64_t);
- PARSE_UNSIGNED_ITERATION(uint64_t);
- PARSE_UNSIGNED_ITERATION(uint64_t);
-
- PARSE_UNSIGNED_ITERATION_LAST(uint64_t);
- } while (false);
- *out = result;
- return true;
-}
-
-#undef PARSE_UNSIGNED_ITERATION
-#undef PARSE_UNSIGNED_ITERATION_LAST
-
-template <class ARROW_TYPE>
-struct StringToUnsignedIntConverterMixin {
- using value_type = typename ARROW_TYPE::c_type;
-
- static bool Convert(const ARROW_TYPE&, const char* s, size_t length, value_type* out) {
- if (ARROW_PREDICT_FALSE(length == 0)) {
- return false;
- }
- // Skip leading zeros
- while (length > 0 && *s == '0') {
- length--;
- s++;
- }
- return ParseUnsigned(s, length, out);
- }
-};
-
-template <>
-struct StringConverter<UInt8Type> : public StringToUnsignedIntConverterMixin<UInt8Type> {
- using StringToUnsignedIntConverterMixin<UInt8Type>::StringToUnsignedIntConverterMixin;
-};
-
-template <>
-struct StringConverter<UInt16Type>
- : public StringToUnsignedIntConverterMixin<UInt16Type> {
- using StringToUnsignedIntConverterMixin<UInt16Type>::StringToUnsignedIntConverterMixin;
-};
-
-template <>
-struct StringConverter<UInt32Type>
- : public StringToUnsignedIntConverterMixin<UInt32Type> {
- using StringToUnsignedIntConverterMixin<UInt32Type>::StringToUnsignedIntConverterMixin;
-};
-
-template <>
-struct StringConverter<UInt64Type>
- : public StringToUnsignedIntConverterMixin<UInt64Type> {
- using StringToUnsignedIntConverterMixin<UInt64Type>::StringToUnsignedIntConverterMixin;
-};
-
-template <class ARROW_TYPE>
-struct StringToSignedIntConverterMixin {
- using value_type = typename ARROW_TYPE::c_type;
- using unsigned_type = typename std::make_unsigned<value_type>::type;
-
- static bool Convert(const ARROW_TYPE&, const char* s, size_t length, value_type* out) {
- static constexpr auto max_positive =
- static_cast<unsigned_type>(std::numeric_limits<value_type>::max());
- // Assuming two's complement
- static constexpr unsigned_type max_negative = max_positive + 1;
- bool negative = false;
- unsigned_type unsigned_value = 0;
-
- if (ARROW_PREDICT_FALSE(length == 0)) {
- return false;
- }
- if (*s == '-') {
- negative = true;
- s++;
- if (--length == 0) {
- return false;
- }
- }
- // Skip leading zeros
- while (length > 0 && *s == '0') {
- length--;
- s++;
- }
- if (!ARROW_PREDICT_TRUE(ParseUnsigned(s, length, &unsigned_value))) {
- return false;
- }
- if (negative) {
- if (ARROW_PREDICT_FALSE(unsigned_value > max_negative)) {
- return false;
- }
- // To avoid both compiler warnings (with unsigned negation)
- // and undefined behaviour (with signed negation overflow),
- // use the expanded formula for 2's complement negation.
- *out = static_cast<value_type>(~unsigned_value + 1);
- } else {
- if (ARROW_PREDICT_FALSE(unsigned_value > max_positive)) {
- return false;
- }
- *out = static_cast<value_type>(unsigned_value);
- }
- return true;
- }
-};
-
-template <>
-struct StringConverter<Int8Type> : public StringToSignedIntConverterMixin<Int8Type> {
- using StringToSignedIntConverterMixin<Int8Type>::StringToSignedIntConverterMixin;
-};
-
-template <>
-struct StringConverter<Int16Type> : public StringToSignedIntConverterMixin<Int16Type> {
- using StringToSignedIntConverterMixin<Int16Type>::StringToSignedIntConverterMixin;
-};
-
-template <>
-struct StringConverter<Int32Type> : public StringToSignedIntConverterMixin<Int32Type> {
- using StringToSignedIntConverterMixin<Int32Type>::StringToSignedIntConverterMixin;
-};
-
-template <>
-struct StringConverter<Int64Type> : public StringToSignedIntConverterMixin<Int64Type> {
- using StringToSignedIntConverterMixin<Int64Type>::StringToSignedIntConverterMixin;
-};
-
-namespace detail {
-
-// Inline-able ISO-8601 parser
-
-using ts_type = TimestampType::c_type;
-
-template <typename Duration>
-static inline bool ParseYYYY_MM_DD(const char* s, Duration* since_epoch) {
- uint16_t year = 0;
- uint8_t month = 0;
- uint8_t day = 0;
- if (ARROW_PREDICT_FALSE(s[4] != '-') || ARROW_PREDICT_FALSE(s[7] != '-')) {
- return false;
- }
- if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 0, 4, &year))) {
- return false;
- }
- if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 5, 2, &month))) {
- return false;
- }
- if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 8, 2, &day))) {
- return false;
- }
- arrow_vendored::date::year_month_day ymd{arrow_vendored::date::year{year},
- arrow_vendored::date::month{month},
- arrow_vendored::date::day{day}};
- if (ARROW_PREDICT_FALSE(!ymd.ok())) return false;
-
- *since_epoch = std::chrono::duration_cast<Duration>(
- arrow_vendored::date::sys_days{ymd}.time_since_epoch());
- return true;
-}
-
-template <typename Duration>
-static inline bool ParseHH(const char* s, Duration* out) {
- uint8_t hours = 0;
- if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 0, 2, &hours))) {
- return false;
- }
- if (ARROW_PREDICT_FALSE(hours >= 24)) {
- return false;
- }
- *out = std::chrono::duration_cast<Duration>(std::chrono::hours(hours));
- return true;
-}
-
-template <typename Duration>
-static inline bool ParseHH_MM(const char* s, Duration* out) {
- uint8_t hours = 0;
- uint8_t minutes = 0;
- if (ARROW_PREDICT_FALSE(s[2] != ':')) {
- return false;
- }
- if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 0, 2, &hours))) {
- return false;
- }
- if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 3, 2, &minutes))) {
- return false;
- }
- if (ARROW_PREDICT_FALSE(hours >= 24)) {
- return false;
- }
- if (ARROW_PREDICT_FALSE(minutes >= 60)) {
- return false;
- }
- *out = std::chrono::duration_cast<Duration>(std::chrono::hours(hours) +
- std::chrono::minutes(minutes));
- return true;
-}
-
-template <typename Duration>
-static inline bool ParseHH_MM_SS(const char* s, Duration* out) {
- uint8_t hours = 0;
- uint8_t minutes = 0;
- uint8_t seconds = 0;
- if (ARROW_PREDICT_FALSE(s[2] != ':') || ARROW_PREDICT_FALSE(s[5] != ':')) {
- return false;
- }
- if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 0, 2, &hours))) {
- return false;
- }
- if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 3, 2, &minutes))) {
- return false;
- }
- if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 6, 2, &seconds))) {
- return false;
- }
- if (ARROW_PREDICT_FALSE(hours >= 24)) {
- return false;
- }
- if (ARROW_PREDICT_FALSE(minutes >= 60)) {
- return false;
- }
- if (ARROW_PREDICT_FALSE(seconds >= 60)) {
- return false;
- }
- *out = std::chrono::duration_cast<Duration>(std::chrono::hours(hours) +
- std::chrono::minutes(minutes) +
- std::chrono::seconds(seconds));
- return true;
-}
-
-static inline bool ParseSubSeconds(const char* s, size_t length, TimeUnit::type unit,
- uint32_t* out) {
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// This is a private header for string-to-number parsing utilities
+
+#pragma once
+
+#include <cassert>
+#include <chrono>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <type_traits>
+
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/time.h"
+#include "arrow/util/visibility.h"
+#include "arrow/vendored/datetime.h"
+#include "arrow/vendored/strptime.h"
+
+namespace arrow {
+
+/// \brief A virtual string to timestamp parser
+///
+/// Concrete parsers are obtained through the static Make* factories below.
+class ARROW_EXPORT TimestampParser {
+ public:
+ virtual ~TimestampParser() = default;
+
+ /// \brief Parse the `length` characters starting at `s`
+ ///
+ /// NOTE(review): presumably returns true on success and stores the timestamp,
+ /// expressed in `out_unit`, into `*out` -- confirm against the implementations.
+ virtual bool operator()(const char* s, size_t length, TimeUnit::type out_unit,
+ int64_t* out) const = 0;
+
+ /// \brief Short name identifying the parser implementation
+ virtual const char* kind() const = 0;
+
+ /// \brief Format string used by the parser; implementations may override it
+ virtual const char* format() const;
+
+ /// \brief Create a TimestampParser that recognizes strptime-like format strings
+ static std::shared_ptr<TimestampParser> MakeStrptime(std::string format);
+
+ /// \brief Create a TimestampParser that recognizes (locale-agnostic) ISO8601
+ /// timestamps
+ static std::shared_ptr<TimestampParser> MakeISO8601();
+};
+
+namespace internal {
+
+/// \brief The entry point for conversion from strings.
+///
+/// Specializations of StringConverter for `ARROW_TYPE` must define:
+/// - A default constructible member type `value_type` which will be yielded on a
+/// successful parse.
+/// - The static member function `Convert`, callable with signature
+/// `(const ARROW_TYPE& t, const char* s, size_t length, value_type* out)`.
+/// `Convert` returns truthy for successful parses and assigns the parsed values to
+/// `*out`. Parameters required for parsing (for example a timestamp's TimeUnit)
+/// are acquired from the type parameter `t`.
+template <typename ARROW_TYPE, typename Enable = void>
+struct StringConverter;
+
+/// \brief Trait telling whether a StringConverter exists for `T`.
+///
+/// `value` is true when `StringConverter<T>::value_type` names a type and false
+/// otherwise.
+template <typename T>
+struct is_parseable {
+ // Viable only when StringConverter<U>::value_type is well-formed; preferred
+ // over the variadic overload for a pointer argument.
+ template <typename U, typename = typename StringConverter<U>::value_type>
+ static std::true_type Test(U*);
+
+ // Fallback selected when the overload above is removed by SFINAE.
+ template <typename U>
+ static std::false_type Test(...);
+
+ static constexpr bool value = decltype(Test<T>(NULLPTR))::value;
+};
+
+/// \brief SFINAE helper: resolves to `R` (default `void`) only when
+/// `is_parseable<T>::value` is true.
+template <typename T, typename R = void>
+using enable_if_parseable = enable_if_t<is_parseable<T>::value, R>;
+
+/// \brief Boolean parsing.
+///
+/// Accepted spellings are "0", "1", and "true" / "false" in any mix of upper
+/// and lower case.  Any other input, including the empty string, is rejected.
+///
+/// `*out` is written only when the parse succeeds, so a rejected input such as
+/// "abcd" leaves the caller's value untouched.
+template <>
+struct StringConverter<BooleanType> {
+  using value_type = bool;
+
+  static bool Convert(const BooleanType&, const char* s, size_t length, value_type* out) {
+    if (length == 1) {
+      // "0" or "1"?
+      if (s[0] == '0') {
+        *out = false;
+        return true;
+      }
+      if (s[0] == '1') {
+        *out = true;
+        return true;
+      }
+      return false;
+    }
+    if (length == 4) {
+      // "true"?
+      const bool is_true = (s[0] == 't' || s[0] == 'T') && (s[1] == 'r' || s[1] == 'R') &&
+                           (s[2] == 'u' || s[2] == 'U') && (s[3] == 'e' || s[3] == 'E');
+      if (is_true) {
+        *out = true;
+      }
+      return is_true;
+    }
+    if (length == 5) {
+      // "false"?
+      const bool is_false = (s[0] == 'f' || s[0] == 'F') && (s[1] == 'a' || s[1] == 'A') &&
+                            (s[2] == 'l' || s[2] == 'L') && (s[3] == 's' || s[3] == 'S') &&
+                            (s[4] == 'e' || s[4] == 'E');
+      if (is_false) {
+        *out = false;
+      }
+      return is_false;
+    }
+    return false;
+  }
+};
+
+// Ideas for faster float parsing:
+// - http://rapidjson.org/md_doc_internals.html#ParsingDouble
+// - https://github.com/google/double-conversion
+// - https://github.com/fastfloat/fast_float [used here]
+// - https://github.com/achan001/dtoa-fast
+
+/// \brief Parse the `length` characters at `s` as a single-precision float.
+///
+/// Returns true only when parsing reports no error and consumed the whole
+/// input (no trailing characters).
+ARROW_EXPORT
+bool StringToFloat(const char* s, size_t length, float* out);
+
+/// \brief Double-precision overload; same contract as the float overload.
+ARROW_EXPORT
+bool StringToFloat(const char* s, size_t length, double* out);
+
+/// \brief Single-precision float parsing; a thin wrapper over StringToFloat.
+template <>
+struct StringConverter<FloatType> {
+ using value_type = float;
+
+ // The type argument carries no parsing parameters and is ignored.
+ static bool Convert(const FloatType&, const char* s, size_t length, value_type* out) {
+ return ARROW_PREDICT_TRUE(StringToFloat(s, length, out));
+ }
+};
+
+/// \brief Double-precision float parsing; a thin wrapper over StringToFloat.
+template <>
+struct StringConverter<DoubleType> {
+ using value_type = double;
+
+ // The type argument carries no parsing parameters and is ignored.
+ static bool Convert(const DoubleType&, const char* s, size_t length, value_type* out) {
+ return ARROW_PREDICT_TRUE(StringToFloat(s, length, out));
+ }
+};
+
+// NOTE: HalfFloatType would require a half<->float conversion library
+
+/// Map a character to its offset from '0', truncated to uint8_t.
+///
+/// Decimal digits yield 0..9; callers detect every other character by testing
+/// whether the result is greater than 9.
+inline uint8_t ParseDecimalDigit(char c) {
+  const int offset_from_zero = c - '0';
+  return static_cast<uint8_t>(offset_from_zero);
+}
+
+// Consume one character of input and append it as a decimal digit to `result`.
+//
+// Expects `s`, `length` and `result` to be in scope at the expansion site.
+// The expansion contains hidden control flow: it executes `break` when the
+// input is exhausted (so it must sit inside a loop) and `return false` when the
+// character is not a digit (so the enclosing function must return bool).
+// No overflow check is made here; the ParseUnsigned overloads use it only for
+// digit positions that always fit in C_TYPE.
+#define PARSE_UNSIGNED_ITERATION(C_TYPE) \
+ if (length > 0) { \
+ uint8_t digit = ParseDecimalDigit(*s++); \
+ result = static_cast<C_TYPE>(result * 10U); \
+ length--; \
+ if (ARROW_PREDICT_FALSE(digit > 9U)) { \
+ /* Non-digit */ \
+ return false; \
+ } \
+ result = static_cast<C_TYPE>(result + digit); \
+ } else { \
+ break; \
+ }
+
+// Consume the final permitted digit, the only position where C_TYPE can
+// overflow.
+//
+// Expects `s`, `length` and `result` to be in scope.  Makes the enclosing
+// function `return false` when: multiplying `result` by 10 would overflow,
+// characters remain after this one, the character is not a digit, or adding
+// the digit wraps around.  Does nothing when the input is already exhausted.
+#define PARSE_UNSIGNED_ITERATION_LAST(C_TYPE) \
+ if (length > 0) { \
+ if (ARROW_PREDICT_FALSE(result > std::numeric_limits<C_TYPE>::max() / 10U)) { \
+ /* Overflow */ \
+ return false; \
+ } \
+ uint8_t digit = ParseDecimalDigit(*s++); \
+ result = static_cast<C_TYPE>(result * 10U); \
+ C_TYPE new_result = static_cast<C_TYPE>(result + digit); \
+ if (ARROW_PREDICT_FALSE(--length > 0)) { \
+ /* Too many digits */ \
+ return false; \
+ } \
+ if (ARROW_PREDICT_FALSE(digit > 9U)) { \
+ /* Non-digit */ \
+ return false; \
+ } \
+ if (ARROW_PREDICT_FALSE(new_result < result)) { \
+ /* Overflow */ \
+ return false; \
+ } \
+ result = new_result; \
+ }
+
+// Parse at most 3 decimal digits into a uint8_t.
+//
+// Returns false on a non-digit character, on more than 3 characters, or on
+// overflow; `*out` is written only on success.  An empty input succeeds with
+// value 0.  Leading zeros are not skipped and count toward the 3-character
+// limit.
+inline bool ParseUnsigned(const char* s, size_t length, uint8_t* out) {
+ uint8_t result = 0;
+
+ // The do/while(false) gives the ITERATION macros a loop to `break` out of.
+ do {
+ PARSE_UNSIGNED_ITERATION(uint8_t);
+ PARSE_UNSIGNED_ITERATION(uint8_t);
+ PARSE_UNSIGNED_ITERATION_LAST(uint8_t);
+ } while (false);
+ *out = result;
+ return true;
+}
+
+// Parse at most 5 decimal digits into a uint16_t.
+//
+// Returns false on a non-digit character, on more than 5 characters, or on
+// overflow; `*out` is written only on success.  An empty input succeeds with
+// value 0.  Leading zeros are not skipped and count toward the 5-character
+// limit.
+inline bool ParseUnsigned(const char* s, size_t length, uint16_t* out) {
+ uint16_t result = 0;
+ // The do/while(false) gives the ITERATION macros a loop to `break` out of.
+ do {
+ PARSE_UNSIGNED_ITERATION(uint16_t);
+ PARSE_UNSIGNED_ITERATION(uint16_t);
+ PARSE_UNSIGNED_ITERATION(uint16_t);
+ PARSE_UNSIGNED_ITERATION(uint16_t);
+ PARSE_UNSIGNED_ITERATION_LAST(uint16_t);
+ } while (false);
+ *out = result;
+ return true;
+}
+
+// Parse at most 10 decimal digits into a uint32_t.
+//
+// Returns false on a non-digit character, on more than 10 characters, or on
+// overflow; `*out` is written only on success.  An empty input succeeds with
+// value 0.  Leading zeros are not skipped and count toward the 10-character
+// limit.
+inline bool ParseUnsigned(const char* s, size_t length, uint32_t* out) {
+ uint32_t result = 0;
+ // The do/while(false) gives the ITERATION macros a loop to `break` out of.
+ do {
+ PARSE_UNSIGNED_ITERATION(uint32_t);
+ PARSE_UNSIGNED_ITERATION(uint32_t);
+ PARSE_UNSIGNED_ITERATION(uint32_t);
+ PARSE_UNSIGNED_ITERATION(uint32_t);
+ PARSE_UNSIGNED_ITERATION(uint32_t);
+
+ PARSE_UNSIGNED_ITERATION(uint32_t);
+ PARSE_UNSIGNED_ITERATION(uint32_t);
+ PARSE_UNSIGNED_ITERATION(uint32_t);
+ PARSE_UNSIGNED_ITERATION(uint32_t);
+
+ PARSE_UNSIGNED_ITERATION_LAST(uint32_t);
+ } while (false);
+ *out = result;
+ return true;
+}
+
+// Parse at most 20 decimal digits into a uint64_t.
+//
+// Returns false on a non-digit character, on more than 20 characters, or on
+// overflow; `*out` is written only on success.  An empty input succeeds with
+// value 0.  Leading zeros are not skipped and count toward the 20-character
+// limit.
+inline bool ParseUnsigned(const char* s, size_t length, uint64_t* out) {
+ uint64_t result = 0;
+ // The do/while(false) gives the ITERATION macros a loop to `break` out of.
+ do {
+ PARSE_UNSIGNED_ITERATION(uint64_t);
+ PARSE_UNSIGNED_ITERATION(uint64_t);
+ PARSE_UNSIGNED_ITERATION(uint64_t);
+ PARSE_UNSIGNED_ITERATION(uint64_t);
+ PARSE_UNSIGNED_ITERATION(uint64_t);
+
+ PARSE_UNSIGNED_ITERATION(uint64_t);
+ PARSE_UNSIGNED_ITERATION(uint64_t);
+ PARSE_UNSIGNED_ITERATION(uint64_t);
+ PARSE_UNSIGNED_ITERATION(uint64_t);
+ PARSE_UNSIGNED_ITERATION(uint64_t);
+
+ PARSE_UNSIGNED_ITERATION(uint64_t);
+ PARSE_UNSIGNED_ITERATION(uint64_t);
+ PARSE_UNSIGNED_ITERATION(uint64_t);
+ PARSE_UNSIGNED_ITERATION(uint64_t);
+ PARSE_UNSIGNED_ITERATION(uint64_t);
+
+ PARSE_UNSIGNED_ITERATION(uint64_t);
+ PARSE_UNSIGNED_ITERATION(uint64_t);
+ PARSE_UNSIGNED_ITERATION(uint64_t);
+ PARSE_UNSIGNED_ITERATION(uint64_t);
+
+ PARSE_UNSIGNED_ITERATION_LAST(uint64_t);
+ } while (false);
+ *out = result;
+ return true;
+}
+
+#undef PARSE_UNSIGNED_ITERATION
+#undef PARSE_UNSIGNED_ITERATION_LAST
+
+/// Shared `Convert` implementation for the unsigned integer StringConverter
+/// specializations.
+///
+/// The empty string is rejected.  Any number of leading zeros is stripped and
+/// the remaining characters are handed to the ParseUnsigned overload matching
+/// `value_type`; an all-zero input therefore parses as 0.
+template <class ARROW_TYPE>
+struct StringToUnsignedIntConverterMixin {
+  using value_type = typename ARROW_TYPE::c_type;
+
+  static bool Convert(const ARROW_TYPE&, const char* s, size_t length, value_type* out) {
+    if (ARROW_PREDICT_FALSE(length == 0)) {
+      return false;
+    }
+    // Advance past leading zeros so only significant digits remain.
+    const char* const end = s + length;
+    const char* significant = s;
+    while (significant != end && *significant == '0') {
+      ++significant;
+    }
+    return ParseUnsigned(significant, static_cast<size_t>(end - significant), out);
+  }
+};
+
+/// \brief Unsigned 8-bit integer parsing; `value_type` and `Convert` are
+/// inherited from StringToUnsignedIntConverterMixin.
+template <>
+struct StringConverter<UInt8Type> : public StringToUnsignedIntConverterMixin<UInt8Type> {
+ using StringToUnsignedIntConverterMixin<UInt8Type>::StringToUnsignedIntConverterMixin;
+};
+
+/// \brief Unsigned 16-bit integer parsing; `value_type` and `Convert` are
+/// inherited from StringToUnsignedIntConverterMixin.
+template <>
+struct StringConverter<UInt16Type>
+ : public StringToUnsignedIntConverterMixin<UInt16Type> {
+ using StringToUnsignedIntConverterMixin<UInt16Type>::StringToUnsignedIntConverterMixin;
+};
+
+/// \brief Unsigned 32-bit integer parsing; `value_type` and `Convert` are
+/// inherited from StringToUnsignedIntConverterMixin.
+template <>
+struct StringConverter<UInt32Type>
+ : public StringToUnsignedIntConverterMixin<UInt32Type> {
+ using StringToUnsignedIntConverterMixin<UInt32Type>::StringToUnsignedIntConverterMixin;
+};
+
+template <>
+struct StringConverter<UInt64Type>
+ : public StringToUnsignedIntConverterMixin<UInt64Type> {
+ using StringToUnsignedIntConverterMixin<UInt64Type>::StringToUnsignedIntConverterMixin;
+};
+
+template <class ARROW_TYPE>
+struct StringToSignedIntConverterMixin {
+ using value_type = typename ARROW_TYPE::c_type;
+ using unsigned_type = typename std::make_unsigned<value_type>::type;
+
+ static bool Convert(const ARROW_TYPE&, const char* s, size_t length, value_type* out) {
+ static constexpr auto max_positive =
+ static_cast<unsigned_type>(std::numeric_limits<value_type>::max());
+ // Assuming two's complement
+ static constexpr unsigned_type max_negative = max_positive + 1;
+ bool negative = false;
+ unsigned_type unsigned_value = 0;
+
+ if (ARROW_PREDICT_FALSE(length == 0)) {
+ return false;
+ }
+ if (*s == '-') {
+ negative = true;
+ s++;
+ if (--length == 0) {
+ return false;
+ }
+ }
+ // Skip leading zeros
+ while (length > 0 && *s == '0') {
+ length--;
+ s++;
+ }
+ if (!ARROW_PREDICT_TRUE(ParseUnsigned(s, length, &unsigned_value))) {
+ return false;
+ }
+ if (negative) {
+ if (ARROW_PREDICT_FALSE(unsigned_value > max_negative)) {
+ return false;
+ }
+ // To avoid both compiler warnings (with unsigned negation)
+ // and undefined behaviour (with signed negation overflow),
+ // use the expanded formula for 2's complement negation.
+ *out = static_cast<value_type>(~unsigned_value + 1);
+ } else {
+ if (ARROW_PREDICT_FALSE(unsigned_value > max_positive)) {
+ return false;
+ }
+ *out = static_cast<value_type>(unsigned_value);
+ }
+ return true;
+ }
+};
+
+template <>
+struct StringConverter<Int8Type> : public StringToSignedIntConverterMixin<Int8Type> {
+ using StringToSignedIntConverterMixin<Int8Type>::StringToSignedIntConverterMixin;
+};
+
+template <>
+struct StringConverter<Int16Type> : public StringToSignedIntConverterMixin<Int16Type> {
+ using StringToSignedIntConverterMixin<Int16Type>::StringToSignedIntConverterMixin;
+};
+
+template <>
+struct StringConverter<Int32Type> : public StringToSignedIntConverterMixin<Int32Type> {
+ using StringToSignedIntConverterMixin<Int32Type>::StringToSignedIntConverterMixin;
+};
+
+template <>
+struct StringConverter<Int64Type> : public StringToSignedIntConverterMixin<Int64Type> {
+ using StringToSignedIntConverterMixin<Int64Type>::StringToSignedIntConverterMixin;
+};
+
+namespace detail {
+
+// Inline-able ISO-8601 parser
+
+using ts_type = TimestampType::c_type;
+
+template <typename Duration>
+static inline bool ParseYYYY_MM_DD(const char* s, Duration* since_epoch) {
+ uint16_t year = 0;
+ uint8_t month = 0;
+ uint8_t day = 0;
+ if (ARROW_PREDICT_FALSE(s[4] != '-') || ARROW_PREDICT_FALSE(s[7] != '-')) {
+ return false;
+ }
+ if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 0, 4, &year))) {
+ return false;
+ }
+ if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 5, 2, &month))) {
+ return false;
+ }
+ if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 8, 2, &day))) {
+ return false;
+ }
+ arrow_vendored::date::year_month_day ymd{arrow_vendored::date::year{year},
+ arrow_vendored::date::month{month},
+ arrow_vendored::date::day{day}};
+ if (ARROW_PREDICT_FALSE(!ymd.ok())) return false;
+
+ *since_epoch = std::chrono::duration_cast<Duration>(
+ arrow_vendored::date::sys_days{ymd}.time_since_epoch());
+ return true;
+}
+
+template <typename Duration>
+static inline bool ParseHH(const char* s, Duration* out) {
+ uint8_t hours = 0;
+ if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 0, 2, &hours))) {
+ return false;
+ }
+ if (ARROW_PREDICT_FALSE(hours >= 24)) {
+ return false;
+ }
+ *out = std::chrono::duration_cast<Duration>(std::chrono::hours(hours));
+ return true;
+}
+
+template <typename Duration>
+static inline bool ParseHH_MM(const char* s, Duration* out) {
+ uint8_t hours = 0;
+ uint8_t minutes = 0;
+ if (ARROW_PREDICT_FALSE(s[2] != ':')) {
+ return false;
+ }
+ if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 0, 2, &hours))) {
+ return false;
+ }
+ if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 3, 2, &minutes))) {
+ return false;
+ }
+ if (ARROW_PREDICT_FALSE(hours >= 24)) {
+ return false;
+ }
+ if (ARROW_PREDICT_FALSE(minutes >= 60)) {
+ return false;
+ }
+ *out = std::chrono::duration_cast<Duration>(std::chrono::hours(hours) +
+ std::chrono::minutes(minutes));
+ return true;
+}
+
+template <typename Duration>
+static inline bool ParseHH_MM_SS(const char* s, Duration* out) {
+ uint8_t hours = 0;
+ uint8_t minutes = 0;
+ uint8_t seconds = 0;
+ if (ARROW_PREDICT_FALSE(s[2] != ':') || ARROW_PREDICT_FALSE(s[5] != ':')) {
+ return false;
+ }
+ if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 0, 2, &hours))) {
+ return false;
+ }
+ if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 3, 2, &minutes))) {
+ return false;
+ }
+ if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 6, 2, &seconds))) {
+ return false;
+ }
+ if (ARROW_PREDICT_FALSE(hours >= 24)) {
+ return false;
+ }
+ if (ARROW_PREDICT_FALSE(minutes >= 60)) {
+ return false;
+ }
+ if (ARROW_PREDICT_FALSE(seconds >= 60)) {
+ return false;
+ }
+ *out = std::chrono::duration_cast<Duration>(std::chrono::hours(hours) +
+ std::chrono::minutes(minutes) +
+ std::chrono::seconds(seconds));
+ return true;
+}
+
+static inline bool ParseSubSeconds(const char* s, size_t length, TimeUnit::type unit,
+ uint32_t* out) {
// The decimal point has been peeled off at this point
// Fail if number of decimal places provided exceeds what the unit can hold.
@@ -500,7 +500,7 @@ static inline bool ParseSubSeconds(const char* s, size_t length, TimeUnit::type
if (length < 3) {
omitted = 3 - length;
}
- break;
+ break;
case TimeUnit::MICRO:
if (ARROW_PREDICT_FALSE(length > 6)) {
return false;
@@ -508,7 +508,7 @@ static inline bool ParseSubSeconds(const char* s, size_t length, TimeUnit::type
if (length < 6) {
omitted = 6 - length;
}
- break;
+ break;
case TimeUnit::NANO:
if (ARROW_PREDICT_FALSE(length > 9)) {
return false;
@@ -516,11 +516,11 @@ static inline bool ParseSubSeconds(const char* s, size_t length, TimeUnit::type
if (length < 9) {
omitted = 9 - length;
}
- break;
- default:
- return false;
- }
-
+ break;
+ default:
+ return false;
+ }
+
if (ARROW_PREDICT_TRUE(omitted == 0)) {
return ParseUnsigned(s, length, out);
} else {
@@ -560,67 +560,67 @@ static inline bool ParseSubSeconds(const char* s, size_t length, TimeUnit::type
} else {
return false;
}
- }
-}
-
-} // namespace detail
-
-static inline bool ParseTimestampISO8601(const char* s, size_t length,
- TimeUnit::type unit,
- TimestampType::c_type* out) {
- using seconds_type = std::chrono::duration<TimestampType::c_type>;
-
- // We allow the following formats for all units:
- // - "YYYY-MM-DD"
+ }
+}
+
+} // namespace detail
+
+static inline bool ParseTimestampISO8601(const char* s, size_t length,
+ TimeUnit::type unit,
+ TimestampType::c_type* out) {
+ using seconds_type = std::chrono::duration<TimestampType::c_type>;
+
+ // We allow the following formats for all units:
+ // - "YYYY-MM-DD"
// - "YYYY-MM-DD[ T]hhZ?"
// - "YYYY-MM-DD[ T]hh:mmZ?"
// - "YYYY-MM-DD[ T]hh:mm:ssZ?"
- //
+ //
// We allow the following formats for unit == MILLI, MICRO, or NANO:
// - "YYYY-MM-DD[ T]hh:mm:ss.s{1,3}Z?"
- //
+ //
// We allow the following formats for unit == MICRO, or NANO:
// - "YYYY-MM-DD[ T]hh:mm:ss.s{4,6}Z?"
- //
+ //
// We allow the following formats for unit == NANO:
// - "YYYY-MM-DD[ T]hh:mm:ss.s{7,9}Z?"
+ //
+ // UTC is always assumed, and the DataType's timezone is ignored.
//
- // UTC is always assumed, and the DataType's timezone is ignored.
- //
-
- if (ARROW_PREDICT_FALSE(length < 10)) return false;
-
- seconds_type seconds_since_epoch;
- if (ARROW_PREDICT_FALSE(!detail::ParseYYYY_MM_DD(s, &seconds_since_epoch))) {
- return false;
- }
-
- if (length == 10) {
- *out = util::CastSecondsToUnit(unit, seconds_since_epoch.count());
- return true;
- }
-
- if (ARROW_PREDICT_FALSE(s[10] != ' ') && ARROW_PREDICT_FALSE(s[10] != 'T')) {
- return false;
- }
-
- if (s[length - 1] == 'Z') {
- --length;
- }
-
- seconds_type seconds_since_midnight;
- switch (length) {
- case 13: // YYYY-MM-DD[ T]hh
- if (ARROW_PREDICT_FALSE(!detail::ParseHH(s + 11, &seconds_since_midnight))) {
- return false;
- }
- break;
- case 16: // YYYY-MM-DD[ T]hh:mm
- if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM(s + 11, &seconds_since_midnight))) {
- return false;
- }
- break;
- case 19: // YYYY-MM-DD[ T]hh:mm:ss
+
+ if (ARROW_PREDICT_FALSE(length < 10)) return false;
+
+ seconds_type seconds_since_epoch;
+ if (ARROW_PREDICT_FALSE(!detail::ParseYYYY_MM_DD(s, &seconds_since_epoch))) {
+ return false;
+ }
+
+ if (length == 10) {
+ *out = util::CastSecondsToUnit(unit, seconds_since_epoch.count());
+ return true;
+ }
+
+ if (ARROW_PREDICT_FALSE(s[10] != ' ') && ARROW_PREDICT_FALSE(s[10] != 'T')) {
+ return false;
+ }
+
+ if (s[length - 1] == 'Z') {
+ --length;
+ }
+
+ seconds_type seconds_since_midnight;
+ switch (length) {
+ case 13: // YYYY-MM-DD[ T]hh
+ if (ARROW_PREDICT_FALSE(!detail::ParseHH(s + 11, &seconds_since_midnight))) {
+ return false;
+ }
+ break;
+ case 16: // YYYY-MM-DD[ T]hh:mm
+ if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM(s + 11, &seconds_since_midnight))) {
+ return false;
+ }
+ break;
+ case 19: // YYYY-MM-DD[ T]hh:mm:ss
case 21: // YYYY-MM-DD[ T]hh:mm:ss.s
case 22: // YYYY-MM-DD[ T]hh:mm:ss.ss
case 23: // YYYY-MM-DD[ T]hh:mm:ss.sss
@@ -630,151 +630,151 @@ static inline bool ParseTimestampISO8601(const char* s, size_t length,
case 27: // YYYY-MM-DD[ T]hh:mm:ss.sssssss
case 28: // YYYY-MM-DD[ T]hh:mm:ss.ssssssss
case 29: // YYYY-MM-DD[ T]hh:mm:ss.sssssssss
- if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM_SS(s + 11, &seconds_since_midnight))) {
- return false;
- }
- break;
- default:
- return false;
- }
-
- seconds_since_epoch += seconds_since_midnight;
-
- if (length <= 19) {
- *out = util::CastSecondsToUnit(unit, seconds_since_epoch.count());
- return true;
- }
-
+ if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM_SS(s + 11, &seconds_since_midnight))) {
+ return false;
+ }
+ break;
+ default:
+ return false;
+ }
+
+ seconds_since_epoch += seconds_since_midnight;
+
+ if (length <= 19) {
+ *out = util::CastSecondsToUnit(unit, seconds_since_epoch.count());
+ return true;
+ }
+
if (ARROW_PREDICT_FALSE(s[19] != '.')) {
return false;
}
- uint32_t subseconds = 0;
- if (ARROW_PREDICT_FALSE(
+ uint32_t subseconds = 0;
+ if (ARROW_PREDICT_FALSE(
!detail::ParseSubSeconds(s + 20, length - 20, unit, &subseconds))) {
- return false;
- }
-
- *out = util::CastSecondsToUnit(unit, seconds_since_epoch.count()) + subseconds;
- return true;
-}
-
-/// \brief Returns time since the UNIX epoch in the requested unit
-static inline bool ParseTimestampStrptime(const char* buf, size_t length,
- const char* format, bool ignore_time_in_day,
- bool allow_trailing_chars, TimeUnit::type unit,
- int64_t* out) {
- // NOTE: strptime() is more than 10x faster than arrow_vendored::date::parse().
- // The buffer may not be nul-terminated
- std::string clean_copy(buf, length);
- struct tm result;
- memset(&result, 0, sizeof(struct tm));
-#ifdef _WIN32
- char* ret = arrow_strptime(clean_copy.c_str(), format, &result);
-#else
- char* ret = strptime(clean_copy.c_str(), format, &result);
-#endif
- if (ret == NULLPTR) {
- return false;
- }
- if (!allow_trailing_chars && static_cast<size_t>(ret - clean_copy.c_str()) != length) {
- return false;
- }
- // ignore the time part
- arrow_vendored::date::sys_seconds secs =
- arrow_vendored::date::sys_days(arrow_vendored::date::year(result.tm_year + 1900) /
- (result.tm_mon + 1) / result.tm_mday);
- if (!ignore_time_in_day) {
- secs += (std::chrono::hours(result.tm_hour) + std::chrono::minutes(result.tm_min) +
- std::chrono::seconds(result.tm_sec));
- }
- *out = util::CastSecondsToUnit(unit, secs.time_since_epoch().count());
- return true;
-}
-
-template <>
-struct StringConverter<TimestampType> {
- using value_type = int64_t;
-
- static bool Convert(const TimestampType& type, const char* s, size_t length,
- value_type* out) {
- return ParseTimestampISO8601(s, length, type.unit(), out);
- }
-};
-
-template <>
-struct StringConverter<DurationType>
- : public StringToSignedIntConverterMixin<DurationType> {
- using StringToSignedIntConverterMixin<DurationType>::StringToSignedIntConverterMixin;
-};
-
-template <typename DATE_TYPE>
-struct StringConverter<DATE_TYPE, enable_if_date<DATE_TYPE>> {
- using value_type = typename DATE_TYPE::c_type;
-
- using duration_type =
- typename std::conditional<std::is_same<DATE_TYPE, Date32Type>::value,
- arrow_vendored::date::days,
- std::chrono::milliseconds>::type;
-
- static bool Convert(const DATE_TYPE& type, const char* s, size_t length,
- value_type* out) {
- if (length != 10) return false;
-
- duration_type since_epoch;
- if (ARROW_PREDICT_FALSE(!detail::ParseYYYY_MM_DD(s, &since_epoch))) {
- return false;
- }
-
- *out = static_cast<value_type>(since_epoch.count());
- return true;
- }
-};
-
-template <typename TIME_TYPE>
-struct StringConverter<TIME_TYPE, enable_if_time<TIME_TYPE>> {
- using value_type = typename TIME_TYPE::c_type;
-
- static bool Convert(const TIME_TYPE& type, const char* s, size_t length,
- value_type* out) {
- if (length < 8) return false;
- auto unit = type.unit();
-
- std::chrono::seconds since_midnight;
- if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM_SS(s, &since_midnight))) {
- return false;
- }
-
- *out = static_cast<value_type>(util::CastSecondsToUnit(unit, since_midnight.count()));
-
- if (length == 8) {
- return true;
- }
-
- uint32_t subseconds_count = 0;
- if (ARROW_PREDICT_FALSE(
+ return false;
+ }
+
+ *out = util::CastSecondsToUnit(unit, seconds_since_epoch.count()) + subseconds;
+ return true;
+}
+
+/// \brief Returns time since the UNIX epoch in the requested unit
+static inline bool ParseTimestampStrptime(const char* buf, size_t length,
+ const char* format, bool ignore_time_in_day,
+ bool allow_trailing_chars, TimeUnit::type unit,
+ int64_t* out) {
+ // NOTE: strptime() is more than 10x faster than arrow_vendored::date::parse().
+ // The buffer may not be nul-terminated
+ std::string clean_copy(buf, length);
+ struct tm result;
+ memset(&result, 0, sizeof(struct tm));
+#ifdef _WIN32
+ char* ret = arrow_strptime(clean_copy.c_str(), format, &result);
+#else
+ char* ret = strptime(clean_copy.c_str(), format, &result);
+#endif
+ if (ret == NULLPTR) {
+ return false;
+ }
+ if (!allow_trailing_chars && static_cast<size_t>(ret - clean_copy.c_str()) != length) {
+ return false;
+ }
+ // ignore the time part
+ arrow_vendored::date::sys_seconds secs =
+ arrow_vendored::date::sys_days(arrow_vendored::date::year(result.tm_year + 1900) /
+ (result.tm_mon + 1) / result.tm_mday);
+ if (!ignore_time_in_day) {
+ secs += (std::chrono::hours(result.tm_hour) + std::chrono::minutes(result.tm_min) +
+ std::chrono::seconds(result.tm_sec));
+ }
+ *out = util::CastSecondsToUnit(unit, secs.time_since_epoch().count());
+ return true;
+}
+
+template <>
+struct StringConverter<TimestampType> {
+ using value_type = int64_t;
+
+ static bool Convert(const TimestampType& type, const char* s, size_t length,
+ value_type* out) {
+ return ParseTimestampISO8601(s, length, type.unit(), out);
+ }
+};
+
+template <>
+struct StringConverter<DurationType>
+ : public StringToSignedIntConverterMixin<DurationType> {
+ using StringToSignedIntConverterMixin<DurationType>::StringToSignedIntConverterMixin;
+};
+
+template <typename DATE_TYPE>
+struct StringConverter<DATE_TYPE, enable_if_date<DATE_TYPE>> {
+ using value_type = typename DATE_TYPE::c_type;
+
+ using duration_type =
+ typename std::conditional<std::is_same<DATE_TYPE, Date32Type>::value,
+ arrow_vendored::date::days,
+ std::chrono::milliseconds>::type;
+
+ static bool Convert(const DATE_TYPE& type, const char* s, size_t length,
+ value_type* out) {
+ if (length != 10) return false;
+
+ duration_type since_epoch;
+ if (ARROW_PREDICT_FALSE(!detail::ParseYYYY_MM_DD(s, &since_epoch))) {
+ return false;
+ }
+
+ *out = static_cast<value_type>(since_epoch.count());
+ return true;
+ }
+};
+
+template <typename TIME_TYPE>
+struct StringConverter<TIME_TYPE, enable_if_time<TIME_TYPE>> {
+ using value_type = typename TIME_TYPE::c_type;
+
+ static bool Convert(const TIME_TYPE& type, const char* s, size_t length,
+ value_type* out) {
+ if (length < 8) return false;
+ auto unit = type.unit();
+
+ std::chrono::seconds since_midnight;
+ if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM_SS(s, &since_midnight))) {
+ return false;
+ }
+
+ *out = static_cast<value_type>(util::CastSecondsToUnit(unit, since_midnight.count()));
+
+ if (length == 8) {
+ return true;
+ }
+
+ uint32_t subseconds_count = 0;
+ if (ARROW_PREDICT_FALSE(
!detail::ParseSubSeconds(s + 9, length - 9, unit, &subseconds_count))) {
- return false;
- }
-
- *out += subseconds_count;
- return true;
- }
-};
-
-/// \brief Convenience wrappers around internal::StringConverter.
-template <typename T>
-bool ParseValue(const T& type, const char* s, size_t length,
- typename StringConverter<T>::value_type* out) {
- return StringConverter<T>::Convert(type, s, length, out);
-}
-
-template <typename T>
-enable_if_parameter_free<T, bool> ParseValue(
- const char* s, size_t length, typename StringConverter<T>::value_type* out) {
- static T type;
- return StringConverter<T>::Convert(type, s, length, out);
-}
-
-} // namespace internal
-} // namespace arrow
+ return false;
+ }
+
+ *out += subseconds_count;
+ return true;
+ }
+};
+
+/// \brief Convenience wrappers around internal::StringConverter.
+template <typename T>
+bool ParseValue(const T& type, const char* s, size_t length,
+ typename StringConverter<T>::value_type* out) {
+ return StringConverter<T>::Convert(type, s, length, out);
+}
+
+template <typename T>
+enable_if_parameter_free<T, bool> ParseValue(
+ const char* s, size_t length, typename StringConverter<T>::value_type* out) {
+ static T type;
+ return StringConverter<T>::Convert(type, s, length, out);
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/variant.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/variant.h
index b4b0d8f6f31..42f119a0319 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/variant.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/variant.h
@@ -1,33 +1,33 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
#include <cstddef>
#include <exception>
#include <type_traits>
#include <utility>
-
+
#include "arrow/util/macros.h"
#include "arrow/util/type_traits.h"
-namespace arrow {
-namespace util {
-
+namespace arrow {
+namespace util {
+
/// \brief a std::variant-like discriminated union
///
/// Simplifications from std::variant:
@@ -58,7 +58,7 @@ namespace util {
/// which is more conformant with our code style.
template <typename... T>
class Variant;
-
+
namespace detail {
template <typename T, typename = void>
@@ -435,5 +435,5 @@ bool holds_alternative(const Variant<T...>& v) {
return v.template get<U>();
}
-} // namespace util
-} // namespace arrow
+} // namespace util
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/vector.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/vector.h
index 041bdb424a7..f4302eac1e7 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/vector.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/vector.h
@@ -1,81 +1,81 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
#include <algorithm>
-#include <utility>
-#include <vector>
-
+#include <utility>
+#include <vector>
+
#include "arrow/result.h"
#include "arrow/util/algorithm.h"
#include "arrow/util/functional.h"
-#include "arrow/util/logging.h"
-
-namespace arrow {
-namespace internal {
-
-template <typename T>
+#include "arrow/util/logging.h"
+
+namespace arrow {
+namespace internal {
+
+template <typename T>
std::vector<T> DeleteVectorElement(const std::vector<T>& values, size_t index) {
- DCHECK(!values.empty());
- DCHECK_LT(index, values.size());
- std::vector<T> out;
- out.reserve(values.size() - 1);
- for (size_t i = 0; i < index; ++i) {
- out.push_back(values[i]);
- }
- for (size_t i = index + 1; i < values.size(); ++i) {
- out.push_back(values[i]);
- }
- return out;
-}
-
-template <typename T>
+ DCHECK(!values.empty());
+ DCHECK_LT(index, values.size());
+ std::vector<T> out;
+ out.reserve(values.size() - 1);
+ for (size_t i = 0; i < index; ++i) {
+ out.push_back(values[i]);
+ }
+ for (size_t i = index + 1; i < values.size(); ++i) {
+ out.push_back(values[i]);
+ }
+ return out;
+}
+
+template <typename T>
std::vector<T> AddVectorElement(const std::vector<T>& values, size_t index,
T new_element) {
- DCHECK_LE(index, values.size());
- std::vector<T> out;
- out.reserve(values.size() + 1);
- for (size_t i = 0; i < index; ++i) {
- out.push_back(values[i]);
- }
- out.emplace_back(std::move(new_element));
- for (size_t i = index; i < values.size(); ++i) {
- out.push_back(values[i]);
- }
- return out;
-}
-
-template <typename T>
+ DCHECK_LE(index, values.size());
+ std::vector<T> out;
+ out.reserve(values.size() + 1);
+ for (size_t i = 0; i < index; ++i) {
+ out.push_back(values[i]);
+ }
+ out.emplace_back(std::move(new_element));
+ for (size_t i = index; i < values.size(); ++i) {
+ out.push_back(values[i]);
+ }
+ return out;
+}
+
+template <typename T>
std::vector<T> ReplaceVectorElement(const std::vector<T>& values, size_t index,
T new_element) {
- DCHECK_LE(index, values.size());
- std::vector<T> out;
- out.reserve(values.size());
- for (size_t i = 0; i < index; ++i) {
- out.push_back(values[i]);
- }
- out.emplace_back(std::move(new_element));
- for (size_t i = index + 1; i < values.size(); ++i) {
- out.push_back(values[i]);
- }
- return out;
-}
-
+ DCHECK_LE(index, values.size());
+ std::vector<T> out;
+ out.reserve(values.size());
+ for (size_t i = 0; i < index; ++i) {
+ out.push_back(values[i]);
+ }
+ out.emplace_back(std::move(new_element));
+ for (size_t i = index + 1; i < values.size(); ++i) {
+ out.push_back(values[i]);
+ }
+ return out;
+}
+
template <typename T, typename Predicate>
std::vector<T> FilterVector(std::vector<T> values, Predicate&& predicate) {
auto new_end =
@@ -168,5 +168,5 @@ Result<std::vector<T>> UnwrapOrRaise(const std::vector<Result<T>>& results) {
return std::move(out);
}
-} // namespace internal
-} // namespace arrow
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/visibility.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/visibility.h
index dd9ac45e9bb..04ffd42db4a 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/visibility.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/visibility.h
@@ -1,45 +1,45 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#if defined(_WIN32) || defined(__CYGWIN__)
-#if defined(_MSC_VER)
-#pragma warning(disable : 4251)
-#else
-#pragma GCC diagnostic ignored "-Wattributes"
-#endif
-
-#ifdef ARROW_STATIC
-#define ARROW_EXPORT
-#elif defined(ARROW_EXPORTING)
-#define ARROW_EXPORT __declspec(dllexport)
-#else
-#define ARROW_EXPORT __declspec(dllimport)
-#endif
-
-#define ARROW_NO_EXPORT
-#define ARROW_FORCE_INLINE __forceinline
-#else // Not Windows
-#ifndef ARROW_EXPORT
-#define ARROW_EXPORT __attribute__((visibility("default")))
-#endif
-#ifndef ARROW_NO_EXPORT
-#define ARROW_NO_EXPORT __attribute__((visibility("hidden")))
-#define ARROW_FORCE_INLINE
-#endif
-#endif // Non-Windows
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+#if defined(_MSC_VER)
+#pragma warning(disable : 4251)
+#else
+#pragma GCC diagnostic ignored "-Wattributes"
+#endif
+
+#ifdef ARROW_STATIC
+#define ARROW_EXPORT
+#elif defined(ARROW_EXPORTING)
+#define ARROW_EXPORT __declspec(dllexport)
+#else
+#define ARROW_EXPORT __declspec(dllimport)
+#endif
+
+#define ARROW_NO_EXPORT
+#define ARROW_FORCE_INLINE __forceinline
+#else // Not Windows
+#ifndef ARROW_EXPORT
+#define ARROW_EXPORT __attribute__((visibility("default")))
+#endif
+#ifndef ARROW_NO_EXPORT
+#define ARROW_NO_EXPORT __attribute__((visibility("hidden")))
+#define ARROW_FORCE_INLINE
+#endif
+#endif // Non-Windows
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/windows_compatibility.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/windows_compatibility.h
index 64a2772c41c..0ba5d588c6f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/windows_compatibility.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/windows_compatibility.h
@@ -1,42 +1,42 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#ifdef _WIN32
-
-// Windows defines min and max macros that mess up std::min/max
-#ifndef NOMINMAX
-#define NOMINMAX
-#endif
-
-#define WIN32_LEAN_AND_MEAN
-
-// Set Windows 7 as a conservative minimum for Apache Arrow
-#if defined(_WIN32_WINNT) && _WIN32_WINNT < 0x601
-#undef _WIN32_WINNT
-#endif
-#ifndef _WIN32_WINNT
-#define _WIN32_WINNT 0x601
-#endif
-
-#include <winsock2.h>
-#include <windows.h>
-
-#include "arrow/util/windows_fixup.h"
-
-#endif // _WIN32
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#ifdef _WIN32
+
+// Windows defines min and max macros that mess up std::min/max
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
+
+#define WIN32_LEAN_AND_MEAN
+
+// Set Windows 7 as a conservative minimum for Apache Arrow
+#if defined(_WIN32_WINNT) && _WIN32_WINNT < 0x601
+#undef _WIN32_WINNT
+#endif
+#ifndef _WIN32_WINNT
+#define _WIN32_WINNT 0x601
+#endif
+
+#include <winsock2.h>
+#include <windows.h>
+
+#include "arrow/util/windows_fixup.h"
+
+#endif // _WIN32
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/windows_fixup.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/windows_fixup.h
index 2949ac4ab76..8692706c19a 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/windows_fixup.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/windows_fixup.h
@@ -1,24 +1,24 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// This header needs to be included multiple times.
-
-#ifdef _WIN32
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// This header needs to be included multiple times.
+
+#ifdef _WIN32
+
#ifdef max
#undef max
#endif
@@ -26,27 +26,27 @@
#undef min
#endif
-// The Windows API defines macros from *File resolving to either
-// *FileA or *FileW. Need to undo them.
-#ifdef CopyFile
-#undef CopyFile
-#endif
-#ifdef CreateFile
-#undef CreateFile
-#endif
-#ifdef DeleteFile
-#undef DeleteFile
-#endif
-
-// Other annoying Windows macro definitions...
-#ifdef IN
-#undef IN
-#endif
-#ifdef OUT
-#undef OUT
-#endif
-
-// Note that we can't undefine OPTIONAL, because it can be used in other
-// Windows headers...
-
-#endif // _WIN32
+// The Windows API defines macros from *File resolving to either
+// *FileA or *FileW. Need to undo them.
+#ifdef CopyFile
+#undef CopyFile
+#endif
+#ifdef CreateFile
+#undef CreateFile
+#endif
+#ifdef DeleteFile
+#undef DeleteFile
+#endif
+
+// Other annoying Windows macro definitions...
+#ifdef IN
+#undef IN
+#endif
+#ifdef OUT
+#undef OUT
+#endif
+
+// Note that we can't undefine OPTIONAL, because it can be used in other
+// Windows headers...
+
+#endif // _WIN32
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/base64.cpp b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/base64.cpp
index 50ece19455e..8ce196d061a 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/base64.cpp
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/base64.cpp
@@ -1,128 +1,128 @@
-/*
- base64.cpp and base64.h
-
- base64 encoding and decoding with C++.
-
- Version: 1.01.00
-
- Copyright (C) 2004-2017 René Nyffenegger
-
- This source code is provided 'as-is', without any express or implied
- warranty. In no event will the author be held liable for any damages
- arising from the use of this software.
-
- Permission is granted to anyone to use this software for any purpose,
- including commercial applications, and to alter it and redistribute it
- freely, subject to the following restrictions:
-
- 1. The origin of this source code must not be misrepresented; you must not
- claim that you wrote the original source code. If you use this source code
- in a product, an acknowledgment in the product documentation would be
- appreciated but is not required.
-
- 2. Altered source versions must be plainly marked as such, and must not be
- misrepresented as being the original source code.
-
- 3. This notice may not be removed or altered from any source distribution.
-
- René Nyffenegger rene.nyffenegger@adp-gmbh.ch
-
-*/
-
-#include "arrow/util/base64.h"
-#include <iostream>
-
-namespace arrow {
-namespace util {
-
-static const std::string base64_chars =
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "abcdefghijklmnopqrstuvwxyz"
- "0123456789+/";
-
-
-static inline bool is_base64(unsigned char c) {
- return (isalnum(c) || (c == '+') || (c == '/'));
-}
-
-std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_len) {
- std::string ret;
- int i = 0;
- int j = 0;
- unsigned char char_array_3[3];
- unsigned char char_array_4[4];
-
- while (in_len--) {
- char_array_3[i++] = *(bytes_to_encode++);
- if (i == 3) {
- char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
- char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
- char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
- char_array_4[3] = char_array_3[2] & 0x3f;
-
- for(i = 0; (i <4) ; i++)
- ret += base64_chars[char_array_4[i]];
- i = 0;
- }
- }
-
- if (i)
- {
- for(j = i; j < 3; j++)
- char_array_3[j] = '\0';
-
- char_array_4[0] = ( char_array_3[0] & 0xfc) >> 2;
- char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
- char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
-
- for (j = 0; (j < i + 1); j++)
- ret += base64_chars[char_array_4[j]];
-
- while((i++ < 3))
- ret += '=';
-
- }
-
- return ret;
-
-}
-
-std::string base64_decode(std::string const& encoded_string) {
- size_t in_len = encoded_string.size();
- int i = 0;
- int j = 0;
- int in_ = 0;
- unsigned char char_array_4[4], char_array_3[3];
- std::string ret;
-
- while (in_len-- && ( encoded_string[in_] != '=') && is_base64(encoded_string[in_])) {
- char_array_4[i++] = encoded_string[in_]; in_++;
- if (i ==4) {
- for (i = 0; i <4; i++)
- char_array_4[i] = base64_chars.find(char_array_4[i]) & 0xff;
-
- char_array_3[0] = ( char_array_4[0] << 2 ) + ((char_array_4[1] & 0x30) >> 4);
- char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
- char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
-
- for (i = 0; (i < 3); i++)
- ret += char_array_3[i];
- i = 0;
- }
- }
-
- if (i) {
- for (j = 0; j < i; j++)
- char_array_4[j] = base64_chars.find(char_array_4[j]) & 0xff;
-
- char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
- char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
-
- for (j = 0; (j < i - 1); j++) ret += char_array_3[j];
- }
-
- return ret;
-}
-
-} // namespace util
-} // namespace arrow
+/*
+ base64.cpp and base64.h
+
+ base64 encoding and decoding with C++.
+
+ Version: 1.01.00
+
+ Copyright (C) 2004-2017 René Nyffenegger
+
+ This source code is provided 'as-is', without any express or implied
+ warranty. In no event will the author be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this source code must not be misrepresented; you must not
+ claim that you wrote the original source code. If you use this source code
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original source code.
+
+ 3. This notice may not be removed or altered from any source distribution.
+
+ René Nyffenegger rene.nyffenegger@adp-gmbh.ch
+
+*/
+
+#include "arrow/util/base64.h"
+#include <iostream>
+
+namespace arrow {
+namespace util {
+
+static const std::string base64_chars =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "abcdefghijklmnopqrstuvwxyz"
+ "0123456789+/";
+
+
+static inline bool is_base64(unsigned char c) {
+ return (isalnum(c) || (c == '+') || (c == '/'));
+}
+
+std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_len) {
+ std::string ret;
+ int i = 0;
+ int j = 0;
+ unsigned char char_array_3[3];
+ unsigned char char_array_4[4];
+
+ while (in_len--) {
+ char_array_3[i++] = *(bytes_to_encode++);
+ if (i == 3) {
+ char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
+ char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
+ char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
+ char_array_4[3] = char_array_3[2] & 0x3f;
+
+ for(i = 0; (i <4) ; i++)
+ ret += base64_chars[char_array_4[i]];
+ i = 0;
+ }
+ }
+
+ if (i)
+ {
+ for(j = i; j < 3; j++)
+ char_array_3[j] = '\0';
+
+ char_array_4[0] = ( char_array_3[0] & 0xfc) >> 2;
+ char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
+ char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
+
+ for (j = 0; (j < i + 1); j++)
+ ret += base64_chars[char_array_4[j]];
+
+ while((i++ < 3))
+ ret += '=';
+
+ }
+
+ return ret;
+
+}
+
+std::string base64_decode(std::string const& encoded_string) {
+ size_t in_len = encoded_string.size();
+ int i = 0;
+ int j = 0;
+ int in_ = 0;
+ unsigned char char_array_4[4], char_array_3[3];
+ std::string ret;
+
+ while (in_len-- && ( encoded_string[in_] != '=') && is_base64(encoded_string[in_])) {
+ char_array_4[i++] = encoded_string[in_]; in_++;
+ if (i ==4) {
+ for (i = 0; i <4; i++)
+ char_array_4[i] = base64_chars.find(char_array_4[i]) & 0xff;
+
+ char_array_3[0] = ( char_array_4[0] << 2 ) + ((char_array_4[1] & 0x30) >> 4);
+ char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
+ char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
+
+ for (i = 0; (i < 3); i++)
+ ret += char_array_3[i];
+ i = 0;
+ }
+ }
+
+ if (i) {
+ for (j = 0; j < i; j++)
+ char_array_4[j] = base64_chars.find(char_array_4[j]) & 0xff;
+
+ char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
+ char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
+
+ for (j = 0; (j < i - 1); j++) ret += char_array_3[j];
+ }
+
+ return ret;
+}
+
+} // namespace util
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime.h b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime.h
index e437cdcbc2d..0c3c42a2923 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime.h
@@ -1,26 +1,26 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "arrow/vendored/datetime/date.h" // IWYU pragma: export
-#include "arrow/vendored/datetime/tz.h" // IWYU pragma: export
-
-// Can be defined by date.h.
-#ifdef NOEXCEPT
-#undef NOEXCEPT
-#endif
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/vendored/datetime/date.h" // IWYU pragma: export
+#include "arrow/vendored/datetime/tz.h" // IWYU pragma: export
+
+// Can be defined by date.h.
+#ifdef NOEXCEPT
+#undef NOEXCEPT
+#endif
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime/date.h b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime/date.h
index 6d0455a354b..a9dac1ee4b3 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime/date.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime/date.h
@@ -1,7949 +1,7949 @@
-#ifndef DATE_H
-#define DATE_H
-
-// The MIT License (MIT)
-//
-// Copyright (c) 2015, 2016, 2017 Howard Hinnant
-// Copyright (c) 2016 Adrian Colomitchi
-// Copyright (c) 2017 Florian Dang
-// Copyright (c) 2017 Paul Thompson
-// Copyright (c) 2018, 2019 Tomasz Kamiński
-// Copyright (c) 2019 Jiangang Zhuang
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in all
-// copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-// SOFTWARE.
-//
-// Our apologies. When the previous paragraph was written, lowercase had not yet
-// been invented (that would involve another several millennia of evolution).
-// We did not mean to shout.
-
-#ifndef HAS_STRING_VIEW
-# if __cplusplus >= 201703 || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
-# define HAS_STRING_VIEW 1
-# else
-# define HAS_STRING_VIEW 0
-# endif
-#endif // HAS_STRING_VIEW
-
-#include <cassert>
-#include <algorithm>
-#include <cctype>
-#include <chrono>
-#include <climits>
-#if !(__cplusplus >= 201402)
-# include <cmath>
-#endif
-#include <cstddef>
-#include <cstdint>
-#include <cstdlib>
-#include <ctime>
-#include <ios>
-#include <istream>
-#include <iterator>
-#include <limits>
-#include <locale>
-#include <memory>
-#include <ostream>
-#include <ratio>
-#include <sstream>
-#include <stdexcept>
-#include <string>
-#if HAS_STRING_VIEW
-# include <string_view>
-#endif
-#include <utility>
-#include <type_traits>
-
-#ifdef __GNUC__
-# pragma GCC diagnostic push
-# if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 7)
-# pragma GCC diagnostic ignored "-Wpedantic"
-# endif
-# if __GNUC__ < 5
- // GCC 4.9 Bug 61489 Wrong warning with -Wmissing-field-initializers
-# pragma GCC diagnostic ignored "-Wmissing-field-initializers"
-# endif
-#endif
-
-#ifdef _MSC_VER
-# pragma warning(push)
-// warning C4127: conditional expression is constant
-# pragma warning(disable : 4127)
-#endif
-
-namespace arrow_vendored
-{
-namespace date
-{
-
-//---------------+
-// Configuration |
-//---------------+
-
-#ifndef ONLY_C_LOCALE
-# define ONLY_C_LOCALE 0
-#endif
-
-#if defined(_MSC_VER) && (!defined(__clang__) || (_MSC_VER < 1910))
-// MSVC
-# ifndef _SILENCE_CXX17_UNCAUGHT_EXCEPTION_DEPRECATION_WARNING
-# define _SILENCE_CXX17_UNCAUGHT_EXCEPTION_DEPRECATION_WARNING
-# endif
-# if _MSC_VER < 1910
-// before VS2017
-# define CONSTDATA const
-# define CONSTCD11
-# define CONSTCD14
-# define NOEXCEPT _NOEXCEPT
-# else
-// VS2017 and later
-# define CONSTDATA constexpr const
-# define CONSTCD11 constexpr
-# define CONSTCD14 constexpr
-# define NOEXCEPT noexcept
-# endif
-
-#elif defined(__SUNPRO_CC) && __SUNPRO_CC <= 0x5150
-// Oracle Developer Studio 12.6 and earlier
-# define CONSTDATA constexpr const
-# define CONSTCD11 constexpr
-# define CONSTCD14
-# define NOEXCEPT noexcept
-
-#elif __cplusplus >= 201402
-// C++14
-# define CONSTDATA constexpr const
-# define CONSTCD11 constexpr
-# define CONSTCD14 constexpr
-# define NOEXCEPT noexcept
-#else
-// C++11
-# define CONSTDATA constexpr const
-# define CONSTCD11 constexpr
-# define CONSTCD14
-# define NOEXCEPT noexcept
-#endif
-
-#ifndef HAS_UNCAUGHT_EXCEPTIONS
-# if __cplusplus > 201703 || (defined(_MSVC_LANG) && _MSVC_LANG > 201703L)
-# define HAS_UNCAUGHT_EXCEPTIONS 1
-# else
-# define HAS_UNCAUGHT_EXCEPTIONS 0
-# endif
-#endif // HAS_UNCAUGHT_EXCEPTIONS
-
-#ifndef HAS_VOID_T
-# if __cplusplus >= 201703 || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
-# define HAS_VOID_T 1
-# else
-# define HAS_VOID_T 0
-# endif
-#endif // HAS_VOID_T
-
-// Protect from Oracle sun macro
-#ifdef sun
-# undef sun
-#endif
-
-// Work around for a NVCC compiler bug which causes it to fail
-// to compile std::ratio_{multiply,divide} when used directly
-// in the std::chrono::duration template instantiations below
-namespace detail {
-template <typename R1, typename R2>
-using ratio_multiply = decltype(std::ratio_multiply<R1, R2>{});
-
-template <typename R1, typename R2>
-using ratio_divide = decltype(std::ratio_divide<R1, R2>{});
-} // namespace detail
-
-//-----------+
-// Interface |
-//-----------+
-
-// durations
-
-using days = std::chrono::duration
- <int, detail::ratio_multiply<std::ratio<24>, std::chrono::hours::period>>;
-
-using weeks = std::chrono::duration
- <int, detail::ratio_multiply<std::ratio<7>, days::period>>;
-
-using years = std::chrono::duration
- <int, detail::ratio_multiply<std::ratio<146097, 400>, days::period>>;
-
-using months = std::chrono::duration
- <int, detail::ratio_divide<years::period, std::ratio<12>>>;
-
-// time_point
-
-template <class Duration>
- using sys_time = std::chrono::time_point<std::chrono::system_clock, Duration>;
-
-using sys_days = sys_time<days>;
-using sys_seconds = sys_time<std::chrono::seconds>;
-
-struct local_t {};
-
-template <class Duration>
- using local_time = std::chrono::time_point<local_t, Duration>;
-
-using local_seconds = local_time<std::chrono::seconds>;
-using local_days = local_time<days>;
-
-// types
-
-struct last_spec
-{
- explicit last_spec() = default;
-};
-
-class day;
-class month;
-class year;
-
-class weekday;
-class weekday_indexed;
-class weekday_last;
-
-class month_day;
-class month_day_last;
-class month_weekday;
-class month_weekday_last;
-
-class year_month;
-
-class year_month_day;
-class year_month_day_last;
-class year_month_weekday;
-class year_month_weekday_last;
-
-// date composition operators
-
-CONSTCD11 year_month operator/(const year& y, const month& m) NOEXCEPT;
-CONSTCD11 year_month operator/(const year& y, int m) NOEXCEPT;
-
-CONSTCD11 month_day operator/(const day& d, const month& m) NOEXCEPT;
-CONSTCD11 month_day operator/(const day& d, int m) NOEXCEPT;
-CONSTCD11 month_day operator/(const month& m, const day& d) NOEXCEPT;
-CONSTCD11 month_day operator/(const month& m, int d) NOEXCEPT;
-CONSTCD11 month_day operator/(int m, const day& d) NOEXCEPT;
-
-CONSTCD11 month_day_last operator/(const month& m, last_spec) NOEXCEPT;
-CONSTCD11 month_day_last operator/(int m, last_spec) NOEXCEPT;
-CONSTCD11 month_day_last operator/(last_spec, const month& m) NOEXCEPT;
-CONSTCD11 month_day_last operator/(last_spec, int m) NOEXCEPT;
-
-CONSTCD11 month_weekday operator/(const month& m, const weekday_indexed& wdi) NOEXCEPT;
-CONSTCD11 month_weekday operator/(int m, const weekday_indexed& wdi) NOEXCEPT;
-CONSTCD11 month_weekday operator/(const weekday_indexed& wdi, const month& m) NOEXCEPT;
-CONSTCD11 month_weekday operator/(const weekday_indexed& wdi, int m) NOEXCEPT;
-
-CONSTCD11 month_weekday_last operator/(const month& m, const weekday_last& wdl) NOEXCEPT;
-CONSTCD11 month_weekday_last operator/(int m, const weekday_last& wdl) NOEXCEPT;
-CONSTCD11 month_weekday_last operator/(const weekday_last& wdl, const month& m) NOEXCEPT;
-CONSTCD11 month_weekday_last operator/(const weekday_last& wdl, int m) NOEXCEPT;
-
-CONSTCD11 year_month_day operator/(const year_month& ym, const day& d) NOEXCEPT;
-CONSTCD11 year_month_day operator/(const year_month& ym, int d) NOEXCEPT;
-CONSTCD11 year_month_day operator/(const year& y, const month_day& md) NOEXCEPT;
-CONSTCD11 year_month_day operator/(int y, const month_day& md) NOEXCEPT;
-CONSTCD11 year_month_day operator/(const month_day& md, const year& y) NOEXCEPT;
-CONSTCD11 year_month_day operator/(const month_day& md, int y) NOEXCEPT;
-
-CONSTCD11
- year_month_day_last operator/(const year_month& ym, last_spec) NOEXCEPT;
-CONSTCD11
- year_month_day_last operator/(const year& y, const month_day_last& mdl) NOEXCEPT;
-CONSTCD11
- year_month_day_last operator/(int y, const month_day_last& mdl) NOEXCEPT;
-CONSTCD11
- year_month_day_last operator/(const month_day_last& mdl, const year& y) NOEXCEPT;
-CONSTCD11
- year_month_day_last operator/(const month_day_last& mdl, int y) NOEXCEPT;
-
-CONSTCD11
-year_month_weekday
-operator/(const year_month& ym, const weekday_indexed& wdi) NOEXCEPT;
-
-CONSTCD11
-year_month_weekday
-operator/(const year& y, const month_weekday& mwd) NOEXCEPT;
-
-CONSTCD11
-year_month_weekday
-operator/(int y, const month_weekday& mwd) NOEXCEPT;
-
-CONSTCD11
-year_month_weekday
-operator/(const month_weekday& mwd, const year& y) NOEXCEPT;
-
-CONSTCD11
-year_month_weekday
-operator/(const month_weekday& mwd, int y) NOEXCEPT;
-
-CONSTCD11
-year_month_weekday_last
-operator/(const year_month& ym, const weekday_last& wdl) NOEXCEPT;
-
-CONSTCD11
-year_month_weekday_last
-operator/(const year& y, const month_weekday_last& mwdl) NOEXCEPT;
-
-CONSTCD11
-year_month_weekday_last
-operator/(int y, const month_weekday_last& mwdl) NOEXCEPT;
-
-CONSTCD11
-year_month_weekday_last
-operator/(const month_weekday_last& mwdl, const year& y) NOEXCEPT;
-
-CONSTCD11
-year_month_weekday_last
-operator/(const month_weekday_last& mwdl, int y) NOEXCEPT;
-
-// Detailed interface
-
-// day
-
-class day
-{
- unsigned char d_;
-
-public:
- day() = default;
- explicit CONSTCD11 day(unsigned d) NOEXCEPT;
-
- CONSTCD14 day& operator++() NOEXCEPT;
- CONSTCD14 day operator++(int) NOEXCEPT;
- CONSTCD14 day& operator--() NOEXCEPT;
- CONSTCD14 day operator--(int) NOEXCEPT;
-
- CONSTCD14 day& operator+=(const days& d) NOEXCEPT;
- CONSTCD14 day& operator-=(const days& d) NOEXCEPT;
-
- CONSTCD11 explicit operator unsigned() const NOEXCEPT;
- CONSTCD11 bool ok() const NOEXCEPT;
-};
-
-CONSTCD11 bool operator==(const day& x, const day& y) NOEXCEPT;
-CONSTCD11 bool operator!=(const day& x, const day& y) NOEXCEPT;
-CONSTCD11 bool operator< (const day& x, const day& y) NOEXCEPT;
-CONSTCD11 bool operator> (const day& x, const day& y) NOEXCEPT;
-CONSTCD11 bool operator<=(const day& x, const day& y) NOEXCEPT;
-CONSTCD11 bool operator>=(const day& x, const day& y) NOEXCEPT;
-
-CONSTCD11 day operator+(const day& x, const days& y) NOEXCEPT;
-CONSTCD11 day operator+(const days& x, const day& y) NOEXCEPT;
-CONSTCD11 day operator-(const day& x, const days& y) NOEXCEPT;
-CONSTCD11 days operator-(const day& x, const day& y) NOEXCEPT;
-
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const day& d);
-
-// month
-
-class month
-{
- unsigned char m_;
-
-public:
- month() = default;
- explicit CONSTCD11 month(unsigned m) NOEXCEPT;
-
- CONSTCD14 month& operator++() NOEXCEPT;
- CONSTCD14 month operator++(int) NOEXCEPT;
- CONSTCD14 month& operator--() NOEXCEPT;
- CONSTCD14 month operator--(int) NOEXCEPT;
-
- CONSTCD14 month& operator+=(const months& m) NOEXCEPT;
- CONSTCD14 month& operator-=(const months& m) NOEXCEPT;
-
- CONSTCD11 explicit operator unsigned() const NOEXCEPT;
- CONSTCD11 bool ok() const NOEXCEPT;
-};
-
-CONSTCD11 bool operator==(const month& x, const month& y) NOEXCEPT;
-CONSTCD11 bool operator!=(const month& x, const month& y) NOEXCEPT;
-CONSTCD11 bool operator< (const month& x, const month& y) NOEXCEPT;
-CONSTCD11 bool operator> (const month& x, const month& y) NOEXCEPT;
-CONSTCD11 bool operator<=(const month& x, const month& y) NOEXCEPT;
-CONSTCD11 bool operator>=(const month& x, const month& y) NOEXCEPT;
-
-CONSTCD14 month operator+(const month& x, const months& y) NOEXCEPT;
-CONSTCD14 month operator+(const months& x, const month& y) NOEXCEPT;
-CONSTCD14 month operator-(const month& x, const months& y) NOEXCEPT;
-CONSTCD14 months operator-(const month& x, const month& y) NOEXCEPT;
-
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const month& m);
-
-// year
-
-class year
-{
- short y_;
-
-public:
- year() = default;
- explicit CONSTCD11 year(int y) NOEXCEPT;
-
- CONSTCD14 year& operator++() NOEXCEPT;
- CONSTCD14 year operator++(int) NOEXCEPT;
- CONSTCD14 year& operator--() NOEXCEPT;
- CONSTCD14 year operator--(int) NOEXCEPT;
-
- CONSTCD14 year& operator+=(const years& y) NOEXCEPT;
- CONSTCD14 year& operator-=(const years& y) NOEXCEPT;
-
- CONSTCD11 year operator-() const NOEXCEPT;
- CONSTCD11 year operator+() const NOEXCEPT;
-
- CONSTCD11 bool is_leap() const NOEXCEPT;
-
- CONSTCD11 explicit operator int() const NOEXCEPT;
- CONSTCD11 bool ok() const NOEXCEPT;
-
- static CONSTCD11 year min() NOEXCEPT { return year{-32767}; }
- static CONSTCD11 year max() NOEXCEPT { return year{32767}; }
-};
-
-CONSTCD11 bool operator==(const year& x, const year& y) NOEXCEPT;
-CONSTCD11 bool operator!=(const year& x, const year& y) NOEXCEPT;
-CONSTCD11 bool operator< (const year& x, const year& y) NOEXCEPT;
-CONSTCD11 bool operator> (const year& x, const year& y) NOEXCEPT;
-CONSTCD11 bool operator<=(const year& x, const year& y) NOEXCEPT;
-CONSTCD11 bool operator>=(const year& x, const year& y) NOEXCEPT;
-
-CONSTCD11 year operator+(const year& x, const years& y) NOEXCEPT;
-CONSTCD11 year operator+(const years& x, const year& y) NOEXCEPT;
-CONSTCD11 year operator-(const year& x, const years& y) NOEXCEPT;
-CONSTCD11 years operator-(const year& x, const year& y) NOEXCEPT;
-
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const year& y);
-
-// weekday
-
-class weekday
-{
- unsigned char wd_;
-public:
- weekday() = default;
- explicit CONSTCD11 weekday(unsigned wd) NOEXCEPT;
- CONSTCD14 weekday(const sys_days& dp) NOEXCEPT;
- CONSTCD14 explicit weekday(const local_days& dp) NOEXCEPT;
-
- CONSTCD14 weekday& operator++() NOEXCEPT;
- CONSTCD14 weekday operator++(int) NOEXCEPT;
- CONSTCD14 weekday& operator--() NOEXCEPT;
- CONSTCD14 weekday operator--(int) NOEXCEPT;
-
- CONSTCD14 weekday& operator+=(const days& d) NOEXCEPT;
- CONSTCD14 weekday& operator-=(const days& d) NOEXCEPT;
-
- CONSTCD11 bool ok() const NOEXCEPT;
-
- CONSTCD11 unsigned c_encoding() const NOEXCEPT;
- CONSTCD11 unsigned iso_encoding() const NOEXCEPT;
-
- CONSTCD11 weekday_indexed operator[](unsigned index) const NOEXCEPT;
- CONSTCD11 weekday_last operator[](last_spec) const NOEXCEPT;
-
-private:
- static CONSTCD14 unsigned char weekday_from_days(int z) NOEXCEPT;
-
- friend CONSTCD11 bool operator==(const weekday& x, const weekday& y) NOEXCEPT;
- friend CONSTCD14 days operator-(const weekday& x, const weekday& y) NOEXCEPT;
- friend CONSTCD14 weekday operator+(const weekday& x, const days& y) NOEXCEPT;
- template<class CharT, class Traits>
- friend std::basic_ostream<CharT, Traits>&
- operator<<(std::basic_ostream<CharT, Traits>& os, const weekday& wd);
- friend class weekday_indexed;
-};
-
-CONSTCD11 bool operator==(const weekday& x, const weekday& y) NOEXCEPT;
-CONSTCD11 bool operator!=(const weekday& x, const weekday& y) NOEXCEPT;
-
-CONSTCD14 weekday operator+(const weekday& x, const days& y) NOEXCEPT;
-CONSTCD14 weekday operator+(const days& x, const weekday& y) NOEXCEPT;
-CONSTCD14 weekday operator-(const weekday& x, const days& y) NOEXCEPT;
-CONSTCD14 days operator-(const weekday& x, const weekday& y) NOEXCEPT;
-
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const weekday& wd);
-
-// weekday_indexed
-
-class weekday_indexed
-{
- unsigned char wd_ : 4;
- unsigned char index_ : 4;
-
-public:
- weekday_indexed() = default;
- CONSTCD11 weekday_indexed(const date::weekday& wd, unsigned index) NOEXCEPT;
-
- CONSTCD11 date::weekday weekday() const NOEXCEPT;
- CONSTCD11 unsigned index() const NOEXCEPT;
- CONSTCD11 bool ok() const NOEXCEPT;
-};
-
-CONSTCD11 bool operator==(const weekday_indexed& x, const weekday_indexed& y) NOEXCEPT;
-CONSTCD11 bool operator!=(const weekday_indexed& x, const weekday_indexed& y) NOEXCEPT;
-
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const weekday_indexed& wdi);
-
-// weekday_last
-
-class weekday_last
-{
- date::weekday wd_;
-
-public:
- explicit CONSTCD11 weekday_last(const date::weekday& wd) NOEXCEPT;
-
- CONSTCD11 date::weekday weekday() const NOEXCEPT;
- CONSTCD11 bool ok() const NOEXCEPT;
-};
-
-CONSTCD11 bool operator==(const weekday_last& x, const weekday_last& y) NOEXCEPT;
-CONSTCD11 bool operator!=(const weekday_last& x, const weekday_last& y) NOEXCEPT;
-
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const weekday_last& wdl);
-
-namespace detail
-{
-
-struct unspecified_month_disambiguator {};
-
-} // namespace detail
-
-// year_month
-
-class year_month
-{
- date::year y_;
- date::month m_;
-
-public:
- year_month() = default;
- CONSTCD11 year_month(const date::year& y, const date::month& m) NOEXCEPT;
-
- CONSTCD11 date::year year() const NOEXCEPT;
- CONSTCD11 date::month month() const NOEXCEPT;
-
- template<class = detail::unspecified_month_disambiguator>
- CONSTCD14 year_month& operator+=(const months& dm) NOEXCEPT;
- template<class = detail::unspecified_month_disambiguator>
- CONSTCD14 year_month& operator-=(const months& dm) NOEXCEPT;
- CONSTCD14 year_month& operator+=(const years& dy) NOEXCEPT;
- CONSTCD14 year_month& operator-=(const years& dy) NOEXCEPT;
-
- CONSTCD11 bool ok() const NOEXCEPT;
-};
-
-CONSTCD11 bool operator==(const year_month& x, const year_month& y) NOEXCEPT;
-CONSTCD11 bool operator!=(const year_month& x, const year_month& y) NOEXCEPT;
-CONSTCD11 bool operator< (const year_month& x, const year_month& y) NOEXCEPT;
-CONSTCD11 bool operator> (const year_month& x, const year_month& y) NOEXCEPT;
-CONSTCD11 bool operator<=(const year_month& x, const year_month& y) NOEXCEPT;
-CONSTCD11 bool operator>=(const year_month& x, const year_month& y) NOEXCEPT;
-
-template<class = detail::unspecified_month_disambiguator>
-CONSTCD14 year_month operator+(const year_month& ym, const months& dm) NOEXCEPT;
-template<class = detail::unspecified_month_disambiguator>
-CONSTCD14 year_month operator+(const months& dm, const year_month& ym) NOEXCEPT;
-template<class = detail::unspecified_month_disambiguator>
-CONSTCD14 year_month operator-(const year_month& ym, const months& dm) NOEXCEPT;
-
-CONSTCD11 months operator-(const year_month& x, const year_month& y) NOEXCEPT;
-CONSTCD11 year_month operator+(const year_month& ym, const years& dy) NOEXCEPT;
-CONSTCD11 year_month operator+(const years& dy, const year_month& ym) NOEXCEPT;
-CONSTCD11 year_month operator-(const year_month& ym, const years& dy) NOEXCEPT;
-
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const year_month& ym);
-
-// month_day
-
-class month_day
-{
- date::month m_;
- date::day d_;
-
-public:
- month_day() = default;
- CONSTCD11 month_day(const date::month& m, const date::day& d) NOEXCEPT;
-
- CONSTCD11 date::month month() const NOEXCEPT;
- CONSTCD11 date::day day() const NOEXCEPT;
-
- CONSTCD14 bool ok() const NOEXCEPT;
-};
-
-CONSTCD11 bool operator==(const month_day& x, const month_day& y) NOEXCEPT;
-CONSTCD11 bool operator!=(const month_day& x, const month_day& y) NOEXCEPT;
-CONSTCD11 bool operator< (const month_day& x, const month_day& y) NOEXCEPT;
-CONSTCD11 bool operator> (const month_day& x, const month_day& y) NOEXCEPT;
-CONSTCD11 bool operator<=(const month_day& x, const month_day& y) NOEXCEPT;
-CONSTCD11 bool operator>=(const month_day& x, const month_day& y) NOEXCEPT;
-
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const month_day& md);
-
-// month_day_last
-
-class month_day_last
-{
- date::month m_;
-
-public:
- CONSTCD11 explicit month_day_last(const date::month& m) NOEXCEPT;
-
- CONSTCD11 date::month month() const NOEXCEPT;
- CONSTCD11 bool ok() const NOEXCEPT;
-};
-
-CONSTCD11 bool operator==(const month_day_last& x, const month_day_last& y) NOEXCEPT;
-CONSTCD11 bool operator!=(const month_day_last& x, const month_day_last& y) NOEXCEPT;
-CONSTCD11 bool operator< (const month_day_last& x, const month_day_last& y) NOEXCEPT;
-CONSTCD11 bool operator> (const month_day_last& x, const month_day_last& y) NOEXCEPT;
-CONSTCD11 bool operator<=(const month_day_last& x, const month_day_last& y) NOEXCEPT;
-CONSTCD11 bool operator>=(const month_day_last& x, const month_day_last& y) NOEXCEPT;
-
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const month_day_last& mdl);
-
-// month_weekday
-
-class month_weekday
-{
- date::month m_;
- date::weekday_indexed wdi_;
-public:
- CONSTCD11 month_weekday(const date::month& m,
- const date::weekday_indexed& wdi) NOEXCEPT;
-
- CONSTCD11 date::month month() const NOEXCEPT;
- CONSTCD11 date::weekday_indexed weekday_indexed() const NOEXCEPT;
-
- CONSTCD11 bool ok() const NOEXCEPT;
-};
-
-CONSTCD11 bool operator==(const month_weekday& x, const month_weekday& y) NOEXCEPT;
-CONSTCD11 bool operator!=(const month_weekday& x, const month_weekday& y) NOEXCEPT;
-
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const month_weekday& mwd);
-
-// month_weekday_last
-
-class month_weekday_last
-{
- date::month m_;
- date::weekday_last wdl_;
-
-public:
- CONSTCD11 month_weekday_last(const date::month& m,
- const date::weekday_last& wd) NOEXCEPT;
-
- CONSTCD11 date::month month() const NOEXCEPT;
- CONSTCD11 date::weekday_last weekday_last() const NOEXCEPT;
-
- CONSTCD11 bool ok() const NOEXCEPT;
-};
-
-CONSTCD11
- bool operator==(const month_weekday_last& x, const month_weekday_last& y) NOEXCEPT;
-CONSTCD11
- bool operator!=(const month_weekday_last& x, const month_weekday_last& y) NOEXCEPT;
-
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const month_weekday_last& mwdl);
-
-// class year_month_day
-
-class year_month_day
-{
- date::year y_;
- date::month m_;
- date::day d_;
-
-public:
- year_month_day() = default;
- CONSTCD11 year_month_day(const date::year& y, const date::month& m,
- const date::day& d) NOEXCEPT;
- CONSTCD14 year_month_day(const year_month_day_last& ymdl) NOEXCEPT;
-
- CONSTCD14 year_month_day(sys_days dp) NOEXCEPT;
- CONSTCD14 explicit year_month_day(local_days dp) NOEXCEPT;
-
- template<class = detail::unspecified_month_disambiguator>
- CONSTCD14 year_month_day& operator+=(const months& m) NOEXCEPT;
- template<class = detail::unspecified_month_disambiguator>
- CONSTCD14 year_month_day& operator-=(const months& m) NOEXCEPT;
- CONSTCD14 year_month_day& operator+=(const years& y) NOEXCEPT;
- CONSTCD14 year_month_day& operator-=(const years& y) NOEXCEPT;
-
- CONSTCD11 date::year year() const NOEXCEPT;
- CONSTCD11 date::month month() const NOEXCEPT;
- CONSTCD11 date::day day() const NOEXCEPT;
-
- CONSTCD14 operator sys_days() const NOEXCEPT;
- CONSTCD14 explicit operator local_days() const NOEXCEPT;
- CONSTCD14 bool ok() const NOEXCEPT;
-
-private:
- static CONSTCD14 year_month_day from_days(days dp) NOEXCEPT;
- CONSTCD14 days to_days() const NOEXCEPT;
-};
-
-CONSTCD11 bool operator==(const year_month_day& x, const year_month_day& y) NOEXCEPT;
-CONSTCD11 bool operator!=(const year_month_day& x, const year_month_day& y) NOEXCEPT;
-CONSTCD11 bool operator< (const year_month_day& x, const year_month_day& y) NOEXCEPT;
-CONSTCD11 bool operator> (const year_month_day& x, const year_month_day& y) NOEXCEPT;
-CONSTCD11 bool operator<=(const year_month_day& x, const year_month_day& y) NOEXCEPT;
-CONSTCD11 bool operator>=(const year_month_day& x, const year_month_day& y) NOEXCEPT;
-
-template<class = detail::unspecified_month_disambiguator>
-CONSTCD14 year_month_day operator+(const year_month_day& ymd, const months& dm) NOEXCEPT;
-template<class = detail::unspecified_month_disambiguator>
-CONSTCD14 year_month_day operator+(const months& dm, const year_month_day& ymd) NOEXCEPT;
-template<class = detail::unspecified_month_disambiguator>
-CONSTCD14 year_month_day operator-(const year_month_day& ymd, const months& dm) NOEXCEPT;
-CONSTCD11 year_month_day operator+(const year_month_day& ymd, const years& dy) NOEXCEPT;
-CONSTCD11 year_month_day operator+(const years& dy, const year_month_day& ymd) NOEXCEPT;
-CONSTCD11 year_month_day operator-(const year_month_day& ymd, const years& dy) NOEXCEPT;
-
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const year_month_day& ymd);
-
-// year_month_day_last
-
-class year_month_day_last
-{
- date::year y_;
- date::month_day_last mdl_;
-
-public:
- CONSTCD11 year_month_day_last(const date::year& y,
- const date::month_day_last& mdl) NOEXCEPT;
-
- template<class = detail::unspecified_month_disambiguator>
- CONSTCD14 year_month_day_last& operator+=(const months& m) NOEXCEPT;
- template<class = detail::unspecified_month_disambiguator>
- CONSTCD14 year_month_day_last& operator-=(const months& m) NOEXCEPT;
- CONSTCD14 year_month_day_last& operator+=(const years& y) NOEXCEPT;
- CONSTCD14 year_month_day_last& operator-=(const years& y) NOEXCEPT;
-
- CONSTCD11 date::year year() const NOEXCEPT;
- CONSTCD11 date::month month() const NOEXCEPT;
- CONSTCD11 date::month_day_last month_day_last() const NOEXCEPT;
- CONSTCD14 date::day day() const NOEXCEPT;
-
- CONSTCD14 operator sys_days() const NOEXCEPT;
- CONSTCD14 explicit operator local_days() const NOEXCEPT;
- CONSTCD11 bool ok() const NOEXCEPT;
-};
-
-CONSTCD11
- bool operator==(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT;
-CONSTCD11
- bool operator!=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT;
-CONSTCD11
- bool operator< (const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT;
-CONSTCD11
- bool operator> (const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT;
-CONSTCD11
- bool operator<=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT;
-CONSTCD11
- bool operator>=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT;
-
-template<class = detail::unspecified_month_disambiguator>
-CONSTCD14
-year_month_day_last
-operator+(const year_month_day_last& ymdl, const months& dm) NOEXCEPT;
-
-template<class = detail::unspecified_month_disambiguator>
-CONSTCD14
-year_month_day_last
-operator+(const months& dm, const year_month_day_last& ymdl) NOEXCEPT;
-
-CONSTCD11
-year_month_day_last
-operator+(const year_month_day_last& ymdl, const years& dy) NOEXCEPT;
-
-CONSTCD11
-year_month_day_last
-operator+(const years& dy, const year_month_day_last& ymdl) NOEXCEPT;
-
-template<class = detail::unspecified_month_disambiguator>
-CONSTCD14
-year_month_day_last
-operator-(const year_month_day_last& ymdl, const months& dm) NOEXCEPT;
-
-CONSTCD11
-year_month_day_last
-operator-(const year_month_day_last& ymdl, const years& dy) NOEXCEPT;
-
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const year_month_day_last& ymdl);
-
-// year_month_weekday
-
-class year_month_weekday
-{
- date::year y_;
- date::month m_;
- date::weekday_indexed wdi_;
-
-public:
- year_month_weekday() = default;
- CONSTCD11 year_month_weekday(const date::year& y, const date::month& m,
- const date::weekday_indexed& wdi) NOEXCEPT;
- CONSTCD14 year_month_weekday(const sys_days& dp) NOEXCEPT;
- CONSTCD14 explicit year_month_weekday(const local_days& dp) NOEXCEPT;
-
- template<class = detail::unspecified_month_disambiguator>
- CONSTCD14 year_month_weekday& operator+=(const months& m) NOEXCEPT;
- template<class = detail::unspecified_month_disambiguator>
- CONSTCD14 year_month_weekday& operator-=(const months& m) NOEXCEPT;
- CONSTCD14 year_month_weekday& operator+=(const years& y) NOEXCEPT;
- CONSTCD14 year_month_weekday& operator-=(const years& y) NOEXCEPT;
-
- CONSTCD11 date::year year() const NOEXCEPT;
- CONSTCD11 date::month month() const NOEXCEPT;
- CONSTCD11 date::weekday weekday() const NOEXCEPT;
- CONSTCD11 unsigned index() const NOEXCEPT;
- CONSTCD11 date::weekday_indexed weekday_indexed() const NOEXCEPT;
-
- CONSTCD14 operator sys_days() const NOEXCEPT;
- CONSTCD14 explicit operator local_days() const NOEXCEPT;
- CONSTCD14 bool ok() const NOEXCEPT;
-
-private:
- static CONSTCD14 year_month_weekday from_days(days dp) NOEXCEPT;
- CONSTCD14 days to_days() const NOEXCEPT;
-};
-
-CONSTCD11
- bool operator==(const year_month_weekday& x, const year_month_weekday& y) NOEXCEPT;
-CONSTCD11
- bool operator!=(const year_month_weekday& x, const year_month_weekday& y) NOEXCEPT;
-
-template<class = detail::unspecified_month_disambiguator>
-CONSTCD14
-year_month_weekday
-operator+(const year_month_weekday& ymwd, const months& dm) NOEXCEPT;
-
-template<class = detail::unspecified_month_disambiguator>
-CONSTCD14
-year_month_weekday
-operator+(const months& dm, const year_month_weekday& ymwd) NOEXCEPT;
-
-CONSTCD11
-year_month_weekday
-operator+(const year_month_weekday& ymwd, const years& dy) NOEXCEPT;
-
-CONSTCD11
-year_month_weekday
-operator+(const years& dy, const year_month_weekday& ymwd) NOEXCEPT;
-
-template<class = detail::unspecified_month_disambiguator>
-CONSTCD14
-year_month_weekday
-operator-(const year_month_weekday& ymwd, const months& dm) NOEXCEPT;
-
-CONSTCD11
-year_month_weekday
-operator-(const year_month_weekday& ymwd, const years& dy) NOEXCEPT;
-
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const year_month_weekday& ymwdi);
-
-// year_month_weekday_last
-
-class year_month_weekday_last
-{
- date::year y_;
- date::month m_;
- date::weekday_last wdl_;
-
-public:
- CONSTCD11 year_month_weekday_last(const date::year& y, const date::month& m,
- const date::weekday_last& wdl) NOEXCEPT;
-
- template<class = detail::unspecified_month_disambiguator>
- CONSTCD14 year_month_weekday_last& operator+=(const months& m) NOEXCEPT;
- template<class = detail::unspecified_month_disambiguator>
- CONSTCD14 year_month_weekday_last& operator-=(const months& m) NOEXCEPT;
- CONSTCD14 year_month_weekday_last& operator+=(const years& y) NOEXCEPT;
- CONSTCD14 year_month_weekday_last& operator-=(const years& y) NOEXCEPT;
-
- CONSTCD11 date::year year() const NOEXCEPT;
- CONSTCD11 date::month month() const NOEXCEPT;
- CONSTCD11 date::weekday weekday() const NOEXCEPT;
- CONSTCD11 date::weekday_last weekday_last() const NOEXCEPT;
-
- CONSTCD14 operator sys_days() const NOEXCEPT;
- CONSTCD14 explicit operator local_days() const NOEXCEPT;
- CONSTCD11 bool ok() const NOEXCEPT;
-
-private:
- CONSTCD14 days to_days() const NOEXCEPT;
-};
-
-CONSTCD11
-bool
-operator==(const year_month_weekday_last& x, const year_month_weekday_last& y) NOEXCEPT;
-
-CONSTCD11
-bool
-operator!=(const year_month_weekday_last& x, const year_month_weekday_last& y) NOEXCEPT;
-
-template<class = detail::unspecified_month_disambiguator>
-CONSTCD14
-year_month_weekday_last
-operator+(const year_month_weekday_last& ymwdl, const months& dm) NOEXCEPT;
-
-template<class = detail::unspecified_month_disambiguator>
-CONSTCD14
-year_month_weekday_last
-operator+(const months& dm, const year_month_weekday_last& ymwdl) NOEXCEPT;
-
-CONSTCD11
-year_month_weekday_last
-operator+(const year_month_weekday_last& ymwdl, const years& dy) NOEXCEPT;
-
-CONSTCD11
-year_month_weekday_last
-operator+(const years& dy, const year_month_weekday_last& ymwdl) NOEXCEPT;
-
-template<class = detail::unspecified_month_disambiguator>
-CONSTCD14
-year_month_weekday_last
-operator-(const year_month_weekday_last& ymwdl, const months& dm) NOEXCEPT;
-
-CONSTCD11
-year_month_weekday_last
-operator-(const year_month_weekday_last& ymwdl, const years& dy) NOEXCEPT;
-
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const year_month_weekday_last& ymwdl);
-
-#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
-inline namespace literals
-{
-
-CONSTCD11 date::day operator "" _d(unsigned long long d) NOEXCEPT;
-CONSTCD11 date::year operator "" _y(unsigned long long y) NOEXCEPT;
-
-} // inline namespace literals
-#endif // !defined(_MSC_VER) || (_MSC_VER >= 1900)
-
-// CONSTDATA date::month January{1};
-// CONSTDATA date::month February{2};
-// CONSTDATA date::month March{3};
-// CONSTDATA date::month April{4};
-// CONSTDATA date::month May{5};
-// CONSTDATA date::month June{6};
-// CONSTDATA date::month July{7};
-// CONSTDATA date::month August{8};
-// CONSTDATA date::month September{9};
-// CONSTDATA date::month October{10};
-// CONSTDATA date::month November{11};
-// CONSTDATA date::month December{12};
-//
-// CONSTDATA date::weekday Sunday{0u};
-// CONSTDATA date::weekday Monday{1u};
-// CONSTDATA date::weekday Tuesday{2u};
-// CONSTDATA date::weekday Wednesday{3u};
-// CONSTDATA date::weekday Thursday{4u};
-// CONSTDATA date::weekday Friday{5u};
-// CONSTDATA date::weekday Saturday{6u};
-
-#if HAS_VOID_T
-
-template <class T, class = std::void_t<>>
-struct is_clock
- : std::false_type
-{};
-
-template <class T>
-struct is_clock<T, std::void_t<decltype(T::now()), typename T::rep, typename T::period,
- typename T::duration, typename T::time_point,
- decltype(T::is_steady)>>
- : std::true_type
-{};
-
-#endif // HAS_VOID_T
-
-//----------------+
-// Implementation |
-//----------------+
-
-// utilities
-namespace detail {
-
-template<class CharT, class Traits = std::char_traits<CharT>>
-class save_istream
-{
-protected:
- std::basic_ios<CharT, Traits>& is_;
- CharT fill_;
- std::ios::fmtflags flags_;
- std::streamsize width_;
- std::basic_ostream<CharT, Traits>* tie_;
- std::locale loc_;
-
-public:
- ~save_istream()
- {
- is_.fill(fill_);
- is_.flags(flags_);
- is_.width(width_);
- is_.imbue(loc_);
- is_.tie(tie_);
- }
-
- save_istream(const save_istream&) = delete;
- save_istream& operator=(const save_istream&) = delete;
-
- explicit save_istream(std::basic_ios<CharT, Traits>& is)
- : is_(is)
- , fill_(is.fill())
- , flags_(is.flags())
- , width_(is.width(0))
- , tie_(is.tie(nullptr))
- , loc_(is.getloc())
- {
- if (tie_ != nullptr)
- tie_->flush();
- }
-};
-
-template<class CharT, class Traits = std::char_traits<CharT>>
-class save_ostream
- : private save_istream<CharT, Traits>
-{
-public:
- ~save_ostream()
- {
- if ((this->flags_ & std::ios::unitbuf) &&
-#if HAS_UNCAUGHT_EXCEPTIONS
- std::uncaught_exceptions() == 0 &&
-#else
- !std::uncaught_exception() &&
-#endif
- this->is_.good())
- this->is_.rdbuf()->pubsync();
- }
-
- save_ostream(const save_ostream&) = delete;
- save_ostream& operator=(const save_ostream&) = delete;
-
- explicit save_ostream(std::basic_ios<CharT, Traits>& os)
- : save_istream<CharT, Traits>(os)
- {
- }
-};
-
-template <class T>
-struct choose_trunc_type
-{
- static const int digits = std::numeric_limits<T>::digits;
- using type = typename std::conditional
- <
- digits < 32,
- std::int32_t,
- typename std::conditional
- <
- digits < 64,
- std::int64_t,
-#ifdef __SIZEOF_INT128__
- __int128
-#else
- std::int64_t
-#endif
- >::type
- >::type;
-};
-
-template <class T>
-CONSTCD11
-inline
-typename std::enable_if
-<
- !std::chrono::treat_as_floating_point<T>::value,
- T
->::type
-trunc(T t) NOEXCEPT
-{
- return t;
-}
-
-template <class T>
-CONSTCD14
-inline
-typename std::enable_if
-<
- std::chrono::treat_as_floating_point<T>::value,
- T
->::type
-trunc(T t) NOEXCEPT
-{
- using std::numeric_limits;
- using I = typename choose_trunc_type<T>::type;
- CONSTDATA auto digits = numeric_limits<T>::digits;
- static_assert(digits < numeric_limits<I>::digits, "");
- CONSTDATA auto max = I{1} << (digits-1);
- CONSTDATA auto min = -max;
- const auto negative = t < T{0};
- if (min <= t && t <= max && t != 0 && t == t)
- {
- t = static_cast<T>(static_cast<I>(t));
- if (t == 0 && negative)
- t = -t;
- }
- return t;
-}
-
-template <std::intmax_t Xp, std::intmax_t Yp>
-struct static_gcd
-{
- static const std::intmax_t value = static_gcd<Yp, Xp % Yp>::value;
-};
-
-template <std::intmax_t Xp>
-struct static_gcd<Xp, 0>
-{
- static const std::intmax_t value = Xp;
-};
-
-template <>
-struct static_gcd<0, 0>
-{
- static const std::intmax_t value = 1;
-};
-
-template <class R1, class R2>
-struct no_overflow
-{
-private:
- static const std::intmax_t gcd_n1_n2 = static_gcd<R1::num, R2::num>::value;
- static const std::intmax_t gcd_d1_d2 = static_gcd<R1::den, R2::den>::value;
- static const std::intmax_t n1 = R1::num / gcd_n1_n2;
- static const std::intmax_t d1 = R1::den / gcd_d1_d2;
- static const std::intmax_t n2 = R2::num / gcd_n1_n2;
- static const std::intmax_t d2 = R2::den / gcd_d1_d2;
- static const std::intmax_t max = std::numeric_limits<std::intmax_t>::max();
-
- template <std::intmax_t Xp, std::intmax_t Yp, bool overflow>
- struct mul // overflow == false
- {
- static const std::intmax_t value = Xp * Yp;
- };
-
- template <std::intmax_t Xp, std::intmax_t Yp>
- struct mul<Xp, Yp, true>
- {
- static const std::intmax_t value = 1;
- };
-
-public:
- static const bool value = (n1 <= max / d2) && (n2 <= max / d1);
- typedef std::ratio<mul<n1, d2, !value>::value,
- mul<n2, d1, !value>::value> type;
-};
-
-} // detail
-
-// trunc towards zero
-template <class To, class Rep, class Period>
-CONSTCD11
-inline
-typename std::enable_if
-<
- detail::no_overflow<Period, typename To::period>::value,
- To
->::type
-trunc(const std::chrono::duration<Rep, Period>& d)
-{
- return To{detail::trunc(std::chrono::duration_cast<To>(d).count())};
-}
-
-template <class To, class Rep, class Period>
-CONSTCD11
-inline
-typename std::enable_if
-<
- !detail::no_overflow<Period, typename To::period>::value,
- To
->::type
-trunc(const std::chrono::duration<Rep, Period>& d)
-{
- using std::chrono::duration_cast;
- using std::chrono::duration;
- using rep = typename std::common_type<Rep, typename To::rep>::type;
- return To{detail::trunc(duration_cast<To>(duration_cast<duration<rep>>(d)).count())};
-}
-
-#ifndef HAS_CHRONO_ROUNDING
-# if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190023918 || (_MSC_FULL_VER >= 190000000 && defined (__clang__)))
-# define HAS_CHRONO_ROUNDING 1
-# elif defined(__cpp_lib_chrono) && __cplusplus > 201402 && __cpp_lib_chrono >= 201510
-# define HAS_CHRONO_ROUNDING 1
-# elif defined(_LIBCPP_VERSION) && __cplusplus > 201402 && _LIBCPP_VERSION >= 3800
-# define HAS_CHRONO_ROUNDING 1
-# else
-# define HAS_CHRONO_ROUNDING 0
-# endif
-#endif // HAS_CHRONO_ROUNDING
-
-#if HAS_CHRONO_ROUNDING == 0
-
-// round down
-template <class To, class Rep, class Period>
-CONSTCD14
-inline
-typename std::enable_if
-<
- detail::no_overflow<Period, typename To::period>::value,
- To
->::type
-floor(const std::chrono::duration<Rep, Period>& d)
-{
- auto t = trunc<To>(d);
- if (t > d)
- return t - To{1};
- return t;
-}
-
-template <class To, class Rep, class Period>
-CONSTCD14
-inline
-typename std::enable_if
-<
- !detail::no_overflow<Period, typename To::period>::value,
- To
->::type
-floor(const std::chrono::duration<Rep, Period>& d)
-{
- using rep = typename std::common_type<Rep, typename To::rep>::type;
- return floor<To>(floor<std::chrono::duration<rep>>(d));
-}
-
-// round to nearest, to even on tie
-template <class To, class Rep, class Period>
-CONSTCD14
-inline
-To
-round(const std::chrono::duration<Rep, Period>& d)
-{
- auto t0 = floor<To>(d);
- auto t1 = t0 + To{1};
- if (t1 == To{0} && t0 < To{0})
- t1 = -t1;
- auto diff0 = d - t0;
- auto diff1 = t1 - d;
- if (diff0 == diff1)
- {
- if (t0 - trunc<To>(t0/2)*2 == To{0})
- return t0;
- return t1;
- }
- if (diff0 < diff1)
- return t0;
- return t1;
-}
-
-// round up
-template <class To, class Rep, class Period>
-CONSTCD14
-inline
-To
-ceil(const std::chrono::duration<Rep, Period>& d)
-{
- auto t = trunc<To>(d);
- if (t < d)
- return t + To{1};
- return t;
-}
-
-template <class Rep, class Period,
- class = typename std::enable_if
- <
- std::numeric_limits<Rep>::is_signed
- >::type>
-CONSTCD11
-std::chrono::duration<Rep, Period>
-abs(std::chrono::duration<Rep, Period> d)
-{
- return d >= d.zero() ? d : -d;
-}
-
-// round down
-template <class To, class Clock, class FromDuration>
-CONSTCD11
-inline
-std::chrono::time_point<Clock, To>
-floor(const std::chrono::time_point<Clock, FromDuration>& tp)
-{
- using std::chrono::time_point;
- return time_point<Clock, To>{date::floor<To>(tp.time_since_epoch())};
-}
-
-// round to nearest, to even on tie
-template <class To, class Clock, class FromDuration>
-CONSTCD11
-inline
-std::chrono::time_point<Clock, To>
-round(const std::chrono::time_point<Clock, FromDuration>& tp)
-{
- using std::chrono::time_point;
- return time_point<Clock, To>{round<To>(tp.time_since_epoch())};
-}
-
-// round up
-template <class To, class Clock, class FromDuration>
-CONSTCD11
-inline
-std::chrono::time_point<Clock, To>
-ceil(const std::chrono::time_point<Clock, FromDuration>& tp)
-{
- using std::chrono::time_point;
- return time_point<Clock, To>{ceil<To>(tp.time_since_epoch())};
-}
-
-#else // HAS_CHRONO_ROUNDING == 1
-
-using std::chrono::floor;
-using std::chrono::ceil;
-using std::chrono::round;
-using std::chrono::abs;
-
-#endif // HAS_CHRONO_ROUNDING
-
-// trunc towards zero
-template <class To, class Clock, class FromDuration>
-CONSTCD11
-inline
-std::chrono::time_point<Clock, To>
-trunc(const std::chrono::time_point<Clock, FromDuration>& tp)
-{
- using std::chrono::time_point;
- return time_point<Clock, To>{trunc<To>(tp.time_since_epoch())};
-}
-
-// day
-
-CONSTCD11 inline day::day(unsigned d) NOEXCEPT : d_(static_cast<decltype(d_)>(d)) {}
-CONSTCD14 inline day& day::operator++() NOEXCEPT {++d_; return *this;}
-CONSTCD14 inline day day::operator++(int) NOEXCEPT {auto tmp(*this); ++(*this); return tmp;}
-CONSTCD14 inline day& day::operator--() NOEXCEPT {--d_; return *this;}
-CONSTCD14 inline day day::operator--(int) NOEXCEPT {auto tmp(*this); --(*this); return tmp;}
-CONSTCD14 inline day& day::operator+=(const days& d) NOEXCEPT {*this = *this + d; return *this;}
-CONSTCD14 inline day& day::operator-=(const days& d) NOEXCEPT {*this = *this - d; return *this;}
-CONSTCD11 inline day::operator unsigned() const NOEXCEPT {return d_;}
-CONSTCD11 inline bool day::ok() const NOEXCEPT {return 1 <= d_ && d_ <= 31;}
-
-CONSTCD11
-inline
-bool
-operator==(const day& x, const day& y) NOEXCEPT
-{
- return static_cast<unsigned>(x) == static_cast<unsigned>(y);
-}
-
-CONSTCD11
-inline
-bool
-operator!=(const day& x, const day& y) NOEXCEPT
-{
- return !(x == y);
-}
-
-CONSTCD11
-inline
-bool
-operator<(const day& x, const day& y) NOEXCEPT
-{
- return static_cast<unsigned>(x) < static_cast<unsigned>(y);
-}
-
-CONSTCD11
-inline
-bool
-operator>(const day& x, const day& y) NOEXCEPT
-{
- return y < x;
-}
-
-CONSTCD11
-inline
-bool
-operator<=(const day& x, const day& y) NOEXCEPT
-{
- return !(y < x);
-}
-
-CONSTCD11
-inline
-bool
-operator>=(const day& x, const day& y) NOEXCEPT
-{
- return !(x < y);
-}
-
-CONSTCD11
-inline
-days
-operator-(const day& x, const day& y) NOEXCEPT
-{
- return days{static_cast<days::rep>(static_cast<unsigned>(x)
- - static_cast<unsigned>(y))};
-}
-
-CONSTCD11
-inline
-day
-operator+(const day& x, const days& y) NOEXCEPT
-{
- return day{static_cast<unsigned>(x) + static_cast<unsigned>(y.count())};
-}
-
-CONSTCD11
-inline
-day
-operator+(const days& x, const day& y) NOEXCEPT
-{
- return y + x;
-}
-
-CONSTCD11
-inline
-day
-operator-(const day& x, const days& y) NOEXCEPT
-{
- return x + -y;
-}
-
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const day& d)
-{
- detail::save_ostream<CharT, Traits> _(os);
- os.fill('0');
- os.flags(std::ios::dec | std::ios::right);
- os.width(2);
- os << static_cast<unsigned>(d);
- if (!d.ok())
- os << " is not a valid day";
- return os;
-}
-
-// month
-
-CONSTCD11 inline month::month(unsigned m) NOEXCEPT : m_(static_cast<decltype(m_)>(m)) {}
-CONSTCD14 inline month& month::operator++() NOEXCEPT {*this += months{1}; return *this;}
-CONSTCD14 inline month month::operator++(int) NOEXCEPT {auto tmp(*this); ++(*this); return tmp;}
-CONSTCD14 inline month& month::operator--() NOEXCEPT {*this -= months{1}; return *this;}
-CONSTCD14 inline month month::operator--(int) NOEXCEPT {auto tmp(*this); --(*this); return tmp;}
-
-CONSTCD14
-inline
-month&
-month::operator+=(const months& m) NOEXCEPT
-{
- *this = *this + m;
- return *this;
-}
-
-CONSTCD14
-inline
-month&
-month::operator-=(const months& m) NOEXCEPT
-{
- *this = *this - m;
- return *this;
-}
-
-CONSTCD11 inline month::operator unsigned() const NOEXCEPT {return m_;}
-CONSTCD11 inline bool month::ok() const NOEXCEPT {return 1 <= m_ && m_ <= 12;}
-
-CONSTCD11
-inline
-bool
-operator==(const month& x, const month& y) NOEXCEPT
-{
- return static_cast<unsigned>(x) == static_cast<unsigned>(y);
-}
-
-CONSTCD11
-inline
-bool
-operator!=(const month& x, const month& y) NOEXCEPT
-{
- return !(x == y);
-}
-
-CONSTCD11
-inline
-bool
-operator<(const month& x, const month& y) NOEXCEPT
-{
- return static_cast<unsigned>(x) < static_cast<unsigned>(y);
-}
-
-CONSTCD11
-inline
-bool
-operator>(const month& x, const month& y) NOEXCEPT
-{
- return y < x;
-}
-
-CONSTCD11
-inline
-bool
-operator<=(const month& x, const month& y) NOEXCEPT
-{
- return !(y < x);
-}
-
-CONSTCD11
-inline
-bool
-operator>=(const month& x, const month& y) NOEXCEPT
-{
- return !(x < y);
-}
-
-CONSTCD14
-inline
-months
-operator-(const month& x, const month& y) NOEXCEPT
-{
- auto const d = static_cast<unsigned>(x) - static_cast<unsigned>(y);
- return months(d <= 11 ? d : d + 12);
-}
-
-CONSTCD14
-inline
-month
-operator+(const month& x, const months& y) NOEXCEPT
-{
- auto const mu = static_cast<long long>(static_cast<unsigned>(x)) + y.count() - 1;
- auto const yr = (mu >= 0 ? mu : mu-11) / 12;
- return month{static_cast<unsigned>(mu - yr * 12 + 1)};
-}
-
-CONSTCD14
-inline
-month
-operator+(const months& x, const month& y) NOEXCEPT
-{
- return y + x;
-}
-
-CONSTCD14
-inline
-month
-operator-(const month& x, const months& y) NOEXCEPT
-{
- return x + -y;
-}
-
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const month& m)
-{
- if (m.ok())
- {
- CharT fmt[] = {'%', 'b', 0};
- os << format(os.getloc(), fmt, m);
- }
- else
- os << static_cast<unsigned>(m) << " is not a valid month";
- return os;
-}
-
-// year
-
-CONSTCD11 inline year::year(int y) NOEXCEPT : y_(static_cast<decltype(y_)>(y)) {}
-CONSTCD14 inline year& year::operator++() NOEXCEPT {++y_; return *this;}
-CONSTCD14 inline year year::operator++(int) NOEXCEPT {auto tmp(*this); ++(*this); return tmp;}
-CONSTCD14 inline year& year::operator--() NOEXCEPT {--y_; return *this;}
-CONSTCD14 inline year year::operator--(int) NOEXCEPT {auto tmp(*this); --(*this); return tmp;}
-CONSTCD14 inline year& year::operator+=(const years& y) NOEXCEPT {*this = *this + y; return *this;}
-CONSTCD14 inline year& year::operator-=(const years& y) NOEXCEPT {*this = *this - y; return *this;}
-CONSTCD11 inline year year::operator-() const NOEXCEPT {return year{-y_};}
-CONSTCD11 inline year year::operator+() const NOEXCEPT {return *this;}
-
-CONSTCD11
-inline
-bool
-year::is_leap() const NOEXCEPT
-{
- return y_ % 4 == 0 && (y_ % 100 != 0 || y_ % 400 == 0);
-}
-
-CONSTCD11 inline year::operator int() const NOEXCEPT {return y_;}
-
-CONSTCD11
-inline
-bool
-year::ok() const NOEXCEPT
-{
- return y_ != std::numeric_limits<short>::min();
-}
-
-CONSTCD11
-inline
-bool
-operator==(const year& x, const year& y) NOEXCEPT
-{
- return static_cast<int>(x) == static_cast<int>(y);
-}
-
-CONSTCD11
-inline
-bool
-operator!=(const year& x, const year& y) NOEXCEPT
-{
- return !(x == y);
-}
-
-CONSTCD11
-inline
-bool
-operator<(const year& x, const year& y) NOEXCEPT
-{
- return static_cast<int>(x) < static_cast<int>(y);
-}
-
-CONSTCD11
-inline
-bool
-operator>(const year& x, const year& y) NOEXCEPT
-{
- return y < x;
-}
-
-CONSTCD11
-inline
-bool
-operator<=(const year& x, const year& y) NOEXCEPT
-{
- return !(y < x);
-}
-
-CONSTCD11
-inline
-bool
-operator>=(const year& x, const year& y) NOEXCEPT
-{
- return !(x < y);
-}
-
-CONSTCD11
-inline
-years
-operator-(const year& x, const year& y) NOEXCEPT
-{
- return years{static_cast<int>(x) - static_cast<int>(y)};
-}
-
-CONSTCD11
-inline
-year
-operator+(const year& x, const years& y) NOEXCEPT
-{
- return year{static_cast<int>(x) + y.count()};
-}
-
-CONSTCD11
-inline
-year
-operator+(const years& x, const year& y) NOEXCEPT
-{
- return y + x;
-}
-
-CONSTCD11
-inline
-year
-operator-(const year& x, const years& y) NOEXCEPT
-{
- return year{static_cast<int>(x) - y.count()};
-}
-
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const year& y)
-{
- detail::save_ostream<CharT, Traits> _(os);
- os.fill('0');
- os.flags(std::ios::dec | std::ios::internal);
- os.width(4 + (y < year{0}));
- os.imbue(std::locale::classic());
- os << static_cast<int>(y);
- if (!y.ok())
- os << " is not a valid year";
- return os;
-}
-
-// weekday
-
-CONSTCD14
-inline
-unsigned char
-weekday::weekday_from_days(int z) NOEXCEPT
-{
- auto u = static_cast<unsigned>(z);
- return static_cast<unsigned char>(z >= -4 ? (u+4) % 7 : u % 7);
-}
-
-CONSTCD11
-inline
-weekday::weekday(unsigned wd) NOEXCEPT
- : wd_(static_cast<decltype(wd_)>(wd != 7 ? wd : 0))
- {}
-
-CONSTCD14
-inline
-weekday::weekday(const sys_days& dp) NOEXCEPT
- : wd_(weekday_from_days(dp.time_since_epoch().count()))
- {}
-
-CONSTCD14
-inline
-weekday::weekday(const local_days& dp) NOEXCEPT
- : wd_(weekday_from_days(dp.time_since_epoch().count()))
- {}
-
-CONSTCD14 inline weekday& weekday::operator++() NOEXCEPT {*this += days{1}; return *this;}
-CONSTCD14 inline weekday weekday::operator++(int) NOEXCEPT {auto tmp(*this); ++(*this); return tmp;}
-CONSTCD14 inline weekday& weekday::operator--() NOEXCEPT {*this -= days{1}; return *this;}
-CONSTCD14 inline weekday weekday::operator--(int) NOEXCEPT {auto tmp(*this); --(*this); return tmp;}
-
-CONSTCD14
-inline
-weekday&
-weekday::operator+=(const days& d) NOEXCEPT
-{
- *this = *this + d;
- return *this;
-}
-
-CONSTCD14
-inline
-weekday&
-weekday::operator-=(const days& d) NOEXCEPT
-{
- *this = *this - d;
- return *this;
-}
-
-CONSTCD11 inline bool weekday::ok() const NOEXCEPT {return wd_ <= 6;}
-
-CONSTCD11
-inline
-unsigned weekday::c_encoding() const NOEXCEPT
-{
- return unsigned{wd_};
-}
-
-CONSTCD11
-inline
-unsigned weekday::iso_encoding() const NOEXCEPT
-{
- return unsigned{((wd_ == 0u) ? 7u : wd_)};
-}
-
-CONSTCD11
-inline
-bool
-operator==(const weekday& x, const weekday& y) NOEXCEPT
-{
- return x.wd_ == y.wd_;
-}
-
-CONSTCD11
-inline
-bool
-operator!=(const weekday& x, const weekday& y) NOEXCEPT
-{
- return !(x == y);
-}
-
-CONSTCD14
-inline
-days
-operator-(const weekday& x, const weekday& y) NOEXCEPT
-{
- auto const wdu = x.wd_ - y.wd_;
- auto const wk = (wdu >= 0 ? wdu : wdu-6) / 7;
- return days{wdu - wk * 7};
-}
-
-CONSTCD14
-inline
-weekday
-operator+(const weekday& x, const days& y) NOEXCEPT
-{
- auto const wdu = static_cast<long long>(static_cast<unsigned>(x.wd_)) + y.count();
- auto const wk = (wdu >= 0 ? wdu : wdu-6) / 7;
- return weekday{static_cast<unsigned>(wdu - wk * 7)};
-}
-
-CONSTCD14
-inline
-weekday
-operator+(const days& x, const weekday& y) NOEXCEPT
-{
- return y + x;
-}
-
-CONSTCD14
-inline
-weekday
-operator-(const weekday& x, const days& y) NOEXCEPT
-{
- return x + -y;
-}
-
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const weekday& wd)
-{
- if (wd.ok())
- {
- CharT fmt[] = {'%', 'a', 0};
- os << format(fmt, wd);
- }
- else
- os << static_cast<unsigned>(wd.wd_) << " is not a valid weekday";
- return os;
-}
-
-#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
-inline namespace literals
-{
-
-CONSTCD11
-inline
-date::day
-operator "" _d(unsigned long long d) NOEXCEPT
-{
- return date::day{static_cast<unsigned>(d)};
-}
-
-CONSTCD11
-inline
-date::year
-operator "" _y(unsigned long long y) NOEXCEPT
-{
- return date::year(static_cast<int>(y));
-}
-#endif // !defined(_MSC_VER) || (_MSC_VER >= 1900)
-
-CONSTDATA date::last_spec last{};
-
-CONSTDATA date::month jan{1};
-CONSTDATA date::month feb{2};
-CONSTDATA date::month mar{3};
-CONSTDATA date::month apr{4};
-CONSTDATA date::month may{5};
-CONSTDATA date::month jun{6};
-CONSTDATA date::month jul{7};
-CONSTDATA date::month aug{8};
-CONSTDATA date::month sep{9};
-CONSTDATA date::month oct{10};
-CONSTDATA date::month nov{11};
-CONSTDATA date::month dec{12};
-
-CONSTDATA date::weekday sun{0u};
-CONSTDATA date::weekday mon{1u};
-CONSTDATA date::weekday tue{2u};
-CONSTDATA date::weekday wed{3u};
-CONSTDATA date::weekday thu{4u};
-CONSTDATA date::weekday fri{5u};
-CONSTDATA date::weekday sat{6u};
-
-#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
-} // inline namespace literals
-#endif
-
-CONSTDATA date::month January{1};
-CONSTDATA date::month February{2};
-CONSTDATA date::month March{3};
-CONSTDATA date::month April{4};
-CONSTDATA date::month May{5};
-CONSTDATA date::month June{6};
-CONSTDATA date::month July{7};
-CONSTDATA date::month August{8};
-CONSTDATA date::month September{9};
-CONSTDATA date::month October{10};
-CONSTDATA date::month November{11};
-CONSTDATA date::month December{12};
-
-CONSTDATA date::weekday Monday{1};
-CONSTDATA date::weekday Tuesday{2};
-CONSTDATA date::weekday Wednesday{3};
-CONSTDATA date::weekday Thursday{4};
-CONSTDATA date::weekday Friday{5};
-CONSTDATA date::weekday Saturday{6};
-CONSTDATA date::weekday Sunday{7};
-
-// weekday_indexed
-
-CONSTCD11
-inline
-weekday
-weekday_indexed::weekday() const NOEXCEPT
-{
- return date::weekday{static_cast<unsigned>(wd_)};
-}
-
-CONSTCD11 inline unsigned weekday_indexed::index() const NOEXCEPT {return index_;}
-
-CONSTCD11
-inline
-bool
-weekday_indexed::ok() const NOEXCEPT
-{
- return weekday().ok() && 1 <= index_ && index_ <= 5;
-}
-
-#ifdef __GNUC__
-# pragma GCC diagnostic push
-# pragma GCC diagnostic ignored "-Wconversion"
-#endif // __GNUC__
-
-CONSTCD11
-inline
-weekday_indexed::weekday_indexed(const date::weekday& wd, unsigned index) NOEXCEPT
- : wd_(static_cast<decltype(wd_)>(static_cast<unsigned>(wd.wd_)))
- , index_(static_cast<decltype(index_)>(index))
- {}
-
-#ifdef __GNUC__
-# pragma GCC diagnostic pop
-#endif // __GNUC__
-
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const weekday_indexed& wdi)
-{
- os << wdi.weekday() << '[' << wdi.index();
- if (!(1 <= wdi.index() && wdi.index() <= 5))
- os << " is not a valid index";
- os << ']';
- return os;
-}
-
-CONSTCD11
-inline
-weekday_indexed
-weekday::operator[](unsigned index) const NOEXCEPT
-{
- return {*this, index};
-}
-
-CONSTCD11
-inline
-bool
-operator==(const weekday_indexed& x, const weekday_indexed& y) NOEXCEPT
-{
- return x.weekday() == y.weekday() && x.index() == y.index();
-}
-
-CONSTCD11
-inline
-bool
-operator!=(const weekday_indexed& x, const weekday_indexed& y) NOEXCEPT
-{
- return !(x == y);
-}
-
-// weekday_last
-
-CONSTCD11 inline date::weekday weekday_last::weekday() const NOEXCEPT {return wd_;}
-CONSTCD11 inline bool weekday_last::ok() const NOEXCEPT {return wd_.ok();}
-CONSTCD11 inline weekday_last::weekday_last(const date::weekday& wd) NOEXCEPT : wd_(wd) {}
-
-CONSTCD11
-inline
-bool
-operator==(const weekday_last& x, const weekday_last& y) NOEXCEPT
-{
- return x.weekday() == y.weekday();
-}
-
-CONSTCD11
-inline
-bool
-operator!=(const weekday_last& x, const weekday_last& y) NOEXCEPT
-{
- return !(x == y);
-}
-
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const weekday_last& wdl)
-{
- return os << wdl.weekday() << "[last]";
-}
-
-CONSTCD11
-inline
-weekday_last
-weekday::operator[](last_spec) const NOEXCEPT
-{
- return weekday_last{*this};
-}
-
-// year_month
-
-CONSTCD11
-inline
-year_month::year_month(const date::year& y, const date::month& m) NOEXCEPT
- : y_(y)
- , m_(m)
- {}
-
-CONSTCD11 inline year year_month::year() const NOEXCEPT {return y_;}
-CONSTCD11 inline month year_month::month() const NOEXCEPT {return m_;}
-CONSTCD11 inline bool year_month::ok() const NOEXCEPT {return y_.ok() && m_.ok();}
-
-template<class>
-CONSTCD14
-inline
-year_month&
-year_month::operator+=(const months& dm) NOEXCEPT
-{
- *this = *this + dm;
- return *this;
-}
-
-template<class>
-CONSTCD14
-inline
-year_month&
-year_month::operator-=(const months& dm) NOEXCEPT
-{
- *this = *this - dm;
- return *this;
-}
-
-CONSTCD14
-inline
-year_month&
-year_month::operator+=(const years& dy) NOEXCEPT
-{
- *this = *this + dy;
- return *this;
-}
-
-CONSTCD14
-inline
-year_month&
-year_month::operator-=(const years& dy) NOEXCEPT
-{
- *this = *this - dy;
- return *this;
-}
-
-CONSTCD11
-inline
-bool
-operator==(const year_month& x, const year_month& y) NOEXCEPT
-{
- return x.year() == y.year() && x.month() == y.month();
-}
-
-CONSTCD11
-inline
-bool
-operator!=(const year_month& x, const year_month& y) NOEXCEPT
-{
- return !(x == y);
-}
-
-CONSTCD11
-inline
-bool
-operator<(const year_month& x, const year_month& y) NOEXCEPT
-{
- return x.year() < y.year() ? true
- : (x.year() > y.year() ? false
- : (x.month() < y.month()));
-}
-
-CONSTCD11
-inline
-bool
-operator>(const year_month& x, const year_month& y) NOEXCEPT
-{
- return y < x;
-}
-
-CONSTCD11
-inline
-bool
-operator<=(const year_month& x, const year_month& y) NOEXCEPT
-{
- return !(y < x);
-}
-
-CONSTCD11
-inline
-bool
-operator>=(const year_month& x, const year_month& y) NOEXCEPT
-{
- return !(x < y);
-}
-
-template<class>
-CONSTCD14
-inline
-year_month
-operator+(const year_month& ym, const months& dm) NOEXCEPT
-{
- auto dmi = static_cast<int>(static_cast<unsigned>(ym.month())) - 1 + dm.count();
- auto dy = (dmi >= 0 ? dmi : dmi-11) / 12;
- dmi = dmi - dy * 12 + 1;
- return (ym.year() + years(dy)) / month(static_cast<unsigned>(dmi));
-}
-
-template<class>
-CONSTCD14
-inline
-year_month
-operator+(const months& dm, const year_month& ym) NOEXCEPT
-{
- return ym + dm;
-}
-
-template<class>
-CONSTCD14
-inline
-year_month
-operator-(const year_month& ym, const months& dm) NOEXCEPT
-{
- return ym + -dm;
-}
-
-CONSTCD11
-inline
-months
-operator-(const year_month& x, const year_month& y) NOEXCEPT
-{
- return (x.year() - y.year()) +
- months(static_cast<unsigned>(x.month()) - static_cast<unsigned>(y.month()));
-}
-
-CONSTCD11
-inline
-year_month
-operator+(const year_month& ym, const years& dy) NOEXCEPT
-{
- return (ym.year() + dy) / ym.month();
-}
-
-CONSTCD11
-inline
-year_month
-operator+(const years& dy, const year_month& ym) NOEXCEPT
-{
- return ym + dy;
-}
-
-CONSTCD11
-inline
-year_month
-operator-(const year_month& ym, const years& dy) NOEXCEPT
-{
- return ym + -dy;
-}
-
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const year_month& ym)
-{
- return os << ym.year() << '/' << ym.month();
-}
-
-// month_day
-
-CONSTCD11
-inline
-month_day::month_day(const date::month& m, const date::day& d) NOEXCEPT
- : m_(m)
- , d_(d)
- {}
-
-CONSTCD11 inline date::month month_day::month() const NOEXCEPT {return m_;}
-CONSTCD11 inline date::day month_day::day() const NOEXCEPT {return d_;}
-
-CONSTCD14
-inline
-bool
-month_day::ok() const NOEXCEPT
-{
- CONSTDATA date::day d[] =
- {
- date::day(31), date::day(29), date::day(31),
- date::day(30), date::day(31), date::day(30),
- date::day(31), date::day(31), date::day(30),
- date::day(31), date::day(30), date::day(31)
- };
- return m_.ok() && date::day{1} <= d_ && d_ <= d[static_cast<unsigned>(m_)-1];
-}
-
-CONSTCD11
-inline
-bool
-operator==(const month_day& x, const month_day& y) NOEXCEPT
-{
- return x.month() == y.month() && x.day() == y.day();
-}
-
-CONSTCD11
-inline
-bool
-operator!=(const month_day& x, const month_day& y) NOEXCEPT
-{
- return !(x == y);
-}
-
-CONSTCD11
-inline
-bool
-operator<(const month_day& x, const month_day& y) NOEXCEPT
-{
- return x.month() < y.month() ? true
- : (x.month() > y.month() ? false
- : (x.day() < y.day()));
-}
-
-CONSTCD11
-inline
-bool
-operator>(const month_day& x, const month_day& y) NOEXCEPT
-{
- return y < x;
-}
-
-CONSTCD11
-inline
-bool
-operator<=(const month_day& x, const month_day& y) NOEXCEPT
-{
- return !(y < x);
-}
-
-CONSTCD11
-inline
-bool
-operator>=(const month_day& x, const month_day& y) NOEXCEPT
-{
- return !(x < y);
-}
-
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const month_day& md)
-{
- return os << md.month() << '/' << md.day();
-}
-
-// month_day_last
-
-CONSTCD11 inline month month_day_last::month() const NOEXCEPT {return m_;}
-CONSTCD11 inline bool month_day_last::ok() const NOEXCEPT {return m_.ok();}
-CONSTCD11 inline month_day_last::month_day_last(const date::month& m) NOEXCEPT : m_(m) {}
-
-CONSTCD11
-inline
-bool
-operator==(const month_day_last& x, const month_day_last& y) NOEXCEPT
-{
- return x.month() == y.month();
-}
-
-CONSTCD11
-inline
-bool
-operator!=(const month_day_last& x, const month_day_last& y) NOEXCEPT
-{
- return !(x == y);
-}
-
-CONSTCD11
-inline
-bool
-operator<(const month_day_last& x, const month_day_last& y) NOEXCEPT
-{
- return x.month() < y.month();
-}
-
-CONSTCD11
-inline
-bool
-operator>(const month_day_last& x, const month_day_last& y) NOEXCEPT
-{
- return y < x;
-}
-
-CONSTCD11
-inline
-bool
-operator<=(const month_day_last& x, const month_day_last& y) NOEXCEPT
-{
- return !(y < x);
-}
-
-CONSTCD11
-inline
-bool
-operator>=(const month_day_last& x, const month_day_last& y) NOEXCEPT
-{
- return !(x < y);
-}
-
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const month_day_last& mdl)
-{
- return os << mdl.month() << "/last";
-}
-
-// month_weekday
-
-CONSTCD11
-inline
-month_weekday::month_weekday(const date::month& m,
- const date::weekday_indexed& wdi) NOEXCEPT
- : m_(m)
- , wdi_(wdi)
- {}
-
-CONSTCD11 inline month month_weekday::month() const NOEXCEPT {return m_;}
-
-CONSTCD11
-inline
-weekday_indexed
-month_weekday::weekday_indexed() const NOEXCEPT
-{
- return wdi_;
-}
-
-CONSTCD11
-inline
-bool
-month_weekday::ok() const NOEXCEPT
-{
- return m_.ok() && wdi_.ok();
-}
-
-CONSTCD11
-inline
-bool
-operator==(const month_weekday& x, const month_weekday& y) NOEXCEPT
-{
- return x.month() == y.month() && x.weekday_indexed() == y.weekday_indexed();
-}
-
-CONSTCD11
-inline
-bool
-operator!=(const month_weekday& x, const month_weekday& y) NOEXCEPT
-{
- return !(x == y);
-}
-
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const month_weekday& mwd)
-{
- return os << mwd.month() << '/' << mwd.weekday_indexed();
-}
-
-// month_weekday_last
-
-CONSTCD11
-inline
-month_weekday_last::month_weekday_last(const date::month& m,
- const date::weekday_last& wdl) NOEXCEPT
- : m_(m)
- , wdl_(wdl)
- {}
-
-CONSTCD11 inline month month_weekday_last::month() const NOEXCEPT {return m_;}
-
-CONSTCD11
-inline
-weekday_last
-month_weekday_last::weekday_last() const NOEXCEPT
-{
- return wdl_;
-}
-
-CONSTCD11
-inline
-bool
-month_weekday_last::ok() const NOEXCEPT
-{
- return m_.ok() && wdl_.ok();
-}
-
-CONSTCD11
-inline
-bool
-operator==(const month_weekday_last& x, const month_weekday_last& y) NOEXCEPT
-{
- return x.month() == y.month() && x.weekday_last() == y.weekday_last();
-}
-
-CONSTCD11
-inline
-bool
-operator!=(const month_weekday_last& x, const month_weekday_last& y) NOEXCEPT
-{
- return !(x == y);
-}
-
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const month_weekday_last& mwdl)
-{
- return os << mwdl.month() << '/' << mwdl.weekday_last();
-}
-
-// year_month_day_last
-
-CONSTCD11
-inline
-year_month_day_last::year_month_day_last(const date::year& y,
- const date::month_day_last& mdl) NOEXCEPT
- : y_(y)
- , mdl_(mdl)
- {}
-
-template<class>
-CONSTCD14
-inline
-year_month_day_last&
-year_month_day_last::operator+=(const months& m) NOEXCEPT
-{
- *this = *this + m;
- return *this;
-}
-
-template<class>
-CONSTCD14
-inline
-year_month_day_last&
-year_month_day_last::operator-=(const months& m) NOEXCEPT
-{
- *this = *this - m;
- return *this;
-}
-
-CONSTCD14
-inline
-year_month_day_last&
-year_month_day_last::operator+=(const years& y) NOEXCEPT
-{
- *this = *this + y;
- return *this;
-}
-
-CONSTCD14
-inline
-year_month_day_last&
-year_month_day_last::operator-=(const years& y) NOEXCEPT
-{
- *this = *this - y;
- return *this;
-}
-
-CONSTCD11 inline year year_month_day_last::year() const NOEXCEPT {return y_;}
-CONSTCD11 inline month year_month_day_last::month() const NOEXCEPT {return mdl_.month();}
-
-CONSTCD11
-inline
-month_day_last
-year_month_day_last::month_day_last() const NOEXCEPT
-{
- return mdl_;
-}
-
-CONSTCD14
-inline
-day
-year_month_day_last::day() const NOEXCEPT
-{
- CONSTDATA date::day d[] =
- {
- date::day(31), date::day(28), date::day(31),
- date::day(30), date::day(31), date::day(30),
- date::day(31), date::day(31), date::day(30),
- date::day(31), date::day(30), date::day(31)
- };
- return (month() != February || !y_.is_leap()) && mdl_.ok() ?
- d[static_cast<unsigned>(month()) - 1] : date::day{29};
-}
-
-CONSTCD14
-inline
-year_month_day_last::operator sys_days() const NOEXCEPT
-{
- return sys_days(year()/month()/day());
-}
-
-CONSTCD14
-inline
-year_month_day_last::operator local_days() const NOEXCEPT
-{
- return local_days(year()/month()/day());
-}
-
-CONSTCD11
-inline
-bool
-year_month_day_last::ok() const NOEXCEPT
-{
- return y_.ok() && mdl_.ok();
-}
-
-CONSTCD11
-inline
-bool
-operator==(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT
-{
- return x.year() == y.year() && x.month_day_last() == y.month_day_last();
-}
-
-CONSTCD11
-inline
-bool
-operator!=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT
-{
- return !(x == y);
-}
-
-CONSTCD11
-inline
-bool
-operator<(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT
-{
- return x.year() < y.year() ? true
- : (x.year() > y.year() ? false
- : (x.month_day_last() < y.month_day_last()));
-}
-
-CONSTCD11
-inline
-bool
-operator>(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT
-{
- return y < x;
-}
-
-CONSTCD11
-inline
-bool
-operator<=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT
-{
- return !(y < x);
-}
-
-CONSTCD11
-inline
-bool
-operator>=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT
-{
- return !(x < y);
-}
-
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const year_month_day_last& ymdl)
-{
- return os << ymdl.year() << '/' << ymdl.month_day_last();
-}
-
-template<class>
-CONSTCD14
-inline
-year_month_day_last
-operator+(const year_month_day_last& ymdl, const months& dm) NOEXCEPT
-{
- return (ymdl.year() / ymdl.month() + dm) / last;
-}
-
-template<class>
-CONSTCD14
-inline
-year_month_day_last
-operator+(const months& dm, const year_month_day_last& ymdl) NOEXCEPT
-{
- return ymdl + dm;
-}
-
-template<class>
-CONSTCD14
-inline
-year_month_day_last
-operator-(const year_month_day_last& ymdl, const months& dm) NOEXCEPT
-{
- return ymdl + (-dm);
-}
-
-CONSTCD11
-inline
-year_month_day_last
-operator+(const year_month_day_last& ymdl, const years& dy) NOEXCEPT
-{
- return {ymdl.year()+dy, ymdl.month_day_last()};
-}
-
-CONSTCD11
-inline
-year_month_day_last
-operator+(const years& dy, const year_month_day_last& ymdl) NOEXCEPT
-{
- return ymdl + dy;
-}
-
-CONSTCD11
-inline
-year_month_day_last
-operator-(const year_month_day_last& ymdl, const years& dy) NOEXCEPT
-{
- return ymdl + (-dy);
-}
-
-// year_month_day
-
-CONSTCD11
-inline
-year_month_day::year_month_day(const date::year& y, const date::month& m,
- const date::day& d) NOEXCEPT
- : y_(y)
- , m_(m)
- , d_(d)
- {}
-
-CONSTCD14
-inline
-year_month_day::year_month_day(const year_month_day_last& ymdl) NOEXCEPT
- : y_(ymdl.year())
- , m_(ymdl.month())
- , d_(ymdl.day())
- {}
-
-CONSTCD14
-inline
-year_month_day::year_month_day(sys_days dp) NOEXCEPT
- : year_month_day(from_days(dp.time_since_epoch()))
- {}
-
-CONSTCD14
-inline
-year_month_day::year_month_day(local_days dp) NOEXCEPT
- : year_month_day(from_days(dp.time_since_epoch()))
- {}
-
-CONSTCD11 inline year year_month_day::year() const NOEXCEPT {return y_;}
-CONSTCD11 inline month year_month_day::month() const NOEXCEPT {return m_;}
-CONSTCD11 inline day year_month_day::day() const NOEXCEPT {return d_;}
-
-template<class>
-CONSTCD14
-inline
-year_month_day&
-year_month_day::operator+=(const months& m) NOEXCEPT
-{
- *this = *this + m;
- return *this;
-}
-
-template<class>
-CONSTCD14
-inline
-year_month_day&
-year_month_day::operator-=(const months& m) NOEXCEPT
-{
- *this = *this - m;
- return *this;
-}
-
-CONSTCD14
-inline
-year_month_day&
-year_month_day::operator+=(const years& y) NOEXCEPT
-{
- *this = *this + y;
- return *this;
-}
-
-CONSTCD14
-inline
-year_month_day&
-year_month_day::operator-=(const years& y) NOEXCEPT
-{
- *this = *this - y;
- return *this;
-}
-
-CONSTCD14
-inline
-days
-year_month_day::to_days() const NOEXCEPT
-{
- static_assert(std::numeric_limits<unsigned>::digits >= 18,
- "This algorithm has not been ported to a 16 bit unsigned integer");
- static_assert(std::numeric_limits<int>::digits >= 20,
- "This algorithm has not been ported to a 16 bit signed integer");
- auto const y = static_cast<int>(y_) - (m_ <= February);
- auto const m = static_cast<unsigned>(m_);
- auto const d = static_cast<unsigned>(d_);
- auto const era = (y >= 0 ? y : y-399) / 400;
- auto const yoe = static_cast<unsigned>(y - era * 400); // [0, 399]
- auto const doy = (153*(m > 2 ? m-3 : m+9) + 2)/5 + d-1; // [0, 365]
- auto const doe = yoe * 365 + yoe/4 - yoe/100 + doy; // [0, 146096]
- return days{era * 146097 + static_cast<int>(doe) - 719468};
-}
-
-CONSTCD14
-inline
-year_month_day::operator sys_days() const NOEXCEPT
-{
- return sys_days{to_days()};
-}
-
-CONSTCD14
-inline
-year_month_day::operator local_days() const NOEXCEPT
-{
- return local_days{to_days()};
-}
-
-CONSTCD14
-inline
-bool
-year_month_day::ok() const NOEXCEPT
-{
- if (!(y_.ok() && m_.ok()))
- return false;
- return date::day{1} <= d_ && d_ <= (y_ / m_ / last).day();
-}
-
-CONSTCD11
-inline
-bool
-operator==(const year_month_day& x, const year_month_day& y) NOEXCEPT
-{
- return x.year() == y.year() && x.month() == y.month() && x.day() == y.day();
-}
-
-CONSTCD11
-inline
-bool
-operator!=(const year_month_day& x, const year_month_day& y) NOEXCEPT
-{
- return !(x == y);
-}
-
-CONSTCD11
-inline
-bool
-operator<(const year_month_day& x, const year_month_day& y) NOEXCEPT
-{
- return x.year() < y.year() ? true
- : (x.year() > y.year() ? false
- : (x.month() < y.month() ? true
- : (x.month() > y.month() ? false
- : (x.day() < y.day()))));
-}
-
-CONSTCD11
-inline
-bool
-operator>(const year_month_day& x, const year_month_day& y) NOEXCEPT
-{
- return y < x;
-}
-
-CONSTCD11
-inline
-bool
-operator<=(const year_month_day& x, const year_month_day& y) NOEXCEPT
-{
- return !(y < x);
-}
-
-CONSTCD11
-inline
-bool
-operator>=(const year_month_day& x, const year_month_day& y) NOEXCEPT
-{
- return !(x < y);
-}
-
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const year_month_day& ymd)
-{
- detail::save_ostream<CharT, Traits> _(os);
- os.fill('0');
- os.flags(std::ios::dec | std::ios::right);
- os.imbue(std::locale::classic());
- os << ymd.year() << '-';
- os.width(2);
- os << static_cast<unsigned>(ymd.month()) << '-';
- os << ymd.day();
- if (!ymd.ok())
- os << " is not a valid date";
- return os;
-}
-
-CONSTCD14
-inline
-year_month_day
-year_month_day::from_days(days dp) NOEXCEPT
-{
- static_assert(std::numeric_limits<unsigned>::digits >= 18,
- "This algorithm has not been ported to a 16 bit unsigned integer");
- static_assert(std::numeric_limits<int>::digits >= 20,
- "This algorithm has not been ported to a 16 bit signed integer");
- auto const z = dp.count() + 719468;
- auto const era = (z >= 0 ? z : z - 146096) / 146097;
- auto const doe = static_cast<unsigned>(z - era * 146097); // [0, 146096]
- auto const yoe = (doe - doe/1460 + doe/36524 - doe/146096) / 365; // [0, 399]
- auto const y = static_cast<days::rep>(yoe) + era * 400;
- auto const doy = doe - (365*yoe + yoe/4 - yoe/100); // [0, 365]
- auto const mp = (5*doy + 2)/153; // [0, 11]
- auto const d = doy - (153*mp+2)/5 + 1; // [1, 31]
- auto const m = mp < 10 ? mp+3 : mp-9; // [1, 12]
- return year_month_day{date::year{y + (m <= 2)}, date::month(m), date::day(d)};
-}
-
-template<class>
-CONSTCD14
-inline
-year_month_day
-operator+(const year_month_day& ymd, const months& dm) NOEXCEPT
-{
- return (ymd.year() / ymd.month() + dm) / ymd.day();
-}
-
-template<class>
-CONSTCD14
-inline
-year_month_day
-operator+(const months& dm, const year_month_day& ymd) NOEXCEPT
-{
- return ymd + dm;
-}
-
-template<class>
-CONSTCD14
-inline
-year_month_day
-operator-(const year_month_day& ymd, const months& dm) NOEXCEPT
-{
- return ymd + (-dm);
-}
-
-CONSTCD11
-inline
-year_month_day
-operator+(const year_month_day& ymd, const years& dy) NOEXCEPT
-{
- return (ymd.year() + dy) / ymd.month() / ymd.day();
-}
-
-CONSTCD11
-inline
-year_month_day
-operator+(const years& dy, const year_month_day& ymd) NOEXCEPT
-{
- return ymd + dy;
-}
-
-CONSTCD11
-inline
-year_month_day
-operator-(const year_month_day& ymd, const years& dy) NOEXCEPT
-{
- return ymd + (-dy);
-}
-
-// year_month_weekday
-
-CONSTCD11
-inline
-year_month_weekday::year_month_weekday(const date::year& y, const date::month& m,
- const date::weekday_indexed& wdi)
- NOEXCEPT
- : y_(y)
- , m_(m)
- , wdi_(wdi)
- {}
-
-CONSTCD14
-inline
-year_month_weekday::year_month_weekday(const sys_days& dp) NOEXCEPT
- : year_month_weekday(from_days(dp.time_since_epoch()))
- {}
-
-CONSTCD14
-inline
-year_month_weekday::year_month_weekday(const local_days& dp) NOEXCEPT
- : year_month_weekday(from_days(dp.time_since_epoch()))
- {}
-
-template<class>
-CONSTCD14
-inline
-year_month_weekday&
-year_month_weekday::operator+=(const months& m) NOEXCEPT
-{
- *this = *this + m;
- return *this;
-}
-
-template<class>
-CONSTCD14
-inline
-year_month_weekday&
-year_month_weekday::operator-=(const months& m) NOEXCEPT
-{
- *this = *this - m;
- return *this;
-}
-
-CONSTCD14
-inline
-year_month_weekday&
-year_month_weekday::operator+=(const years& y) NOEXCEPT
-{
- *this = *this + y;
- return *this;
-}
-
-CONSTCD14
-inline
-year_month_weekday&
-year_month_weekday::operator-=(const years& y) NOEXCEPT
-{
- *this = *this - y;
- return *this;
-}
-
-CONSTCD11 inline year year_month_weekday::year() const NOEXCEPT {return y_;}
-CONSTCD11 inline month year_month_weekday::month() const NOEXCEPT {return m_;}
-
-CONSTCD11
-inline
-weekday
-year_month_weekday::weekday() const NOEXCEPT
-{
- return wdi_.weekday();
-}
-
-CONSTCD11
-inline
-unsigned
-year_month_weekday::index() const NOEXCEPT
-{
- return wdi_.index();
-}
-
-CONSTCD11
-inline
-weekday_indexed
-year_month_weekday::weekday_indexed() const NOEXCEPT
-{
- return wdi_;
-}
-
-CONSTCD14
-inline
-year_month_weekday::operator sys_days() const NOEXCEPT
-{
- return sys_days{to_days()};
-}
-
-CONSTCD14
-inline
-year_month_weekday::operator local_days() const NOEXCEPT
-{
- return local_days{to_days()};
-}
-
-CONSTCD14
-inline
-bool
-year_month_weekday::ok() const NOEXCEPT
-{
- if (!y_.ok() || !m_.ok() || !wdi_.weekday().ok() || wdi_.index() < 1)
- return false;
- if (wdi_.index() <= 4)
- return true;
- auto d2 = wdi_.weekday() - date::weekday(static_cast<sys_days>(y_/m_/1)) +
- days((wdi_.index()-1)*7 + 1);
- return static_cast<unsigned>(d2.count()) <= static_cast<unsigned>((y_/m_/last).day());
-}
-
-CONSTCD14
-inline
-year_month_weekday
-year_month_weekday::from_days(days d) NOEXCEPT
-{
- sys_days dp{d};
- auto const wd = date::weekday(dp);
- auto const ymd = year_month_day(dp);
- return {ymd.year(), ymd.month(), wd[(static_cast<unsigned>(ymd.day())-1)/7+1]};
-}
-
-CONSTCD14
-inline
-days
-year_month_weekday::to_days() const NOEXCEPT
-{
- auto d = sys_days(y_/m_/1);
- return (d + (wdi_.weekday() - date::weekday(d) + days{(wdi_.index()-1)*7})
- ).time_since_epoch();
-}
-
-CONSTCD11
-inline
-bool
-operator==(const year_month_weekday& x, const year_month_weekday& y) NOEXCEPT
-{
- return x.year() == y.year() && x.month() == y.month() &&
- x.weekday_indexed() == y.weekday_indexed();
-}
-
-CONSTCD11
-inline
-bool
-operator!=(const year_month_weekday& x, const year_month_weekday& y) NOEXCEPT
-{
- return !(x == y);
-}
-
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const year_month_weekday& ymwdi)
-{
- return os << ymwdi.year() << '/' << ymwdi.month()
- << '/' << ymwdi.weekday_indexed();
-}
-
-template<class>
-CONSTCD14
-inline
-year_month_weekday
-operator+(const year_month_weekday& ymwd, const months& dm) NOEXCEPT
-{
- return (ymwd.year() / ymwd.month() + dm) / ymwd.weekday_indexed();
-}
-
-template<class>
-CONSTCD14
-inline
-year_month_weekday
-operator+(const months& dm, const year_month_weekday& ymwd) NOEXCEPT
-{
- return ymwd + dm;
-}
-
-template<class>
-CONSTCD14
-inline
-year_month_weekday
-operator-(const year_month_weekday& ymwd, const months& dm) NOEXCEPT
-{
- return ymwd + (-dm);
-}
-
-CONSTCD11
-inline
-year_month_weekday
-operator+(const year_month_weekday& ymwd, const years& dy) NOEXCEPT
-{
- return {ymwd.year()+dy, ymwd.month(), ymwd.weekday_indexed()};
-}
-
-CONSTCD11
-inline
-year_month_weekday
-operator+(const years& dy, const year_month_weekday& ymwd) NOEXCEPT
-{
- return ymwd + dy;
-}
-
-CONSTCD11
-inline
-year_month_weekday
-operator-(const year_month_weekday& ymwd, const years& dy) NOEXCEPT
-{
- return ymwd + (-dy);
-}
-
-// year_month_weekday_last
-
-CONSTCD11
-inline
-year_month_weekday_last::year_month_weekday_last(const date::year& y,
- const date::month& m,
- const date::weekday_last& wdl) NOEXCEPT
- : y_(y)
- , m_(m)
- , wdl_(wdl)
- {}
-
-template<class>
-CONSTCD14
-inline
-year_month_weekday_last&
-year_month_weekday_last::operator+=(const months& m) NOEXCEPT
-{
- *this = *this + m;
- return *this;
-}
-
-template<class>
-CONSTCD14
-inline
-year_month_weekday_last&
-year_month_weekday_last::operator-=(const months& m) NOEXCEPT
-{
- *this = *this - m;
- return *this;
-}
-
-CONSTCD14
-inline
-year_month_weekday_last&
-year_month_weekday_last::operator+=(const years& y) NOEXCEPT
-{
- *this = *this + y;
- return *this;
-}
-
-CONSTCD14
-inline
-year_month_weekday_last&
-year_month_weekday_last::operator-=(const years& y) NOEXCEPT
-{
- *this = *this - y;
- return *this;
-}
-
-CONSTCD11 inline year year_month_weekday_last::year() const NOEXCEPT {return y_;}
-CONSTCD11 inline month year_month_weekday_last::month() const NOEXCEPT {return m_;}
-
-CONSTCD11
-inline
-weekday
-year_month_weekday_last::weekday() const NOEXCEPT
-{
- return wdl_.weekday();
-}
-
-CONSTCD11
-inline
-weekday_last
-year_month_weekday_last::weekday_last() const NOEXCEPT
-{
- return wdl_;
-}
-
-CONSTCD14
-inline
-year_month_weekday_last::operator sys_days() const NOEXCEPT
-{
- return sys_days{to_days()};
-}
-
-CONSTCD14
-inline
-year_month_weekday_last::operator local_days() const NOEXCEPT
-{
- return local_days{to_days()};
-}
-
-CONSTCD11
-inline
-bool
-year_month_weekday_last::ok() const NOEXCEPT
-{
- return y_.ok() && m_.ok() && wdl_.ok();
-}
-
-CONSTCD14
-inline
-days
-year_month_weekday_last::to_days() const NOEXCEPT
-{
- auto const d = sys_days(y_/m_/last);
- return (d - (date::weekday{d} - wdl_.weekday())).time_since_epoch();
-}
-
-CONSTCD11
-inline
-bool
-operator==(const year_month_weekday_last& x, const year_month_weekday_last& y) NOEXCEPT
-{
- return x.year() == y.year() && x.month() == y.month() &&
- x.weekday_last() == y.weekday_last();
-}
-
-CONSTCD11
-inline
-bool
-operator!=(const year_month_weekday_last& x, const year_month_weekday_last& y) NOEXCEPT
-{
- return !(x == y);
-}
-
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const year_month_weekday_last& ymwdl)
-{
- return os << ymwdl.year() << '/' << ymwdl.month() << '/' << ymwdl.weekday_last();
-}
-
-template<class>
-CONSTCD14
-inline
-year_month_weekday_last
-operator+(const year_month_weekday_last& ymwdl, const months& dm) NOEXCEPT
-{
- return (ymwdl.year() / ymwdl.month() + dm) / ymwdl.weekday_last();
-}
-
-template<class>
-CONSTCD14
-inline
-year_month_weekday_last
-operator+(const months& dm, const year_month_weekday_last& ymwdl) NOEXCEPT
-{
- return ymwdl + dm;
-}
-
-template<class>
-CONSTCD14
-inline
-year_month_weekday_last
-operator-(const year_month_weekday_last& ymwdl, const months& dm) NOEXCEPT
-{
- return ymwdl + (-dm);
-}
-
-CONSTCD11
-inline
-year_month_weekday_last
-operator+(const year_month_weekday_last& ymwdl, const years& dy) NOEXCEPT
-{
- return {ymwdl.year()+dy, ymwdl.month(), ymwdl.weekday_last()};
-}
-
-CONSTCD11
-inline
-year_month_weekday_last
-operator+(const years& dy, const year_month_weekday_last& ymwdl) NOEXCEPT
-{
- return ymwdl + dy;
-}
-
-CONSTCD11
-inline
-year_month_weekday_last
-operator-(const year_month_weekday_last& ymwdl, const years& dy) NOEXCEPT
-{
- return ymwdl + (-dy);
-}
-
-// year_month from operator/()
-
-CONSTCD11
-inline
-year_month
-operator/(const year& y, const month& m) NOEXCEPT
-{
- return {y, m};
-}
-
-CONSTCD11
-inline
-year_month
-operator/(const year& y, int m) NOEXCEPT
-{
- return y / month(static_cast<unsigned>(m));
-}
-
-// month_day from operator/()
-
-CONSTCD11
-inline
-month_day
-operator/(const month& m, const day& d) NOEXCEPT
-{
- return {m, d};
-}
-
-CONSTCD11
-inline
-month_day
-operator/(const day& d, const month& m) NOEXCEPT
-{
- return m / d;
-}
-
-CONSTCD11
-inline
-month_day
-operator/(const month& m, int d) NOEXCEPT
-{
- return m / day(static_cast<unsigned>(d));
-}
-
-CONSTCD11
-inline
-month_day
-operator/(int m, const day& d) NOEXCEPT
-{
- return month(static_cast<unsigned>(m)) / d;
-}
-
-CONSTCD11 inline month_day operator/(const day& d, int m) NOEXCEPT {return m / d;}
-
-// month_day_last from operator/()
-
-CONSTCD11
-inline
-month_day_last
-operator/(const month& m, last_spec) NOEXCEPT
-{
- return month_day_last{m};
-}
-
-CONSTCD11
-inline
-month_day_last
-operator/(last_spec, const month& m) NOEXCEPT
-{
- return m/last;
-}
-
-CONSTCD11
-inline
-month_day_last
-operator/(int m, last_spec) NOEXCEPT
-{
- return month(static_cast<unsigned>(m))/last;
-}
-
-CONSTCD11
-inline
-month_day_last
-operator/(last_spec, int m) NOEXCEPT
-{
- return m/last;
-}
-
-// month_weekday from operator/()
-
-CONSTCD11
-inline
-month_weekday
-operator/(const month& m, const weekday_indexed& wdi) NOEXCEPT
-{
- return {m, wdi};
-}
-
-CONSTCD11
-inline
-month_weekday
-operator/(const weekday_indexed& wdi, const month& m) NOEXCEPT
-{
- return m / wdi;
-}
-
-CONSTCD11
-inline
-month_weekday
-operator/(int m, const weekday_indexed& wdi) NOEXCEPT
-{
- return month(static_cast<unsigned>(m)) / wdi;
-}
-
-CONSTCD11
-inline
-month_weekday
-operator/(const weekday_indexed& wdi, int m) NOEXCEPT
-{
- return m / wdi;
-}
-
-// month_weekday_last from operator/()
-
-CONSTCD11
-inline
-month_weekday_last
-operator/(const month& m, const weekday_last& wdl) NOEXCEPT
-{
- return {m, wdl};
-}
-
-CONSTCD11
-inline
-month_weekday_last
-operator/(const weekday_last& wdl, const month& m) NOEXCEPT
-{
- return m / wdl;
-}
-
-CONSTCD11
-inline
-month_weekday_last
-operator/(int m, const weekday_last& wdl) NOEXCEPT
-{
- return month(static_cast<unsigned>(m)) / wdl;
-}
-
-CONSTCD11
-inline
-month_weekday_last
-operator/(const weekday_last& wdl, int m) NOEXCEPT
-{
- return m / wdl;
-}
-
-// year_month_day from operator/()
-
-CONSTCD11
-inline
-year_month_day
-operator/(const year_month& ym, const day& d) NOEXCEPT
-{
- return {ym.year(), ym.month(), d};
-}
-
-CONSTCD11
-inline
-year_month_day
-operator/(const year_month& ym, int d) NOEXCEPT
-{
- return ym / day(static_cast<unsigned>(d));
-}
-
-CONSTCD11
-inline
-year_month_day
-operator/(const year& y, const month_day& md) NOEXCEPT
-{
- return y / md.month() / md.day();
-}
-
-CONSTCD11
-inline
-year_month_day
-operator/(int y, const month_day& md) NOEXCEPT
-{
- return year(y) / md;
-}
-
-CONSTCD11
-inline
-year_month_day
-operator/(const month_day& md, const year& y) NOEXCEPT
-{
- return y / md;
-}
-
-CONSTCD11
-inline
-year_month_day
-operator/(const month_day& md, int y) NOEXCEPT
-{
- return year(y) / md;
-}
-
-// year_month_day_last from operator/()
-
-CONSTCD11
-inline
-year_month_day_last
-operator/(const year_month& ym, last_spec) NOEXCEPT
-{
- return {ym.year(), month_day_last{ym.month()}};
-}
-
-CONSTCD11
-inline
-year_month_day_last
-operator/(const year& y, const month_day_last& mdl) NOEXCEPT
-{
- return {y, mdl};
-}
-
-CONSTCD11
-inline
-year_month_day_last
-operator/(int y, const month_day_last& mdl) NOEXCEPT
-{
- return year(y) / mdl;
-}
-
-CONSTCD11
-inline
-year_month_day_last
-operator/(const month_day_last& mdl, const year& y) NOEXCEPT
-{
- return y / mdl;
-}
-
-CONSTCD11
-inline
-year_month_day_last
-operator/(const month_day_last& mdl, int y) NOEXCEPT
-{
- return year(y) / mdl;
-}
-
-// year_month_weekday from operator/()
-
-CONSTCD11
-inline
-year_month_weekday
-operator/(const year_month& ym, const weekday_indexed& wdi) NOEXCEPT
-{
- return {ym.year(), ym.month(), wdi};
-}
-
-CONSTCD11
-inline
-year_month_weekday
-operator/(const year& y, const month_weekday& mwd) NOEXCEPT
-{
- return {y, mwd.month(), mwd.weekday_indexed()};
-}
-
-CONSTCD11
-inline
-year_month_weekday
-operator/(int y, const month_weekday& mwd) NOEXCEPT
-{
- return year(y) / mwd;
-}
-
-CONSTCD11
-inline
-year_month_weekday
-operator/(const month_weekday& mwd, const year& y) NOEXCEPT
-{
- return y / mwd;
-}
-
-CONSTCD11
-inline
-year_month_weekday
-operator/(const month_weekday& mwd, int y) NOEXCEPT
-{
- return year(y) / mwd;
-}
-
-// year_month_weekday_last from operator/()
-
-CONSTCD11
-inline
-year_month_weekday_last
-operator/(const year_month& ym, const weekday_last& wdl) NOEXCEPT
-{
- return {ym.year(), ym.month(), wdl};
-}
-
-CONSTCD11
-inline
-year_month_weekday_last
-operator/(const year& y, const month_weekday_last& mwdl) NOEXCEPT
-{
- return {y, mwdl.month(), mwdl.weekday_last()};
-}
-
-CONSTCD11
-inline
-year_month_weekday_last
-operator/(int y, const month_weekday_last& mwdl) NOEXCEPT
-{
- return year(y) / mwdl;
-}
-
-CONSTCD11
-inline
-year_month_weekday_last
-operator/(const month_weekday_last& mwdl, const year& y) NOEXCEPT
-{
- return y / mwdl;
-}
-
-CONSTCD11
-inline
-year_month_weekday_last
-operator/(const month_weekday_last& mwdl, int y) NOEXCEPT
-{
- return year(y) / mwdl;
-}
-
-template <class Duration>
-struct fields;
-
-template <class CharT, class Traits, class Duration>
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
- const fields<Duration>& fds, const std::string* abbrev = nullptr,
- const std::chrono::seconds* offset_sec = nullptr);
-
-template <class CharT, class Traits, class Duration, class Alloc>
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
- fields<Duration>& fds, std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
- std::chrono::minutes* offset = nullptr);
-
-// hh_mm_ss
-
-namespace detail
-{
-
-struct undocumented {explicit undocumented() = default;};
-
-// width<n>::value is the number of fractional decimal digits in 1/n
-// width<0>::value and width<1>::value are defined to be 0
-// If 1/n takes more than 18 fractional decimal digits,
-// the result is truncated to 19.
-// Example: width<2>::value == 1
-// Example: width<3>::value == 19
-// Example: width<4>::value == 2
-// Example: width<10>::value == 1
-// Example: width<1000>::value == 3
-template <std::uint64_t n, std::uint64_t d = 10, unsigned w = 0,
- bool should_continue = !(n < 2) && d != 0 && (w < 19)>
-struct width
-{
- static CONSTDATA unsigned value = 1 + width<n, d%n*10, w+1>::value;
-};
-
-template <std::uint64_t n, std::uint64_t d, unsigned w>
-struct width<n, d, w, false>
-{
- static CONSTDATA unsigned value = 0;
-};
-
-template <unsigned exp>
-struct static_pow10
-{
-private:
- static CONSTDATA std::uint64_t h = static_pow10<exp/2>::value;
-public:
- static CONSTDATA std::uint64_t value = h * h * (exp % 2 ? 10 : 1);
-};
-
-template <>
-struct static_pow10<0>
-{
- static CONSTDATA std::uint64_t value = 1;
-};
-
-template <class Duration>
-class decimal_format_seconds
-{
- using CT = typename std::common_type<Duration, std::chrono::seconds>::type;
- using rep = typename CT::rep;
-public:
- static unsigned constexpr width = detail::width<CT::period::den>::value < 19 ?
- detail::width<CT::period::den>::value : 6u;
- using precision = std::chrono::duration<rep,
- std::ratio<1, static_pow10<width>::value>>;
-
-private:
- std::chrono::seconds s_;
- precision sub_s_;
-
-public:
- CONSTCD11 decimal_format_seconds()
- : s_()
- , sub_s_()
- {}
-
- CONSTCD11 explicit decimal_format_seconds(const Duration& d) NOEXCEPT
- : s_(std::chrono::duration_cast<std::chrono::seconds>(d))
- , sub_s_(std::chrono::duration_cast<precision>(d - s_))
- {}
-
- CONSTCD14 std::chrono::seconds& seconds() NOEXCEPT {return s_;}
- CONSTCD11 std::chrono::seconds seconds() const NOEXCEPT {return s_;}
- CONSTCD11 precision subseconds() const NOEXCEPT {return sub_s_;}
-
- CONSTCD14 precision to_duration() const NOEXCEPT
- {
- return s_ + sub_s_;
- }
-
- CONSTCD11 bool in_conventional_range() const NOEXCEPT
- {
- return sub_s_ < std::chrono::seconds{1} && s_ < std::chrono::minutes{1};
- }
-
- template <class CharT, class Traits>
- friend
- std::basic_ostream<CharT, Traits>&
- operator<<(std::basic_ostream<CharT, Traits>& os, const decimal_format_seconds& x)
- {
- return x.print(os, std::chrono::treat_as_floating_point<rep>{});
- }
-
- template <class CharT, class Traits>
- std::basic_ostream<CharT, Traits>&
- print(std::basic_ostream<CharT, Traits>& os, std::true_type) const
- {
- date::detail::save_ostream<CharT, Traits> _(os);
- std::chrono::duration<rep> d = s_ + sub_s_;
- if (d < std::chrono::seconds{10})
- os << '0';
- os << std::fixed << d.count();
- return os;
- }
-
- template <class CharT, class Traits>
- std::basic_ostream<CharT, Traits>&
- print(std::basic_ostream<CharT, Traits>& os, std::false_type) const
- {
- date::detail::save_ostream<CharT, Traits> _(os);
- os.fill('0');
- os.flags(std::ios::dec | std::ios::right);
- os.width(2);
- os << s_.count();
- if (width > 0)
- {
-#if !ONLY_C_LOCALE
- os << std::use_facet<std::numpunct<CharT>>(os.getloc()).decimal_point();
-#else
- os << '.';
-#endif
- date::detail::save_ostream<CharT, Traits> _s(os);
- os.imbue(std::locale::classic());
- os.width(width);
- os << sub_s_.count();
- }
- return os;
- }
-};
-
-template <class Rep, class Period>
-inline
-CONSTCD11
-typename std::enable_if
- <
- std::numeric_limits<Rep>::is_signed,
- std::chrono::duration<Rep, Period>
- >::type
-abs(std::chrono::duration<Rep, Period> d)
-{
- return d >= d.zero() ? +d : -d;
-}
-
-template <class Rep, class Period>
-inline
-CONSTCD11
-typename std::enable_if
- <
- !std::numeric_limits<Rep>::is_signed,
- std::chrono::duration<Rep, Period>
- >::type
-abs(std::chrono::duration<Rep, Period> d)
-{
- return d;
-}
-
-} // namespace detail
-
-template <class Duration>
-class hh_mm_ss
-{
- using dfs = detail::decimal_format_seconds<typename std::common_type<Duration,
- std::chrono::seconds>::type>;
-
- std::chrono::hours h_;
- std::chrono::minutes m_;
- dfs s_;
- bool neg_;
-
-public:
- static unsigned CONSTDATA fractional_width = dfs::width;
- using precision = typename dfs::precision;
-
- CONSTCD11 hh_mm_ss() NOEXCEPT
- : hh_mm_ss(Duration::zero())
- {}
-
- CONSTCD11 explicit hh_mm_ss(Duration d) NOEXCEPT
- : h_(std::chrono::duration_cast<std::chrono::hours>(detail::abs(d)))
- , m_(std::chrono::duration_cast<std::chrono::minutes>(detail::abs(d)) - h_)
- , s_(detail::abs(d) - h_ - m_)
- , neg_(d < Duration::zero())
- {}
-
- CONSTCD11 std::chrono::hours hours() const NOEXCEPT {return h_;}
- CONSTCD11 std::chrono::minutes minutes() const NOEXCEPT {return m_;}
- CONSTCD11 std::chrono::seconds seconds() const NOEXCEPT {return s_.seconds();}
- CONSTCD14 std::chrono::seconds&
- seconds(detail::undocumented) NOEXCEPT {return s_.seconds();}
- CONSTCD11 precision subseconds() const NOEXCEPT {return s_.subseconds();}
- CONSTCD11 bool is_negative() const NOEXCEPT {return neg_;}
-
- CONSTCD11 explicit operator precision() const NOEXCEPT {return to_duration();}
- CONSTCD11 precision to_duration() const NOEXCEPT
- {return (s_.to_duration() + m_ + h_) * (1-2*neg_);}
-
- CONSTCD11 bool in_conventional_range() const NOEXCEPT
- {
- return !neg_ && h_ < days{1} && m_ < std::chrono::hours{1} &&
- s_.in_conventional_range();
- }
-
-private:
-
- template <class charT, class traits>
- friend
- std::basic_ostream<charT, traits>&
- operator<<(std::basic_ostream<charT, traits>& os, hh_mm_ss const& tod)
- {
- if (tod.is_negative())
- os << '-';
- if (tod.h_ < std::chrono::hours{10})
- os << '0';
- os << tod.h_.count() << ':';
- if (tod.m_ < std::chrono::minutes{10})
- os << '0';
- os << tod.m_.count() << ':' << tod.s_;
- return os;
- }
-
- template <class CharT, class Traits, class Duration2>
- friend
- std::basic_ostream<CharT, Traits>&
- date::to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
- const fields<Duration2>& fds, const std::string* abbrev,
- const std::chrono::seconds* offset_sec);
-
- template <class CharT, class Traits, class Duration2, class Alloc>
- friend
- std::basic_istream<CharT, Traits>&
- date::from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
- fields<Duration2>& fds,
- std::basic_string<CharT, Traits, Alloc>* abbrev, std::chrono::minutes* offset);
-};
-
-inline
-CONSTCD14
-bool
-is_am(std::chrono::hours const& h) NOEXCEPT
-{
- using std::chrono::hours;
- return hours{0} <= h && h < hours{12};
-}
-
-inline
-CONSTCD14
-bool
-is_pm(std::chrono::hours const& h) NOEXCEPT
-{
- using std::chrono::hours;
- return hours{12} <= h && h < hours{24};
-}
-
-inline
-CONSTCD14
-std::chrono::hours
-make12(std::chrono::hours h) NOEXCEPT
-{
- using std::chrono::hours;
- if (h < hours{12})
- {
- if (h == hours{0})
- h = hours{12};
- }
- else
- {
- if (h != hours{12})
- h = h - hours{12};
- }
- return h;
-}
-
-inline
-CONSTCD14
-std::chrono::hours
-make24(std::chrono::hours h, bool is_pm) NOEXCEPT
-{
- using std::chrono::hours;
- if (is_pm)
- {
- if (h != hours{12})
- h = h + hours{12};
- }
- else if (h == hours{12})
- h = hours{0};
- return h;
-}
-
-template <class Duration>
-using time_of_day = hh_mm_ss<Duration>;
-
-template <class Rep, class Period,
- class = typename std::enable_if
- <!std::chrono::treat_as_floating_point<Rep>::value>::type>
-CONSTCD11
-inline
-hh_mm_ss<std::chrono::duration<Rep, Period>>
-make_time(const std::chrono::duration<Rep, Period>& d)
-{
- return hh_mm_ss<std::chrono::duration<Rep, Period>>(d);
-}
-
-template <class CharT, class Traits, class Duration>
-inline
-typename std::enable_if
-<
- !std::chrono::treat_as_floating_point<typename Duration::rep>::value &&
- std::ratio_less<typename Duration::period, days::period>::value
- , std::basic_ostream<CharT, Traits>&
->::type
-operator<<(std::basic_ostream<CharT, Traits>& os, const sys_time<Duration>& tp)
-{
- auto const dp = date::floor<days>(tp);
- return os << year_month_day(dp) << ' ' << make_time(tp-dp);
-}
-
-template <class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const sys_days& dp)
-{
- return os << year_month_day(dp);
-}
-
-template <class CharT, class Traits, class Duration>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const local_time<Duration>& ut)
-{
- return (os << sys_time<Duration>{ut.time_since_epoch()});
-}
-
-namespace detail
-{
-
-template <class CharT, std::size_t N>
-class string_literal;
-
-template <class CharT1, class CharT2, std::size_t N1, std::size_t N2>
-inline
-CONSTCD14
-string_literal<typename std::conditional<sizeof(CharT2) <= sizeof(CharT1), CharT1, CharT2>::type,
- N1 + N2 - 1>
-operator+(const string_literal<CharT1, N1>& x, const string_literal<CharT2, N2>& y) NOEXCEPT;
-
-template <class CharT, std::size_t N>
-class string_literal
-{
- CharT p_[N];
-
- CONSTCD11 string_literal() NOEXCEPT
- : p_{}
- {}
-
-public:
- using const_iterator = const CharT*;
-
- string_literal(string_literal const&) = default;
- string_literal& operator=(string_literal const&) = delete;
-
- template <std::size_t N1 = 2,
- class = typename std::enable_if<N1 == N>::type>
- CONSTCD11 string_literal(CharT c) NOEXCEPT
- : p_{c}
- {
- }
-
- template <std::size_t N1 = 3,
- class = typename std::enable_if<N1 == N>::type>
- CONSTCD11 string_literal(CharT c1, CharT c2) NOEXCEPT
- : p_{c1, c2}
- {
- }
-
- template <std::size_t N1 = 4,
- class = typename std::enable_if<N1 == N>::type>
- CONSTCD11 string_literal(CharT c1, CharT c2, CharT c3) NOEXCEPT
- : p_{c1, c2, c3}
- {
- }
-
- CONSTCD14 string_literal(const CharT(&a)[N]) NOEXCEPT
- : p_{}
- {
- for (std::size_t i = 0; i < N; ++i)
- p_[i] = a[i];
- }
-
- template <class U = CharT,
- class = typename std::enable_if<(1 < sizeof(U))>::type>
- CONSTCD14 string_literal(const char(&a)[N]) NOEXCEPT
- : p_{}
- {
- for (std::size_t i = 0; i < N; ++i)
- p_[i] = a[i];
- }
-
- template <class CharT2,
- class = typename std::enable_if<!std::is_same<CharT2, CharT>::value>::type>
- CONSTCD14 string_literal(string_literal<CharT2, N> const& a) NOEXCEPT
- : p_{}
- {
- for (std::size_t i = 0; i < N; ++i)
- p_[i] = a[i];
- }
-
- CONSTCD11 const CharT* data() const NOEXCEPT {return p_;}
- CONSTCD11 std::size_t size() const NOEXCEPT {return N-1;}
-
- CONSTCD11 const_iterator begin() const NOEXCEPT {return p_;}
- CONSTCD11 const_iterator end() const NOEXCEPT {return p_ + N-1;}
-
- CONSTCD11 CharT const& operator[](std::size_t n) const NOEXCEPT
- {
- return p_[n];
- }
-
- template <class Traits>
- friend
- std::basic_ostream<CharT, Traits>&
- operator<<(std::basic_ostream<CharT, Traits>& os, const string_literal& s)
- {
- return os << s.p_;
- }
-
- template <class CharT1, class CharT2, std::size_t N1, std::size_t N2>
- friend
- CONSTCD14
- string_literal<typename std::conditional<sizeof(CharT2) <= sizeof(CharT1), CharT1, CharT2>::type,
- N1 + N2 - 1>
- operator+(const string_literal<CharT1, N1>& x, const string_literal<CharT2, N2>& y) NOEXCEPT;
-};
-
-template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 3>
-operator+(const string_literal<CharT, 2>& x, const string_literal<CharT, 2>& y) NOEXCEPT
-{
- return string_literal<CharT, 3>(x[0], y[0]);
-}
-
-template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 4>
-operator+(const string_literal<CharT, 3>& x, const string_literal<CharT, 2>& y) NOEXCEPT
-{
- return string_literal<CharT, 4>(x[0], x[1], y[0]);
-}
-
-template <class CharT1, class CharT2, std::size_t N1, std::size_t N2>
-CONSTCD14
-inline
-string_literal<typename std::conditional<sizeof(CharT2) <= sizeof(CharT1), CharT1, CharT2>::type,
- N1 + N2 - 1>
-operator+(const string_literal<CharT1, N1>& x, const string_literal<CharT2, N2>& y) NOEXCEPT
-{
- using CT = typename std::conditional<sizeof(CharT2) <= sizeof(CharT1), CharT1, CharT2>::type;
-
- string_literal<CT, N1 + N2 - 1> r;
- std::size_t i = 0;
- for (; i < N1-1; ++i)
- r.p_[i] = CT(x.p_[i]);
- for (std::size_t j = 0; j < N2; ++j, ++i)
- r.p_[i] = CT(y.p_[j]);
-
- return r;
-}
-
-
-template <class CharT, class Traits, class Alloc, std::size_t N>
-inline
-std::basic_string<CharT, Traits, Alloc>
-operator+(std::basic_string<CharT, Traits, Alloc> x, const string_literal<CharT, N>& y)
-{
- x.append(y.data(), y.size());
- return x;
-}
-
-#if __cplusplus >= 201402 && (!defined(__EDG_VERSION__) || __EDG_VERSION__ > 411) \
- && (!defined(__SUNPRO_CC) || __SUNPRO_CC > 0x5150)
-
-template <class CharT,
- class = std::enable_if_t<std::is_same<CharT, char>::value ||
- std::is_same<CharT, wchar_t>::value ||
- std::is_same<CharT, char16_t>::value ||
- std::is_same<CharT, char32_t>::value>>
-CONSTCD14
-inline
-string_literal<CharT, 2>
-msl(CharT c) NOEXCEPT
-{
- return string_literal<CharT, 2>{c};
-}
-
-CONSTCD14
-inline
-std::size_t
-to_string_len(std::intmax_t i)
-{
- std::size_t r = 0;
- do
- {
- i /= 10;
- ++r;
- } while (i > 0);
- return r;
-}
-
-template <std::intmax_t N>
-CONSTCD14
-inline
-std::enable_if_t
-<
- N < 10,
- string_literal<char, to_string_len(N)+1>
->
-msl() NOEXCEPT
-{
- return msl(char(N % 10 + '0'));
-}
-
-template <std::intmax_t N>
-CONSTCD14
-inline
-std::enable_if_t
-<
- 10 <= N,
- string_literal<char, to_string_len(N)+1>
->
-msl() NOEXCEPT
-{
- return msl<N/10>() + msl(char(N % 10 + '0'));
-}
-
-template <class CharT, std::intmax_t N, std::intmax_t D>
-CONSTCD14
-inline
-std::enable_if_t
-<
- std::ratio<N, D>::type::den != 1,
- string_literal<CharT, to_string_len(std::ratio<N, D>::type::num) +
- to_string_len(std::ratio<N, D>::type::den) + 4>
->
-msl(std::ratio<N, D>) NOEXCEPT
-{
- using R = typename std::ratio<N, D>::type;
- return msl(CharT{'['}) + msl<R::num>() + msl(CharT{'/'}) +
- msl<R::den>() + msl(CharT{']'});
-}
-
-template <class CharT, std::intmax_t N, std::intmax_t D>
-CONSTCD14
-inline
-std::enable_if_t
-<
- std::ratio<N, D>::type::den == 1,
- string_literal<CharT, to_string_len(std::ratio<N, D>::type::num) + 3>
->
-msl(std::ratio<N, D>) NOEXCEPT
-{
- using R = typename std::ratio<N, D>::type;
- return msl(CharT{'['}) + msl<R::num>() + msl(CharT{']'});
-}
-
-
-#else // __cplusplus < 201402 || (defined(__EDG_VERSION__) && __EDG_VERSION__ <= 411)
-
-inline
-std::string
-to_string(std::uint64_t x)
-{
- return std::to_string(x);
-}
-
-template <class CharT>
-inline
-std::basic_string<CharT>
-to_string(std::uint64_t x)
-{
- auto y = std::to_string(x);
- return std::basic_string<CharT>(y.begin(), y.end());
-}
-
-template <class CharT, std::intmax_t N, std::intmax_t D>
-inline
-typename std::enable_if
-<
- std::ratio<N, D>::type::den != 1,
- std::basic_string<CharT>
->::type
-msl(std::ratio<N, D>)
-{
- using R = typename std::ratio<N, D>::type;
- return std::basic_string<CharT>(1, '[') + to_string<CharT>(R::num) + CharT{'/'} +
- to_string<CharT>(R::den) + CharT{']'};
-}
-
-template <class CharT, std::intmax_t N, std::intmax_t D>
-inline
-typename std::enable_if
-<
- std::ratio<N, D>::type::den == 1,
- std::basic_string<CharT>
->::type
-msl(std::ratio<N, D>)
-{
- using R = typename std::ratio<N, D>::type;
- return std::basic_string<CharT>(1, '[') + to_string<CharT>(R::num) + CharT{']'};
-}
-
-#endif // __cplusplus < 201402 || (defined(__EDG_VERSION__) && __EDG_VERSION__ <= 411)
-
-template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::atto) NOEXCEPT
-{
- return string_literal<CharT, 2>{'a'};
-}
-
-template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::femto) NOEXCEPT
-{
- return string_literal<CharT, 2>{'f'};
-}
-
-template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::pico) NOEXCEPT
-{
- return string_literal<CharT, 2>{'p'};
-}
-
-template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::nano) NOEXCEPT
-{
- return string_literal<CharT, 2>{'n'};
-}
-
-template <class CharT>
-CONSTCD11
-inline
-typename std::enable_if
-<
- std::is_same<CharT, char>::value,
- string_literal<char, 3>
->::type
-msl(std::micro) NOEXCEPT
-{
- return string_literal<char, 3>{'\xC2', '\xB5'};
-}
-
-template <class CharT>
-CONSTCD11
-inline
-typename std::enable_if
-<
- !std::is_same<CharT, char>::value,
- string_literal<CharT, 2>
->::type
-msl(std::micro) NOEXCEPT
-{
- return string_literal<CharT, 2>{CharT{static_cast<unsigned char>('\xB5')}};
-}
-
-template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::milli) NOEXCEPT
-{
- return string_literal<CharT, 2>{'m'};
-}
-
-template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::centi) NOEXCEPT
-{
- return string_literal<CharT, 2>{'c'};
-}
-
-template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 3>
-msl(std::deca) NOEXCEPT
-{
- return string_literal<CharT, 3>{'d', 'a'};
-}
-
-template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::deci) NOEXCEPT
-{
- return string_literal<CharT, 2>{'d'};
-}
-
-template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::hecto) NOEXCEPT
-{
- return string_literal<CharT, 2>{'h'};
-}
-
-template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::kilo) NOEXCEPT
-{
- return string_literal<CharT, 2>{'k'};
-}
-
-template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::mega) NOEXCEPT
-{
- return string_literal<CharT, 2>{'M'};
-}
-
-template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::giga) NOEXCEPT
-{
- return string_literal<CharT, 2>{'G'};
-}
-
-template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::tera) NOEXCEPT
-{
- return string_literal<CharT, 2>{'T'};
-}
-
-template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::peta) NOEXCEPT
-{
- return string_literal<CharT, 2>{'P'};
-}
-
-template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::exa) NOEXCEPT
-{
- return string_literal<CharT, 2>{'E'};
-}
-
-template <class CharT, class Period>
-CONSTCD11
-inline
-auto
-get_units(Period p)
- -> decltype(msl<CharT>(p) + string_literal<CharT, 2>{'s'})
-{
- return msl<CharT>(p) + string_literal<CharT, 2>{'s'};
-}
-
-template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-get_units(std::ratio<1>)
-{
- return string_literal<CharT, 2>{'s'};
-}
-
-template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-get_units(std::ratio<3600>)
-{
- return string_literal<CharT, 2>{'h'};
-}
-
-template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 4>
-get_units(std::ratio<60>)
-{
- return string_literal<CharT, 4>{'m', 'i', 'n'};
-}
-
-template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-get_units(std::ratio<86400>)
-{
- return string_literal<CharT, 2>{'d'};
-}
-
-template <class CharT, class Traits = std::char_traits<CharT>>
-struct make_string;
-
-template <>
-struct make_string<char>
-{
- template <class Rep>
- static
- std::string
- from(Rep n)
- {
- return std::to_string(n);
- }
-};
-
-template <class Traits>
-struct make_string<char, Traits>
-{
- template <class Rep>
- static
- std::basic_string<char, Traits>
- from(Rep n)
- {
- auto s = std::to_string(n);
- return std::basic_string<char, Traits>(s.begin(), s.end());
- }
-};
-
-template <>
-struct make_string<wchar_t>
-{
- template <class Rep>
- static
- std::wstring
- from(Rep n)
- {
- return std::to_wstring(n);
- }
-};
-
-template <class Traits>
-struct make_string<wchar_t, Traits>
-{
- template <class Rep>
- static
- std::basic_string<wchar_t, Traits>
- from(Rep n)
- {
- auto s = std::to_wstring(n);
- return std::basic_string<wchar_t, Traits>(s.begin(), s.end());
- }
-};
-
-} // namespace detail
-
-// to_stream
-
-CONSTDATA year nanyear{-32768};
-
-template <class Duration>
-struct fields
-{
- year_month_day ymd{nanyear/0/0};
- weekday wd{8u};
- hh_mm_ss<Duration> tod{};
- bool has_tod = false;
-
- fields() = default;
-
- fields(year_month_day ymd_) : ymd(ymd_) {}
- fields(weekday wd_) : wd(wd_) {}
- fields(hh_mm_ss<Duration> tod_) : tod(tod_), has_tod(true) {}
-
- fields(year_month_day ymd_, weekday wd_) : ymd(ymd_), wd(wd_) {}
- fields(year_month_day ymd_, hh_mm_ss<Duration> tod_) : ymd(ymd_), tod(tod_),
- has_tod(true) {}
-
- fields(weekday wd_, hh_mm_ss<Duration> tod_) : wd(wd_), tod(tod_), has_tod(true) {}
-
- fields(year_month_day ymd_, weekday wd_, hh_mm_ss<Duration> tod_)
- : ymd(ymd_)
- , wd(wd_)
- , tod(tod_)
- , has_tod(true)
- {}
-};
-
-namespace detail
-{
-
-template <class CharT, class Traits, class Duration>
-unsigned
-extract_weekday(std::basic_ostream<CharT, Traits>& os, const fields<Duration>& fds)
-{
- if (!fds.ymd.ok() && !fds.wd.ok())
- {
- // fds does not contain a valid weekday
- os.setstate(std::ios::failbit);
- return 8;
- }
- weekday wd;
- if (fds.ymd.ok())
- {
- wd = weekday{sys_days(fds.ymd)};
- if (fds.wd.ok() && wd != fds.wd)
- {
- // fds.ymd and fds.wd are inconsistent
- os.setstate(std::ios::failbit);
- return 8;
- }
- }
- else
- wd = fds.wd;
- return static_cast<unsigned>((wd - Sunday).count());
-}
-
-template <class CharT, class Traits, class Duration>
-unsigned
-extract_month(std::basic_ostream<CharT, Traits>& os, const fields<Duration>& fds)
-{
- if (!fds.ymd.month().ok())
- {
- // fds does not contain a valid month
- os.setstate(std::ios::failbit);
- return 0;
- }
- return static_cast<unsigned>(fds.ymd.month());
-}
-
-} // namespace detail
-
-#if ONLY_C_LOCALE
-
-namespace detail
-{
-
-inline
-std::pair<const std::string*, const std::string*>
-weekday_names()
-{
- static const std::string nm[] =
- {
- "Sunday",
- "Monday",
- "Tuesday",
- "Wednesday",
- "Thursday",
- "Friday",
- "Saturday",
- "Sun",
- "Mon",
- "Tue",
- "Wed",
- "Thu",
- "Fri",
- "Sat"
- };
- return std::make_pair(nm, nm+sizeof(nm)/sizeof(nm[0]));
-}
-
-inline
-std::pair<const std::string*, const std::string*>
-month_names()
-{
- static const std::string nm[] =
- {
- "January",
- "February",
- "March",
- "April",
- "May",
- "June",
- "July",
- "August",
- "September",
- "October",
- "November",
- "December",
- "Jan",
- "Feb",
- "Mar",
- "Apr",
- "May",
- "Jun",
- "Jul",
- "Aug",
- "Sep",
- "Oct",
- "Nov",
- "Dec"
- };
- return std::make_pair(nm, nm+sizeof(nm)/sizeof(nm[0]));
-}
-
-inline
-std::pair<const std::string*, const std::string*>
-ampm_names()
-{
- static const std::string nm[] =
- {
- "AM",
- "PM"
- };
- return std::make_pair(nm, nm+sizeof(nm)/sizeof(nm[0]));
-}
-
-template <class CharT, class Traits, class FwdIter>
-FwdIter
-scan_keyword(std::basic_istream<CharT, Traits>& is, FwdIter kb, FwdIter ke)
-{
- size_t nkw = static_cast<size_t>(std::distance(kb, ke));
- const unsigned char doesnt_match = '\0';
- const unsigned char might_match = '\1';
- const unsigned char does_match = '\2';
- unsigned char statbuf[100];
- unsigned char* status = statbuf;
- std::unique_ptr<unsigned char, void(*)(void*)> stat_hold(0, free);
- if (nkw > sizeof(statbuf))
- {
- status = (unsigned char*)std::malloc(nkw);
- if (status == nullptr)
- throw std::bad_alloc();
- stat_hold.reset(status);
- }
- size_t n_might_match = nkw; // At this point, any keyword might match
- size_t n_does_match = 0; // but none of them definitely do
- // Initialize all statuses to might_match, except for "" keywords are does_match
- unsigned char* st = status;
- for (auto ky = kb; ky != ke; ++ky, ++st)
- {
- if (!ky->empty())
- *st = might_match;
- else
- {
- *st = does_match;
- --n_might_match;
- ++n_does_match;
- }
- }
- // While there might be a match, test keywords against the next CharT
- for (size_t indx = 0; is && n_might_match > 0; ++indx)
- {
- // Peek at the next CharT but don't consume it
- auto ic = is.peek();
- if (ic == EOF)
- {
- is.setstate(std::ios::eofbit);
- break;
- }
- auto c = static_cast<char>(toupper(ic));
- bool consume = false;
- // For each keyword which might match, see if the indx character is c
- // If a match if found, consume c
- // If a match is found, and that is the last character in the keyword,
- // then that keyword matches.
- // If the keyword doesn't match this character, then change the keyword
- // to doesn't match
- st = status;
- for (auto ky = kb; ky != ke; ++ky, ++st)
- {
- if (*st == might_match)
- {
- if (c == static_cast<char>(toupper((*ky)[indx])))
- {
- consume = true;
- if (ky->size() == indx+1)
- {
- *st = does_match;
- --n_might_match;
- ++n_does_match;
- }
- }
- else
- {
- *st = doesnt_match;
- --n_might_match;
- }
- }
- }
- // consume if we matched a character
- if (consume)
- {
- (void)is.get();
- // If we consumed a character and there might be a matched keyword that
- // was marked matched on a previous iteration, then such keywords
- // are now marked as not matching.
- if (n_might_match + n_does_match > 1)
- {
- st = status;
- for (auto ky = kb; ky != ke; ++ky, ++st)
- {
- if (*st == does_match && ky->size() != indx+1)
- {
- *st = doesnt_match;
- --n_does_match;
- }
- }
- }
- }
- }
- // We've exited the loop because we hit eof and/or we have no more "might matches".
- // Return the first matching result
- for (st = status; kb != ke; ++kb, ++st)
- if (*st == does_match)
- break;
- if (kb == ke)
- is.setstate(std::ios::failbit);
- return kb;
-}
-
-} // namespace detail
-
-#endif // ONLY_C_LOCALE
-
-template <class CharT, class Traits, class Duration>
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
- const fields<Duration>& fds, const std::string* abbrev,
- const std::chrono::seconds* offset_sec)
-{
-#if ONLY_C_LOCALE
- using detail::weekday_names;
- using detail::month_names;
- using detail::ampm_names;
-#endif
- using detail::save_ostream;
- using detail::get_units;
- using detail::extract_weekday;
- using detail::extract_month;
- using std::ios;
- using std::chrono::duration_cast;
- using std::chrono::seconds;
- using std::chrono::minutes;
- using std::chrono::hours;
- date::detail::save_ostream<CharT, Traits> ss(os);
- os.fill(' ');
- os.flags(std::ios::skipws | std::ios::dec);
- os.width(0);
- tm tm{};
- bool insert_negative = fds.has_tod && fds.tod.to_duration() < Duration::zero();
-#if !ONLY_C_LOCALE
- auto& facet = std::use_facet<std::time_put<CharT>>(os.getloc());
-#endif
- const CharT* command = nullptr;
- CharT modified = CharT{};
- for (; *fmt; ++fmt)
- {
- switch (*fmt)
- {
- case 'a':
- case 'A':
- if (command)
- {
- if (modified == CharT{})
- {
- tm.tm_wday = static_cast<int>(extract_weekday(os, fds));
- if (os.fail())
- return os;
-#if !ONLY_C_LOCALE
- const CharT f[] = {'%', *fmt};
- facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
-#else // ONLY_C_LOCALE
- os << weekday_names().first[tm.tm_wday+7*(*fmt == 'a')];
-#endif // ONLY_C_LOCALE
- }
- else
- {
- os << CharT{'%'} << modified << *fmt;
- modified = CharT{};
- }
- command = nullptr;
- }
- else
- os << *fmt;
- break;
- case 'b':
- case 'B':
- case 'h':
- if (command)
- {
- if (modified == CharT{})
- {
- tm.tm_mon = static_cast<int>(extract_month(os, fds)) - 1;
-#if !ONLY_C_LOCALE
- const CharT f[] = {'%', *fmt};
- facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
-#else // ONLY_C_LOCALE
- os << month_names().first[tm.tm_mon+12*(*fmt != 'B')];
-#endif // ONLY_C_LOCALE
- }
- else
- {
- os << CharT{'%'} << modified << *fmt;
- modified = CharT{};
- }
- command = nullptr;
- }
- else
- os << *fmt;
- break;
- case 'c':
- case 'x':
- if (command)
- {
- if (modified == CharT{'O'})
- os << CharT{'%'} << modified << *fmt;
- else
- {
- if (!fds.ymd.ok())
- os.setstate(std::ios::failbit);
- if (*fmt == 'c' && !fds.has_tod)
- os.setstate(std::ios::failbit);
-#if !ONLY_C_LOCALE
- tm = std::tm{};
- auto const& ymd = fds.ymd;
- auto ld = local_days(ymd);
- if (*fmt == 'c')
- {
- tm.tm_sec = static_cast<int>(fds.tod.seconds().count());
- tm.tm_min = static_cast<int>(fds.tod.minutes().count());
- tm.tm_hour = static_cast<int>(fds.tod.hours().count());
- }
- tm.tm_mday = static_cast<int>(static_cast<unsigned>(ymd.day()));
- tm.tm_mon = static_cast<int>(extract_month(os, fds) - 1);
- tm.tm_year = static_cast<int>(ymd.year()) - 1900;
- tm.tm_wday = static_cast<int>(extract_weekday(os, fds));
- if (os.fail())
- return os;
- tm.tm_yday = static_cast<int>((ld - local_days(ymd.year()/1/1)).count());
- CharT f[3] = {'%'};
- auto fe = std::begin(f) + 1;
- if (modified == CharT{'E'})
- *fe++ = modified;
- *fe++ = *fmt;
- facet.put(os, os, os.fill(), &tm, std::begin(f), fe);
-#else // ONLY_C_LOCALE
- if (*fmt == 'c')
- {
- auto wd = static_cast<int>(extract_weekday(os, fds));
- os << weekday_names().first[static_cast<unsigned>(wd)+7]
- << ' ';
- os << month_names().first[extract_month(os, fds)-1+12] << ' ';
- auto d = static_cast<int>(static_cast<unsigned>(fds.ymd.day()));
- if (d < 10)
- os << ' ';
- os << d << ' '
- << make_time(duration_cast<seconds>(fds.tod.to_duration()))
- << ' ' << fds.ymd.year();
-
- }
- else // *fmt == 'x'
- {
- auto const& ymd = fds.ymd;
- save_ostream<CharT, Traits> _(os);
- os.fill('0');
- os.flags(std::ios::dec | std::ios::right);
- os.width(2);
- os << static_cast<unsigned>(ymd.month()) << CharT{'/'};
- os.width(2);
- os << static_cast<unsigned>(ymd.day()) << CharT{'/'};
- os.width(2);
- os << static_cast<int>(ymd.year()) % 100;
- }
-#endif // ONLY_C_LOCALE
- }
- command = nullptr;
- modified = CharT{};
- }
- else
- os << *fmt;
- break;
- case 'C':
- if (command)
- {
- if (modified == CharT{'O'})
- os << CharT{'%'} << modified << *fmt;
- else
- {
- if (!fds.ymd.year().ok())
- os.setstate(std::ios::failbit);
- auto y = static_cast<int>(fds.ymd.year());
-#if !ONLY_C_LOCALE
- if (modified == CharT{})
-#endif
- {
- save_ostream<CharT, Traits> _(os);
- os.fill('0');
- os.flags(std::ios::dec | std::ios::right);
- if (y >= 0)
- {
- os.width(2);
- os << y/100;
- }
- else
- {
- os << CharT{'-'};
- os.width(2);
- os << -(y-99)/100;
- }
- }
-#if !ONLY_C_LOCALE
- else if (modified == CharT{'E'})
- {
- tm.tm_year = y - 1900;
- CharT f[3] = {'%', 'E', 'C'};
- facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
- }
-#endif
- }
- command = nullptr;
- modified = CharT{};
- }
- else
- os << *fmt;
- break;
- case 'd':
- case 'e':
- if (command)
- {
- if (modified == CharT{'E'})
- os << CharT{'%'} << modified << *fmt;
- else
- {
- if (!fds.ymd.day().ok())
- os.setstate(std::ios::failbit);
- auto d = static_cast<int>(static_cast<unsigned>(fds.ymd.day()));
-#if !ONLY_C_LOCALE
- if (modified == CharT{})
-#endif
- {
- save_ostream<CharT, Traits> _(os);
- if (*fmt == CharT{'d'})
- os.fill('0');
- else
- os.fill(' ');
- os.flags(std::ios::dec | std::ios::right);
- os.width(2);
- os << d;
- }
-#if !ONLY_C_LOCALE
- else if (modified == CharT{'O'})
- {
- tm.tm_mday = d;
- CharT f[3] = {'%', 'O', *fmt};
- facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
- }
-#endif
- }
- command = nullptr;
- modified = CharT{};
- }
- else
- os << *fmt;
- break;
- case 'D':
- if (command)
- {
- if (modified == CharT{})
- {
- if (!fds.ymd.ok())
- os.setstate(std::ios::failbit);
- auto const& ymd = fds.ymd;
- save_ostream<CharT, Traits> _(os);
- os.fill('0');
- os.flags(std::ios::dec | std::ios::right);
- os.width(2);
- os << static_cast<unsigned>(ymd.month()) << CharT{'/'};
- os.width(2);
- os << static_cast<unsigned>(ymd.day()) << CharT{'/'};
- os.width(2);
- os << static_cast<int>(ymd.year()) % 100;
- }
- else
- {
- os << CharT{'%'} << modified << *fmt;
- modified = CharT{};
- }
- command = nullptr;
- }
- else
- os << *fmt;
- break;
- case 'F':
- if (command)
- {
- if (modified == CharT{})
- {
- if (!fds.ymd.ok())
- os.setstate(std::ios::failbit);
- auto const& ymd = fds.ymd;
- save_ostream<CharT, Traits> _(os);
- os.imbue(std::locale::classic());
- os.fill('0');
- os.flags(std::ios::dec | std::ios::right);
- os.width(4);
- os << static_cast<int>(ymd.year()) << CharT{'-'};
- os.width(2);
- os << static_cast<unsigned>(ymd.month()) << CharT{'-'};
- os.width(2);
- os << static_cast<unsigned>(ymd.day());
- }
- else
- {
- os << CharT{'%'} << modified << *fmt;
- modified = CharT{};
- }
- command = nullptr;
- }
- else
- os << *fmt;
- break;
- case 'g':
- case 'G':
- if (command)
- {
- if (modified == CharT{})
- {
- if (!fds.ymd.ok())
- os.setstate(std::ios::failbit);
- auto ld = local_days(fds.ymd);
- auto y = year_month_day{ld + days{3}}.year();
- auto start = local_days((y-years{1})/December/Thursday[last]) +
- (Monday-Thursday);
- if (ld < start)
- --y;
- if (*fmt == CharT{'G'})
- os << y;
- else
- {
- save_ostream<CharT, Traits> _(os);
- os.fill('0');
- os.flags(std::ios::dec | std::ios::right);
- os.width(2);
- os << std::abs(static_cast<int>(y)) % 100;
- }
- }
- else
- {
- os << CharT{'%'} << modified << *fmt;
- modified = CharT{};
- }
- command = nullptr;
- }
- else
- os << *fmt;
- break;
- case 'H':
- case 'I':
- if (command)
- {
- if (modified == CharT{'E'})
- os << CharT{'%'} << modified << *fmt;
- else
- {
- if (!fds.has_tod)
- os.setstate(std::ios::failbit);
- if (insert_negative)
- {
- os << '-';
- insert_negative = false;
- }
- auto hms = fds.tod;
-#if !ONLY_C_LOCALE
- if (modified == CharT{})
-#endif
- {
- auto h = *fmt == CharT{'I'} ? date::make12(hms.hours()) : hms.hours();
- if (h < hours{10})
- os << CharT{'0'};
- os << h.count();
- }
-#if !ONLY_C_LOCALE
- else if (modified == CharT{'O'})
- {
- const CharT f[] = {'%', modified, *fmt};
- tm.tm_hour = static_cast<int>(hms.hours().count());
- facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
- }
-#endif
- }
- modified = CharT{};
- command = nullptr;
- }
- else
- os << *fmt;
- break;
- case 'j':
- if (command)
- {
- if (modified == CharT{})
- {
- if (fds.ymd.ok() || fds.has_tod)
- {
- days doy;
- if (fds.ymd.ok())
- {
- auto ld = local_days(fds.ymd);
- auto y = fds.ymd.year();
- doy = ld - local_days(y/January/1) + days{1};
- }
- else
- {
- doy = duration_cast<days>(fds.tod.to_duration());
- }
- save_ostream<CharT, Traits> _(os);
- os.fill('0');
- os.flags(std::ios::dec | std::ios::right);
- os.width(3);
- os << doy.count();
- }
- else
- {
- os.setstate(std::ios::failbit);
- }
- }
- else
- {
- os << CharT{'%'} << modified << *fmt;
- modified = CharT{};
- }
- command = nullptr;
- }
- else
- os << *fmt;
- break;
- case 'm':
- if (command)
- {
- if (modified == CharT{'E'})
- os << CharT{'%'} << modified << *fmt;
- else
- {
- if (!fds.ymd.month().ok())
- os.setstate(std::ios::failbit);
- auto m = static_cast<unsigned>(fds.ymd.month());
-#if !ONLY_C_LOCALE
- if (modified == CharT{})
-#endif
- {
- if (m < 10)
- os << CharT{'0'};
- os << m;
- }
-#if !ONLY_C_LOCALE
- else if (modified == CharT{'O'})
- {
- const CharT f[] = {'%', modified, *fmt};
- tm.tm_mon = static_cast<int>(m-1);
- facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
- }
-#endif
- }
- modified = CharT{};
- command = nullptr;
- }
- else
- os << *fmt;
- break;
- case 'M':
- if (command)
- {
- if (modified == CharT{'E'})
- os << CharT{'%'} << modified << *fmt;
- else
- {
- if (!fds.has_tod)
- os.setstate(std::ios::failbit);
- if (insert_negative)
- {
- os << '-';
- insert_negative = false;
- }
-#if !ONLY_C_LOCALE
- if (modified == CharT{})
-#endif
- {
- if (fds.tod.minutes() < minutes{10})
- os << CharT{'0'};
- os << fds.tod.minutes().count();
- }
-#if !ONLY_C_LOCALE
- else if (modified == CharT{'O'})
- {
- const CharT f[] = {'%', modified, *fmt};
- tm.tm_min = static_cast<int>(fds.tod.minutes().count());
- facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
- }
-#endif
- }
- modified = CharT{};
- command = nullptr;
- }
- else
- os << *fmt;
- break;
- case 'n':
- if (command)
- {
- if (modified == CharT{})
- os << CharT{'\n'};
- else
- {
- os << CharT{'%'} << modified << *fmt;
- modified = CharT{};
- }
- command = nullptr;
- }
- else
- os << *fmt;
- break;
- case 'p':
- if (command)
- {
- if (modified == CharT{})
- {
- if (!fds.has_tod)
- os.setstate(std::ios::failbit);
-#if !ONLY_C_LOCALE
- const CharT f[] = {'%', *fmt};
- tm.tm_hour = static_cast<int>(fds.tod.hours().count());
- facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
-#else
- if (date::is_am(fds.tod.hours()))
- os << ampm_names().first[0];
- else
- os << ampm_names().first[1];
-#endif
- }
- else
- {
- os << CharT{'%'} << modified << *fmt;
- }
- modified = CharT{};
- command = nullptr;
- }
- else
- os << *fmt;
- break;
- case 'Q':
- case 'q':
- if (command)
- {
- if (modified == CharT{})
- {
- if (!fds.has_tod)
- os.setstate(std::ios::failbit);
- auto d = fds.tod.to_duration();
- if (*fmt == 'q')
- os << get_units<CharT>(typename decltype(d)::period::type{});
- else
- os << d.count();
- }
- else
- {
- os << CharT{'%'} << modified << *fmt;
- }
- modified = CharT{};
- command = nullptr;
- }
- else
- os << *fmt;
- break;
- case 'r':
- if (command)
- {
- if (modified == CharT{})
- {
- if (!fds.has_tod)
- os.setstate(std::ios::failbit);
-#if !ONLY_C_LOCALE
- const CharT f[] = {'%', *fmt};
- tm.tm_hour = static_cast<int>(fds.tod.hours().count());
- tm.tm_min = static_cast<int>(fds.tod.minutes().count());
- tm.tm_sec = static_cast<int>(fds.tod.seconds().count());
- facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
-#else
- hh_mm_ss<seconds> tod(duration_cast<seconds>(fds.tod.to_duration()));
- save_ostream<CharT, Traits> _(os);
- os.fill('0');
- os.width(2);
- os << date::make12(tod.hours()).count() << CharT{':'};
- os.width(2);
- os << tod.minutes().count() << CharT{':'};
- os.width(2);
- os << tod.seconds().count() << CharT{' '};
- if (date::is_am(tod.hours()))
- os << ampm_names().first[0];
- else
- os << ampm_names().first[1];
-#endif
- }
- else
- {
- os << CharT{'%'} << modified << *fmt;
- }
- modified = CharT{};
- command = nullptr;
- }
- else
- os << *fmt;
- break;
- case 'R':
- if (command)
- {
- if (modified == CharT{})
- {
- if (!fds.has_tod)
- os.setstate(std::ios::failbit);
- if (fds.tod.hours() < hours{10})
- os << CharT{'0'};
- os << fds.tod.hours().count() << CharT{':'};
- if (fds.tod.minutes() < minutes{10})
- os << CharT{'0'};
- os << fds.tod.minutes().count();
- }
- else
- {
- os << CharT{'%'} << modified << *fmt;
- modified = CharT{};
- }
- command = nullptr;
- }
- else
- os << *fmt;
- break;
- case 'S':
- if (command)
- {
- if (modified == CharT{'E'})
- os << CharT{'%'} << modified << *fmt;
- else
- {
- if (!fds.has_tod)
- os.setstate(std::ios::failbit);
- if (insert_negative)
- {
- os << '-';
- insert_negative = false;
- }
-#if !ONLY_C_LOCALE
- if (modified == CharT{})
-#endif
- {
- os << fds.tod.s_;
- }
-#if !ONLY_C_LOCALE
- else if (modified == CharT{'O'})
- {
- const CharT f[] = {'%', modified, *fmt};
- tm.tm_sec = static_cast<int>(fds.tod.s_.seconds().count());
- facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
- }
-#endif
- }
- modified = CharT{};
- command = nullptr;
- }
- else
- os << *fmt;
- break;
- case 't':
- if (command)
- {
- if (modified == CharT{})
- os << CharT{'\t'};
- else
- {
- os << CharT{'%'} << modified << *fmt;
- modified = CharT{};
- }
- command = nullptr;
- }
- else
- os << *fmt;
- break;
- case 'T':
- if (command)
- {
- if (modified == CharT{})
- {
- if (!fds.has_tod)
- os.setstate(std::ios::failbit);
- os << fds.tod;
- }
- else
- {
- os << CharT{'%'} << modified << *fmt;
- modified = CharT{};
- }
- command = nullptr;
- }
- else
- os << *fmt;
- break;
- case 'u':
- if (command)
- {
- if (modified == CharT{'E'})
- os << CharT{'%'} << modified << *fmt;
- else
- {
- auto wd = extract_weekday(os, fds);
-#if !ONLY_C_LOCALE
- if (modified == CharT{})
-#endif
- {
- os << (wd != 0 ? wd : 7u);
- }
-#if !ONLY_C_LOCALE
- else if (modified == CharT{'O'})
- {
- const CharT f[] = {'%', modified, *fmt};
- tm.tm_wday = static_cast<int>(wd);
- facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
- }
-#endif
- }
- modified = CharT{};
- command = nullptr;
- }
- else
- os << *fmt;
- break;
- case 'U':
- if (command)
- {
- if (modified == CharT{'E'})
- os << CharT{'%'} << modified << *fmt;
- else
- {
- auto const& ymd = fds.ymd;
- if (!ymd.ok())
- os.setstate(std::ios::failbit);
- auto ld = local_days(ymd);
-#if !ONLY_C_LOCALE
- if (modified == CharT{})
-#endif
- {
- auto st = local_days(Sunday[1]/January/ymd.year());
- if (ld < st)
- os << CharT{'0'} << CharT{'0'};
- else
- {
- auto wn = duration_cast<weeks>(ld - st).count() + 1;
- if (wn < 10)
- os << CharT{'0'};
- os << wn;
- }
- }
- #if !ONLY_C_LOCALE
- else if (modified == CharT{'O'})
- {
- const CharT f[] = {'%', modified, *fmt};
- tm.tm_year = static_cast<int>(ymd.year()) - 1900;
- tm.tm_wday = static_cast<int>(extract_weekday(os, fds));
- if (os.fail())
- return os;
- tm.tm_yday = static_cast<int>((ld - local_days(ymd.year()/1/1)).count());
- facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
- }
-#endif
- }
- modified = CharT{};
- command = nullptr;
- }
- else
- os << *fmt;
- break;
- case 'V':
- if (command)
- {
- if (modified == CharT{'E'})
- os << CharT{'%'} << modified << *fmt;
- else
- {
- if (!fds.ymd.ok())
- os.setstate(std::ios::failbit);
- auto ld = local_days(fds.ymd);
-#if !ONLY_C_LOCALE
- if (modified == CharT{})
-#endif
- {
- auto y = year_month_day{ld + days{3}}.year();
- auto st = local_days((y-years{1})/12/Thursday[last]) +
- (Monday-Thursday);
- if (ld < st)
- {
- --y;
- st = local_days((y - years{1})/12/Thursday[last]) +
- (Monday-Thursday);
- }
- auto wn = duration_cast<weeks>(ld - st).count() + 1;
- if (wn < 10)
- os << CharT{'0'};
- os << wn;
- }
-#if !ONLY_C_LOCALE
- else if (modified == CharT{'O'})
- {
- const CharT f[] = {'%', modified, *fmt};
- auto const& ymd = fds.ymd;
- tm.tm_year = static_cast<int>(ymd.year()) - 1900;
- tm.tm_wday = static_cast<int>(extract_weekday(os, fds));
- if (os.fail())
- return os;
- tm.tm_yday = static_cast<int>((ld - local_days(ymd.year()/1/1)).count());
- facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
- }
-#endif
- }
- modified = CharT{};
- command = nullptr;
- }
- else
- os << *fmt;
- break;
- case 'w':
- if (command)
- {
- auto wd = extract_weekday(os, fds);
- if (os.fail())
- return os;
-#if !ONLY_C_LOCALE
- if (modified == CharT{})
-#else
- if (modified != CharT{'E'})
-#endif
- {
- os << wd;
- }
-#if !ONLY_C_LOCALE
- else if (modified == CharT{'O'})
- {
- const CharT f[] = {'%', modified, *fmt};
- tm.tm_wday = static_cast<int>(wd);
- facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
- }
-#endif
- else
- {
- os << CharT{'%'} << modified << *fmt;
- }
- modified = CharT{};
- command = nullptr;
- }
- else
- os << *fmt;
- break;
- case 'W':
- if (command)
- {
- if (modified == CharT{'E'})
- os << CharT{'%'} << modified << *fmt;
- else
- {
- auto const& ymd = fds.ymd;
- if (!ymd.ok())
- os.setstate(std::ios::failbit);
- auto ld = local_days(ymd);
-#if !ONLY_C_LOCALE
- if (modified == CharT{})
-#endif
- {
- auto st = local_days(Monday[1]/January/ymd.year());
- if (ld < st)
- os << CharT{'0'} << CharT{'0'};
- else
- {
- auto wn = duration_cast<weeks>(ld - st).count() + 1;
- if (wn < 10)
- os << CharT{'0'};
- os << wn;
- }
- }
-#if !ONLY_C_LOCALE
- else if (modified == CharT{'O'})
- {
- const CharT f[] = {'%', modified, *fmt};
- tm.tm_year = static_cast<int>(ymd.year()) - 1900;
- tm.tm_wday = static_cast<int>(extract_weekday(os, fds));
- if (os.fail())
- return os;
- tm.tm_yday = static_cast<int>((ld - local_days(ymd.year()/1/1)).count());
- facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
- }
-#endif
- }
- modified = CharT{};
- command = nullptr;
- }
- else
- os << *fmt;
- break;
- case 'X':
- if (command)
- {
- if (modified == CharT{'O'})
- os << CharT{'%'} << modified << *fmt;
- else
- {
- if (!fds.has_tod)
- os.setstate(std::ios::failbit);
-#if !ONLY_C_LOCALE
- tm = std::tm{};
- tm.tm_sec = static_cast<int>(fds.tod.seconds().count());
- tm.tm_min = static_cast<int>(fds.tod.minutes().count());
- tm.tm_hour = static_cast<int>(fds.tod.hours().count());
- CharT f[3] = {'%'};
- auto fe = std::begin(f) + 1;
- if (modified == CharT{'E'})
- *fe++ = modified;
- *fe++ = *fmt;
- facet.put(os, os, os.fill(), &tm, std::begin(f), fe);
-#else
- os << fds.tod;
-#endif
- }
- command = nullptr;
- modified = CharT{};
- }
- else
- os << *fmt;
- break;
- case 'y':
- if (command)
- {
- if (!fds.ymd.year().ok())
- os.setstate(std::ios::failbit);
- auto y = static_cast<int>(fds.ymd.year());
-#if !ONLY_C_LOCALE
- if (modified == CharT{})
- {
-#endif
- y = std::abs(y) % 100;
- if (y < 10)
- os << CharT{'0'};
- os << y;
-#if !ONLY_C_LOCALE
- }
- else
- {
- const CharT f[] = {'%', modified, *fmt};
- tm.tm_year = y - 1900;
- facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
- }
-#endif
- modified = CharT{};
- command = nullptr;
- }
- else
- os << *fmt;
- break;
- case 'Y':
- if (command)
- {
- if (modified == CharT{'O'})
- os << CharT{'%'} << modified << *fmt;
- else
- {
- if (!fds.ymd.year().ok())
- os.setstate(std::ios::failbit);
- auto y = fds.ymd.year();
-#if !ONLY_C_LOCALE
- if (modified == CharT{})
-#endif
- {
- save_ostream<CharT, Traits> _(os);
- os.imbue(std::locale::classic());
- os << y;
- }
-#if !ONLY_C_LOCALE
- else if (modified == CharT{'E'})
- {
- const CharT f[] = {'%', modified, *fmt};
- tm.tm_year = static_cast<int>(y) - 1900;
- facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
- }
-#endif
- }
- modified = CharT{};
- command = nullptr;
- }
- else
- os << *fmt;
- break;
- case 'z':
- if (command)
- {
- if (offset_sec == nullptr)
- {
- // Can not format %z with unknown offset
- os.setstate(ios::failbit);
- return os;
- }
- auto m = duration_cast<minutes>(*offset_sec);
- auto neg = m < minutes{0};
- m = date::abs(m);
- auto h = duration_cast<hours>(m);
- m -= h;
- if (neg)
- os << CharT{'-'};
- else
- os << CharT{'+'};
- if (h < hours{10})
- os << CharT{'0'};
- os << h.count();
- if (modified != CharT{})
- os << CharT{':'};
- if (m < minutes{10})
- os << CharT{'0'};
- os << m.count();
- command = nullptr;
- modified = CharT{};
- }
- else
- os << *fmt;
- break;
- case 'Z':
- if (command)
- {
- if (modified == CharT{})
- {
- if (abbrev == nullptr)
- {
- // Can not format %Z with unknown time_zone
- os.setstate(ios::failbit);
- return os;
- }
- for (auto c : *abbrev)
- os << CharT(c);
- }
- else
- {
- os << CharT{'%'} << modified << *fmt;
- modified = CharT{};
- }
- command = nullptr;
- }
- else
- os << *fmt;
- break;
- case 'E':
- case 'O':
- if (command)
- {
- if (modified == CharT{})
- {
- modified = *fmt;
- }
- else
- {
- os << CharT{'%'} << modified << *fmt;
- command = nullptr;
- modified = CharT{};
- }
- }
- else
- os << *fmt;
- break;
- case '%':
- if (command)
- {
- if (modified == CharT{})
- {
- os << CharT{'%'};
- command = nullptr;
- }
- else
- {
- os << CharT{'%'} << modified << CharT{'%'};
- command = nullptr;
- modified = CharT{};
- }
- }
- else
- command = fmt;
- break;
- default:
- if (command)
- {
- os << CharT{'%'};
- command = nullptr;
- }
- if (modified != CharT{})
- {
- os << modified;
- modified = CharT{};
- }
- os << *fmt;
- break;
- }
- }
- if (command)
- os << CharT{'%'};
- if (modified != CharT{})
- os << modified;
- return os;
-}
-
-template <class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt, const year& y)
-{
- using CT = std::chrono::seconds;
- fields<CT> fds{y/0/0};
- return to_stream(os, fmt, fds);
-}
-
-template <class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt, const month& m)
-{
- using CT = std::chrono::seconds;
- fields<CT> fds{m/0/nanyear};
- return to_stream(os, fmt, fds);
-}
-
-template <class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt, const day& d)
-{
- using CT = std::chrono::seconds;
- fields<CT> fds{d/0/nanyear};
- return to_stream(os, fmt, fds);
-}
-
-template <class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt, const weekday& wd)
-{
- using CT = std::chrono::seconds;
- fields<CT> fds{wd};
- return to_stream(os, fmt, fds);
-}
-
-template <class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt, const year_month& ym)
-{
- using CT = std::chrono::seconds;
- fields<CT> fds{ym/0};
- return to_stream(os, fmt, fds);
-}
-
-template <class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt, const month_day& md)
-{
- using CT = std::chrono::seconds;
- fields<CT> fds{md/nanyear};
- return to_stream(os, fmt, fds);
-}
-
-template <class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
- const year_month_day& ymd)
-{
- using CT = std::chrono::seconds;
- fields<CT> fds{ymd};
- return to_stream(os, fmt, fds);
-}
-
-template <class CharT, class Traits, class Rep, class Period>
-inline
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
- const std::chrono::duration<Rep, Period>& d)
-{
- using Duration = std::chrono::duration<Rep, Period>;
- using CT = typename std::common_type<Duration, std::chrono::seconds>::type;
- fields<CT> fds{hh_mm_ss<CT>{d}};
- return to_stream(os, fmt, fds);
-}
-
-template <class CharT, class Traits, class Duration>
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
- const local_time<Duration>& tp, const std::string* abbrev = nullptr,
- const std::chrono::seconds* offset_sec = nullptr)
-{
- using CT = typename std::common_type<Duration, std::chrono::seconds>::type;
- auto ld = floor<days>(tp);
- fields<CT> fds{year_month_day{ld}, hh_mm_ss<CT>{tp-local_seconds{ld}}};
- return to_stream(os, fmt, fds, abbrev, offset_sec);
-}
-
-template <class CharT, class Traits, class Duration>
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
- const sys_time<Duration>& tp)
-{
- using std::chrono::seconds;
- using CT = typename std::common_type<Duration, seconds>::type;
- const std::string abbrev("UTC");
- CONSTDATA seconds offset{0};
- auto sd = floor<days>(tp);
- fields<CT> fds{year_month_day{sd}, hh_mm_ss<CT>{tp-sys_seconds{sd}}};
- return to_stream(os, fmt, fds, &abbrev, &offset);
-}
-
-// format
-
-template <class CharT, class Streamable>
-auto
-format(const std::locale& loc, const CharT* fmt, const Streamable& tp)
- -> decltype(to_stream(std::declval<std::basic_ostream<CharT>&>(), fmt, tp),
- std::basic_string<CharT>{})
-{
- std::basic_ostringstream<CharT> os;
- os.exceptions(std::ios::failbit | std::ios::badbit);
- os.imbue(loc);
- to_stream(os, fmt, tp);
- return os.str();
-}
-
-template <class CharT, class Streamable>
-auto
-format(const CharT* fmt, const Streamable& tp)
- -> decltype(to_stream(std::declval<std::basic_ostream<CharT>&>(), fmt, tp),
- std::basic_string<CharT>{})
-{
- std::basic_ostringstream<CharT> os;
- os.exceptions(std::ios::failbit | std::ios::badbit);
- to_stream(os, fmt, tp);
- return os.str();
-}
-
-template <class CharT, class Traits, class Alloc, class Streamable>
-auto
-format(const std::locale& loc, const std::basic_string<CharT, Traits, Alloc>& fmt,
- const Streamable& tp)
- -> decltype(to_stream(std::declval<std::basic_ostream<CharT, Traits>&>(), fmt.c_str(), tp),
- std::basic_string<CharT, Traits, Alloc>{})
-{
- std::basic_ostringstream<CharT, Traits, Alloc> os;
- os.exceptions(std::ios::failbit | std::ios::badbit);
- os.imbue(loc);
- to_stream(os, fmt.c_str(), tp);
- return os.str();
-}
-
-template <class CharT, class Traits, class Alloc, class Streamable>
-auto
-format(const std::basic_string<CharT, Traits, Alloc>& fmt, const Streamable& tp)
- -> decltype(to_stream(std::declval<std::basic_ostream<CharT, Traits>&>(), fmt.c_str(), tp),
- std::basic_string<CharT, Traits, Alloc>{})
-{
- std::basic_ostringstream<CharT, Traits, Alloc> os;
- os.exceptions(std::ios::failbit | std::ios::badbit);
- to_stream(os, fmt.c_str(), tp);
- return os.str();
-}
-
-// parse
-
-namespace detail
-{
-
-template <class CharT, class Traits>
-bool
-read_char(std::basic_istream<CharT, Traits>& is, CharT fmt, std::ios::iostate& err)
-{
- auto ic = is.get();
- if (Traits::eq_int_type(ic, Traits::eof()) ||
- !Traits::eq(Traits::to_char_type(ic), fmt))
- {
- err |= std::ios::failbit;
- is.setstate(std::ios::failbit);
- return false;
- }
- return true;
-}
-
-template <class CharT, class Traits>
-unsigned
-read_unsigned(std::basic_istream<CharT, Traits>& is, unsigned m = 1, unsigned M = 10)
-{
- unsigned x = 0;
- unsigned count = 0;
- while (true)
- {
- auto ic = is.peek();
- if (Traits::eq_int_type(ic, Traits::eof()))
- break;
- auto c = static_cast<char>(Traits::to_char_type(ic));
- if (!('0' <= c && c <= '9'))
- break;
- (void)is.get();
- ++count;
- x = 10*x + static_cast<unsigned>(c - '0');
- if (count == M)
- break;
- }
- if (count < m)
- is.setstate(std::ios::failbit);
- return x;
-}
-
-template <class CharT, class Traits>
-int
-read_signed(std::basic_istream<CharT, Traits>& is, unsigned m = 1, unsigned M = 10)
-{
- auto ic = is.peek();
- if (!Traits::eq_int_type(ic, Traits::eof()))
- {
- auto c = static_cast<char>(Traits::to_char_type(ic));
- if (('0' <= c && c <= '9') || c == '-' || c == '+')
- {
- if (c == '-' || c == '+')
- (void)is.get();
- auto x = static_cast<int>(read_unsigned(is, std::max(m, 1u), M));
- if (!is.fail())
- {
- if (c == '-')
- x = -x;
- return x;
- }
- }
- }
- if (m > 0)
- is.setstate(std::ios::failbit);
- return 0;
-}
-
-template <class CharT, class Traits>
-long double
-read_long_double(std::basic_istream<CharT, Traits>& is, unsigned m = 1, unsigned M = 10)
-{
- unsigned count = 0;
- auto decimal_point = Traits::to_int_type(
- std::use_facet<std::numpunct<CharT>>(is.getloc()).decimal_point());
- std::string buf;
- while (true)
- {
- auto ic = is.peek();
- if (Traits::eq_int_type(ic, Traits::eof()))
- break;
- if (Traits::eq_int_type(ic, decimal_point))
- {
- buf += '.';
- decimal_point = Traits::eof();
- is.get();
- }
- else
- {
- auto c = static_cast<char>(Traits::to_char_type(ic));
- if (!('0' <= c && c <= '9'))
- break;
- buf += c;
- (void)is.get();
- }
- if (++count == M)
- break;
- }
- if (count < m)
- {
- is.setstate(std::ios::failbit);
- return 0;
- }
- return std::stold(buf);
-}
-
-struct rs
-{
- int& i;
- unsigned m;
- unsigned M;
-};
-
-struct ru
-{
- int& i;
- unsigned m;
- unsigned M;
-};
-
-struct rld
-{
- long double& i;
- unsigned m;
- unsigned M;
-};
-
-template <class CharT, class Traits>
-void
-read(std::basic_istream<CharT, Traits>&)
-{
-}
-
-template <class CharT, class Traits, class ...Args>
-void
-read(std::basic_istream<CharT, Traits>& is, CharT a0, Args&& ...args);
-
-template <class CharT, class Traits, class ...Args>
-void
-read(std::basic_istream<CharT, Traits>& is, rs a0, Args&& ...args);
-
-template <class CharT, class Traits, class ...Args>
-void
-read(std::basic_istream<CharT, Traits>& is, ru a0, Args&& ...args);
-
-template <class CharT, class Traits, class ...Args>
-void
-read(std::basic_istream<CharT, Traits>& is, int a0, Args&& ...args);
-
-template <class CharT, class Traits, class ...Args>
-void
-read(std::basic_istream<CharT, Traits>& is, rld a0, Args&& ...args);
-
-template <class CharT, class Traits, class ...Args>
-void
-read(std::basic_istream<CharT, Traits>& is, CharT a0, Args&& ...args)
-{
- // No-op if a0 == CharT{}
- if (a0 != CharT{})
- {
- auto ic = is.peek();
- if (Traits::eq_int_type(ic, Traits::eof()))
- {
- is.setstate(std::ios::failbit | std::ios::eofbit);
- return;
- }
- if (!Traits::eq(Traits::to_char_type(ic), a0))
- {
- is.setstate(std::ios::failbit);
- return;
- }
- (void)is.get();
- }
- read(is, std::forward<Args>(args)...);
-}
-
-template <class CharT, class Traits, class ...Args>
-void
-read(std::basic_istream<CharT, Traits>& is, rs a0, Args&& ...args)
-{
- auto x = read_signed(is, a0.m, a0.M);
- if (is.fail())
- return;
- a0.i = x;
- read(is, std::forward<Args>(args)...);
-}
-
-template <class CharT, class Traits, class ...Args>
-void
-read(std::basic_istream<CharT, Traits>& is, ru a0, Args&& ...args)
-{
- auto x = read_unsigned(is, a0.m, a0.M);
- if (is.fail())
- return;
- a0.i = static_cast<int>(x);
- read(is, std::forward<Args>(args)...);
-}
-
-template <class CharT, class Traits, class ...Args>
-void
-read(std::basic_istream<CharT, Traits>& is, int a0, Args&& ...args)
-{
- if (a0 != -1)
- {
- auto u = static_cast<unsigned>(a0);
- CharT buf[std::numeric_limits<unsigned>::digits10+2u] = {};
- auto e = buf;
- do
- {
- *e++ = static_cast<CharT>(CharT(u % 10) + CharT{'0'});
- u /= 10;
- } while (u > 0);
- std::reverse(buf, e);
- for (auto p = buf; p != e && is.rdstate() == std::ios::goodbit; ++p)
- read(is, *p);
- }
- if (is.rdstate() == std::ios::goodbit)
- read(is, std::forward<Args>(args)...);
-}
-
-template <class CharT, class Traits, class ...Args>
-void
-read(std::basic_istream<CharT, Traits>& is, rld a0, Args&& ...args)
-{
- auto x = read_long_double(is, a0.m, a0.M);
- if (is.fail())
- return;
- a0.i = x;
- read(is, std::forward<Args>(args)...);
-}
-
-template <class T, class CharT, class Traits>
-inline
-void
-checked_set(T& value, T from, T not_a_value, std::basic_ios<CharT, Traits>& is)
-{
- if (!is.fail())
- {
- if (value == not_a_value)
- value = std::move(from);
- else if (value != from)
- is.setstate(std::ios::failbit);
- }
-}
-
-} // namespace detail;
-
-template <class CharT, class Traits, class Duration, class Alloc = std::allocator<CharT>>
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
- fields<Duration>& fds, std::basic_string<CharT, Traits, Alloc>* abbrev,
- std::chrono::minutes* offset)
-{
- using std::numeric_limits;
- using std::ios;
- using std::chrono::duration;
- using std::chrono::duration_cast;
- using std::chrono::seconds;
- using std::chrono::minutes;
- using std::chrono::hours;
- typename std::basic_istream<CharT, Traits>::sentry ok{is, true};
- if (ok)
- {
- date::detail::save_istream<CharT, Traits> ss(is);
- is.fill(' ');
- is.flags(std::ios::skipws | std::ios::dec);
- is.width(0);
-#if !ONLY_C_LOCALE
- auto& f = std::use_facet<std::time_get<CharT>>(is.getloc());
- std::tm tm{};
-#endif
- const CharT* command = nullptr;
- auto modified = CharT{};
- auto width = -1;
-
- CONSTDATA int not_a_year = numeric_limits<int>::min();
- CONSTDATA int not_a_2digit_year = 100;
- CONSTDATA int not_a_century = not_a_year / 100;
- CONSTDATA int not_a_month = 0;
- CONSTDATA int not_a_day = 0;
- CONSTDATA int not_a_hour = numeric_limits<int>::min();
- CONSTDATA int not_a_hour_12_value = 0;
- CONSTDATA int not_a_minute = not_a_hour;
- CONSTDATA Duration not_a_second = Duration::min();
- CONSTDATA int not_a_doy = -1;
- CONSTDATA int not_a_weekday = 8;
- CONSTDATA int not_a_week_num = 100;
- CONSTDATA int not_a_ampm = -1;
- CONSTDATA minutes not_a_offset = minutes::min();
-
- int Y = not_a_year; // c, F, Y *
- int y = not_a_2digit_year; // D, x, y *
- int g = not_a_2digit_year; // g *
- int G = not_a_year; // G *
- int C = not_a_century; // C *
- int m = not_a_month; // b, B, h, m, c, D, F, x *
- int d = not_a_day; // c, d, D, e, F, x *
- int j = not_a_doy; // j *
- int wd = not_a_weekday; // a, A, u, w *
- int H = not_a_hour; // c, H, R, T, X *
- int I = not_a_hour_12_value; // I, r *
- int p = not_a_ampm; // p, r *
- int M = not_a_minute; // c, M, r, R, T, X *
- Duration s = not_a_second; // c, r, S, T, X *
- int U = not_a_week_num; // U *
- int V = not_a_week_num; // V *
- int W = not_a_week_num; // W *
- std::basic_string<CharT, Traits, Alloc> temp_abbrev; // Z *
- minutes temp_offset = not_a_offset; // z *
-
- using detail::read;
- using detail::rs;
- using detail::ru;
- using detail::rld;
- using detail::checked_set;
- for (; *fmt != CharT{} && !is.fail(); ++fmt)
- {
- switch (*fmt)
- {
- case 'a':
- case 'A':
- case 'u':
- case 'w': // wd: a, A, u, w
- if (command)
- {
- int trial_wd = not_a_weekday;
- if (*fmt == 'a' || *fmt == 'A')
- {
- if (modified == CharT{})
- {
-#if !ONLY_C_LOCALE
- ios::iostate err = ios::goodbit;
- f.get(is, nullptr, is, err, &tm, command, fmt+1);
- is.setstate(err);
- if (!is.fail())
- trial_wd = tm.tm_wday;
-#else
- auto nm = detail::weekday_names();
- auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first;
- if (!is.fail())
- trial_wd = i % 7;
-#endif
- }
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- }
- else // *fmt == 'u' || *fmt == 'w'
- {
-#if !ONLY_C_LOCALE
- if (modified == CharT{})
-#else
- if (modified != CharT{'E'})
-#endif
- {
- read(is, ru{trial_wd, 1, width == -1 ?
- 1u : static_cast<unsigned>(width)});
- if (!is.fail())
- {
- if (*fmt == 'u')
- {
- if (!(1 <= trial_wd && trial_wd <= 7))
- {
- trial_wd = not_a_weekday;
- is.setstate(ios::failbit);
- }
- else if (trial_wd == 7)
- trial_wd = 0;
- }
- else // *fmt == 'w'
- {
- if (!(0 <= trial_wd && trial_wd <= 6))
- {
- trial_wd = not_a_weekday;
- is.setstate(ios::failbit);
- }
- }
- }
- }
-#if !ONLY_C_LOCALE
- else if (modified == CharT{'O'})
- {
- ios::iostate err = ios::goodbit;
- f.get(is, nullptr, is, err, &tm, command, fmt+1);
- is.setstate(err);
- if (!is.fail())
- trial_wd = tm.tm_wday;
- }
-#endif
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- }
- if (trial_wd != not_a_weekday)
- checked_set(wd, trial_wd, not_a_weekday, is);
- }
- else // !command
- read(is, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- break;
- case 'b':
- case 'B':
- case 'h':
- if (command)
- {
- if (modified == CharT{})
- {
- int ttm = not_a_month;
-#if !ONLY_C_LOCALE
- ios::iostate err = ios::goodbit;
- f.get(is, nullptr, is, err, &tm, command, fmt+1);
- if ((err & ios::failbit) == 0)
- ttm = tm.tm_mon + 1;
- is.setstate(err);
-#else
- auto nm = detail::month_names();
- auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first;
- if (!is.fail())
- ttm = i % 12 + 1;
-#endif
- checked_set(m, ttm, not_a_month, is);
- }
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- case 'c':
- if (command)
- {
- if (modified != CharT{'O'})
- {
-#if !ONLY_C_LOCALE
- ios::iostate err = ios::goodbit;
- f.get(is, nullptr, is, err, &tm, command, fmt+1);
- if ((err & ios::failbit) == 0)
- {
- checked_set(Y, tm.tm_year + 1900, not_a_year, is);
- checked_set(m, tm.tm_mon + 1, not_a_month, is);
- checked_set(d, tm.tm_mday, not_a_day, is);
- checked_set(H, tm.tm_hour, not_a_hour, is);
- checked_set(M, tm.tm_min, not_a_minute, is);
- checked_set(s, duration_cast<Duration>(seconds{tm.tm_sec}),
- not_a_second, is);
- }
- is.setstate(err);
-#else
- // "%a %b %e %T %Y"
- auto nm = detail::weekday_names();
- auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first;
- checked_set(wd, static_cast<int>(i % 7), not_a_weekday, is);
- ws(is);
- nm = detail::month_names();
- i = detail::scan_keyword(is, nm.first, nm.second) - nm.first;
- checked_set(m, static_cast<int>(i % 12 + 1), not_a_month, is);
- ws(is);
- int td = not_a_day;
- read(is, rs{td, 1, 2});
- checked_set(d, td, not_a_day, is);
- ws(is);
- using dfs = detail::decimal_format_seconds<Duration>;
- CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width;
- int tH;
- int tM;
- long double S;
- read(is, ru{tH, 1, 2}, CharT{':'}, ru{tM, 1, 2},
- CharT{':'}, rld{S, 1, w});
- checked_set(H, tH, not_a_hour, is);
- checked_set(M, tM, not_a_minute, is);
- checked_set(s, round<Duration>(duration<long double>{S}),
- not_a_second, is);
- ws(is);
- int tY = not_a_year;
- read(is, rs{tY, 1, 4u});
- checked_set(Y, tY, not_a_year, is);
-#endif
- }
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- case 'x':
- if (command)
- {
- if (modified != CharT{'O'})
- {
-#if !ONLY_C_LOCALE
- ios::iostate err = ios::goodbit;
- f.get(is, nullptr, is, err, &tm, command, fmt+1);
- if ((err & ios::failbit) == 0)
- {
- checked_set(Y, tm.tm_year + 1900, not_a_year, is);
- checked_set(m, tm.tm_mon + 1, not_a_month, is);
- checked_set(d, tm.tm_mday, not_a_day, is);
- }
- is.setstate(err);
-#else
- // "%m/%d/%y"
- int ty = not_a_2digit_year;
- int tm = not_a_month;
- int td = not_a_day;
- read(is, ru{tm, 1, 2}, CharT{'/'}, ru{td, 1, 2}, CharT{'/'},
- rs{ty, 1, 2});
- checked_set(y, ty, not_a_2digit_year, is);
- checked_set(m, tm, not_a_month, is);
- checked_set(d, td, not_a_day, is);
-#endif
- }
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- case 'X':
- if (command)
- {
- if (modified != CharT{'O'})
- {
-#if !ONLY_C_LOCALE
- ios::iostate err = ios::goodbit;
- f.get(is, nullptr, is, err, &tm, command, fmt+1);
- if ((err & ios::failbit) == 0)
- {
- checked_set(H, tm.tm_hour, not_a_hour, is);
- checked_set(M, tm.tm_min, not_a_minute, is);
- checked_set(s, duration_cast<Duration>(seconds{tm.tm_sec}),
- not_a_second, is);
- }
- is.setstate(err);
-#else
- // "%T"
- using dfs = detail::decimal_format_seconds<Duration>;
- CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width;
- int tH = not_a_hour;
- int tM = not_a_minute;
- long double S;
- read(is, ru{tH, 1, 2}, CharT{':'}, ru{tM, 1, 2},
- CharT{':'}, rld{S, 1, w});
- checked_set(H, tH, not_a_hour, is);
- checked_set(M, tM, not_a_minute, is);
- checked_set(s, round<Duration>(duration<long double>{S}),
- not_a_second, is);
-#endif
- }
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- case 'C':
- if (command)
- {
- int tC = not_a_century;
-#if !ONLY_C_LOCALE
- if (modified == CharT{})
- {
-#endif
- read(is, rs{tC, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
-#if !ONLY_C_LOCALE
- }
- else
- {
- ios::iostate err = ios::goodbit;
- f.get(is, nullptr, is, err, &tm, command, fmt+1);
- if ((err & ios::failbit) == 0)
- {
- auto tY = tm.tm_year + 1900;
- tC = (tY >= 0 ? tY : tY-99) / 100;
- }
- is.setstate(err);
- }
-#endif
- checked_set(C, tC, not_a_century, is);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- case 'D':
- if (command)
- {
- if (modified == CharT{})
- {
- int tn = not_a_month;
- int td = not_a_day;
- int ty = not_a_2digit_year;
- read(is, ru{tn, 1, 2}, CharT{'\0'}, CharT{'/'}, CharT{'\0'},
- ru{td, 1, 2}, CharT{'\0'}, CharT{'/'}, CharT{'\0'},
- rs{ty, 1, 2});
- checked_set(y, ty, not_a_2digit_year, is);
- checked_set(m, tn, not_a_month, is);
- checked_set(d, td, not_a_day, is);
- }
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- case 'F':
- if (command)
- {
- if (modified == CharT{})
- {
- int tY = not_a_year;
- int tn = not_a_month;
- int td = not_a_day;
- read(is, rs{tY, 1, width == -1 ? 4u : static_cast<unsigned>(width)},
- CharT{'-'}, ru{tn, 1, 2}, CharT{'-'}, ru{td, 1, 2});
- checked_set(Y, tY, not_a_year, is);
- checked_set(m, tn, not_a_month, is);
- checked_set(d, td, not_a_day, is);
- }
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- case 'd':
- case 'e':
- if (command)
- {
-#if !ONLY_C_LOCALE
- if (modified == CharT{})
-#else
- if (modified != CharT{'E'})
-#endif
- {
- int td = not_a_day;
- read(is, rs{td, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
- checked_set(d, td, not_a_day, is);
- }
-#if !ONLY_C_LOCALE
- else if (modified == CharT{'O'})
- {
- ios::iostate err = ios::goodbit;
- f.get(is, nullptr, is, err, &tm, command, fmt+1);
- command = nullptr;
- width = -1;
- modified = CharT{};
- if ((err & ios::failbit) == 0)
- checked_set(d, tm.tm_mday, not_a_day, is);
- is.setstate(err);
- }
-#endif
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- case 'H':
- if (command)
- {
-#if !ONLY_C_LOCALE
- if (modified == CharT{})
-#else
- if (modified != CharT{'E'})
-#endif
- {
- int tH = not_a_hour;
- read(is, ru{tH, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
- checked_set(H, tH, not_a_hour, is);
- }
-#if !ONLY_C_LOCALE
- else if (modified == CharT{'O'})
- {
- ios::iostate err = ios::goodbit;
- f.get(is, nullptr, is, err, &tm, command, fmt+1);
- if ((err & ios::failbit) == 0)
- checked_set(H, tm.tm_hour, not_a_hour, is);
- is.setstate(err);
- }
-#endif
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- case 'I':
- if (command)
- {
- if (modified == CharT{})
- {
- int tI = not_a_hour_12_value;
- // reads in an hour into I, but most be in [1, 12]
- read(is, rs{tI, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
- if (!(1 <= tI && tI <= 12))
- is.setstate(ios::failbit);
- checked_set(I, tI, not_a_hour_12_value, is);
- }
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- case 'j':
- if (command)
- {
- if (modified == CharT{})
- {
- int tj = not_a_doy;
- read(is, ru{tj, 1, width == -1 ? 3u : static_cast<unsigned>(width)});
- checked_set(j, tj, not_a_doy, is);
- }
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- case 'M':
- if (command)
- {
-#if !ONLY_C_LOCALE
- if (modified == CharT{})
-#else
- if (modified != CharT{'E'})
-#endif
- {
- int tM = not_a_minute;
- read(is, ru{tM, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
- checked_set(M, tM, not_a_minute, is);
- }
-#if !ONLY_C_LOCALE
- else if (modified == CharT{'O'})
- {
- ios::iostate err = ios::goodbit;
- f.get(is, nullptr, is, err, &tm, command, fmt+1);
- if ((err & ios::failbit) == 0)
- checked_set(M, tm.tm_min, not_a_minute, is);
- is.setstate(err);
- }
-#endif
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- case 'm':
- if (command)
- {
-#if !ONLY_C_LOCALE
- if (modified == CharT{})
-#else
- if (modified != CharT{'E'})
-#endif
- {
- int tn = not_a_month;
- read(is, rs{tn, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
- checked_set(m, tn, not_a_month, is);
- }
-#if !ONLY_C_LOCALE
- else if (modified == CharT{'O'})
- {
- ios::iostate err = ios::goodbit;
- f.get(is, nullptr, is, err, &tm, command, fmt+1);
- if ((err & ios::failbit) == 0)
- checked_set(m, tm.tm_mon + 1, not_a_month, is);
- is.setstate(err);
- }
-#endif
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- case 'n':
- case 't':
- if (command)
- {
- if (modified == CharT{})
- {
- // %n matches a single white space character
- // %t matches 0 or 1 white space characters
- auto ic = is.peek();
- if (Traits::eq_int_type(ic, Traits::eof()))
- {
- ios::iostate err = ios::eofbit;
- if (*fmt == 'n')
- err |= ios::failbit;
- is.setstate(err);
- break;
- }
- if (isspace(ic))
- {
- (void)is.get();
- }
- else if (*fmt == 'n')
- is.setstate(ios::failbit);
- }
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- case 'p':
- if (command)
- {
- if (modified == CharT{})
- {
- int tp = not_a_ampm;
-#if !ONLY_C_LOCALE
- tm = std::tm{};
- tm.tm_hour = 1;
- ios::iostate err = ios::goodbit;
- f.get(is, nullptr, is, err, &tm, command, fmt+1);
- is.setstate(err);
- if (tm.tm_hour == 1)
- tp = 0;
- else if (tm.tm_hour == 13)
- tp = 1;
- else
- is.setstate(err);
-#else
- auto nm = detail::ampm_names();
- auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first;
- tp = i;
-#endif
- checked_set(p, tp, not_a_ampm, is);
- }
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
-
- break;
- case 'r':
- if (command)
- {
- if (modified == CharT{})
- {
-#if !ONLY_C_LOCALE
- ios::iostate err = ios::goodbit;
- f.get(is, nullptr, is, err, &tm, command, fmt+1);
- if ((err & ios::failbit) == 0)
- {
- checked_set(H, tm.tm_hour, not_a_hour, is);
- checked_set(M, tm.tm_min, not_a_hour, is);
- checked_set(s, duration_cast<Duration>(seconds{tm.tm_sec}),
- not_a_second, is);
- }
- is.setstate(err);
-#else
- // "%I:%M:%S %p"
- using dfs = detail::decimal_format_seconds<Duration>;
- CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width;
- long double S;
- int tI = not_a_hour_12_value;
- int tM = not_a_minute;
- read(is, ru{tI, 1, 2}, CharT{':'}, ru{tM, 1, 2},
- CharT{':'}, rld{S, 1, w});
- checked_set(I, tI, not_a_hour_12_value, is);
- checked_set(M, tM, not_a_minute, is);
- checked_set(s, round<Duration>(duration<long double>{S}),
- not_a_second, is);
- ws(is);
- auto nm = detail::ampm_names();
- auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first;
- checked_set(p, static_cast<int>(i), not_a_ampm, is);
-#endif
- }
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- case 'R':
- if (command)
- {
- if (modified == CharT{})
- {
- int tH = not_a_hour;
- int tM = not_a_minute;
- read(is, ru{tH, 1, 2}, CharT{'\0'}, CharT{':'}, CharT{'\0'},
- ru{tM, 1, 2}, CharT{'\0'});
- checked_set(H, tH, not_a_hour, is);
- checked_set(M, tM, not_a_minute, is);
- }
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- case 'S':
- if (command)
- {
- #if !ONLY_C_LOCALE
- if (modified == CharT{})
-#else
- if (modified != CharT{'E'})
-#endif
- {
- using dfs = detail::decimal_format_seconds<Duration>;
- CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width;
- long double S;
- read(is, rld{S, 1, width == -1 ? w : static_cast<unsigned>(width)});
- checked_set(s, round<Duration>(duration<long double>{S}),
- not_a_second, is);
- }
-#if !ONLY_C_LOCALE
- else if (modified == CharT{'O'})
- {
- ios::iostate err = ios::goodbit;
- f.get(is, nullptr, is, err, &tm, command, fmt+1);
- if ((err & ios::failbit) == 0)
- checked_set(s, duration_cast<Duration>(seconds{tm.tm_sec}),
- not_a_second, is);
- is.setstate(err);
- }
-#endif
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- case 'T':
- if (command)
- {
- if (modified == CharT{})
- {
- using dfs = detail::decimal_format_seconds<Duration>;
- CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width;
- int tH = not_a_hour;
- int tM = not_a_minute;
- long double S;
- read(is, ru{tH, 1, 2}, CharT{':'}, ru{tM, 1, 2},
- CharT{':'}, rld{S, 1, w});
- checked_set(H, tH, not_a_hour, is);
- checked_set(M, tM, not_a_minute, is);
- checked_set(s, round<Duration>(duration<long double>{S}),
- not_a_second, is);
- }
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- case 'Y':
- if (command)
- {
-#if !ONLY_C_LOCALE
- if (modified == CharT{})
-#else
- if (modified != CharT{'O'})
-#endif
- {
- int tY = not_a_year;
- read(is, rs{tY, 1, width == -1 ? 4u : static_cast<unsigned>(width)});
- checked_set(Y, tY, not_a_year, is);
- }
-#if !ONLY_C_LOCALE
- else if (modified == CharT{'E'})
- {
- ios::iostate err = ios::goodbit;
- f.get(is, nullptr, is, err, &tm, command, fmt+1);
- if ((err & ios::failbit) == 0)
- checked_set(Y, tm.tm_year + 1900, not_a_year, is);
- is.setstate(err);
- }
-#endif
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- case 'y':
- if (command)
- {
-#if !ONLY_C_LOCALE
- if (modified == CharT{})
-#endif
- {
- int ty = not_a_2digit_year;
- read(is, ru{ty, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
- checked_set(y, ty, not_a_2digit_year, is);
- }
-#if !ONLY_C_LOCALE
- else
- {
- ios::iostate err = ios::goodbit;
- f.get(is, nullptr, is, err, &tm, command, fmt+1);
- if ((err & ios::failbit) == 0)
- checked_set(Y, tm.tm_year + 1900, not_a_year, is);
- is.setstate(err);
- }
-#endif
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- case 'g':
- if (command)
- {
- if (modified == CharT{})
- {
- int tg = not_a_2digit_year;
- read(is, ru{tg, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
- checked_set(g, tg, not_a_2digit_year, is);
- }
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- case 'G':
- if (command)
- {
- if (modified == CharT{})
- {
- int tG = not_a_year;
- read(is, rs{tG, 1, width == -1 ? 4u : static_cast<unsigned>(width)});
- checked_set(G, tG, not_a_year, is);
- }
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- case 'U':
- if (command)
- {
- if (modified == CharT{})
- {
- int tU = not_a_week_num;
- read(is, ru{tU, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
- checked_set(U, tU, not_a_week_num, is);
- }
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- case 'V':
- if (command)
- {
- if (modified == CharT{})
- {
- int tV = not_a_week_num;
- read(is, ru{tV, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
- checked_set(V, tV, not_a_week_num, is);
- }
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- case 'W':
- if (command)
- {
- if (modified == CharT{})
- {
- int tW = not_a_week_num;
- read(is, ru{tW, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
- checked_set(W, tW, not_a_week_num, is);
- }
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- case 'E':
- case 'O':
- if (command)
- {
- if (modified == CharT{})
- {
- modified = *fmt;
- }
- else
- {
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- }
- else
- read(is, *fmt);
- break;
- case '%':
- if (command)
- {
- if (modified == CharT{})
- read(is, *fmt);
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- command = fmt;
- break;
- case 'z':
- if (command)
- {
- int tH, tM;
- minutes toff = not_a_offset;
- bool neg = false;
- auto ic = is.peek();
- if (!Traits::eq_int_type(ic, Traits::eof()))
- {
- auto c = static_cast<char>(Traits::to_char_type(ic));
- if (c == '-')
- neg = true;
- }
- if (modified == CharT{})
- {
- read(is, rs{tH, 2, 2});
- if (!is.fail())
- toff = hours{std::abs(tH)};
- if (is.good())
- {
- ic = is.peek();
- if (!Traits::eq_int_type(ic, Traits::eof()))
- {
- auto c = static_cast<char>(Traits::to_char_type(ic));
- if ('0' <= c && c <= '9')
- {
- read(is, ru{tM, 2, 2});
- if (!is.fail())
- toff += minutes{tM};
- }
- }
- }
- }
- else
- {
- read(is, rs{tH, 1, 2});
- if (!is.fail())
- toff = hours{std::abs(tH)};
- if (is.good())
- {
- ic = is.peek();
- if (!Traits::eq_int_type(ic, Traits::eof()))
- {
- auto c = static_cast<char>(Traits::to_char_type(ic));
- if (c == ':')
- {
- (void)is.get();
- read(is, ru{tM, 2, 2});
- if (!is.fail())
- toff += minutes{tM};
- }
- }
- }
- }
- if (neg)
- toff = -toff;
- checked_set(temp_offset, toff, not_a_offset, is);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- case 'Z':
- if (command)
- {
- if (modified == CharT{})
- {
- std::basic_string<CharT, Traits, Alloc> buf;
- while (is.rdstate() == std::ios::goodbit)
- {
- auto i = is.rdbuf()->sgetc();
- if (Traits::eq_int_type(i, Traits::eof()))
- {
- is.setstate(ios::eofbit);
- break;
- }
- auto wc = Traits::to_char_type(i);
- auto c = static_cast<char>(wc);
- // is c a valid time zone name or abbreviation character?
- if (!(CharT{1} < wc && wc < CharT{127}) || !(isalnum(c) ||
- c == '_' || c == '/' || c == '-' || c == '+'))
- break;
- buf.push_back(c);
- is.rdbuf()->sbumpc();
- }
- if (buf.empty())
- is.setstate(ios::failbit);
- checked_set(temp_abbrev, buf, {}, is);
- }
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- else
- read(is, *fmt);
- break;
- default:
- if (command)
- {
- if (width == -1 && modified == CharT{} && '0' <= *fmt && *fmt <= '9')
- {
- width = static_cast<char>(*fmt) - '0';
- while ('0' <= fmt[1] && fmt[1] <= '9')
- width = 10*width + static_cast<char>(*++fmt) - '0';
- }
- else
- {
- if (modified == CharT{})
- read(is, CharT{'%'}, width, *fmt);
- else
- read(is, CharT{'%'}, width, modified, *fmt);
- command = nullptr;
- width = -1;
- modified = CharT{};
- }
- }
- else // !command
- {
- if (isspace(static_cast<unsigned char>(*fmt)))
- {
- // space matches 0 or more white space characters
- if (is.good())
- ws(is);
- }
- else
- read(is, *fmt);
- }
- break;
- }
- }
- // is.fail() || *fmt == CharT{}
- if (is.rdstate() == ios::goodbit && command)
- {
- if (modified == CharT{})
- read(is, CharT{'%'}, width);
- else
- read(is, CharT{'%'}, width, modified);
- }
- if (!is.fail())
- {
- if (y != not_a_2digit_year)
- {
- // Convert y and an optional C to Y
- if (!(0 <= y && y <= 99))
- goto broken;
- if (C == not_a_century)
- {
- if (Y == not_a_year)
- {
- if (y >= 69)
- C = 19;
- else
- C = 20;
- }
- else
- {
- C = (Y >= 0 ? Y : Y-100) / 100;
- }
- }
- int tY;
- if (C >= 0)
- tY = 100*C + y;
- else
- tY = 100*(C+1) - (y == 0 ? 100 : y);
- if (Y != not_a_year && Y != tY)
- goto broken;
- Y = tY;
- }
- if (g != not_a_2digit_year)
- {
- // Convert g and an optional C to G
- if (!(0 <= g && g <= 99))
- goto broken;
- if (C == not_a_century)
- {
- if (G == not_a_year)
- {
- if (g >= 69)
- C = 19;
- else
- C = 20;
- }
- else
- {
- C = (G >= 0 ? G : G-100) / 100;
- }
- }
- int tG;
- if (C >= 0)
- tG = 100*C + g;
- else
- tG = 100*(C+1) - (g == 0 ? 100 : g);
- if (G != not_a_year && G != tG)
- goto broken;
- G = tG;
- }
- if (Y < static_cast<int>(year::min()) || Y > static_cast<int>(year::max()))
- Y = not_a_year;
- bool computed = false;
- if (G != not_a_year && V != not_a_week_num && wd != not_a_weekday)
- {
- year_month_day ymd_trial = sys_days(year{G-1}/December/Thursday[last]) +
- (Monday-Thursday) + weeks{V-1} +
- (weekday{static_cast<unsigned>(wd)}-Monday);
- if (Y == not_a_year)
- Y = static_cast<int>(ymd_trial.year());
- else if (year{Y} != ymd_trial.year())
- goto broken;
- if (m == not_a_month)
- m = static_cast<int>(static_cast<unsigned>(ymd_trial.month()));
- else if (month(static_cast<unsigned>(m)) != ymd_trial.month())
- goto broken;
- if (d == not_a_day)
- d = static_cast<int>(static_cast<unsigned>(ymd_trial.day()));
- else if (day(static_cast<unsigned>(d)) != ymd_trial.day())
- goto broken;
- computed = true;
- }
- if (Y != not_a_year && U != not_a_week_num && wd != not_a_weekday)
- {
- year_month_day ymd_trial = sys_days(year{Y}/January/Sunday[1]) +
- weeks{U-1} +
- (weekday{static_cast<unsigned>(wd)} - Sunday);
- if (Y == not_a_year)
- Y = static_cast<int>(ymd_trial.year());
- else if (year{Y} != ymd_trial.year())
- goto broken;
- if (m == not_a_month)
- m = static_cast<int>(static_cast<unsigned>(ymd_trial.month()));
- else if (month(static_cast<unsigned>(m)) != ymd_trial.month())
- goto broken;
- if (d == not_a_day)
- d = static_cast<int>(static_cast<unsigned>(ymd_trial.day()));
- else if (day(static_cast<unsigned>(d)) != ymd_trial.day())
- goto broken;
- computed = true;
- }
- if (Y != not_a_year && W != not_a_week_num && wd != not_a_weekday)
- {
- year_month_day ymd_trial = sys_days(year{Y}/January/Monday[1]) +
- weeks{W-1} +
- (weekday{static_cast<unsigned>(wd)} - Monday);
- if (Y == not_a_year)
- Y = static_cast<int>(ymd_trial.year());
- else if (year{Y} != ymd_trial.year())
- goto broken;
- if (m == not_a_month)
- m = static_cast<int>(static_cast<unsigned>(ymd_trial.month()));
- else if (month(static_cast<unsigned>(m)) != ymd_trial.month())
- goto broken;
- if (d == not_a_day)
- d = static_cast<int>(static_cast<unsigned>(ymd_trial.day()));
- else if (day(static_cast<unsigned>(d)) != ymd_trial.day())
- goto broken;
- computed = true;
- }
- if (j != not_a_doy && Y != not_a_year)
- {
- auto ymd_trial = year_month_day{local_days(year{Y}/1/1) + days{j-1}};
- if (m == 0)
- m = static_cast<int>(static_cast<unsigned>(ymd_trial.month()));
- else if (month(static_cast<unsigned>(m)) != ymd_trial.month())
- goto broken;
- if (d == 0)
- d = static_cast<int>(static_cast<unsigned>(ymd_trial.day()));
- else if (day(static_cast<unsigned>(d)) != ymd_trial.day())
- goto broken;
- j = not_a_doy;
- }
- auto ymd = year{Y}/m/d;
- if (ymd.ok())
- {
- if (wd == not_a_weekday)
- wd = static_cast<int>((weekday(sys_days(ymd)) - Sunday).count());
- else if (wd != static_cast<int>((weekday(sys_days(ymd)) - Sunday).count()))
- goto broken;
- if (!computed)
- {
- if (G != not_a_year || V != not_a_week_num)
- {
- sys_days sd = ymd;
- auto G_trial = year_month_day{sd + days{3}}.year();
- auto start = sys_days((G_trial - years{1})/December/Thursday[last]) +
- (Monday - Thursday);
- if (sd < start)
- {
- --G_trial;
- if (V != not_a_week_num)
- start = sys_days((G_trial - years{1})/December/Thursday[last])
- + (Monday - Thursday);
- }
- if (G != not_a_year && G != static_cast<int>(G_trial))
- goto broken;
- if (V != not_a_week_num)
- {
- auto V_trial = duration_cast<weeks>(sd - start).count() + 1;
- if (V != V_trial)
- goto broken;
- }
- }
- if (U != not_a_week_num)
- {
- auto start = sys_days(Sunday[1]/January/ymd.year());
- auto U_trial = floor<weeks>(sys_days(ymd) - start).count() + 1;
- if (U != U_trial)
- goto broken;
- }
- if (W != not_a_week_num)
- {
- auto start = sys_days(Monday[1]/January/ymd.year());
- auto W_trial = floor<weeks>(sys_days(ymd) - start).count() + 1;
- if (W != W_trial)
- goto broken;
- }
- }
- }
- fds.ymd = ymd;
- if (I != not_a_hour_12_value)
- {
- if (!(1 <= I && I <= 12))
- goto broken;
- if (p != not_a_ampm)
- {
- // p is in [0, 1] == [AM, PM]
- // Store trial H in I
- if (I == 12)
- --p;
- I += p*12;
- // Either set H from I or make sure H and I are consistent
- if (H == not_a_hour)
- H = I;
- else if (I != H)
- goto broken;
- }
- else // p == not_a_ampm
- {
- // if H, make sure H and I could be consistent
- if (H != not_a_hour)
- {
- if (I == 12)
- {
- if (H != 0 && H != 12)
- goto broken;
- }
- else if (!(I == H || I == H+12))
- {
- goto broken;
- }
- }
- }
- }
- if (H != not_a_hour)
- {
- fds.has_tod = true;
- fds.tod = hh_mm_ss<Duration>{hours{H}};
- }
- if (M != not_a_minute)
- {
- fds.has_tod = true;
- fds.tod.m_ = minutes{M};
- }
- if (s != not_a_second)
- {
- fds.has_tod = true;
- fds.tod.s_ = detail::decimal_format_seconds<Duration>{s};
- }
- if (j != not_a_doy)
- {
- fds.has_tod = true;
- fds.tod.h_ += hours{days{j}};
- }
- if (wd != not_a_weekday)
- fds.wd = weekday{static_cast<unsigned>(wd)};
- if (abbrev != nullptr)
- *abbrev = std::move(temp_abbrev);
- if (offset != nullptr && temp_offset != not_a_offset)
- *offset = temp_offset;
- }
- return is;
- }
-broken:
- is.setstate(ios::failbit);
- return is;
-}
-
-template <class CharT, class Traits, class Alloc = std::allocator<CharT>>
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt, year& y,
- std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
- std::chrono::minutes* offset = nullptr)
-{
- using CT = std::chrono::seconds;
- fields<CT> fds{};
- from_stream(is, fmt, fds, abbrev, offset);
- if (!fds.ymd.year().ok())
- is.setstate(std::ios::failbit);
- if (!is.fail())
- y = fds.ymd.year();
- return is;
-}
-
-template <class CharT, class Traits, class Alloc = std::allocator<CharT>>
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt, month& m,
- std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
- std::chrono::minutes* offset = nullptr)
-{
- using CT = std::chrono::seconds;
- fields<CT> fds{};
- from_stream(is, fmt, fds, abbrev, offset);
- if (!fds.ymd.month().ok())
- is.setstate(std::ios::failbit);
- if (!is.fail())
- m = fds.ymd.month();
- return is;
-}
-
-template <class CharT, class Traits, class Alloc = std::allocator<CharT>>
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt, day& d,
- std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
- std::chrono::minutes* offset = nullptr)
-{
- using CT = std::chrono::seconds;
- fields<CT> fds{};
- from_stream(is, fmt, fds, abbrev, offset);
- if (!fds.ymd.day().ok())
- is.setstate(std::ios::failbit);
- if (!is.fail())
- d = fds.ymd.day();
- return is;
-}
-
-template <class CharT, class Traits, class Alloc = std::allocator<CharT>>
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt, weekday& wd,
- std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
- std::chrono::minutes* offset = nullptr)
-{
- using CT = std::chrono::seconds;
- fields<CT> fds{};
- from_stream(is, fmt, fds, abbrev, offset);
- if (!fds.wd.ok())
- is.setstate(std::ios::failbit);
- if (!is.fail())
- wd = fds.wd;
- return is;
-}
-
-template <class CharT, class Traits, class Alloc = std::allocator<CharT>>
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt, year_month& ym,
- std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
- std::chrono::minutes* offset = nullptr)
-{
- using CT = std::chrono::seconds;
- fields<CT> fds{};
- from_stream(is, fmt, fds, abbrev, offset);
- if (!fds.ymd.month().ok())
- is.setstate(std::ios::failbit);
- if (!is.fail())
- ym = fds.ymd.year()/fds.ymd.month();
- return is;
-}
-
-// Extracts a month_day from `is` according to the format string `fmt`.
-//
-// The input is parsed into a seconds-precision `fields` record; `abbrev` and
-// `offset` are forwarded to that parse unchanged. Both the parsed month and
-// the parsed day must be ok(); otherwise failbit is set on `is`.
-// `md` is assigned only when the stream is not in a failed state.
-// Returns `is`.
-template <class CharT, class Traits, class Alloc = std::allocator<CharT>>
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt, month_day& md,
-            std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
-            std::chrono::minutes* offset = nullptr)
-{
-    fields<std::chrono::seconds> parsed{};
-    from_stream(is, fmt, parsed, abbrev, offset);
-    // The day is only inspected when the month is valid (short-circuit).
-    const bool month_and_day_ok = parsed.ymd.month().ok() && parsed.ymd.day().ok();
-    if (!month_and_day_ok)
-    {
-        is.setstate(std::ios::failbit);
-    }
-    if (is.fail())
-    {
-        return is;
-    }
-    md = parsed.ymd.month()/parsed.ymd.day();
-    return is;
-}
-
-// Extracts a year_month_day from `is` according to the format string `fmt`.
-//
-// The input is parsed into a seconds-precision `fields` record; `abbrev` and
-// `offset` are forwarded to that parse unchanged. If the parsed date is not
-// ok(), failbit is set on `is`. `ymd` is assigned only when the stream is
-// not in a failed state. Returns `is`.
-template <class CharT, class Traits, class Alloc = std::allocator<CharT>>
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
-            year_month_day& ymd,
-            std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
-            std::chrono::minutes* offset = nullptr)
-{
-    fields<std::chrono::seconds> parsed{};
-    from_stream(is, fmt, parsed, abbrev, offset);
-    if (!parsed.ymd.ok())
-    {
-        is.setstate(std::ios::failbit);
-    }
-    if (is.fail())
-    {
-        return is;
-    }
-    ymd = parsed.ymd;
-    return is;
-}
-
-// Extracts a sys_time<Duration> from `is` according to the format string `fmt`.
-//
-// Parsing is done at the common precision of `Duration` and seconds, with the
-// record's has_tod flag set before the parse. When the caller passes no
-// `offset`, a local minutes object is used in its place so that an offset
-// value is always available for the final computation.
-// failbit is set on `is` when the parsed date is not ok() or the parsed
-// time of day is not in_conventional_range(). `tp` is assigned only when the
-// stream is not in a failed state; the value is the parsed date minus the
-// offset plus the parsed time of day, rounded to `Duration`. Returns `is`.
-template <class Duration, class CharT, class Traits, class Alloc = std::allocator<CharT>>
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
-            sys_time<Duration>& tp,
-            std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
-            std::chrono::minutes* offset = nullptr)
-{
-    using CT = typename std::common_type<Duration, std::chrono::seconds>::type;
-
-    std::chrono::minutes fallback_offset{};
-    std::chrono::minutes* effective_offset = &fallback_offset;
-    if (offset)
-    {
-        effective_offset = offset;
-    }
-
-    fields<CT> parsed{};
-    parsed.has_tod = true;
-    from_stream(is, fmt, parsed, abbrev, effective_offset);
-
-    const bool usable = parsed.ymd.ok() && parsed.tod.in_conventional_range();
-    if (!usable)
-    {
-        is.setstate(std::ios::failbit);
-    }
-    if (is.fail())
-    {
-        return is;
-    }
-    tp = round<Duration>(sys_days(parsed.ymd) - *effective_offset + parsed.tod.to_duration());
-    return is;
-}
-
-// Extracts a local_time<Duration> from `is` according to the format string `fmt`.
-//
-// Parsing is done at the common precision of `Duration` and seconds, with the
-// record's has_tod flag set before the parse. `abbrev` and `offset` are only
-// forwarded to the parse; the offset does not enter the computation of `tp`.
-// failbit is set on `is` when the parsed date is not ok() or the parsed
-// time of day is not in_conventional_range(). `tp` is assigned only when the
-// stream is not in a failed state; the value is the parsed local date plus
-// the parsed time of day, rounded to `Duration`. Returns `is`.
-template <class Duration, class CharT, class Traits, class Alloc = std::allocator<CharT>>
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
-            local_time<Duration>& tp,
-            std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
-            std::chrono::minutes* offset = nullptr)
-{
-    using CT = typename std::common_type<Duration, std::chrono::seconds>::type;
-
-    fields<CT> parsed{};
-    parsed.has_tod = true;
-    from_stream(is, fmt, parsed, abbrev, offset);
-
-    const bool usable = parsed.ymd.ok() && parsed.tod.in_conventional_range();
-    if (!usable)
-    {
-        is.setstate(std::ios::failbit);
-    }
-    if (is.fail())
-    {
-        return is;
-    }
-    tp = round<Duration>(local_seconds{local_days(parsed.ymd)} + parsed.tod.to_duration());
-    return is;
-}
-
-// Extracts a std::chrono::duration<Rep, Period> from `is` according to `fmt`.
-//
-// Parsing is done at the common precision of the target duration and seconds;
-// `abbrev` and `offset` are forwarded to the parse unchanged. If the parse
-// did not leave has_tod set in the record, failbit is set on `is`.
-// `d` is assigned (duration_cast of the parsed time of day) only when the
-// stream is not in a failed state. Returns `is`.
-template <class Rep, class Period, class CharT, class Traits, class Alloc = std::allocator<CharT>>
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
-            std::chrono::duration<Rep, Period>& d,
-            std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
-            std::chrono::minutes* offset = nullptr)
-{
-    using Duration = std::chrono::duration<Rep, Period>;
-    using CT = typename std::common_type<Duration, std::chrono::seconds>::type;
-
-    fields<CT> parsed{};
-    from_stream(is, fmt, parsed, abbrev, offset);
-    if (!parsed.has_tod)
-    {
-        is.setstate(std::ios::failbit);
-    }
-    if (is.fail())
-    {
-        return is;
-    }
-    d = std::chrono::duration_cast<Duration>(parsed.tod.to_duration());
-    return is;
-}
-
-// Stream manipulator returned by the parse() overloads.
-//
-// Bundles a copy of the format string with a reference to the destination
-// object and the optional abbreviation / offset output pointers, so that
-// operator>> can hand all of them to from_stream in a single call.
-template <class Parsable, class CharT, class Traits = std::char_traits<CharT>,
-          class Alloc = std::allocator<CharT>>
-struct parse_manip
-{
-    // Owned copy of the format string.
-    const std::basic_string<CharT, Traits, Alloc> format_;
-    // Destination object, held by reference.
-    Parsable& tp_;
-    // Optional output for a parsed abbreviation; nullptr when not requested.
-    std::basic_string<CharT, Traits, Alloc>* abbrev_;
-    // Optional output for a parsed offset; nullptr when not requested.
-    std::chrono::minutes* offset_;
-
-    parse_manip(std::basic_string<CharT, Traits, Alloc> format, Parsable& tp,
-                std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
-                std::chrono::minutes* offset = nullptr)
-        : format_(std::move(format)),
-          tp_(tp),
-          abbrev_(abbrev),
-          offset_(offset)
-    {
-    }
-};
-
-// Applies a parse_manip to an input stream: forwards the stored format
-// string, destination, abbreviation pointer and offset pointer to
-// from_stream and returns its result.
-template <class Parsable, class CharT, class Traits, class Alloc>
-std::basic_istream<CharT, Traits>&
-operator>>(std::basic_istream<CharT, Traits>& is,
-           const parse_manip<Parsable, CharT, Traits, Alloc>& x)
-{
-    const CharT* fmt = x.format_.c_str();
-    return from_stream(is, fmt, x.tp_, x.abbrev_, x.offset_);
-}
-
-// parse(format, tp): builds a parse_manip binding a std::basic_string format
-// and the destination `tp`, for use as `is >> parse(format, tp)`.
-// The decltype in the trailing return type names a
-// from_stream(istream&, const CharT*, Parsable&) call before the comma, so
-// this overload is only viable when that call is well-formed for Parsable.
-template <class Parsable, class CharT, class Traits, class Alloc>
-inline
-auto
-parse(const std::basic_string<CharT, Traits, Alloc>& format, Parsable& tp)
- -> decltype(from_stream(std::declval<std::basic_istream<CharT, Traits>&>(),
- format.c_str(), tp),
- parse_manip<Parsable, CharT, Traits, Alloc>{format, tp})
-{
- return {format, tp};
-}
-
-// parse(format, tp, abbrev): as the two-argument form, and additionally
-// passes the address of `abbrev` to the manipulator as the abbreviation
-// output. Viable only when the matching from_stream call named in the
-// trailing return type is well-formed.
-template <class Parsable, class CharT, class Traits, class Alloc>
-inline
-auto
-parse(const std::basic_string<CharT, Traits, Alloc>& format, Parsable& tp,
- std::basic_string<CharT, Traits, Alloc>& abbrev)
- -> decltype(from_stream(std::declval<std::basic_istream<CharT, Traits>&>(),
- format.c_str(), tp, &abbrev),
- parse_manip<Parsable, CharT, Traits, Alloc>{format, tp, &abbrev})
-{
- return {format, tp, &abbrev};
-}
-
-// parse(format, tp, offset): binds the format and destination, passes
-// nullptr for the abbreviation output and the address of `offset` as the
-// offset output. Viable only when the matching from_stream call named in
-// the trailing return type is well-formed.
-template <class Parsable, class CharT, class Traits, class Alloc>
-inline
-auto
-parse(const std::basic_string<CharT, Traits, Alloc>& format, Parsable& tp,
- std::chrono::minutes& offset)
- -> decltype(from_stream(std::declval<std::basic_istream<CharT, Traits>&>(),
- format.c_str(), tp,
- std::declval<std::basic_string<CharT, Traits, Alloc>*>(),
- &offset),
- parse_manip<Parsable, CharT, Traits, Alloc>{format, tp, nullptr, &offset})
-{
- return {format, tp, nullptr, &offset};
-}
-
-// parse(format, tp, abbrev, offset): binds the format and destination and
-// passes the addresses of both `abbrev` and `offset` as outputs. Viable only
-// when the matching from_stream call named in the trailing return type is
-// well-formed.
-template <class Parsable, class CharT, class Traits, class Alloc>
-inline
-auto
-parse(const std::basic_string<CharT, Traits, Alloc>& format, Parsable& tp,
- std::basic_string<CharT, Traits, Alloc>& abbrev, std::chrono::minutes& offset)
- -> decltype(from_stream(std::declval<std::basic_istream<CharT, Traits>&>(),
- format.c_str(), tp, &abbrev, &offset),
- parse_manip<Parsable, CharT, Traits, Alloc>{format, tp, &abbrev, &offset})
-{
- return {format, tp, &abbrev, &offset};
-}
-
-// const CharT* formats
-
-// parse(format, tp) for a C-string format. The returned
-// parse_manip<Parsable, CharT> uses the default Traits and Alloc and stores
-// its own string built from `format`. Viable only when the from_stream call
-// named in the trailing return type is well-formed.
-template <class Parsable, class CharT>
-inline
-auto
-parse(const CharT* format, Parsable& tp)
- -> decltype(from_stream(std::declval<std::basic_istream<CharT>&>(), format, tp),
- parse_manip<Parsable, CharT>{format, tp})
-{
- return {format, tp};
-}
-
-// parse(format, tp, abbrev) for a C-string format. Traits and Alloc are
-// deduced from `abbrev`, whose address is passed as the abbreviation output.
-// Viable only when the from_stream call named in the trailing return type
-// is well-formed.
-template <class Parsable, class CharT, class Traits, class Alloc>
-inline
-auto
-parse(const CharT* format, Parsable& tp, std::basic_string<CharT, Traits, Alloc>& abbrev)
- -> decltype(from_stream(std::declval<std::basic_istream<CharT, Traits>&>(), format,
- tp, &abbrev),
- parse_manip<Parsable, CharT, Traits, Alloc>{format, tp, &abbrev})
-{
- return {format, tp, &abbrev};
-}
-
-// parse(format, tp, offset) for a C-string format. Passes nullptr for the
-// abbreviation output and the address of `offset` as the offset output; the
-// manipulator uses the default Traits and Alloc. Viable only when the
-// from_stream call named in the trailing return type is well-formed.
-template <class Parsable, class CharT>
-inline
-auto
-parse(const CharT* format, Parsable& tp, std::chrono::minutes& offset)
- -> decltype(from_stream(std::declval<std::basic_istream<CharT>&>(), format,
- tp, std::declval<std::basic_string<CharT>*>(), &offset),
- parse_manip<Parsable, CharT>{format, tp, nullptr, &offset})
-{
- return {format, tp, nullptr, &offset};
-}
-
-// parse(format, tp, abbrev, offset) for a C-string format. Traits and Alloc
-// are deduced from `abbrev`; the addresses of `abbrev` and `offset` are
-// passed as outputs. Viable only when the from_stream call named in the
-// trailing return type is well-formed.
-template <class Parsable, class CharT, class Traits, class Alloc>
-inline
-auto
-parse(const CharT* format, Parsable& tp,
- std::basic_string<CharT, Traits, Alloc>& abbrev, std::chrono::minutes& offset)
- -> decltype(from_stream(std::declval<std::basic_istream<CharT, Traits>&>(), format,
- tp, &abbrev, &offset),
- parse_manip<Parsable, CharT, Traits, Alloc>{format, tp, &abbrev, &offset})
-{
- return {format, tp, &abbrev, &offset};
-}
-
-// duration streaming
-
-// Streams a std::chrono::duration as its tick count followed by a unit
-// suffix: the count is converted with detail::make_string and the suffix
-// comes from detail::get_units for the duration's normalized period
-// (Period::type). Both helpers are defined outside this section.
-template <class CharT, class Traits, class Rep, class Period>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os,
- const std::chrono::duration<Rep, Period>& d)
-{
- return os << detail::make_string<CharT, Traits>::from(d.count()) +
- detail::get_units<CharT>(typename Period::type{});
-}
-
-} // namespace date
-} // namespace arrow_vendored
-
-#ifdef _MSC_VER
-# pragma warning(pop)
-#endif
-
-#ifdef __GNUC__
-# pragma GCC diagnostic pop
-#endif
-
-#endif // DATE_H
+#ifndef DATE_H
+#define DATE_H
+
+// The MIT License (MIT)
+//
+// Copyright (c) 2015, 2016, 2017 Howard Hinnant
+// Copyright (c) 2016 Adrian Colomitchi
+// Copyright (c) 2017 Florian Dang
+// Copyright (c) 2017 Paul Thompson
+// Copyright (c) 2018, 2019 Tomasz Kamiński
+// Copyright (c) 2019 Jiangang Zhuang
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+// Our apologies. When the previous paragraph was written, lowercase had not yet
+// been invented (that would involve another several millennia of evolution).
+// We did not mean to shout.
+
+// HAS_STRING_VIEW: 1 when <string_view> is to be included below, 0 otherwise.
+// A value defined before this point is kept as-is; otherwise it is enabled
+// when the compiler reports C++17 or later through __cplusplus or, where
+// that macro is defined, _MSVC_LANG.
+#ifndef HAS_STRING_VIEW
+# if __cplusplus >= 201703 || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
+# define HAS_STRING_VIEW 1
+# else
+# define HAS_STRING_VIEW 0
+# endif
+#endif // HAS_STRING_VIEW
+
+#include <cassert>
+#include <algorithm>
+#include <cctype>
+#include <chrono>
+#include <climits>
+#if !(__cplusplus >= 201402)
+# include <cmath>
+#endif
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <ctime>
+#include <ios>
+#include <istream>
+#include <iterator>
+#include <limits>
+#include <locale>
+#include <memory>
+#include <ostream>
+#include <ratio>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#if HAS_STRING_VIEW
+# include <string_view>
+#endif
+#include <utility>
+#include <type_traits>
+
+#ifdef __GNUC__
+# pragma GCC diagnostic push
+# if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 7)
+# pragma GCC diagnostic ignored "-Wpedantic"
+# endif
+# if __GNUC__ < 5
+ // GCC 4.9 Bug 61489 Wrong warning with -Wmissing-field-initializers
+# pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+# endif
+#endif
+
+#ifdef _MSC_VER
+# pragma warning(push)
+// warning C4127: conditional expression is constant
+# pragma warning(disable : 4127)
+#endif
+
+namespace arrow_vendored
+{
+namespace date
+{
+
+//---------------+
+// Configuration |
+//---------------+
+
+// ONLY_C_LOCALE defaults to 0 unless defined before this point.
+// NOTE(review): presumably selects C-locale-only formatting/parsing; its
+// uses are outside this section - confirm.
+#ifndef ONLY_C_LOCALE
+# define ONLY_C_LOCALE 0
+#endif
+
+// Portability macros used on the declarations below:
+//   CONSTDATA - qualifier for constant data (constexpr const, or plain const)
+//   CONSTCD11 - constexpr where C++11-level constexpr is usable, else empty
+//   CONSTCD14 - constexpr where C++14-level constexpr is usable, else empty
+//   NOEXCEPT  - noexcept, or _NOEXCEPT on MSVC before VS2017
+// The branches are tried in order: MSVC (excluding clang on VS2017 and
+// later), Oracle Developer Studio up to 12.6, any C++14 compiler, and
+// finally plain C++11.
+#if defined(_MSC_VER) && (!defined(__clang__) || (_MSC_VER < 1910))
+// MSVC
+# ifndef _SILENCE_CXX17_UNCAUGHT_EXCEPTION_DEPRECATION_WARNING
+# define _SILENCE_CXX17_UNCAUGHT_EXCEPTION_DEPRECATION_WARNING
+# endif
+# if _MSC_VER < 1910
+// before VS2017
+# define CONSTDATA const
+# define CONSTCD11
+# define CONSTCD14
+# define NOEXCEPT _NOEXCEPT
+# else
+// VS2017 and later
+# define CONSTDATA constexpr const
+# define CONSTCD11 constexpr
+# define CONSTCD14 constexpr
+# define NOEXCEPT noexcept
+# endif
+
+#elif defined(__SUNPRO_CC) && __SUNPRO_CC <= 0x5150
+// Oracle Developer Studio 12.6 and earlier
+# define CONSTDATA constexpr const
+# define CONSTCD11 constexpr
+# define CONSTCD14
+# define NOEXCEPT noexcept
+
+#elif __cplusplus >= 201402
+// C++14
+# define CONSTDATA constexpr const
+# define CONSTCD11 constexpr
+# define CONSTCD14 constexpr
+# define NOEXCEPT noexcept
+#else
+// C++11
+# define CONSTDATA constexpr const
+# define CONSTCD11 constexpr
+# define CONSTCD14
+# define NOEXCEPT noexcept
+#endif
+
+// HAS_UNCAUGHT_EXCEPTIONS: kept if already defined; otherwise 1 only when the
+// reported language version is strictly newer than C++17 (note the '>'), and
+// 0 otherwise.
+// NOTE(review): presumably gates use of std::uncaught_exceptions(); the use
+// site is outside this section - confirm.
+#ifndef HAS_UNCAUGHT_EXCEPTIONS
+# if __cplusplus > 201703 || (defined(_MSVC_LANG) && _MSVC_LANG > 201703L)
+# define HAS_UNCAUGHT_EXCEPTIONS 1
+# else
+# define HAS_UNCAUGHT_EXCEPTIONS 0
+# endif
+#endif // HAS_UNCAUGHT_EXCEPTIONS
+
+// HAS_VOID_T: kept if already defined; otherwise 1 when the reported language
+// version is C++17 or later, and 0 otherwise.
+// NOTE(review): presumably gates use of std::void_t; the use site is outside
+// this section - confirm.
+#ifndef HAS_VOID_T
+# if __cplusplus >= 201703 || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
+# define HAS_VOID_T 1
+# else
+# define HAS_VOID_T 0
+# endif
+#endif // HAS_VOID_T
+
+// Protect from Oracle sun macro
+#ifdef sun
+# undef sun
+#endif
+
+// Workaround for an NVCC compiler bug that makes it fail to compile
+// std::ratio_{multiply,divide} when they are used directly in the
+// std::chrono::duration template instantiations below.
+// Each alias names the type of a value-initialized std::ratio_multiply /
+// std::ratio_divide object obtained through decltype.
+namespace detail {
+template <typename R1, typename R2>
+using ratio_multiply = decltype(std::ratio_multiply<R1, R2>{});
+
+template <typename R1, typename R2>
+using ratio_divide = decltype(std::ratio_divide<R1, R2>{});
+} // namespace detail
+
+//-----------+
+// Interface |
+//-----------+
+
+// durations
+
+// Calendar durations, all with an int representation.
+
+// days: period is 24 times the period of std::chrono::hours.
+using days = std::chrono::duration
+ <int, detail::ratio_multiply<std::ratio<24>, std::chrono::hours::period>>;
+
+// weeks: period is 7 times the period of days.
+using weeks = std::chrono::duration
+ <int, detail::ratio_multiply<std::ratio<7>, days::period>>;
+
+// years: period is 146097/400 times the period of days (365.2425 days).
+using years = std::chrono::duration
+ <int, detail::ratio_multiply<std::ratio<146097, 400>, days::period>>;
+
+// months: period is one twelfth of the period of years.
+using months = std::chrono::duration
+ <int, detail::ratio_divide<years::period, std::ratio<12>>>;
+
+// time_point
+
+// sys_time<Duration>: time_point on std::chrono::system_clock with the given
+// duration type.
+template <class Duration>
+ using sys_time = std::chrono::time_point<std::chrono::system_clock, Duration>;
+
+// Convenience aliases at day and second precision.
+using sys_days = sys_time<days>;
+using sys_seconds = sys_time<std::chrono::seconds>;
+
+// Empty tag type used as the "clock" parameter of local_time.
+struct local_t {};
+
+// local_time<Duration>: time_point tagged with local_t instead of a real clock.
+template <class Duration>
+ using local_time = std::chrono::time_point<local_t, Duration>;
+
+// Convenience aliases at second and day precision.
+using local_seconds = local_time<std::chrono::seconds>;
+using local_days = local_time<days>;
+
+// types
+
+// Empty tag type accepted by the operator/ overloads and by
+// weekday::operator[] declared below to select the "last" variants
+// (month_day_last, weekday_last, ...). Its default constructor is explicit.
+struct last_spec
+{
+ explicit last_spec() = default;
+};
+
+// Forward declarations of the calendar types, needed by the operator/
+// declarations that precede the class definitions.
+
+// Single-field types.
+class day;
+class month;
+class year;
+
+// Weekday and its indexed / last variants.
+class weekday;
+class weekday_indexed;
+class weekday_last;
+
+// Month combined with a day, last day, indexed weekday or last weekday.
+class month_day;
+class month_day_last;
+class month_weekday;
+class month_weekday_last;
+
+class year_month;
+
+// Year and month combined with a day, last day, indexed weekday or last weekday.
+class year_month_day;
+class year_month_day_last;
+class year_month_weekday;
+class year_month_weekday_last;
+
+// date composition operators
+
+// Declarations of the operator/ overloads that compose calendar types from
+// their parts (e.g. year / month -> year_month). Overloads taking a plain
+// int accept it in place of one of the typed fields. Only declarations
+// appear here; the definitions are outside this section.
+
+// year / month -> year_month
+CONSTCD11 year_month operator/(const year& y, const month& m) NOEXCEPT;
+CONSTCD11 year_month operator/(const year& y, int m) NOEXCEPT;
+
+// month and day, in either order -> month_day
+CONSTCD11 month_day operator/(const day& d, const month& m) NOEXCEPT;
+CONSTCD11 month_day operator/(const day& d, int m) NOEXCEPT;
+CONSTCD11 month_day operator/(const month& m, const day& d) NOEXCEPT;
+CONSTCD11 month_day operator/(const month& m, int d) NOEXCEPT;
+CONSTCD11 month_day operator/(int m, const day& d) NOEXCEPT;
+
+// month and last_spec, in either order -> month_day_last
+CONSTCD11 month_day_last operator/(const month& m, last_spec) NOEXCEPT;
+CONSTCD11 month_day_last operator/(int m, last_spec) NOEXCEPT;
+CONSTCD11 month_day_last operator/(last_spec, const month& m) NOEXCEPT;
+CONSTCD11 month_day_last operator/(last_spec, int m) NOEXCEPT;
+
+// month and weekday_indexed, in either order -> month_weekday
+CONSTCD11 month_weekday operator/(const month& m, const weekday_indexed& wdi) NOEXCEPT;
+CONSTCD11 month_weekday operator/(int m, const weekday_indexed& wdi) NOEXCEPT;
+CONSTCD11 month_weekday operator/(const weekday_indexed& wdi, const month& m) NOEXCEPT;
+CONSTCD11 month_weekday operator/(const weekday_indexed& wdi, int m) NOEXCEPT;
+
+// month and weekday_last, in either order -> month_weekday_last
+CONSTCD11 month_weekday_last operator/(const month& m, const weekday_last& wdl) NOEXCEPT;
+CONSTCD11 month_weekday_last operator/(int m, const weekday_last& wdl) NOEXCEPT;
+CONSTCD11 month_weekday_last operator/(const weekday_last& wdl, const month& m) NOEXCEPT;
+CONSTCD11 month_weekday_last operator/(const weekday_last& wdl, int m) NOEXCEPT;
+
+// -> year_month_day
+CONSTCD11 year_month_day operator/(const year_month& ym, const day& d) NOEXCEPT;
+CONSTCD11 year_month_day operator/(const year_month& ym, int d) NOEXCEPT;
+CONSTCD11 year_month_day operator/(const year& y, const month_day& md) NOEXCEPT;
+CONSTCD11 year_month_day operator/(int y, const month_day& md) NOEXCEPT;
+CONSTCD11 year_month_day operator/(const month_day& md, const year& y) NOEXCEPT;
+CONSTCD11 year_month_day operator/(const month_day& md, int y) NOEXCEPT;
+
+// -> year_month_day_last
+CONSTCD11
+ year_month_day_last operator/(const year_month& ym, last_spec) NOEXCEPT;
+CONSTCD11
+ year_month_day_last operator/(const year& y, const month_day_last& mdl) NOEXCEPT;
+CONSTCD11
+ year_month_day_last operator/(int y, const month_day_last& mdl) NOEXCEPT;
+CONSTCD11
+ year_month_day_last operator/(const month_day_last& mdl, const year& y) NOEXCEPT;
+CONSTCD11
+ year_month_day_last operator/(const month_day_last& mdl, int y) NOEXCEPT;
+
+// -> year_month_weekday
+CONSTCD11
+year_month_weekday
+operator/(const year_month& ym, const weekday_indexed& wdi) NOEXCEPT;
+
+CONSTCD11
+year_month_weekday
+operator/(const year& y, const month_weekday& mwd) NOEXCEPT;
+
+CONSTCD11
+year_month_weekday
+operator/(int y, const month_weekday& mwd) NOEXCEPT;
+
+CONSTCD11
+year_month_weekday
+operator/(const month_weekday& mwd, const year& y) NOEXCEPT;
+
+CONSTCD11
+year_month_weekday
+operator/(const month_weekday& mwd, int y) NOEXCEPT;
+
+// -> year_month_weekday_last
+CONSTCD11
+year_month_weekday_last
+operator/(const year_month& ym, const weekday_last& wdl) NOEXCEPT;
+
+CONSTCD11
+year_month_weekday_last
+operator/(const year& y, const month_weekday_last& mwdl) NOEXCEPT;
+
+CONSTCD11
+year_month_weekday_last
+operator/(int y, const month_weekday_last& mwdl) NOEXCEPT;
+
+CONSTCD11
+year_month_weekday_last
+operator/(const month_weekday_last& mwdl, const year& y) NOEXCEPT;
+
+CONSTCD11
+year_month_weekday_last
+operator/(const month_weekday_last& mwdl, int y) NOEXCEPT;
+
+// Detailed interface
+
+// day
+
+// The day field used by month_day and year_month_day, stored in one
+// unsigned char. Only declarations appear here; the member definitions are
+// outside this section.
+class day
+{
+ unsigned char d_;
+
+public:
+ day() = default;
+ // Explicit so that a bare unsigned is never silently treated as a day.
+ explicit CONSTCD11 day(unsigned d) NOEXCEPT;
+
+ // Pre/post increment and decrement.
+ CONSTCD14 day& operator++() NOEXCEPT;
+ CONSTCD14 day operator++(int) NOEXCEPT;
+ CONSTCD14 day& operator--() NOEXCEPT;
+ CONSTCD14 day operator--(int) NOEXCEPT;
+
+ // In-place arithmetic with a days duration.
+ CONSTCD14 day& operator+=(const days& d) NOEXCEPT;
+ CONSTCD14 day& operator-=(const days& d) NOEXCEPT;
+
+ // Explicit conversion back to unsigned, and a validity check
+ // (the accepted range is defined with ok(), not visible here).
+ CONSTCD11 explicit operator unsigned() const NOEXCEPT;
+ CONSTCD11 bool ok() const NOEXCEPT;
+};
+
+// Non-member interface of day: the six comparisons, arithmetic with days
+// (day - day yields days), and stream insertion. Declarations only.
+CONSTCD11 bool operator==(const day& x, const day& y) NOEXCEPT;
+CONSTCD11 bool operator!=(const day& x, const day& y) NOEXCEPT;
+CONSTCD11 bool operator< (const day& x, const day& y) NOEXCEPT;
+CONSTCD11 bool operator> (const day& x, const day& y) NOEXCEPT;
+CONSTCD11 bool operator<=(const day& x, const day& y) NOEXCEPT;
+CONSTCD11 bool operator>=(const day& x, const day& y) NOEXCEPT;
+
+CONSTCD11 day operator+(const day& x, const days& y) NOEXCEPT;
+CONSTCD11 day operator+(const days& x, const day& y) NOEXCEPT;
+CONSTCD11 day operator-(const day& x, const days& y) NOEXCEPT;
+CONSTCD11 days operator-(const day& x, const day& y) NOEXCEPT;
+
+template<class CharT, class Traits>
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const day& d);
+
+// month
+
+// The month field used by the composite calendar types, stored in one
+// unsigned char. Only declarations appear here; the member definitions are
+// outside this section.
+class month
+{
+ unsigned char m_;
+
+public:
+ month() = default;
+ // Explicit so that a bare unsigned is never silently treated as a month.
+ explicit CONSTCD11 month(unsigned m) NOEXCEPT;
+
+ // Pre/post increment and decrement.
+ CONSTCD14 month& operator++() NOEXCEPT;
+ CONSTCD14 month operator++(int) NOEXCEPT;
+ CONSTCD14 month& operator--() NOEXCEPT;
+ CONSTCD14 month operator--(int) NOEXCEPT;
+
+ // In-place arithmetic with a months duration.
+ CONSTCD14 month& operator+=(const months& m) NOEXCEPT;
+ CONSTCD14 month& operator-=(const months& m) NOEXCEPT;
+
+ // Explicit conversion back to unsigned, and a validity check
+ // (the accepted range is defined with ok(), not visible here).
+ CONSTCD11 explicit operator unsigned() const NOEXCEPT;
+ CONSTCD11 bool ok() const NOEXCEPT;
+};
+
+// Non-member interface of month: the six comparisons, arithmetic with months
+// (month - month yields months), and stream insertion. Declarations only.
+// Unlike the day and year counterparts, the arithmetic operators are
+// CONSTCD14 rather than CONSTCD11.
+CONSTCD11 bool operator==(const month& x, const month& y) NOEXCEPT;
+CONSTCD11 bool operator!=(const month& x, const month& y) NOEXCEPT;
+CONSTCD11 bool operator< (const month& x, const month& y) NOEXCEPT;
+CONSTCD11 bool operator> (const month& x, const month& y) NOEXCEPT;
+CONSTCD11 bool operator<=(const month& x, const month& y) NOEXCEPT;
+CONSTCD11 bool operator>=(const month& x, const month& y) NOEXCEPT;
+
+CONSTCD14 month operator+(const month& x, const months& y) NOEXCEPT;
+CONSTCD14 month operator+(const months& x, const month& y) NOEXCEPT;
+CONSTCD14 month operator-(const month& x, const months& y) NOEXCEPT;
+CONSTCD14 months operator-(const month& x, const month& y) NOEXCEPT;
+
+template<class CharT, class Traits>
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const month& m);
+
+// year
+
+// The year field used by the composite calendar types, stored in a short.
+// Only min() and max() are defined inline; the other members are declared
+// here and defined outside this section.
+class year
+{
+ short y_;
+
+public:
+ year() = default;
+ // Explicit so that a bare int is never silently treated as a year.
+ explicit CONSTCD11 year(int y) NOEXCEPT;
+
+ // Pre/post increment and decrement.
+ CONSTCD14 year& operator++() NOEXCEPT;
+ CONSTCD14 year operator++(int) NOEXCEPT;
+ CONSTCD14 year& operator--() NOEXCEPT;
+ CONSTCD14 year operator--(int) NOEXCEPT;
+
+ // In-place arithmetic with a years duration.
+ CONSTCD14 year& operator+=(const years& y) NOEXCEPT;
+ CONSTCD14 year& operator-=(const years& y) NOEXCEPT;
+
+ // Unary minus and plus.
+ CONSTCD11 year operator-() const NOEXCEPT;
+ CONSTCD11 year operator+() const NOEXCEPT;
+
+ CONSTCD11 bool is_leap() const NOEXCEPT;
+
+ // Explicit conversion back to int, and a validity check.
+ CONSTCD11 explicit operator int() const NOEXCEPT;
+ CONSTCD11 bool ok() const NOEXCEPT;
+
+ // Smallest and largest year values offered by the type: -32767 and 32767.
+ static CONSTCD11 year min() NOEXCEPT { return year{-32767}; }
+ static CONSTCD11 year max() NOEXCEPT { return year{32767}; }
+};
+
+// Non-member interface of year: the six comparisons, arithmetic with years
+// (year - year yields years), and stream insertion. Declarations only.
+CONSTCD11 bool operator==(const year& x, const year& y) NOEXCEPT;
+CONSTCD11 bool operator!=(const year& x, const year& y) NOEXCEPT;
+CONSTCD11 bool operator< (const year& x, const year& y) NOEXCEPT;
+CONSTCD11 bool operator> (const year& x, const year& y) NOEXCEPT;
+CONSTCD11 bool operator<=(const year& x, const year& y) NOEXCEPT;
+CONSTCD11 bool operator>=(const year& x, const year& y) NOEXCEPT;
+
+CONSTCD11 year operator+(const year& x, const years& y) NOEXCEPT;
+CONSTCD11 year operator+(const years& x, const year& y) NOEXCEPT;
+CONSTCD11 year operator-(const year& x, const years& y) NOEXCEPT;
+CONSTCD11 years operator-(const year& x, const year& y) NOEXCEPT;
+
+template<class CharT, class Traits>
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const year& y);
+
+// weekday
+
+// A day of the week, stored in one unsigned char. Only declarations appear
+// here; the member definitions are outside this section.
+class weekday
+{
+ unsigned char wd_;
+public:
+ weekday() = default;
+ explicit CONSTCD11 weekday(unsigned wd) NOEXCEPT;
+ // Construction from a day-precision time point: implicit from sys_days,
+ // explicit from local_days.
+ CONSTCD14 weekday(const sys_days& dp) NOEXCEPT;
+ CONSTCD14 explicit weekday(const local_days& dp) NOEXCEPT;
+
+ // Pre/post increment and decrement.
+ CONSTCD14 weekday& operator++() NOEXCEPT;
+ CONSTCD14 weekday operator++(int) NOEXCEPT;
+ CONSTCD14 weekday& operator--() NOEXCEPT;
+ CONSTCD14 weekday operator--(int) NOEXCEPT;
+
+ // In-place arithmetic with a days duration.
+ CONSTCD14 weekday& operator+=(const days& d) NOEXCEPT;
+ CONSTCD14 weekday& operator-=(const days& d) NOEXCEPT;
+
+ CONSTCD11 bool ok() const NOEXCEPT;
+
+ // Numeric value in two encodings.
+ // NOTE(review): presumably the C (tm_wday-style) and ISO 8601 weekday
+ // numberings; confirm against the definitions.
+ CONSTCD11 unsigned c_encoding() const NOEXCEPT;
+ CONSTCD11 unsigned iso_encoding() const NOEXCEPT;
+
+ // wd[index] yields a weekday_indexed; wd[last_spec{}] yields a weekday_last.
+ CONSTCD11 weekday_indexed operator[](unsigned index) const NOEXCEPT;
+ CONSTCD11 weekday_last operator[](last_spec) const NOEXCEPT;
+
+private:
+ // Helper mapping an integer day count to the stored encoding.
+ static CONSTCD14 unsigned char weekday_from_days(int z) NOEXCEPT;
+
+ // Granted access to wd_.
+ friend CONSTCD11 bool operator==(const weekday& x, const weekday& y) NOEXCEPT;
+ friend CONSTCD14 days operator-(const weekday& x, const weekday& y) NOEXCEPT;
+ friend CONSTCD14 weekday operator+(const weekday& x, const days& y) NOEXCEPT;
+ template<class CharT, class Traits>
+ friend std::basic_ostream<CharT, Traits>&
+ operator<<(std::basic_ostream<CharT, Traits>& os, const weekday& wd);
+ friend class weekday_indexed;
+};
+
+// Non-member interface of weekday: equality and inequality only (no ordering
+// operators are declared), arithmetic with days (weekday - weekday yields
+// days), and stream insertion. Declarations only.
+CONSTCD11 bool operator==(const weekday& x, const weekday& y) NOEXCEPT;
+CONSTCD11 bool operator!=(const weekday& x, const weekday& y) NOEXCEPT;
+
+CONSTCD14 weekday operator+(const weekday& x, const days& y) NOEXCEPT;
+CONSTCD14 weekday operator+(const days& x, const weekday& y) NOEXCEPT;
+CONSTCD14 weekday operator-(const weekday& x, const days& y) NOEXCEPT;
+CONSTCD14 days operator-(const weekday& x, const weekday& y) NOEXCEPT;
+
+template<class CharT, class Traits>
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const weekday& wd);
+
+// weekday_indexed
+
+// A weekday paired with an index, as produced by weekday::operator[](unsigned).
+// Both values are stored in 4-bit bit-fields of an unsigned char.
+// Only declarations appear here.
+class weekday_indexed
+{
+ unsigned char wd_ : 4;
+ unsigned char index_ : 4;
+
+public:
+ weekday_indexed() = default;
+ CONSTCD11 weekday_indexed(const date::weekday& wd, unsigned index) NOEXCEPT;
+
+ // Accessors for the two stored parts, and a validity check.
+ // The return type is spelled date::weekday because the accessor itself
+ // is named weekday.
+ CONSTCD11 date::weekday weekday() const NOEXCEPT;
+ CONSTCD11 unsigned index() const NOEXCEPT;
+ CONSTCD11 bool ok() const NOEXCEPT;
+};
+
+// Non-member interface of weekday_indexed: equality, inequality and stream
+// insertion. Declarations only.
+CONSTCD11 bool operator==(const weekday_indexed& x, const weekday_indexed& y) NOEXCEPT;
+CONSTCD11 bool operator!=(const weekday_indexed& x, const weekday_indexed& y) NOEXCEPT;
+
+template<class CharT, class Traits>
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const weekday_indexed& wdi);
+
+// weekday_last
+
+// Wraps a weekday to denote its "last" occurrence, as produced by
+// weekday::operator[](last_spec). No default constructor is declared.
+// Only declarations appear here.
+class weekday_last
+{
+ date::weekday wd_;
+
+public:
+ explicit CONSTCD11 weekday_last(const date::weekday& wd) NOEXCEPT;
+
+ // Accessor for the wrapped weekday, and a validity check.
+ CONSTCD11 date::weekday weekday() const NOEXCEPT;
+ CONSTCD11 bool ok() const NOEXCEPT;
+};
+
+// Non-member interface of weekday_last: equality, inequality and stream
+// insertion. Declarations only.
+CONSTCD11 bool operator==(const weekday_last& x, const weekday_last& y) NOEXCEPT;
+CONSTCD11 bool operator!=(const weekday_last& x, const weekday_last& y) NOEXCEPT;
+
+template<class CharT, class Traits>
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const weekday_last& wdl);
+
+namespace detail
+{
+
+// Empty type used as the defaulted template argument of the `months`
+// arithmetic overloads declared below, which makes those overloads templates
+// while the matching `years` overloads stay non-templates.
+// NOTE(review): presumably this lets overload resolution prefer the `years`
+// overload when an argument converts to both duration types - confirm.
+struct unspecified_month_disambiguator {};
+
+} // namespace detail
+
+// year_month
+
+// A year together with a month. Only declarations appear here; the member
+// definitions are outside this section.
+class year_month
+{
+ date::year y_;
+ date::month m_;
+
+public:
+ year_month() = default;
+ CONSTCD11 year_month(const date::year& y, const date::month& m) NOEXCEPT;
+
+ // Accessors for the two stored fields.
+ CONSTCD11 date::year year() const NOEXCEPT;
+ CONSTCD11 date::month month() const NOEXCEPT;
+
+ // In-place arithmetic. The months overloads are templates with a
+ // defaulted dummy parameter; the years overloads are plain functions.
+ template<class = detail::unspecified_month_disambiguator>
+ CONSTCD14 year_month& operator+=(const months& dm) NOEXCEPT;
+ template<class = detail::unspecified_month_disambiguator>
+ CONSTCD14 year_month& operator-=(const months& dm) NOEXCEPT;
+ CONSTCD14 year_month& operator+=(const years& dy) NOEXCEPT;
+ CONSTCD14 year_month& operator-=(const years& dy) NOEXCEPT;
+
+ CONSTCD11 bool ok() const NOEXCEPT;
+};
+
+// Non-member interface of year_month: the six comparisons, arithmetic with
+// months (templated with a defaulted dummy parameter) and with years,
+// year_month - year_month yielding months, and stream insertion.
+// Declarations only.
+CONSTCD11 bool operator==(const year_month& x, const year_month& y) NOEXCEPT;
+CONSTCD11 bool operator!=(const year_month& x, const year_month& y) NOEXCEPT;
+CONSTCD11 bool operator< (const year_month& x, const year_month& y) NOEXCEPT;
+CONSTCD11 bool operator> (const year_month& x, const year_month& y) NOEXCEPT;
+CONSTCD11 bool operator<=(const year_month& x, const year_month& y) NOEXCEPT;
+CONSTCD11 bool operator>=(const year_month& x, const year_month& y) NOEXCEPT;
+
+template<class = detail::unspecified_month_disambiguator>
+CONSTCD14 year_month operator+(const year_month& ym, const months& dm) NOEXCEPT;
+template<class = detail::unspecified_month_disambiguator>
+CONSTCD14 year_month operator+(const months& dm, const year_month& ym) NOEXCEPT;
+template<class = detail::unspecified_month_disambiguator>
+CONSTCD14 year_month operator-(const year_month& ym, const months& dm) NOEXCEPT;
+
+CONSTCD11 months operator-(const year_month& x, const year_month& y) NOEXCEPT;
+CONSTCD11 year_month operator+(const year_month& ym, const years& dy) NOEXCEPT;
+CONSTCD11 year_month operator+(const years& dy, const year_month& ym) NOEXCEPT;
+CONSTCD11 year_month operator-(const year_month& ym, const years& dy) NOEXCEPT;
+
+template<class CharT, class Traits>
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const year_month& ym);
+
+// month_day
+
+// A month together with a day, with no year. Only declarations appear here;
+// the member definitions are outside this section.
+class month_day
+{
+ date::month m_;
+ date::day d_;
+
+public:
+ month_day() = default;
+ CONSTCD11 month_day(const date::month& m, const date::day& d) NOEXCEPT;
+
+ // Accessors for the two stored fields.
+ CONSTCD11 date::month month() const NOEXCEPT;
+ CONSTCD11 date::day day() const NOEXCEPT;
+
+ // Validity check; CONSTCD14 here, unlike the CONSTCD11 ok() of the
+ // single-field types.
+ CONSTCD14 bool ok() const NOEXCEPT;
+};
+
+// Non-member interface of month_day: the six comparisons and stream
+// insertion. Declarations only.
+CONSTCD11 bool operator==(const month_day& x, const month_day& y) NOEXCEPT;
+CONSTCD11 bool operator!=(const month_day& x, const month_day& y) NOEXCEPT;
+CONSTCD11 bool operator< (const month_day& x, const month_day& y) NOEXCEPT;
+CONSTCD11 bool operator> (const month_day& x, const month_day& y) NOEXCEPT;
+CONSTCD11 bool operator<=(const month_day& x, const month_day& y) NOEXCEPT;
+CONSTCD11 bool operator>=(const month_day& x, const month_day& y) NOEXCEPT;
+
+template<class CharT, class Traits>
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const month_day& md);
+
+// month_day_last
+
+// Denotes the last day of a month; only the month is stored. No default
+// constructor is declared. Only declarations appear here.
+class month_day_last
+{
+ date::month m_;
+
+public:
+ CONSTCD11 explicit month_day_last(const date::month& m) NOEXCEPT;
+
+ // Accessor for the stored month, and a validity check.
+ CONSTCD11 date::month month() const NOEXCEPT;
+ CONSTCD11 bool ok() const NOEXCEPT;
+};
+
+// Non-member interface of month_day_last: the six comparisons and stream
+// insertion. Declarations only.
+CONSTCD11 bool operator==(const month_day_last& x, const month_day_last& y) NOEXCEPT;
+CONSTCD11 bool operator!=(const month_day_last& x, const month_day_last& y) NOEXCEPT;
+CONSTCD11 bool operator< (const month_day_last& x, const month_day_last& y) NOEXCEPT;
+CONSTCD11 bool operator> (const month_day_last& x, const month_day_last& y) NOEXCEPT;
+CONSTCD11 bool operator<=(const month_day_last& x, const month_day_last& y) NOEXCEPT;
+CONSTCD11 bool operator>=(const month_day_last& x, const month_day_last& y) NOEXCEPT;
+
+template<class CharT, class Traits>
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const month_day_last& mdl);
+
+// month_weekday
+
+// A month together with an indexed weekday. No default constructor is
+// declared. Only declarations appear here.
+class month_weekday
+{
+ date::month m_;
+ date::weekday_indexed wdi_;
+public:
+ CONSTCD11 month_weekday(const date::month& m,
+ const date::weekday_indexed& wdi) NOEXCEPT;
+
+ // Accessors for the two stored parts.
+ CONSTCD11 date::month month() const NOEXCEPT;
+ CONSTCD11 date::weekday_indexed weekday_indexed() const NOEXCEPT;
+
+ CONSTCD11 bool ok() const NOEXCEPT;
+};
+
+// Non-member interface of month_weekday: equality, inequality and stream
+// insertion. Declarations only.
+CONSTCD11 bool operator==(const month_weekday& x, const month_weekday& y) NOEXCEPT;
+CONSTCD11 bool operator!=(const month_weekday& x, const month_weekday& y) NOEXCEPT;
+
+template<class CharT, class Traits>
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const month_weekday& mwd);
+
+// month_weekday_last
+
+// A month together with a weekday_last. No default constructor is declared.
+// Only declarations appear here.
+class month_weekday_last
+{
+ date::month m_;
+ date::weekday_last wdl_;
+
+public:
+ CONSTCD11 month_weekday_last(const date::month& m,
+ const date::weekday_last& wd) NOEXCEPT;
+
+ // Accessors for the two stored parts.
+ CONSTCD11 date::month month() const NOEXCEPT;
+ CONSTCD11 date::weekday_last weekday_last() const NOEXCEPT;
+
+ CONSTCD11 bool ok() const NOEXCEPT;
+};
+
+// Non-member interface of month_weekday_last: equality, inequality and
+// stream insertion. Declarations only.
+CONSTCD11
+ bool operator==(const month_weekday_last& x, const month_weekday_last& y) NOEXCEPT;
+CONSTCD11
+ bool operator!=(const month_weekday_last& x, const month_weekday_last& y) NOEXCEPT;
+
+template<class CharT, class Traits>
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const month_weekday_last& mwdl);
+
+// class year_month_day
+
+// A full calendar date held as separate year, month and day fields.
+// Only declarations appear here; the member definitions are outside this
+// section.
+class year_month_day
+{
+ date::year y_;
+ date::month m_;
+ date::day d_;
+
+public:
+ year_month_day() = default;
+ CONSTCD11 year_month_day(const date::year& y, const date::month& m,
+ const date::day& d) NOEXCEPT;
+ // Implicit conversion from a year_month_day_last.
+ CONSTCD14 year_month_day(const year_month_day_last& ymdl) NOEXCEPT;
+
+ // Construction from a day-precision time point: implicit from sys_days,
+ // explicit from local_days.
+ CONSTCD14 year_month_day(sys_days dp) NOEXCEPT;
+ CONSTCD14 explicit year_month_day(local_days dp) NOEXCEPT;
+
+ // In-place arithmetic. The months overloads are templates with a
+ // defaulted dummy parameter; the years overloads are plain functions.
+ template<class = detail::unspecified_month_disambiguator>
+ CONSTCD14 year_month_day& operator+=(const months& m) NOEXCEPT;
+ template<class = detail::unspecified_month_disambiguator>
+ CONSTCD14 year_month_day& operator-=(const months& m) NOEXCEPT;
+ CONSTCD14 year_month_day& operator+=(const years& y) NOEXCEPT;
+ CONSTCD14 year_month_day& operator-=(const years& y) NOEXCEPT;
+
+ // Accessors for the three stored fields.
+ CONSTCD11 date::year year() const NOEXCEPT;
+ CONSTCD11 date::month month() const NOEXCEPT;
+ CONSTCD11 date::day day() const NOEXCEPT;
+
+ // Conversion back to a time point: implicit to sys_days, explicit to
+ // local_days.
+ CONSTCD14 operator sys_days() const NOEXCEPT;
+ CONSTCD14 explicit operator local_days() const NOEXCEPT;
+ CONSTCD14 bool ok() const NOEXCEPT;
+
+private:
+ // Private helpers converting between a days count and the field form.
+ static CONSTCD14 year_month_day from_days(days dp) NOEXCEPT;
+ CONSTCD14 days to_days() const NOEXCEPT;
+};
+
+// Non-member interface of year_month_day: the six comparisons, arithmetic
+// with months (templated with a defaulted dummy parameter) and with years,
+// and stream insertion. Declarations only.
+CONSTCD11 bool operator==(const year_month_day& x, const year_month_day& y) NOEXCEPT;
+CONSTCD11 bool operator!=(const year_month_day& x, const year_month_day& y) NOEXCEPT;
+CONSTCD11 bool operator< (const year_month_day& x, const year_month_day& y) NOEXCEPT;
+CONSTCD11 bool operator> (const year_month_day& x, const year_month_day& y) NOEXCEPT;
+CONSTCD11 bool operator<=(const year_month_day& x, const year_month_day& y) NOEXCEPT;
+CONSTCD11 bool operator>=(const year_month_day& x, const year_month_day& y) NOEXCEPT;
+
+template<class = detail::unspecified_month_disambiguator>
+CONSTCD14 year_month_day operator+(const year_month_day& ymd, const months& dm) NOEXCEPT;
+template<class = detail::unspecified_month_disambiguator>
+CONSTCD14 year_month_day operator+(const months& dm, const year_month_day& ymd) NOEXCEPT;
+template<class = detail::unspecified_month_disambiguator>
+CONSTCD14 year_month_day operator-(const year_month_day& ymd, const months& dm) NOEXCEPT;
+CONSTCD11 year_month_day operator+(const year_month_day& ymd, const years& dy) NOEXCEPT;
+CONSTCD11 year_month_day operator+(const years& dy, const year_month_day& ymd) NOEXCEPT;
+CONSTCD11 year_month_day operator-(const year_month_day& ymd, const years& dy) NOEXCEPT;
+
+template<class CharT, class Traits>
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const year_month_day& ymd);
+
+// year_month_day_last
+
+// Value type pairing a date::year with a date::month_day_last.
+// Only declarations appear in the class body; members are defined out of line.
+class year_month_day_last
+{
+ date::year y_;
+ date::month_day_last mdl_;
+
+public:
+ // Constructs from a year and a month_day_last.
+ CONSTCD11 year_month_day_last(const date::year& y,
+ const date::month_day_last& mdl) NOEXCEPT;
+
+ // Compound assignment by months or years. The months overloads are member
+ // templates whose single parameter defaults to
+ // detail::unspecified_month_disambiguator.
+ template<class = detail::unspecified_month_disambiguator>
+ CONSTCD14 year_month_day_last& operator+=(const months& m) NOEXCEPT;
+ template<class = detail::unspecified_month_disambiguator>
+ CONSTCD14 year_month_day_last& operator-=(const months& m) NOEXCEPT;
+ CONSTCD14 year_month_day_last& operator+=(const years& y) NOEXCEPT;
+ CONSTCD14 year_month_day_last& operator-=(const years& y) NOEXCEPT;
+
+ // Field accessors. Note that day() is CONSTCD14 while the others are CONSTCD11.
+ CONSTCD11 date::year year() const NOEXCEPT;
+ CONSTCD11 date::month month() const NOEXCEPT;
+ CONSTCD11 date::month_day_last month_day_last() const NOEXCEPT;
+ CONSTCD14 date::day day() const NOEXCEPT;
+
+ // Conversions: implicit to sys_days, explicit to local_days.
+ CONSTCD14 operator sys_days() const NOEXCEPT;
+ CONSTCD14 explicit operator local_days() const NOEXCEPT;
+ // Validity check.
+ CONSTCD11 bool ok() const NOEXCEPT;
+};
+
+CONSTCD11
+ bool operator==(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT;
+CONSTCD11
+ bool operator!=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT;
+CONSTCD11
+ bool operator< (const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT;
+CONSTCD11
+ bool operator> (const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT;
+CONSTCD11
+ bool operator<=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT;
+CONSTCD11
+ bool operator>=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT;
+
+template<class = detail::unspecified_month_disambiguator>
+CONSTCD14
+year_month_day_last
+operator+(const year_month_day_last& ymdl, const months& dm) NOEXCEPT;
+
+template<class = detail::unspecified_month_disambiguator>
+CONSTCD14
+year_month_day_last
+operator+(const months& dm, const year_month_day_last& ymdl) NOEXCEPT;
+
+CONSTCD11
+year_month_day_last
+operator+(const year_month_day_last& ymdl, const years& dy) NOEXCEPT;
+
+CONSTCD11
+year_month_day_last
+operator+(const years& dy, const year_month_day_last& ymdl) NOEXCEPT;
+
+template<class = detail::unspecified_month_disambiguator>
+CONSTCD14
+year_month_day_last
+operator-(const year_month_day_last& ymdl, const months& dm) NOEXCEPT;
+
+CONSTCD11
+year_month_day_last
+operator-(const year_month_day_last& ymdl, const years& dy) NOEXCEPT;
+
+template<class CharT, class Traits>
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const year_month_day_last& ymdl);
+
+// year_month_weekday
+
+// Value type holding a date::year, a date::month and a date::weekday_indexed.
+// Only declarations appear in the class body; members are defined out of line.
+class year_month_weekday
+{
+ date::year y_;
+ date::month m_;
+ date::weekday_indexed wdi_;
+
+public:
+ // Defaulted default constructor, a field-wise constructor, an implicit
+ // constructor from sys_days and an explicit one from local_days.
+ year_month_weekday() = default;
+ CONSTCD11 year_month_weekday(const date::year& y, const date::month& m,
+ const date::weekday_indexed& wdi) NOEXCEPT;
+ CONSTCD14 year_month_weekday(const sys_days& dp) NOEXCEPT;
+ CONSTCD14 explicit year_month_weekday(const local_days& dp) NOEXCEPT;
+
+ // Compound assignment by months or years. The months overloads are member
+ // templates whose single parameter defaults to
+ // detail::unspecified_month_disambiguator.
+ template<class = detail::unspecified_month_disambiguator>
+ CONSTCD14 year_month_weekday& operator+=(const months& m) NOEXCEPT;
+ template<class = detail::unspecified_month_disambiguator>
+ CONSTCD14 year_month_weekday& operator-=(const months& m) NOEXCEPT;
+ CONSTCD14 year_month_weekday& operator+=(const years& y) NOEXCEPT;
+ CONSTCD14 year_month_weekday& operator-=(const years& y) NOEXCEPT;
+
+ // Field accessors; weekday() and index() expose the two parts of the
+ // weekday_indexed separately, weekday_indexed() returns it whole.
+ CONSTCD11 date::year year() const NOEXCEPT;
+ CONSTCD11 date::month month() const NOEXCEPT;
+ CONSTCD11 date::weekday weekday() const NOEXCEPT;
+ CONSTCD11 unsigned index() const NOEXCEPT;
+ CONSTCD11 date::weekday_indexed weekday_indexed() const NOEXCEPT;
+
+ // Conversions: implicit to sys_days, explicit to local_days.
+ CONSTCD14 operator sys_days() const NOEXCEPT;
+ CONSTCD14 explicit operator local_days() const NOEXCEPT;
+ // Validity check.
+ CONSTCD14 bool ok() const NOEXCEPT;
+
+private:
+ // Private helpers converting from and to a count of days.
+ static CONSTCD14 year_month_weekday from_days(days dp) NOEXCEPT;
+ CONSTCD14 days to_days() const NOEXCEPT;
+};
+
+CONSTCD11
+ bool operator==(const year_month_weekday& x, const year_month_weekday& y) NOEXCEPT;
+CONSTCD11
+ bool operator!=(const year_month_weekday& x, const year_month_weekday& y) NOEXCEPT;
+
+template<class = detail::unspecified_month_disambiguator>
+CONSTCD14
+year_month_weekday
+operator+(const year_month_weekday& ymwd, const months& dm) NOEXCEPT;
+
+template<class = detail::unspecified_month_disambiguator>
+CONSTCD14
+year_month_weekday
+operator+(const months& dm, const year_month_weekday& ymwd) NOEXCEPT;
+
+CONSTCD11
+year_month_weekday
+operator+(const year_month_weekday& ymwd, const years& dy) NOEXCEPT;
+
+CONSTCD11
+year_month_weekday
+operator+(const years& dy, const year_month_weekday& ymwd) NOEXCEPT;
+
+template<class = detail::unspecified_month_disambiguator>
+CONSTCD14
+year_month_weekday
+operator-(const year_month_weekday& ymwd, const months& dm) NOEXCEPT;
+
+CONSTCD11
+year_month_weekday
+operator-(const year_month_weekday& ymwd, const years& dy) NOEXCEPT;
+
+template<class CharT, class Traits>
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const year_month_weekday& ymwdi);
+
+// year_month_weekday_last
+
+// Value type holding a date::year, a date::month and a date::weekday_last.
+// Only declarations appear in the class body; members are defined out of line.
+class year_month_weekday_last
+{
+ date::year y_;
+ date::month m_;
+ date::weekday_last wdl_;
+
+public:
+ // Field-wise constructor (the class declares no default constructor).
+ CONSTCD11 year_month_weekday_last(const date::year& y, const date::month& m,
+ const date::weekday_last& wdl) NOEXCEPT;
+
+ // Compound assignment by months or years. The months overloads are member
+ // templates whose single parameter defaults to
+ // detail::unspecified_month_disambiguator.
+ template<class = detail::unspecified_month_disambiguator>
+ CONSTCD14 year_month_weekday_last& operator+=(const months& m) NOEXCEPT;
+ template<class = detail::unspecified_month_disambiguator>
+ CONSTCD14 year_month_weekday_last& operator-=(const months& m) NOEXCEPT;
+ CONSTCD14 year_month_weekday_last& operator+=(const years& y) NOEXCEPT;
+ CONSTCD14 year_month_weekday_last& operator-=(const years& y) NOEXCEPT;
+
+ // Field accessors.
+ CONSTCD11 date::year year() const NOEXCEPT;
+ CONSTCD11 date::month month() const NOEXCEPT;
+ CONSTCD11 date::weekday weekday() const NOEXCEPT;
+ CONSTCD11 date::weekday_last weekday_last() const NOEXCEPT;
+
+ // Conversions: implicit to sys_days, explicit to local_days.
+ CONSTCD14 operator sys_days() const NOEXCEPT;
+ CONSTCD14 explicit operator local_days() const NOEXCEPT;
+ // Validity check.
+ CONSTCD11 bool ok() const NOEXCEPT;
+
+private:
+ // Private helper converting to a count of days.
+ CONSTCD14 days to_days() const NOEXCEPT;
+};
+
+CONSTCD11
+bool
+operator==(const year_month_weekday_last& x, const year_month_weekday_last& y) NOEXCEPT;
+
+CONSTCD11
+bool
+operator!=(const year_month_weekday_last& x, const year_month_weekday_last& y) NOEXCEPT;
+
+template<class = detail::unspecified_month_disambiguator>
+CONSTCD14
+year_month_weekday_last
+operator+(const year_month_weekday_last& ymwdl, const months& dm) NOEXCEPT;
+
+template<class = detail::unspecified_month_disambiguator>
+CONSTCD14
+year_month_weekday_last
+operator+(const months& dm, const year_month_weekday_last& ymwdl) NOEXCEPT;
+
+CONSTCD11
+year_month_weekday_last
+operator+(const year_month_weekday_last& ymwdl, const years& dy) NOEXCEPT;
+
+CONSTCD11
+year_month_weekday_last
+operator+(const years& dy, const year_month_weekday_last& ymwdl) NOEXCEPT;
+
+template<class = detail::unspecified_month_disambiguator>
+CONSTCD14
+year_month_weekday_last
+operator-(const year_month_weekday_last& ymwdl, const months& dm) NOEXCEPT;
+
+CONSTCD11
+year_month_weekday_last
+operator-(const year_month_weekday_last& ymwdl, const years& dy) NOEXCEPT;
+
+template<class CharT, class Traits>
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const year_month_weekday_last& ymwdl);
+
+#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
+inline namespace literals
+{
+
+CONSTCD11 date::day operator "" _d(unsigned long long d) NOEXCEPT;
+CONSTCD11 date::year operator "" _y(unsigned long long y) NOEXCEPT;
+
+} // inline namespace literals
+#endif // !defined(_MSC_VER) || (_MSC_VER >= 1900)
+
+// CONSTDATA date::month January{1};
+// CONSTDATA date::month February{2};
+// CONSTDATA date::month March{3};
+// CONSTDATA date::month April{4};
+// CONSTDATA date::month May{5};
+// CONSTDATA date::month June{6};
+// CONSTDATA date::month July{7};
+// CONSTDATA date::month August{8};
+// CONSTDATA date::month September{9};
+// CONSTDATA date::month October{10};
+// CONSTDATA date::month November{11};
+// CONSTDATA date::month December{12};
+//
+// CONSTDATA date::weekday Sunday{0u};
+// CONSTDATA date::weekday Monday{1u};
+// CONSTDATA date::weekday Tuesday{2u};
+// CONSTDATA date::weekday Wednesday{3u};
+// CONSTDATA date::weekday Thursday{4u};
+// CONSTDATA date::weekday Friday{5u};
+// CONSTDATA date::weekday Saturday{6u};
+
+#if HAS_VOID_T
+
+// Primary template: T is reported as "not a clock" unless the partial
+// specialization below is viable.
+template <class T, class = std::void_t<>>
+struct is_clock
+ : std::false_type
+{};
+
+// Chosen when T::now(), T::rep, T::period, T::duration, T::time_point and
+// T::is_steady are all well-formed; if any of them is not, substitution fails
+// and the primary template (false_type) is used instead.
+template <class T>
+struct is_clock<T, std::void_t<decltype(T::now()), typename T::rep, typename T::period,
+ typename T::duration, typename T::time_point,
+ decltype(T::is_steady)>>
+ : std::true_type
+{};
+
+#endif // HAS_VOID_T
+
+//----------------+
+// Implementation |
+//----------------+
+
+// utilities
+namespace detail {
+
+// Scope guard for stream formatting state: the constructor captures the fill
+// character, format flags, field width, tied stream and locale of a
+// std::basic_ios, and the destructor writes them back.
+// Non-copyable; holds a reference to the stream, so the stream must outlive it.
+template<class CharT, class Traits = std::char_traits<CharT>>
+class save_istream
+{
+protected:
+ std::basic_ios<CharT, Traits>& is_;
+ CharT fill_;
+ std::ios::fmtflags flags_;
+ std::streamsize width_;
+ std::basic_ostream<CharT, Traits>* tie_;
+ std::locale loc_;
+
+public:
+ // Restores every captured setting on the stream.
+ ~save_istream()
+ {
+ is_.fill(fill_);
+ is_.flags(flags_);
+ is_.width(width_);
+ is_.imbue(loc_);
+ is_.tie(tie_);
+ }
+
+ save_istream(const save_istream&) = delete;
+ save_istream& operator=(const save_istream&) = delete;
+
+ // Captures the current state of `is`. Besides reading the state, this also
+ // resets the width to 0 and unties the stream (width(0) and tie(nullptr)
+ // return the previous values, which are what gets saved).
+ explicit save_istream(std::basic_ios<CharT, Traits>& is)
+ : is_(is)
+ , fill_(is.fill())
+ , flags_(is.flags())
+ , width_(is.width(0))
+ , tie_(is.tie(nullptr))
+ , loc_(is.getloc())
+ {
+ // The stream is now untied, so flush the previously tied stream once here.
+ if (tie_ != nullptr)
+ tie_->flush();
+ }
+};
+
+// Output-stream variant of save_istream: reuses the base class to save and
+// restore the formatting state, and additionally syncs the stream buffer on
+// destruction when the saved flags had std::ios::unitbuf set.
+template<class CharT, class Traits = std::char_traits<CharT>>
+class save_ostream
+ : private save_istream<CharT, Traits>
+{
+public:
+ // Calls pubsync() on the stream buffer only if unitbuf was set in the saved
+ // flags, no exception is currently propagating, and the stream is good().
+ // The base-class destructor runs afterwards and restores the saved state.
+ ~save_ostream()
+ {
+ if ((this->flags_ & std::ios::unitbuf) &&
+#if HAS_UNCAUGHT_EXCEPTIONS
+ std::uncaught_exceptions() == 0 &&
+#else
+ !std::uncaught_exception() &&
+#endif
+ this->is_.good())
+ this->is_.rdbuf()->pubsync();
+ }
+
+ save_ostream(const save_ostream&) = delete;
+ save_ostream& operator=(const save_ostream&) = delete;
+
+ // Captures the state of `os` through the save_istream constructor.
+ explicit save_ostream(std::basic_ios<CharT, Traits>& os)
+ : save_istream<CharT, Traits>(os)
+ {
+ }
+};
+
+// Picks a signed integer type based on std::numeric_limits<T>::digits:
+//   digits < 32  -> std::int32_t
+//   digits < 64  -> std::int64_t
+//   otherwise    -> __int128 when __SIZEOF_INT128__ is defined, else std::int64_t
+template <class T>
+struct choose_trunc_type
+{
+ static const int digits = std::numeric_limits<T>::digits;
+ using type = typename std::conditional
+ <
+ digits < 32,
+ std::int32_t,
+ typename std::conditional
+ <
+ digits < 64,
+ std::int64_t,
+#ifdef __SIZEOF_INT128__
+ __int128
+#else
+ std::int64_t
+#endif
+ >::type
+ >::type;
+};
+
+// Overload for representations that std::chrono does not treat as floating
+// point: the value is returned unchanged.
+template <class T>
+CONSTCD11
+inline
+typename std::enable_if
+<
+ !std::chrono::treat_as_floating_point<T>::value,
+ T
+>::type
+trunc(T t) NOEXCEPT
+{
+ return t;
+}
+
+// Overload for floating-point representations: drops the fractional part by
+// converting to the integer type chosen by choose_trunc_type and back.
+template <class T>
+CONSTCD14
+inline
+typename std::enable_if
+<
+ std::chrono::treat_as_floating_point<T>::value,
+ T
+>::type
+trunc(T t) NOEXCEPT
+{
+ using std::numeric_limits;
+ using I = typename choose_trunc_type<T>::type;
+ CONSTDATA auto digits = numeric_limits<T>::digits;
+ // Compile-time guard: I must have more digits than T.
+ static_assert(digits < numeric_limits<I>::digits, "");
+ CONSTDATA auto max = I{1} << (digits-1);
+ CONSTDATA auto min = -max;
+ const auto negative = t < T{0};
+ // Only values inside [min, max] take the integer round trip. Zero, NaN
+ // (for which t == t is false) and out-of-range values are returned as is.
+ if (min <= t && t <= max && t != 0 && t == t)
+ {
+ t = static_cast<T>(static_cast<I>(t));
+ // A negative input that truncated to zero is negated, i.e. returned
+ // as negative zero.
+ if (t == 0 && negative)
+ t = -t;
+ }
+ return t;
+}
+
+// Compile-time greatest common divisor by Euclid's algorithm:
+// gcd(Xp, Yp) = gcd(Yp, Xp % Yp).
+template <std::intmax_t Xp, std::intmax_t Yp>
+struct static_gcd
+{
+ static const std::intmax_t value = static_gcd<Yp, Xp % Yp>::value;
+};
+
+// Recursion terminator: gcd(Xp, 0) = Xp.
+template <std::intmax_t Xp>
+struct static_gcd<Xp, 0>
+{
+ static const std::intmax_t value = Xp;
+};
+
+// Special case: gcd(0, 0) is defined as 1 here, so callers can divide by it.
+template <>
+struct static_gcd<0, 0>
+{
+ static const std::intmax_t value = 1;
+};
+
+// Compile-time helper over two ratio-like types R1 and R2 (anything exposing
+// static `num` and `den`).
+//   value : true when both cross products n1*d2 and n2*d1 fit in std::intmax_t
+//   type  : std::ratio<n1*d2, n2*d1>, i.e. R1 / R2 with common factors removed;
+//           std::ratio<1, 1> when `value` is false
+template <class R1, class R2>
+struct no_overflow
+{
+private:
+ // Cancel the common factors of the numerators and of the denominators
+ // before cross-multiplying.
+ static const std::intmax_t gcd_n1_n2 = static_gcd<R1::num, R2::num>::value;
+ static const std::intmax_t gcd_d1_d2 = static_gcd<R1::den, R2::den>::value;
+ static const std::intmax_t n1 = R1::num / gcd_n1_n2;
+ static const std::intmax_t d1 = R1::den / gcd_d1_d2;
+ static const std::intmax_t n2 = R2::num / gcd_n1_n2;
+ static const std::intmax_t d2 = R2::den / gcd_d1_d2;
+ static const std::intmax_t max = std::numeric_limits<std::intmax_t>::max();
+
+ // Multiplies Xp by Yp; the specialization for overflow == true yields 1
+ // instead, so the overflowing product is never formed.
+ template <std::intmax_t Xp, std::intmax_t Yp, bool overflow>
+ struct mul // overflow == false
+ {
+ static const std::intmax_t value = Xp * Yp;
+ };
+
+ template <std::intmax_t Xp, std::intmax_t Yp>
+ struct mul<Xp, Yp, true>
+ {
+ static const std::intmax_t value = 1;
+ };
+
+public:
+ // a <= max / b is the overflow-free way of testing a * b <= max.
+ static const bool value = (n1 <= max / d2) && (n2 <= max / d1);
+ typedef std::ratio<mul<n1, d2, !value>::value,
+ mul<n2, d1, !value>::value> type;
+};
+
+} // detail
+
+// trunc towards zero
+// Overload selected when detail::no_overflow<Period, To::period>::value is
+// true: a single duration_cast to To, then detail::trunc on the count.
+template <class To, class Rep, class Period>
+CONSTCD11
+inline
+typename std::enable_if
+<
+ detail::no_overflow<Period, typename To::period>::value,
+ To
+>::type
+trunc(const std::chrono::duration<Rep, Period>& d)
+{
+ return To{detail::trunc(std::chrono::duration_cast<To>(d).count())};
+}
+
+// Overload selected when detail::no_overflow reports false: the value is first
+// cast to std::chrono::duration<rep> (default period) using the common type of
+// both representations, and only then cast to To.
+template <class To, class Rep, class Period>
+CONSTCD11
+inline
+typename std::enable_if
+<
+ !detail::no_overflow<Period, typename To::period>::value,
+ To
+>::type
+trunc(const std::chrono::duration<Rep, Period>& d)
+{
+ using std::chrono::duration_cast;
+ using std::chrono::duration;
+ using rep = typename std::common_type<Rep, typename To::rep>::type;
+ return To{detail::trunc(duration_cast<To>(duration_cast<duration<rep>>(d)).count())};
+}
+
+// HAS_CHRONO_ROUNDING selects between the floor/round/ceil/abs fallbacks
+// defined in this header (0) and the std::chrono versions (1). A value defined
+// before this point is left untouched.
+#ifndef HAS_CHRONO_ROUNDING
+// MSVC at or above full version 190023918, or clang with _MSC_FULL_VER >= 190000000.
+# if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190023918 || (_MSC_FULL_VER >= 190000000 && defined (__clang__)))
+# define HAS_CHRONO_ROUNDING 1
+// Any library advertising __cpp_lib_chrono >= 201510 when compiling past C++14.
+# elif defined(__cpp_lib_chrono) && __cplusplus > 201402 && __cpp_lib_chrono >= 201510
+# define HAS_CHRONO_ROUNDING 1
+// libc++ with _LIBCPP_VERSION >= 3800 when compiling past C++14.
+# elif defined(_LIBCPP_VERSION) && __cplusplus > 201402 && _LIBCPP_VERSION >= 3800
+# define HAS_CHRONO_ROUNDING 1
+# else
+# define HAS_CHRONO_ROUNDING 0
+# endif
+#endif // HAS_CHRONO_ROUNDING
+
+#if HAS_CHRONO_ROUNDING == 0
+
+// round down
+// Overload used when the period conversion cannot overflow.
+template <class To, class Rep, class Period>
+CONSTCD14
+inline
+typename std::enable_if
+<
+    detail::no_overflow<Period, typename To::period>::value,
+    To
+>::type
+floor(const std::chrono::duration<Rep, Period>& d)
+{
+    // trunc rounds toward zero, so it can only land above d; when it does,
+    // step back by one tick of To.
+    const auto toward_zero = trunc<To>(d);
+    if (toward_zero > d)
+        return toward_zero - To{1};
+    return toward_zero;
+}
+
+// Overload used when a direct conversion could overflow: floor to a
+// default-period duration over the common representation first, then to To.
+template <class To, class Rep, class Period>
+CONSTCD14
+inline
+typename std::enable_if
+<
+    !detail::no_overflow<Period, typename To::period>::value,
+    To
+>::type
+floor(const std::chrono::duration<Rep, Period>& d)
+{
+    using common_rep = typename std::common_type<Rep, typename To::rep>::type;
+    using intermediate = std::chrono::duration<common_rep>;
+    return floor<To>(floor<intermediate>(d));
+}
+
+// round to nearest, to even on tie
+template <class To, class Rep, class Period>
+CONSTCD14
+inline
+To
+round(const std::chrono::duration<Rep, Period>& d)
+{
+    // Candidates on either side of d.
+    auto below = floor<To>(d);
+    auto above = below + To{1};
+    // NOTE(review): negating a zero upper bound looks like it keeps the sign
+    // of zero for floating-point representations - confirm.
+    if (above == To{0} && below < To{0})
+        above = -above;
+
+    auto dist_below = d - below;
+    auto dist_above = above - d;
+    if (dist_below == dist_above)
+    {
+        // Exactly half way: choose the candidate with an even tick count.
+        if (below - trunc<To>(below/2)*2 == To{0})
+            return below;
+        return above;
+    }
+    if (dist_below < dist_above)
+        return below;
+    return above;
+}
+
+// round up
+template <class To, class Rep, class Period>
+CONSTCD14
+inline
+To
+ceil(const std::chrono::duration<Rep, Period>& d)
+{
+    // trunc rounds toward zero, so it can only land below d; when it does,
+    // advance by one tick of To.
+    const auto toward_zero = trunc<To>(d);
+    if (toward_zero < d)
+        return toward_zero + To{1};
+    return toward_zero;
+}
+
+// Absolute value of a duration. Participates in overload resolution only when
+// std::numeric_limits<Rep>::is_signed is true. Returns d itself when it is
+// not less than zero, otherwise -d.
+template <class Rep, class Period,
+ class = typename std::enable_if
+ <
+ std::numeric_limits<Rep>::is_signed
+ >::type>
+CONSTCD11
+std::chrono::duration<Rep, Period>
+abs(std::chrono::duration<Rep, Period> d)
+{
+ return d >= d.zero() ? d : -d;
+}
+
+// round down
+// Applies date::floor to the time since epoch and rewraps the result in a
+// time_point of the same clock.
+template <class To, class Clock, class FromDuration>
+CONSTCD11
+inline
+std::chrono::time_point<Clock, To>
+floor(const std::chrono::time_point<Clock, FromDuration>& tp)
+{
+    return std::chrono::time_point<Clock, To>{date::floor<To>(tp.time_since_epoch())};
+}
+
+// round to nearest, to even on tie
+// Applies round to the time since epoch and rewraps the result in a
+// time_point of the same clock.
+template <class To, class Clock, class FromDuration>
+CONSTCD11
+inline
+std::chrono::time_point<Clock, To>
+round(const std::chrono::time_point<Clock, FromDuration>& tp)
+{
+    return std::chrono::time_point<Clock, To>{round<To>(tp.time_since_epoch())};
+}
+
+// round up
+// Applies ceil to the time since epoch and rewraps the result in a
+// time_point of the same clock.
+template <class To, class Clock, class FromDuration>
+CONSTCD11
+inline
+std::chrono::time_point<Clock, To>
+ceil(const std::chrono::time_point<Clock, FromDuration>& tp)
+{
+    return std::chrono::time_point<Clock, To>{ceil<To>(tp.time_since_epoch())};
+}
+
+#else // HAS_CHRONO_ROUNDING == 1
+
+using std::chrono::floor;
+using std::chrono::ceil;
+using std::chrono::round;
+using std::chrono::abs;
+
+#endif // HAS_CHRONO_ROUNDING
+
+// trunc towards zero
+// Applies trunc to the time since epoch and rewraps the result in a
+// time_point of the same clock.
+template <class To, class Clock, class FromDuration>
+CONSTCD11
+inline
+std::chrono::time_point<Clock, To>
+trunc(const std::chrono::time_point<Clock, FromDuration>& tp)
+{
+    return std::chrono::time_point<Clock, To>{trunc<To>(tp.time_since_epoch())};
+}
+
+// day
+
+// Stores d converted to the type of the d_ member; no range check is done here.
+CONSTCD11 inline day::day(unsigned d) NOEXCEPT : d_(static_cast<decltype(d_)>(d)) {}
+// Pre/post increment and decrement act directly on the stored value.
+CONSTCD14 inline day& day::operator++() NOEXCEPT {++d_; return *this;}
+CONSTCD14 inline day day::operator++(int) NOEXCEPT {auto tmp(*this); ++(*this); return tmp;}
+CONSTCD14 inline day& day::operator--() NOEXCEPT {--d_; return *this;}
+CONSTCD14 inline day day::operator--(int) NOEXCEPT {auto tmp(*this); --(*this); return tmp;}
+// Compound assignment is expressed through the binary day +/- days operators.
+CONSTCD14 inline day& day::operator+=(const days& d) NOEXCEPT {*this = *this + d; return *this;}
+CONSTCD14 inline day& day::operator-=(const days& d) NOEXCEPT {*this = *this - d; return *this;}
+// Exposes the stored value.
+CONSTCD11 inline day::operator unsigned() const NOEXCEPT {return d_;}
+// Valid when the stored value is in [1, 31].
+CONSTCD11 inline bool day::ok() const NOEXCEPT {return 1 <= d_ && d_ <= 31;}
+
+// Comparisons between day values use the unsigned value obtained through
+// day's conversion operator. !=, >, <= and >= are derived from == and <.
+CONSTCD11 inline bool operator==(const day& x, const day& y) NOEXCEPT
+{
+    return static_cast<unsigned>(x) == static_cast<unsigned>(y);
+}
+
+CONSTCD11 inline bool operator!=(const day& x, const day& y) NOEXCEPT
+{
+    return !(x == y);
+}
+
+CONSTCD11 inline bool operator<(const day& x, const day& y) NOEXCEPT
+{
+    return static_cast<unsigned>(x) < static_cast<unsigned>(y);
+}
+
+CONSTCD11 inline bool operator>(const day& x, const day& y) NOEXCEPT
+{
+    return y < x;
+}
+
+CONSTCD11 inline bool operator<=(const day& x, const day& y) NOEXCEPT
+{
+    return !(y < x);
+}
+
+CONSTCD11 inline bool operator>=(const day& x, const day& y) NOEXCEPT
+{
+    return !(x < y);
+}
+
+// Difference of two days: the subtraction is done in unsigned arithmetic and
+// the result is converted to days::rep.
+CONSTCD11 inline days operator-(const day& x, const day& y) NOEXCEPT
+{
+    return days{static_cast<days::rep>(static_cast<unsigned>(x)
+                                     - static_cast<unsigned>(y))};
+}
+
+// day + days: both operands are added as unsigned values; the result is not
+// range-checked.
+CONSTCD11 inline day operator+(const day& x, const days& y) NOEXCEPT
+{
+    return day{static_cast<unsigned>(x) + static_cast<unsigned>(y.count())};
+}
+
+// days + day forwards to day + days.
+CONSTCD11 inline day operator+(const days& x, const day& y) NOEXCEPT
+{
+    return y + x;
+}
+
+// day - days is implemented as day + (-days).
+CONSTCD11 inline day operator-(const day& x, const days& y) NOEXCEPT
+{
+    return x + -y;
+}
+
+// Writes the day as a decimal number, right-aligned in a field of width 2 and
+// padded with '0'. An out-of-range day is followed by " is not a valid day".
+// The stream's formatting state is restored when the function returns.
+template<class CharT, class Traits>
+inline
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const day& d)
+{
+    detail::save_ostream<CharT, Traits> state_guard(os);
+    os.fill('0');
+    os.flags(std::ios::dec | std::ios::right);
+    os.width(2);
+    os << static_cast<unsigned>(d);
+    if (d.ok())
+        return os;
+    os << " is not a valid day";
+    return os;
+}
+
+// month
+
+// Stores m converted to the type of the m_ member; no range check is done here.
+CONSTCD11
+inline
+month::month(unsigned m) NOEXCEPT
+    : m_(static_cast<decltype(m_)>(m))
+    {}
+
+// Increment and decrement go through += / -= with months{1}, so they share
+// the behaviour of the binary month +/- months operators.
+CONSTCD14
+inline
+month&
+month::operator++() NOEXCEPT
+{
+    return *this += months{1};
+}
+
+CONSTCD14
+inline
+month
+month::operator++(int) NOEXCEPT
+{
+    month previous(*this);
+    ++(*this);
+    return previous;
+}
+
+CONSTCD14
+inline
+month&
+month::operator--() NOEXCEPT
+{
+    return *this -= months{1};
+}
+
+CONSTCD14
+inline
+month
+month::operator--(int) NOEXCEPT
+{
+    month previous(*this);
+    --(*this);
+    return previous;
+}
+
+// Compound assignment is expressed through the binary operators.
+CONSTCD14
+inline
+month&
+month::operator+=(const months& m) NOEXCEPT
+{
+    return *this = *this + m;
+}
+
+CONSTCD14
+inline
+month&
+month::operator-=(const months& m) NOEXCEPT
+{
+    return *this = *this - m;
+}
+
+// Exposes the stored value.
+CONSTCD11
+inline
+month::operator unsigned() const NOEXCEPT
+{
+    return m_;
+}
+
+// Valid when the stored value is in [1, 12].
+CONSTCD11
+inline
+bool
+month::ok() const NOEXCEPT
+{
+    return 1 <= m_ && m_ <= 12;
+}
+
+// Comparisons between month values use the unsigned value obtained through
+// month's conversion operator. !=, >, <= and >= are derived from == and <.
+CONSTCD11 inline bool operator==(const month& x, const month& y) NOEXCEPT
+{
+    return static_cast<unsigned>(x) == static_cast<unsigned>(y);
+}
+
+CONSTCD11 inline bool operator!=(const month& x, const month& y) NOEXCEPT
+{
+    return !(x == y);
+}
+
+CONSTCD11 inline bool operator<(const month& x, const month& y) NOEXCEPT
+{
+    return static_cast<unsigned>(x) < static_cast<unsigned>(y);
+}
+
+CONSTCD11 inline bool operator>(const month& x, const month& y) NOEXCEPT
+{
+    return y < x;
+}
+
+CONSTCD11 inline bool operator<=(const month& x, const month& y) NOEXCEPT
+{
+    return !(y < x);
+}
+
+CONSTCD11 inline bool operator>=(const month& x, const month& y) NOEXCEPT
+{
+    return !(x < y);
+}
+
+// Number of months from y forward to x. The subtraction is unsigned: when x is
+// the smaller month the difference wraps around, and adding 12 (again in
+// unsigned arithmetic) wraps it back into a small value.
+CONSTCD14
+inline
+months
+operator-(const month& x, const month& y) NOEXCEPT
+{
+    auto const diff = static_cast<unsigned>(x) - static_cast<unsigned>(y);
+    if (diff <= 11)
+        return months(diff);
+    return months(diff + 12);
+}
+
+// Adds a month count with wrap-around, yielding a month number in [1, 12].
+CONSTCD14
+inline
+month
+operator+(const month& x, const months& y) NOEXCEPT
+{
+    // Zero-based month index after the addition; may be negative or >= 12.
+    auto const zero_based = static_cast<long long>(static_cast<unsigned>(x)) + y.count() - 1;
+    // Floor division by 12 (the -11 bias compensates for truncating division
+    // on negative values).
+    auto const whole_years = (zero_based < 0 ? zero_based - 11 : zero_based) / 12;
+    auto const wrapped = zero_based - whole_years * 12;
+    return month{static_cast<unsigned>(wrapped + 1)};
+}
+
+// months + month forwards to month + months.
+CONSTCD14
+inline
+month
+operator+(const months& x, const month& y) NOEXCEPT
+{
+    return y + x;
+}
+
+// month - months is implemented as month + (-months).
+CONSTCD14
+inline
+month
+operator-(const month& x, const months& y) NOEXCEPT
+{
+    return x + -y;
+}
+
+// Writes a valid month via format() with the "%b" specifier and the stream's
+// own locale. An invalid month is written as its numeric value followed by
+// " is not a valid month".
+template<class CharT, class Traits>
+inline
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const month& m)
+{
+    if (!m.ok())
+    {
+        os << static_cast<unsigned>(m) << " is not a valid month";
+        return os;
+    }
+    CharT fmt[] = {'%', 'b', 0};
+    os << format(os.getloc(), fmt, m);
+    return os;
+}
+
+// year
+
+// Stores y converted to the type of the y_ member.
+CONSTCD11 inline year::year(int y) NOEXCEPT : y_(static_cast<decltype(y_)>(y)) {}
+// Pre/post increment and decrement act directly on the stored value.
+CONSTCD14 inline year& year::operator++() NOEXCEPT {++y_; return *this;}
+CONSTCD14 inline year year::operator++(int) NOEXCEPT {auto tmp(*this); ++(*this); return tmp;}
+CONSTCD14 inline year& year::operator--() NOEXCEPT {--y_; return *this;}
+CONSTCD14 inline year year::operator--(int) NOEXCEPT {auto tmp(*this); --(*this); return tmp;}
+// Compound assignment is expressed through the binary year +/- years operators.
+CONSTCD14 inline year& year::operator+=(const years& y) NOEXCEPT {*this = *this + y; return *this;}
+CONSTCD14 inline year& year::operator-=(const years& y) NOEXCEPT {*this = *this - y; return *this;}
+// Unary minus negates the stored value; unary plus returns a copy.
+CONSTCD11 inline year year::operator-() const NOEXCEPT {return year{-y_};}
+CONSTCD11 inline year year::operator+() const NOEXCEPT {return *this;}
+
+// Leap-year rule: divisible by 4, and either not divisible by 100 or
+// divisible by 400.
+CONSTCD11
+inline
+bool
+year::is_leap() const NOEXCEPT
+{
+ return y_ % 4 == 0 && (y_ % 100 != 0 || y_ % 400 == 0);
+}
+
+// Exposes the stored value.
+CONSTCD11 inline year::operator int() const NOEXCEPT {return y_;}
+
+// Every value except std::numeric_limits<short>::min() is reported as valid.
+CONSTCD11
+inline
+bool
+year::ok() const NOEXCEPT
+{
+ return y_ != std::numeric_limits<short>::min();
+}
+
+// Comparisons between year values use the int value obtained through year's
+// conversion operator. !=, >, <= and >= are derived from == and <.
+CONSTCD11 inline bool operator==(const year& x, const year& y) NOEXCEPT
+{
+    return static_cast<int>(x) == static_cast<int>(y);
+}
+
+CONSTCD11 inline bool operator!=(const year& x, const year& y) NOEXCEPT
+{
+    return !(x == y);
+}
+
+CONSTCD11 inline bool operator<(const year& x, const year& y) NOEXCEPT
+{
+    return static_cast<int>(x) < static_cast<int>(y);
+}
+
+CONSTCD11 inline bool operator>(const year& x, const year& y) NOEXCEPT
+{
+    return y < x;
+}
+
+CONSTCD11 inline bool operator<=(const year& x, const year& y) NOEXCEPT
+{
+    return !(y < x);
+}
+
+CONSTCD11 inline bool operator>=(const year& x, const year& y) NOEXCEPT
+{
+    return !(x < y);
+}
+
+// Difference between two years as a years duration.
+CONSTCD11 inline years operator-(const year& x, const year& y) NOEXCEPT
+{
+    return years{static_cast<int>(x) - static_cast<int>(y)};
+}
+
+// year + years adds the duration's count to the year's int value.
+CONSTCD11 inline year operator+(const year& x, const years& y) NOEXCEPT
+{
+    return year{static_cast<int>(x) + y.count()};
+}
+
+// years + year forwards to year + years.
+CONSTCD11 inline year operator+(const years& x, const year& y) NOEXCEPT
+{
+    return y + x;
+}
+
+// year - years subtracts the duration's count from the year's int value.
+CONSTCD11 inline year operator-(const year& x, const years& y) NOEXCEPT
+{
+    return year{static_cast<int>(x) - y.count()};
+}
+
+// Writes the year as a decimal number padded with '0' using internal
+// alignment: width 4, or 5 for negative years. The classic "C" locale is
+// imbued for the write. An invalid year is followed by " is not a valid year".
+// The stream's formatting state is restored when the function returns.
+template<class CharT, class Traits>
+inline
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const year& y)
+{
+    detail::save_ostream<CharT, Traits> state_guard(os);
+    const bool is_negative = y < year{0};
+    os.fill('0');
+    os.flags(std::ios::dec | std::ios::internal);
+    os.width(is_negative ? 5 : 4);
+    os.imbue(std::locale::classic());
+    os << static_cast<int>(y);
+    if (y.ok())
+        return os;
+    os << " is not a valid year";
+    return os;
+}
+
+// weekday
+
+// Maps a day count z to a weekday number in [0, 6]; z == 0 yields 4.
+// The arithmetic is unsigned. For z >= -4 the value z + 4 is non-negative
+// after wrap-around, so it is reduced directly. For z < -4 the wrapped value
+// itself is reduced.
+// NOTE(review): the z < -4 branch appears to rely on 2^32 % 7 == 4, i.e. a
+// 32-bit unsigned - confirm for other widths.
+CONSTCD14
+inline
+unsigned char
+weekday::weekday_from_days(int z) NOEXCEPT
+{
+    auto const wrapped = static_cast<unsigned>(z);
+    auto const remainder = (z >= -4) ? (wrapped + 4) % 7 : wrapped % 7;
+    return static_cast<unsigned char>(remainder);
+}
+
+// From a number: 7 is stored as 0; any other value is stored as given
+// (converted to the type of wd_), without a range check.
+CONSTCD11
+inline
+weekday::weekday(unsigned wd) NOEXCEPT
+ : wd_(static_cast<decltype(wd_)>(wd != 7 ? wd : 0))
+ {}
+
+// From sys_days: derived from the day count since the epoch via
+// weekday_from_days.
+CONSTCD14
+inline
+weekday::weekday(const sys_days& dp) NOEXCEPT
+ : wd_(weekday_from_days(dp.time_since_epoch().count()))
+ {}
+
+// From local_days: same computation as for sys_days.
+CONSTCD14
+inline
+weekday::weekday(const local_days& dp) NOEXCEPT
+ : wd_(weekday_from_days(dp.time_since_epoch().count()))
+ {}
+
+// Increment and decrement go through += / -= with days{1}, so they share the
+// behaviour of the binary weekday +/- days operators.
+CONSTCD14
+inline
+weekday&
+weekday::operator++() NOEXCEPT
+{
+    return *this += days{1};
+}
+
+CONSTCD14
+inline
+weekday
+weekday::operator++(int) NOEXCEPT
+{
+    weekday previous(*this);
+    ++(*this);
+    return previous;
+}
+
+CONSTCD14
+inline
+weekday&
+weekday::operator--() NOEXCEPT
+{
+    return *this -= days{1};
+}
+
+CONSTCD14
+inline
+weekday
+weekday::operator--(int) NOEXCEPT
+{
+    weekday previous(*this);
+    --(*this);
+    return previous;
+}
+
+// Compound assignment is expressed through the binary operators.
+CONSTCD14
+inline
+weekday&
+weekday::operator+=(const days& d) NOEXCEPT
+{
+    return *this = *this + d;
+}
+
+CONSTCD14
+inline
+weekday&
+weekday::operator-=(const days& d) NOEXCEPT
+{
+    return *this = *this - d;
+}
+
+// Valid when the stored value is at most 6.
+CONSTCD11
+inline
+bool
+weekday::ok() const NOEXCEPT
+{
+    return wd_ <= 6;
+}
+
+// The stored value as is.
+CONSTCD11
+inline
+unsigned
+weekday::c_encoding() const NOEXCEPT
+{
+    return unsigned{wd_};
+}
+
+// Same as c_encoding() except that a stored 0 is reported as 7.
+CONSTCD11
+inline
+unsigned
+weekday::iso_encoding() const NOEXCEPT
+{
+    return unsigned{((wd_ == 0u) ? 7u : wd_)};
+}
+
+// Equality compares the stored values directly.
+CONSTCD11
+inline
+bool
+operator==(const weekday& x, const weekday& y) NOEXCEPT
+{
+    return x.wd_ == y.wd_;
+}
+
+CONSTCD11
+inline
+bool
+operator!=(const weekday& x, const weekday& y) NOEXCEPT
+{
+    return !(x == y);
+}
+
+// Number of days from y forward to x, reduced modulo 7 into [0, 6].
+CONSTCD14
+inline
+days
+operator-(const weekday& x, const weekday& y) NOEXCEPT
+{
+    auto const delta = x.wd_ - y.wd_;
+    // Floor division by 7 (the -6 bias compensates for truncating division
+    // on negative values).
+    auto const whole_weeks = (delta < 0 ? delta - 6 : delta) / 7;
+    return days{delta - whole_weeks * 7};
+}
+
+// Adds a day count with wrap-around modulo 7.
+CONSTCD14
+inline
+weekday
+operator+(const weekday& x, const days& y) NOEXCEPT
+{
+    auto const total = static_cast<long long>(static_cast<unsigned>(x.wd_)) + y.count();
+    // Floor division by 7, as above.
+    auto const whole_weeks = (total < 0 ? total - 6 : total) / 7;
+    auto const wrapped = total - whole_weeks * 7;
+    return weekday{static_cast<unsigned>(wrapped)};
+}
+
+// days + weekday forwards to weekday + days.
+CONSTCD14
+inline
+weekday
+operator+(const days& x, const weekday& y) NOEXCEPT
+{
+    return y + x;
+}
+
+// weekday - days is implemented as weekday + (-days).
+CONSTCD14
+inline
+weekday
+operator-(const weekday& x, const days& y) NOEXCEPT
+{
+    return x + -y;
+}
+
+// Writes a valid weekday via format() with the "%a" specifier. An invalid
+// weekday is written as its stored numeric value followed by
+// " is not a valid weekday".
+// The name is formatted with the locale imbued in `os` (os.getloc()), the same
+// way the month inserter formats "%b"; previously the stream's locale was not
+// passed to format() and was therefore ignored.
+template<class CharT, class Traits>
+inline
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const weekday& wd)
+{
+    if (wd.ok())
+    {
+        CharT fmt[] = {'%', 'a', 0};
+        os << format(os.getloc(), fmt, wd);
+    }
+    else
+        os << static_cast<unsigned>(wd.wd_) << " is not a valid weekday";
+    return os;
+}
+
+// The inline namespace `literals` and the two user-defined literal operators
+// are compiled only when _MSC_VER is undefined or >= 1900. The constants below
+// are always compiled; when the condition is false they land directly in the
+// enclosing namespace because the namespace braces are omitted too.
+#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
+inline namespace literals
+{
+
+// Literal suffix _d: builds a date::day from the literal value.
+CONSTCD11
+inline
+date::day
+operator "" _d(unsigned long long d) NOEXCEPT
+{
+ return date::day{static_cast<unsigned>(d)};
+}
+
+// Literal suffix _y: builds a date::year from the literal value.
+CONSTCD11
+inline
+date::year
+operator "" _y(unsigned long long y) NOEXCEPT
+{
+ return date::year(static_cast<int>(y));
+}
+#endif // !defined(_MSC_VER) || (_MSC_VER >= 1900)
+
+// Tag constant of type last_spec.
+CONSTDATA date::last_spec last{};
+
+// Abbreviated month constants, numbered 1 through 12.
+CONSTDATA date::month jan{1};
+CONSTDATA date::month feb{2};
+CONSTDATA date::month mar{3};
+CONSTDATA date::month apr{4};
+CONSTDATA date::month may{5};
+CONSTDATA date::month jun{6};
+CONSTDATA date::month jul{7};
+CONSTDATA date::month aug{8};
+CONSTDATA date::month sep{9};
+CONSTDATA date::month oct{10};
+CONSTDATA date::month nov{11};
+CONSTDATA date::month dec{12};
+
+// Abbreviated weekday constants, numbered 0 (sun) through 6 (sat).
+CONSTDATA date::weekday sun{0u};
+CONSTDATA date::weekday mon{1u};
+CONSTDATA date::weekday tue{2u};
+CONSTDATA date::weekday wed{3u};
+CONSTDATA date::weekday thu{4u};
+CONSTDATA date::weekday fri{5u};
+CONSTDATA date::weekday sat{6u};
+
+#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
+} // inline namespace literals
+#endif
+
+// Full-name month constants, numbered 1 through 12.
+CONSTDATA date::month January{1};
+CONSTDATA date::month February{2};
+CONSTDATA date::month March{3};
+CONSTDATA date::month April{4};
+CONSTDATA date::month May{5};
+CONSTDATA date::month June{6};
+CONSTDATA date::month July{7};
+CONSTDATA date::month August{8};
+CONSTDATA date::month September{9};
+CONSTDATA date::month October{10};
+CONSTDATA date::month November{11};
+CONSTDATA date::month December{12};
+
+// Full-name weekday constants, written with the numbers 1 (Monday) through
+// 7 (Sunday). The weekday(unsigned) constructor stores 7 as 0, so Sunday ends
+// up holding 0.
+CONSTDATA date::weekday Monday{1};
+CONSTDATA date::weekday Tuesday{2};
+CONSTDATA date::weekday Wednesday{3};
+CONSTDATA date::weekday Thursday{4};
+CONSTDATA date::weekday Friday{5};
+CONSTDATA date::weekday Saturday{6};
+CONSTDATA date::weekday Sunday{7};
+
+// weekday_indexed
+
+// Rebuilds a date::weekday from the stored weekday number.
+CONSTCD11
+inline
+weekday
+weekday_indexed::weekday() const NOEXCEPT
+{
+    return date::weekday{static_cast<unsigned>(wd_)};
+}
+
+// The stored index.
+CONSTCD11
+inline
+unsigned
+weekday_indexed::index() const NOEXCEPT
+{
+    return index_;
+}
+
+// Valid when the weekday is valid and the index is in [1, 5].
+CONSTCD11
+inline
+bool
+weekday_indexed::ok() const NOEXCEPT
+{
+    return weekday().ok() && 1 <= index_ && index_ <= 5;
+}
+
+// -Wconversion is silenced around this constructor on compilers that define
+// __GNUC__, then restored.
+// NOTE(review): presumably wd_ and index_ are narrower than unsigned (their
+// declarations are outside this part of the file) - confirm.
+#ifdef __GNUC__
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Wconversion"
+#endif // __GNUC__
+
+// Stores the weekday's number and the index, each converted to the type of
+// the corresponding member. Neither value is range-checked here.
+CONSTCD11
+inline
+weekday_indexed::weekday_indexed(const date::weekday& wd, unsigned index) NOEXCEPT
+ : wd_(static_cast<decltype(wd_)>(static_cast<unsigned>(wd.wd_)))
+ , index_(static_cast<decltype(index_)>(index))
+ {}
+
+#ifdef __GNUC__
+# pragma GCC diagnostic pop
+#endif // __GNUC__
+
+// Writes "<weekday>[<index>]". When the index is outside [1, 5] the text
+// " is not a valid index" is inserted before the closing bracket.
+template<class CharT, class Traits>
+inline
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const weekday_indexed& wdi)
+{
+    const unsigned idx = wdi.index();
+    const bool index_in_range = 1 <= idx && idx <= 5;
+    os << wdi.weekday() << '[' << idx;
+    if (!index_in_range)
+        os << " is not a valid index";
+    os << ']';
+    return os;
+}
+
+// weekday[index] builds a weekday_indexed from this weekday and the index.
+CONSTCD11 inline weekday_indexed weekday::operator[](unsigned index) const NOEXCEPT
+{
+    return {*this, index};
+}
+
+// Two weekday_indexed values are equal when both weekday and index match.
+CONSTCD11 inline bool operator==(const weekday_indexed& x, const weekday_indexed& y) NOEXCEPT
+{
+    return x.weekday() == y.weekday() && x.index() == y.index();
+}
+
+CONSTCD11 inline bool operator!=(const weekday_indexed& x, const weekday_indexed& y) NOEXCEPT
+{
+    return !(x == y);
+}
+
+// weekday_last
+
+// The wrapped weekday.
+CONSTCD11
+inline
+date::weekday
+weekday_last::weekday() const NOEXCEPT
+{
+    return wd_;
+}
+
+// Valid exactly when the wrapped weekday is valid.
+CONSTCD11
+inline
+bool
+weekday_last::ok() const NOEXCEPT
+{
+    return wd_.ok();
+}
+
+// Wraps the given weekday.
+CONSTCD11
+inline
+weekday_last::weekday_last(const date::weekday& wd) NOEXCEPT
+    : wd_(wd)
+    {}
+
+// Equality compares the wrapped weekdays.
+CONSTCD11
+inline
+bool
+operator==(const weekday_last& x, const weekday_last& y) NOEXCEPT
+{
+    return x.weekday() == y.weekday();
+}
+
+CONSTCD11
+inline
+bool
+operator!=(const weekday_last& x, const weekday_last& y) NOEXCEPT
+{
+    return !(x == y);
+}
+
+// Writes the weekday followed by the literal text "[last]".
+template<class CharT, class Traits>
+inline
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const weekday_last& wdl)
+{
+    os << wdl.weekday();
+    os << "[last]";
+    return os;
+}
+
+// weekday[last] builds a weekday_last from this weekday.
+CONSTCD11
+inline
+weekday_last
+weekday::operator[](last_spec) const NOEXCEPT
+{
+    return weekday_last{*this};
+}
+
+// year_month
+
+// Stores the given year and month as is.
+CONSTCD11
+inline
+year_month::year_month(const date::year& y, const date::month& m) NOEXCEPT
+ : y_(y)
+ , m_(m)
+ {}
+
+// Field accessors; ok() requires both the year and the month to be valid.
+CONSTCD11 inline year year_month::year() const NOEXCEPT {return y_;}
+CONSTCD11 inline month year_month::month() const NOEXCEPT {return m_;}
+CONSTCD11 inline bool year_month::ok() const NOEXCEPT {return y_.ok() && m_.ok();}
+
+// In-place arithmetic: each operator delegates to the matching binary
+// operator and assigns the result back to *this.
+
+// Adds dm months.
+template<class>
+CONSTCD14 inline year_month&
+year_month::operator+=(const months& dm) NOEXCEPT
+{
+    return *this = *this + dm;
+}
+
+// Subtracts dm months.
+template<class>
+CONSTCD14 inline year_month&
+year_month::operator-=(const months& dm) NOEXCEPT
+{
+    return *this = *this - dm;
+}
+
+// Adds dy years.
+CONSTCD14 inline year_month&
+year_month::operator+=(const years& dy) NOEXCEPT
+{
+    return *this = *this + dy;
+}
+
+// Subtracts dy years.
+CONSTCD14 inline year_month&
+year_month::operator-=(const years& dy) NOEXCEPT
+{
+    return *this = *this - dy;
+}
+
+// Equal when both year and month match.
+CONSTCD11 inline bool
+operator==(const year_month& x, const year_month& y) NOEXCEPT
+{
+    return x.year() == y.year() && x.month() == y.month();
+}
+
+// Negation of operator==.
+CONSTCD11 inline bool
+operator!=(const year_month& x, const year_month& y) NOEXCEPT
+{
+    return !(x == y);
+}
+
+// Lexicographic ordering: the year decides first, the month breaks ties.
+CONSTCD11 inline bool
+operator<(const year_month& x, const year_month& y) NOEXCEPT
+{
+    return x.year() < y.year() ||
+           (!(x.year() > y.year()) && x.month() < y.month());
+}
+
+// The remaining relational operators are all derived from operator<.
+CONSTCD11 inline bool
+operator>(const year_month& x, const year_month& y) NOEXCEPT
+{
+    return y < x;
+}
+
+CONSTCD11 inline bool
+operator<=(const year_month& x, const year_month& y) NOEXCEPT
+{
+    return !(y < x);
+}
+
+CONSTCD11 inline bool
+operator>=(const year_month& x, const year_month& y) NOEXCEPT
+{
+    return !(x < y);
+}
+
+// Adds dm months to ym, carrying whole years into the year field.
+template<class>
+CONSTCD14
+inline
+year_month
+operator+(const year_month& ym, const months& dm) NOEXCEPT
+{
+    // Zero-based month number after the shift; may be negative or exceed 11.
+    auto const shifted = static_cast<int>(static_cast<unsigned>(ym.month())) - 1 + dm.count();
+    // Floor division by 12 (plain '/' would round negative values toward zero).
+    auto const year_carry = (shifted >= 0 ? shifted : shifted-11) / 12;
+    // Back to a one-based month number in [1, 12].
+    auto const month_no = shifted - year_carry * 12 + 1;
+    return (ym.year() + years(year_carry)) / month(static_cast<unsigned>(month_no));
+}
+
+// Commuted form of year_month + months.
+template<class>
+CONSTCD14 inline year_month
+operator+(const months& dm, const year_month& ym) NOEXCEPT
+{
+    return ym + dm;
+}
+
+// Subtracting months is adding the negated amount.
+template<class>
+CONSTCD14 inline year_month
+operator-(const year_month& ym, const months& dm) NOEXCEPT
+{
+    return ym + (-dm);
+}
+
+// Returns the signed distance, in months, from y to x.
+//
+// The month numbers are converted to int before being subtracted. Subtracting
+// them as unsigned wraps around whenever x.month() < y.month() and only yields
+// the intended negative value through an out-of-range unsigned-to-signed
+// conversion, which is implementation-defined before C++20. operator+ for
+// year_month and months uses the same cast-to-int-first approach.
+CONSTCD11
+inline
+months
+operator-(const year_month& x, const year_month& y) NOEXCEPT
+{
+    return (x.year() - y.year()) +
+            months(static_cast<int>(static_cast<unsigned>(x.month())) -
+                   static_cast<int>(static_cast<unsigned>(y.month())));
+}
+
+// Adds dy years; the month is carried over unchanged.
+CONSTCD11 inline year_month
+operator+(const year_month& ym, const years& dy) NOEXCEPT
+{
+    return (ym.year() + dy) / ym.month();
+}
+
+// Commuted form of year_month + years.
+CONSTCD11 inline year_month
+operator+(const years& dy, const year_month& ym) NOEXCEPT
+{
+    return ym + dy;
+}
+
+// Subtracting years is adding the negated amount.
+CONSTCD11 inline year_month
+operator-(const year_month& ym, const years& dy) NOEXCEPT
+{
+    return ym + (-dy);
+}
+
+// Streams a year_month as "<year>/<month>".
+template<class CharT, class Traits>
+inline
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const year_month& ym)
+{
+    os << ym.year() << '/';
+    return os << ym.month();
+}
+
+// month_day
+
+// Stores the given month and day; no validation happens here (see ok()).
+CONSTCD11 inline
+month_day::month_day(const date::month& m, const date::day& d) NOEXCEPT
+    : m_(m), d_(d)
+{}
+
+// Returns the stored month.
+CONSTCD11
+inline
+date::month
+month_day::month() const NOEXCEPT
+{
+    return m_;
+}
+
+// Returns the stored day.
+CONSTCD11
+inline
+date::day
+month_day::day() const NOEXCEPT
+{
+    return d_;
+}
+
+// Valid when the month is valid and the day lies in [1, longest length of that
+// month]. No year is known here, so February accepts up to 29.
+CONSTCD14
+inline
+bool
+month_day::ok() const NOEXCEPT
+{
+    CONSTDATA date::day longest[] =
+    {
+        date::day(31), date::day(29), date::day(31), date::day(30),
+        date::day(31), date::day(30), date::day(31), date::day(31),
+        date::day(30), date::day(31), date::day(30), date::day(31)
+    };
+    // The month must be validated first: it is used as an index into the table.
+    if (!m_.ok())
+        return false;
+    return date::day{1} <= d_ && d_ <= longest[static_cast<unsigned>(m_)-1];
+}
+
+// Equal when both month and day match.
+CONSTCD11 inline bool
+operator==(const month_day& x, const month_day& y) NOEXCEPT
+{
+    return x.month() == y.month() && x.day() == y.day();
+}
+
+// Negation of operator==.
+CONSTCD11 inline bool
+operator!=(const month_day& x, const month_day& y) NOEXCEPT
+{
+    return !(x == y);
+}
+
+// Lexicographic ordering: the month decides first, the day breaks ties.
+CONSTCD11 inline bool
+operator<(const month_day& x, const month_day& y) NOEXCEPT
+{
+    return x.month() < y.month() ||
+           (!(x.month() > y.month()) && x.day() < y.day());
+}
+
+// The remaining relational operators are all derived from operator<.
+CONSTCD11 inline bool
+operator>(const month_day& x, const month_day& y) NOEXCEPT
+{
+    return y < x;
+}
+
+CONSTCD11 inline bool
+operator<=(const month_day& x, const month_day& y) NOEXCEPT
+{
+    return !(y < x);
+}
+
+CONSTCD11 inline bool
+operator>=(const month_day& x, const month_day& y) NOEXCEPT
+{
+    return !(x < y);
+}
+
+// Streams a month_day as "<month>/<day>".
+template<class CharT, class Traits>
+inline
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const month_day& md)
+{
+    os << md.month() << '/';
+    return os << md.day();
+}
+
+// month_day_last
+
+// Wraps the given month.
+CONSTCD11
+inline
+month_day_last::month_day_last(const date::month& m) NOEXCEPT
+    : m_(m)
+{}
+
+// Returns the wrapped month.
+CONSTCD11
+inline
+month
+month_day_last::month() const NOEXCEPT
+{
+    return m_;
+}
+
+// Valid exactly when the wrapped month is valid.
+CONSTCD11
+inline
+bool
+month_day_last::ok() const NOEXCEPT
+{
+    return m_.ok();
+}
+
+// month_day_last values compare exactly like their months.
+
+CONSTCD11 inline bool
+operator==(const month_day_last& x, const month_day_last& y) NOEXCEPT
+{
+    return x.month() == y.month();
+}
+
+CONSTCD11 inline bool
+operator!=(const month_day_last& x, const month_day_last& y) NOEXCEPT
+{
+    return !(x == y);
+}
+
+CONSTCD11 inline bool
+operator<(const month_day_last& x, const month_day_last& y) NOEXCEPT
+{
+    return x.month() < y.month();
+}
+
+// The remaining relational operators are all derived from operator<.
+CONSTCD11 inline bool
+operator>(const month_day_last& x, const month_day_last& y) NOEXCEPT
+{
+    return y < x;
+}
+
+CONSTCD11 inline bool
+operator<=(const month_day_last& x, const month_day_last& y) NOEXCEPT
+{
+    return !(y < x);
+}
+
+CONSTCD11 inline bool
+operator>=(const month_day_last& x, const month_day_last& y) NOEXCEPT
+{
+    return !(x < y);
+}
+
+// Streams a month_day_last as the month followed by "/last".
+template<class CharT, class Traits>
+inline
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const month_day_last& mdl)
+{
+    os << mdl.month();
+    return os << "/last";
+}
+
+// month_weekday
+
+// Stores the given month and indexed weekday.
+CONSTCD11 inline
+month_weekday::month_weekday(const date::month& m,
+                             const date::weekday_indexed& wdi) NOEXCEPT
+    : m_(m), wdi_(wdi)
+{}
+
+// Returns the stored month.
+CONSTCD11
+inline
+month
+month_weekday::month() const NOEXCEPT
+{
+    return m_;
+}
+
+// Returns the stored indexed weekday.
+CONSTCD11 inline weekday_indexed
+month_weekday::weekday_indexed() const NOEXCEPT
+{
+    return wdi_;
+}
+
+// Valid when both components are valid.
+CONSTCD11 inline bool
+month_weekday::ok() const NOEXCEPT
+{
+    return m_.ok() && wdi_.ok();
+}
+
+// Equal when both the month and the indexed weekday match.
+CONSTCD11 inline bool
+operator==(const month_weekday& x, const month_weekday& y) NOEXCEPT
+{
+    return x.month() == y.month() && x.weekday_indexed() == y.weekday_indexed();
+}
+
+// Negation of operator==.
+CONSTCD11 inline bool
+operator!=(const month_weekday& x, const month_weekday& y) NOEXCEPT
+{
+    return !(x == y);
+}
+
+// Streams a month_weekday as "<month>/<indexed weekday>".
+template<class CharT, class Traits>
+inline
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const month_weekday& mwd)
+{
+    os << mwd.month() << '/';
+    return os << mwd.weekday_indexed();
+}
+
+// month_weekday_last
+
+// Stores the given month and weekday_last.
+CONSTCD11 inline
+month_weekday_last::month_weekday_last(const date::month& m,
+                                       const date::weekday_last& wdl) NOEXCEPT
+    : m_(m), wdl_(wdl)
+{}
+
+// Returns the stored month.
+CONSTCD11
+inline
+month
+month_weekday_last::month() const NOEXCEPT
+{
+    return m_;
+}
+
+// Returns the stored weekday_last.
+CONSTCD11 inline weekday_last
+month_weekday_last::weekday_last() const NOEXCEPT
+{
+    return wdl_;
+}
+
+// Valid when both components are valid.
+CONSTCD11 inline bool
+month_weekday_last::ok() const NOEXCEPT
+{
+    return m_.ok() && wdl_.ok();
+}
+
+// Equal when both the month and the weekday_last match.
+CONSTCD11 inline bool
+operator==(const month_weekday_last& x, const month_weekday_last& y) NOEXCEPT
+{
+    return x.month() == y.month() && x.weekday_last() == y.weekday_last();
+}
+
+// Negation of operator==.
+CONSTCD11 inline bool
+operator!=(const month_weekday_last& x, const month_weekday_last& y) NOEXCEPT
+{
+    return !(x == y);
+}
+
+// Streams a month_weekday_last as "<month>/<weekday_last>".
+template<class CharT, class Traits>
+inline
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const month_weekday_last& mwdl)
+{
+    os << mwdl.month() << '/';
+    return os << mwdl.weekday_last();
+}
+
+// year_month_day_last
+
+// Stores the given year together with a month_day_last.
+CONSTCD11 inline
+year_month_day_last::year_month_day_last(const date::year& y,
+                                         const date::month_day_last& mdl) NOEXCEPT
+    : y_(y), mdl_(mdl)
+{}
+
+// In-place arithmetic: each operator delegates to the matching binary
+// operator and assigns the result back to *this.
+
+// Adds m months.
+template<class>
+CONSTCD14 inline year_month_day_last&
+year_month_day_last::operator+=(const months& m) NOEXCEPT
+{
+    return *this = *this + m;
+}
+
+// Subtracts m months.
+template<class>
+CONSTCD14 inline year_month_day_last&
+year_month_day_last::operator-=(const months& m) NOEXCEPT
+{
+    return *this = *this - m;
+}
+
+// Adds y years.
+CONSTCD14 inline year_month_day_last&
+year_month_day_last::operator+=(const years& y) NOEXCEPT
+{
+    return *this = *this + y;
+}
+
+// Subtracts y years.
+CONSTCD14 inline year_month_day_last&
+year_month_day_last::operator-=(const years& y) NOEXCEPT
+{
+    return *this = *this - y;
+}
+
+// Returns the stored year.
+CONSTCD11
+inline
+year
+year_month_day_last::year() const NOEXCEPT
+{
+    return y_;
+}
+
+// Returns the month held by the stored month_day_last.
+CONSTCD11
+inline
+month
+year_month_day_last::month() const NOEXCEPT
+{
+    return mdl_.month();
+}
+
+// Returns the stored month_day_last.
+CONSTCD11 inline month_day_last
+year_month_day_last::month_day_last() const NOEXCEPT
+{
+    return mdl_;
+}
+
+// Returns the last day of the stored month in the stored year.
+// day{29} is returned for February of a leap year, and also whenever the
+// stored month_day_last is not ok() (so the table is never indexed with an
+// invalid month).
+CONSTCD14
+inline
+day
+year_month_day_last::day() const NOEXCEPT
+{
+    CONSTDATA date::day common_year[] =
+    {
+        date::day(31), date::day(28), date::day(31), date::day(30),
+        date::day(31), date::day(30), date::day(31), date::day(31),
+        date::day(30), date::day(31), date::day(30), date::day(31)
+    };
+    if ((month() == February && y_.is_leap()) || !mdl_.ok())
+        return date::day{29};
+    return common_year[static_cast<unsigned>(month()) - 1];
+}
+
+// Converts to sys_days by way of the concrete year/month/day date.
+CONSTCD14 inline
+year_month_day_last::operator sys_days() const NOEXCEPT
+{
+    auto const ymd = year()/month()/day();
+    return sys_days(ymd);
+}
+
+// Converts to local_days by way of the concrete year/month/day date.
+CONSTCD14 inline
+year_month_day_last::operator local_days() const NOEXCEPT
+{
+    auto const ymd = year()/month()/day();
+    return local_days(ymd);
+}
+
+// Valid when both the year and the month_day_last are valid.
+CONSTCD11 inline bool
+year_month_day_last::ok() const NOEXCEPT
+{
+    return y_.ok() && mdl_.ok();
+}
+
+// Equal when both the year and the month_day_last match.
+CONSTCD11 inline bool
+operator==(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT
+{
+    return x.year() == y.year() && x.month_day_last() == y.month_day_last();
+}
+
+// Negation of operator==.
+CONSTCD11 inline bool
+operator!=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT
+{
+    return !(x == y);
+}
+
+// Lexicographic ordering: the year decides first, month_day_last breaks ties.
+CONSTCD11 inline bool
+operator<(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT
+{
+    return x.year() < y.year() ||
+           (!(x.year() > y.year()) && x.month_day_last() < y.month_day_last());
+}
+
+// The remaining relational operators are all derived from operator<.
+CONSTCD11 inline bool
+operator>(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT
+{
+    return y < x;
+}
+
+CONSTCD11 inline bool
+operator<=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT
+{
+    return !(y < x);
+}
+
+CONSTCD11 inline bool
+operator>=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT
+{
+    return !(x < y);
+}
+
+// Streams a year_month_day_last as "<year>/<month_day_last>".
+template<class CharT, class Traits>
+inline
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const year_month_day_last& ymdl)
+{
+    os << ymdl.year() << '/';
+    return os << ymdl.month_day_last();
+}
+
+// Adds dm months: the year/month part is shifted, then "last" is re-applied.
+template<class>
+CONSTCD14 inline year_month_day_last
+operator+(const year_month_day_last& ymdl, const months& dm) NOEXCEPT
+{
+    return (ymdl.year() / ymdl.month() + dm) / last;
+}
+
+// Commuted form of year_month_day_last + months.
+template<class>
+CONSTCD14 inline year_month_day_last
+operator+(const months& dm, const year_month_day_last& ymdl) NOEXCEPT
+{
+    return ymdl + dm;
+}
+
+// Subtracting months is adding the negated amount.
+template<class>
+CONSTCD14 inline year_month_day_last
+operator-(const year_month_day_last& ymdl, const months& dm) NOEXCEPT
+{
+    return ymdl + -dm;
+}
+
+// Adds dy years; the month_day_last part is carried over unchanged.
+CONSTCD11 inline year_month_day_last
+operator+(const year_month_day_last& ymdl, const years& dy) NOEXCEPT
+{
+    return year_month_day_last(ymdl.year() + dy, ymdl.month_day_last());
+}
+
+// Commuted form of year_month_day_last + years.
+CONSTCD11 inline year_month_day_last
+operator+(const years& dy, const year_month_day_last& ymdl) NOEXCEPT
+{
+    return ymdl + dy;
+}
+
+// Subtracting years is adding the negated amount.
+CONSTCD11 inline year_month_day_last
+operator-(const year_month_day_last& ymdl, const years& dy) NOEXCEPT
+{
+    return ymdl + -dy;
+}
+
+// year_month_day
+
+// Stores the three fields as given; no validation happens here (see ok()).
+CONSTCD11 inline
+year_month_day::year_month_day(const date::year& y, const date::month& m,
+                               const date::day& d) NOEXCEPT
+    : y_(y), m_(m), d_(d)
+{}
+
+// Copies year and month from ymdl and takes its computed last day.
+CONSTCD14 inline
+year_month_day::year_month_day(const year_month_day_last& ymdl) NOEXCEPT
+    : y_(ymdl.year()), m_(ymdl.month()), d_(ymdl.day())
+{}
+
+// Builds the date from a sys_days time point via from_days().
+CONSTCD14 inline
+year_month_day::year_month_day(sys_days dp) NOEXCEPT
+    : year_month_day(from_days(dp.time_since_epoch()))
+{}
+
+// Builds the date from a local_days time point via from_days().
+CONSTCD14 inline
+year_month_day::year_month_day(local_days dp) NOEXCEPT
+    : year_month_day(from_days(dp.time_since_epoch()))
+{}
+
+// Returns the stored year.
+CONSTCD11
+inline
+year
+year_month_day::year() const NOEXCEPT
+{
+    return y_;
+}
+
+// Returns the stored month.
+CONSTCD11
+inline
+month
+year_month_day::month() const NOEXCEPT
+{
+    return m_;
+}
+
+// Returns the stored day.
+CONSTCD11
+inline
+day
+year_month_day::day() const NOEXCEPT
+{
+    return d_;
+}
+
+// In-place arithmetic: each operator delegates to the matching binary
+// operator and assigns the result back to *this.
+
+// Adds m months.
+template<class>
+CONSTCD14 inline year_month_day&
+year_month_day::operator+=(const months& m) NOEXCEPT
+{
+    return *this = *this + m;
+}
+
+// Subtracts m months.
+template<class>
+CONSTCD14 inline year_month_day&
+year_month_day::operator-=(const months& m) NOEXCEPT
+{
+    return *this = *this - m;
+}
+
+// Adds y years.
+CONSTCD14 inline year_month_day&
+year_month_day::operator+=(const years& y) NOEXCEPT
+{
+    return *this = *this + y;
+}
+
+// Subtracts y years.
+CONSTCD14 inline year_month_day&
+year_month_day::operator-=(const years& y) NOEXCEPT
+{
+    return *this = *this - y;
+}
+
+CONSTCD14 // Converts the stored year/month/day fields into a day count.
+inline
+days
+year_month_day::to_days() const NOEXCEPT
+{
+ static_assert(std::numeric_limits<unsigned>::digits >= 18,
+ "This algorithm has not been ported to a 16 bit unsigned integer");
+ static_assert(std::numeric_limits<int>::digits >= 20,
+ "This algorithm has not been ported to a 16 bit signed integer");
+ auto const y = static_cast<int>(y_) - (m_ <= February); // January and February are counted with the previous year
+ auto const m = static_cast<unsigned>(m_);
+ auto const d = static_cast<unsigned>(d_);
+ auto const era = (y >= 0 ? y : y-399) / 400; // floor(y / 400), also for negative y
+ auto const yoe = static_cast<unsigned>(y - era * 400); // [0, 399]
+ auto const doy = (153*(m > 2 ? m-3 : m+9) + 2)/5 + d-1; // [0, 365]
+ auto const doe = yoe * 365 + yoe/4 - yoe/100 + doy; // [0, 146096]
+ return days{era * 146097 + static_cast<int>(doe) - 719468}; // 146097 days per 400-year era; NOTE(review): 719468 is presumably the shift to the 1970-01-01 epoch - confirm
+}
+
+// Converts to sys_days using the day count from to_days().
+CONSTCD14 inline
+year_month_day::operator sys_days() const NOEXCEPT
+{
+    return sys_days(to_days());
+}
+
+// Converts to local_days using the day count from to_days().
+CONSTCD14 inline
+year_month_day::operator local_days() const NOEXCEPT
+{
+    return local_days(to_days());
+}
+
+// Valid when year and month are valid and the day lies between 1 and the last
+// day of that month in that year. Year and month are checked first, so the
+// last-day lookup only runs for a valid year/month.
+CONSTCD14
+inline
+bool
+year_month_day::ok() const NOEXCEPT
+{
+    return y_.ok() && m_.ok() &&
+           date::day{1} <= d_ && d_ <= (y_ / m_ / last).day();
+}
+
+// Equal when year, month and day all match.
+CONSTCD11 inline bool
+operator==(const year_month_day& x, const year_month_day& y) NOEXCEPT
+{
+    return x.year() == y.year() && x.month() == y.month() && x.day() == y.day();
+}
+
+// Negation of operator==.
+CONSTCD11 inline bool
+operator!=(const year_month_day& x, const year_month_day& y) NOEXCEPT
+{
+    return !(x == y);
+}
+
+// Lexicographic ordering on (year, month, day).
+CONSTCD11 inline bool
+operator<(const year_month_day& x, const year_month_day& y) NOEXCEPT
+{
+    return x.year() < y.year() ||
+           (!(x.year() > y.year()) &&
+            (x.month() < y.month() ||
+             (!(x.month() > y.month()) && x.day() < y.day())));
+}
+
+// The remaining relational operators are all derived from operator<.
+CONSTCD11 inline bool
+operator>(const year_month_day& x, const year_month_day& y) NOEXCEPT
+{
+    return y < x;
+}
+
+CONSTCD11 inline bool
+operator<=(const year_month_day& x, const year_month_day& y) NOEXCEPT
+{
+    return !(y < x);
+}
+
+CONSTCD11 inline bool
+operator>=(const year_month_day& x, const year_month_day& y) NOEXCEPT
+{
+    return !(x < y);
+}
+
+// Streams a year_month_day as "<year>-<month>-<day>", with the month printed
+// as a zero-filled two-digit decimal number. " is not a valid date" is
+// appended when ymd.ok() is false.
+template<class CharT, class Traits>
+inline
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const year_month_day& ymd)
+{
+    // NOTE(review): save_ostream presumably restores the stream's formatting
+    // state when it goes out of scope - confirm against its definition.
+    detail::save_ostream<CharT, Traits> stream_guard(os);
+    os.fill('0');
+    os.flags(std::ios::dec | std::ios::right);
+    os.imbue(std::locale::classic());
+    os << ymd.year() << '-';
+    os.width(2);
+    os << static_cast<unsigned>(ymd.month()) << '-' << ymd.day();
+    if (!ymd.ok())
+        os << " is not a valid date";
+    return os;
+}
+
+CONSTCD14 // Converts a day count back into year/month/day fields (inverse direction of to_days()).
+inline
+year_month_day
+year_month_day::from_days(days dp) NOEXCEPT
+{
+ static_assert(std::numeric_limits<unsigned>::digits >= 18,
+ "This algorithm has not been ported to a 16 bit unsigned integer");
+ static_assert(std::numeric_limits<int>::digits >= 20,
+ "This algorithm has not been ported to a 16 bit signed integer");
+ auto const z = dp.count() + 719468; // same offset that to_days() subtracts
+ auto const era = (z >= 0 ? z : z - 146096) / 146097; // floor(z / 146097), also for negative z
+ auto const doe = static_cast<unsigned>(z - era * 146097); // [0, 146096]
+ auto const yoe = (doe - doe/1460 + doe/36524 - doe/146096) / 365; // [0, 399]
+ auto const y = static_cast<days::rep>(yoe) + era * 400;
+ auto const doy = doe - (365*yoe + yoe/4 - yoe/100); // [0, 365]
+ auto const mp = (5*doy + 2)/153; // [0, 11]
+ auto const d = doy - (153*mp+2)/5 + 1; // [1, 31]
+ auto const m = mp < 10 ? mp+3 : mp-9; // [1, 12]
+ return year_month_day{date::year{y + (m <= 2)}, date::month(m), date::day(d)}; // months 1 and 2 belong to the following year number
+}
+
+// Adds dm months: the year/month part is shifted and the day field is kept
+// as is (the result is not re-validated here).
+template<class>
+CONSTCD14 inline year_month_day
+operator+(const year_month_day& ymd, const months& dm) NOEXCEPT
+{
+    return (ymd.year() / ymd.month() + dm) / ymd.day();
+}
+
+// Commuted form of year_month_day + months.
+template<class>
+CONSTCD14 inline year_month_day
+operator+(const months& dm, const year_month_day& ymd) NOEXCEPT
+{
+    return ymd + dm;
+}
+
+// Subtracting months is adding the negated amount.
+template<class>
+CONSTCD14 inline year_month_day
+operator-(const year_month_day& ymd, const months& dm) NOEXCEPT
+{
+    return ymd + -dm;
+}
+
+// Adds dy years; month and day are kept as is.
+CONSTCD11 inline year_month_day
+operator+(const year_month_day& ymd, const years& dy) NOEXCEPT
+{
+    return (ymd.year() + dy) / ymd.month() / ymd.day();
+}
+
+// Commuted form of year_month_day + years.
+CONSTCD11 inline year_month_day
+operator+(const years& dy, const year_month_day& ymd) NOEXCEPT
+{
+    return ymd + dy;
+}
+
+// Subtracting years is adding the negated amount.
+CONSTCD11 inline year_month_day
+operator-(const year_month_day& ymd, const years& dy) NOEXCEPT
+{
+    return ymd + -dy;
+}
+
+// year_month_weekday
+
+// Stores the three fields as given; no validation happens here (see ok()).
+CONSTCD11 inline
+year_month_weekday::year_month_weekday(const date::year& y, const date::month& m,
+                                       const date::weekday_indexed& wdi) NOEXCEPT
+    : y_(y), m_(m), wdi_(wdi)
+{}
+
+// Builds the value from a sys_days time point via from_days().
+CONSTCD14 inline
+year_month_weekday::year_month_weekday(const sys_days& dp) NOEXCEPT
+    : year_month_weekday(from_days(dp.time_since_epoch()))
+{}
+
+// Builds the value from a local_days time point via from_days().
+CONSTCD14 inline
+year_month_weekday::year_month_weekday(const local_days& dp) NOEXCEPT
+    : year_month_weekday(from_days(dp.time_since_epoch()))
+{}
+
+// In-place arithmetic: each operator delegates to the matching binary
+// operator and assigns the result back to *this.
+
+// Adds m months.
+template<class>
+CONSTCD14 inline year_month_weekday&
+year_month_weekday::operator+=(const months& m) NOEXCEPT
+{
+    return *this = *this + m;
+}
+
+// Subtracts m months.
+template<class>
+CONSTCD14 inline year_month_weekday&
+year_month_weekday::operator-=(const months& m) NOEXCEPT
+{
+    return *this = *this - m;
+}
+
+// Adds y years.
+CONSTCD14 inline year_month_weekday&
+year_month_weekday::operator+=(const years& y) NOEXCEPT
+{
+    return *this = *this + y;
+}
+
+// Subtracts y years.
+CONSTCD14 inline year_month_weekday&
+year_month_weekday::operator-=(const years& y) NOEXCEPT
+{
+    return *this = *this - y;
+}
+
+// Returns the stored year.
+CONSTCD11
+inline
+year
+year_month_weekday::year() const NOEXCEPT
+{
+    return y_;
+}
+
+// Returns the stored month.
+CONSTCD11
+inline
+month
+year_month_weekday::month() const NOEXCEPT
+{
+    return m_;
+}
+
+// Returns the weekday part of the stored indexed weekday.
+CONSTCD11 inline weekday
+year_month_weekday::weekday() const NOEXCEPT
+{
+    return wdi_.weekday();
+}
+
+// Returns the index part of the stored indexed weekday.
+CONSTCD11 inline unsigned
+year_month_weekday::index() const NOEXCEPT
+{
+    return wdi_.index();
+}
+
+// Returns the stored indexed weekday itself.
+CONSTCD11 inline weekday_indexed
+year_month_weekday::weekday_indexed() const NOEXCEPT
+{
+    return wdi_;
+}
+
+// Converts to sys_days using the day count from to_days().
+CONSTCD14 inline
+year_month_weekday::operator sys_days() const NOEXCEPT
+{
+    return sys_days(to_days());
+}
+
+// Converts to local_days using the day count from to_days().
+CONSTCD14 inline
+year_month_weekday::operator local_days() const NOEXCEPT
+{
+    return local_days(to_days());
+}
+
+// Valid when year, month and weekday are valid, the index is at least 1, and
+// the indexed weekday actually falls inside the month. Indices 1 through 4
+// are accepted without further checks; larger ones are checked against the
+// month's last day.
+CONSTCD14
+inline
+bool
+year_month_weekday::ok() const NOEXCEPT
+{
+    auto const idx = wdi_.index();
+    if (!y_.ok() || !m_.ok() || !wdi_.weekday().ok() || idx < 1)
+        return false;
+    if (idx <= 4)
+        return true;
+    // One-based day of the month on which the idx-th such weekday would fall.
+    auto const first_wd = date::weekday(static_cast<sys_days>(y_/m_/1));
+    auto const day_of_month = wdi_.weekday() - first_wd + days((idx-1)*7 + 1);
+    return static_cast<unsigned>(day_of_month.count()) <=
+           static_cast<unsigned>((y_/m_/last).day());
+}
+
+// Builds a year_month_weekday from a day count: year and month come from the
+// calendar date, the index is (day - 1) / 7 + 1.
+CONSTCD14
+inline
+year_month_weekday
+year_month_weekday::from_days(days d) NOEXCEPT
+{
+    sys_days const tp{d};
+    auto const wd = date::weekday(tp);
+    auto const ymd = year_month_day(tp);
+    auto const idx = (static_cast<unsigned>(ymd.day())-1)/7+1;
+    return {ymd.year(), ymd.month(), wd[idx]};
+}
+
+// Returns the day count of the index()-th weekday() of the stored year/month.
+//
+// Starting from the first day of the month, the result is moved forward to
+// the first matching weekday and then by (index - 1) whole weeks. The week
+// offset is computed in signed arithmetic so that an index of 0 yields -7
+// days directly, instead of an unsigned wrap-around followed by an
+// out-of-range unsigned-to-signed conversion (implementation-defined before
+// C++20).
+CONSTCD14
+inline
+days
+year_month_weekday::to_days() const NOEXCEPT
+{
+    auto const first = sys_days(y_/m_/1);
+    auto const weeks_ahead = static_cast<int>(wdi_.index()) - 1;
+    return (first + (wdi_.weekday() - date::weekday(first) + days{weeks_ahead * 7})
+           ).time_since_epoch();
+}
+
+// Equal when year, month and indexed weekday all match.
+CONSTCD11 inline bool
+operator==(const year_month_weekday& x, const year_month_weekday& y) NOEXCEPT
+{
+    return x.year() == y.year()
+        && x.month() == y.month()
+        && x.weekday_indexed() == y.weekday_indexed();
+}
+
+// Negation of operator==.
+CONSTCD11 inline bool
+operator!=(const year_month_weekday& x, const year_month_weekday& y) NOEXCEPT
+{
+    return !(x == y);
+}
+
+// Streams a year_month_weekday as "<year>/<month>/<indexed weekday>".
+template<class CharT, class Traits>
+inline
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const year_month_weekday& ymwdi)
+{
+    os << ymwdi.year() << '/' << ymwdi.month() << '/';
+    return os << ymwdi.weekday_indexed();
+}
+
+// Adds dm months: the year/month part is shifted, the indexed weekday is kept.
+template<class>
+CONSTCD14 inline year_month_weekday
+operator+(const year_month_weekday& ymwd, const months& dm) NOEXCEPT
+{
+    return (ymwd.year() / ymwd.month() + dm) / ymwd.weekday_indexed();
+}
+
+// Commuted form of year_month_weekday + months.
+template<class>
+CONSTCD14 inline year_month_weekday
+operator+(const months& dm, const year_month_weekday& ymwd) NOEXCEPT
+{
+    return ymwd + dm;
+}
+
+// Subtracting months is adding the negated amount.
+template<class>
+CONSTCD14 inline year_month_weekday
+operator-(const year_month_weekday& ymwd, const months& dm) NOEXCEPT
+{
+    return ymwd + -dm;
+}
+
+// Adds dy years; month and indexed weekday are kept.
+CONSTCD11 inline year_month_weekday
+operator+(const year_month_weekday& ymwd, const years& dy) NOEXCEPT
+{
+    return year_month_weekday(ymwd.year() + dy, ymwd.month(), ymwd.weekday_indexed());
+}
+
+// Commuted form of year_month_weekday + years.
+CONSTCD11 inline year_month_weekday
+operator+(const years& dy, const year_month_weekday& ymwd) NOEXCEPT
+{
+    return ymwd + dy;
+}
+
+// Subtracting years is adding the negated amount.
+CONSTCD11 inline year_month_weekday
+operator-(const year_month_weekday& ymwd, const years& dy) NOEXCEPT
+{
+    return ymwd + -dy;
+}
+
+// year_month_weekday_last
+
+// Stores the three fields as given; no validation happens here (see ok()).
+CONSTCD11 inline
+year_month_weekday_last::year_month_weekday_last(const date::year& y,
+                                                 const date::month& m,
+                                                 const date::weekday_last& wdl) NOEXCEPT
+    : y_(y), m_(m), wdl_(wdl)
+{}
+
+// In-place arithmetic: each operator delegates to the matching binary
+// operator and assigns the result back to *this.
+
+// Adds m months.
+template<class>
+CONSTCD14 inline year_month_weekday_last&
+year_month_weekday_last::operator+=(const months& m) NOEXCEPT
+{
+    return *this = *this + m;
+}
+
+// Subtracts m months.
+template<class>
+CONSTCD14 inline year_month_weekday_last&
+year_month_weekday_last::operator-=(const months& m) NOEXCEPT
+{
+    return *this = *this - m;
+}
+
+// Adds y years.
+CONSTCD14 inline year_month_weekday_last&
+year_month_weekday_last::operator+=(const years& y) NOEXCEPT
+{
+    return *this = *this + y;
+}
+
+// Subtracts y years.
+CONSTCD14 inline year_month_weekday_last&
+year_month_weekday_last::operator-=(const years& y) NOEXCEPT
+{
+    return *this = *this - y;
+}
+
+// Returns the stored year.
+CONSTCD11
+inline
+year
+year_month_weekday_last::year() const NOEXCEPT
+{
+    return y_;
+}
+
+// Returns the stored month.
+CONSTCD11
+inline
+month
+year_month_weekday_last::month() const NOEXCEPT
+{
+    return m_;
+}
+
+// Returns the weekday wrapped by the stored weekday_last.
+CONSTCD11 inline weekday
+year_month_weekday_last::weekday() const NOEXCEPT
+{
+    return wdl_.weekday();
+}
+
+// Returns the stored weekday_last itself.
+CONSTCD11 inline weekday_last
+year_month_weekday_last::weekday_last() const NOEXCEPT
+{
+    return wdl_;
+}
+
+// Converts to sys_days using the day count from to_days().
+CONSTCD14 inline
+year_month_weekday_last::operator sys_days() const NOEXCEPT
+{
+    return sys_days(to_days());
+}
+
+// Converts to local_days using the day count from to_days().
+CONSTCD14 inline
+year_month_weekday_last::operator local_days() const NOEXCEPT
+{
+    return local_days(to_days());
+}
+
+// Valid when all three components are valid.
+CONSTCD11 inline bool
+year_month_weekday_last::ok() const NOEXCEPT
+{
+    return y_.ok() && m_.ok() && wdl_.ok();
+}
+
+// Returns the day count of the last weekday() in the stored year/month:
+// start at the month's last day and step back to the requested weekday.
+CONSTCD14
+inline
+days
+year_month_weekday_last::to_days() const NOEXCEPT
+{
+    auto const month_end = sys_days(y_/m_/last);
+    auto const step_back = date::weekday{month_end} - wdl_.weekday();
+    return (month_end - step_back).time_since_epoch();
+}
+
+// Equal when year, month and weekday_last all match.
+CONSTCD11 inline bool
+operator==(const year_month_weekday_last& x, const year_month_weekday_last& y) NOEXCEPT
+{
+    return x.year() == y.year()
+        && x.month() == y.month()
+        && x.weekday_last() == y.weekday_last();
+}
+
+// Negation of operator==.
+CONSTCD11 inline bool
+operator!=(const year_month_weekday_last& x, const year_month_weekday_last& y) NOEXCEPT
+{
+    return !(x == y);
+}
+
+// Streams a year_month_weekday_last as "<year>/<month>/<weekday_last>".
+template<class CharT, class Traits>
+inline
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const year_month_weekday_last& ymwdl)
+{
+    os << ymwdl.year() << '/' << ymwdl.month() << '/';
+    return os << ymwdl.weekday_last();
+}
+
+// Adds dm months: the year/month part is shifted, the weekday_last is kept.
+template<class>
+CONSTCD14 inline year_month_weekday_last
+operator+(const year_month_weekday_last& ymwdl, const months& dm) NOEXCEPT
+{
+    return (ymwdl.year() / ymwdl.month() + dm) / ymwdl.weekday_last();
+}
+
+// Commuted form of year_month_weekday_last + months.
+template<class>
+CONSTCD14 inline year_month_weekday_last
+operator+(const months& dm, const year_month_weekday_last& ymwdl) NOEXCEPT
+{
+    return ymwdl + dm;
+}
+
+// Subtracting months is adding the negated amount.
+template<class>
+CONSTCD14 inline year_month_weekday_last
+operator-(const year_month_weekday_last& ymwdl, const months& dm) NOEXCEPT
+{
+    return ymwdl + -dm;
+}
+
+// Adds dy years; month and weekday_last are kept.
+CONSTCD11 inline year_month_weekday_last
+operator+(const year_month_weekday_last& ymwdl, const years& dy) NOEXCEPT
+{
+    return year_month_weekday_last(ymwdl.year() + dy, ymwdl.month(), ymwdl.weekday_last());
+}
+
+// Commuted form of year_month_weekday_last + years.
+CONSTCD11 inline year_month_weekday_last
+operator+(const years& dy, const year_month_weekday_last& ymwdl) NOEXCEPT
+{
+    return ymwdl + dy;
+}
+
+// Subtracting years is adding the negated amount.
+CONSTCD11 inline year_month_weekday_last
+operator-(const year_month_weekday_last& ymwdl, const years& dy) NOEXCEPT
+{
+    return ymwdl + -dy;
+}
+
+// year_month from operator/()
+
+// year / month -> year_month.
+CONSTCD11 inline year_month
+operator/(const year& y, const month& m) NOEXCEPT
+{
+    return year_month(y, m);
+}
+
+// year / int: the integer is taken as a month number.
+CONSTCD11 inline year_month
+operator/(const year& y, int m) NOEXCEPT
+{
+    return y / month(static_cast<unsigned>(m));
+}
+
+// month_day from operator/()
+
+// month / day -> month_day.
+CONSTCD11 inline month_day
+operator/(const month& m, const day& d) NOEXCEPT
+{
+    return month_day(m, d);
+}
+
+// day / month: same result as month / day.
+CONSTCD11 inline month_day
+operator/(const day& d, const month& m) NOEXCEPT
+{
+    return m / d;
+}
+
+// month / int: the integer is taken as a day number.
+CONSTCD11 inline month_day
+operator/(const month& m, int d) NOEXCEPT
+{
+    return m / day(static_cast<unsigned>(d));
+}
+
+// int / day: the integer is taken as a month number.
+CONSTCD11 inline month_day
+operator/(int m, const day& d) NOEXCEPT
+{
+    return month(static_cast<unsigned>(m)) / d;
+}
+
+// day / int: forwards to int / day, so the integer is a month number.
+CONSTCD11 inline month_day
+operator/(const day& d, int m) NOEXCEPT
+{
+    return m / d;
+}
+
+// month_day_last from operator/()
+
+// month / last -> month_day_last.
+CONSTCD11 inline month_day_last
+operator/(const month& m, last_spec) NOEXCEPT
+{
+    return month_day_last(m);
+}
+
+// last / month: same result as month / last.
+CONSTCD11 inline month_day_last
+operator/(last_spec, const month& m) NOEXCEPT
+{
+    return m / last;
+}
+
+// int / last: the integer is taken as a month number.
+CONSTCD11 inline month_day_last
+operator/(int m, last_spec) NOEXCEPT
+{
+    return month(static_cast<unsigned>(m)) / last;
+}
+
+// last / int: forwards to int / last.
+CONSTCD11 inline month_day_last
+operator/(last_spec, int m) NOEXCEPT
+{
+    return m / last;
+}
+
+// month_weekday from operator/()
+
+// month / weekday_indexed -> month_weekday.
+CONSTCD11 inline month_weekday
+operator/(const month& m, const weekday_indexed& wdi) NOEXCEPT
+{
+    return month_weekday(m, wdi);
+}
+
+// weekday_indexed / month: same result as month / weekday_indexed.
+CONSTCD11 inline month_weekday
+operator/(const weekday_indexed& wdi, const month& m) NOEXCEPT
+{
+    return m / wdi;
+}
+
+// int / weekday_indexed: the integer is taken as a month number.
+CONSTCD11 inline month_weekday
+operator/(int m, const weekday_indexed& wdi) NOEXCEPT
+{
+    return month(static_cast<unsigned>(m)) / wdi;
+}
+
+// weekday_indexed / int: forwards to int / weekday_indexed.
+CONSTCD11 inline month_weekday
+operator/(const weekday_indexed& wdi, int m) NOEXCEPT
+{
+    return m / wdi;
+}
+
+// month_weekday_last from operator/()
+
+// month / weekday_last -> month_weekday_last.
+CONSTCD11 inline month_weekday_last
+operator/(const month& m, const weekday_last& wdl) NOEXCEPT
+{
+    return month_weekday_last(m, wdl);
+}
+
+// weekday_last / month: same result as month / weekday_last.
+CONSTCD11 inline month_weekday_last
+operator/(const weekday_last& wdl, const month& m) NOEXCEPT
+{
+    return m / wdl;
+}
+
+// int / weekday_last: the integer is taken as a month number.
+CONSTCD11 inline month_weekday_last
+operator/(int m, const weekday_last& wdl) NOEXCEPT
+{
+    return month(static_cast<unsigned>(m)) / wdl;
+}
+
+// weekday_last / int: forwards to int / weekday_last.
+CONSTCD11 inline month_weekday_last
+operator/(const weekday_last& wdl, int m) NOEXCEPT
+{
+    return m / wdl;
+}
+
+// year_month_day from operator/()
+
+// year_month / day -> year_month_day.
+CONSTCD11 inline year_month_day
+operator/(const year_month& ym, const day& d) NOEXCEPT
+{
+    return year_month_day(ym.year(), ym.month(), d);
+}
+
+// year_month / int: the integer is taken as a day number.
+CONSTCD11 inline year_month_day
+operator/(const year_month& ym, int d) NOEXCEPT
+{
+    return ym / day(static_cast<unsigned>(d));
+}
+
+// year / month_day -> year_month_day.
+CONSTCD11 inline year_month_day
+operator/(const year& y, const month_day& md) NOEXCEPT
+{
+    return y / md.month() / md.day();
+}
+
+// int / month_day: the integer is taken as a year number.
+CONSTCD11 inline year_month_day
+operator/(int y, const month_day& md) NOEXCEPT
+{
+    return year(y) / md;
+}
+
+// month_day / year: same result as year / month_day.
+CONSTCD11 inline year_month_day
+operator/(const month_day& md, const year& y) NOEXCEPT
+{
+    return y / md;
+}
+
+// month_day / int: the integer is taken as a year number.
+CONSTCD11 inline year_month_day
+operator/(const month_day& md, int y) NOEXCEPT
+{
+    return year(y) / md;
+}
+
+// year_month_day_last from operator/()
+
+// year_month / last -> year_month_day_last.
+CONSTCD11 inline year_month_day_last
+operator/(const year_month& ym, last_spec) NOEXCEPT
+{
+    return year_month_day_last(ym.year(), month_day_last(ym.month()));
+}
+
+// year / month_day_last -> year_month_day_last.
+CONSTCD11 inline year_month_day_last
+operator/(const year& y, const month_day_last& mdl) NOEXCEPT
+{
+    return year_month_day_last(y, mdl);
+}
+
+// int / month_day_last: the integer is taken as a year number.
+CONSTCD11 inline year_month_day_last
+operator/(int y, const month_day_last& mdl) NOEXCEPT
+{
+    return year(y) / mdl;
+}
+
+// month_day_last / year: same result as year / month_day_last.
+CONSTCD11 inline year_month_day_last
+operator/(const month_day_last& mdl, const year& y) NOEXCEPT
+{
+    return y / mdl;
+}
+
+// month_day_last / int: the integer is taken as a year number.
+CONSTCD11 inline year_month_day_last
+operator/(const month_day_last& mdl, int y) NOEXCEPT
+{
+    return year(y) / mdl;
+}
+
+// year_month_weekday from operator/()
+
+// year_month / weekday_indexed -> year_month_weekday.
+CONSTCD11 inline year_month_weekday
+operator/(const year_month& ym, const weekday_indexed& wdi) NOEXCEPT
+{
+    return year_month_weekday(ym.year(), ym.month(), wdi);
+}
+
+// year / month_weekday -> year_month_weekday.
+CONSTCD11 inline year_month_weekday
+operator/(const year& y, const month_weekday& mwd) NOEXCEPT
+{
+    return year_month_weekday(y, mwd.month(), mwd.weekday_indexed());
+}
+
+// int / month_weekday: the integer is taken as a year number.
+CONSTCD11 inline year_month_weekday
+operator/(int y, const month_weekday& mwd) NOEXCEPT
+{
+    return year(y) / mwd;
+}
+
+// month_weekday / year: same result as year / month_weekday.
+CONSTCD11 inline year_month_weekday
+operator/(const month_weekday& mwd, const year& y) NOEXCEPT
+{
+    return y / mwd;
+}
+
+// month_weekday / int: the integer is taken as a year number.
+CONSTCD11 inline year_month_weekday
+operator/(const month_weekday& mwd, int y) NOEXCEPT
+{
+    return year(y) / mwd;
+}
+
+// year_month_weekday_last from operator/()
+
+// year_month / weekday_last -> year_month_weekday_last.
+CONSTCD11 inline year_month_weekday_last
+operator/(const year_month& ym, const weekday_last& wdl) NOEXCEPT
+{
+    return year_month_weekday_last(ym.year(), ym.month(), wdl);
+}
+
+// year / month_weekday_last -> year_month_weekday_last.
+CONSTCD11 inline year_month_weekday_last
+operator/(const year& y, const month_weekday_last& mwdl) NOEXCEPT
+{
+    return year_month_weekday_last(y, mwdl.month(), mwdl.weekday_last());
+}
+
+// int / month_weekday_last: the integer is taken as a year number.
+CONSTCD11 inline year_month_weekday_last
+operator/(int y, const month_weekday_last& mwdl) NOEXCEPT
+{
+    return year(y) / mwdl;
+}
+
+// month_weekday_last / year: same result as year / month_weekday_last.
+CONSTCD11 inline year_month_weekday_last
+operator/(const month_weekday_last& mwdl, const year& y) NOEXCEPT
+{
+    return y / mwdl;
+}
+
+// month_weekday_last / int: the integer is taken as a year number.
+CONSTCD11 inline year_month_weekday_last
+operator/(const month_weekday_last& mwdl, int y) NOEXCEPT
+{
+    return year(y) / mwdl;
+}
+
+template <class Duration> // forward declaration only; fields is defined elsewhere
+struct fields;
+
+template <class CharT, class Traits, class Duration> // declaration only: writes fds to os according to the format string fmt
+std::basic_ostream<CharT, Traits>&
+to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
+ const fields<Duration>& fds, const std::string* abbrev = nullptr, // abbrev may be omitted (defaults to nullptr)
+ const std::chrono::seconds* offset_sec = nullptr); // offset_sec may be omitted (defaults to nullptr)
+
+template <class CharT, class Traits, class Duration, class Alloc> // declaration only: reads from is into fds according to fmt
+std::basic_istream<CharT, Traits>&
+from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
+ fields<Duration>& fds, std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr, // optional output, defaults to nullptr
+ std::chrono::minutes* offset = nullptr); // optional output, defaults to nullptr; note minutes here vs. seconds in to_stream
+
+// hh_mm_ss
+
+namespace detail
+{
+
+struct undocumented {explicit undocumented() = default;}; // empty type with an explicit default ctor; NOTE(review): presumably a tag for internal-only overloads - confirm at its use sites
+
+// width<n>::value is the number of fractional decimal digits in 1/n.
+// width<0>::value and width<1>::value are defined to be 0.
+// The recursion performs long division of 1 by n, one decimal digit per
+// step, and stops when the remainder becomes 0 or after 19 digits, so a
+// non-terminating expansion yields 19.
+// Example: width<2>::value    == 1
+// Example: width<3>::value    == 19
+// Example: width<4>::value    == 2
+// Example: width<10>::value   == 1
+// Example: width<1000>::value == 3
+template <std::uint64_t n, std::uint64_t rem = 10, unsigned depth = 0,
+          bool keep_going = (n >= 2) && (rem != 0) && (depth < 19)>
+struct width
+{
+    static CONSTDATA unsigned value = 1 + width<n, rem%n*10, depth+1>::value;
+};
+
+// Terminal case: nothing more to count.
+template <std::uint64_t n, std::uint64_t rem, unsigned depth>
+struct width<n, rem, depth, false>
+{
+    static CONSTDATA unsigned value = 0;
+};
+
+// static_pow10<exp>::value is 10 raised to `exp`, computed at compile time
+// by squaring the result for exp/2 and multiplying by 10 when exp is odd.
+template <unsigned exp>
+struct static_pow10
+{
+private:
+    static CONSTDATA std::uint64_t half = static_pow10<exp/2>::value;
+public:
+    static CONSTDATA std::uint64_t value = half * half * (exp % 2 != 0 ? 10 : 1);
+};
+
+// Base case: 10^0 == 1.
+template <>
+struct static_pow10<0>
+{
+    static CONSTDATA std::uint64_t value = 1;
+};
+
+// Holds a duration as whole seconds plus a fixed-point fractional remainder
+// and streams the pair as two-digit seconds followed, when the precision has
+// fractional digits, by a decimal point and zero-padded subseconds.
+template <class Duration>
+class decimal_format_seconds
+{
+    using CT  = typename std::common_type<Duration, std::chrono::seconds>::type;
+    using rep = typename CT::rep;
+
+public:
+    // Fractional digits required by CT's tick period; 6 is used instead when
+    // detail::width reports 19 digits.
+    static unsigned constexpr width = detail::width<CT::period::den>::value < 19 ?
+                                      detail::width<CT::period::den>::value : 6u;
+    using precision = std::chrono::duration<rep,
+                                            std::ratio<1, static_pow10<width>::value>>;
+
+private:
+    std::chrono::seconds s_;      // whole seconds
+    precision            sub_s_;  // remainder, in units of 10^-width seconds
+
+public:
+    CONSTCD11 decimal_format_seconds()
+        : s_()
+        , sub_s_()
+    {}
+
+    // Splits `d` into truncated whole seconds and the remaining fraction.
+    CONSTCD11 explicit decimal_format_seconds(const Duration& d) NOEXCEPT
+        : s_(std::chrono::duration_cast<std::chrono::seconds>(d))
+        , sub_s_(std::chrono::duration_cast<precision>(d - s_))
+    {}
+
+    CONSTCD14 std::chrono::seconds& seconds() NOEXCEPT
+    {
+        return s_;
+    }
+
+    CONSTCD11 std::chrono::seconds seconds() const NOEXCEPT
+    {
+        return s_;
+    }
+
+    CONSTCD11 precision subseconds() const NOEXCEPT
+    {
+        return sub_s_;
+    }
+
+    // Recombines both parts into a single duration.
+    CONSTCD14 precision to_duration() const NOEXCEPT
+    {
+        return s_ + sub_s_;
+    }
+
+    // True when the fraction is below one second and the seconds are below
+    // one minute.
+    CONSTCD11 bool in_conventional_range() const NOEXCEPT
+    {
+        return sub_s_ < std::chrono::seconds{1} && s_ < std::chrono::minutes{1};
+    }
+
+    // Dispatches to the floating-point or integral printer depending on rep.
+    template <class CharT, class Traits>
+    friend
+    std::basic_ostream<CharT, Traits>&
+    operator<<(std::basic_ostream<CharT, Traits>& os, const decimal_format_seconds& x)
+    {
+        return x.print(os, std::chrono::treat_as_floating_point<rep>{});
+    }
+
+    // Floating-point rep: prints the total in fixed notation, with a leading
+    // '0' when the value is below ten seconds.
+    template <class CharT, class Traits>
+    std::basic_ostream<CharT, Traits>&
+    print(std::basic_ostream<CharT, Traits>& os, std::true_type) const
+    {
+        date::detail::save_ostream<CharT, Traits> restore(os);
+        std::chrono::duration<rep> total = s_ + sub_s_;
+        if (total < std::chrono::seconds{10})
+        {
+            os << '0';
+        }
+        os << std::fixed << total.count();
+        return os;
+    }
+
+    // Integral rep: prints zero-padded two-digit seconds and, when width is
+    // non-zero, a decimal point followed by zero-padded subseconds.
+    template <class CharT, class Traits>
+    std::basic_ostream<CharT, Traits>&
+    print(std::basic_ostream<CharT, Traits>& os, std::false_type) const
+    {
+        date::detail::save_ostream<CharT, Traits> restore(os);
+        os.fill('0');
+        os.flags(std::ios::dec | std::ios::right);
+        os.width(2);
+        os << s_.count();
+        if (width > 0)
+        {
+#if !ONLY_C_LOCALE
+            os << std::use_facet<std::numpunct<CharT>>(os.getloc()).decimal_point();
+#else
+            os << '.';
+#endif
+            // The fraction digits are written with the classic locale; the
+            // inner guard is released at the end of this block.
+            date::detail::save_ostream<CharT, Traits> restore_fraction(os);
+            os.imbue(std::locale::classic());
+            os.width(width);
+            os << sub_s_.count();
+        }
+        return os;
+    }
+};
+
+// Absolute value of a duration whose representation is signed.
+template <class Rep, class Period>
+inline
+CONSTCD11
+typename std::enable_if
+    <
+        std::numeric_limits<Rep>::is_signed,
+        std::chrono::duration<Rep, Period>
+    >::type
+abs(std::chrono::duration<Rep, Period> d)
+{
+    return d >= std::chrono::duration<Rep, Period>::zero() ? +d : -d;
+}
+
+// Absolute value of a duration whose representation is unsigned: the value
+// is returned unchanged.
+template <class Rep, class Period>
+inline
+CONSTCD11
+typename std::enable_if
+    <
+        !std::numeric_limits<Rep>::is_signed,
+        std::chrono::duration<Rep, Period>
+    >::type
+abs(std::chrono::duration<Rep, Period> d)
+{
+    return d;
+}
+
+} // namespace detail
+
+// Breaks the absolute value of a duration into hours, minutes and
+// (sub)seconds, keeping the sign in a separate flag.
+template <class Duration>
+class hh_mm_ss
+{
+    using dfs = detail::decimal_format_seconds<
+                    typename std::common_type<Duration, std::chrono::seconds>::type>;
+
+    std::chrono::hours   h_;
+    std::chrono::minutes m_;
+    dfs                  s_;
+    bool                 neg_;
+
+public:
+    static unsigned CONSTDATA fractional_width = dfs::width;
+    using precision = typename dfs::precision;
+
+    // Same as constructing from Duration::zero().
+    CONSTCD11 hh_mm_ss() NOEXCEPT
+        : hh_mm_ss(Duration::zero())
+    {}
+
+    // Each field is derived from |d|; the sign of d is recorded in neg_.
+    CONSTCD11 explicit hh_mm_ss(Duration d) NOEXCEPT
+        : h_(std::chrono::duration_cast<std::chrono::hours>(detail::abs(d)))
+        , m_(std::chrono::duration_cast<std::chrono::minutes>(detail::abs(d)) - h_)
+        , s_(detail::abs(d) - h_ - m_)
+        , neg_(d < Duration::zero())
+    {}
+
+    CONSTCD11 std::chrono::hours hours() const NOEXCEPT
+    {
+        return h_;
+    }
+
+    CONSTCD11 std::chrono::minutes minutes() const NOEXCEPT
+    {
+        return m_;
+    }
+
+    CONSTCD11 std::chrono::seconds seconds() const NOEXCEPT
+    {
+        return s_.seconds();
+    }
+
+    // Mutable access to the whole-seconds field, gated by the tag type.
+    CONSTCD14 std::chrono::seconds&
+    seconds(detail::undocumented) NOEXCEPT
+    {
+        return s_.seconds();
+    }
+
+    CONSTCD11 precision subseconds() const NOEXCEPT
+    {
+        return s_.subseconds();
+    }
+
+    CONSTCD11 bool is_negative() const NOEXCEPT
+    {
+        return neg_;
+    }
+
+    CONSTCD11 explicit operator precision() const NOEXCEPT
+    {
+        return to_duration();
+    }
+
+    // Recombines the fields and re-applies the sign.
+    CONSTCD11 precision to_duration() const NOEXCEPT
+    {
+        return (s_.to_duration() + m_ + h_) * (neg_ ? -1 : 1);
+    }
+
+    // True for a non-negative value with hours < 1 day, minutes < 1 hour and
+    // seconds in their conventional range.
+    CONSTCD11 bool in_conventional_range() const NOEXCEPT
+    {
+        return !neg_ && h_ < days{1} && m_ < std::chrono::hours{1} &&
+               s_.in_conventional_range();
+    }
+
+private:
+
+    // Streams as [-]hh:mm:ss[.fff], zero-padding hours and minutes below 10.
+    template <class charT, class traits>
+    friend
+    std::basic_ostream<charT, traits>&
+    operator<<(std::basic_ostream<charT, traits>& os, hh_mm_ss const& tod)
+    {
+        if (tod.is_negative())
+        {
+            os << '-';
+        }
+        if (tod.h_ < std::chrono::hours{10})
+        {
+            os << '0';
+        }
+        os << tod.h_.count();
+        os << ':';
+        if (tod.m_ < std::chrono::minutes{10})
+        {
+            os << '0';
+        }
+        os << tod.m_.count();
+        os << ':';
+        os << tod.s_;
+        return os;
+    }
+
+    template <class CharT, class Traits, class Duration2>
+    friend
+    std::basic_ostream<CharT, Traits>&
+    date::to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
+                    const fields<Duration2>& fds, const std::string* abbrev,
+                    const std::chrono::seconds* offset_sec);
+
+    template <class CharT, class Traits, class Duration2, class Alloc>
+    friend
+    std::basic_istream<CharT, Traits>&
+    date::from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
+                      fields<Duration2>& fds,
+                      std::basic_string<CharT, Traits, Alloc>* abbrev,
+                      std::chrono::minutes* offset);
+};
+
+// True when h lies in the half-open range [0h, 12h).
+inline
+CONSTCD14
+bool
+is_am(std::chrono::hours const& h) NOEXCEPT
+{
+    using std::chrono::hours;
+    return h >= hours{0} && h < hours{12};
+}
+
+// True when h lies in the half-open range [12h, 24h).
+inline
+CONSTCD14
+bool
+is_pm(std::chrono::hours const& h) NOEXCEPT
+{
+    using std::chrono::hours;
+    return h >= hours{12} && h < hours{24};
+}
+
+// Converts an hour count to its 12-hour-clock form: 0h becomes 12h, values
+// above 12h are reduced by 12h, everything else is returned unchanged.
+inline
+CONSTCD14
+std::chrono::hours
+make12(std::chrono::hours h) NOEXCEPT
+{
+    using std::chrono::hours;
+    if (h == hours{0})
+        return hours{12};
+    if (h > hours{12})
+        return h - hours{12};
+    return h;
+}
+
+// Converts a 12-hour-clock hour plus an AM/PM flag to a 24-hour-clock hour:
+// 12h maps to 12h (PM) or 0h (AM); any other value gains 12h when PM and is
+// returned unchanged when AM.
+inline
+CONSTCD14
+std::chrono::hours
+make24(std::chrono::hours h, bool is_pm) NOEXCEPT
+{
+    using std::chrono::hours;
+    if (h == hours{12})
+        return is_pm ? h : hours{0};
+    return is_pm ? h + hours{12} : h;
+}
+
+// time_of_day<Duration> is an alias for hh_mm_ss<Duration>.
+template <class Duration>
+using time_of_day = hh_mm_ss<Duration>;
+
+// Wraps a duration in the matching hh_mm_ss specialization. Only available
+// for durations whose representation is not treated as floating point.
+template <class Rep, class Period,
+          class = typename std::enable_if
+                  <
+                      !std::chrono::treat_as_floating_point<Rep>::value
+                  >::type>
+CONSTCD11
+inline
+hh_mm_ss<std::chrono::duration<Rep, Period>>
+make_time(const std::chrono::duration<Rep, Period>& d)
+{
+    return hh_mm_ss<std::chrono::duration<Rep, Period>>(d);
+}
+
+// Streams a sys_time finer than days as "<year_month_day> <time of day>".
+// Enabled only for non-floating-point durations with a period below one day.
+template <class CharT, class Traits, class Duration>
+inline
+typename std::enable_if
+<
+    !std::chrono::treat_as_floating_point<typename Duration::rep>::value &&
+        std::ratio_less<typename Duration::period, days::period>::value
+    , std::basic_ostream<CharT, Traits>&
+>::type
+operator<<(std::basic_ostream<CharT, Traits>& os, const sys_time<Duration>& tp)
+{
+    // Split the time point into its day and the time elapsed within that day.
+    auto const day_point = date::floor<days>(tp);
+    os << year_month_day(day_point);
+    os << ' ';
+    os << make_time(tp - day_point);
+    return os;
+}
+
+// Streams a sys_days value as its year_month_day.
+template <class CharT, class Traits>
+inline
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const sys_days& dp)
+{
+    os << year_month_day(dp);
+    return os;
+}
+
+// Streams a local_time by formatting a sys_time that has the same
+// time_since_epoch().
+template <class CharT, class Traits, class Duration>
+inline
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const local_time<Duration>& ut)
+{
+    os << sys_time<Duration>{ut.time_since_epoch()};
+    return os;
+}
+
+namespace detail
+{
+
+// Forward declaration so the operator+ below can be declared before the
+// class body, which names it as a friend.
+template <class CharT, std::size_t N>
+class string_literal;
+
+// Concatenation of two string_literals. The result uses the larger of the
+// two character types (CharT1 on a tie) and holds N1 + N2 - 1 elements.
+template <class CharT1, class CharT2, std::size_t N1, std::size_t N2>
+inline
+CONSTCD14
+string_literal<typename std::conditional<sizeof(CharT2) <= sizeof(CharT1), CharT1, CharT2>::type,
+ N1 + N2 - 1>
+operator+(const string_literal<CharT1, N1>& x, const string_literal<CharT2, N2>& y) NOEXCEPT;
+
+// Fixed-size character array of N elements that can be built and
+// concatenated in constant expressions. size() reports N-1 and the streaming
+// operator prints the array as a C string, so the last element acts as the
+// terminator.
+template <class CharT, std::size_t N>
+class string_literal
+{
+    CharT p_[N];
+
+    // Zero-filled literal; private, used by the befriended generic operator+.
+    CONSTCD11 string_literal() NOEXCEPT
+        : p_{}
+    {}
+
+public:
+    using const_iterator = const CharT*;
+
+    string_literal(string_literal const&) = default;
+    string_literal& operator=(string_literal const&) = delete;
+
+    // One character; participates only when N == 2.
+    template <std::size_t N1 = 2,
+              class = typename std::enable_if<N1 == N>::type>
+    CONSTCD11 string_literal(CharT c) NOEXCEPT
+        : p_{c}
+    {}
+
+    // Two characters; participates only when N == 3.
+    template <std::size_t N1 = 3,
+              class = typename std::enable_if<N1 == N>::type>
+    CONSTCD11 string_literal(CharT c1, CharT c2) NOEXCEPT
+        : p_{c1, c2}
+    {}
+
+    // Three characters; participates only when N == 4.
+    template <std::size_t N1 = 4,
+              class = typename std::enable_if<N1 == N>::type>
+    CONSTCD11 string_literal(CharT c1, CharT c2, CharT c3) NOEXCEPT
+        : p_{c1, c2, c3}
+    {}
+
+    // Copies all N elements of an array of CharT.
+    CONSTCD14 string_literal(const CharT(&a)[N]) NOEXCEPT
+        : p_{}
+    {
+        for (std::size_t k = 0; k != N; ++k)
+            p_[k] = a[k];
+    }
+
+    // Copies all N elements of a char array; participates only when CharT is
+    // wider than one byte.
+    template <class U = CharT,
+              class = typename std::enable_if<(1 < sizeof(U))>::type>
+    CONSTCD14 string_literal(const char(&a)[N]) NOEXCEPT
+        : p_{}
+    {
+        for (std::size_t k = 0; k != N; ++k)
+            p_[k] = a[k];
+    }
+
+    // Converting copy from a literal of the same length but another
+    // character type.
+    template <class CharT2,
+              class = typename std::enable_if<!std::is_same<CharT2, CharT>::value>::type>
+    CONSTCD14 string_literal(string_literal<CharT2, N> const& a) NOEXCEPT
+        : p_{}
+    {
+        for (std::size_t k = 0; k != N; ++k)
+            p_[k] = a[k];
+    }
+
+    CONSTCD11 const CharT* data() const NOEXCEPT
+    {
+        return p_;
+    }
+
+    CONSTCD11 std::size_t size() const NOEXCEPT
+    {
+        return N-1;
+    }
+
+    CONSTCD11 const_iterator begin() const NOEXCEPT
+    {
+        return p_;
+    }
+
+    CONSTCD11 const_iterator end() const NOEXCEPT
+    {
+        return p_ + (N-1);
+    }
+
+    // Unchecked element access.
+    CONSTCD11 CharT const& operator[](std::size_t n) const NOEXCEPT
+    {
+        return p_[n];
+    }
+
+    template <class Traits>
+    friend
+    std::basic_ostream<CharT, Traits>&
+    operator<<(std::basic_ostream<CharT, Traits>& os, const string_literal& s)
+    {
+        return os << s.p_;
+    }
+
+    template <class CharT1, class CharT2, std::size_t N1, std::size_t N2>
+    friend
+    CONSTCD14
+    string_literal<typename std::conditional<sizeof(CharT2) <= sizeof(CharT1), CharT1, CharT2>::type,
+                   N1 + N2 - 1>
+    operator+(const string_literal<CharT1, N1>& x, const string_literal<CharT2, N2>& y) NOEXCEPT;
+};
+
+// Concatenates two one-character literals into a two-character literal.
+template <class CharT>
+CONSTCD11
+inline
+string_literal<CharT, 3>
+operator+(const string_literal<CharT, 2>& x, const string_literal<CharT, 2>& y) NOEXCEPT
+{
+    return string_literal<CharT, 3>{x[0], y[0]};
+}
+
+// Appends a one-character literal to a two-character literal.
+template <class CharT>
+CONSTCD11
+inline
+string_literal<CharT, 4>
+operator+(const string_literal<CharT, 3>& x, const string_literal<CharT, 2>& y) NOEXCEPT
+{
+    return string_literal<CharT, 4>{x[0], x[1], y[0]};
+}
+
+// General concatenation. Copies the first N1-1 elements of x, then all N2
+// elements of y, converting each to the wider of the two character types
+// (CharT1 on a tie).
+template <class CharT1, class CharT2, std::size_t N1, std::size_t N2>
+CONSTCD14
+inline
+string_literal<typename std::conditional<sizeof(CharT2) <= sizeof(CharT1), CharT1, CharT2>::type,
+               N1 + N2 - 1>
+operator+(const string_literal<CharT1, N1>& x, const string_literal<CharT2, N2>& y) NOEXCEPT
+{
+    using CT = typename std::conditional<sizeof(CharT2) <= sizeof(CharT1), CharT1, CharT2>::type;
+
+    string_literal<CT, N1 + N2 - 1> joined;
+    // Left operand without its final element.
+    for (std::size_t i = 0; i < N1-1; ++i)
+        joined.p_[i] = CT(x.p_[i]);
+    // Right operand in full, placed directly after the left part.
+    for (std::size_t j = 0; j < N2; ++j)
+        joined.p_[(N1-1) + j] = CT(y.p_[j]);
+
+    return joined;
+}
+
+
+// Appends the size() characters of a string_literal to a copy of x.
+template <class CharT, class Traits, class Alloc, std::size_t N>
+inline
+std::basic_string<CharT, Traits, Alloc>
+operator+(std::basic_string<CharT, Traits, Alloc> x, const string_literal<CharT, N>& y)
+{
+    x.append(y.begin(), y.end());
+    return x;
+}
+
+#if __cplusplus >= 201402 && (!defined(__EDG_VERSION__) || __EDG_VERSION__ > 411) \
+ && (!defined(__SUNPRO_CC) || __SUNPRO_CC > 0x5150)
+
+// Wraps a single character in a one-character string_literal. Accepts only
+// char, wchar_t, char16_t and char32_t.
+template <class CharT,
+          class = std::enable_if_t<std::is_same<CharT, char>::value ||
+                                   std::is_same<CharT, wchar_t>::value ||
+                                   std::is_same<CharT, char16_t>::value ||
+                                   std::is_same<CharT, char32_t>::value>>
+CONSTCD14
+inline
+string_literal<CharT, 2>
+msl(CharT c) NOEXCEPT
+{
+    return string_literal<CharT, 2>(c);
+}
+
+// Counts the decimal digits of i. The result is at least 1; values that are
+// zero or negative after the first division by ten yield 1.
+CONSTCD14
+inline
+std::size_t
+to_string_len(std::intmax_t i)
+{
+    std::size_t digits = 1;
+    for (i /= 10; i > 0; i /= 10)
+        ++digits;
+    return digits;
+}
+
+// Decimal rendering of the compile-time integer N, case N < 10: a single
+// digit character.
+template <std::intmax_t N>
+CONSTCD14
+inline
+std::enable_if_t
+<
+    N < 10,
+    string_literal<char, to_string_len(N)+1>
+>
+msl() NOEXCEPT
+{
+    return msl(static_cast<char>(N % 10 + '0'));
+}
+
+// Decimal rendering of the compile-time integer N, case N >= 10: the
+// rendering of N/10 followed by the last digit.
+template <std::intmax_t N>
+CONSTCD14
+inline
+std::enable_if_t
+<
+    10 <= N,
+    string_literal<char, to_string_len(N)+1>
+>
+msl() NOEXCEPT
+{
+    return msl<N/10>() + msl(static_cast<char>(N % 10 + '0'));
+}
+
+// Renders a reduced ratio whose denominator is not 1 as "[num/den]".
+template <class CharT, std::intmax_t N, std::intmax_t D>
+CONSTCD14
+inline
+std::enable_if_t
+<
+    std::ratio<N, D>::type::den != 1,
+    string_literal<CharT, to_string_len(std::ratio<N, D>::type::num) +
+                          to_string_len(std::ratio<N, D>::type::den) + 4>
+>
+msl(std::ratio<N, D>) NOEXCEPT
+{
+    using R = typename std::ratio<N, D>::type;
+    return msl(CharT{'['})
+         + msl<R::num>()
+         + msl(CharT{'/'})
+         + msl<R::den>()
+         + msl(CharT{']'});
+}
+
+// Renders a reduced ratio whose denominator is 1 as "[num]".
+template <class CharT, std::intmax_t N, std::intmax_t D>
+CONSTCD14
+inline
+std::enable_if_t
+<
+    std::ratio<N, D>::type::den == 1,
+    string_literal<CharT, to_string_len(std::ratio<N, D>::type::num) + 3>
+>
+msl(std::ratio<N, D>) NOEXCEPT
+{
+    using R = typename std::ratio<N, D>::type;
+    return msl(CharT{'['})
+         + msl<R::num>()
+         + msl(CharT{']'});
+}
+
+
+#else // __cplusplus < 201402 || (defined(__EDG_VERSION__) && __EDG_VERSION__ <= 411)
+
+// Decimal text of x as a std::string; thin wrapper over std::to_string.
+inline
+std::string
+to_string(std::uint64_t x)
+{
+    std::string text = std::to_string(x);
+    return text;
+}
+
+// Decimal text of x as a basic_string<CharT>: formats with std::to_string
+// and converts the result character by character.
+template <class CharT>
+inline
+std::basic_string<CharT>
+to_string(std::uint64_t x)
+{
+    const std::string narrow = std::to_string(x);
+    return std::basic_string<CharT>(narrow.begin(), narrow.end());
+}
+
+// Run-time fallback: renders a reduced ratio whose denominator is not 1 as
+// "[num/den]".
+template <class CharT, std::intmax_t N, std::intmax_t D>
+inline
+typename std::enable_if
+<
+    std::ratio<N, D>::type::den != 1,
+    std::basic_string<CharT>
+>::type
+msl(std::ratio<N, D>)
+{
+    using R = typename std::ratio<N, D>::type;
+    std::basic_string<CharT> text(1, '[');
+    text += to_string<CharT>(R::num);
+    text += CharT{'/'};
+    text += to_string<CharT>(R::den);
+    text += CharT{']'};
+    return text;
+}
+
+// Run-time fallback: renders a reduced ratio whose denominator is 1 as
+// "[num]".
+template <class CharT, std::intmax_t N, std::intmax_t D>
+inline
+typename std::enable_if
+<
+    std::ratio<N, D>::type::den == 1,
+    std::basic_string<CharT>
+>::type
+msl(std::ratio<N, D>)
+{
+    using R = typename std::ratio<N, D>::type;
+    std::basic_string<CharT> text(1, '[');
+    text += to_string<CharT>(R::num);
+    text += CharT{']'};
+    return text;
+}
+
+#endif // __cplusplus < 201402 || (defined(__EDG_VERSION__) && __EDG_VERSION__ <= 411)
+
+// Unit prefix for std::atto: "a".
+template <class CharT>
+CONSTCD11
+inline
+string_literal<CharT, 2>
+msl(std::atto) NOEXCEPT
+{
+    return string_literal<CharT, 2>(CharT{'a'});
+}
+
+// Unit prefix for std::femto: "f".
+template <class CharT>
+CONSTCD11
+inline
+string_literal<CharT, 2>
+msl(std::femto) NOEXCEPT
+{
+    return string_literal<CharT, 2>(CharT{'f'});
+}
+
+// Unit prefix for std::pico: "p".
+template <class CharT>
+CONSTCD11
+inline
+string_literal<CharT, 2>
+msl(std::pico) NOEXCEPT
+{
+    return string_literal<CharT, 2>(CharT{'p'});
+}
+
+// Unit prefix for std::nano: "n".
+template <class CharT>
+CONSTCD11
+inline
+string_literal<CharT, 2>
+msl(std::nano) NOEXCEPT
+{
+    return string_literal<CharT, 2>(CharT{'n'});
+}
+
+// Unit prefix for std::micro when CharT is char: the two bytes 0xC2 0xB5.
+template <class CharT>
+CONSTCD11
+inline
+typename std::enable_if
+<
+    std::is_same<CharT, char>::value,
+    string_literal<char, 3>
+>::type
+msl(std::micro) NOEXCEPT
+{
+    return string_literal<char, 3>('\xC2', '\xB5');
+}
+
+// Unit prefix for std::micro when CharT is not char: the single code unit
+// 0xB5.
+template <class CharT>
+CONSTCD11
+inline
+typename std::enable_if
+<
+    !std::is_same<CharT, char>::value,
+    string_literal<CharT, 2>
+>::type
+msl(std::micro) NOEXCEPT
+{
+    return string_literal<CharT, 2>(CharT{static_cast<unsigned char>('\xB5')});
+}
+
+// Unit prefix for std::milli: "m".
+template <class CharT>
+CONSTCD11
+inline
+string_literal<CharT, 2>
+msl(std::milli) NOEXCEPT
+{
+    return string_literal<CharT, 2>(CharT{'m'});
+}
+
+// Unit prefix for std::centi: "c".
+template <class CharT>
+CONSTCD11
+inline
+string_literal<CharT, 2>
+msl(std::centi) NOEXCEPT
+{
+    return string_literal<CharT, 2>(CharT{'c'});
+}
+
+// Unit prefix for std::deca: "da".
+template <class CharT>
+CONSTCD11
+inline
+string_literal<CharT, 3>
+msl(std::deca) NOEXCEPT
+{
+    return string_literal<CharT, 3>(CharT{'d'}, CharT{'a'});
+}
+
+// Unit prefix for std::deci: "d".
+template <class CharT>
+CONSTCD11
+inline
+string_literal<CharT, 2>
+msl(std::deci) NOEXCEPT
+{
+    return string_literal<CharT, 2>(CharT{'d'});
+}
+
+// Unit prefix for std::hecto: "h".
+template <class CharT>
+CONSTCD11
+inline
+string_literal<CharT, 2>
+msl(std::hecto) NOEXCEPT
+{
+    return string_literal<CharT, 2>(CharT{'h'});
+}
+
+// Unit prefix for std::kilo: "k".
+template <class CharT>
+CONSTCD11
+inline
+string_literal<CharT, 2>
+msl(std::kilo) NOEXCEPT
+{
+    return string_literal<CharT, 2>(CharT{'k'});
+}
+
+// Unit prefix for std::mega: "M".
+template <class CharT>
+CONSTCD11
+inline
+string_literal<CharT, 2>
+msl(std::mega) NOEXCEPT
+{
+    return string_literal<CharT, 2>(CharT{'M'});
+}
+
+// Unit prefix for std::giga: "G".
+template <class CharT>
+CONSTCD11
+inline
+string_literal<CharT, 2>
+msl(std::giga) NOEXCEPT
+{
+    return string_literal<CharT, 2>(CharT{'G'});
+}
+
+// Unit prefix for std::tera: "T".
+template <class CharT>
+CONSTCD11
+inline
+string_literal<CharT, 2>
+msl(std::tera) NOEXCEPT
+{
+    return string_literal<CharT, 2>(CharT{'T'});
+}
+
+// Unit prefix for std::peta: "P".
+template <class CharT>
+CONSTCD11
+inline
+string_literal<CharT, 2>
+msl(std::peta) NOEXCEPT
+{
+    return string_literal<CharT, 2>(CharT{'P'});
+}
+
+// Unit prefix for std::exa: "E".
+template <class CharT>
+CONSTCD11
+inline
+string_literal<CharT, 2>
+msl(std::exa) NOEXCEPT
+{
+    return string_literal<CharT, 2>(CharT{'E'});
+}
+
+// Generic unit suffix for a period: whatever msl<CharT>(p) yields, followed
+// by "s". The return type is deduced from that expression.
+template <class CharT, class Period>
+CONSTCD11
+inline
+auto
+get_units(Period p)
+    -> decltype(msl<CharT>(p) + string_literal<CharT, 2>{'s'})
+{
+    return msl<CharT>(p) + string_literal<CharT, 2>{'s'};
+}
+
+// Unit suffix for a period of one second: "s".
+template <class CharT>
+CONSTCD11
+inline
+string_literal<CharT, 2>
+get_units(std::ratio<1>)
+{
+    return string_literal<CharT, 2>(CharT{'s'});
+}
+
+// Unit suffix for a period of 3600 seconds: "h".
+template <class CharT>
+CONSTCD11
+inline
+string_literal<CharT, 2>
+get_units(std::ratio<3600>)
+{
+    return string_literal<CharT, 2>(CharT{'h'});
+}
+
+// Unit suffix for a period of 60 seconds: "min".
+template <class CharT>
+CONSTCD11
+inline
+string_literal<CharT, 4>
+get_units(std::ratio<60>)
+{
+    return string_literal<CharT, 4>(CharT{'m'}, CharT{'i'}, CharT{'n'});
+}
+
+// Unit suffix for a period of 86400 seconds: "d".
+template <class CharT>
+CONSTCD11
+inline
+string_literal<CharT, 2>
+get_units(std::ratio<86400>)
+{
+    return string_literal<CharT, 2>(CharT{'d'});
+}
+
+// make_string<CharT, Traits>::from(n) formats a number as a basic_string of
+// the requested character type. Only char and wchar_t are provided; the
+// primary template is left undefined.
+template <class CharT, class Traits = std::char_traits<CharT>>
+struct make_string;
+
+// char with default traits: std::to_string directly.
+template <>
+struct make_string<char>
+{
+    template <class Rep>
+    static
+    std::string
+    from(Rep n)
+    {
+        return std::to_string(n);
+    }
+};
+
+// char with custom traits: format with std::to_string, then copy the
+// characters into a string with the requested traits.
+template <class Traits>
+struct make_string<char, Traits>
+{
+    template <class Rep>
+    static
+    std::basic_string<char, Traits>
+    from(Rep n)
+    {
+        const std::string digits = std::to_string(n);
+        return std::basic_string<char, Traits>(digits.data(), digits.size());
+    }
+};
+
+// wchar_t with default traits: std::to_wstring directly.
+template <>
+struct make_string<wchar_t>
+{
+    template <class Rep>
+    static
+    std::wstring
+    from(Rep n)
+    {
+        return std::to_wstring(n);
+    }
+};
+
+// wchar_t with custom traits: format with std::to_wstring, then copy the
+// characters into a string with the requested traits.
+template <class Traits>
+struct make_string<wchar_t, Traits>
+{
+    template <class Rep>
+    static
+    std::basic_string<wchar_t, Traits>
+    from(Rep n)
+    {
+        const std::wstring digits = std::to_wstring(n);
+        return std::basic_string<wchar_t, Traits>(digits.data(), digits.size());
+    }
+};
+
+} // namespace detail
+
+// to_stream
+
+// Placeholder year value; fields<> uses it to build the default for its ymd
+// member.
+CONSTDATA year nanyear{-32768};
+
+// Bundle of optional calendar pieces handed to to_stream / from_stream: a
+// date, a weekday and a time of day. Members that are not supplied keep
+// their placeholder defaults, and has_tod records whether a time of day was
+// provided.
+template <class Duration>
+struct fields
+{
+    year_month_day     ymd{nanyear/0/0};
+    weekday            wd{8u};
+    hh_mm_ss<Duration> tod{};
+    bool               has_tod = false;
+
+    fields() = default;
+
+    // Single-piece constructors.
+    fields(year_month_day ymd_)
+        : ymd(ymd_)
+    {}
+
+    fields(weekday wd_)
+        : wd(wd_)
+    {}
+
+    fields(hh_mm_ss<Duration> tod_)
+        : tod(tod_)
+        , has_tod(true)
+    {}
+
+    // Two-piece constructors.
+    fields(year_month_day ymd_, weekday wd_)
+        : ymd(ymd_)
+        , wd(wd_)
+    {}
+
+    fields(year_month_day ymd_, hh_mm_ss<Duration> tod_)
+        : ymd(ymd_)
+        , tod(tod_)
+        , has_tod(true)
+    {}
+
+    fields(weekday wd_, hh_mm_ss<Duration> tod_)
+        : wd(wd_)
+        , tod(tod_)
+        , has_tod(true)
+    {}
+
+    // All three pieces.
+    fields(year_month_day ymd_, weekday wd_, hh_mm_ss<Duration> tod_)
+        : ymd(ymd_)
+        , wd(wd_)
+        , tod(tod_)
+        , has_tod(true)
+    {}
+};
+
+namespace detail
+{
+
+// Returns the weekday held in fds as a count of days since Sunday.
+// The weekday is computed from fds.ymd when that is valid, otherwise taken
+// from fds.wd. Sets failbit on os and returns 8 when neither is valid, or
+// when both are valid but disagree.
+template <class CharT, class Traits, class Duration>
+unsigned
+extract_weekday(std::basic_ostream<CharT, Traits>& os, const fields<Duration>& fds)
+{
+    const bool have_date    = fds.ymd.ok();
+    const bool have_weekday = fds.wd.ok();
+
+    if (!have_date && !have_weekday)
+    {
+        // fds does not contain a valid weekday
+        os.setstate(std::ios::failbit);
+        return 8;
+    }
+
+    if (!have_date)
+        return static_cast<unsigned>((fds.wd - Sunday).count());
+
+    const weekday from_date{sys_days(fds.ymd)};
+    if (have_weekday && from_date != fds.wd)
+    {
+        // fds.ymd and fds.wd are inconsistent
+        os.setstate(std::ios::failbit);
+        return 8;
+    }
+    return static_cast<unsigned>((from_date - Sunday).count());
+}
+
+// Returns the month of fds.ymd as an unsigned value. Sets failbit on os and
+// returns 0 when that month is not valid.
+template <class CharT, class Traits, class Duration>
+unsigned
+extract_month(std::basic_ostream<CharT, Traits>& os, const fields<Duration>& fds)
+{
+    const auto m = fds.ymd.month();
+    if (m.ok())
+        return static_cast<unsigned>(m);
+
+    // fds does not contain a valid month
+    os.setstate(std::ios::failbit);
+    return 0;
+}
+
+} // namespace detail
+
+#if ONLY_C_LOCALE
+
+namespace detail
+{
+
+// Returns a [first, last) range over a static table of 14 English weekday
+// names: the seven full names starting with Sunday, followed by the seven
+// three-letter abbreviations in the same order.
+inline
+std::pair<const std::string*, const std::string*>
+weekday_names()
+{
+    static const std::string nm[] =
+    {
+        "Sunday", "Monday", "Tuesday", "Wednesday",
+        "Thursday", "Friday", "Saturday",
+        "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
+    };
+    const std::size_t count = sizeof(nm) / sizeof(nm[0]);
+    return std::pair<const std::string*, const std::string*>(nm, nm + count);
+}
+
+// Returns a [first, last) range over a static table of 24 English month
+// names: the twelve full names starting with January, followed by the twelve
+// three-letter abbreviations in the same order.
+inline
+std::pair<const std::string*, const std::string*>
+month_names()
+{
+    static const std::string nm[] =
+    {
+        "January", "February", "March", "April", "May", "June",
+        "July", "August", "September", "October", "November", "December",
+        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
+        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
+    };
+    const std::size_t count = sizeof(nm) / sizeof(nm[0]);
+    return std::pair<const std::string*, const std::string*>(nm, nm + count);
+}
+
+// Returns a [first, last) range over the static two-entry table {"AM", "PM"}.
+inline
+std::pair<const std::string*, const std::string*>
+ampm_names()
+{
+    static const std::string nm[] = { "AM", "PM" };
+    const std::size_t count = sizeof(nm) / sizeof(nm[0]);
+    return std::pair<const std::string*, const std::string*>(nm, nm + count);
+}
+
+// Case-insensitively matches the upcoming characters of `is` against the
+// keywords in [kb, ke) and returns an iterator to the first keyword that
+// matched, preferring the longest keyword the input spells out.
+//
+// Characters are consumed only while they extend at least one candidate.
+// When no keyword matches, failbit is set and `ke` is returned. When the
+// input ends while candidates remain, eofbit is set.
+//
+// Throws std::bad_alloc if more than 100 keywords are supplied and the
+// status buffer cannot be allocated.
+template <class CharT, class Traits, class FwdIter>
+FwdIter
+scan_keyword(std::basic_istream<CharT, Traits>& is, FwdIter kb, FwdIter ke)
+{
+    size_t nkw = static_cast<size_t>(std::distance(kb, ke));
+    const unsigned char doesnt_match = '\0';
+    const unsigned char might_match = '\1';
+    const unsigned char does_match = '\2';
+    // One status byte per keyword; the stack buffer covers up to 100 of them.
+    unsigned char statbuf[100];
+    unsigned char* status = statbuf;
+    // Allocation and release both go through the std:: names.
+    std::unique_ptr<unsigned char, void(*)(void*)> stat_hold(nullptr, std::free);
+    if (nkw > sizeof(statbuf))
+    {
+        status = static_cast<unsigned char*>(std::malloc(nkw));
+        if (status == nullptr)
+            throw std::bad_alloc();
+        stat_hold.reset(status);
+    }
+    size_t n_might_match = nkw;  // At this point, any keyword might match
+    size_t n_does_match = 0;     // but none of them definitely do
+    // Initialize all statuses to might_match, except for "" keywords are does_match
+    unsigned char* st = status;
+    for (auto ky = kb; ky != ke; ++ky, ++st)
+    {
+        if (!ky->empty())
+            *st = might_match;
+        else
+        {
+            *st = does_match;
+            --n_might_match;
+            ++n_does_match;
+        }
+    }
+    // While there might be a match, test keywords against the next CharT
+    for (size_t indx = 0; is && n_might_match > 0; ++indx)
+    {
+        // Peek at the next CharT but don't consume it
+        auto ic = is.peek();
+        // Compare against the stream's own end-of-file value: the C macro
+        // EOF is only guaranteed to be the right sentinel for narrow streams.
+        if (Traits::eq_int_type(ic, Traits::eof()))
+        {
+            is.setstate(std::ios::eofbit);
+            break;
+        }
+        // toupper() requires a value representable as unsigned char (or
+        // EOF), so go through unsigned char before calling it.
+        auto c = static_cast<char>(toupper(
+                     static_cast<unsigned char>(Traits::to_char_type(ic))));
+        bool consume = false;
+        // For each keyword which might match, see if the indx character is c
+        // If a match is found, consume c
+        // If a match is found, and that is the last character in the keyword,
+        //    then that keyword matches.
+        // If the keyword doesn't match this character, then change the keyword
+        //    to doesn't match
+        st = status;
+        for (auto ky = kb; ky != ke; ++ky, ++st)
+        {
+            if (*st == might_match)
+            {
+                // Same unsigned char conversion for the keyword character,
+                // which may be a negative plain char.
+                if (c == static_cast<char>(toupper(
+                             static_cast<unsigned char>((*ky)[indx]))))
+                {
+                    consume = true;
+                    if (ky->size() == indx+1)
+                    {
+                        *st = does_match;
+                        --n_might_match;
+                        ++n_does_match;
+                    }
+                }
+                else
+                {
+                    *st = doesnt_match;
+                    --n_might_match;
+                }
+            }
+        }
+        // consume if we matched a character
+        if (consume)
+        {
+            (void)is.get();
+            // If we consumed a character and there might be a matched keyword that
+            //   was marked matched on a previous iteration, then such keywords
+            //   are now marked as not matching.
+            if (n_might_match + n_does_match > 1)
+            {
+                st = status;
+                for (auto ky = kb; ky != ke; ++ky, ++st)
+                {
+                    if (*st == does_match && ky->size() != indx+1)
+                    {
+                        *st = doesnt_match;
+                        --n_does_match;
+                    }
+                }
+            }
+        }
+    }
+    // We've exited the loop because we hit eof and/or we have no more "might matches".
+    // Return the first matching result
+    for (st = status; kb != ke; ++kb, ++st)
+        if (*st == does_match)
+            break;
+    if (kb == ke)
+        is.setstate(std::ios::failbit);
+    return kb;
+}
+
+} // namespace detail
+
+#endif // ONLY_C_LOCALE
+
+template <class CharT, class Traits, class Duration>
+std::basic_ostream<CharT, Traits>&
+to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
+ const fields<Duration>& fds, const std::string* abbrev,
+ const std::chrono::seconds* offset_sec)
+{
+#if ONLY_C_LOCALE
+ using detail::weekday_names;
+ using detail::month_names;
+ using detail::ampm_names;
+#endif
+ using detail::save_ostream;
+ using detail::get_units;
+ using detail::extract_weekday;
+ using detail::extract_month;
+ using std::ios;
+ using std::chrono::duration_cast;
+ using std::chrono::seconds;
+ using std::chrono::minutes;
+ using std::chrono::hours;
+ date::detail::save_ostream<CharT, Traits> ss(os);
+ os.fill(' ');
+ os.flags(std::ios::skipws | std::ios::dec);
+ os.width(0);
+ tm tm{};
+ bool insert_negative = fds.has_tod && fds.tod.to_duration() < Duration::zero();
+#if !ONLY_C_LOCALE
+ auto& facet = std::use_facet<std::time_put<CharT>>(os.getloc());
+#endif
+ const CharT* command = nullptr;
+ CharT modified = CharT{};
+ for (; *fmt; ++fmt)
+ {
+ switch (*fmt)
+ {
+ case 'a':
+ case 'A':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ tm.tm_wday = static_cast<int>(extract_weekday(os, fds));
+ if (os.fail())
+ return os;
+#if !ONLY_C_LOCALE
+ const CharT f[] = {'%', *fmt};
+ facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+#else // ONLY_C_LOCALE
+ os << weekday_names().first[tm.tm_wday+7*(*fmt == 'a')];
+#endif // ONLY_C_LOCALE
+ }
+ else
+ {
+ os << CharT{'%'} << modified << *fmt;
+ modified = CharT{};
+ }
+ command = nullptr;
+ }
+ else
+ os << *fmt;
+ break;
+ case 'b':
+ case 'B':
+ case 'h':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ tm.tm_mon = static_cast<int>(extract_month(os, fds)) - 1;
+#if !ONLY_C_LOCALE
+ const CharT f[] = {'%', *fmt};
+ facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+#else // ONLY_C_LOCALE
+ os << month_names().first[tm.tm_mon+12*(*fmt != 'B')];
+#endif // ONLY_C_LOCALE
+ }
+ else
+ {
+ os << CharT{'%'} << modified << *fmt;
+ modified = CharT{};
+ }
+ command = nullptr;
+ }
+ else
+ os << *fmt;
+ break;
+ case 'c':
+ case 'x':
+ if (command)
+ {
+ if (modified == CharT{'O'})
+ os << CharT{'%'} << modified << *fmt;
+ else
+ {
+ if (!fds.ymd.ok())
+ os.setstate(std::ios::failbit);
+ if (*fmt == 'c' && !fds.has_tod)
+ os.setstate(std::ios::failbit);
+#if !ONLY_C_LOCALE
+ tm = std::tm{};
+ auto const& ymd = fds.ymd;
+ auto ld = local_days(ymd);
+ if (*fmt == 'c')
+ {
+ tm.tm_sec = static_cast<int>(fds.tod.seconds().count());
+ tm.tm_min = static_cast<int>(fds.tod.minutes().count());
+ tm.tm_hour = static_cast<int>(fds.tod.hours().count());
+ }
+ tm.tm_mday = static_cast<int>(static_cast<unsigned>(ymd.day()));
+ tm.tm_mon = static_cast<int>(extract_month(os, fds) - 1);
+ tm.tm_year = static_cast<int>(ymd.year()) - 1900;
+ tm.tm_wday = static_cast<int>(extract_weekday(os, fds));
+ if (os.fail())
+ return os;
+ tm.tm_yday = static_cast<int>((ld - local_days(ymd.year()/1/1)).count());
+ CharT f[3] = {'%'};
+ auto fe = std::begin(f) + 1;
+ if (modified == CharT{'E'})
+ *fe++ = modified;
+ *fe++ = *fmt;
+ facet.put(os, os, os.fill(), &tm, std::begin(f), fe);
+#else // ONLY_C_LOCALE
+ if (*fmt == 'c')
+ {
+ auto wd = static_cast<int>(extract_weekday(os, fds));
+ os << weekday_names().first[static_cast<unsigned>(wd)+7]
+ << ' ';
+ os << month_names().first[extract_month(os, fds)-1+12] << ' ';
+ auto d = static_cast<int>(static_cast<unsigned>(fds.ymd.day()));
+ if (d < 10)
+ os << ' ';
+ os << d << ' '
+ << make_time(duration_cast<seconds>(fds.tod.to_duration()))
+ << ' ' << fds.ymd.year();
+
+ }
+ else // *fmt == 'x'
+ {
+ auto const& ymd = fds.ymd;
+ save_ostream<CharT, Traits> _(os);
+ os.fill('0');
+ os.flags(std::ios::dec | std::ios::right);
+ os.width(2);
+ os << static_cast<unsigned>(ymd.month()) << CharT{'/'};
+ os.width(2);
+ os << static_cast<unsigned>(ymd.day()) << CharT{'/'};
+ os.width(2);
+ os << static_cast<int>(ymd.year()) % 100;
+ }
+#endif // ONLY_C_LOCALE
+ }
+ command = nullptr;
+ modified = CharT{};
+ }
+ else
+ os << *fmt;
+ break;
+ case 'C':
+ if (command)
+ {
+ if (modified == CharT{'O'})
+ os << CharT{'%'} << modified << *fmt;
+ else
+ {
+ if (!fds.ymd.year().ok())
+ os.setstate(std::ios::failbit);
+ auto y = static_cast<int>(fds.ymd.year());
+#if !ONLY_C_LOCALE
+ if (modified == CharT{})
+#endif
+ {
+ save_ostream<CharT, Traits> _(os);
+ os.fill('0');
+ os.flags(std::ios::dec | std::ios::right);
+ if (y >= 0)
+ {
+ os.width(2);
+ os << y/100;
+ }
+ else
+ {
+ os << CharT{'-'};
+ os.width(2);
+ os << -(y-99)/100;
+ }
+ }
+#if !ONLY_C_LOCALE
+ else if (modified == CharT{'E'})
+ {
+ tm.tm_year = y - 1900;
+ CharT f[3] = {'%', 'E', 'C'};
+ facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+ }
+#endif
+ }
+ command = nullptr;
+ modified = CharT{};
+ }
+ else
+ os << *fmt;
+ break;
+ case 'd':
+ case 'e':
+ if (command)
+ {
+ if (modified == CharT{'E'})
+ os << CharT{'%'} << modified << *fmt;
+ else
+ {
+ if (!fds.ymd.day().ok())
+ os.setstate(std::ios::failbit);
+ auto d = static_cast<int>(static_cast<unsigned>(fds.ymd.day()));
+#if !ONLY_C_LOCALE
+ if (modified == CharT{})
+#endif
+ {
+ save_ostream<CharT, Traits> _(os);
+ if (*fmt == CharT{'d'})
+ os.fill('0');
+ else
+ os.fill(' ');
+ os.flags(std::ios::dec | std::ios::right);
+ os.width(2);
+ os << d;
+ }
+#if !ONLY_C_LOCALE
+ else if (modified == CharT{'O'})
+ {
+ tm.tm_mday = d;
+ CharT f[3] = {'%', 'O', *fmt};
+ facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+ }
+#endif
+ }
+ command = nullptr;
+ modified = CharT{};
+ }
+ else
+ os << *fmt;
+ break;
+ case 'D':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ if (!fds.ymd.ok())
+ os.setstate(std::ios::failbit);
+ auto const& ymd = fds.ymd;
+ save_ostream<CharT, Traits> _(os);
+ os.fill('0');
+ os.flags(std::ios::dec | std::ios::right);
+ os.width(2);
+ os << static_cast<unsigned>(ymd.month()) << CharT{'/'};
+ os.width(2);
+ os << static_cast<unsigned>(ymd.day()) << CharT{'/'};
+ os.width(2);
+ os << static_cast<int>(ymd.year()) % 100;
+ }
+ else
+ {
+ os << CharT{'%'} << modified << *fmt;
+ modified = CharT{};
+ }
+ command = nullptr;
+ }
+ else
+ os << *fmt;
+ break;
+ case 'F':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ if (!fds.ymd.ok())
+ os.setstate(std::ios::failbit);
+ auto const& ymd = fds.ymd;
+ save_ostream<CharT, Traits> _(os);
+ os.imbue(std::locale::classic());
+ os.fill('0');
+ os.flags(std::ios::dec | std::ios::right);
+ os.width(4);
+ os << static_cast<int>(ymd.year()) << CharT{'-'};
+ os.width(2);
+ os << static_cast<unsigned>(ymd.month()) << CharT{'-'};
+ os.width(2);
+ os << static_cast<unsigned>(ymd.day());
+ }
+ else
+ {
+ os << CharT{'%'} << modified << *fmt;
+ modified = CharT{};
+ }
+ command = nullptr;
+ }
+ else
+ os << *fmt;
+ break;
+ case 'g':
+ case 'G':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ if (!fds.ymd.ok())
+ os.setstate(std::ios::failbit);
+ auto ld = local_days(fds.ymd);
+ auto y = year_month_day{ld + days{3}}.year();
+ auto start = local_days((y-years{1})/December/Thursday[last]) +
+ (Monday-Thursday);
+ if (ld < start)
+ --y;
+ if (*fmt == CharT{'G'})
+ os << y;
+ else
+ {
+ save_ostream<CharT, Traits> _(os);
+ os.fill('0');
+ os.flags(std::ios::dec | std::ios::right);
+ os.width(2);
+ os << std::abs(static_cast<int>(y)) % 100;
+ }
+ }
+ else
+ {
+ os << CharT{'%'} << modified << *fmt;
+ modified = CharT{};
+ }
+ command = nullptr;
+ }
+ else
+ os << *fmt;
+ break;
+ case 'H':
+ case 'I':
+ if (command)
+ {
+ if (modified == CharT{'E'})
+ os << CharT{'%'} << modified << *fmt;
+ else
+ {
+ if (!fds.has_tod)
+ os.setstate(std::ios::failbit);
+ if (insert_negative)
+ {
+ os << '-';
+ insert_negative = false;
+ }
+ auto hms = fds.tod;
+#if !ONLY_C_LOCALE
+ if (modified == CharT{})
+#endif
+ {
+ auto h = *fmt == CharT{'I'} ? date::make12(hms.hours()) : hms.hours();
+ if (h < hours{10})
+ os << CharT{'0'};
+ os << h.count();
+ }
+#if !ONLY_C_LOCALE
+ else if (modified == CharT{'O'})
+ {
+ const CharT f[] = {'%', modified, *fmt};
+ tm.tm_hour = static_cast<int>(hms.hours().count());
+ facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+ }
+#endif
+ }
+ modified = CharT{};
+ command = nullptr;
+ }
+ else
+ os << *fmt;
+ break;
+ case 'j':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ if (fds.ymd.ok() || fds.has_tod)
+ {
+ days doy;
+ if (fds.ymd.ok())
+ {
+ auto ld = local_days(fds.ymd);
+ auto y = fds.ymd.year();
+ doy = ld - local_days(y/January/1) + days{1};
+ }
+ else
+ {
+ doy = duration_cast<days>(fds.tod.to_duration());
+ }
+ save_ostream<CharT, Traits> _(os);
+ os.fill('0');
+ os.flags(std::ios::dec | std::ios::right);
+ os.width(3);
+ os << doy.count();
+ }
+ else
+ {
+ os.setstate(std::ios::failbit);
+ }
+ }
+ else
+ {
+ os << CharT{'%'} << modified << *fmt;
+ modified = CharT{};
+ }
+ command = nullptr;
+ }
+ else
+ os << *fmt;
+ break;
+ case 'm':
+ if (command)
+ {
+ if (modified == CharT{'E'})
+ os << CharT{'%'} << modified << *fmt;
+ else
+ {
+ if (!fds.ymd.month().ok())
+ os.setstate(std::ios::failbit);
+ auto m = static_cast<unsigned>(fds.ymd.month());
+#if !ONLY_C_LOCALE
+ if (modified == CharT{})
+#endif
+ {
+ if (m < 10)
+ os << CharT{'0'};
+ os << m;
+ }
+#if !ONLY_C_LOCALE
+ else if (modified == CharT{'O'})
+ {
+ const CharT f[] = {'%', modified, *fmt};
+ tm.tm_mon = static_cast<int>(m-1);
+ facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+ }
+#endif
+ }
+ modified = CharT{};
+ command = nullptr;
+ }
+ else
+ os << *fmt;
+ break;
+ case 'M':
+ if (command)
+ {
+ if (modified == CharT{'E'})
+ os << CharT{'%'} << modified << *fmt;
+ else
+ {
+ if (!fds.has_tod)
+ os.setstate(std::ios::failbit);
+ if (insert_negative)
+ {
+ os << '-';
+ insert_negative = false;
+ }
+#if !ONLY_C_LOCALE
+ if (modified == CharT{})
+#endif
+ {
+ if (fds.tod.minutes() < minutes{10})
+ os << CharT{'0'};
+ os << fds.tod.minutes().count();
+ }
+#if !ONLY_C_LOCALE
+ else if (modified == CharT{'O'})
+ {
+ const CharT f[] = {'%', modified, *fmt};
+ tm.tm_min = static_cast<int>(fds.tod.minutes().count());
+ facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+ }
+#endif
+ }
+ modified = CharT{};
+ command = nullptr;
+ }
+ else
+ os << *fmt;
+ break;
+ case 'n':
+ if (command)
+ {
+ if (modified == CharT{})
+ os << CharT{'\n'};
+ else
+ {
+ os << CharT{'%'} << modified << *fmt;
+ modified = CharT{};
+ }
+ command = nullptr;
+ }
+ else
+ os << *fmt;
+ break;
+ case 'p':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ if (!fds.has_tod)
+ os.setstate(std::ios::failbit);
+#if !ONLY_C_LOCALE
+ const CharT f[] = {'%', *fmt};
+ tm.tm_hour = static_cast<int>(fds.tod.hours().count());
+ facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+#else
+ if (date::is_am(fds.tod.hours()))
+ os << ampm_names().first[0];
+ else
+ os << ampm_names().first[1];
+#endif
+ }
+ else
+ {
+ os << CharT{'%'} << modified << *fmt;
+ }
+ modified = CharT{};
+ command = nullptr;
+ }
+ else
+ os << *fmt;
+ break;
+ case 'Q':
+ case 'q':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ if (!fds.has_tod)
+ os.setstate(std::ios::failbit);
+ auto d = fds.tod.to_duration();
+ if (*fmt == 'q')
+ os << get_units<CharT>(typename decltype(d)::period::type{});
+ else
+ os << d.count();
+ }
+ else
+ {
+ os << CharT{'%'} << modified << *fmt;
+ }
+ modified = CharT{};
+ command = nullptr;
+ }
+ else
+ os << *fmt;
+ break;
+ case 'r':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ if (!fds.has_tod)
+ os.setstate(std::ios::failbit);
+#if !ONLY_C_LOCALE
+ const CharT f[] = {'%', *fmt};
+ tm.tm_hour = static_cast<int>(fds.tod.hours().count());
+ tm.tm_min = static_cast<int>(fds.tod.minutes().count());
+ tm.tm_sec = static_cast<int>(fds.tod.seconds().count());
+ facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+#else
+ hh_mm_ss<seconds> tod(duration_cast<seconds>(fds.tod.to_duration()));
+ save_ostream<CharT, Traits> _(os);
+ os.fill('0');
+ os.width(2);
+ os << date::make12(tod.hours()).count() << CharT{':'};
+ os.width(2);
+ os << tod.minutes().count() << CharT{':'};
+ os.width(2);
+ os << tod.seconds().count() << CharT{' '};
+ if (date::is_am(tod.hours()))
+ os << ampm_names().first[0];
+ else
+ os << ampm_names().first[1];
+#endif
+ }
+ else
+ {
+ os << CharT{'%'} << modified << *fmt;
+ }
+ modified = CharT{};
+ command = nullptr;
+ }
+ else
+ os << *fmt;
+ break;
+ case 'R':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ if (!fds.has_tod)
+ os.setstate(std::ios::failbit);
+ if (fds.tod.hours() < hours{10})
+ os << CharT{'0'};
+ os << fds.tod.hours().count() << CharT{':'};
+ if (fds.tod.minutes() < minutes{10})
+ os << CharT{'0'};
+ os << fds.tod.minutes().count();
+ }
+ else
+ {
+ os << CharT{'%'} << modified << *fmt;
+ modified = CharT{};
+ }
+ command = nullptr;
+ }
+ else
+ os << *fmt;
+ break;
+ case 'S':
+ if (command)
+ {
+ if (modified == CharT{'E'})
+ os << CharT{'%'} << modified << *fmt;
+ else
+ {
+ if (!fds.has_tod)
+ os.setstate(std::ios::failbit);
+ if (insert_negative)
+ {
+ os << '-';
+ insert_negative = false;
+ }
+#if !ONLY_C_LOCALE
+ if (modified == CharT{})
+#endif
+ {
+ os << fds.tod.s_;
+ }
+#if !ONLY_C_LOCALE
+ else if (modified == CharT{'O'})
+ {
+ const CharT f[] = {'%', modified, *fmt};
+ tm.tm_sec = static_cast<int>(fds.tod.s_.seconds().count());
+ facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+ }
+#endif
+ }
+ modified = CharT{};
+ command = nullptr;
+ }
+ else
+ os << *fmt;
+ break;
+ case 't':
+ if (command)
+ {
+ if (modified == CharT{})
+ os << CharT{'\t'};
+ else
+ {
+ os << CharT{'%'} << modified << *fmt;
+ modified = CharT{};
+ }
+ command = nullptr;
+ }
+ else
+ os << *fmt;
+ break;
+ case 'T':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ if (!fds.has_tod)
+ os.setstate(std::ios::failbit);
+ os << fds.tod;
+ }
+ else
+ {
+ os << CharT{'%'} << modified << *fmt;
+ modified = CharT{};
+ }
+ command = nullptr;
+ }
+ else
+ os << *fmt;
+ break;
+ case 'u':
+ if (command)
+ {
+ if (modified == CharT{'E'})
+ os << CharT{'%'} << modified << *fmt;
+ else
+ {
+ auto wd = extract_weekday(os, fds);
+#if !ONLY_C_LOCALE
+ if (modified == CharT{})
+#endif
+ {
+ os << (wd != 0 ? wd : 7u);
+ }
+#if !ONLY_C_LOCALE
+ else if (modified == CharT{'O'})
+ {
+ const CharT f[] = {'%', modified, *fmt};
+ tm.tm_wday = static_cast<int>(wd);
+ facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+ }
+#endif
+ }
+ modified = CharT{};
+ command = nullptr;
+ }
+ else
+ os << *fmt;
+ break;
+ case 'U':
+ if (command)
+ {
+ if (modified == CharT{'E'})
+ os << CharT{'%'} << modified << *fmt;
+ else
+ {
+ auto const& ymd = fds.ymd;
+ if (!ymd.ok())
+ os.setstate(std::ios::failbit);
+ auto ld = local_days(ymd);
+#if !ONLY_C_LOCALE
+ if (modified == CharT{})
+#endif
+ {
+ auto st = local_days(Sunday[1]/January/ymd.year());
+ if (ld < st)
+ os << CharT{'0'} << CharT{'0'};
+ else
+ {
+ auto wn = duration_cast<weeks>(ld - st).count() + 1;
+ if (wn < 10)
+ os << CharT{'0'};
+ os << wn;
+ }
+ }
+ #if !ONLY_C_LOCALE
+ else if (modified == CharT{'O'})
+ {
+ const CharT f[] = {'%', modified, *fmt};
+ tm.tm_year = static_cast<int>(ymd.year()) - 1900;
+ tm.tm_wday = static_cast<int>(extract_weekday(os, fds));
+ if (os.fail())
+ return os;
+ tm.tm_yday = static_cast<int>((ld - local_days(ymd.year()/1/1)).count());
+ facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+ }
+#endif
+ }
+ modified = CharT{};
+ command = nullptr;
+ }
+ else
+ os << *fmt;
+ break;
+ case 'V':
+ if (command)
+ {
+ if (modified == CharT{'E'})
+ os << CharT{'%'} << modified << *fmt;
+ else
+ {
+ if (!fds.ymd.ok())
+ os.setstate(std::ios::failbit);
+ auto ld = local_days(fds.ymd);
+#if !ONLY_C_LOCALE
+ if (modified == CharT{})
+#endif
+ {
+ auto y = year_month_day{ld + days{3}}.year();
+ auto st = local_days((y-years{1})/12/Thursday[last]) +
+ (Monday-Thursday);
+ if (ld < st)
+ {
+ --y;
+ st = local_days((y - years{1})/12/Thursday[last]) +
+ (Monday-Thursday);
+ }
+ auto wn = duration_cast<weeks>(ld - st).count() + 1;
+ if (wn < 10)
+ os << CharT{'0'};
+ os << wn;
+ }
+#if !ONLY_C_LOCALE
+ else if (modified == CharT{'O'})
+ {
+ const CharT f[] = {'%', modified, *fmt};
+ auto const& ymd = fds.ymd;
+ tm.tm_year = static_cast<int>(ymd.year()) - 1900;
+ tm.tm_wday = static_cast<int>(extract_weekday(os, fds));
+ if (os.fail())
+ return os;
+ tm.tm_yday = static_cast<int>((ld - local_days(ymd.year()/1/1)).count());
+ facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+ }
+#endif
+ }
+ modified = CharT{};
+ command = nullptr;
+ }
+ else
+ os << *fmt;
+ break;
+ case 'w':
+ if (command)
+ {
+ auto wd = extract_weekday(os, fds);
+ if (os.fail())
+ return os;
+#if !ONLY_C_LOCALE
+ if (modified == CharT{})
+#else
+ if (modified != CharT{'E'})
+#endif
+ {
+ os << wd;
+ }
+#if !ONLY_C_LOCALE
+ else if (modified == CharT{'O'})
+ {
+ const CharT f[] = {'%', modified, *fmt};
+ tm.tm_wday = static_cast<int>(wd);
+ facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+ }
+#endif
+ else
+ {
+ os << CharT{'%'} << modified << *fmt;
+ }
+ modified = CharT{};
+ command = nullptr;
+ }
+ else
+ os << *fmt;
+ break;
+ case 'W':
+ if (command)
+ {
+ if (modified == CharT{'E'})
+ os << CharT{'%'} << modified << *fmt;
+ else
+ {
+ auto const& ymd = fds.ymd;
+ if (!ymd.ok())
+ os.setstate(std::ios::failbit);
+ auto ld = local_days(ymd);
+#if !ONLY_C_LOCALE
+ if (modified == CharT{})
+#endif
+ {
+ auto st = local_days(Monday[1]/January/ymd.year());
+ if (ld < st)
+ os << CharT{'0'} << CharT{'0'};
+ else
+ {
+ auto wn = duration_cast<weeks>(ld - st).count() + 1;
+ if (wn < 10)
+ os << CharT{'0'};
+ os << wn;
+ }
+ }
+#if !ONLY_C_LOCALE
+ else if (modified == CharT{'O'})
+ {
+ const CharT f[] = {'%', modified, *fmt};
+ tm.tm_year = static_cast<int>(ymd.year()) - 1900;
+ tm.tm_wday = static_cast<int>(extract_weekday(os, fds));
+ if (os.fail())
+ return os;
+ tm.tm_yday = static_cast<int>((ld - local_days(ymd.year()/1/1)).count());
+ facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+ }
+#endif
+ }
+ modified = CharT{};
+ command = nullptr;
+ }
+ else
+ os << *fmt;
+ break;
+ case 'X':
+ if (command)
+ {
+ if (modified == CharT{'O'})
+ os << CharT{'%'} << modified << *fmt;
+ else
+ {
+ if (!fds.has_tod)
+ os.setstate(std::ios::failbit);
+#if !ONLY_C_LOCALE
+ tm = std::tm{};
+ tm.tm_sec = static_cast<int>(fds.tod.seconds().count());
+ tm.tm_min = static_cast<int>(fds.tod.minutes().count());
+ tm.tm_hour = static_cast<int>(fds.tod.hours().count());
+ CharT f[3] = {'%'};
+ auto fe = std::begin(f) + 1;
+ if (modified == CharT{'E'})
+ *fe++ = modified;
+ *fe++ = *fmt;
+ facet.put(os, os, os.fill(), &tm, std::begin(f), fe);
+#else
+ os << fds.tod;
+#endif
+ }
+ command = nullptr;
+ modified = CharT{};
+ }
+ else
+ os << *fmt;
+ break;
+ case 'y':
+ if (command)
+ {
+ if (!fds.ymd.year().ok())
+ os.setstate(std::ios::failbit);
+ auto y = static_cast<int>(fds.ymd.year());
+#if !ONLY_C_LOCALE
+ if (modified == CharT{})
+ {
+#endif
+ y = std::abs(y) % 100;
+ if (y < 10)
+ os << CharT{'0'};
+ os << y;
+#if !ONLY_C_LOCALE
+ }
+ else
+ {
+ const CharT f[] = {'%', modified, *fmt};
+ tm.tm_year = y - 1900;
+ facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+ }
+#endif
+ modified = CharT{};
+ command = nullptr;
+ }
+ else
+ os << *fmt;
+ break;
+ case 'Y':
+ if (command)
+ {
+ if (modified == CharT{'O'})
+ os << CharT{'%'} << modified << *fmt;
+ else
+ {
+ if (!fds.ymd.year().ok())
+ os.setstate(std::ios::failbit);
+ auto y = fds.ymd.year();
+#if !ONLY_C_LOCALE
+ if (modified == CharT{})
+#endif
+ {
+ save_ostream<CharT, Traits> _(os);
+ os.imbue(std::locale::classic());
+ os << y;
+ }
+#if !ONLY_C_LOCALE
+ else if (modified == CharT{'E'})
+ {
+ const CharT f[] = {'%', modified, *fmt};
+ tm.tm_year = static_cast<int>(y) - 1900;
+ facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+ }
+#endif
+ }
+ modified = CharT{};
+ command = nullptr;
+ }
+ else
+ os << *fmt;
+ break;
+ case 'z':
+ if (command)
+ {
+ if (offset_sec == nullptr)
+ {
+ // Can not format %z with unknown offset
+ os.setstate(ios::failbit);
+ return os;
+ }
+ auto m = duration_cast<minutes>(*offset_sec);
+ auto neg = m < minutes{0};
+ m = date::abs(m);
+ auto h = duration_cast<hours>(m);
+ m -= h;
+ if (neg)
+ os << CharT{'-'};
+ else
+ os << CharT{'+'};
+ if (h < hours{10})
+ os << CharT{'0'};
+ os << h.count();
+ if (modified != CharT{})
+ os << CharT{':'};
+ if (m < minutes{10})
+ os << CharT{'0'};
+ os << m.count();
+ command = nullptr;
+ modified = CharT{};
+ }
+ else
+ os << *fmt;
+ break;
+ case 'Z':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ if (abbrev == nullptr)
+ {
+ // Can not format %Z with unknown time_zone
+ os.setstate(ios::failbit);
+ return os;
+ }
+ for (auto c : *abbrev)
+ os << CharT(c);
+ }
+ else
+ {
+ os << CharT{'%'} << modified << *fmt;
+ modified = CharT{};
+ }
+ command = nullptr;
+ }
+ else
+ os << *fmt;
+ break;
+ case 'E':
+ case 'O':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ modified = *fmt;
+ }
+ else
+ {
+ os << CharT{'%'} << modified << *fmt;
+ command = nullptr;
+ modified = CharT{};
+ }
+ }
+ else
+ os << *fmt;
+ break;
+ case '%':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ os << CharT{'%'};
+ command = nullptr;
+ }
+ else
+ {
+ os << CharT{'%'} << modified << CharT{'%'};
+ command = nullptr;
+ modified = CharT{};
+ }
+ }
+ else
+ command = fmt;
+ break;
+ default:
+ if (command)
+ {
+ os << CharT{'%'};
+ command = nullptr;
+ }
+ if (modified != CharT{})
+ {
+ os << modified;
+ modified = CharT{};
+ }
+ os << *fmt;
+ break;
+ }
+ }
+ if (command)
+ os << CharT{'%'};
+ if (modified != CharT{})
+ os << modified;
+ return os;
+}
+
+// Streams a bare year: wraps it in a seconds-based fields object whose date
+// part is y/0/0 and forwards to the fields overload of to_stream.
+template <class CharT, class Traits>
+inline
+std::basic_ostream<CharT, Traits>&
+to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt, const year& y)
+{
+    fields<std::chrono::seconds> year_only{y/0/0};
+    return to_stream(os, fmt, year_only);
+}
+
+// Streams a bare month: wraps it in a seconds-based fields object whose date
+// part is m/0/nanyear and forwards to the fields overload of to_stream.
+template <class CharT, class Traits>
+inline
+std::basic_ostream<CharT, Traits>&
+to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt, const month& m)
+{
+    fields<std::chrono::seconds> month_only{m/0/nanyear};
+    return to_stream(os, fmt, month_only);
+}
+
+// Streams a bare day: wraps it in a seconds-based fields object whose date
+// part is d/0/nanyear and forwards to the fields overload of to_stream.
+template <class CharT, class Traits>
+inline
+std::basic_ostream<CharT, Traits>&
+to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt, const day& d)
+{
+    fields<std::chrono::seconds> day_only{d/0/nanyear};
+    return to_stream(os, fmt, day_only);
+}
+
+// Streams a bare weekday: builds a seconds-based fields object from the
+// weekday alone and forwards to the fields overload of to_stream.
+template <class CharT, class Traits>
+inline
+std::basic_ostream<CharT, Traits>&
+to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt, const weekday& wd)
+{
+    fields<std::chrono::seconds> weekday_only{wd};
+    return to_stream(os, fmt, weekday_only);
+}
+
+// Streams a year_month: wraps it in a seconds-based fields object whose date
+// part is ym/0 and forwards to the fields overload of to_stream.
+template <class CharT, class Traits>
+inline
+std::basic_ostream<CharT, Traits>&
+to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt, const year_month& ym)
+{
+    fields<std::chrono::seconds> year_and_month{ym/0};
+    return to_stream(os, fmt, year_and_month);
+}
+
+// Streams a month_day: wraps it in a seconds-based fields object whose date
+// part is md/nanyear and forwards to the fields overload of to_stream.
+template <class CharT, class Traits>
+inline
+std::basic_ostream<CharT, Traits>&
+to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt, const month_day& md)
+{
+    fields<std::chrono::seconds> month_and_day{md/nanyear};
+    return to_stream(os, fmt, month_and_day);
+}
+
+// Streams a full calendar date: builds a seconds-based fields object from the
+// year_month_day alone and forwards to the fields overload of to_stream.
+template <class CharT, class Traits>
+inline
+std::basic_ostream<CharT, Traits>&
+to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
+          const year_month_day& ymd)
+{
+    fields<std::chrono::seconds> date_only{ymd};
+    return to_stream(os, fmt, date_only);
+}
+
+// Streams a duration: the value is split into an hh_mm_ss whose precision is
+// the common type of the caller's duration and seconds, stored in a fields
+// object, and handed to the fields overload of to_stream.
+template <class CharT, class Traits, class Rep, class Period>
+inline
+std::basic_ostream<CharT, Traits>&
+to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
+          const std::chrono::duration<Rep, Period>& d)
+{
+    using CT = typename std::common_type<std::chrono::duration<Rep, Period>,
+                                         std::chrono::seconds>::type;
+    fields<CT> time_only{hh_mm_ss<CT>{d}};
+    return to_stream(os, fmt, time_only);
+}
+
+// Streams a local_time: the time point is floored to whole days to obtain the
+// calendar date, the remainder becomes the time of day, and both are passed
+// (with the optional abbreviation and UTC offset) to the fields overload.
+template <class CharT, class Traits, class Duration>
+std::basic_ostream<CharT, Traits>&
+to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
+          const local_time<Duration>& tp, const std::string* abbrev = nullptr,
+          const std::chrono::seconds* offset_sec = nullptr)
+{
+    using CT = typename std::common_type<Duration, std::chrono::seconds>::type;
+    const auto day_start = floor<days>(tp);
+    const auto since_midnight = tp - local_seconds{day_start};
+    fields<CT> parts{year_month_day{day_start}, hh_mm_ss<CT>{since_midnight}};
+    return to_stream(os, fmt, parts, abbrev, offset_sec);
+}
+
+// Streams a sys_time: the time point is floored to whole days to obtain the
+// calendar date, the remainder becomes the time of day, and the fields
+// overload is called with the abbreviation "UTC" and a zero offset.
+template <class CharT, class Traits, class Duration>
+std::basic_ostream<CharT, Traits>&
+to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
+          const sys_time<Duration>& tp)
+{
+    using CT = typename std::common_type<Duration, std::chrono::seconds>::type;
+    const std::string utc_name("UTC");
+    CONSTDATA std::chrono::seconds zero_offset{0};
+    const auto day_start = floor<days>(tp);
+    const auto since_midnight = tp - sys_seconds{day_start};
+    fields<CT> parts{year_month_day{day_start}, hh_mm_ss<CT>{since_midnight}};
+    return to_stream(os, fmt, parts, &utc_name, &zero_offset);
+}
+
+// format
+
+// Formats tp according to fmt with the given locale imbued and returns the
+// resulting string. Participates in overload resolution only when
+// to_stream(os, fmt, tp) is well formed. failbit and badbit are enabled as
+// exceptions on the temporary stream, so a failed to_stream throws instead of
+// yielding a partial string.
+template <class CharT, class Streamable>
+auto
+format(const std::locale& loc, const CharT* fmt, const Streamable& tp)
+    -> decltype(to_stream(std::declval<std::basic_ostream<CharT>&>(), fmt, tp),
+                std::basic_string<CharT>{})
+{
+    std::basic_ostringstream<CharT> buffer;
+    buffer.imbue(loc);
+    buffer.exceptions(std::ios::failbit | std::ios::badbit);
+    to_stream(buffer, fmt, tp);
+    return buffer.str();
+}
+
+// Formats tp according to fmt on a default-constructed string stream (no
+// locale is imbued here) and returns the resulting string. Participates in
+// overload resolution only when to_stream(os, fmt, tp) is well formed.
+// failbit and badbit are enabled as exceptions, so a failed to_stream throws.
+template <class CharT, class Streamable>
+auto
+format(const CharT* fmt, const Streamable& tp)
+    -> decltype(to_stream(std::declval<std::basic_ostream<CharT>&>(), fmt, tp),
+                std::basic_string<CharT>{})
+{
+    std::basic_ostringstream<CharT> buffer;
+    buffer.exceptions(std::ios::failbit | std::ios::badbit);
+    to_stream(buffer, fmt, tp);
+    return buffer.str();
+}
+
+// std::basic_string flavour of format with an explicit locale: the format
+// string's c_str() is passed to to_stream and the result is returned with the
+// same character, traits and allocator types as fmt. failbit and badbit are
+// enabled as exceptions on the temporary stream, so a failed to_stream throws.
+template <class CharT, class Traits, class Alloc, class Streamable>
+auto
+format(const std::locale& loc, const std::basic_string<CharT, Traits, Alloc>& fmt,
+       const Streamable& tp)
+    -> decltype(to_stream(std::declval<std::basic_ostream<CharT, Traits>&>(), fmt.c_str(), tp),
+                std::basic_string<CharT, Traits, Alloc>{})
+{
+    std::basic_ostringstream<CharT, Traits, Alloc> buffer;
+    buffer.imbue(loc);
+    buffer.exceptions(std::ios::failbit | std::ios::badbit);
+    const CharT* const pattern = fmt.c_str();
+    to_stream(buffer, pattern, tp);
+    return buffer.str();
+}
+
+// std::basic_string flavour of format without a locale argument: the format
+// string's c_str() is passed to to_stream and the result is returned with the
+// same character, traits and allocator types as fmt. failbit and badbit are
+// enabled as exceptions on the temporary stream, so a failed to_stream throws.
+template <class CharT, class Traits, class Alloc, class Streamable>
+auto
+format(const std::basic_string<CharT, Traits, Alloc>& fmt, const Streamable& tp)
+    -> decltype(to_stream(std::declval<std::basic_ostream<CharT, Traits>&>(), fmt.c_str(), tp),
+                std::basic_string<CharT, Traits, Alloc>{})
+{
+    std::basic_ostringstream<CharT, Traits, Alloc> buffer;
+    buffer.exceptions(std::ios::failbit | std::ios::badbit);
+    const CharT* const pattern = fmt.c_str();
+    to_stream(buffer, pattern, tp);
+    return buffer.str();
+}
+
+// parse
+
+namespace detail
+{
+
+// Extracts one character from is and reports whether it equals fmt.
+// On end of input or a mismatch, failbit is recorded in err and set on the
+// stream, and false is returned. The character is consumed either way.
+template <class CharT, class Traits>
+bool
+read_char(std::basic_istream<CharT, Traits>& is, CharT fmt, std::ios::iostate& err)
+{
+    const auto got = is.get();
+    const bool matched = !Traits::eq_int_type(got, Traits::eof()) &&
+                         Traits::eq(Traits::to_char_type(got), fmt);
+    if (matched)
+        return true;
+    err |= std::ios::failbit;
+    is.setstate(std::ios::failbit);
+    return false;
+}
+
+// Reads a run of decimal digits from is and returns their value.
+//   m - minimum number of digits; failbit is set when fewer are read.
+//   M - reading stops once exactly M digits have been consumed.
+// A non-digit or end of input stops the scan and is left unread (only peeked).
+template <class CharT, class Traits>
+unsigned
+read_unsigned(std::basic_istream<CharT, Traits>& is, unsigned m = 1, unsigned M = 10)
+{
+    unsigned value = 0;
+    unsigned digits = 0;
+    for (;;)
+    {
+        const auto next = is.peek();
+        if (Traits::eq_int_type(next, Traits::eof()))
+            break;
+        const auto ch = static_cast<char>(Traits::to_char_type(next));
+        if (ch < '0' || ch > '9')
+            break;
+        (void)is.get();
+        value = 10*value + static_cast<unsigned>(ch - '0');
+        if (++digits == M)
+            break;
+    }
+    if (digits < m)
+        is.setstate(std::ios::failbit);
+    return value;
+}
+
+// Reads an optionally signed decimal integer from is.
+// A leading '+' or '-' is consumed but not counted as a digit; the digits are
+// read by read_unsigned with a minimum of max(m, 1) and a maximum of M.
+// If nothing usable is read, 0 is returned and failbit is set when m > 0.
+template <class CharT, class Traits>
+int
+read_signed(std::basic_istream<CharT, Traits>& is, unsigned m = 1, unsigned M = 10)
+{
+    const auto next = is.peek();
+    if (!Traits::eq_int_type(next, Traits::eof()))
+    {
+        const auto lead = static_cast<char>(Traits::to_char_type(next));
+        const bool has_sign = lead == '-' || lead == '+';
+        if (has_sign || ('0' <= lead && lead <= '9'))
+        {
+            if (has_sign)
+                (void)is.get();
+            const auto magnitude = static_cast<int>(read_unsigned(is, std::max(m, 1u), M));
+            if (!is.fail())
+                return lead == '-' ? -magnitude : magnitude;
+        }
+    }
+    if (m > 0)
+        is.setstate(std::ios::failbit);
+    return 0;
+}
+
+// Reads an unsigned decimal number (digits with at most one decimal point)
+// from is and returns it as a long double.
+//   m - minimum number of characters (digits plus decimal point) that must be
+//       consumed; failbit is set and 0 returned when fewer are read.
+//   M - reading stops once exactly M characters have been consumed.
+// The decimal point is the one reported by the stream locale's numpunct facet.
+//
+// The value is accumulated directly instead of being re-parsed by std::stold:
+//   * std::stold throws std::invalid_argument when the collected text has no
+//     digits (a lone decimal point, or nothing at all when m == 0);
+//   * std::stold uses the global C locale's decimal point, which need not be
+//     the '.' the buffer was normalised to.
+// A read that consumed characters but no digit now sets failbit and returns 0,
+// consistent with the other read_* helpers.
+template <class CharT, class Traits>
+long double
+read_long_double(std::basic_istream<CharT, Traits>& is, unsigned m = 1, unsigned M = 10)
+{
+    unsigned count = 0;          // characters consumed, decimal point included
+    unsigned digits = 0;         // digits consumed
+    long double whole = 0;       // value of the digits before the decimal point
+    long double fraction = 0;    // digits after the decimal point, read as an integer
+    long double scale = 1;       // 10^(number of fraction digits)
+    bool in_fraction = false;
+    auto decimal_point = Traits::to_int_type(
+        std::use_facet<std::numpunct<CharT>>(is.getloc()).decimal_point());
+    while (true)
+    {
+        auto ic = is.peek();
+        if (Traits::eq_int_type(ic, Traits::eof()))
+            break;
+        if (Traits::eq_int_type(ic, decimal_point))
+        {
+            in_fraction = true;
+            // Only one decimal point is accepted: eof can never match a
+            // peeked character again because eof was ruled out above.
+            decimal_point = Traits::eof();
+            (void)is.get();
+        }
+        else
+        {
+            auto c = static_cast<char>(Traits::to_char_type(ic));
+            if (!('0' <= c && c <= '9'))
+                break;
+            const auto digit = static_cast<long double>(c - '0');
+            if (in_fraction)
+            {
+                fraction = 10*fraction + digit;
+                scale *= 10;
+            }
+            else
+                whole = 10*whole + digit;
+            ++digits;
+            (void)is.get();
+        }
+        if (++count == M)
+            break;
+    }
+    if (count < m || (count != 0 && digits == 0))
+    {
+        is.setstate(std::ios::failbit);
+        return 0;
+    }
+    return whole + fraction/scale;
+}
+
+struct rs  // Argument for read(): "read a signed integer into i".
+{
+ int& i;  // destination; read() assigns it only when read_signed did not fail
+ unsigned m;  // forwarded to read_signed as m (minimum digit count)
+ unsigned M;  // forwarded to read_signed as M (maximum digit count)
+};
+
+struct ru  // Argument for read(): "read an unsigned integer into i".
+{
+ int& i;  // destination; read() stores the value cast to int when read_unsigned did not fail
+ unsigned m;  // forwarded to read_unsigned as m (minimum digit count)
+ unsigned M;  // forwarded to read_unsigned as M (maximum digit count)
+};
+
+struct rld  // Argument for read(): "read a decimal number into i".
+{
+ long double& i;  // destination; read() assigns it only when read_long_double did not fail
+ unsigned m;  // forwarded to read_long_double as m (minimum character count)
+ unsigned M;  // forwarded to read_long_double as M (maximum character count)
+};
+
+template <class CharT, class Traits>
+void
+read(std::basic_istream<CharT, Traits>&)  // Base case of the variadic read(): no arguments left, nothing to do.
+{
+}
+
+template <class CharT, class Traits, class ...Args>
+void
+read(std::basic_istream<CharT, Traits>& is, CharT a0, Args&& ...args);  // forward declaration: match one literal character (CharT{} is a no-op)
+
+template <class CharT, class Traits, class ...Args>
+void
+read(std::basic_istream<CharT, Traits>& is, rs a0, Args&& ...args);  // forward declaration: read a signed integer into a0.i
+
+template <class CharT, class Traits, class ...Args>
+void
+read(std::basic_istream<CharT, Traits>& is, ru a0, Args&& ...args);  // forward declaration: read an unsigned integer into a0.i
+
+template <class CharT, class Traits, class ...Args>
+void
+read(std::basic_istream<CharT, Traits>& is, int a0, Args&& ...args);  // forward declaration: match the decimal digits of a0 (-1 is a no-op)
+
+template <class CharT, class Traits, class ...Args>
+void
+read(std::basic_istream<CharT, Traits>& is, rld a0, Args&& ...args);  // forward declaration: read a decimal number into a0.i
+
+// Matches the literal character a0 against the next character of is, then
+// continues with the remaining arguments. A value-initialised a0 (CharT{})
+// matches nothing and is skipped. On end of input failbit|eofbit is set; on a
+// mismatch failbit is set; in both cases the character is left unread and the
+// remaining arguments are not processed.
+template <class CharT, class Traits, class ...Args>
+void
+read(std::basic_istream<CharT, Traits>& is, CharT a0, Args&& ...args)
+{
+    const bool expects_char = a0 != CharT{};
+    if (expects_char)
+    {
+        const auto next = is.peek();
+        const bool at_eof = Traits::eq_int_type(next, Traits::eof());
+        if (at_eof || !Traits::eq(Traits::to_char_type(next), a0))
+        {
+            is.setstate(at_eof ? (std::ios::failbit | std::ios::eofbit)
+                               : std::ios::failbit);
+            return;
+        }
+        (void)is.get();
+    }
+    read(is, std::forward<Args>(args)...);
+}
+
+// Reads a signed integer via read_signed using the digit bounds in a0.
+// Only if the stream has not failed is the value stored in a0.i and the
+// remaining arguments processed.
+template <class CharT, class Traits, class ...Args>
+void
+read(std::basic_istream<CharT, Traits>& is, rs a0, Args&& ...args)
+{
+    const int parsed = read_signed(is, a0.m, a0.M);
+    if (!is.fail())
+    {
+        a0.i = parsed;
+        read(is, std::forward<Args>(args)...);
+    }
+}
+
+// Reads an unsigned integer via read_unsigned using the digit bounds in a0.
+// Only if the stream has not failed is the value (cast to int) stored in a0.i
+// and the remaining arguments processed.
+template <class CharT, class Traits, class ...Args>
+void
+read(std::basic_istream<CharT, Traits>& is, ru a0, Args&& ...args)
+{
+    const unsigned parsed = read_unsigned(is, a0.m, a0.M);
+    if (!is.fail())
+    {
+        a0.i = static_cast<int>(parsed);
+        read(is, std::forward<Args>(args)...);
+    }
+}
+
+// Matches the decimal spelling of a0 against the input, one digit at a time,
+// then continues with the remaining arguments. a0 == -1 means "no number
+// expected" and is skipped. Matching stops as soon as the stream state is no
+// longer goodbit, and the remaining arguments are processed only while the
+// state is still goodbit.
+template <class CharT, class Traits, class ...Args>
+void
+read(std::basic_istream<CharT, Traits>& is, int a0, Args&& ...args)
+{
+    if (a0 != -1)
+    {
+        // Render the digits right-to-left into the tail of the buffer so they
+        // end up in reading order without a separate reversal step.
+        CharT digits[std::numeric_limits<unsigned>::digits10+2u] = {};
+        CharT* const last = digits + sizeof(digits)/sizeof(digits[0]);
+        CharT* first = last;
+        auto remaining = static_cast<unsigned>(a0);
+        do
+        {
+            *--first = static_cast<CharT>(CharT(remaining % 10) + CharT{'0'});
+            remaining /= 10;
+        } while (remaining > 0);
+        for (auto it = first; it != last && is.rdstate() == std::ios::goodbit; ++it)
+            read(is, *it);
+    }
+    if (is.rdstate() == std::ios::goodbit)
+        read(is, std::forward<Args>(args)...);
+}
+
+// Reads a decimal number via read_long_double using the bounds in a0.
+// Only if the stream has not failed is the value stored in a0.i and the
+// remaining arguments processed.
+template <class CharT, class Traits, class ...Args>
+void
+read(std::basic_istream<CharT, Traits>& is, rld a0, Args&& ...args)
+{
+    const long double parsed = read_long_double(is, a0.m, a0.M);
+    if (!is.fail())
+    {
+        a0.i = parsed;
+        read(is, std::forward<Args>(args)...);
+    }
+}
+
+// Stores from into value unless the stream has already failed.
+//   * value still equals not_a_value (unset): it takes the new value.
+//   * value already set to something different from from: the two parsed
+//     results conflict, so failbit is set on is and value is left unchanged.
+//   * value already equals from: nothing happens.
+template <class T, class CharT, class Traits>
+inline
+void
+checked_set(T& value, T from, T not_a_value, std::basic_ios<CharT, Traits>& is)
+{
+    if (is.fail())
+        return;
+    if (value == not_a_value)
+    {
+        value = std::move(from);
+        return;
+    }
+    if (value != from)
+        is.setstate(std::ios::failbit);
+}
+
+} // namespace detail;
+
+template <class CharT, class Traits, class Duration, class Alloc = std::allocator<CharT>>
+std::basic_istream<CharT, Traits>&
+from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
+ fields<Duration>& fds, std::basic_string<CharT, Traits, Alloc>* abbrev,
+ std::chrono::minutes* offset)
+{
+ using std::numeric_limits;
+ using std::ios;
+ using std::chrono::duration;
+ using std::chrono::duration_cast;
+ using std::chrono::seconds;
+ using std::chrono::minutes;
+ using std::chrono::hours;
+ typename std::basic_istream<CharT, Traits>::sentry ok{is, true};
+ if (ok)
+ {
+ date::detail::save_istream<CharT, Traits> ss(is);
+ is.fill(' ');
+ is.flags(std::ios::skipws | std::ios::dec);
+ is.width(0);
+#if !ONLY_C_LOCALE
+ auto& f = std::use_facet<std::time_get<CharT>>(is.getloc());
+ std::tm tm{};
+#endif
+ const CharT* command = nullptr;
+ auto modified = CharT{};
+ auto width = -1;
+
+ CONSTDATA int not_a_year = numeric_limits<int>::min();
+ CONSTDATA int not_a_2digit_year = 100;
+ CONSTDATA int not_a_century = not_a_year / 100;
+ CONSTDATA int not_a_month = 0;
+ CONSTDATA int not_a_day = 0;
+ CONSTDATA int not_a_hour = numeric_limits<int>::min();
+ CONSTDATA int not_a_hour_12_value = 0;
+ CONSTDATA int not_a_minute = not_a_hour;
+ CONSTDATA Duration not_a_second = Duration::min();
+ CONSTDATA int not_a_doy = -1;
+ CONSTDATA int not_a_weekday = 8;
+ CONSTDATA int not_a_week_num = 100;
+ CONSTDATA int not_a_ampm = -1;
+ CONSTDATA minutes not_a_offset = minutes::min();
+
+ int Y = not_a_year; // c, F, Y *
+ int y = not_a_2digit_year; // D, x, y *
+ int g = not_a_2digit_year; // g *
+ int G = not_a_year; // G *
+ int C = not_a_century; // C *
+ int m = not_a_month; // b, B, h, m, c, D, F, x *
+ int d = not_a_day; // c, d, D, e, F, x *
+ int j = not_a_doy; // j *
+ int wd = not_a_weekday; // a, A, u, w *
+ int H = not_a_hour; // c, H, R, T, X *
+ int I = not_a_hour_12_value; // I, r *
+ int p = not_a_ampm; // p, r *
+ int M = not_a_minute; // c, M, r, R, T, X *
+ Duration s = not_a_second; // c, r, S, T, X *
+ int U = not_a_week_num; // U *
+ int V = not_a_week_num; // V *
+ int W = not_a_week_num; // W *
+ std::basic_string<CharT, Traits, Alloc> temp_abbrev; // Z *
+ minutes temp_offset = not_a_offset; // z *
+
+ using detail::read;
+ using detail::rs;
+ using detail::ru;
+ using detail::rld;
+ using detail::checked_set;
+ for (; *fmt != CharT{} && !is.fail(); ++fmt)
+ {
+ switch (*fmt)
+ {
+ case 'a':
+ case 'A':
+ case 'u':
+ case 'w': // wd: a, A, u, w
+ if (command)
+ {
+ int trial_wd = not_a_weekday;
+ if (*fmt == 'a' || *fmt == 'A')
+ {
+ if (modified == CharT{})
+ {
+#if !ONLY_C_LOCALE
+ ios::iostate err = ios::goodbit;
+ f.get(is, nullptr, is, err, &tm, command, fmt+1);
+ is.setstate(err);
+ if (!is.fail())
+ trial_wd = tm.tm_wday;
+#else
+ auto nm = detail::weekday_names();
+ auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first;
+ if (!is.fail())
+ trial_wd = i % 7;
+#endif
+ }
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ }
+ else // *fmt == 'u' || *fmt == 'w'
+ {
+#if !ONLY_C_LOCALE
+ if (modified == CharT{})
+#else
+ if (modified != CharT{'E'})
+#endif
+ {
+ read(is, ru{trial_wd, 1, width == -1 ?
+ 1u : static_cast<unsigned>(width)});
+ if (!is.fail())
+ {
+ if (*fmt == 'u')
+ {
+ if (!(1 <= trial_wd && trial_wd <= 7))
+ {
+ trial_wd = not_a_weekday;
+ is.setstate(ios::failbit);
+ }
+ else if (trial_wd == 7)
+ trial_wd = 0;
+ }
+ else // *fmt == 'w'
+ {
+ if (!(0 <= trial_wd && trial_wd <= 6))
+ {
+ trial_wd = not_a_weekday;
+ is.setstate(ios::failbit);
+ }
+ }
+ }
+ }
+#if !ONLY_C_LOCALE
+ else if (modified == CharT{'O'})
+ {
+ ios::iostate err = ios::goodbit;
+ f.get(is, nullptr, is, err, &tm, command, fmt+1);
+ is.setstate(err);
+ if (!is.fail())
+ trial_wd = tm.tm_wday;
+ }
+#endif
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ }
+ if (trial_wd != not_a_weekday)
+ checked_set(wd, trial_wd, not_a_weekday, is);
+ }
+ else // !command
+ read(is, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ break;
+ case 'b':
+ case 'B':
+ case 'h':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ int ttm = not_a_month;
+#if !ONLY_C_LOCALE
+ ios::iostate err = ios::goodbit;
+ f.get(is, nullptr, is, err, &tm, command, fmt+1);
+ if ((err & ios::failbit) == 0)
+ ttm = tm.tm_mon + 1;
+ is.setstate(err);
+#else
+ auto nm = detail::month_names();
+ auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first;
+ if (!is.fail())
+ ttm = i % 12 + 1;
+#endif
+ checked_set(m, ttm, not_a_month, is);
+ }
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ case 'c':
+ if (command)
+ {
+ if (modified != CharT{'O'})
+ {
+#if !ONLY_C_LOCALE
+ ios::iostate err = ios::goodbit;
+ f.get(is, nullptr, is, err, &tm, command, fmt+1);
+ if ((err & ios::failbit) == 0)
+ {
+ checked_set(Y, tm.tm_year + 1900, not_a_year, is);
+ checked_set(m, tm.tm_mon + 1, not_a_month, is);
+ checked_set(d, tm.tm_mday, not_a_day, is);
+ checked_set(H, tm.tm_hour, not_a_hour, is);
+ checked_set(M, tm.tm_min, not_a_minute, is);
+ checked_set(s, duration_cast<Duration>(seconds{tm.tm_sec}),
+ not_a_second, is);
+ }
+ is.setstate(err);
+#else
+ // "%a %b %e %T %Y"
+ auto nm = detail::weekday_names();
+ auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first;
+ checked_set(wd, static_cast<int>(i % 7), not_a_weekday, is);
+ ws(is);
+ nm = detail::month_names();
+ i = detail::scan_keyword(is, nm.first, nm.second) - nm.first;
+ checked_set(m, static_cast<int>(i % 12 + 1), not_a_month, is);
+ ws(is);
+ int td = not_a_day;
+ read(is, rs{td, 1, 2});
+ checked_set(d, td, not_a_day, is);
+ ws(is);
+ using dfs = detail::decimal_format_seconds<Duration>;
+ CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width;
+ int tH;
+ int tM;
+ long double S;
+ read(is, ru{tH, 1, 2}, CharT{':'}, ru{tM, 1, 2},
+ CharT{':'}, rld{S, 1, w});
+ checked_set(H, tH, not_a_hour, is);
+ checked_set(M, tM, not_a_minute, is);
+ checked_set(s, round<Duration>(duration<long double>{S}),
+ not_a_second, is);
+ ws(is);
+ int tY = not_a_year;
+ read(is, rs{tY, 1, 4u});
+ checked_set(Y, tY, not_a_year, is);
+#endif
+ }
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ case 'x':
+ if (command)
+ {
+ if (modified != CharT{'O'})
+ {
+#if !ONLY_C_LOCALE
+ ios::iostate err = ios::goodbit;
+ f.get(is, nullptr, is, err, &tm, command, fmt+1);
+ if ((err & ios::failbit) == 0)
+ {
+ checked_set(Y, tm.tm_year + 1900, not_a_year, is);
+ checked_set(m, tm.tm_mon + 1, not_a_month, is);
+ checked_set(d, tm.tm_mday, not_a_day, is);
+ }
+ is.setstate(err);
+#else
+ // "%m/%d/%y"
+ int ty = not_a_2digit_year;
+ int tm = not_a_month;
+ int td = not_a_day;
+ read(is, ru{tm, 1, 2}, CharT{'/'}, ru{td, 1, 2}, CharT{'/'},
+ rs{ty, 1, 2});
+ checked_set(y, ty, not_a_2digit_year, is);
+ checked_set(m, tm, not_a_month, is);
+ checked_set(d, td, not_a_day, is);
+#endif
+ }
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ case 'X':
+ if (command)
+ {
+ if (modified != CharT{'O'})
+ {
+#if !ONLY_C_LOCALE
+ ios::iostate err = ios::goodbit;
+ f.get(is, nullptr, is, err, &tm, command, fmt+1);
+ if ((err & ios::failbit) == 0)
+ {
+ checked_set(H, tm.tm_hour, not_a_hour, is);
+ checked_set(M, tm.tm_min, not_a_minute, is);
+ checked_set(s, duration_cast<Duration>(seconds{tm.tm_sec}),
+ not_a_second, is);
+ }
+ is.setstate(err);
+#else
+ // "%T"
+ using dfs = detail::decimal_format_seconds<Duration>;
+ CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width;
+ int tH = not_a_hour;
+ int tM = not_a_minute;
+ long double S;
+ read(is, ru{tH, 1, 2}, CharT{':'}, ru{tM, 1, 2},
+ CharT{':'}, rld{S, 1, w});
+ checked_set(H, tH, not_a_hour, is);
+ checked_set(M, tM, not_a_minute, is);
+ checked_set(s, round<Duration>(duration<long double>{S}),
+ not_a_second, is);
+#endif
+ }
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ case 'C':
+ if (command)
+ {
+ int tC = not_a_century;
+#if !ONLY_C_LOCALE
+ if (modified == CharT{})
+ {
+#endif
+ read(is, rs{tC, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
+#if !ONLY_C_LOCALE
+ }
+ else
+ {
+ ios::iostate err = ios::goodbit;
+ f.get(is, nullptr, is, err, &tm, command, fmt+1);
+ if ((err & ios::failbit) == 0)
+ {
+ auto tY = tm.tm_year + 1900;
+ tC = (tY >= 0 ? tY : tY-99) / 100;
+ }
+ is.setstate(err);
+ }
+#endif
+ checked_set(C, tC, not_a_century, is);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ case 'D':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ int tn = not_a_month;
+ int td = not_a_day;
+ int ty = not_a_2digit_year;
+ read(is, ru{tn, 1, 2}, CharT{'\0'}, CharT{'/'}, CharT{'\0'},
+ ru{td, 1, 2}, CharT{'\0'}, CharT{'/'}, CharT{'\0'},
+ rs{ty, 1, 2});
+ checked_set(y, ty, not_a_2digit_year, is);
+ checked_set(m, tn, not_a_month, is);
+ checked_set(d, td, not_a_day, is);
+ }
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ case 'F':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ int tY = not_a_year;
+ int tn = not_a_month;
+ int td = not_a_day;
+ read(is, rs{tY, 1, width == -1 ? 4u : static_cast<unsigned>(width)},
+ CharT{'-'}, ru{tn, 1, 2}, CharT{'-'}, ru{td, 1, 2});
+ checked_set(Y, tY, not_a_year, is);
+ checked_set(m, tn, not_a_month, is);
+ checked_set(d, td, not_a_day, is);
+ }
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ case 'd':
+ case 'e':
+ if (command)
+ {
+#if !ONLY_C_LOCALE
+ if (modified == CharT{})
+#else
+ if (modified != CharT{'E'})
+#endif
+ {
+ int td = not_a_day;
+ read(is, rs{td, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
+ checked_set(d, td, not_a_day, is);
+ }
+#if !ONLY_C_LOCALE
+ else if (modified == CharT{'O'})
+ {
+ ios::iostate err = ios::goodbit;
+ f.get(is, nullptr, is, err, &tm, command, fmt+1);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ if ((err & ios::failbit) == 0)
+ checked_set(d, tm.tm_mday, not_a_day, is);
+ is.setstate(err);
+ }
+#endif
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ case 'H':
+ if (command)
+ {
+#if !ONLY_C_LOCALE
+ if (modified == CharT{})
+#else
+ if (modified != CharT{'E'})
+#endif
+ {
+ int tH = not_a_hour;
+ read(is, ru{tH, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
+ checked_set(H, tH, not_a_hour, is);
+ }
+#if !ONLY_C_LOCALE
+ else if (modified == CharT{'O'})
+ {
+ ios::iostate err = ios::goodbit;
+ f.get(is, nullptr, is, err, &tm, command, fmt+1);
+ if ((err & ios::failbit) == 0)
+ checked_set(H, tm.tm_hour, not_a_hour, is);
+ is.setstate(err);
+ }
+#endif
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ case 'I':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ int tI = not_a_hour_12_value;
+ // reads in an hour into I, but must be in [1, 12]
+ read(is, rs{tI, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
+ if (!(1 <= tI && tI <= 12))
+ is.setstate(ios::failbit);
+ checked_set(I, tI, not_a_hour_12_value, is);
+ }
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ case 'j':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ int tj = not_a_doy;
+ read(is, ru{tj, 1, width == -1 ? 3u : static_cast<unsigned>(width)});
+ checked_set(j, tj, not_a_doy, is);
+ }
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ case 'M':
+ if (command)
+ {
+#if !ONLY_C_LOCALE
+ if (modified == CharT{})
+#else
+ if (modified != CharT{'E'})
+#endif
+ {
+ int tM = not_a_minute;
+ read(is, ru{tM, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
+ checked_set(M, tM, not_a_minute, is);
+ }
+#if !ONLY_C_LOCALE
+ else if (modified == CharT{'O'})
+ {
+ ios::iostate err = ios::goodbit;
+ f.get(is, nullptr, is, err, &tm, command, fmt+1);
+ if ((err & ios::failbit) == 0)
+ checked_set(M, tm.tm_min, not_a_minute, is);
+ is.setstate(err);
+ }
+#endif
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ case 'm':
+ if (command)
+ {
+#if !ONLY_C_LOCALE
+ if (modified == CharT{})
+#else
+ if (modified != CharT{'E'})
+#endif
+ {
+ int tn = not_a_month;
+ read(is, rs{tn, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
+ checked_set(m, tn, not_a_month, is);
+ }
+#if !ONLY_C_LOCALE
+ else if (modified == CharT{'O'})
+ {
+ ios::iostate err = ios::goodbit;
+ f.get(is, nullptr, is, err, &tm, command, fmt+1);
+ if ((err & ios::failbit) == 0)
+ checked_set(m, tm.tm_mon + 1, not_a_month, is);
+ is.setstate(err);
+ }
+#endif
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ case 'n':
+ case 't':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ // %n matches a single white space character
+ // %t matches 0 or 1 white space characters
+ auto ic = is.peek();
+ if (Traits::eq_int_type(ic, Traits::eof()))
+ {
+ ios::iostate err = ios::eofbit;
+ if (*fmt == 'n')
+ err |= ios::failbit;
+ is.setstate(err);
+ break;
+ }
+ if (isspace(ic))
+ {
+ (void)is.get();
+ }
+ else if (*fmt == 'n')
+ is.setstate(ios::failbit);
+ }
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ case 'p':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ int tp = not_a_ampm;
+#if !ONLY_C_LOCALE
+ tm = std::tm{};
+ tm.tm_hour = 1;
+ ios::iostate err = ios::goodbit;
+ f.get(is, nullptr, is, err, &tm, command, fmt+1);
+ is.setstate(err);
+ if (tm.tm_hour == 1)
+ tp = 0;
+ else if (tm.tm_hour == 13)
+ tp = 1;
+ else
+ is.setstate(err);
+#else
+ auto nm = detail::ampm_names();
+ auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first;
+ tp = i;
+#endif
+ checked_set(p, tp, not_a_ampm, is);
+ }
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+
+ break;
+ case 'r':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+#if !ONLY_C_LOCALE
+ ios::iostate err = ios::goodbit;
+ f.get(is, nullptr, is, err, &tm, command, fmt+1);
+ if ((err & ios::failbit) == 0)
+ {
+ checked_set(H, tm.tm_hour, not_a_hour, is);
+ checked_set(M, tm.tm_min, not_a_hour, is);
+ checked_set(s, duration_cast<Duration>(seconds{tm.tm_sec}),
+ not_a_second, is);
+ }
+ is.setstate(err);
+#else
+ // "%I:%M:%S %p"
+ using dfs = detail::decimal_format_seconds<Duration>;
+ CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width;
+ long double S;
+ int tI = not_a_hour_12_value;
+ int tM = not_a_minute;
+ read(is, ru{tI, 1, 2}, CharT{':'}, ru{tM, 1, 2},
+ CharT{':'}, rld{S, 1, w});
+ checked_set(I, tI, not_a_hour_12_value, is);
+ checked_set(M, tM, not_a_minute, is);
+ checked_set(s, round<Duration>(duration<long double>{S}),
+ not_a_second, is);
+ ws(is);
+ auto nm = detail::ampm_names();
+ auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first;
+ checked_set(p, static_cast<int>(i), not_a_ampm, is);
+#endif
+ }
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ case 'R':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ int tH = not_a_hour;
+ int tM = not_a_minute;
+ read(is, ru{tH, 1, 2}, CharT{'\0'}, CharT{':'}, CharT{'\0'},
+ ru{tM, 1, 2}, CharT{'\0'});
+ checked_set(H, tH, not_a_hour, is);
+ checked_set(M, tM, not_a_minute, is);
+ }
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ case 'S':
+ if (command)
+ {
+ #if !ONLY_C_LOCALE
+ if (modified == CharT{})
+#else
+ if (modified != CharT{'E'})
+#endif
+ {
+ using dfs = detail::decimal_format_seconds<Duration>;
+ CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width;
+ long double S;
+ read(is, rld{S, 1, width == -1 ? w : static_cast<unsigned>(width)});
+ checked_set(s, round<Duration>(duration<long double>{S}),
+ not_a_second, is);
+ }
+#if !ONLY_C_LOCALE
+ else if (modified == CharT{'O'})
+ {
+ ios::iostate err = ios::goodbit;
+ f.get(is, nullptr, is, err, &tm, command, fmt+1);
+ if ((err & ios::failbit) == 0)
+ checked_set(s, duration_cast<Duration>(seconds{tm.tm_sec}),
+ not_a_second, is);
+ is.setstate(err);
+ }
+#endif
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ case 'T':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ using dfs = detail::decimal_format_seconds<Duration>;
+ CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width;
+ int tH = not_a_hour;
+ int tM = not_a_minute;
+ long double S;
+ read(is, ru{tH, 1, 2}, CharT{':'}, ru{tM, 1, 2},
+ CharT{':'}, rld{S, 1, w});
+ checked_set(H, tH, not_a_hour, is);
+ checked_set(M, tM, not_a_minute, is);
+ checked_set(s, round<Duration>(duration<long double>{S}),
+ not_a_second, is);
+ }
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ case 'Y':
+ if (command)
+ {
+#if !ONLY_C_LOCALE
+ if (modified == CharT{})
+#else
+ if (modified != CharT{'O'})
+#endif
+ {
+ int tY = not_a_year;
+ read(is, rs{tY, 1, width == -1 ? 4u : static_cast<unsigned>(width)});
+ checked_set(Y, tY, not_a_year, is);
+ }
+#if !ONLY_C_LOCALE
+ else if (modified == CharT{'E'})
+ {
+ ios::iostate err = ios::goodbit;
+ f.get(is, nullptr, is, err, &tm, command, fmt+1);
+ if ((err & ios::failbit) == 0)
+ checked_set(Y, tm.tm_year + 1900, not_a_year, is);
+ is.setstate(err);
+ }
+#endif
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ case 'y':
+ if (command)
+ {
+#if !ONLY_C_LOCALE
+ if (modified == CharT{})
+#endif
+ {
+ int ty = not_a_2digit_year;
+ read(is, ru{ty, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
+ checked_set(y, ty, not_a_2digit_year, is);
+ }
+#if !ONLY_C_LOCALE
+ else
+ {
+ ios::iostate err = ios::goodbit;
+ f.get(is, nullptr, is, err, &tm, command, fmt+1);
+ if ((err & ios::failbit) == 0)
+ checked_set(Y, tm.tm_year + 1900, not_a_year, is);
+ is.setstate(err);
+ }
+#endif
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ case 'g':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ int tg = not_a_2digit_year;
+ read(is, ru{tg, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
+ checked_set(g, tg, not_a_2digit_year, is);
+ }
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ case 'G':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ int tG = not_a_year;
+ read(is, rs{tG, 1, width == -1 ? 4u : static_cast<unsigned>(width)});
+ checked_set(G, tG, not_a_year, is);
+ }
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ case 'U':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ int tU = not_a_week_num;
+ read(is, ru{tU, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
+ checked_set(U, tU, not_a_week_num, is);
+ }
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ case 'V':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ int tV = not_a_week_num;
+ read(is, ru{tV, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
+ checked_set(V, tV, not_a_week_num, is);
+ }
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ case 'W':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ int tW = not_a_week_num;
+ read(is, ru{tW, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
+ checked_set(W, tW, not_a_week_num, is);
+ }
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ case 'E':
+ case 'O':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ modified = *fmt;
+ }
+ else
+ {
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ }
+ else
+ read(is, *fmt);
+ break;
+ case '%':
+ if (command)
+ {
+ if (modified == CharT{})
+ read(is, *fmt);
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ command = fmt;
+ break;
+ case 'z':
+ if (command)
+ {
+ int tH, tM;
+ minutes toff = not_a_offset;
+ bool neg = false;
+ auto ic = is.peek();
+ if (!Traits::eq_int_type(ic, Traits::eof()))
+ {
+ auto c = static_cast<char>(Traits::to_char_type(ic));
+ if (c == '-')
+ neg = true;
+ }
+ if (modified == CharT{})
+ {
+ read(is, rs{tH, 2, 2});
+ if (!is.fail())
+ toff = hours{std::abs(tH)};
+ if (is.good())
+ {
+ ic = is.peek();
+ if (!Traits::eq_int_type(ic, Traits::eof()))
+ {
+ auto c = static_cast<char>(Traits::to_char_type(ic));
+ if ('0' <= c && c <= '9')
+ {
+ read(is, ru{tM, 2, 2});
+ if (!is.fail())
+ toff += minutes{tM};
+ }
+ }
+ }
+ }
+ else
+ {
+ read(is, rs{tH, 1, 2});
+ if (!is.fail())
+ toff = hours{std::abs(tH)};
+ if (is.good())
+ {
+ ic = is.peek();
+ if (!Traits::eq_int_type(ic, Traits::eof()))
+ {
+ auto c = static_cast<char>(Traits::to_char_type(ic));
+ if (c == ':')
+ {
+ (void)is.get();
+ read(is, ru{tM, 2, 2});
+ if (!is.fail())
+ toff += minutes{tM};
+ }
+ }
+ }
+ }
+ if (neg)
+ toff = -toff;
+ checked_set(temp_offset, toff, not_a_offset, is);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ case 'Z':
+ if (command)
+ {
+ if (modified == CharT{})
+ {
+ std::basic_string<CharT, Traits, Alloc> buf;
+ while (is.rdstate() == std::ios::goodbit)
+ {
+ auto i = is.rdbuf()->sgetc();
+ if (Traits::eq_int_type(i, Traits::eof()))
+ {
+ is.setstate(ios::eofbit);
+ break;
+ }
+ auto wc = Traits::to_char_type(i);
+ auto c = static_cast<char>(wc);
+ // is c a valid time zone name or abbreviation character?
+ if (!(CharT{1} < wc && wc < CharT{127}) || !(isalnum(c) ||
+ c == '_' || c == '/' || c == '-' || c == '+'))
+ break;
+ buf.push_back(c);
+ is.rdbuf()->sbumpc();
+ }
+ if (buf.empty())
+ is.setstate(ios::failbit);
+ checked_set(temp_abbrev, buf, {}, is);
+ }
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ else
+ read(is, *fmt);
+ break;
+ default:
+ if (command)
+ {
+ if (width == -1 && modified == CharT{} && '0' <= *fmt && *fmt <= '9')
+ {
+ width = static_cast<char>(*fmt) - '0';
+ while ('0' <= fmt[1] && fmt[1] <= '9')
+ width = 10*width + static_cast<char>(*++fmt) - '0';
+ }
+ else
+ {
+ if (modified == CharT{})
+ read(is, CharT{'%'}, width, *fmt);
+ else
+ read(is, CharT{'%'}, width, modified, *fmt);
+ command = nullptr;
+ width = -1;
+ modified = CharT{};
+ }
+ }
+ else // !command
+ {
+ if (isspace(static_cast<unsigned char>(*fmt)))
+ {
+ // space matches 0 or more white space characters
+ if (is.good())
+ ws(is);
+ }
+ else
+ read(is, *fmt);
+ }
+ break;
+ }
+ }
+ // is.fail() || *fmt == CharT{}
+ if (is.rdstate() == ios::goodbit && command)
+ {
+ if (modified == CharT{})
+ read(is, CharT{'%'}, width);
+ else
+ read(is, CharT{'%'}, width, modified);
+ }
+ if (!is.fail())
+ {
+ if (y != not_a_2digit_year)
+ {
+ // Convert y and an optional C to Y
+ if (!(0 <= y && y <= 99))
+ goto broken;
+ if (C == not_a_century)
+ {
+ if (Y == not_a_year)
+ {
+ if (y >= 69)
+ C = 19;
+ else
+ C = 20;
+ }
+ else
+ {
+ C = (Y >= 0 ? Y : Y-100) / 100;
+ }
+ }
+ int tY;
+ if (C >= 0)
+ tY = 100*C + y;
+ else
+ tY = 100*(C+1) - (y == 0 ? 100 : y);
+ if (Y != not_a_year && Y != tY)
+ goto broken;
+ Y = tY;
+ }
+ if (g != not_a_2digit_year)
+ {
+ // Convert g and an optional C to G
+ if (!(0 <= g && g <= 99))
+ goto broken;
+ if (C == not_a_century)
+ {
+ if (G == not_a_year)
+ {
+ if (g >= 69)
+ C = 19;
+ else
+ C = 20;
+ }
+ else
+ {
+ C = (G >= 0 ? G : G-100) / 100;
+ }
+ }
+ int tG;
+ if (C >= 0)
+ tG = 100*C + g;
+ else
+ tG = 100*(C+1) - (g == 0 ? 100 : g);
+ if (G != not_a_year && G != tG)
+ goto broken;
+ G = tG;
+ }
+ if (Y < static_cast<int>(year::min()) || Y > static_cast<int>(year::max()))
+ Y = not_a_year;
+ bool computed = false;
+ if (G != not_a_year && V != not_a_week_num && wd != not_a_weekday)
+ {
+ year_month_day ymd_trial = sys_days(year{G-1}/December/Thursday[last]) +
+ (Monday-Thursday) + weeks{V-1} +
+ (weekday{static_cast<unsigned>(wd)}-Monday);
+ if (Y == not_a_year)
+ Y = static_cast<int>(ymd_trial.year());
+ else if (year{Y} != ymd_trial.year())
+ goto broken;
+ if (m == not_a_month)
+ m = static_cast<int>(static_cast<unsigned>(ymd_trial.month()));
+ else if (month(static_cast<unsigned>(m)) != ymd_trial.month())
+ goto broken;
+ if (d == not_a_day)
+ d = static_cast<int>(static_cast<unsigned>(ymd_trial.day()));
+ else if (day(static_cast<unsigned>(d)) != ymd_trial.day())
+ goto broken;
+ computed = true;
+ }
+ if (Y != not_a_year && U != not_a_week_num && wd != not_a_weekday)
+ {
+ year_month_day ymd_trial = sys_days(year{Y}/January/Sunday[1]) +
+ weeks{U-1} +
+ (weekday{static_cast<unsigned>(wd)} - Sunday);
+ if (Y == not_a_year)
+ Y = static_cast<int>(ymd_trial.year());
+ else if (year{Y} != ymd_trial.year())
+ goto broken;
+ if (m == not_a_month)
+ m = static_cast<int>(static_cast<unsigned>(ymd_trial.month()));
+ else if (month(static_cast<unsigned>(m)) != ymd_trial.month())
+ goto broken;
+ if (d == not_a_day)
+ d = static_cast<int>(static_cast<unsigned>(ymd_trial.day()));
+ else if (day(static_cast<unsigned>(d)) != ymd_trial.day())
+ goto broken;
+ computed = true;
+ }
+ if (Y != not_a_year && W != not_a_week_num && wd != not_a_weekday)
+ {
+ year_month_day ymd_trial = sys_days(year{Y}/January/Monday[1]) +
+ weeks{W-1} +
+ (weekday{static_cast<unsigned>(wd)} - Monday);
+ if (Y == not_a_year)
+ Y = static_cast<int>(ymd_trial.year());
+ else if (year{Y} != ymd_trial.year())
+ goto broken;
+ if (m == not_a_month)
+ m = static_cast<int>(static_cast<unsigned>(ymd_trial.month()));
+ else if (month(static_cast<unsigned>(m)) != ymd_trial.month())
+ goto broken;
+ if (d == not_a_day)
+ d = static_cast<int>(static_cast<unsigned>(ymd_trial.day()));
+ else if (day(static_cast<unsigned>(d)) != ymd_trial.day())
+ goto broken;
+ computed = true;
+ }
+ if (j != not_a_doy && Y != not_a_year)
+ {
+ auto ymd_trial = year_month_day{local_days(year{Y}/1/1) + days{j-1}};
+ if (m == 0)
+ m = static_cast<int>(static_cast<unsigned>(ymd_trial.month()));
+ else if (month(static_cast<unsigned>(m)) != ymd_trial.month())
+ goto broken;
+ if (d == 0)
+ d = static_cast<int>(static_cast<unsigned>(ymd_trial.day()));
+ else if (day(static_cast<unsigned>(d)) != ymd_trial.day())
+ goto broken;
+ j = not_a_doy;
+ }
+ auto ymd = year{Y}/m/d;
+ if (ymd.ok())
+ {
+ if (wd == not_a_weekday)
+ wd = static_cast<int>((weekday(sys_days(ymd)) - Sunday).count());
+ else if (wd != static_cast<int>((weekday(sys_days(ymd)) - Sunday).count()))
+ goto broken;
+ if (!computed)
+ {
+ if (G != not_a_year || V != not_a_week_num)
+ {
+ sys_days sd = ymd;
+ auto G_trial = year_month_day{sd + days{3}}.year();
+ auto start = sys_days((G_trial - years{1})/December/Thursday[last]) +
+ (Monday - Thursday);
+ if (sd < start)
+ {
+ --G_trial;
+ if (V != not_a_week_num)
+ start = sys_days((G_trial - years{1})/December/Thursday[last])
+ + (Monday - Thursday);
+ }
+ if (G != not_a_year && G != static_cast<int>(G_trial))
+ goto broken;
+ if (V != not_a_week_num)
+ {
+ auto V_trial = duration_cast<weeks>(sd - start).count() + 1;
+ if (V != V_trial)
+ goto broken;
+ }
+ }
+ if (U != not_a_week_num)
+ {
+ auto start = sys_days(Sunday[1]/January/ymd.year());
+ auto U_trial = floor<weeks>(sys_days(ymd) - start).count() + 1;
+ if (U != U_trial)
+ goto broken;
+ }
+ if (W != not_a_week_num)
+ {
+ auto start = sys_days(Monday[1]/January/ymd.year());
+ auto W_trial = floor<weeks>(sys_days(ymd) - start).count() + 1;
+ if (W != W_trial)
+ goto broken;
+ }
+ }
+ }
+ fds.ymd = ymd;
+ if (I != not_a_hour_12_value)
+ {
+ if (!(1 <= I && I <= 12))
+ goto broken;
+ if (p != not_a_ampm)
+ {
+ // p is in [0, 1] == [AM, PM]
+ // Store trial H in I
+ if (I == 12)
+ --p;
+ I += p*12;
+ // Either set H from I or make sure H and I are consistent
+ if (H == not_a_hour)
+ H = I;
+ else if (I != H)
+ goto broken;
+ }
+ else // p == not_a_ampm
+ {
+ // if H, make sure H and I could be consistent
+ if (H != not_a_hour)
+ {
+ if (I == 12)
+ {
+ if (H != 0 && H != 12)
+ goto broken;
+ }
+ else if (!(I == H || I == H+12))
+ {
+ goto broken;
+ }
+ }
+ }
+ }
+ if (H != not_a_hour)
+ {
+ fds.has_tod = true;
+ fds.tod = hh_mm_ss<Duration>{hours{H}};
+ }
+ if (M != not_a_minute)
+ {
+ fds.has_tod = true;
+ fds.tod.m_ = minutes{M};
+ }
+ if (s != not_a_second)
+ {
+ fds.has_tod = true;
+ fds.tod.s_ = detail::decimal_format_seconds<Duration>{s};
+ }
+ if (j != not_a_doy)
+ {
+ fds.has_tod = true;
+ fds.tod.h_ += hours{days{j}};
+ }
+ if (wd != not_a_weekday)
+ fds.wd = weekday{static_cast<unsigned>(wd)};
+ if (abbrev != nullptr)
+ *abbrev = std::move(temp_abbrev);
+ if (offset != nullptr && temp_offset != not_a_offset)
+ *offset = temp_offset;
+ }
+ return is;
+ }
+broken:
+ is.setstate(ios::failbit);
+ return is;
+}
+
+// Parses a `year` from `is` according to `fmt`.
+//
+// The input is decoded into a scratch `fields<seconds>` first. `y` is assigned
+// only when the stream has not failed and the decoded year reports ok();
+// otherwise failbit is set on `is` and `y` keeps its previous value.
+// `abbrev` and `offset` are forwarded unchanged to the `fields` overload.
+template <class CharT, class Traits, class Alloc = std::allocator<CharT>>
+std::basic_istream<CharT, Traits>&
+from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt, year& y,
+            std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
+            std::chrono::minutes* offset = nullptr)
+{
+    fields<std::chrono::seconds> parsed{};
+    from_stream(is, fmt, parsed, abbrev, offset);
+    const auto candidate = parsed.ymd.year();
+    if (!candidate.ok())
+        is.setstate(std::ios::failbit);
+    if (is.fail())
+        return is;
+    y = candidate;
+    return is;
+}
+
+// Parses a `month` from `is` according to `fmt`.
+//
+// Decodes into a scratch `fields<seconds>`; `m` is written only if the stream
+// is still not failed and the decoded month is ok(). An invalid month sets
+// failbit and leaves `m` unchanged.
+template <class CharT, class Traits, class Alloc = std::allocator<CharT>>
+std::basic_istream<CharT, Traits>&
+from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt, month& m,
+            std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
+            std::chrono::minutes* offset = nullptr)
+{
+    fields<std::chrono::seconds> parsed{};
+    from_stream(is, fmt, parsed, abbrev, offset);
+    const bool month_ok = parsed.ymd.month().ok();
+    if (!month_ok)
+        is.setstate(std::ios::failbit);
+    if (is.fail())
+        return is;
+    m = parsed.ymd.month();
+    return is;
+}
+
+// Parses a `day` from `is` according to `fmt`.
+//
+// Decodes into a scratch `fields<seconds>`; `d` is written only if the stream
+// is still not failed and the decoded day is ok(). An invalid day sets
+// failbit and leaves `d` unchanged.
+template <class CharT, class Traits, class Alloc = std::allocator<CharT>>
+std::basic_istream<CharT, Traits>&
+from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt, day& d,
+            std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
+            std::chrono::minutes* offset = nullptr)
+{
+    fields<std::chrono::seconds> parsed{};
+    from_stream(is, fmt, parsed, abbrev, offset);
+    const auto candidate = parsed.ymd.day();
+    if (!candidate.ok())
+        is.setstate(std::ios::failbit);
+    if (is.fail())
+        return is;
+    d = candidate;
+    return is;
+}
+
+// Parses a `weekday` from `is` according to `fmt`.
+//
+// Decodes into a scratch `fields<seconds>`; `wd` is written only if the stream
+// is still not failed and the decoded weekday is ok(). An invalid weekday sets
+// failbit and leaves `wd` unchanged.
+template <class CharT, class Traits, class Alloc = std::allocator<CharT>>
+std::basic_istream<CharT, Traits>&
+from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt, weekday& wd,
+            std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
+            std::chrono::minutes* offset = nullptr)
+{
+    fields<std::chrono::seconds> parsed{};
+    from_stream(is, fmt, parsed, abbrev, offset);
+    const bool weekday_ok = parsed.wd.ok();
+    if (!weekday_ok)
+        is.setstate(std::ios::failbit);
+    if (is.fail())
+        return is;
+    wd = parsed.wd;
+    return is;
+}
+
+// Parses a `year_month` from `is` according to `fmt`.
+//
+// Decodes into a scratch `fields<seconds>`; `ym` is written only if the stream
+// is still not failed and BOTH the decoded year and month are ok(). If either
+// component is invalid, failbit is set and `ym` keeps its previous value.
+// `abbrev` and `offset` are forwarded unchanged to the `fields` overload.
+template <class CharT, class Traits, class Alloc = std::allocator<CharT>>
+std::basic_istream<CharT, Traits>&
+from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt, year_month& ym,
+            std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
+            std::chrono::minutes* offset = nullptr)
+{
+    using CT = std::chrono::seconds;
+    fields<CT> fds{};
+    from_stream(is, fmt, fds, abbrev, offset);
+    // Validate the year as well as the month: a format without a year
+    // specifier would otherwise store a year_month whose year() is not ok().
+    if (!fds.ymd.year().ok() || !fds.ymd.month().ok())
+        is.setstate(std::ios::failbit);
+    if (!is.fail())
+        ym = fds.ymd.year()/fds.ymd.month();
+    return is;
+}
+
+// Parses a `month_day` from `is` according to `fmt`.
+//
+// Decodes into a scratch `fields<seconds>`; `md` is written only if the stream
+// is still not failed and both the decoded month and day are ok(). Otherwise
+// failbit is set and `md` is left unchanged.
+template <class CharT, class Traits, class Alloc = std::allocator<CharT>>
+std::basic_istream<CharT, Traits>&
+from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt, month_day& md,
+            std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
+            std::chrono::minutes* offset = nullptr)
+{
+    fields<std::chrono::seconds> parsed{};
+    from_stream(is, fmt, parsed, abbrev, offset);
+    const bool components_ok = parsed.ymd.month().ok() && parsed.ymd.day().ok();
+    if (!components_ok)
+        is.setstate(std::ios::failbit);
+    if (is.fail())
+        return is;
+    md = parsed.ymd.month()/parsed.ymd.day();
+    return is;
+}
+
+// Parses a `year_month_day` from `is` according to `fmt`.
+//
+// Decodes into a scratch `fields<seconds>`; `ymd` is written only if the
+// stream is still not failed and the decoded date is ok(). Otherwise failbit
+// is set and `ymd` is left unchanged.
+template <class CharT, class Traits, class Alloc = std::allocator<CharT>>
+std::basic_istream<CharT, Traits>&
+from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
+            year_month_day& ymd, std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
+            std::chrono::minutes* offset = nullptr)
+{
+    fields<std::chrono::seconds> parsed{};
+    from_stream(is, fmt, parsed, abbrev, offset);
+    const bool date_ok = parsed.ymd.ok();
+    if (!date_ok)
+        is.setstate(std::ios::failbit);
+    if (is.fail())
+        return is;
+    ymd = parsed.ymd;
+    return is;
+}
+
+// Parses a `sys_time<Duration>` from `is` according to `fmt`.
+//
+// When the caller passes no `offset`, a local zero-initialized `minutes` is
+// used in its place so the offset can still be subtracted below.
+// `tp` is written only if the stream has not failed, the decoded date is ok()
+// and the time of day is in its conventional range; it is computed as
+// sys_days(date) - offset + time-of-day, rounded to `Duration`.
+// Otherwise failbit is set and `tp` is left unchanged.
+template <class Duration, class CharT, class Traits, class Alloc = std::allocator<CharT>>
+std::basic_istream<CharT, Traits>&
+from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
+            sys_time<Duration>& tp, std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
+            std::chrono::minutes* offset = nullptr)
+{
+    using CT = typename std::common_type<Duration, std::chrono::seconds>::type;
+    std::chrono::minutes fallback_offset{};
+    std::chrono::minutes* const effective_offset =
+        (offset != nullptr) ? offset : &fallback_offset;
+    fields<CT> parsed{};
+    // has_tod is pre-set before handing the fields to the parser.
+    parsed.has_tod = true;
+    from_stream(is, fmt, parsed, abbrev, effective_offset);
+    const bool usable = parsed.ymd.ok() && parsed.tod.in_conventional_range();
+    if (!usable)
+        is.setstate(std::ios::failbit);
+    if (is.fail())
+        return is;
+    tp = round<Duration>(sys_days(parsed.ymd) - *effective_offset +
+                         parsed.tod.to_duration());
+    return is;
+}
+
+// Parses a `local_time<Duration>` from `is` according to `fmt`.
+//
+// `tp` is written only if the stream has not failed, the decoded date is ok()
+// and the time of day is in its conventional range; it is computed as
+// local_days(date) + time-of-day, rounded to `Duration`. Unlike the sys_time
+// overload, the parsed offset is not applied to the result; `offset` is simply
+// forwarded to the `fields` overload.
+// On failure failbit is set and `tp` is left unchanged.
+template <class Duration, class CharT, class Traits, class Alloc = std::allocator<CharT>>
+std::basic_istream<CharT, Traits>&
+from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
+            local_time<Duration>& tp, std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
+            std::chrono::minutes* offset = nullptr)
+{
+    using CT = typename std::common_type<Duration, std::chrono::seconds>::type;
+    fields<CT> parsed{};
+    // has_tod is pre-set before handing the fields to the parser.
+    parsed.has_tod = true;
+    from_stream(is, fmt, parsed, abbrev, offset);
+    const bool usable = parsed.ymd.ok() && parsed.tod.in_conventional_range();
+    if (!usable)
+        is.setstate(std::ios::failbit);
+    if (is.fail())
+        return is;
+    tp = round<Duration>(local_seconds{local_days(parsed.ymd)} +
+                         parsed.tod.to_duration());
+    return is;
+}
+
+// Parses a `std::chrono::duration<Rep, Period>` from `is` according to `fmt`.
+//
+// `d` is written only if the stream has not failed and the parser reported a
+// time of day (`has_tod`); the value is the parsed time of day converted with
+// duration_cast. If no time of day was parsed, failbit is set and `d` is left
+// unchanged.
+template <class Rep, class Period, class CharT, class Traits, class Alloc = std::allocator<CharT>>
+std::basic_istream<CharT, Traits>&
+from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
+            std::chrono::duration<Rep, Period>& d,
+            std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
+            std::chrono::minutes* offset = nullptr)
+{
+    using Target = std::chrono::duration<Rep, Period>;
+    using CT = typename std::common_type<Target, std::chrono::seconds>::type;
+    fields<CT> parsed{};
+    from_stream(is, fmt, parsed, abbrev, offset);
+    if (!parsed.has_tod)
+        is.setstate(std::ios::failbit);
+    if (is.fail())
+        return is;
+    d = std::chrono::duration_cast<Target>(parsed.tod.to_duration());
+    return is;
+}
+
+// Manipulator object returned by parse(); it bundles the arguments that the
+// matching operator>> forwards to from_stream.
+//   format_ - owned copy of the format string
+//   tp_     - reference to the object that receives the parsed value
+//   abbrev_ - optional out-pointer for the parsed abbreviation (may be null)
+//   offset_ - optional out-pointer for the parsed offset (may be null)
+template <class Parsable, class CharT, class Traits = std::char_traits<CharT>,
+          class Alloc = std::allocator<CharT>>
+struct parse_manip
+{
+    const std::basic_string<CharT, Traits, Alloc> format_;
+    Parsable&                                     tp_;
+    std::basic_string<CharT, Traits, Alloc>*      abbrev_;
+    std::chrono::minutes*                         offset_;
+
+    // Takes the format by value and moves it into place; the target is held
+    // by reference, so it must outlive the manipulator.
+    parse_manip(std::basic_string<CharT, Traits, Alloc> format, Parsable& tp,
+                std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
+                std::chrono::minutes* offset = nullptr)
+        : format_(std::move(format)), tp_(tp), abbrev_(abbrev), offset_(offset)
+    {
+    }
+};
+
+// Extraction operator for the manipulator produced by parse(): forwards the
+// stored format string, target reference and optional abbreviation/offset
+// pointers to the matching from_stream overload and returns its result.
+template <class Parsable, class CharT, class Traits, class Alloc>
+std::basic_istream<CharT, Traits>&
+operator>>(std::basic_istream<CharT, Traits>& is,
+           const parse_manip<Parsable, CharT, Traits, Alloc>& x)
+{
+    const CharT* const fmt = x.format_.c_str();
+    return from_stream(is, fmt, x.tp_, x.abbrev_, x.offset_);
+}
+
+// Builds a manipulator that parses into `tp` using `format`, for use as
+// `is >> parse(format, tp)`.
+//
+// The decltype in the trailing return type lists a from_stream call before
+// the comma, so this overload is only viable when
+// from_stream(is, format.c_str(), tp) is well-formed; the return type itself
+// is parse_manip<Parsable, CharT, Traits, Alloc>.
+template <class Parsable, class CharT, class Traits, class Alloc>
+inline
+auto
+parse(const std::basic_string<CharT, Traits, Alloc>& format, Parsable& tp)
+    -> decltype(from_stream(std::declval<std::basic_istream<CharT, Traits>&>(),
+                            format.c_str(), tp),
+                parse_manip<Parsable, CharT, Traits, Alloc>{format, tp})
+{
+    using manip_type = parse_manip<Parsable, CharT, Traits, Alloc>;
+    return manip_type{format, tp};
+}
+
+// Builds a manipulator that parses into `tp` using `format` and additionally
+// passes `&abbrev` to from_stream as the abbreviation out-parameter.
+//
+// Only viable when from_stream(is, format.c_str(), tp, &abbrev) is
+// well-formed (checked by the decltype in the trailing return type).
+template <class Parsable, class CharT, class Traits, class Alloc>
+inline
+auto
+parse(const std::basic_string<CharT, Traits, Alloc>& format, Parsable& tp,
+      std::basic_string<CharT, Traits, Alloc>& abbrev)
+    -> decltype(from_stream(std::declval<std::basic_istream<CharT, Traits>&>(),
+                            format.c_str(), tp, &abbrev),
+                parse_manip<Parsable, CharT, Traits, Alloc>{format, tp, &abbrev})
+{
+    using manip_type = parse_manip<Parsable, CharT, Traits, Alloc>;
+    return manip_type{format, tp, &abbrev};
+}
+
+// Builds a manipulator that parses into `tp` using `format` and passes
+// `&offset` to from_stream as the offset out-parameter; no abbreviation is
+// requested (the abbreviation pointer is null).
+//
+// Only viable when the corresponding five-argument from_stream call is
+// well-formed (checked by the decltype in the trailing return type).
+template <class Parsable, class CharT, class Traits, class Alloc>
+inline
+auto
+parse(const std::basic_string<CharT, Traits, Alloc>& format, Parsable& tp,
+      std::chrono::minutes& offset)
+    -> decltype(from_stream(std::declval<std::basic_istream<CharT, Traits>&>(),
+                            format.c_str(), tp,
+                            std::declval<std::basic_string<CharT, Traits, Alloc>*>(),
+                            &offset),
+                parse_manip<Parsable, CharT, Traits, Alloc>{format, tp, nullptr, &offset})
+{
+    using manip_type = parse_manip<Parsable, CharT, Traits, Alloc>;
+    return manip_type{format, tp, nullptr, &offset};
+}
+
+// Builds a manipulator that parses into `tp` using `format` and passes both
+// `&abbrev` and `&offset` to from_stream as out-parameters.
+//
+// Only viable when from_stream(is, format.c_str(), tp, &abbrev, &offset) is
+// well-formed (checked by the decltype in the trailing return type).
+template <class Parsable, class CharT, class Traits, class Alloc>
+inline
+auto
+parse(const std::basic_string<CharT, Traits, Alloc>& format, Parsable& tp,
+      std::basic_string<CharT, Traits, Alloc>& abbrev, std::chrono::minutes& offset)
+    -> decltype(from_stream(std::declval<std::basic_istream<CharT, Traits>&>(),
+                            format.c_str(), tp, &abbrev, &offset),
+                parse_manip<Parsable, CharT, Traits, Alloc>{format, tp, &abbrev, &offset})
+{
+    using manip_type = parse_manip<Parsable, CharT, Traits, Alloc>;
+    return manip_type{format, tp, &abbrev, &offset};
+}
+
+// const CharT* formats
+
+// C-string variant: builds a manipulator that parses into `tp` using the
+// null-terminated `format`, with default traits and allocator.
+//
+// Only viable when from_stream(is, format, tp) is well-formed for a
+// std::basic_istream<CharT> (checked by the decltype in the return type).
+template <class Parsable, class CharT>
+inline
+auto
+parse(const CharT* format, Parsable& tp)
+    -> decltype(from_stream(std::declval<std::basic_istream<CharT>&>(), format, tp),
+                parse_manip<Parsable, CharT>{format, tp})
+{
+    using manip_type = parse_manip<Parsable, CharT>;
+    return manip_type{format, tp};
+}
+
+// C-string variant that also passes `&abbrev` to from_stream as the
+// abbreviation out-parameter; Traits and Alloc are deduced from `abbrev`.
+//
+// Only viable when from_stream(is, format, tp, &abbrev) is well-formed
+// (checked by the decltype in the trailing return type).
+template <class Parsable, class CharT, class Traits, class Alloc>
+inline
+auto
+parse(const CharT* format, Parsable& tp, std::basic_string<CharT, Traits, Alloc>& abbrev)
+    -> decltype(from_stream(std::declval<std::basic_istream<CharT, Traits>&>(), format,
+                            tp, &abbrev),
+                parse_manip<Parsable, CharT, Traits, Alloc>{format, tp, &abbrev})
+{
+    using manip_type = parse_manip<Parsable, CharT, Traits, Alloc>;
+    return manip_type{format, tp, &abbrev};
+}
+
+// C-string variant that passes `&offset` to from_stream as the offset
+// out-parameter; no abbreviation is requested (the pointer is null) and the
+// default traits and allocator are used.
+//
+// Only viable when the corresponding five-argument from_stream call is
+// well-formed (checked by the decltype in the trailing return type).
+template <class Parsable, class CharT>
+inline
+auto
+parse(const CharT* format, Parsable& tp, std::chrono::minutes& offset)
+    -> decltype(from_stream(std::declval<std::basic_istream<CharT>&>(), format,
+                            tp, std::declval<std::basic_string<CharT>*>(), &offset),
+                parse_manip<Parsable, CharT>{format, tp, nullptr, &offset})
+{
+    using manip_type = parse_manip<Parsable, CharT>;
+    return manip_type{format, tp, nullptr, &offset};
+}
+
+// C-string variant that passes both `&abbrev` and `&offset` to from_stream as
+// out-parameters; Traits and Alloc are deduced from `abbrev`.
+//
+// Only viable when from_stream(is, format, tp, &abbrev, &offset) is
+// well-formed (checked by the decltype in the trailing return type).
+template <class Parsable, class CharT, class Traits, class Alloc>
+inline
+auto
+parse(const CharT* format, Parsable& tp,
+      std::basic_string<CharT, Traits, Alloc>& abbrev, std::chrono::minutes& offset)
+    -> decltype(from_stream(std::declval<std::basic_istream<CharT, Traits>&>(), format,
+                            tp, &abbrev, &offset),
+                parse_manip<Parsable, CharT, Traits, Alloc>{format, tp, &abbrev, &offset})
+{
+    using manip_type = parse_manip<Parsable, CharT, Traits, Alloc>;
+    return manip_type{format, tp, &abbrev, &offset};
+}
+
+// duration streaming
+
+// Streams a std::chrono::duration as its tick count immediately followed by
+// the unit text that detail::get_units yields for the duration's period.
+// The two pieces are concatenated first and inserted into `os` in one step.
+template <class CharT, class Traits, class Rep, class Period>
+inline
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os,
+           const std::chrono::duration<Rep, Period>& d)
+{
+    using period_type = typename Period::type;
+    return os << (detail::make_string<CharT, Traits>::from(d.count()) +
+                  detail::get_units<CharT>(period_type{}));
+}
+
+} // namespace date
+} // namespace arrow_vendored
+
+#ifdef _MSC_VER
+# pragma warning(pop)
+#endif
+
+#ifdef __GNUC__
+# pragma GCC diagnostic pop
+#endif
+
+#endif // DATE_H
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime/tz.cpp b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime/tz.cpp
index e80e392bd73..787f7b0ea6b 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime/tz.cpp
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime/tz.cpp
@@ -1,3877 +1,3877 @@
-// The MIT License (MIT)
-//
-// Copyright (c) 2015, 2016, 2017 Howard Hinnant
-// Copyright (c) 2015 Ville Voutilainen
-// Copyright (c) 2016 Alexander Kormanovsky
-// Copyright (c) 2016, 2017 Jiangang Zhuang
-// Copyright (c) 2017 Nicolas Veloz Savino
-// Copyright (c) 2017 Florian Dang
-// Copyright (c) 2017 Aaron Bishop
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in all
-// copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-// SOFTWARE.
-//
-// Our apologies. When the previous paragraph was written, lowercase had not yet
-// been invented (that would involve another several millennia of evolution).
-// We did not mean to shout.
-
-// NOTE(ARROW): This is required so that symbols are properly exported from the DLL
-#include "visibility.h"
-
-#ifdef _WIN32
- // windows.h will be included directly and indirectly (e.g. by curl).
- // We need to define these macros to prevent windows.h bringing in
- // more than we need and do it early so windows.h doesn't get included
- // without these macros having been defined.
- // min/max macros interfere with the C++ versions.
-# ifndef NOMINMAX
-# define NOMINMAX
-# endif
- // We don't need all that Windows has to offer.
-# ifndef WIN32_LEAN_AND_MEAN
-# define WIN32_LEAN_AND_MEAN
-# endif
-
- // for wcstombs
-# ifndef _CRT_SECURE_NO_WARNINGS
-# define _CRT_SECURE_NO_WARNINGS
-# endif
-
- // None of this happens with the MS SDK (at least VS14 which I tested), but:
- // Compiling with mingw, we get "error: 'KF_FLAG_DEFAULT' was not declared in this scope."
- // and error: 'SHGetKnownFolderPath' was not declared in this scope.".
- // It seems when using mingw NTDDI_VERSION is undefined and that
- // causes KNOWN_FOLDER_FLAG and the KF_ flags to not get defined.
- // So we must define NTDDI_VERSION to get those flags on mingw.
- // The docs say though here:
- // https://msdn.microsoft.com/en-nz/library/windows/desktop/aa383745(v=vs.85).aspx
- // that "If you define NTDDI_VERSION, you must also define _WIN32_WINNT."
- // So we declare we require Vista or greater.
-# ifdef __MINGW32__
-
-# ifndef NTDDI_VERSION
-# define NTDDI_VERSION 0x06000000
-# define _WIN32_WINNT _WIN32_WINNT_VISTA
-# elif NTDDI_VERSION < 0x06000000
-# warning "If this fails to compile NTDDI_VERSION may be to low. See comments above."
-# endif
- // But once we define the values above we then get this linker error:
- // "tz.cpp:(.rdata$.refptr.FOLDERID_Downloads[.refptr.FOLDERID_Downloads]+0x0): "
- // "undefined reference to `FOLDERID_Downloads'"
- // which #include <initguid.h> cures see:
- // https://support.microsoft.com/en-us/kb/130869
-# include <initguid.h>
- // But with <initguid.h> included, the error moves on to:
- // error: 'FOLDERID_Downloads' was not declared in this scope
- // Which #include <knownfolders.h> cures.
-# error #include <knownfolders.h>
-
-# endif // __MINGW32__
-
-# include <windows.h>
-#endif // _WIN32
-
-#include "tz_private.h"
-
-#ifdef __APPLE__
+// The MIT License (MIT)
+//
+// Copyright (c) 2015, 2016, 2017 Howard Hinnant
+// Copyright (c) 2015 Ville Voutilainen
+// Copyright (c) 2016 Alexander Kormanovsky
+// Copyright (c) 2016, 2017 Jiangang Zhuang
+// Copyright (c) 2017 Nicolas Veloz Savino
+// Copyright (c) 2017 Florian Dang
+// Copyright (c) 2017 Aaron Bishop
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+// Our apologies. When the previous paragraph was written, lowercase had not yet
+// been invented (that would involve another several millennia of evolution).
+// We did not mean to shout.
+
+// NOTE(ARROW): This is required so that symbols are properly exported from the DLL
+#include "visibility.h"
+
+#ifdef _WIN32
+ // windows.h will be included directly and indirectly (e.g. by curl).
+ // We need to define these macros to prevent windows.h bringing in
+ // more than we need and do it early so windows.h doesn't get included
+ // without these macros having been defined.
+ // min/max macros interfere with the C++ versions.
+# ifndef NOMINMAX
+# define NOMINMAX
+# endif
+ // We don't need all that Windows has to offer.
+# ifndef WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN
+# endif
+
+ // for wcstombs
+# ifndef _CRT_SECURE_NO_WARNINGS
+# define _CRT_SECURE_NO_WARNINGS
+# endif
+
+ // None of this happens with the MS SDK (at least VS14 which I tested), but:
+ // Compiling with mingw, we get "error: 'KF_FLAG_DEFAULT' was not declared in this scope."
+ // and "error: 'SHGetKnownFolderPath' was not declared in this scope."
+ // It seems when using mingw NTDDI_VERSION is undefined and that
+ // causes KNOWN_FOLDER_FLAG and the KF_ flags to not get defined.
+ // So we must define NTDDI_VERSION to get those flags on mingw.
+ // The docs say though here:
+ // https://msdn.microsoft.com/en-nz/library/windows/desktop/aa383745(v=vs.85).aspx
+ // that "If you define NTDDI_VERSION, you must also define _WIN32_WINNT."
+ // So we declare we require Vista or greater.
+# ifdef __MINGW32__
+
+# ifndef NTDDI_VERSION
+# define NTDDI_VERSION 0x06000000
+# define _WIN32_WINNT _WIN32_WINNT_VISTA
+# elif NTDDI_VERSION < 0x06000000
+# warning "If this fails to compile NTDDI_VERSION may be to low. See comments above."
+# endif
+ // But once we define the values above we then get this linker error:
+ // "tz.cpp:(.rdata$.refptr.FOLDERID_Downloads[.refptr.FOLDERID_Downloads]+0x0): "
+ // "undefined reference to `FOLDERID_Downloads'"
+ // which #include <initguid.h> cures; see:
+ // https://support.microsoft.com/en-us/kb/130869
+# include <initguid.h>
+ // But with <initguid.h> included, the error moves on to:
+ // error: 'FOLDERID_Downloads' was not declared in this scope
+ // Which #include <knownfolders.h> cures.
+# error #include <knownfolders.h>
+
+# endif // __MINGW32__
+
+# include <windows.h>
+#endif // _WIN32
+
+#include "tz_private.h"
+
+#ifdef __APPLE__
# include "ios.h"
-#else
-# define TARGET_OS_IPHONE 0
-# define TARGET_OS_SIMULATOR 0
-#endif
-
-#if USE_OS_TZDB
-# include <dirent.h>
-#endif
-#include <algorithm>
-#include <cctype>
-#include <cstdlib>
-#include <cstring>
-#include <cwchar>
-#include <exception>
-#include <fstream>
-#include <iostream>
-#include <iterator>
-#include <memory>
-#if USE_OS_TZDB
-# include <queue>
-#endif
-#include <sstream>
-#include <string>
-#include <tuple>
-#include <vector>
-#include <sys/stat.h>
-
-// unistd.h is used on some platforms as part of the the means to get
-// the current time zone. On Win32 windows.h provides a means to do it.
-// gcc/mingw supports unistd.h on Win32 but MSVC does not.
-
-#ifdef _WIN32
-# ifdef WINAPI_FAMILY
-# include <winapifamily.h>
-# if WINAPI_FAMILY != WINAPI_FAMILY_DESKTOP_APP
-# define WINRT
-# define INSTALL .
-# endif
-# endif
-
-# include <io.h> // _unlink etc.
-
-# if defined(__clang__)
- struct IUnknown; // fix for issue with static_cast<> in objbase.h
- // (see https://github.com/philsquared/Catch/issues/690)
-# endif
-
-# include <shlobj.h> // CoTaskFree, ShGetKnownFolderPath etc.
-# if HAS_REMOTE_API
-# include <direct.h> // _mkdir
-# include <shellapi.h> // ShFileOperation etc.
-# endif // HAS_REMOTE_API
-#else // !_WIN32
-# include <unistd.h>
-# if !USE_OS_TZDB
-# include <wordexp.h>
-# endif
-# include <limits.h>
-# include <string.h>
-# if !USE_SHELL_API
-# include <sys/stat.h>
-# include <sys/fcntl.h>
-# include <dirent.h>
-# include <cstring>
-# include <sys/wait.h>
-# include <sys/types.h>
-# endif //!USE_SHELL_API
-#endif // !_WIN32
-
-
-#if HAS_REMOTE_API
- // Note curl includes windows.h so we must include curl AFTER definitions of things
- // that affect windows.h such as NOMINMAX.
-#if defined(_MSC_VER) && defined(SHORTENED_CURL_INCLUDE)
- // For rmt_curl nuget package
-# error #include <curl.h>
-#else
-# error #include <curl/curl.h>
-#endif
-#endif
-
-#ifdef _WIN32
-static CONSTDATA char folder_delimiter = '\\';
-#else // !_WIN32
-static CONSTDATA char folder_delimiter = '/';
-#endif // !_WIN32
-
-#if defined(__GNUC__) && __GNUC__ < 5
- // GCC 4.9 Bug 61489 Wrong warning with -Wmissing-field-initializers
-# pragma GCC diagnostic push
-# pragma GCC diagnostic ignored "-Wmissing-field-initializers"
-#endif // defined(__GNUC__) && __GNUC__ < 5
-
-#if !USE_OS_TZDB
-
-# ifdef _WIN32
-# ifndef WINRT
-
-namespace
-{
- struct task_mem_deleter
- {
- void operator()(wchar_t buf[])
- {
- if (buf != nullptr)
- CoTaskMemFree(buf);
- }
- };
- using co_task_mem_ptr = std::unique_ptr<wchar_t[], task_mem_deleter>;
-}
-
-// We might need to know certain locations even if not using the remote API,
-// so keep these routines out of that block for now.
-static
-std::string
-get_known_folder(const GUID& folderid)
-{
- std::string folder;
- PWSTR pfolder = nullptr;
- HRESULT hr = SHGetKnownFolderPath(folderid, KF_FLAG_DEFAULT, nullptr, &pfolder);
- if (SUCCEEDED(hr))
- {
- co_task_mem_ptr folder_ptr(pfolder);
- const wchar_t* fptr = folder_ptr.get();
- auto state = std::mbstate_t();
- const auto required = std::wcsrtombs(nullptr, &fptr, 0, &state);
- if (required != 0 && required != std::size_t(-1))
- {
- folder.resize(required);
- std::wcsrtombs(&folder[0], &fptr, folder.size(), &state);
- }
- }
- return folder;
-}
-
-# ifndef INSTALL
-
-// Usually something like "c:\Users\username\Downloads".
-static
-std::string
-get_download_folder()
-{
- return get_known_folder(FOLDERID_Downloads);
-}
-
-# endif // !INSTALL
-
-# endif // WINRT
-# else // !_WIN32
-
-# if !defined(INSTALL)
-
-static
-std::string
-expand_path(std::string path)
-{
-# if TARGET_OS_IPHONE
- return date::iOSUtils::get_tzdata_path();
-# else // !TARGET_OS_IPHONE
- ::wordexp_t w{};
- std::unique_ptr<::wordexp_t, void(*)(::wordexp_t*)> hold{&w, ::wordfree};
- ::wordexp(path.c_str(), &w, 0);
- if (w.we_wordc != 1)
- throw std::runtime_error("Cannot expand path: " + path);
- path = w.we_wordv[0];
- return path;
-# endif // !TARGET_OS_IPHONE
-}
-
-static
-std::string
-get_download_folder()
-{
- return expand_path("~/Downloads");
-}
-
-# endif // !defined(INSTALL)
-
-# endif // !_WIN32
-
-#endif // !USE_OS_TZDB
-
-namespace arrow_vendored
-{
-namespace date
-{
-// +---------------------+
-// | Begin Configuration |
-// +---------------------+
-
-using namespace detail;
-
-#if !USE_OS_TZDB
-
-static
-std::string&
-access_install()
-{
- static std::string install
-#ifndef INSTALL
-
- = get_download_folder() + folder_delimiter + "tzdata";
-
-#else // !INSTALL
-
-# define STRINGIZEIMP(x) #x
-# define STRINGIZE(x) STRINGIZEIMP(x)
-
- = STRINGIZE(INSTALL) + std::string(1, folder_delimiter) + "tzdata";
-
- #undef STRINGIZEIMP
- #undef STRINGIZE
-#endif // !INSTALL
-
- return install;
-}
-
-void
-set_install(const std::string& s)
-{
- access_install() = s;
-}
-
-static
-const std::string&
-get_install()
-{
- static const std::string& ref = access_install();
- return ref;
-}
-
-#if HAS_REMOTE_API
-static
-std::string
-get_download_gz_file(const std::string& version)
-{
- auto file = get_install() + version + ".tar.gz";
- return file;
-}
-#endif // HAS_REMOTE_API
-
-#endif // !USE_OS_TZDB
-
-// These can be used to reduce the range of the database to save memory
-CONSTDATA auto min_year = date::year::min();
-CONSTDATA auto max_year = date::year::max();
-
-CONSTDATA auto min_day = date::January/1;
-CONSTDATA auto max_day = date::December/31;
-
-#if USE_OS_TZDB
-
-CONSTCD14 const sys_seconds min_seconds = sys_days(min_year/min_day);
-
-#endif // USE_OS_TZDB
-
-#ifndef _WIN32
-
-static
-std::string
-discover_tz_dir()
-{
- struct stat sb;
- using namespace std;
-# ifndef __APPLE__
- CONSTDATA auto tz_dir_default = "/usr/share/zoneinfo";
- CONSTDATA auto tz_dir_buildroot = "/usr/share/zoneinfo/uclibc";
-
- // Check special path which is valid for buildroot with uclibc builds
- if(stat(tz_dir_buildroot, &sb) == 0 && S_ISDIR(sb.st_mode))
- return tz_dir_buildroot;
- else if(stat(tz_dir_default, &sb) == 0 && S_ISDIR(sb.st_mode))
- return tz_dir_default;
- else
- throw runtime_error("discover_tz_dir failed to find zoneinfo\n");
-# else // __APPLE__
-# if TARGET_OS_IPHONE
-# if TARGET_OS_SIMULATOR
- return "/usr/share/zoneinfo";
-# else
- return "/var/db/timezone/zoneinfo";
-# endif
-# else
- CONSTDATA auto timezone = "/etc/localtime";
- if (!(lstat(timezone, &sb) == 0 && S_ISLNK(sb.st_mode) && sb.st_size > 0))
- throw runtime_error("discover_tz_dir failed\n");
- string result;
- char rp[PATH_MAX+1] = {};
- if (readlink(timezone, rp, sizeof(rp)-1) > 0)
- result = string(rp);
- else
- throw system_error(errno, system_category(), "readlink() failed");
- auto i = result.find("zoneinfo");
- if (i == string::npos)
- throw runtime_error("discover_tz_dir failed to find zoneinfo\n");
- i = result.find('/', i);
- if (i == string::npos)
- throw runtime_error("discover_tz_dir failed to find '/'\n");
- return result.substr(0, i);
-# endif
-# endif // __APPLE__
-}
-
-static
-const std::string&
-get_tz_dir()
-{
- static const std::string tz_dir = discover_tz_dir();
- return tz_dir;
-}
-
-#endif
-
-// +-------------------+
-// | End Configuration |
-// +-------------------+
-
-#ifndef _MSC_VER
-static_assert(min_year <= max_year, "Configuration error");
-#endif
-
-static std::unique_ptr<tzdb> init_tzdb();
-
-tzdb_list::~tzdb_list()
-{
- const tzdb* ptr = head_;
- head_ = nullptr;
- while (ptr != nullptr)
- {
- auto next = ptr->next;
- delete ptr;
- ptr = next;
- }
-}
-
-tzdb_list::tzdb_list(tzdb_list&& x) noexcept
- : head_{x.head_.exchange(nullptr)}
-{
-}
-
-void
-tzdb_list::push_front(tzdb* tzdb) noexcept
-{
- tzdb->next = head_;
- head_ = tzdb;
-}
-
-tzdb_list::const_iterator
-tzdb_list::erase_after(const_iterator p) noexcept
-{
- auto t = p.p_->next;
- p.p_->next = p.p_->next->next;
- delete t;
- return ++p;
-}
-
-struct tzdb_list::undocumented_helper
-{
- static void push_front(tzdb_list& db_list, tzdb* tzdb) noexcept
- {
- db_list.push_front(tzdb);
- }
-};
-
-static
-tzdb_list
-create_tzdb()
-{
- tzdb_list tz_db;
- tzdb_list::undocumented_helper::push_front(tz_db, init_tzdb().release());
- return tz_db;
-}
-
-tzdb_list&
-get_tzdb_list()
-{
- static tzdb_list tz_db = create_tzdb();
- return tz_db;
-}
-
-#if !USE_OS_TZDB
-
-#ifdef _WIN32
-
-static
-void
-sort_zone_mappings(std::vector<date::detail::timezone_mapping>& mappings)
-{
- std::sort(mappings.begin(), mappings.end(),
- [](const date::detail::timezone_mapping& lhs,
- const date::detail::timezone_mapping& rhs)->bool
- {
- auto other_result = lhs.other.compare(rhs.other);
- if (other_result < 0)
- return true;
- else if (other_result == 0)
- {
- auto territory_result = lhs.territory.compare(rhs.territory);
- if (territory_result < 0)
- return true;
- else if (territory_result == 0)
- {
- if (lhs.type < rhs.type)
- return true;
- }
- }
- return false;
- });
-}
-
-static
-bool
-native_to_standard_timezone_name(const std::string& native_tz_name,
- std::string& standard_tz_name)
-{
- // TOOD! Need be a case insensitive compare?
- if (native_tz_name == "UTC")
- {
- standard_tz_name = "Etc/UTC";
- return true;
- }
- standard_tz_name.clear();
- // TODO! we can improve on linear search.
- const auto& mappings = date::get_tzdb().mappings;
- for (const auto& tzm : mappings)
- {
- if (tzm.other == native_tz_name)
- {
- standard_tz_name = tzm.type;
- return true;
- }
- }
- return false;
-}
-
-// Parse this XML file:
-// https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml
-// The parsing method is designed to be simple and quick. It is not overly
-// forgiving of change but it should diagnose basic format issues.
-// See timezone_mapping structure for more info.
-static
-std::vector<detail::timezone_mapping>
-load_timezone_mappings_from_xml_file(const std::string& input_path)
-{
- std::size_t line_num = 0;
- std::vector<detail::timezone_mapping> mappings;
- std::string line;
-
- std::ifstream is(input_path);
- if (!is.is_open())
- {
- // We don't emit file exceptions because that's an implementation detail.
- std::string msg = "Error opening time zone mapping file \"";
- msg += input_path;
- msg += "\".";
- throw std::runtime_error(msg);
- }
-
- auto error = [&input_path, &line_num](const char* info)
- {
- std::string msg = "Error loading time zone mapping file \"";
- msg += input_path;
- msg += "\" at line ";
- msg += std::to_string(line_num);
- msg += ": ";
- msg += info;
- throw std::runtime_error(msg);
- };
- // [optional space]a="b"
- auto read_attribute = [&line, &error]
- (const char* name, std::string& value, std::size_t startPos)
- ->std::size_t
- {
- value.clear();
- // Skip leading space before attribute name.
- std::size_t spos = line.find_first_not_of(' ', startPos);
- if (spos == std::string::npos)
- spos = startPos;
- // Assume everything up to next = is the attribute name
- // and that an = will always delimit that.
- std::size_t epos = line.find('=', spos);
- if (epos == std::string::npos)
- error("Expected \'=\' right after attribute name.");
- std::size_t name_len = epos - spos;
- // Expect the name we find matches the name we expect.
- if (line.compare(spos, name_len, name) != 0)
- {
- std::string msg;
- msg = "Expected attribute name \'";
- msg += name;
- msg += "\' around position ";
- msg += std::to_string(spos);
- msg += " but found something else.";
- error(msg.c_str());
- }
- ++epos; // Skip the '=' that is after the attribute name.
- spos = epos;
- if (spos < line.length() && line[spos] == '\"')
- ++spos; // Skip the quote that is before the attribute value.
- else
- {
- std::string msg = "Expected '\"' to begin value of attribute \'";
- msg += name;
- msg += "\'.";
- error(msg.c_str());
- }
- epos = line.find('\"', spos);
- if (epos == std::string::npos)
- {
- std::string msg = "Expected '\"' to end value of attribute \'";
- msg += name;
- msg += "\'.";
- error(msg.c_str());
- }
- // Extract everything in between the quotes. Note no escaping is done.
- std::size_t value_len = epos - spos;
- value.assign(line, spos, value_len);
- ++epos; // Skip the quote that is after the attribute value;
- return epos;
- };
-
- // Quick but not overly forgiving XML mapping file processing.
- bool mapTimezonesOpenTagFound = false;
- bool mapTimezonesCloseTagFound = false;
- std::size_t mapZonePos = std::string::npos;
- std::size_t mapTimezonesPos = std::string::npos;
- CONSTDATA char mapTimeZonesOpeningTag[] = { "<mapTimezones " };
- CONSTDATA char mapZoneOpeningTag[] = { "<mapZone " };
- CONSTDATA std::size_t mapZoneOpeningTagLen = sizeof(mapZoneOpeningTag) /
- sizeof(mapZoneOpeningTag[0]) - 1;
- while (!mapTimezonesOpenTagFound)
- {
- std::getline(is, line);
- ++line_num;
- if (is.eof())
- {
- // If there is no mapTimezones tag is it an error?
- // Perhaps if there are no mapZone mappings it might be ok for
- // its parent mapTimezones element to be missing?
- // We treat this as an error though on the assumption that if there
- // really are no mappings we should still get a mapTimezones parent
- // element but no mapZone elements inside. Assuming we must
- // find something will hopefully at least catch more drastic formatting
- // changes or errors than if we don't do this and assume nothing found.
- error("Expected a mapTimezones opening tag.");
- }
- mapTimezonesPos = line.find(mapTimeZonesOpeningTag);
- mapTimezonesOpenTagFound = (mapTimezonesPos != std::string::npos);
- }
-
- // NOTE: We could extract the version info that follows the opening
- // mapTimezones tag and compare that to the version of other data we have.
- // I would have expected them to be kept in synch but testing has shown
- // it typically does not match anyway. So what's the point?
- while (!mapTimezonesCloseTagFound)
- {
- std::ws(is);
- std::getline(is, line);
- ++line_num;
- if (is.eof())
- error("Expected a mapTimezones closing tag.");
- if (line.empty())
- continue;
- mapZonePos = line.find(mapZoneOpeningTag);
- if (mapZonePos != std::string::npos)
- {
- mapZonePos += mapZoneOpeningTagLen;
- detail::timezone_mapping zm{};
- std::size_t pos = read_attribute("other", zm.other, mapZonePos);
- pos = read_attribute("territory", zm.territory, pos);
- read_attribute("type", zm.type, pos);
- mappings.push_back(std::move(zm));
-
- continue;
- }
- mapTimezonesPos = line.find("</mapTimezones>");
- mapTimezonesCloseTagFound = (mapTimezonesPos != std::string::npos);
- if (!mapTimezonesCloseTagFound)
- {
- std::size_t commentPos = line.find("<!--");
- if (commentPos == std::string::npos)
- error("Unexpected mapping record found. A xml mapZone or comment "
- "attribute or mapTimezones closing tag was expected.");
- }
- }
-
- is.close();
- return mappings;
-}
-
-#endif // _WIN32
-
-// Parsing helpers
-
-static
-std::string
-parse3(std::istream& in)
-{
- std::string r(3, ' ');
- ws(in);
- r[0] = static_cast<char>(in.get());
- r[1] = static_cast<char>(in.get());
- r[2] = static_cast<char>(in.get());
- return r;
-}
-
-static
-unsigned
-parse_dow(std::istream& in)
-{
- CONSTDATA char*const dow_names[] =
- {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"};
- auto s = parse3(in);
- auto dow = std::find(std::begin(dow_names), std::end(dow_names), s) - dow_names;
- if (dow >= std::end(dow_names) - std::begin(dow_names))
- throw std::runtime_error("oops: bad dow name: " + s);
- return static_cast<unsigned>(dow);
-}
-
-static
-unsigned
-parse_month(std::istream& in)
-{
- CONSTDATA char*const month_names[] =
- {"Jan", "Feb", "Mar", "Apr", "May", "Jun",
- "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
- auto s = parse3(in);
- auto m = std::find(std::begin(month_names), std::end(month_names), s) - month_names;
- if (m >= std::end(month_names) - std::begin(month_names))
- throw std::runtime_error("oops: bad month name: " + s);
- return static_cast<unsigned>(++m);
-}
-
-static
-std::chrono::seconds
-parse_unsigned_time(std::istream& in)
-{
- using namespace std::chrono;
- int x;
- in >> x;
- auto r = seconds{hours{x}};
- if (!in.eof() && in.peek() == ':')
- {
- in.get();
- in >> x;
- r += minutes{x};
- if (!in.eof() && in.peek() == ':')
- {
- in.get();
- in >> x;
- r += seconds{x};
- }
- }
- return r;
-}
-
-static
-std::chrono::seconds
-parse_signed_time(std::istream& in)
-{
- ws(in);
- auto sign = 1;
- if (in.peek() == '-')
- {
- sign = -1;
- in.get();
- }
- else if (in.peek() == '+')
- in.get();
- return sign * parse_unsigned_time(in);
-}
-
-// MonthDayTime
-
-detail::MonthDayTime::MonthDayTime(local_seconds tp, tz timezone)
- : zone_(timezone)
-{
- using namespace date;
- const auto dp = date::floor<days>(tp);
- const auto hms = make_time(tp - dp);
- const auto ymd = year_month_day(dp);
- u = ymd.month() / ymd.day();
- h_ = hms.hours();
- m_ = hms.minutes();
- s_ = hms.seconds();
-}
-
-detail::MonthDayTime::MonthDayTime(const date::month_day& md, tz timezone)
- : zone_(timezone)
-{
- u = md;
-}
-
-date::day
-detail::MonthDayTime::day() const
-{
- switch (type_)
- {
- case month_day:
- return u.month_day_.day();
- case month_last_dow:
- return date::day{31};
- case lteq:
- case gteq:
- break;
- }
- return u.month_day_weekday_.month_day_.day();
-}
-
-date::month
-detail::MonthDayTime::month() const
-{
- switch (type_)
- {
- case month_day:
- return u.month_day_.month();
- case month_last_dow:
- return u.month_weekday_last_.month();
- case lteq:
- case gteq:
- break;
- }
- return u.month_day_weekday_.month_day_.month();
-}
-
-int
-detail::MonthDayTime::compare(date::year y, const MonthDayTime& x, date::year yx,
- std::chrono::seconds offset, std::chrono::minutes prev_save) const
-{
- if (zone_ != x.zone_)
- {
- auto dp0 = to_sys_days(y);
- auto dp1 = x.to_sys_days(yx);
- if (std::abs((dp0-dp1).count()) > 1)
- return dp0 < dp1 ? -1 : 1;
- if (zone_ == tz::local)
- {
- auto tp0 = to_time_point(y) - prev_save;
- if (x.zone_ == tz::utc)
- tp0 -= offset;
- auto tp1 = x.to_time_point(yx);
- return tp0 < tp1 ? -1 : tp0 == tp1 ? 0 : 1;
- }
- else if (zone_ == tz::standard)
- {
- auto tp0 = to_time_point(y);
- auto tp1 = x.to_time_point(yx);
- if (x.zone_ == tz::local)
- tp1 -= prev_save;
- else
- tp0 -= offset;
- return tp0 < tp1 ? -1 : tp0 == tp1 ? 0 : 1;
- }
- // zone_ == tz::utc
- auto tp0 = to_time_point(y);
- auto tp1 = x.to_time_point(yx);
- if (x.zone_ == tz::local)
- tp1 -= offset + prev_save;
- else
- tp1 -= offset;
- return tp0 < tp1 ? -1 : tp0 == tp1 ? 0 : 1;
- }
- auto const t0 = to_time_point(y);
- auto const t1 = x.to_time_point(yx);
- return t0 < t1 ? -1 : t0 == t1 ? 0 : 1;
-}
-
-sys_seconds
-detail::MonthDayTime::to_sys(date::year y, std::chrono::seconds offset,
- std::chrono::seconds save) const
-{
- using namespace date;
- using namespace std::chrono;
- auto until_utc = to_time_point(y);
- if (zone_ == tz::standard)
- until_utc -= offset;
- else if (zone_ == tz::local)
- until_utc -= offset + save;
- return until_utc;
-}
-
-detail::MonthDayTime::U&
-detail::MonthDayTime::U::operator=(const date::month_day& x)
-{
- month_day_ = x;
- return *this;
-}
-
-detail::MonthDayTime::U&
-detail::MonthDayTime::U::operator=(const date::month_weekday_last& x)
-{
- month_weekday_last_ = x;
- return *this;
-}
-
-detail::MonthDayTime::U&
-detail::MonthDayTime::U::operator=(const pair& x)
-{
- month_day_weekday_ = x;
- return *this;
-}
-
-date::sys_days
-detail::MonthDayTime::to_sys_days(date::year y) const
-{
- using namespace std::chrono;
- using namespace date;
- switch (type_)
- {
- case month_day:
- return sys_days(y/u.month_day_);
- case month_last_dow:
- return sys_days(y/u.month_weekday_last_);
- case lteq:
- {
- auto const x = y/u.month_day_weekday_.month_day_;
- auto const wd1 = weekday(static_cast<sys_days>(x));
- auto const wd0 = u.month_day_weekday_.weekday_;
- return sys_days(x) - (wd1-wd0);
- }
- case gteq:
- break;
- }
- auto const x = y/u.month_day_weekday_.month_day_;
- auto const wd1 = u.month_day_weekday_.weekday_;
- auto const wd0 = weekday(static_cast<sys_days>(x));
- return sys_days(x) + (wd1-wd0);
-}
-
-sys_seconds
-detail::MonthDayTime::to_time_point(date::year y) const
-{
- // Add seconds first to promote to largest rep early to prevent overflow
- return to_sys_days(y) + s_ + h_ + m_;
-}
-
-void
-detail::MonthDayTime::canonicalize(date::year y)
-{
- using namespace std::chrono;
- using namespace date;
- switch (type_)
- {
- case month_day:
- return;
- case month_last_dow:
- {
- auto const ymd = year_month_day(sys_days(y/u.month_weekday_last_));
- u.month_day_ = ymd.month()/ymd.day();
- type_ = month_day;
- return;
- }
- case lteq:
- {
- auto const x = y/u.month_day_weekday_.month_day_;
- auto const wd1 = weekday(static_cast<sys_days>(x));
- auto const wd0 = u.month_day_weekday_.weekday_;
- auto const ymd = year_month_day(sys_days(x) - (wd1-wd0));
- u.month_day_ = ymd.month()/ymd.day();
- type_ = month_day;
- return;
- }
- case gteq:
- {
- auto const x = y/u.month_day_weekday_.month_day_;
- auto const wd1 = u.month_day_weekday_.weekday_;
- auto const wd0 = weekday(static_cast<sys_days>(x));
- auto const ymd = year_month_day(sys_days(x) + (wd1-wd0));
- u.month_day_ = ymd.month()/ymd.day();
- type_ = month_day;
- return;
- }
- }
-}
-
-std::istream&
-detail::operator>>(std::istream& is, MonthDayTime& x)
-{
- using namespace date;
- using namespace std::chrono;
- assert(((std::ios::failbit | std::ios::badbit) & is.exceptions()) ==
- (std::ios::failbit | std::ios::badbit));
- x = MonthDayTime{};
- if (!is.eof() && ws(is) && !is.eof() && is.peek() != '#')
- {
- auto m = parse_month(is);
- if (!is.eof() && ws(is) && !is.eof() && is.peek() != '#')
- {
- if (is.peek() == 'l')
- {
- for (int i = 0; i < 4; ++i)
- is.get();
- auto dow = parse_dow(is);
- x.type_ = MonthDayTime::month_last_dow;
- x.u = date::month(m)/weekday(dow)[last];
- }
- else if (std::isalpha(is.peek()))
- {
- auto dow = parse_dow(is);
- char c{};
- is >> c;
- if (c == '<' || c == '>')
- {
- char c2{};
- is >> c2;
- if (c2 != '=')
- throw std::runtime_error(std::string("bad operator: ") + c + c2);
- int d;
- is >> d;
- if (d < 1 || d > 31)
- throw std::runtime_error(std::string("bad operator: ") + c + c2
- + std::to_string(d));
- x.type_ = c == '<' ? MonthDayTime::lteq : MonthDayTime::gteq;
- x.u = MonthDayTime::pair{ date::month(m) / d, date::weekday(dow) };
- }
- else
- throw std::runtime_error(std::string("bad operator: ") + c);
- }
- else // if (std::isdigit(is.peek())
- {
- int d;
- is >> d;
- if (d < 1 || d > 31)
- throw std::runtime_error(std::string("day of month: ")
- + std::to_string(d));
- x.type_ = MonthDayTime::month_day;
- x.u = date::month(m)/d;
- }
- if (!is.eof() && ws(is) && !is.eof() && is.peek() != '#')
- {
- int t;
- is >> t;
- x.h_ = hours{t};
- if (!is.eof() && is.peek() == ':')
- {
- is.get();
- is >> t;
- x.m_ = minutes{t};
- if (!is.eof() && is.peek() == ':')
- {
- is.get();
- is >> t;
- x.s_ = seconds{t};
- }
- }
- if (!is.eof() && std::isalpha(is.peek()))
- {
- char c;
- is >> c;
- switch (c)
- {
- case 's':
- x.zone_ = tz::standard;
- break;
- case 'u':
- x.zone_ = tz::utc;
- break;
- }
- }
- }
- }
- else
- {
- x.u = month{m}/1;
- }
- }
- return is;
-}
-
-std::ostream&
-detail::operator<<(std::ostream& os, const MonthDayTime& x)
-{
- switch (x.type_)
- {
- case MonthDayTime::month_day:
- os << x.u.month_day_ << " ";
- break;
- case MonthDayTime::month_last_dow:
- os << x.u.month_weekday_last_ << " ";
- break;
- case MonthDayTime::lteq:
- os << x.u.month_day_weekday_.weekday_ << " on or before "
- << x.u.month_day_weekday_.month_day_ << " ";
- break;
- case MonthDayTime::gteq:
- if ((static_cast<unsigned>(x.day()) - 1) % 7 == 0)
- {
- os << (x.u.month_day_weekday_.month_day_.month() /
- x.u.month_day_weekday_.weekday_[
- (static_cast<unsigned>(x.day()) - 1)/7+1]) << " ";
- }
- else
- {
- os << x.u.month_day_weekday_.weekday_ << " on or after "
- << x.u.month_day_weekday_.month_day_ << " ";
- }
- break;
- }
- os << date::make_time(x.s_ + x.h_ + x.m_);
- if (x.zone_ == tz::utc)
- os << "UTC ";
- else if (x.zone_ == tz::standard)
- os << "STD ";
- else
- os << " ";
- return os;
-}
-
-// Rule
-
-detail::Rule::Rule(const std::string& s)
-{
- try
- {
- using namespace date;
- using namespace std::chrono;
- std::istringstream in(s);
- in.exceptions(std::ios::failbit | std::ios::badbit);
- std::string word;
- in >> word >> name_;
- int x;
- ws(in);
- if (std::isalpha(in.peek()))
- {
- in >> word;
- if (word == "min")
- {
- starting_year_ = year::min();
- }
- else
- throw std::runtime_error("Didn't find expected word: " + word);
- }
- else
- {
- in >> x;
- starting_year_ = year{x};
- }
- std::ws(in);
- if (std::isalpha(in.peek()))
- {
- in >> word;
- if (word == "only")
- {
- ending_year_ = starting_year_;
- }
- else if (word == "max")
- {
- ending_year_ = year::max();
- }
- else
- throw std::runtime_error("Didn't find expected word: " + word);
- }
- else
- {
- in >> x;
- ending_year_ = year{x};
- }
- in >> word; // TYPE (always "-")
- assert(word == "-");
- in >> starting_at_;
- save_ = duration_cast<minutes>(parse_signed_time(in));
- in >> abbrev_;
- if (abbrev_ == "-")
- abbrev_.clear();
- assert(hours{-1} <= save_ && save_ <= hours{2});
- }
- catch (...)
- {
- std::cerr << s << '\n';
- std::cerr << *this << '\n';
- throw;
- }
-}
-
-detail::Rule::Rule(const Rule& r, date::year starting_year, date::year ending_year)
- : name_(r.name_)
- , starting_year_(starting_year)
- , ending_year_(ending_year)
- , starting_at_(r.starting_at_)
- , save_(r.save_)
- , abbrev_(r.abbrev_)
-{
-}
-
-bool
-detail::operator==(const Rule& x, const Rule& y)
-{
- if (std::tie(x.name_, x.save_, x.starting_year_, x.ending_year_) ==
- std::tie(y.name_, y.save_, y.starting_year_, y.ending_year_))
- return x.month() == y.month() && x.day() == y.day();
- return false;
-}
-
-bool
-detail::operator<(const Rule& x, const Rule& y)
-{
- using namespace std::chrono;
- auto const xm = x.month();
- auto const ym = y.month();
- if (std::tie(x.name_, x.starting_year_, xm, x.ending_year_) <
- std::tie(y.name_, y.starting_year_, ym, y.ending_year_))
- return true;
- if (std::tie(x.name_, x.starting_year_, xm, x.ending_year_) >
- std::tie(y.name_, y.starting_year_, ym, y.ending_year_))
- return false;
- return x.day() < y.day();
-}
-
-bool
-detail::operator==(const Rule& x, const date::year& y)
-{
- return x.starting_year_ <= y && y <= x.ending_year_;
-}
-
-bool
-detail::operator<(const Rule& x, const date::year& y)
-{
- return x.ending_year_ < y;
-}
-
-bool
-detail::operator==(const date::year& x, const Rule& y)
-{
- return y.starting_year_ <= x && x <= y.ending_year_;
-}
-
-bool
-detail::operator<(const date::year& x, const Rule& y)
-{
- return x < y.starting_year_;
-}
-
-bool
-detail::operator==(const Rule& x, const std::string& y)
-{
- return x.name() == y;
-}
-
-bool
-detail::operator<(const Rule& x, const std::string& y)
-{
- return x.name() < y;
-}
-
-bool
-detail::operator==(const std::string& x, const Rule& y)
-{
- return y.name() == x;
-}
-
-bool
-detail::operator<(const std::string& x, const Rule& y)
-{
- return x < y.name();
-}
-
-std::ostream&
-detail::operator<<(std::ostream& os, const Rule& r)
-{
- using namespace date;
- using namespace std::chrono;
- detail::save_ostream<char> _(os);
- os.fill(' ');
- os.flags(std::ios::dec | std::ios::left);
- os.width(15);
- os << r.name_;
- os << r.starting_year_ << " " << r.ending_year_ << " ";
- os << r.starting_at_;
- if (r.save_ >= minutes{0})
- os << ' ';
- os << date::make_time(r.save_) << " ";
- os << r.abbrev_;
- return os;
-}
-
-date::day
-detail::Rule::day() const
-{
- return starting_at_.day();
-}
-
-date::month
-detail::Rule::month() const
-{
- return starting_at_.month();
-}
-
-struct find_rule_by_name
-{
- bool operator()(const Rule& x, const std::string& nm) const
- {
- return x.name() < nm;
- }
-
- bool operator()(const std::string& nm, const Rule& x) const
- {
- return nm < x.name();
- }
-};
-
-bool
-detail::Rule::overlaps(const Rule& x, const Rule& y)
-{
- // assume x.starting_year_ <= y.starting_year_;
- if (!(x.starting_year_ <= y.starting_year_))
- {
- std::cerr << x << '\n';
- std::cerr << y << '\n';
- assert(x.starting_year_ <= y.starting_year_);
- }
- if (y.starting_year_ > x.ending_year_)
- return false;
- return !(x.starting_year_ == y.starting_year_ && x.ending_year_ == y.ending_year_);
-}
-
-void
-detail::Rule::split(std::vector<Rule>& rules, std::size_t i, std::size_t k, std::size_t& e)
-{
- using namespace date;
+#else
+# define TARGET_OS_IPHONE 0
+# define TARGET_OS_SIMULATOR 0
+#endif
+
+#if USE_OS_TZDB
+# include <dirent.h>
+#endif
+#include <algorithm>
+#include <cctype>
+#include <cstdlib>
+#include <cstring>
+#include <cwchar>
+#include <exception>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <memory>
+#if USE_OS_TZDB
+# include <queue>
+#endif
+#include <sstream>
+#include <string>
+#include <tuple>
+#include <vector>
+#include <sys/stat.h>
+
+// unistd.h is used on some platforms as part of the means to get
+// the current time zone. On Win32 windows.h provides a means to do it.
+// gcc/mingw supports unistd.h on Win32 but MSVC does not.
+
+#ifdef _WIN32
+# ifdef WINAPI_FAMILY
+# include <winapifamily.h>
+# if WINAPI_FAMILY != WINAPI_FAMILY_DESKTOP_APP
+# define WINRT
+# define INSTALL .
+# endif
+# endif
+
+# include <io.h> // _unlink etc.
+
+# if defined(__clang__)
+ struct IUnknown; // fix for issue with static_cast<> in objbase.h
+ // (see https://github.com/philsquared/Catch/issues/690)
+# endif
+
+# include <shlobj.h> // CoTaskFree, ShGetKnownFolderPath etc.
+# if HAS_REMOTE_API
+# include <direct.h> // _mkdir
+# include <shellapi.h> // ShFileOperation etc.
+# endif // HAS_REMOTE_API
+#else // !_WIN32
+# include <unistd.h>
+# if !USE_OS_TZDB
+# include <wordexp.h>
+# endif
+# include <limits.h>
+# include <string.h>
+# if !USE_SHELL_API
+# include <sys/stat.h>
+# include <sys/fcntl.h>
+# include <dirent.h>
+# include <cstring>
+# include <sys/wait.h>
+# include <sys/types.h>
+# endif //!USE_SHELL_API
+#endif // !_WIN32
+
+
+#if HAS_REMOTE_API
+ // Note curl includes windows.h so we must include curl AFTER definitions of things
+ // that affect windows.h such as NOMINMAX.
+#if defined(_MSC_VER) && defined(SHORTENED_CURL_INCLUDE)
+ // For rmt_curl nuget package
+# error #include <curl.h>
+#else
+# error #include <curl/curl.h>
+#endif
+#endif
+
+#ifdef _WIN32
+static CONSTDATA char folder_delimiter = '\\';
+#else // !_WIN32
+static CONSTDATA char folder_delimiter = '/';
+#endif // !_WIN32
+
+#if defined(__GNUC__) && __GNUC__ < 5
+ // GCC 4.9 Bug 61489 Wrong warning with -Wmissing-field-initializers
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+#endif // defined(__GNUC__) && __GNUC__ < 5
+
+#if !USE_OS_TZDB
+
+# ifdef _WIN32
+# ifndef WINRT
+
+namespace
+{
+ struct task_mem_deleter
+ {
+ void operator()(wchar_t buf[])
+ {
+ if (buf != nullptr)
+ CoTaskMemFree(buf);
+ }
+ };
+ using co_task_mem_ptr = std::unique_ptr<wchar_t[], task_mem_deleter>;
+}
+
+// We might need to know certain locations even if not using the remote API,
+// so keep these routines out of that block for now.
+static
+std::string
+get_known_folder(const GUID& folderid)
+{
+ std::string folder;
+ PWSTR pfolder = nullptr;
+ HRESULT hr = SHGetKnownFolderPath(folderid, KF_FLAG_DEFAULT, nullptr, &pfolder);
+ if (SUCCEEDED(hr))
+ {
+ co_task_mem_ptr folder_ptr(pfolder);
+ const wchar_t* fptr = folder_ptr.get();
+ auto state = std::mbstate_t();
+ const auto required = std::wcsrtombs(nullptr, &fptr, 0, &state);
+ if (required != 0 && required != std::size_t(-1))
+ {
+ folder.resize(required);
+ std::wcsrtombs(&folder[0], &fptr, folder.size(), &state);
+ }
+ }
+ return folder;
+}
+
+# ifndef INSTALL
+
+// Usually something like "c:\Users\username\Downloads".
+static
+std::string
+get_download_folder()
+{
+ return get_known_folder(FOLDERID_Downloads);
+}
+
+# endif // !INSTALL
+
+# endif // !WINRT
+# else // !_WIN32
+
+# if !defined(INSTALL)
+
+static
+std::string
+expand_path(std::string path)
+{
+# if TARGET_OS_IPHONE
+ return date::iOSUtils::get_tzdata_path();
+# else // !TARGET_OS_IPHONE
+ ::wordexp_t w{};
+ std::unique_ptr<::wordexp_t, void(*)(::wordexp_t*)> hold{&w, ::wordfree};
+ ::wordexp(path.c_str(), &w, 0);
+ if (w.we_wordc != 1)
+ throw std::runtime_error("Cannot expand path: " + path);
+ path = w.we_wordv[0];
+ return path;
+# endif // !TARGET_OS_IPHONE
+}
+
+static
+std::string
+get_download_folder()
+{
+ return expand_path("~/Downloads");
+}
+
+# endif // !defined(INSTALL)
+
+# endif // !_WIN32
+
+#endif // !USE_OS_TZDB
+
+namespace arrow_vendored
+{
+namespace date
+{
+// +---------------------+
+// | Begin Configuration |
+// +---------------------+
+
+using namespace detail;
+
+#if !USE_OS_TZDB
+
+static
+std::string&
+access_install()
+{
+ static std::string install
+#ifndef INSTALL
+
+ = get_download_folder() + folder_delimiter + "tzdata";
+
+#else // !INSTALL
+
+# define STRINGIZEIMP(x) #x
+# define STRINGIZE(x) STRINGIZEIMP(x)
+
+ = STRINGIZE(INSTALL) + std::string(1, folder_delimiter) + "tzdata";
+
+ #undef STRINGIZEIMP
+ #undef STRINGIZE
+#endif // !INSTALL
+
+ return install;
+}
+
+void
+set_install(const std::string& s)
+{
+ access_install() = s;
+}
+
+static
+const std::string&
+get_install()
+{
+ static const std::string& ref = access_install();
+ return ref;
+}
+
+#if HAS_REMOTE_API
+static
+std::string
+get_download_gz_file(const std::string& version)
+{
+ auto file = get_install() + version + ".tar.gz";
+ return file;
+}
+#endif // HAS_REMOTE_API
+
+#endif // !USE_OS_TZDB
+
+// These can be used to reduce the range of the database to save memory
+CONSTDATA auto min_year = date::year::min();
+CONSTDATA auto max_year = date::year::max();
+
+CONSTDATA auto min_day = date::January/1;
+CONSTDATA auto max_day = date::December/31;
+
+#if USE_OS_TZDB
+
+CONSTCD14 const sys_seconds min_seconds = sys_days(min_year/min_day);
+
+#endif // USE_OS_TZDB
+
+#ifndef _WIN32
+
+static
+std::string
+discover_tz_dir()
+{
+ struct stat sb;
+ using namespace std;
+# ifndef __APPLE__
+ CONSTDATA auto tz_dir_default = "/usr/share/zoneinfo";
+ CONSTDATA auto tz_dir_buildroot = "/usr/share/zoneinfo/uclibc";
+
+ // Check special path which is valid for buildroot with uclibc builds
+ if(stat(tz_dir_buildroot, &sb) == 0 && S_ISDIR(sb.st_mode))
+ return tz_dir_buildroot;
+ else if(stat(tz_dir_default, &sb) == 0 && S_ISDIR(sb.st_mode))
+ return tz_dir_default;
+ else
+ throw runtime_error("discover_tz_dir failed to find zoneinfo\n");
+# else // __APPLE__
+# if TARGET_OS_IPHONE
+# if TARGET_OS_SIMULATOR
+ return "/usr/share/zoneinfo";
+# else
+ return "/var/db/timezone/zoneinfo";
+# endif
+# else
+ CONSTDATA auto timezone = "/etc/localtime";
+ if (!(lstat(timezone, &sb) == 0 && S_ISLNK(sb.st_mode) && sb.st_size > 0))
+ throw runtime_error("discover_tz_dir failed\n");
+ string result;
+ char rp[PATH_MAX+1] = {};
+ if (readlink(timezone, rp, sizeof(rp)-1) > 0)
+ result = string(rp);
+ else
+ throw system_error(errno, system_category(), "readlink() failed");
+ auto i = result.find("zoneinfo");
+ if (i == string::npos)
+ throw runtime_error("discover_tz_dir failed to find zoneinfo\n");
+ i = result.find('/', i);
+ if (i == string::npos)
+ throw runtime_error("discover_tz_dir failed to find '/'\n");
+ return result.substr(0, i);
+# endif
+# endif // __APPLE__
+}
+
+static
+const std::string&
+get_tz_dir()
+{
+ static const std::string tz_dir = discover_tz_dir();
+ return tz_dir;
+}
+
+#endif
+
+// +-------------------+
+// | End Configuration |
+// +-------------------+
+
+#ifndef _MSC_VER
+static_assert(min_year <= max_year, "Configuration error");
+#endif
+
+static std::unique_ptr<tzdb> init_tzdb();
+
+tzdb_list::~tzdb_list()
+{
+ const tzdb* ptr = head_;
+ head_ = nullptr;
+ while (ptr != nullptr)
+ {
+ auto next = ptr->next;
+ delete ptr;
+ ptr = next;
+ }
+}
+
+tzdb_list::tzdb_list(tzdb_list&& x) noexcept
+ : head_{x.head_.exchange(nullptr)}
+{
+}
+
+void
+tzdb_list::push_front(tzdb* tzdb) noexcept
+{
+ tzdb->next = head_;
+ head_ = tzdb;
+}
+
+tzdb_list::const_iterator
+tzdb_list::erase_after(const_iterator p) noexcept
+{
+ auto t = p.p_->next;
+ p.p_->next = p.p_->next->next;
+ delete t;
+ return ++p;
+}
+
+struct tzdb_list::undocumented_helper
+{
+ static void push_front(tzdb_list& db_list, tzdb* tzdb) noexcept
+ {
+ db_list.push_front(tzdb);
+ }
+};
+
+static
+tzdb_list
+create_tzdb()
+{
+ tzdb_list tz_db;
+ tzdb_list::undocumented_helper::push_front(tz_db, init_tzdb().release());
+ return tz_db;
+}
+
+tzdb_list&
+get_tzdb_list()
+{
+ static tzdb_list tz_db = create_tzdb();
+ return tz_db;
+}
+
+#if !USE_OS_TZDB
+
+#ifdef _WIN32
+
+static
+void
+sort_zone_mappings(std::vector<date::detail::timezone_mapping>& mappings)
+{
+ std::sort(mappings.begin(), mappings.end(),
+ [](const date::detail::timezone_mapping& lhs,
+ const date::detail::timezone_mapping& rhs)->bool
+ {
+ auto other_result = lhs.other.compare(rhs.other);
+ if (other_result < 0)
+ return true;
+ else if (other_result == 0)
+ {
+ auto territory_result = lhs.territory.compare(rhs.territory);
+ if (territory_result < 0)
+ return true;
+ else if (territory_result == 0)
+ {
+ if (lhs.type < rhs.type)
+ return true;
+ }
+ }
+ return false;
+ });
+}
+
+static
+bool
+native_to_standard_timezone_name(const std::string& native_tz_name,
+ std::string& standard_tz_name)
+{
+ // TODO! Does this need to be a case-insensitive compare?
+ if (native_tz_name == "UTC")
+ {
+ standard_tz_name = "Etc/UTC";
+ return true;
+ }
+ standard_tz_name.clear();
+ // TODO! we can improve on linear search.
+ const auto& mappings = date::get_tzdb().mappings;
+ for (const auto& tzm : mappings)
+ {
+ if (tzm.other == native_tz_name)
+ {
+ standard_tz_name = tzm.type;
+ return true;
+ }
+ }
+ return false;
+}
+
+// Parse this XML file:
+// https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml
+// The parsing method is designed to be simple and quick. It is not overly
+// forgiving of change but it should diagnose basic format issues.
+// See timezone_mapping structure for more info.
+static
+std::vector<detail::timezone_mapping>
+load_timezone_mappings_from_xml_file(const std::string& input_path)
+{
+ std::size_t line_num = 0;
+ std::vector<detail::timezone_mapping> mappings;
+ std::string line;
+
+ std::ifstream is(input_path);
+ if (!is.is_open())
+ {
+ // We don't emit file exceptions because that's an implementation detail.
+ std::string msg = "Error opening time zone mapping file \"";
+ msg += input_path;
+ msg += "\".";
+ throw std::runtime_error(msg);
+ }
+
+ auto error = [&input_path, &line_num](const char* info)
+ {
+ std::string msg = "Error loading time zone mapping file \"";
+ msg += input_path;
+ msg += "\" at line ";
+ msg += std::to_string(line_num);
+ msg += ": ";
+ msg += info;
+ throw std::runtime_error(msg);
+ };
+ // [optional space]a="b"
+ auto read_attribute = [&line, &error]
+ (const char* name, std::string& value, std::size_t startPos)
+ ->std::size_t
+ {
+ value.clear();
+ // Skip leading space before attribute name.
+ std::size_t spos = line.find_first_not_of(' ', startPos);
+ if (spos == std::string::npos)
+ spos = startPos;
+ // Assume everything up to next = is the attribute name
+ // and that an = will always delimit that.
+ std::size_t epos = line.find('=', spos);
+ if (epos == std::string::npos)
+ error("Expected \'=\' right after attribute name.");
+ std::size_t name_len = epos - spos;
+ // Expect the name we find matches the name we expect.
+ if (line.compare(spos, name_len, name) != 0)
+ {
+ std::string msg;
+ msg = "Expected attribute name \'";
+ msg += name;
+ msg += "\' around position ";
+ msg += std::to_string(spos);
+ msg += " but found something else.";
+ error(msg.c_str());
+ }
+ ++epos; // Skip the '=' that is after the attribute name.
+ spos = epos;
+ if (spos < line.length() && line[spos] == '\"')
+ ++spos; // Skip the quote that is before the attribute value.
+ else
+ {
+ std::string msg = "Expected '\"' to begin value of attribute \'";
+ msg += name;
+ msg += "\'.";
+ error(msg.c_str());
+ }
+ epos = line.find('\"', spos);
+ if (epos == std::string::npos)
+ {
+ std::string msg = "Expected '\"' to end value of attribute \'";
+ msg += name;
+ msg += "\'.";
+ error(msg.c_str());
+ }
+ // Extract everything in between the quotes. Note no escaping is done.
+ std::size_t value_len = epos - spos;
+ value.assign(line, spos, value_len);
+ ++epos; // Skip the quote that is after the attribute value;
+ return epos;
+ };
+
+ // Quick but not overly forgiving XML mapping file processing.
+ bool mapTimezonesOpenTagFound = false;
+ bool mapTimezonesCloseTagFound = false;
+ std::size_t mapZonePos = std::string::npos;
+ std::size_t mapTimezonesPos = std::string::npos;
+ CONSTDATA char mapTimeZonesOpeningTag[] = { "<mapTimezones " };
+ CONSTDATA char mapZoneOpeningTag[] = { "<mapZone " };
+ CONSTDATA std::size_t mapZoneOpeningTagLen = sizeof(mapZoneOpeningTag) /
+ sizeof(mapZoneOpeningTag[0]) - 1;
+ while (!mapTimezonesOpenTagFound)
+ {
+ std::getline(is, line);
+ ++line_num;
+ if (is.eof())
+ {
+ // If there is no mapTimezones tag is it an error?
+ // Perhaps if there are no mapZone mappings it might be ok for
+ // its parent mapTimezones element to be missing?
+ // We treat this as an error though on the assumption that if there
+ // really are no mappings we should still get a mapTimezones parent
+ // element but no mapZone elements inside. Assuming we must
+ // find something will hopefully at least catch more drastic formatting
+ // changes or errors than if we don't do this and assume nothing found.
+ error("Expected a mapTimezones opening tag.");
+ }
+ mapTimezonesPos = line.find(mapTimeZonesOpeningTag);
+ mapTimezonesOpenTagFound = (mapTimezonesPos != std::string::npos);
+ }
+
+ // NOTE: We could extract the version info that follows the opening
+ // mapTimezones tag and compare that to the version of other data we have.
+ // I would have expected them to be kept in synch but testing has shown
+ // it typically does not match anyway. So what's the point?
+ while (!mapTimezonesCloseTagFound)
+ {
+ std::ws(is);
+ std::getline(is, line);
+ ++line_num;
+ if (is.eof())
+ error("Expected a mapTimezones closing tag.");
+ if (line.empty())
+ continue;
+ mapZonePos = line.find(mapZoneOpeningTag);
+ if (mapZonePos != std::string::npos)
+ {
+ mapZonePos += mapZoneOpeningTagLen;
+ detail::timezone_mapping zm{};
+ std::size_t pos = read_attribute("other", zm.other, mapZonePos);
+ pos = read_attribute("territory", zm.territory, pos);
+ read_attribute("type", zm.type, pos);
+ mappings.push_back(std::move(zm));
+
+ continue;
+ }
+ mapTimezonesPos = line.find("</mapTimezones>");
+ mapTimezonesCloseTagFound = (mapTimezonesPos != std::string::npos);
+ if (!mapTimezonesCloseTagFound)
+ {
+ std::size_t commentPos = line.find("<!--");
+ if (commentPos == std::string::npos)
+ error("Unexpected mapping record found. A xml mapZone or comment "
+ "attribute or mapTimezones closing tag was expected.");
+ }
+ }
+
+ is.close();
+ return mappings;
+}
+
+#endif // _WIN32
+
+// Parsing helpers
+
+static
+std::string
+parse3(std::istream& in)
+{
+ std::string r(3, ' ');
+ ws(in);
+ r[0] = static_cast<char>(in.get());
+ r[1] = static_cast<char>(in.get());
+ r[2] = static_cast<char>(in.get());
+ return r;
+}
+
+static
+unsigned
+parse_dow(std::istream& in)
+{
+ CONSTDATA char*const dow_names[] =
+ {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"};
+ auto s = parse3(in);
+ auto dow = std::find(std::begin(dow_names), std::end(dow_names), s) - dow_names;
+ if (dow >= std::end(dow_names) - std::begin(dow_names))
+ throw std::runtime_error("oops: bad dow name: " + s);
+ return static_cast<unsigned>(dow);
+}
+
+static
+unsigned
+parse_month(std::istream& in)
+{
+ CONSTDATA char*const month_names[] =
+ {"Jan", "Feb", "Mar", "Apr", "May", "Jun",
+ "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
+ auto s = parse3(in);
+ auto m = std::find(std::begin(month_names), std::end(month_names), s) - month_names;
+ if (m >= std::end(month_names) - std::begin(month_names))
+ throw std::runtime_error("oops: bad month name: " + s);
+ return static_cast<unsigned>(++m);
+}
+
+static
+std::chrono::seconds
+parse_unsigned_time(std::istream& in)
+{
+ using namespace std::chrono;
+ int x;
+ in >> x;
+ auto r = seconds{hours{x}};
+ if (!in.eof() && in.peek() == ':')
+ {
+ in.get();
+ in >> x;
+ r += minutes{x};
+ if (!in.eof() && in.peek() == ':')
+ {
+ in.get();
+ in >> x;
+ r += seconds{x};
+ }
+ }
+ return r;
+}
+
+static
+std::chrono::seconds
+parse_signed_time(std::istream& in)
+{
+ ws(in);
+ auto sign = 1;
+ if (in.peek() == '-')
+ {
+ sign = -1;
+ in.get();
+ }
+ else if (in.peek() == '+')
+ in.get();
+ return sign * parse_unsigned_time(in);
+}
+
+// MonthDayTime
+
+detail::MonthDayTime::MonthDayTime(local_seconds tp, tz timezone)
+ : zone_(timezone)
+{
+ using namespace date;
+ const auto dp = date::floor<days>(tp);
+ const auto hms = make_time(tp - dp);
+ const auto ymd = year_month_day(dp);
+ u = ymd.month() / ymd.day();
+ h_ = hms.hours();
+ m_ = hms.minutes();
+ s_ = hms.seconds();
+}
+
+detail::MonthDayTime::MonthDayTime(const date::month_day& md, tz timezone)
+ : zone_(timezone)
+{
+ u = md;
+}
+
+date::day
+detail::MonthDayTime::day() const
+{
+ switch (type_)
+ {
+ case month_day:
+ return u.month_day_.day();
+ case month_last_dow:
+ return date::day{31};
+ case lteq:
+ case gteq:
+ break;
+ }
+ return u.month_day_weekday_.month_day_.day();
+}
+
+date::month
+detail::MonthDayTime::month() const
+{
+ switch (type_)
+ {
+ case month_day:
+ return u.month_day_.month();
+ case month_last_dow:
+ return u.month_weekday_last_.month();
+ case lteq:
+ case gteq:
+ break;
+ }
+ return u.month_day_weekday_.month_day_.month();
+}
+
+int
+detail::MonthDayTime::compare(date::year y, const MonthDayTime& x, date::year yx,
+ std::chrono::seconds offset, std::chrono::minutes prev_save) const
+{
+ if (zone_ != x.zone_)
+ {
+ auto dp0 = to_sys_days(y);
+ auto dp1 = x.to_sys_days(yx);
+ if (std::abs((dp0-dp1).count()) > 1)
+ return dp0 < dp1 ? -1 : 1;
+ if (zone_ == tz::local)
+ {
+ auto tp0 = to_time_point(y) - prev_save;
+ if (x.zone_ == tz::utc)
+ tp0 -= offset;
+ auto tp1 = x.to_time_point(yx);
+ return tp0 < tp1 ? -1 : tp0 == tp1 ? 0 : 1;
+ }
+ else if (zone_ == tz::standard)
+ {
+ auto tp0 = to_time_point(y);
+ auto tp1 = x.to_time_point(yx);
+ if (x.zone_ == tz::local)
+ tp1 -= prev_save;
+ else
+ tp0 -= offset;
+ return tp0 < tp1 ? -1 : tp0 == tp1 ? 0 : 1;
+ }
+ // zone_ == tz::utc
+ auto tp0 = to_time_point(y);
+ auto tp1 = x.to_time_point(yx);
+ if (x.zone_ == tz::local)
+ tp1 -= offset + prev_save;
+ else
+ tp1 -= offset;
+ return tp0 < tp1 ? -1 : tp0 == tp1 ? 0 : 1;
+ }
+ auto const t0 = to_time_point(y);
+ auto const t1 = x.to_time_point(yx);
+ return t0 < t1 ? -1 : t0 == t1 ? 0 : 1;
+}
+
+sys_seconds
+detail::MonthDayTime::to_sys(date::year y, std::chrono::seconds offset,
+ std::chrono::seconds save) const
+{
+ using namespace date;
+ using namespace std::chrono;
+ auto until_utc = to_time_point(y);
+ if (zone_ == tz::standard)
+ until_utc -= offset;
+ else if (zone_ == tz::local)
+ until_utc -= offset + save;
+ return until_utc;
+}
+
+detail::MonthDayTime::U&
+detail::MonthDayTime::U::operator=(const date::month_day& x)
+{
+ month_day_ = x;
+ return *this;
+}
+
+detail::MonthDayTime::U&
+detail::MonthDayTime::U::operator=(const date::month_weekday_last& x)
+{
+ month_weekday_last_ = x;
+ return *this;
+}
+
+detail::MonthDayTime::U&
+detail::MonthDayTime::U::operator=(const pair& x)
+{
+ month_day_weekday_ = x;
+ return *this;
+}
+
+date::sys_days
+detail::MonthDayTime::to_sys_days(date::year y) const
+{
+ using namespace std::chrono;
+ using namespace date;
+ switch (type_)
+ {
+ case month_day:
+ return sys_days(y/u.month_day_);
+ case month_last_dow:
+ return sys_days(y/u.month_weekday_last_);
+ case lteq:
+ {
+ auto const x = y/u.month_day_weekday_.month_day_;
+ auto const wd1 = weekday(static_cast<sys_days>(x));
+ auto const wd0 = u.month_day_weekday_.weekday_;
+ return sys_days(x) - (wd1-wd0);
+ }
+ case gteq:
+ break;
+ }
+ auto const x = y/u.month_day_weekday_.month_day_;
+ auto const wd1 = u.month_day_weekday_.weekday_;
+ auto const wd0 = weekday(static_cast<sys_days>(x));
+ return sys_days(x) + (wd1-wd0);
+}
+
+sys_seconds
+detail::MonthDayTime::to_time_point(date::year y) const
+{
+ // Add seconds first to promote to largest rep early to prevent overflow
+ return to_sys_days(y) + s_ + h_ + m_;
+}
+
+void
+detail::MonthDayTime::canonicalize(date::year y)
+{
+ using namespace std::chrono;
+ using namespace date;
+ switch (type_)
+ {
+ case month_day:
+ return;
+ case month_last_dow:
+ {
+ auto const ymd = year_month_day(sys_days(y/u.month_weekday_last_));
+ u.month_day_ = ymd.month()/ymd.day();
+ type_ = month_day;
+ return;
+ }
+ case lteq:
+ {
+ auto const x = y/u.month_day_weekday_.month_day_;
+ auto const wd1 = weekday(static_cast<sys_days>(x));
+ auto const wd0 = u.month_day_weekday_.weekday_;
+ auto const ymd = year_month_day(sys_days(x) - (wd1-wd0));
+ u.month_day_ = ymd.month()/ymd.day();
+ type_ = month_day;
+ return;
+ }
+ case gteq:
+ {
+ auto const x = y/u.month_day_weekday_.month_day_;
+ auto const wd1 = u.month_day_weekday_.weekday_;
+ auto const wd0 = weekday(static_cast<sys_days>(x));
+ auto const ymd = year_month_day(sys_days(x) + (wd1-wd0));
+ u.month_day_ = ymd.month()/ymd.day();
+ type_ = month_day;
+ return;
+ }
+ }
+}
+
+std::istream&
+detail::operator>>(std::istream& is, MonthDayTime& x)
+{
+ using namespace date;
+ using namespace std::chrono;
+ assert(((std::ios::failbit | std::ios::badbit) & is.exceptions()) ==
+ (std::ios::failbit | std::ios::badbit));
+ x = MonthDayTime{};
+ if (!is.eof() && ws(is) && !is.eof() && is.peek() != '#')
+ {
+ auto m = parse_month(is);
+ if (!is.eof() && ws(is) && !is.eof() && is.peek() != '#')
+ {
+ if (is.peek() == 'l')
+ {
+ for (int i = 0; i < 4; ++i)
+ is.get();
+ auto dow = parse_dow(is);
+ x.type_ = MonthDayTime::month_last_dow;
+ x.u = date::month(m)/weekday(dow)[last];
+ }
+ else if (std::isalpha(is.peek()))
+ {
+ auto dow = parse_dow(is);
+ char c{};
+ is >> c;
+ if (c == '<' || c == '>')
+ {
+ char c2{};
+ is >> c2;
+ if (c2 != '=')
+ throw std::runtime_error(std::string("bad operator: ") + c + c2);
+ int d;
+ is >> d;
+ if (d < 1 || d > 31)
+ throw std::runtime_error(std::string("bad operator: ") + c + c2
+ + std::to_string(d));
+ x.type_ = c == '<' ? MonthDayTime::lteq : MonthDayTime::gteq;
+ x.u = MonthDayTime::pair{ date::month(m) / d, date::weekday(dow) };
+ }
+ else
+ throw std::runtime_error(std::string("bad operator: ") + c);
+ }
+ else // if (std::isdigit(is.peek())
+ {
+ int d;
+ is >> d;
+ if (d < 1 || d > 31)
+ throw std::runtime_error(std::string("day of month: ")
+ + std::to_string(d));
+ x.type_ = MonthDayTime::month_day;
+ x.u = date::month(m)/d;
+ }
+ if (!is.eof() && ws(is) && !is.eof() && is.peek() != '#')
+ {
+ int t;
+ is >> t;
+ x.h_ = hours{t};
+ if (!is.eof() && is.peek() == ':')
+ {
+ is.get();
+ is >> t;
+ x.m_ = minutes{t};
+ if (!is.eof() && is.peek() == ':')
+ {
+ is.get();
+ is >> t;
+ x.s_ = seconds{t};
+ }
+ }
+ if (!is.eof() && std::isalpha(is.peek()))
+ {
+ char c;
+ is >> c;
+ switch (c)
+ {
+ case 's':
+ x.zone_ = tz::standard;
+ break;
+ case 'u':
+ x.zone_ = tz::utc;
+ break;
+ }
+ }
+ }
+ }
+ else
+ {
+ x.u = month{m}/1;
+ }
+ }
+ return is;
+}
+
+std::ostream&
+detail::operator<<(std::ostream& os, const MonthDayTime& x)
+{
+ switch (x.type_)
+ {
+ case MonthDayTime::month_day:
+ os << x.u.month_day_ << " ";
+ break;
+ case MonthDayTime::month_last_dow:
+ os << x.u.month_weekday_last_ << " ";
+ break;
+ case MonthDayTime::lteq:
+ os << x.u.month_day_weekday_.weekday_ << " on or before "
+ << x.u.month_day_weekday_.month_day_ << " ";
+ break;
+ case MonthDayTime::gteq:
+ if ((static_cast<unsigned>(x.day()) - 1) % 7 == 0)
+ {
+ os << (x.u.month_day_weekday_.month_day_.month() /
+ x.u.month_day_weekday_.weekday_[
+ (static_cast<unsigned>(x.day()) - 1)/7+1]) << " ";
+ }
+ else
+ {
+ os << x.u.month_day_weekday_.weekday_ << " on or after "
+ << x.u.month_day_weekday_.month_day_ << " ";
+ }
+ break;
+ }
+ os << date::make_time(x.s_ + x.h_ + x.m_);
+ if (x.zone_ == tz::utc)
+ os << "UTC ";
+ else if (x.zone_ == tz::standard)
+ os << "STD ";
+ else
+ os << " ";
+ return os;
+}
+
+// Rule
+
+detail::Rule::Rule(const std::string& s)
+{
+ try
+ {
+ using namespace date;
+ using namespace std::chrono;
+ std::istringstream in(s);
+ in.exceptions(std::ios::failbit | std::ios::badbit);
+ std::string word;
+ in >> word >> name_;
+ int x;
+ ws(in);
+ if (std::isalpha(in.peek()))
+ {
+ in >> word;
+ if (word == "min")
+ {
+ starting_year_ = year::min();
+ }
+ else
+ throw std::runtime_error("Didn't find expected word: " + word);
+ }
+ else
+ {
+ in >> x;
+ starting_year_ = year{x};
+ }
+ std::ws(in);
+ if (std::isalpha(in.peek()))
+ {
+ in >> word;
+ if (word == "only")
+ {
+ ending_year_ = starting_year_;
+ }
+ else if (word == "max")
+ {
+ ending_year_ = year::max();
+ }
+ else
+ throw std::runtime_error("Didn't find expected word: " + word);
+ }
+ else
+ {
+ in >> x;
+ ending_year_ = year{x};
+ }
+ in >> word; // TYPE (always "-")
+ assert(word == "-");
+ in >> starting_at_;
+ save_ = duration_cast<minutes>(parse_signed_time(in));
+ in >> abbrev_;
+ if (abbrev_ == "-")
+ abbrev_.clear();
+ assert(hours{-1} <= save_ && save_ <= hours{2});
+ }
+ catch (...)
+ {
+ std::cerr << s << '\n';
+ std::cerr << *this << '\n';
+ throw;
+ }
+}
+
+detail::Rule::Rule(const Rule& r, date::year starting_year, date::year ending_year)
+ : name_(r.name_)
+ , starting_year_(starting_year)
+ , ending_year_(ending_year)
+ , starting_at_(r.starting_at_)
+ , save_(r.save_)
+ , abbrev_(r.abbrev_)
+{
+}
+
+bool
+detail::operator==(const Rule& x, const Rule& y)
+{
+ if (std::tie(x.name_, x.save_, x.starting_year_, x.ending_year_) ==
+ std::tie(y.name_, y.save_, y.starting_year_, y.ending_year_))
+ return x.month() == y.month() && x.day() == y.day();
+ return false;
+}
+
+bool
+detail::operator<(const Rule& x, const Rule& y)
+{
+ using namespace std::chrono;
+ auto const xm = x.month();
+ auto const ym = y.month();
+ if (std::tie(x.name_, x.starting_year_, xm, x.ending_year_) <
+ std::tie(y.name_, y.starting_year_, ym, y.ending_year_))
+ return true;
+ if (std::tie(x.name_, x.starting_year_, xm, x.ending_year_) >
+ std::tie(y.name_, y.starting_year_, ym, y.ending_year_))
+ return false;
+ return x.day() < y.day();
+}
+
+bool
+detail::operator==(const Rule& x, const date::year& y)
+{
+ return x.starting_year_ <= y && y <= x.ending_year_;
+}
+
+bool
+detail::operator<(const Rule& x, const date::year& y)
+{
+ return x.ending_year_ < y;
+}
+
+bool
+detail::operator==(const date::year& x, const Rule& y)
+{
+ return y.starting_year_ <= x && x <= y.ending_year_;
+}
+
+bool
+detail::operator<(const date::year& x, const Rule& y)
+{
+ return x < y.starting_year_;
+}
+
+bool
+detail::operator==(const Rule& x, const std::string& y)
+{
+ return x.name() == y;
+}
+
+bool
+detail::operator<(const Rule& x, const std::string& y)
+{
+ return x.name() < y;
+}
+
+bool
+detail::operator==(const std::string& x, const Rule& y)
+{
+ return y.name() == x;
+}
+
+bool
+detail::operator<(const std::string& x, const Rule& y)
+{
+ return x < y.name();
+}
+
+std::ostream&
+detail::operator<<(std::ostream& os, const Rule& r)
+{
+ using namespace date;
+ using namespace std::chrono;
+ detail::save_ostream<char> _(os);
+ os.fill(' ');
+ os.flags(std::ios::dec | std::ios::left);
+ os.width(15);
+ os << r.name_;
+ os << r.starting_year_ << " " << r.ending_year_ << " ";
+ os << r.starting_at_;
+ if (r.save_ >= minutes{0})
+ os << ' ';
+ os << date::make_time(r.save_) << " ";
+ os << r.abbrev_;
+ return os;
+}
+
+date::day
+detail::Rule::day() const
+{
+ return starting_at_.day();
+}
+
+date::month
+detail::Rule::month() const
+{
+ return starting_at_.month();
+}
+
+struct find_rule_by_name
+{
+ bool operator()(const Rule& x, const std::string& nm) const
+ {
+ return x.name() < nm;
+ }
+
+ bool operator()(const std::string& nm, const Rule& x) const
+ {
+ return nm < x.name();
+ }
+};
+
+bool
+detail::Rule::overlaps(const Rule& x, const Rule& y)
+{
+ // assume x.starting_year_ <= y.starting_year_;
+ if (!(x.starting_year_ <= y.starting_year_))
+ {
+ std::cerr << x << '\n';
+ std::cerr << y << '\n';
+ assert(x.starting_year_ <= y.starting_year_);
+ }
+ if (y.starting_year_ > x.ending_year_)
+ return false;
+ return !(x.starting_year_ == y.starting_year_ && x.ending_year_ == y.ending_year_);
+}
+
+void
+detail::Rule::split(std::vector<Rule>& rules, std::size_t i, std::size_t k, std::size_t& e)
+{
+ using namespace date;
using difference_type = std::iterator_traits<std::vector<Rule>::iterator>::difference_type;
- // rules[i].starting_year_ <= rules[k].starting_year_ &&
- // rules[i].ending_year_ >= rules[k].starting_year_ &&
- // (rules[i].starting_year_ != rules[k].starting_year_ ||
- // rules[i].ending_year_ != rules[k].ending_year_)
- assert(rules[i].starting_year_ <= rules[k].starting_year_ &&
- rules[i].ending_year_ >= rules[k].starting_year_ &&
- (rules[i].starting_year_ != rules[k].starting_year_ ||
- rules[i].ending_year_ != rules[k].ending_year_));
- if (rules[i].starting_year_ == rules[k].starting_year_)
- {
- if (rules[k].ending_year_ < rules[i].ending_year_)
- {
- rules.insert(rules.begin() + static_cast<difference_type>(k+1),
- Rule(rules[i], rules[k].ending_year_ + years{1},
- std::move(rules[i].ending_year_)));
- ++e;
- rules[i].ending_year_ = rules[k].ending_year_;
- }
- else // rules[k].ending_year_ > rules[i].ending_year_
- {
- rules.insert(rules.begin() + static_cast<difference_type>(k+1),
- Rule(rules[k], rules[i].ending_year_ + years{1},
- std::move(rules[k].ending_year_)));
- ++e;
- rules[k].ending_year_ = rules[i].ending_year_;
- }
- }
- else // rules[i].starting_year_ < rules[k].starting_year_
- {
- if (rules[k].ending_year_ < rules[i].ending_year_)
- {
- rules.insert(rules.begin() + static_cast<difference_type>(k),
- Rule(rules[i], rules[k].starting_year_, rules[k].ending_year_));
- ++k;
- rules.insert(rules.begin() + static_cast<difference_type>(k+1),
- Rule(rules[i], rules[k].ending_year_ + years{1},
- std::move(rules[i].ending_year_)));
- rules[i].ending_year_ = rules[k].starting_year_ - years{1};
- e += 2;
- }
- else if (rules[k].ending_year_ > rules[i].ending_year_)
- {
- rules.insert(rules.begin() + static_cast<difference_type>(k),
- Rule(rules[i], rules[k].starting_year_, rules[i].ending_year_));
- ++k;
- rules.insert(rules.begin() + static_cast<difference_type>(k+1),
- Rule(rules[k], rules[i].ending_year_ + years{1},
- std::move(rules[k].ending_year_)));
- e += 2;
- rules[k].ending_year_ = std::move(rules[i].ending_year_);
- rules[i].ending_year_ = rules[k].starting_year_ - years{1};
- }
- else // rules[k].ending_year_ == rules[i].ending_year_
- {
- rules.insert(rules.begin() + static_cast<difference_type>(k),
- Rule(rules[i], rules[k].starting_year_,
- std::move(rules[i].ending_year_)));
- ++k;
- ++e;
- rules[i].ending_year_ = rules[k].starting_year_ - years{1};
- }
- }
-}
-
-void
-detail::Rule::split_overlaps(std::vector<Rule>& rules, std::size_t i, std::size_t& e)
-{
+ // rules[i].starting_year_ <= rules[k].starting_year_ &&
+ // rules[i].ending_year_ >= rules[k].starting_year_ &&
+ // (rules[i].starting_year_ != rules[k].starting_year_ ||
+ // rules[i].ending_year_ != rules[k].ending_year_)
+ assert(rules[i].starting_year_ <= rules[k].starting_year_ &&
+ rules[i].ending_year_ >= rules[k].starting_year_ &&
+ (rules[i].starting_year_ != rules[k].starting_year_ ||
+ rules[i].ending_year_ != rules[k].ending_year_));
+ if (rules[i].starting_year_ == rules[k].starting_year_)
+ {
+ if (rules[k].ending_year_ < rules[i].ending_year_)
+ {
+ rules.insert(rules.begin() + static_cast<difference_type>(k+1),
+ Rule(rules[i], rules[k].ending_year_ + years{1},
+ std::move(rules[i].ending_year_)));
+ ++e;
+ rules[i].ending_year_ = rules[k].ending_year_;
+ }
+ else // rules[k].ending_year_ > rules[i].ending_year_
+ {
+ rules.insert(rules.begin() + static_cast<difference_type>(k+1),
+ Rule(rules[k], rules[i].ending_year_ + years{1},
+ std::move(rules[k].ending_year_)));
+ ++e;
+ rules[k].ending_year_ = rules[i].ending_year_;
+ }
+ }
+ else // rules[i].starting_year_ < rules[k].starting_year_
+ {
+ if (rules[k].ending_year_ < rules[i].ending_year_)
+ {
+ rules.insert(rules.begin() + static_cast<difference_type>(k),
+ Rule(rules[i], rules[k].starting_year_, rules[k].ending_year_));
+ ++k;
+ rules.insert(rules.begin() + static_cast<difference_type>(k+1),
+ Rule(rules[i], rules[k].ending_year_ + years{1},
+ std::move(rules[i].ending_year_)));
+ rules[i].ending_year_ = rules[k].starting_year_ - years{1};
+ e += 2;
+ }
+ else if (rules[k].ending_year_ > rules[i].ending_year_)
+ {
+ rules.insert(rules.begin() + static_cast<difference_type>(k),
+ Rule(rules[i], rules[k].starting_year_, rules[i].ending_year_));
+ ++k;
+ rules.insert(rules.begin() + static_cast<difference_type>(k+1),
+ Rule(rules[k], rules[i].ending_year_ + years{1},
+ std::move(rules[k].ending_year_)));
+ e += 2;
+ rules[k].ending_year_ = std::move(rules[i].ending_year_);
+ rules[i].ending_year_ = rules[k].starting_year_ - years{1};
+ }
+ else // rules[k].ending_year_ == rules[i].ending_year_
+ {
+ rules.insert(rules.begin() + static_cast<difference_type>(k),
+ Rule(rules[i], rules[k].starting_year_,
+ std::move(rules[i].ending_year_)));
+ ++k;
+ ++e;
+ rules[i].ending_year_ = rules[k].starting_year_ - years{1};
+ }
+ }
+}
+
+void
+detail::Rule::split_overlaps(std::vector<Rule>& rules, std::size_t i, std::size_t& e)
+{
using difference_type = std::iterator_traits<std::vector<Rule>::iterator>::difference_type;
- auto j = i;
- for (; i + 1 < e; ++i)
- {
- for (auto k = i + 1; k < e; ++k)
- {
- if (overlaps(rules[i], rules[k]))
- {
- split(rules, i, k, e);
- std::sort(rules.begin() + static_cast<difference_type>(i),
- rules.begin() + static_cast<difference_type>(e));
- }
- }
- }
- for (; j < e; ++j)
- {
- if (rules[j].starting_year() == rules[j].ending_year())
- rules[j].starting_at_.canonicalize(rules[j].starting_year());
- }
-}
-
-void
-detail::Rule::split_overlaps(std::vector<Rule>& rules)
-{
+ auto j = i;
+ for (; i + 1 < e; ++i)
+ {
+ for (auto k = i + 1; k < e; ++k)
+ {
+ if (overlaps(rules[i], rules[k]))
+ {
+ split(rules, i, k, e);
+ std::sort(rules.begin() + static_cast<difference_type>(i),
+ rules.begin() + static_cast<difference_type>(e));
+ }
+ }
+ }
+ for (; j < e; ++j)
+ {
+ if (rules[j].starting_year() == rules[j].ending_year())
+ rules[j].starting_at_.canonicalize(rules[j].starting_year());
+ }
+}
+
+void
+detail::Rule::split_overlaps(std::vector<Rule>& rules)
+{
using difference_type = std::iterator_traits<std::vector<Rule>::iterator>::difference_type;
- for (std::size_t i = 0; i < rules.size();)
- {
- auto e = static_cast<std::size_t>(std::upper_bound(
- rules.cbegin()+static_cast<difference_type>(i), rules.cend(), rules[i].name(),
- [](const std::string& nm, const Rule& x)
- {
- return nm < x.name();
- }) - rules.cbegin());
- split_overlaps(rules, i, e);
- auto first_rule = rules.begin() + static_cast<difference_type>(i);
- auto last_rule = rules.begin() + static_cast<difference_type>(e);
- auto t = std::lower_bound(first_rule, last_rule, min_year);
- if (t > first_rule+1)
- {
- if (t == last_rule || t->starting_year() >= min_year)
- --t;
- auto d = static_cast<std::size_t>(t - first_rule);
- rules.erase(first_rule, t);
- e -= d;
- }
- first_rule = rules.begin() + static_cast<difference_type>(i);
- last_rule = rules.begin() + static_cast<difference_type>(e);
- t = std::upper_bound(first_rule, last_rule, max_year);
- if (t != last_rule)
- {
- auto d = static_cast<std::size_t>(last_rule - t);
- rules.erase(t, last_rule);
- e -= d;
- }
- i = e;
- }
- rules.shrink_to_fit();
-}
-
-// Find the rule that comes chronologically before Rule r. For multi-year rules,
-// y specifies which rules in r. For single year rules, y is assumed to be equal
-// to the year specified by r.
-// Returns a pointer to the chronologically previous rule, and the year within
-// that rule. If there is no previous rule, returns nullptr and year::min().
-// Preconditions:
-// r->starting_year() <= y && y <= r->ending_year()
-static
-std::pair<const Rule*, date::year>
-find_previous_rule(const Rule* r, date::year y)
-{
- using namespace date;
- auto const& rules = get_tzdb().rules;
- if (y == r->starting_year())
- {
- if (r == &rules.front() || r->name() != r[-1].name())
- std::terminate(); // never called with first rule
- --r;
- if (y == r->starting_year())
- return {r, y};
- return {r, r->ending_year()};
- }
- if (r == &rules.front() || r->name() != r[-1].name() ||
- r[-1].starting_year() < r->starting_year())
- {
- while (r < &rules.back() && r->name() == r[1].name() &&
- r->starting_year() == r[1].starting_year())
- ++r;
- return {r, --y};
- }
- --r;
- return {r, y};
-}
-
-// Find the rule that comes chronologically after Rule r. For multi-year rules,
-// y specifies which rules in r. For single year rules, y is assumed to be equal
-// to the year specified by r.
-// Returns a pointer to the chronologically next rule, and the year within
-// that rule. If there is no next rule, return a pointer to a defaulted rule
-// and y+1.
-// Preconditions:
-// first <= r && r < last && r->starting_year() <= y && y <= r->ending_year()
-// [first, last) all have the same name
-static
-std::pair<const Rule*, date::year>
-find_next_rule(const Rule* first_rule, const Rule* last_rule, const Rule* r, date::year y)
-{
- using namespace date;
- if (y == r->ending_year())
- {
- if (r == last_rule-1)
- return {nullptr, year::max()};
- ++r;
- if (y == r->ending_year())
- return {r, y};
- return {r, r->starting_year()};
- }
- if (r == last_rule-1 || r->ending_year() < r[1].ending_year())
- {
- while (r > first_rule && r->starting_year() == r[-1].starting_year())
- --r;
- return {r, ++y};
- }
- ++r;
- return {r, y};
-}
-
-// Find the rule that comes chronologically after Rule r. For multi-year rules,
-// y specifies which rules in r. For single year rules, y is assumed to be equal
-// to the year specified by r.
-// Returns a pointer to the chronologically next rule, and the year within
-// that rule. If there is no next rule, return nullptr and year::max().
-// Preconditions:
-// r->starting_year() <= y && y <= r->ending_year()
-static
-std::pair<const Rule*, date::year>
-find_next_rule(const Rule* r, date::year y)
-{
- using namespace date;
- auto const& rules = get_tzdb().rules;
- if (y == r->ending_year())
- {
- if (r == &rules.back() || r->name() != r[1].name())
- return {nullptr, year::max()};
- ++r;
- if (y == r->ending_year())
- return {r, y};
- return {r, r->starting_year()};
- }
- if (r == &rules.back() || r->name() != r[1].name() ||
- r->ending_year() < r[1].ending_year())
- {
- while (r > &rules.front() && r->name() == r[-1].name() &&
- r->starting_year() == r[-1].starting_year())
- --r;
- return {r, ++y};
- }
- ++r;
- return {r, y};
-}
-
-static
-const Rule*
-find_first_std_rule(const std::pair<const Rule*, const Rule*>& eqr)
-{
- auto r = eqr.first;
- auto ry = r->starting_year();
- while (r->save() != std::chrono::minutes{0})
- {
- std::tie(r, ry) = find_next_rule(eqr.first, eqr.second, r, ry);
- if (r == nullptr)
- throw std::runtime_error("Could not find standard offset in rule "
- + eqr.first->name());
- }
- return r;
-}
-
-static
-std::pair<const Rule*, date::year>
-find_rule_for_zone(const std::pair<const Rule*, const Rule*>& eqr,
- const date::year& y, const std::chrono::seconds& offset,
- const MonthDayTime& mdt)
-{
- assert(eqr.first != nullptr);
- assert(eqr.second != nullptr);
-
- using namespace std::chrono;
- using namespace date;
- auto r = eqr.first;
- auto ry = r->starting_year();
- auto prev_save = minutes{0};
- auto prev_year = year::min();
- const Rule* prev_rule = nullptr;
- while (r != nullptr)
- {
- if (mdt.compare(y, r->mdt(), ry, offset, prev_save) <= 0)
- break;
- prev_rule = r;
- prev_year = ry;
- prev_save = prev_rule->save();
- std::tie(r, ry) = find_next_rule(eqr.first, eqr.second, r, ry);
- }
- return {prev_rule, prev_year};
-}
-
-static
-std::pair<const Rule*, date::year>
-find_rule_for_zone(const std::pair<const Rule*, const Rule*>& eqr,
- const sys_seconds& tp_utc,
- const local_seconds& tp_std,
- const local_seconds& tp_loc)
-{
- using namespace std::chrono;
- using namespace date;
- auto r = eqr.first;
- auto ry = r->starting_year();
- auto prev_save = minutes{0};
- auto prev_year = year::min();
- const Rule* prev_rule = nullptr;
- while (r != nullptr)
- {
- bool found = false;
- switch (r->mdt().zone())
- {
- case tz::utc:
- found = tp_utc < r->mdt().to_time_point(ry);
- break;
- case tz::standard:
- found = sys_seconds{tp_std.time_since_epoch()} < r->mdt().to_time_point(ry);
- break;
- case tz::local:
- found = sys_seconds{tp_loc.time_since_epoch()} < r->mdt().to_time_point(ry);
- break;
- }
- if (found)
- break;
- prev_rule = r;
- prev_year = ry;
- prev_save = prev_rule->save();
- std::tie(r, ry) = find_next_rule(eqr.first, eqr.second, r, ry);
- }
- return {prev_rule, prev_year};
-}
-
-static
-sys_info
-find_rule(const std::pair<const Rule*, date::year>& first_rule,
- const std::pair<const Rule*, date::year>& last_rule,
- const date::year& y, const std::chrono::seconds& offset,
- const MonthDayTime& mdt, const std::chrono::minutes& initial_save,
- const std::string& initial_abbrev)
-{
- using namespace std::chrono;
- using namespace date;
- auto r = first_rule.first;
- auto ry = first_rule.second;
- sys_info x{sys_days(year::min()/min_day), sys_days(year::max()/max_day),
- seconds{0}, initial_save, initial_abbrev};
- while (r != nullptr)
- {
- auto tr = r->mdt().to_sys(ry, offset, x.save);
- auto tx = mdt.to_sys(y, offset, x.save);
- // Find last rule where tx >= tr
- if (tx <= tr || (r == last_rule.first && ry == last_rule.second))
- {
- if (tx < tr && r == first_rule.first && ry == first_rule.second)
- {
- x.end = r->mdt().to_sys(ry, offset, x.save);
- break;
- }
- if (tx < tr)
- {
- std::tie(r, ry) = find_previous_rule(r, ry); // can't return nullptr for r
- assert(r != nullptr);
- }
- // r != nullptr && tx >= tr (if tr were to be recomputed)
- auto prev_save = initial_save;
- if (!(r == first_rule.first && ry == first_rule.second))
- prev_save = find_previous_rule(r, ry).first->save();
- x.begin = r->mdt().to_sys(ry, offset, prev_save);
- x.save = r->save();
- x.abbrev = r->abbrev();
- if (!(r == last_rule.first && ry == last_rule.second))
- {
- std::tie(r, ry) = find_next_rule(r, ry); // can't return nullptr for r
- assert(r != nullptr);
- x.end = r->mdt().to_sys(ry, offset, x.save);
- }
- else
- x.end = sys_days(year::max()/max_day);
- break;
- }
- x.save = r->save();
- std::tie(r, ry) = find_next_rule(r, ry); // Can't return nullptr for r
- assert(r != nullptr);
- }
- return x;
-}
-
-// zonelet
-
-detail::zonelet::~zonelet()
-{
-#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
- using minutes = std::chrono::minutes;
- using string = std::string;
- if (tag_ == has_save)
- u.save_.~minutes();
- else
- u.rule_.~string();
-#endif
-}
-
-detail::zonelet::zonelet()
-{
-#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
- ::new(&u.rule_) std::string();
-#endif
-}
-
-detail::zonelet::zonelet(const zonelet& i)
- : gmtoff_(i.gmtoff_)
- , tag_(i.tag_)
- , format_(i.format_)
- , until_year_(i.until_year_)
- , until_date_(i.until_date_)
- , until_utc_(i.until_utc_)
- , until_std_(i.until_std_)
- , until_loc_(i.until_loc_)
- , initial_save_(i.initial_save_)
- , initial_abbrev_(i.initial_abbrev_)
- , first_rule_(i.first_rule_)
- , last_rule_(i.last_rule_)
-{
-#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
- if (tag_ == has_save)
- ::new(&u.save_) std::chrono::minutes(i.u.save_);
- else
- ::new(&u.rule_) std::string(i.u.rule_);
-#else
- if (tag_ == has_save)
- u.save_ = i.u.save_;
- else
- u.rule_ = i.u.rule_;
-#endif
-}
-
-#endif // !USE_OS_TZDB
-
-// time_zone
-
-#if USE_OS_TZDB
-
-time_zone::time_zone(const std::string& s, detail::undocumented)
- : name_(s)
- , adjusted_(new std::once_flag{})
-{
-}
-
-enum class endian
-{
- native = __BYTE_ORDER__,
- little = __ORDER_LITTLE_ENDIAN__,
- big = __ORDER_BIG_ENDIAN__
-};
-
-static
-inline
-std::uint32_t
-reverse_bytes(std::uint32_t i)
-{
- return
- (i & 0xff000000u) >> 24 |
- (i & 0x00ff0000u) >> 8 |
- (i & 0x0000ff00u) << 8 |
- (i & 0x000000ffu) << 24;
-}
-
-static
-inline
-std::uint64_t
-reverse_bytes(std::uint64_t i)
-{
- return
- (i & 0xff00000000000000ull) >> 56 |
- (i & 0x00ff000000000000ull) >> 40 |
- (i & 0x0000ff0000000000ull) >> 24 |
- (i & 0x000000ff00000000ull) >> 8 |
- (i & 0x00000000ff000000ull) << 8 |
- (i & 0x0000000000ff0000ull) << 24 |
- (i & 0x000000000000ff00ull) << 40 |
- (i & 0x00000000000000ffull) << 56;
-}
-
-template <class T>
-static
-inline
-void
-maybe_reverse_bytes(T&, std::false_type)
-{
-}
-
-static
-inline
-void
-maybe_reverse_bytes(std::int32_t& t, std::true_type)
-{
- t = static_cast<std::int32_t>(reverse_bytes(static_cast<std::uint32_t>(t)));
-}
-
-static
-inline
-void
-maybe_reverse_bytes(std::int64_t& t, std::true_type)
-{
- t = static_cast<std::int64_t>(reverse_bytes(static_cast<std::uint64_t>(t)));
-}
-
-template <class T>
-static
-inline
-void
-maybe_reverse_bytes(T& t)
-{
- maybe_reverse_bytes(t, std::integral_constant<bool,
- endian::native == endian::little>{});
-}
-
-static
-void
-load_header(std::istream& inf)
-{
- // Read TZif
- auto t = inf.get();
- auto z = inf.get();
- auto i = inf.get();
- auto f = inf.get();
-#ifndef NDEBUG
- assert(t == 'T');
- assert(z == 'Z');
- assert(i == 'i');
- assert(f == 'f');
-#else
- (void)t;
- (void)z;
- (void)i;
- (void)f;
-#endif
-}
-
-static
-unsigned char
-load_version(std::istream& inf)
-{
- // Read version
- auto v = inf.get();
- assert(v != EOF);
- return static_cast<unsigned char>(v);
-}
-
-static
-void
-skip_reserve(std::istream& inf)
-{
- inf.ignore(15);
-}
-
-static
-void
-load_counts(std::istream& inf,
- std::int32_t& tzh_ttisgmtcnt, std::int32_t& tzh_ttisstdcnt,
- std::int32_t& tzh_leapcnt, std::int32_t& tzh_timecnt,
- std::int32_t& tzh_typecnt, std::int32_t& tzh_charcnt)
-{
- // Read counts;
- inf.read(reinterpret_cast<char*>(&tzh_ttisgmtcnt), 4);
- maybe_reverse_bytes(tzh_ttisgmtcnt);
- inf.read(reinterpret_cast<char*>(&tzh_ttisstdcnt), 4);
- maybe_reverse_bytes(tzh_ttisstdcnt);
- inf.read(reinterpret_cast<char*>(&tzh_leapcnt), 4);
- maybe_reverse_bytes(tzh_leapcnt);
- inf.read(reinterpret_cast<char*>(&tzh_timecnt), 4);
- maybe_reverse_bytes(tzh_timecnt);
- inf.read(reinterpret_cast<char*>(&tzh_typecnt), 4);
- maybe_reverse_bytes(tzh_typecnt);
- inf.read(reinterpret_cast<char*>(&tzh_charcnt), 4);
- maybe_reverse_bytes(tzh_charcnt);
-}
-
-template <class TimeType>
-static
-std::vector<detail::transition>
-load_transitions(std::istream& inf, std::int32_t tzh_timecnt)
-{
- // Read transitions
- using namespace std::chrono;
- std::vector<detail::transition> transitions;
- transitions.reserve(static_cast<unsigned>(tzh_timecnt));
- for (std::int32_t i = 0; i < tzh_timecnt; ++i)
- {
- TimeType t;
- inf.read(reinterpret_cast<char*>(&t), sizeof(t));
- maybe_reverse_bytes(t);
- transitions.emplace_back(sys_seconds{seconds{t}});
- if (transitions.back().timepoint < min_seconds)
- transitions.back().timepoint = min_seconds;
- }
- return transitions;
-}
-
-static
-std::vector<std::uint8_t>
-load_indices(std::istream& inf, std::int32_t tzh_timecnt)
-{
- // Read indices
- std::vector<std::uint8_t> indices;
- indices.reserve(static_cast<unsigned>(tzh_timecnt));
- for (std::int32_t i = 0; i < tzh_timecnt; ++i)
- {
- std::uint8_t t;
- inf.read(reinterpret_cast<char*>(&t), sizeof(t));
- indices.emplace_back(t);
- }
- return indices;
-}
-
-static
-std::vector<ttinfo>
-load_ttinfo(std::istream& inf, std::int32_t tzh_typecnt)
-{
- // Read ttinfo
- std::vector<ttinfo> ttinfos;
- ttinfos.reserve(static_cast<unsigned>(tzh_typecnt));
- for (std::int32_t i = 0; i < tzh_typecnt; ++i)
- {
- ttinfo t;
- inf.read(reinterpret_cast<char*>(&t), 6);
- maybe_reverse_bytes(t.tt_gmtoff);
- ttinfos.emplace_back(t);
- }
- return ttinfos;
-}
-
-static
-std::string
-load_abbreviations(std::istream& inf, std::int32_t tzh_charcnt)
-{
- // Read abbreviations
- std::string abbrev;
- abbrev.resize(static_cast<unsigned>(tzh_charcnt), '\0');
- inf.read(&abbrev[0], tzh_charcnt);
- return abbrev;
-}
-
-#if !MISSING_LEAP_SECONDS
-
-template <class TimeType>
-static
-std::vector<leap_second>
-load_leaps(std::istream& inf, std::int32_t tzh_leapcnt)
-{
- // Read tzh_leapcnt pairs
- using namespace std::chrono;
- std::vector<leap_second> leap_seconds;
- leap_seconds.reserve(static_cast<std::size_t>(tzh_leapcnt));
- for (std::int32_t i = 0; i < tzh_leapcnt; ++i)
- {
- TimeType t0;
- std::int32_t t1;
- inf.read(reinterpret_cast<char*>(&t0), sizeof(t0));
- inf.read(reinterpret_cast<char*>(&t1), sizeof(t1));
- maybe_reverse_bytes(t0);
- maybe_reverse_bytes(t1);
- leap_seconds.emplace_back(sys_seconds{seconds{t0 - (t1-1)}},
- detail::undocumented{});
- }
- return leap_seconds;
-}
-
-template <class TimeType>
-static
-std::vector<leap_second>
-load_leap_data(std::istream& inf,
- std::int32_t tzh_leapcnt, std::int32_t tzh_timecnt,
- std::int32_t tzh_typecnt, std::int32_t tzh_charcnt)
-{
- inf.ignore(tzh_timecnt*static_cast<std::int32_t>(sizeof(TimeType)) + tzh_timecnt +
- tzh_typecnt*6 + tzh_charcnt);
- return load_leaps<TimeType>(inf, tzh_leapcnt);
-}
-
-static
-std::vector<leap_second>
-load_just_leaps(std::istream& inf)
-{
- // Read tzh_leapcnt pairs
- using namespace std::chrono;
- load_header(inf);
- auto v = load_version(inf);
- std::int32_t tzh_ttisgmtcnt, tzh_ttisstdcnt, tzh_leapcnt,
- tzh_timecnt, tzh_typecnt, tzh_charcnt;
- skip_reserve(inf);
- load_counts(inf, tzh_ttisgmtcnt, tzh_ttisstdcnt, tzh_leapcnt,
- tzh_timecnt, tzh_typecnt, tzh_charcnt);
- if (v == 0)
- return load_leap_data<int32_t>(inf, tzh_leapcnt, tzh_timecnt, tzh_typecnt,
- tzh_charcnt);
-#if !defined(NDEBUG)
- inf.ignore((4+1)*tzh_timecnt + 6*tzh_typecnt + tzh_charcnt + 8*tzh_leapcnt +
- tzh_ttisstdcnt + tzh_ttisgmtcnt);
- load_header(inf);
- auto v2 = load_version(inf);
- assert(v == v2);
- skip_reserve(inf);
-#else // defined(NDEBUG)
- inf.ignore((4+1)*tzh_timecnt + 6*tzh_typecnt + tzh_charcnt + 8*tzh_leapcnt +
- tzh_ttisstdcnt + tzh_ttisgmtcnt + (4+1+15));
-#endif // defined(NDEBUG)
- load_counts(inf, tzh_ttisgmtcnt, tzh_ttisstdcnt, tzh_leapcnt,
- tzh_timecnt, tzh_typecnt, tzh_charcnt);
- return load_leap_data<int64_t>(inf, tzh_leapcnt, tzh_timecnt, tzh_typecnt,
- tzh_charcnt);
-}
-
-#endif // !MISSING_LEAP_SECONDS
-
-template <class TimeType>
-void
-time_zone::load_data(std::istream& inf,
- std::int32_t tzh_leapcnt, std::int32_t tzh_timecnt,
- std::int32_t tzh_typecnt, std::int32_t tzh_charcnt)
-{
- using namespace std::chrono;
- transitions_ = load_transitions<TimeType>(inf, tzh_timecnt);
- auto indices = load_indices(inf, tzh_timecnt);
- auto infos = load_ttinfo(inf, tzh_typecnt);
- auto abbrev = load_abbreviations(inf, tzh_charcnt);
-#if !MISSING_LEAP_SECONDS
- auto& leap_seconds = get_tzdb_list().front().leap_seconds;
- if (leap_seconds.empty() && tzh_leapcnt > 0)
- leap_seconds = load_leaps<TimeType>(inf, tzh_leapcnt);
-#endif
- ttinfos_.reserve(infos.size());
- for (auto& info : infos)
- {
- ttinfos_.push_back({seconds{info.tt_gmtoff},
- abbrev.c_str() + info.tt_abbrind,
- info.tt_isdst != 0});
- }
- auto i = 0u;
- if (transitions_.empty() || transitions_.front().timepoint != min_seconds)
- {
- transitions_.emplace(transitions_.begin(), min_seconds);
- auto tf = std::find_if(ttinfos_.begin(), ttinfos_.end(),
- [](const expanded_ttinfo& ti)
- {return ti.is_dst == 0;});
- if (tf == ttinfos_.end())
- tf = ttinfos_.begin();
- transitions_[i].info = &*tf;
- ++i;
- }
- for (auto j = 0u; i < transitions_.size(); ++i, ++j)
- transitions_[i].info = ttinfos_.data() + indices[j];
-}
-
-void
-time_zone::init_impl()
-{
- using namespace std;
- using namespace std::chrono;
- auto name = get_tz_dir() + ('/' + name_);
- std::ifstream inf(name);
- if (!inf.is_open())
- throw std::runtime_error{"Unable to open " + name};
- inf.exceptions(std::ios::failbit | std::ios::badbit);
- load_header(inf);
- auto v = load_version(inf);
- std::int32_t tzh_ttisgmtcnt, tzh_ttisstdcnt, tzh_leapcnt,
- tzh_timecnt, tzh_typecnt, tzh_charcnt;
- skip_reserve(inf);
- load_counts(inf, tzh_ttisgmtcnt, tzh_ttisstdcnt, tzh_leapcnt,
- tzh_timecnt, tzh_typecnt, tzh_charcnt);
- if (v == 0)
- {
- load_data<int32_t>(inf, tzh_leapcnt, tzh_timecnt, tzh_typecnt, tzh_charcnt);
- }
- else
- {
-#if !defined(NDEBUG)
- inf.ignore((4+1)*tzh_timecnt + 6*tzh_typecnt + tzh_charcnt + 8*tzh_leapcnt +
- tzh_ttisstdcnt + tzh_ttisgmtcnt);
- load_header(inf);
- auto v2 = load_version(inf);
- assert(v == v2);
- skip_reserve(inf);
-#else // defined(NDEBUG)
- inf.ignore((4+1)*tzh_timecnt + 6*tzh_typecnt + tzh_charcnt + 8*tzh_leapcnt +
- tzh_ttisstdcnt + tzh_ttisgmtcnt + (4+1+15));
-#endif // defined(NDEBUG)
- load_counts(inf, tzh_ttisgmtcnt, tzh_ttisstdcnt, tzh_leapcnt,
- tzh_timecnt, tzh_typecnt, tzh_charcnt);
- load_data<int64_t>(inf, tzh_leapcnt, tzh_timecnt, tzh_typecnt, tzh_charcnt);
- }
-#if !MISSING_LEAP_SECONDS
- if (tzh_leapcnt > 0)
- {
- auto& leap_seconds = get_tzdb_list().front().leap_seconds;
- auto itr = leap_seconds.begin();
- auto l = itr->date();
- seconds leap_count{0};
- for (auto t = std::upper_bound(transitions_.begin(), transitions_.end(), l,
- [](const sys_seconds& x, const transition& ct)
- {
- return x < ct.timepoint;
- });
- t != transitions_.end(); ++t)
- {
- while (t->timepoint >= l)
- {
- ++leap_count;
- if (++itr == leap_seconds.end())
- l = sys_days(max_year/max_day);
- else
- l = itr->date() + leap_count;
- }
- t->timepoint -= leap_count;
- }
- }
-#endif // !MISSING_LEAP_SECONDS
- auto b = transitions_.begin();
- auto i = transitions_.end();
- if (i != b)
- {
- for (--i; i != b; --i)
- {
- if (i->info->offset == i[-1].info->offset &&
- i->info->abbrev == i[-1].info->abbrev &&
- i->info->is_dst == i[-1].info->is_dst)
- i = transitions_.erase(i);
- }
- }
-}
-
-void
-time_zone::init() const
-{
- std::call_once(*adjusted_, [this]() {const_cast<time_zone*>(this)->init_impl();});
-}
-
-sys_info
-time_zone::load_sys_info(std::vector<detail::transition>::const_iterator i) const
-{
- using namespace std::chrono;
- assert(!transitions_.empty());
- assert(i != transitions_.begin());
- sys_info r;
- r.begin = i[-1].timepoint;
- r.end = i != transitions_.end() ? i->timepoint :
- sys_seconds(sys_days(year::max()/max_day));
- r.offset = i[-1].info->offset;
- r.save = i[-1].info->is_dst ? minutes{1} : minutes{0};
- r.abbrev = i[-1].info->abbrev;
- return r;
-}
-
-sys_info
-time_zone::get_info_impl(sys_seconds tp) const
-{
- using namespace std;
- init();
- return load_sys_info(upper_bound(transitions_.begin(), transitions_.end(), tp,
- [](const sys_seconds& x, const transition& t)
- {
- return x < t.timepoint;
- }));
-}
-
-local_info
-time_zone::get_info_impl(local_seconds tp) const
-{
- using namespace std::chrono;
- init();
- local_info i;
- i.result = local_info::unique;
- auto tr = upper_bound(transitions_.begin(), transitions_.end(), tp,
- [](const local_seconds& x, const transition& t)
- {
- return sys_seconds{x.time_since_epoch()} -
- t.info->offset < t.timepoint;
- });
- i.first = load_sys_info(tr);
- auto tps = sys_seconds{(tp - i.first.offset).time_since_epoch()};
- if (tps < i.first.begin + days{1} && tr != transitions_.begin())
- {
- i.second = load_sys_info(--tr);
- tps = sys_seconds{(tp - i.second.offset).time_since_epoch()};
- if (tps < i.second.end)
- {
- i.result = local_info::ambiguous;
- std::swap(i.first, i.second);
- }
- else
- {
- i.second = {};
- }
- }
- else if (tps >= i.first.end && tr != transitions_.end())
- {
- i.second = load_sys_info(++tr);
- tps = sys_seconds{(tp - i.second.offset).time_since_epoch()};
- if (tps < i.second.begin)
- i.result = local_info::nonexistent;
- else
- i.second = {};
- }
- return i;
-}
-
-std::ostream&
-operator<<(std::ostream& os, const time_zone& z)
-{
- using namespace std::chrono;
- z.init();
- os << z.name_ << '\n';
- os << "Initially: ";
- auto const& t = z.transitions_.front();
- if (t.info->offset >= seconds{0})
- os << '+';
- os << make_time(t.info->offset);
- if (t.info->is_dst > 0)
- os << " daylight ";
- else
- os << " standard ";
- os << t.info->abbrev << '\n';
- for (auto i = std::next(z.transitions_.cbegin()); i < z.transitions_.cend(); ++i)
- os << *i << '\n';
- return os;
-}
-
-#if !MISSING_LEAP_SECONDS
-
-leap_second::leap_second(const sys_seconds& s, detail::undocumented)
- : date_(s)
-{
-}
-
-#endif // !MISSING_LEAP_SECONDS
-
-#else // !USE_OS_TZDB
-
-time_zone::time_zone(const std::string& s, detail::undocumented)
- : adjusted_(new std::once_flag{})
-{
- try
- {
- using namespace date;
- std::istringstream in(s);
- in.exceptions(std::ios::failbit | std::ios::badbit);
- std::string word;
- in >> word >> name_;
- parse_info(in);
- }
- catch (...)
- {
- std::cerr << s << '\n';
- std::cerr << *this << '\n';
- zonelets_.pop_back();
- throw;
- }
-}
-
-sys_info
-time_zone::get_info_impl(sys_seconds tp) const
-{
- return get_info_impl(tp, static_cast<int>(tz::utc));
-}
-
-local_info
-time_zone::get_info_impl(local_seconds tp) const
-{
- using namespace std::chrono;
- local_info i{};
- i.first = get_info_impl(sys_seconds{tp.time_since_epoch()}, static_cast<int>(tz::local));
- auto tps = sys_seconds{(tp - i.first.offset).time_since_epoch()};
- if (tps < i.first.begin)
- {
- i.second = std::move(i.first);
- i.first = get_info_impl(i.second.begin - seconds{1}, static_cast<int>(tz::utc));
- i.result = local_info::nonexistent;
- }
- else if (i.first.end - tps <= days{1})
- {
- i.second = get_info_impl(i.first.end, static_cast<int>(tz::utc));
- tps = sys_seconds{(tp - i.second.offset).time_since_epoch()};
- if (tps >= i.second.begin)
- i.result = local_info::ambiguous;
- else
- i.second = {};
- }
- return i;
-}
-
-void
-time_zone::add(const std::string& s)
-{
- try
- {
- std::istringstream in(s);
- in.exceptions(std::ios::failbit | std::ios::badbit);
- ws(in);
- if (!in.eof() && in.peek() != '#')
- parse_info(in);
- }
- catch (...)
- {
- std::cerr << s << '\n';
- std::cerr << *this << '\n';
- zonelets_.pop_back();
- throw;
- }
-}
-
-void
-time_zone::parse_info(std::istream& in)
-{
- using namespace date;
- using namespace std::chrono;
- zonelets_.emplace_back();
- auto& zonelet = zonelets_.back();
- zonelet.gmtoff_ = parse_signed_time(in);
- in >> zonelet.u.rule_;
- if (zonelet.u.rule_ == "-")
- zonelet.u.rule_.clear();
- in >> zonelet.format_;
- if (!in.eof())
- ws(in);
- if (in.eof() || in.peek() == '#')
- {
- zonelet.until_year_ = year::max();
- zonelet.until_date_ = MonthDayTime(max_day, tz::utc);
- }
- else
- {
- int y;
- in >> y;
- zonelet.until_year_ = year{y};
- in >> zonelet.until_date_;
- zonelet.until_date_.canonicalize(zonelet.until_year_);
- }
- if ((zonelet.until_year_ < min_year) ||
- (zonelets_.size() > 1 && zonelets_.end()[-2].until_year_ > max_year))
- zonelets_.pop_back();
-}
-
-void
-time_zone::adjust_infos(const std::vector<Rule>& rules)
-{
- using namespace std::chrono;
- using namespace date;
- const zonelet* prev_zonelet = nullptr;
- for (auto& z : zonelets_)
- {
- std::pair<const Rule*, const Rule*> eqr{};
- std::istringstream in;
- in.exceptions(std::ios::failbit | std::ios::badbit);
- // Classify info as rule-based, has save, or neither
- if (!z.u.rule_.empty())
- {
- // Find out if this zonelet has a rule or a save
- eqr = std::equal_range(rules.data(), rules.data() + rules.size(), z.u.rule_);
- if (eqr.first == eqr.second)
- {
- // The rule doesn't exist. Assume this is a save
- try
- {
- using namespace std::chrono;
- using string = std::string;
- in.str(z.u.rule_);
- auto tmp = duration_cast<minutes>(parse_signed_time(in));
-#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
- z.u.rule_.~string();
- z.tag_ = zonelet::has_save;
- ::new(&z.u.save_) minutes(tmp);
-#else
- z.u.rule_.clear();
- z.tag_ = zonelet::has_save;
- z.u.save_ = tmp;
-#endif
- }
- catch (...)
- {
- std::cerr << name_ << " : " << z.u.rule_ << '\n';
- throw;
- }
- }
- }
- else
- {
- // This zone::zonelet has no rule and no save
- z.tag_ = zonelet::is_empty;
- }
-
- minutes final_save{0};
- if (z.tag_ == zonelet::has_save)
- {
- final_save = z.u.save_;
- }
- else if (z.tag_ == zonelet::has_rule)
- {
- z.last_rule_ = find_rule_for_zone(eqr, z.until_year_, z.gmtoff_,
- z.until_date_);
- if (z.last_rule_.first != nullptr)
- final_save = z.last_rule_.first->save();
- }
- z.until_utc_ = z.until_date_.to_sys(z.until_year_, z.gmtoff_, final_save);
- z.until_std_ = local_seconds{z.until_utc_.time_since_epoch()} + z.gmtoff_;
- z.until_loc_ = z.until_std_ + final_save;
-
- if (z.tag_ == zonelet::has_rule)
- {
- if (prev_zonelet != nullptr)
- {
- z.first_rule_ = find_rule_for_zone(eqr, prev_zonelet->until_utc_,
- prev_zonelet->until_std_,
- prev_zonelet->until_loc_);
- if (z.first_rule_.first != nullptr)
- {
- z.initial_save_ = z.first_rule_.first->save();
- z.initial_abbrev_ = z.first_rule_.first->abbrev();
- if (z.first_rule_ != z.last_rule_)
- {
- z.first_rule_ = find_next_rule(eqr.first, eqr.second,
- z.first_rule_.first,
- z.first_rule_.second);
- }
- else
- {
- z.first_rule_ = std::make_pair(nullptr, year::min());
- z.last_rule_ = std::make_pair(nullptr, year::max());
- }
- }
- }
- if (z.first_rule_.first == nullptr && z.last_rule_.first != nullptr)
- {
- z.first_rule_ = std::make_pair(eqr.first, eqr.first->starting_year());
- z.initial_abbrev_ = find_first_std_rule(eqr)->abbrev();
- }
- }
-
-#ifndef NDEBUG
- if (z.first_rule_.first == nullptr)
- {
- assert(z.first_rule_.second == year::min());
- assert(z.last_rule_.first == nullptr);
- assert(z.last_rule_.second == year::max());
- }
- else
- {
- assert(z.last_rule_.first != nullptr);
- }
-#endif
- prev_zonelet = &z;
- }
-}
-
// Expands a zone FORMAT string into a concrete abbreviation.
//   * "%s" is replaced by `variable`;
//   * otherwise "STD/DST" keeps the part before '/' when `save` is zero and
//     the part after it when `save` is non-zero;
//   * otherwise "%z" is replaced by the signed offset `off` written as
//     +hh, +hhmm or +hhmmss (minutes/seconds only when non-zero remainder).
// Only the first matching form is applied; a format with none of them is
// returned unchanged.
static
std::string
format_abbrev(std::string format, const std::string& variable, std::chrono::seconds off,
              std::chrono::minutes save)
{
    using namespace std::chrono;

    const auto percent_s = format.find("%s");
    if (percent_s != std::string::npos)
    {
        format.replace(percent_s, 2, variable);
        return format;
    }

    const auto slash = format.find('/');
    if (slash != std::string::npos)
    {
        if (save == minutes{0})
            format.erase(slash);
        else
            format.erase(0, slash + 1);
        return format;
    }

    const auto percent_z = format.find("%z");
    if (percent_z == std::string::npos)
        return format;

    std::string text;
    if (off < seconds{0})
    {
        text = '-';
        off = -off;
    }
    else
    {
        text = '+';
    }

    // Appends a count as at least two decimal digits.
    auto append_padded = [&text](long long count)
    {
        if (count < 10)
            text += '0';
        text += std::to_string(count);
    };

    // `off` is non-negative from here on, so truncation equals floor.
    const auto h = duration_cast<hours>(off);
    off -= h;
    append_padded(h.count());
    if (off > seconds{0})
    {
        const auto m = duration_cast<minutes>(off);
        off -= m;
        append_padded(m.count());
        if (off > seconds{0})
            append_padded(off.count());
    }
    format.replace(percent_z, 2, text);
    return format;
}
-
-sys_info
-time_zone::get_info_impl(sys_seconds tp, int tz_int) const
-{
- using namespace std::chrono;
- using namespace date;
- tz timezone = static_cast<tz>(tz_int);
- assert(timezone != tz::standard);
- auto y = year_month_day(floor<days>(tp)).year();
- if (y < min_year || y > max_year)
- throw std::runtime_error("The year " + std::to_string(static_cast<int>(y)) +
- " is out of range:[" + std::to_string(static_cast<int>(min_year)) + ", "
- + std::to_string(static_cast<int>(max_year)) + "]");
- std::call_once(*adjusted_,
- [this]()
- {
- const_cast<time_zone*>(this)->adjust_infos(get_tzdb().rules);
- });
- auto i = std::upper_bound(zonelets_.begin(), zonelets_.end(), tp,
- [timezone](sys_seconds t, const zonelet& zl)
- {
- return timezone == tz::utc ? t < zl.until_utc_ :
- t < sys_seconds{zl.until_loc_.time_since_epoch()};
- });
-
- sys_info r{};
- if (i != zonelets_.end())
- {
- if (i->tag_ == zonelet::has_save)
- {
- if (i != zonelets_.begin())
- r.begin = i[-1].until_utc_;
- else
- r.begin = sys_days(year::min()/min_day);
- r.end = i->until_utc_;
- r.offset = i->gmtoff_ + i->u.save_;
- r.save = i->u.save_;
- }
- else if (i->u.rule_.empty())
- {
- if (i != zonelets_.begin())
- r.begin = i[-1].until_utc_;
- else
- r.begin = sys_days(year::min()/min_day);
- r.end = i->until_utc_;
- r.offset = i->gmtoff_;
- }
- else
- {
- r = find_rule(i->first_rule_, i->last_rule_, y, i->gmtoff_,
- MonthDayTime(local_seconds{tp.time_since_epoch()}, timezone),
- i->initial_save_, i->initial_abbrev_);
- r.offset = i->gmtoff_ + r.save;
- if (i != zonelets_.begin() && r.begin < i[-1].until_utc_)
- r.begin = i[-1].until_utc_;
- if (r.end > i->until_utc_)
- r.end = i->until_utc_;
- }
- r.abbrev = format_abbrev(i->format_, r.abbrev, r.offset, r.save);
- assert(r.begin < r.end);
- }
- return r;
-}
-
-std::ostream&
-operator<<(std::ostream& os, const time_zone& z)
-{
- using namespace date;
- using namespace std::chrono;
- detail::save_ostream<char> _(os);
- os.fill(' ');
- os.flags(std::ios::dec | std::ios::left);
- std::call_once(*z.adjusted_,
- [&z]()
- {
- const_cast<time_zone&>(z).adjust_infos(get_tzdb().rules);
- });
- os.width(35);
- os << z.name_;
- std::string indent;
- for (auto const& s : z.zonelets_)
- {
- os << indent;
- if (s.gmtoff_ >= seconds{0})
- os << ' ';
- os << make_time(s.gmtoff_) << " ";
- os.width(15);
- if (s.tag_ != zonelet::has_save)
- os << s.u.rule_;
- else
- {
- std::ostringstream tmp;
- tmp << make_time(s.u.save_);
- os << tmp.str();
- }
- os.width(8);
- os << s.format_ << " ";
- os << s.until_year_ << ' ' << s.until_date_;
- os << " " << s.until_utc_ << " UTC";
- os << " " << s.until_std_ << " STD";
- os << " " << s.until_loc_;
- os << " " << make_time(s.initial_save_);
- os << " " << s.initial_abbrev_;
- if (s.first_rule_.first != nullptr)
- os << " {" << *s.first_rule_.first << ", " << s.first_rule_.second << '}';
- else
- os << " {" << "nullptr" << ", " << s.first_rule_.second << '}';
- if (s.last_rule_.first != nullptr)
- os << " {" << *s.last_rule_.first << ", " << s.last_rule_.second << '}';
- else
- os << " {" << "nullptr" << ", " << s.last_rule_.second << '}';
- os << '\n';
- if (indent.empty())
- indent = std::string(35, ' ');
- }
- return os;
-}
-
-#endif // !USE_OS_TZDB
-
-#if !MISSING_LEAP_SECONDS
-
-std::ostream&
-operator<<(std::ostream& os, const leap_second& x)
-{
- using namespace date;
- return os << x.date_ << " +";
-}
-
-#endif // !MISSING_LEAP_SECONDS
-
-#if USE_OS_TZDB
-
-# ifdef __APPLE__
-static
-std::string
-get_version()
-{
- using namespace std;
- auto path = get_tz_dir() + string("/+VERSION");
- ifstream in{path};
- string version;
- in >> version;
- if (in.fail())
- throw std::runtime_error("Unable to get Timezone database version from " + path);
- return version;
-}
-# endif
-
-static
-std::unique_ptr<tzdb>
-init_tzdb()
-{
- std::unique_ptr<tzdb> db(new tzdb);
-
- //Iterate through folders
- std::queue<std::string> subfolders;
- subfolders.emplace(get_tz_dir());
- struct dirent* d;
- struct stat s;
- while (!subfolders.empty())
- {
- auto dirname = std::move(subfolders.front());
- subfolders.pop();
- auto dir = opendir(dirname.c_str());
- if (!dir)
- continue;
- while ((d = readdir(dir)) != nullptr)
- {
- // Ignore these files:
- if (d->d_name[0] == '.' || // curdir, prevdir, hidden
- memcmp(d->d_name, "posix", 5) == 0 || // starts with posix
- strcmp(d->d_name, "Factory") == 0 ||
- strcmp(d->d_name, "iso3166.tab") == 0 ||
- strcmp(d->d_name, "right") == 0 ||
- strcmp(d->d_name, "+VERSION") == 0 ||
- strcmp(d->d_name, "zone.tab") == 0 ||
- strcmp(d->d_name, "zone1970.tab") == 0 ||
- strcmp(d->d_name, "tzdata.zi") == 0 ||
- strcmp(d->d_name, "leapseconds") == 0 ||
- strcmp(d->d_name, "leap-seconds.list") == 0 )
- continue;
- auto subname = dirname + folder_delimiter + d->d_name;
- if(stat(subname.c_str(), &s) == 0)
- {
- if(S_ISDIR(s.st_mode))
- {
- if(!S_ISLNK(s.st_mode))
- {
- subfolders.push(subname);
- }
- }
- else
- {
- db->zones.emplace_back(subname.substr(get_tz_dir().size()+1),
- detail::undocumented{});
- }
- }
- }
- closedir(dir);
- }
- db->zones.shrink_to_fit();
- std::sort(db->zones.begin(), db->zones.end());
-# if !MISSING_LEAP_SECONDS
- std::ifstream in(get_tz_dir() + std::string(1, folder_delimiter) + "right/UTC",
- std::ios_base::binary);
- if (in)
- {
- in.exceptions(std::ios::failbit | std::ios::badbit);
- db->leap_seconds = load_just_leaps(in);
- }
- else
- {
- in.clear();
- in.open(get_tz_dir() + std::string(1, folder_delimiter) +
- "UTC", std::ios_base::binary);
- if (!in)
- throw std::runtime_error("Unable to extract leap second information");
- in.exceptions(std::ios::failbit | std::ios::badbit);
- db->leap_seconds = load_just_leaps(in);
- }
-# endif // !MISSING_LEAP_SECONDS
-# ifdef __APPLE__
- db->version = get_version();
-# endif
- return db;
-}
-
-#else // !USE_OS_TZDB
-
-// time_zone_link
-
-time_zone_link::time_zone_link(const std::string& s)
-{
- using namespace date;
- std::istringstream in(s);
- in.exceptions(std::ios::failbit | std::ios::badbit);
- std::string word;
- in >> word >> target_ >> name_;
-}
-
-std::ostream&
-operator<<(std::ostream& os, const time_zone_link& x)
-{
- using namespace date;
- detail::save_ostream<char> _(os);
- os.fill(' ');
- os.flags(std::ios::dec | std::ios::left);
- os.width(35);
- return os << x.name_ << " --> " << x.target_;
-}
-
-// leap_second
-
-leap_second::leap_second(const std::string& s, detail::undocumented)
-{
- using namespace date;
- std::istringstream in(s);
- in.exceptions(std::ios::failbit | std::ios::badbit);
- std::string word;
- int y;
- MonthDayTime date;
- in >> word >> y >> date;
- date_ = date.to_time_point(year(y));
-}
-
// Returns true when `filename` names an existing file system entry
// (existence check only; no read/write permission is tested).
static
bool
file_exists(const std::string& filename)
{
    const char* const path = filename.c_str();
#ifdef _WIN32
    const int rc = ::_access(path, 0);
#else
    const int rc = ::access(path, F_OK);
#endif
    return rc == 0;
}
-
-#if HAS_REMOTE_API
-
-// CURL tools
-
-static
-int
-curl_global()
-{
- if (::curl_global_init(CURL_GLOBAL_DEFAULT) != 0)
- throw std::runtime_error("CURL global initialization failed");
- return 0;
-}
-
-namespace
-{
-
-struct curl_deleter
-{
- void operator()(CURL* p) const
- {
- ::curl_easy_cleanup(p);
- }
-};
-
-} // unnamed namespace
-
-static
-std::unique_ptr<CURL, curl_deleter>
-curl_init()
-{
- static const auto curl_is_now_initiailized = curl_global();
- (void)curl_is_now_initiailized;
- return std::unique_ptr<CURL, curl_deleter>{::curl_easy_init()};
-}
-
-static
-bool
-download_to_string(const std::string& url, std::string& str)
-{
- str.clear();
- auto curl = curl_init();
- if (!curl)
- return false;
- std::string version;
- curl_easy_setopt(curl.get(), CURLOPT_USERAGENT, "curl");
- curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
- curl_write_callback write_cb = [](char* contents, std::size_t size, std::size_t nmemb,
- void* userp) -> std::size_t
- {
- auto& userstr = *static_cast<std::string*>(userp);
- auto realsize = size * nmemb;
- userstr.append(contents, realsize);
- return realsize;
- };
- curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, write_cb);
- curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &str);
- curl_easy_setopt(curl.get(), CURLOPT_SSL_VERIFYPEER, false);
- auto res = curl_easy_perform(curl.get());
- return (res == CURLE_OK);
-}
-
namespace
{
    // Selects whether download_to_file opens its destination in binary or
    // text mode.
    enum class download_file_options
    {
        binary,
        text
    };
}
-
-static
-bool
-download_to_file(const std::string& url, const std::string& local_filename,
- download_file_options opts, char* error_buffer)
-{
- auto curl = curl_init();
- if (!curl)
- return false;
- curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
- curl_easy_setopt(curl.get(), CURLOPT_SSL_VERIFYPEER, false);
- if (error_buffer)
- curl_easy_setopt(curl.get(), CURLOPT_ERRORBUFFER, error_buffer);
- curl_write_callback write_cb = [](char* contents, std::size_t size, std::size_t nmemb,
- void* userp) -> std::size_t
- {
- auto& of = *static_cast<std::ofstream*>(userp);
- auto realsize = size * nmemb;
- of.write(contents, static_cast<std::streamsize>(realsize));
- return realsize;
- };
- curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, write_cb);
- decltype(curl_easy_perform(curl.get())) res;
- {
- std::ofstream of(local_filename,
- opts == download_file_options::binary ?
- std::ofstream::out | std::ofstream::binary :
- std::ofstream::out);
- of.exceptions(std::ios::badbit);
- curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &of);
- res = curl_easy_perform(curl.get());
- }
- return res == CURLE_OK;
-}
-
-std::string
-remote_version()
-{
- std::string version;
- std::string str;
- if (download_to_string("https://www.iana.org/time-zones", str))
- {
- CONSTDATA char db[] = "/time-zones/releases/tzdata";
- CONSTDATA auto db_size = sizeof(db) - 1;
- auto p = str.find(db, 0, db_size);
- const int ver_str_len = 5;
- if (p != std::string::npos && p + (db_size + ver_str_len) <= str.size())
- version = str.substr(p + db_size, ver_str_len);
- }
- return version;
-}
-
-
-// TODO! Using system() creates a process and a console window.
-// This is useful to see what errors may occur but is slow and distracting.
-// Consider implementing this functionality more directly, such as
-// using _mkdir and CreateProcess etc.
-// But use the current means now as matches Unix implementations and while
-// in proof of concept / testing phase.
-// TODO! Use <filesystem> eventually.
// Recursively deletes `folder` and everything inside it.
// Returns true when the folder was removed.
//   * Windows + USE_SHELL_API : runs "rd /s /q".
//   * Windows                 : SHFileOperation(FO_DELETE).
//   * POSIX + USE_SHELL_API   : runs "rm -R".
//   * POSIX                   : walks the tree with opendir/readdir.
// The POSIX walker examines entries with lstat(), so a symbolic link is
// unlinked itself and the directory it points to is never entered.
static
bool
remove_folder_and_subfolders(const std::string& folder)
{
#  ifdef _WIN32
#    if USE_SHELL_API
    // Delete the folder contents by deleting the folder.
    std::string cmd = "rd /s /q \"";
    cmd += folder;
    cmd += '\"';
    return std::system(cmd.c_str()) == EXIT_SUCCESS;
#    else  // !USE_SHELL_API
    // Create a buffer containing the path to delete. It must be terminated
    // by two nuls. Who designs these API's...
    std::vector<char> from;
    from.assign(folder.begin(), folder.end());
    from.push_back('\0');
    from.push_back('\0');
    SHFILEOPSTRUCT fo{}; // Zero initialize.
    fo.wFunc = FO_DELETE;
    fo.pFrom = from.data();
    fo.fFlags = FOF_NO_UI;
    int ret = SHFileOperation(&fo);
    if (ret == 0 && !fo.fAnyOperationsAborted)
        return true;
    return false;
#    endif  // !USE_SHELL_API
#  else   // !_WIN32
#    if USE_SHELL_API
    return std::system(("rm -R " + folder).c_str()) == EXIT_SUCCESS;
#    else // !USE_SHELL_API
    struct dir_deleter {
        dir_deleter() {}
        void operator()(DIR* d) const
        {
            if (d != nullptr)
            {
                int result = closedir(d);
                assert(result == 0);
                (void)result;   // unused when NDEBUG removes the assert
            }
        }
    };
    using closedir_ptr = std::unique_ptr<DIR, dir_deleter>;

    struct stat statbuf;
    struct dirent* p = nullptr;

    closedir_ptr d(opendir(folder.c_str()));
    bool r = d.get() != nullptr;
    while (r && (p = readdir(d.get())) != nullptr)
    {
        if (strcmp(p->d_name, ".") == 0 || strcmp(p->d_name, "..") == 0)
            continue;

        const std::string filename = folder + '/' + p->d_name;

        // lstat, not stat: a symlink to a directory must be removed as a
        // link, not followed and emptied.
        if (lstat(filename.c_str(), &statbuf) == 0)
            r = S_ISDIR(statbuf.st_mode)
              ? remove_folder_and_subfolders(filename)
              : unlink(filename.c_str()) == 0;
    }
    d.reset();

    if (r)
        r = rmdir(folder.c_str()) == 0;

    return r;
#    endif // !USE_SHELL_API
#  endif  // !_WIN32
}
-
// Creates the directory `folder`; returns true on success.
// With USE_SHELL_API the platform's mkdir command is run through
// std::system(); otherwise _mkdir (Windows) or mkdir with mode 0777 (POSIX)
// is called directly.
static
bool
make_directory(const std::string& folder)
{
#  if defined(_WIN32) && USE_SHELL_API
    // Re-create the folder.
    std::string cmd = "mkdir \"";
    cmd += folder;
    cmd += '\"';
    const bool created = std::system(cmd.c_str()) == EXIT_SUCCESS;
#  elif defined(_WIN32)
    const bool created = _mkdir(folder.c_str()) == 0;
#  elif USE_SHELL_API
    const bool created = std::system(("mkdir -p " + folder).c_str()) == EXIT_SUCCESS;
#  else
    const bool created = mkdir(folder.c_str(), 0777) == 0;
#  endif
    return created;
}
-
// Deletes the file `file`; returns true on success.
// With USE_SHELL_API the platform's delete command is run through
// std::system(); otherwise _unlink (Windows) or unlink (POSIX) is called.
static
bool
delete_file(const std::string& file)
{
#  if defined(_WIN32) && USE_SHELL_API
    std::string cmd = "del \"";
    cmd += file;
    cmd += '\"';
    const bool removed = std::system(cmd.c_str()) == 0;
#  elif defined(_WIN32)
    const bool removed = _unlink(file.c_str()) == 0;
#  elif USE_SHELL_API
    const bool removed = std::system(("rm " + file).c_str()) == EXIT_SUCCESS;
#  else
    const bool removed = unlink(file.c_str()) == 0;
#  endif
    return removed;
}
-
-# ifdef _WIN32
-
-static
-bool
-move_file(const std::string& from, const std::string& to)
-{
-# if USE_SHELL_API
- std::string cmd = "move \"";
- cmd += from;
- cmd += "\" \"";
- cmd += to;
- cmd += '\"';
- return std::system(cmd.c_str()) == EXIT_SUCCESS;
-# else // !USE_SHELL_API
- return !!::MoveFile(from.c_str(), to.c_str());
-# endif // !USE_SHELL_API
-}
-
-// Usually something like "c:\Program Files".
-static
-std::string
-get_program_folder()
-{
- return get_known_folder(FOLDERID_ProgramFiles);
-}
-
-// Note folder can and usually does contain spaces.
-static
-std::string
-get_unzip_program()
-{
- std::string path;
-
- // 7-Zip appears to note its location in the registry.
- // If that doesn't work, fall through and take a guess, but it will likely be wrong.
- HKEY hKey = nullptr;
- if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, "SOFTWARE\\7-Zip", 0, KEY_READ, &hKey) == ERROR_SUCCESS)
- {
- char value_buffer[MAX_PATH + 1]; // fyi 260 at time of writing.
- // in/out parameter. Documentation say that size is a count of bytes not chars.
- DWORD size = sizeof(value_buffer) - sizeof(value_buffer[0]);
- DWORD tzi_type = REG_SZ;
- // Testing shows Path key value is "C:\Program Files\7-Zip\" i.e. always with trailing \.
- bool got_value = (RegQueryValueExA(hKey, "Path", nullptr, &tzi_type,
- reinterpret_cast<LPBYTE>(value_buffer), &size) == ERROR_SUCCESS);
- RegCloseKey(hKey); // Close now incase of throw later.
- if (got_value)
- {
- // Function does not guarantee to null terminate.
- value_buffer[size / sizeof(value_buffer[0])] = '\0';
- path = value_buffer;
- if (!path.empty())
- {
- path += "7z.exe";
- return path;
- }
- }
- }
- path += get_program_folder();
- path += folder_delimiter;
- path += "7-Zip\\7z.exe";
- return path;
-}
-
-# if !USE_SHELL_API
-static
-int
-run_program(const std::string& command)
-{
- STARTUPINFO si{};
- si.cb = sizeof(si);
- PROCESS_INFORMATION pi{};
-
- // Allegedly CreateProcess overwrites the command line. Ugh.
- std::string mutable_command(command);
- if (CreateProcess(nullptr, &mutable_command[0],
- nullptr, nullptr, FALSE, CREATE_NO_WINDOW, nullptr, nullptr, &si, &pi))
- {
- WaitForSingleObject(pi.hProcess, INFINITE);
- DWORD exit_code;
- bool got_exit_code = !!GetExitCodeProcess(pi.hProcess, &exit_code);
- CloseHandle(pi.hProcess);
- CloseHandle(pi.hThread);
- // Not 100% sure about this still active thing is correct,
- // but I'm going with it because I *think* WaitForSingleObject might
- // return in some cases without INFINITE-ly waiting.
- // But why/wouldn't GetExitCodeProcess return false in that case?
- if (got_exit_code && exit_code != STILL_ACTIVE)
- return static_cast<int>(exit_code);
- }
- return EXIT_FAILURE;
-}
-# endif // !USE_SHELL_API
-
-static
-std::string
-get_download_tar_file(const std::string& version)
-{
- auto file = get_install();
- file += folder_delimiter;
- file += "tzdata";
- file += version;
- file += ".tar";
- return file;
-}
-
-static
-bool
-extract_gz_file(const std::string& version, const std::string& gz_file,
- const std::string& dest_folder)
-{
- auto unzip_prog = get_unzip_program();
- bool unzip_result = false;
- // Use the unzip program to extract the tar file from the archive.
-
- // Aim to create a string like:
- // "C:\Program Files\7-Zip\7z.exe" x "C:\Users\SomeUser\Downloads\tzdata2016d.tar.gz"
- // -o"C:\Users\SomeUser\Downloads\tzdata"
- std::string cmd;
- cmd = '\"';
- cmd += unzip_prog;
- cmd += "\" x \"";
- cmd += gz_file;
- cmd += "\" -o\"";
- cmd += dest_folder;
- cmd += '\"';
-
-# if USE_SHELL_API
- // When using shelling out with std::system() extra quotes are required around the
- // whole command. It's weird but necessary it seems, see:
- // http://stackoverflow.com/q/27975969/576911
-
- cmd = "\"" + cmd + "\"";
- if (std::system(cmd.c_str()) == EXIT_SUCCESS)
- unzip_result = true;
-# else // !USE_SHELL_API
- if (run_program(cmd) == EXIT_SUCCESS)
- unzip_result = true;
-# endif // !USE_SHELL_API
- if (unzip_result)
- delete_file(gz_file);
-
- // Use the unzip program extract the data from the tar file that was
- // just extracted from the archive.
- auto tar_file = get_download_tar_file(version);
- cmd = '\"';
- cmd += unzip_prog;
- cmd += "\" x \"";
- cmd += tar_file;
- cmd += "\" -o\"";
- cmd += get_install();
- cmd += '\"';
-# if USE_SHELL_API
- cmd = "\"" + cmd + "\"";
- if (std::system(cmd.c_str()) == EXIT_SUCCESS)
- unzip_result = true;
-# else // !USE_SHELL_API
- if (run_program(cmd) == EXIT_SUCCESS)
- unzip_result = true;
-# endif // !USE_SHELL_API
-
- if (unzip_result)
- delete_file(tar_file);
-
- return unzip_result;
-}
-
-static
-std::string
-get_download_mapping_file(const std::string& version)
-{
- auto file = get_install() + version + "windowsZones.xml";
- return file;
-}
-
-# else // !_WIN32
-
-# if !USE_SHELL_API
// Runs `prog` with the null-terminated argument vector `args` in a forked
// child and waits for it.  Returns the child's exit status when it exited
// normally, EXIT_FAILURE when fork/waitpid failed or the child did not exit
// normally.  A child whose execv fails terminates with status 127.
static
int
run_program(const char* prog, const char*const args[])
{
    const pid_t pid = fork();
    if (pid == -1) // Child failed to start.
        return EXIT_FAILURE;

    if (pid == 0)
    {
        // We are in the child process. Start the program the parent wants to run.
        if (execv(prog, const_cast<char**>(args)) == -1) // Does not return.
        {
            perror("unreachable 0\n");
            _Exit(127);
        }
        // execv only ever returns on failure, so none of this should run.
        printf("unreachable 2\n");
        printf("unreachable 2\n");
        assert(false);
        exit(EXIT_FAILURE);
    }

    // We are in the parent. Child started. Wait for it, retrying when the
    // wait is interrupted by a signal.
    pid_t waited;
    int status;
    do
    {
        waited = waitpid(pid, &status, 0);
    } while (waited == -1 && errno == EINTR);

    if (waited != -1 && WIFEXITED(status))
        return WEXITSTATUS(status);

    printf("Child issues!\n");
    return EXIT_FAILURE; // Not sure what status of child is.
}
-# endif // !USE_SHELL_API
-
-static
-bool
-extract_gz_file(const std::string&, const std::string& gz_file, const std::string&)
-{
-# if USE_SHELL_API
- bool unzipped = std::system(("tar -xzf " + gz_file + " -C " + get_install()).c_str()) == EXIT_SUCCESS;
-# else // !USE_SHELL_API
- const char prog[] = {"/usr/bin/tar"};
- const char*const args[] =
- {
- prog, "-xzf", gz_file.c_str(), "-C", get_install().c_str(), nullptr
- };
- bool unzipped = (run_program(prog, args) == EXIT_SUCCESS);
-# endif // !USE_SHELL_API
- if (unzipped)
- {
- delete_file(gz_file);
- return true;
- }
- return false;
-}
-
-# endif // !_WIN32
-
-bool
-remote_download(const std::string& version, char* error_buffer)
-{
- assert(!version.empty());
-
-# ifdef _WIN32
- // Download folder should be always available for Windows
-# else // !_WIN32
- // Create download folder if it does not exist on UNIX system
- auto download_folder = get_install();
- if (!file_exists(download_folder))
- {
- if (!make_directory(download_folder))
- return false;
- }
-# endif // _WIN32
-
- auto url = "https://data.iana.org/time-zones/releases/tzdata" + version +
- ".tar.gz";
- bool result = download_to_file(url, get_download_gz_file(version),
- download_file_options::binary, error_buffer);
-# ifdef _WIN32
- if (result)
- {
- auto mapping_file = get_download_mapping_file(version);
- result = download_to_file(
- "https://raw.githubusercontent.com/unicode-org/cldr/master/"
- "common/supplemental/windowsZones.xml",
- mapping_file, download_file_options::text, error_buffer);
- }
-# endif // _WIN32
- return result;
-}
-
-bool
-remote_install(const std::string& version)
-{
- auto success = false;
- assert(!version.empty());
-
- std::string install = get_install();
- auto gz_file = get_download_gz_file(version);
- if (file_exists(gz_file))
- {
- if (file_exists(install))
- remove_folder_and_subfolders(install);
- if (make_directory(install))
- {
- if (extract_gz_file(version, gz_file, install))
- success = true;
-# ifdef _WIN32
- auto mapping_file_source = get_download_mapping_file(version);
- auto mapping_file_dest = get_install();
- mapping_file_dest += folder_delimiter;
- mapping_file_dest += "windowsZones.xml";
- if (!move_file(mapping_file_source, mapping_file_dest))
- success = false;
-# endif // _WIN32
- }
- }
- return success;
-}
-
-#endif // HAS_REMOTE_API
-
// Determines the database version stored under `path` (which is used as a
// prefix, so it should end with a folder delimiter).
//   * If "<path>version" opens, its first token is the version.
//   * Otherwise "<path>NEWS" is scanned and the token following the first
//     "Release" is returned.
// Throws std::runtime_error when neither source yields a version.
static
std::string
get_version(const std::string& path)
{
    std::string token;
    std::ifstream infile(path + "version");
    if (!infile.is_open())
    {
        infile.open(path + "NEWS");
        while (infile)
        {
            infile >> token;
            if (token != "Release")
                continue;
            infile >> token;
            return token;
        }
    }
    else
    {
        infile >> token;
        if (!infile.fail())
            return token;
    }
    throw std::runtime_error("Unable to get Timezone database version from " + path);
}
-
-static
-std::unique_ptr<tzdb>
-init_tzdb()
-{
- using namespace date;
- const std::string install = get_install();
- const std::string path = install + folder_delimiter;
- std::string line;
- bool continue_zone = false;
- std::unique_ptr<tzdb> db(new tzdb);
-
-#if AUTO_DOWNLOAD
- if (!file_exists(install))
- {
- auto rv = remote_version();
- if (!rv.empty() && remote_download(rv))
- {
- if (!remote_install(rv))
- {
- std::string msg = "Timezone database version \"";
- msg += rv;
- msg += "\" did not install correctly to \"";
- msg += install;
- msg += "\"";
- throw std::runtime_error(msg);
- }
- }
- if (!file_exists(install))
- {
- std::string msg = "Timezone database not found at \"";
- msg += install;
- msg += "\"";
- throw std::runtime_error(msg);
- }
- db->version = get_version(path);
- }
- else
- {
- db->version = get_version(path);
- auto rv = remote_version();
- if (!rv.empty() && db->version != rv)
- {
- if (remote_download(rv))
- {
- remote_install(rv);
- db->version = get_version(path);
- }
- }
- }
-#else // !AUTO_DOWNLOAD
- if (!file_exists(install))
- {
- std::string msg = "Timezone database not found at \"";
- msg += install;
- msg += "\"";
- throw std::runtime_error(msg);
- }
- db->version = get_version(path);
-#endif // !AUTO_DOWNLOAD
-
- CONSTDATA char*const files[] =
- {
- "africa", "antarctica", "asia", "australasia", "backward", "etcetera", "europe",
- "pacificnew", "northamerica", "southamerica", "systemv", "leapseconds"
- };
-
- for (const auto& filename : files)
- {
- std::ifstream infile(path + filename);
- while (infile)
- {
- std::getline(infile, line);
- if (!line.empty() && line[0] != '#')
- {
- std::istringstream in(line);
- std::string word;
- in >> word;
- if (word == "Rule")
- {
- db->rules.push_back(Rule(line));
- continue_zone = false;
- }
- else if (word == "Link")
- {
- db->links.push_back(time_zone_link(line));
- continue_zone = false;
- }
- else if (word == "Leap")
- {
- db->leap_seconds.push_back(leap_second(line, detail::undocumented{}));
- continue_zone = false;
- }
- else if (word == "Zone")
- {
- db->zones.push_back(time_zone(line, detail::undocumented{}));
- continue_zone = true;
- }
- else if (line[0] == '\t' && continue_zone)
- {
- db->zones.back().add(line);
- }
- else
- {
- std::cerr << line << '\n';
- }
- }
- }
- }
- std::sort(db->rules.begin(), db->rules.end());
- Rule::split_overlaps(db->rules);
- std::sort(db->zones.begin(), db->zones.end());
- db->zones.shrink_to_fit();
- std::sort(db->links.begin(), db->links.end());
- db->links.shrink_to_fit();
- std::sort(db->leap_seconds.begin(), db->leap_seconds.end());
- db->leap_seconds.shrink_to_fit();
-
-#ifdef _WIN32
- std::string mapping_file = get_install() + folder_delimiter + "windowsZones.xml";
- db->mappings = load_timezone_mappings_from_xml_file(mapping_file);
- sort_zone_mappings(db->mappings);
-#endif // _WIN32
-
- return db;
-}
-
-const tzdb&
-reload_tzdb()
-{
-#if AUTO_DOWNLOAD
- auto const& v = get_tzdb_list().front().version;
- if (!v.empty() && v == remote_version())
- return get_tzdb_list().front();
-#endif // AUTO_DOWNLOAD
- tzdb_list::undocumented_helper::push_front(get_tzdb_list(), init_tzdb().release());
- return get_tzdb_list().front();
-}
-
-#endif // !USE_OS_TZDB
-
-const tzdb&
-get_tzdb()
-{
- return get_tzdb_list().front();
-}
-
-const time_zone*
-#if HAS_STRING_VIEW
-tzdb::locate_zone(std::string_view tz_name) const
-#else
-tzdb::locate_zone(const std::string& tz_name) const
-#endif
-{
- auto zi = std::lower_bound(zones.begin(), zones.end(), tz_name,
-#if HAS_STRING_VIEW
- [](const time_zone& z, const std::string_view& nm)
-#else
- [](const time_zone& z, const std::string& nm)
-#endif
- {
- return z.name() < nm;
- });
- if (zi == zones.end() || zi->name() != tz_name)
- {
-#if !USE_OS_TZDB
- auto li = std::lower_bound(links.begin(), links.end(), tz_name,
-#if HAS_STRING_VIEW
- [](const time_zone_link& z, const std::string_view& nm)
-#else
- [](const time_zone_link& z, const std::string& nm)
-#endif
- {
- return z.name() < nm;
- });
- if (li != links.end() && li->name() == tz_name)
- {
- zi = std::lower_bound(zones.begin(), zones.end(), li->target(),
- [](const time_zone& z, const std::string& nm)
- {
- return z.name() < nm;
- });
- if (zi != zones.end() && zi->name() == li->target())
- return &*zi;
- }
-#endif // !USE_OS_TZDB
- throw std::runtime_error(std::string(tz_name) + " not found in timezone database");
- }
- return &*zi;
-}
-
-const time_zone*
-#if HAS_STRING_VIEW
-locate_zone(std::string_view tz_name)
-#else
-locate_zone(const std::string& tz_name)
-#endif
-{
- return get_tzdb().locate_zone(tz_name);
-}
-
-#if USE_OS_TZDB
-
-std::ostream&
-operator<<(std::ostream& os, const tzdb& db)
-{
- os << "Version: " << db.version << "\n\n";
- for (const auto& x : db.zones)
- os << x << '\n';
-#if !MISSING_LEAP_SECONDS
- os << '\n';
- for (const auto& x : db.leap_seconds)
- os << x << '\n';
-#endif // !MISSING_LEAP_SECONDS
- return os;
-}
-
-#else // !USE_OS_TZDB
-
-std::ostream&
-operator<<(std::ostream& os, const tzdb& db)
-{
- os << "Version: " << db.version << '\n';
- std::string title("--------------------------------------------"
- "--------------------------------------------\n"
- "Name ""Start Y ""End Y "
- "Beginning ""Offset "
- "Designator\n"
- "--------------------------------------------"
- "--------------------------------------------\n");
- int count = 0;
- for (const auto& x : db.rules)
- {
- if (count++ % 50 == 0)
- os << title;
- os << x << '\n';
- }
- os << '\n';
- title = std::string("---------------------------------------------------------"
- "--------------------------------------------------------\n"
- "Name ""Offset "
- "Rule ""Abrev ""Until\n"
- "---------------------------------------------------------"
- "--------------------------------------------------------\n");
- count = 0;
- for (const auto& x : db.zones)
- {
- if (count++ % 10 == 0)
- os << title;
- os << x << '\n';
- }
- os << '\n';
- title = std::string("---------------------------------------------------------"
- "--------------------------------------------------------\n"
- "Alias ""To\n"
- "---------------------------------------------------------"
- "--------------------------------------------------------\n");
- count = 0;
- for (const auto& x : db.links)
- {
- if (count++ % 45 == 0)
- os << title;
- os << x << '\n';
- }
- os << '\n';
- title = std::string("---------------------------------------------------------"
- "--------------------------------------------------------\n"
- "Leap second on\n"
- "---------------------------------------------------------"
- "--------------------------------------------------------\n");
- os << title;
- for (const auto& x : db.leap_seconds)
- os << x << '\n';
- return os;
-}
-
-#endif // !USE_OS_TZDB
-
-// -----------------------
-
-#ifdef _WIN32
-
-static
-std::string
-getTimeZoneKeyName()
-{
- DYNAMIC_TIME_ZONE_INFORMATION dtzi{};
- auto result = GetDynamicTimeZoneInformation(&dtzi);
- if (result == TIME_ZONE_ID_INVALID)
- throw std::runtime_error("current_zone(): GetDynamicTimeZoneInformation()"
- " reported TIME_ZONE_ID_INVALID.");
- auto wlen = wcslen(dtzi.TimeZoneKeyName);
- char buf[128] = {};
- assert(sizeof(buf) >= wlen+1);
- wcstombs(buf, dtzi.TimeZoneKeyName, wlen);
- if (strcmp(buf, "Coordinated Universal Time") == 0)
- return "UTC";
- return buf;
-}
-
-const time_zone*
-tzdb::current_zone() const
-{
- std::string win_tzid = getTimeZoneKeyName();
- std::string standard_tzid;
- if (!native_to_standard_timezone_name(win_tzid, standard_tzid))
- {
- std::string msg;
- msg = "current_zone() failed: A mapping from the Windows Time Zone id \"";
- msg += win_tzid;
- msg += "\" was not found in the time zone mapping database.";
- throw std::runtime_error(msg);
- }
- return locate_zone(standard_tzid);
-}
-
-#else // !_WIN32
-
-#if HAS_STRING_VIEW
-
-// Returns the part of path rp that follows the first '/' found at or after
-// the last occurrence of "zoneinfo". The returned view refers to rp's storage.
-// Throws std::runtime_error when rp does not contain "zoneinfo".
-static
-std::string_view
-extract_tz_name(char const* rp)
-{
-    using namespace std;
-    const string_view path = rp;
-    CONSTDATA string_view zoneinfo = "zoneinfo";
-    const size_t marker = path.rfind(zoneinfo);
-    if (marker == path.npos)
-        throw runtime_error(
-            "current_zone() failed to find \"zoneinfo\" in " + string(path));
-    // If no '/' follows, find() yields npos and npos + 1 wraps to 0,
-    // so the whole path is returned.
-    const size_t slash = path.find('/', marker);
-    return path.substr(slash + 1);
-}
-
-#else // !HAS_STRING_VIEW
-
-// Returns the part of path rp that follows the first '/' found at or after
-// the last occurrence of "zoneinfo".
-// Throws std::runtime_error when rp does not contain "zoneinfo".
-static
-std::string
-extract_tz_name(char const* rp)
-{
-    using namespace std;
-    const string path = rp;
-    CONSTDATA char zoneinfo[] = "zoneinfo";
-    const size_t marker = path.rfind(zoneinfo);
-    if (marker == path.npos)
-        throw runtime_error(
-            "current_zone() failed to find \"zoneinfo\" in " + path);
-    // If no '/' follows, find() yields npos and npos + 1 wraps to 0,
-    // so the whole path is returned.
-    const size_t slash = path.find('/', marker);
-    return path.substr(slash + 1);
-}
-
-#endif // HAS_STRING_VIEW
-
-// Resolves the given path with realpath() and reports whether the zone name
-// extracted from the resolved path differs from "posixrules".
-// Throws std::system_error when realpath() fails.
-static
-bool
-sniff_realpath(const char* timezone)
-{
-    using namespace std;
-    char resolved[PATH_MAX+1] = {};
-    if (realpath(timezone, resolved) == nullptr)
-        throw system_error(errno, system_category(), "realpath() failed");
-    return extract_tz_name(resolved) != "posixrules";
-}
-
-// POSIX: determines the current time zone by probing, in order:
-//   1. the /etc/localtime symlink,
-//   2. the /etc/TZ symlink,
-//   3. the first line of /etc/timezone,
-//   4. the first line of /var/db/zoneinfo,
-//   5. the iOS system API (only when TARGET_OS_IPHONE),
-//   6. a ZONE="..." entry in /etc/sysconfig/clock.
-// Returns the result of locate_zone() for the first source that yields a name.
-// Throws std::system_error when realpath()/readlink() fail on an existing
-// symlink, and std::runtime_error when no source yields a zone name.
-const time_zone*
-tzdb::current_zone() const
-{
-    // On some OS's a file called /etc/localtime may
-    // exist and it may be either a real file
-    // containing time zone details or a symlink to such a file.
-    // On MacOS and BSD Unix if this file is a symlink it
-    // might resolve to a path like this:
-    // "/usr/share/zoneinfo/America/Los_Angeles"
-    // If it does, we try to determine the current
-    // timezone from the remainder of the path by removing the prefix
-    // and hoping the rest resolves to a valid timezone.
-    // It may not always work though. If it doesn't then an
-    // exception will be thrown by local_timezone.
-    // The path may also take a relative form:
-    // "../usr/share/zoneinfo/America/Los_Angeles".
-    {
-        struct stat sb;
-        CONSTDATA auto timezone = "/etc/localtime";
-        if (lstat(timezone, &sb) == 0 && S_ISLNK(sb.st_mode) && sb.st_size > 0)
-        {
-            using namespace std;
-            // Decided once per process: whether realpath() gives a usable name.
-            static const bool use_realpath = sniff_realpath(timezone);
-            char rp[PATH_MAX+1] = {};
-            if (use_realpath)
-            {
-                if (realpath(timezone, rp) == nullptr)
-                    throw system_error(errno, system_category(), "realpath() failed");
-            }
-            else
-            {
-                if (readlink(timezone, rp, sizeof(rp)-1) <= 0)
-                    throw system_error(errno, system_category(), "readlink() failed");
-            }
-            return locate_zone(extract_tz_name(rp));
-        }
-    }
-    // On embedded systems e.g. buildroot with uclibc the timezone is linked
-    // into /etc/TZ which is a symlink to path like this:
-    // "/usr/share/zoneinfo/uclibc/America/Los_Angeles"
-    // If it does, we try to determine the current
-    // timezone from the remainder of the path by removing the prefix
-    // and hoping the rest resolves to valid timezone.
-    // It may not always work though. If it doesn't then an
-    // exception will be thrown by local_timezone.
-    // The path may also take a relative form:
-    // "../usr/share/zoneinfo/uclibc/America/Los_Angeles".
-    {
-        struct stat sb;
-        CONSTDATA auto timezone = "/etc/TZ";
-        if (lstat(timezone, &sb) == 0 && S_ISLNK(sb.st_mode) && sb.st_size > 0) {
-            using namespace std;
-            string result;
-            char rp[PATH_MAX+1] = {};
-            if (readlink(timezone, rp, sizeof(rp)-1) > 0)
-                result = string(rp);
-            else
-                throw system_error(errno, system_category(), "readlink() failed");
-
-            const size_t pos = result.find(get_tz_dir());
-            if (pos != result.npos)
-                result.erase(0, get_tz_dir().size() + 1 + pos);
-            return locate_zone(result);
-        }
-    }
-    {
-        // On some versions of some linux distro's (e.g. Ubuntu),
-        // the current timezone might be in the first line of
-        // the /etc/timezone file.
-        std::ifstream timezone_file("/etc/timezone");
-        if (timezone_file.is_open())
-        {
-            std::string result;
-            std::getline(timezone_file, result);
-            if (!result.empty())
-                return locate_zone(result);
-        }
-        // Fall through to try other means.
-    }
-    {
-        // On some versions of some bsd distro's (e.g. FreeBSD),
-        // the current timezone might be in the first line of
-        // the /var/db/zoneinfo file.
-        std::ifstream timezone_file("/var/db/zoneinfo");
-        if (timezone_file.is_open())
-        {
-            std::string result;
-            std::getline(timezone_file, result);
-            if (!result.empty())
-                return locate_zone(result);
-        }
-        // Fall through to try other means.
-    }
-    {
-        // On some versions of some bsd distro's (e.g. iOS),
-        // it is not possible to use file based approach,
-        // we switch to system API, calling functions in
-        // CoreFoundation framework.
-#if TARGET_OS_IPHONE
-        std::string result = date::iOSUtils::get_current_timezone();
-        if (!result.empty())
-            return locate_zone(result);
-#endif
-        // Fall through to try other means.
-    }
-    {
-        // On some versions of some linux distro's (e.g. Red Hat),
-        // the current timezone might be in the first line of
-        // the /etc/sysconfig/clock file as:
-        // ZONE="US/Eastern"
-        std::ifstream timezone_file("/etc/sysconfig/clock");
-        std::string result;
-        while (timezone_file)
-        {
-            std::getline(timezone_file, result);
-            auto p = result.find("ZONE=\"");
-            if (p != std::string::npos)
-            {
-                // Drop everything up to and including the opening quote.
-                // erase(pos, count) takes a count, so the prefix has to be
-                // removed from index 0; erase(p, p+6) would leave the leading
-                // characters and eat into the zone name whenever p > 0.
-                result.erase(0, p+6);
-                // Drop the closing quote and anything after it. A missing
-                // closing quote is tolerated: erase(npos) would throw.
-                auto q = result.rfind('"');
-                if (q != std::string::npos)
-                    result.erase(q);
-                return locate_zone(result);
-            }
-        }
-        // Fall through to try other means.
-    }
-    throw std::runtime_error("Could not get current timezone");
-}
-
-#endif // !_WIN32
-
-// Convenience wrapper: the current zone according to the active tzdb.
-const time_zone*
-current_zone()
-{
-    const auto& db = get_tzdb();
-    return db.current_zone();
-}
-
-} // namespace date
-} // namespace arrow_vendored
-
-#if defined(__GNUC__) && __GNUC__ < 5
-# pragma GCC diagnostic pop
-#endif
+ for (std::size_t i = 0; i < rules.size();)
+ {
+ auto e = static_cast<std::size_t>(std::upper_bound(
+ rules.cbegin()+static_cast<difference_type>(i), rules.cend(), rules[i].name(),
+ [](const std::string& nm, const Rule& x)
+ {
+ return nm < x.name();
+ }) - rules.cbegin());
+ split_overlaps(rules, i, e);
+ auto first_rule = rules.begin() + static_cast<difference_type>(i);
+ auto last_rule = rules.begin() + static_cast<difference_type>(e);
+ auto t = std::lower_bound(first_rule, last_rule, min_year);
+ if (t > first_rule+1)
+ {
+ if (t == last_rule || t->starting_year() >= min_year)
+ --t;
+ auto d = static_cast<std::size_t>(t - first_rule);
+ rules.erase(first_rule, t);
+ e -= d;
+ }
+ first_rule = rules.begin() + static_cast<difference_type>(i);
+ last_rule = rules.begin() + static_cast<difference_type>(e);
+ t = std::upper_bound(first_rule, last_rule, max_year);
+ if (t != last_rule)
+ {
+ auto d = static_cast<std::size_t>(last_rule - t);
+ rules.erase(t, last_rule);
+ e -= d;
+ }
+ i = e;
+ }
+ rules.shrink_to_fit();
+}
+
+// Find the rule that comes chronologically before Rule r. For multi-year rules,
+// y specifies which year within r is meant. For single year rules, y is assumed
+// to be equal to the year specified by r.
+// Returns a pointer to the chronologically previous rule, and the year within
+// that rule.
+// Must not be called with the first rule of a name at its starting year:
+// that case calls std::terminate().
+// Preconditions:
+//     r->starting_year() <= y && y <= r->ending_year()
+static
+std::pair<const Rule*, date::year>
+find_previous_rule(const Rule* r, date::year y)
+{
+    using namespace date;
+    auto const& all_rules = get_tzdb().rules;
+    // True when no rule with the same name sits directly before r.
+    const bool heads_group = r == &all_rules.front() || r->name() != r[-1].name();
+
+    if (y == r->starting_year())
+    {
+        if (heads_group)
+            std::terminate();  // never called with first rule
+        const Rule* prev = r - 1;
+        if (y != prev->starting_year())
+            return {prev, prev->ending_year()};
+        return {prev, y};
+    }
+
+    // Inside a multi-year rule: the neighbour below covers the same year
+    // unless it started earlier than r (or belongs to another name).
+    if (!heads_group && !(r[-1].starting_year() < r->starting_year()))
+        return {r - 1, y};
+
+    // Otherwise wrap to the previous year, starting from the last rule that
+    // shares r's name and starting year.
+    while (r < &all_rules.back() && r->name() == r[1].name() &&
+           r->starting_year() == r[1].starting_year())
+        ++r;
+    --y;
+    return {r, y};
+}
+
+// Find the rule that comes chronologically after Rule r. For multi-year rules,
+// y specifies which year within r is meant. For single year rules, y is assumed
+// to be equal to the year specified by r.
+// Returns a pointer to the chronologically next rule, and the year within
+// that rule. If there is no next rule, returns nullptr and year::max().
+// Preconditions:
+//     first_rule <= r && r < last_rule
+//     r->starting_year() <= y && y <= r->ending_year()
+//     [first_rule, last_rule) all have the same name
+static
+std::pair<const Rule*, date::year>
+find_next_rule(const Rule* first_rule, const Rule* last_rule, const Rule* r, date::year y)
+{
+    using namespace date;
+    const bool is_last = r == last_rule-1;
+
+    if (y == r->ending_year())
+    {
+        if (is_last)
+            return {nullptr, year::max()};
+        const Rule* next = r + 1;
+        if (y != next->ending_year())
+            return {next, next->starting_year()};
+        return {next, y};
+    }
+
+    // Inside a multi-year rule: the neighbour above covers the same year
+    // unless it ends later than r.
+    if (!is_last && !(r->ending_year() < r[1].ending_year()))
+        return {r + 1, y};
+
+    // Otherwise wrap to the next year, starting from the first rule that
+    // shares r's starting year.
+    while (r > first_rule && r->starting_year() == r[-1].starting_year())
+        --r;
+    ++y;
+    return {r, y};
+}
+
+// Find the rule that comes chronologically after Rule r, searching the rules
+// of the active tzdb. For multi-year rules, y specifies which year within r is
+// meant. For single year rules, y is assumed to be equal to the year
+// specified by r.
+// Returns a pointer to the chronologically next rule, and the year within
+// that rule. If there is no next rule, returns nullptr and year::max().
+// Preconditions:
+//     r->starting_year() <= y && y <= r->ending_year()
+static
+std::pair<const Rule*, date::year>
+find_next_rule(const Rule* r, date::year y)
+{
+    using namespace date;
+    auto const& all_rules = get_tzdb().rules;
+    // True when no rule with the same name sits directly after r.
+    const bool ends_group = r == &all_rules.back() || r->name() != r[1].name();
+
+    if (y == r->ending_year())
+    {
+        if (ends_group)
+            return {nullptr, year::max()};
+        const Rule* next = r + 1;
+        if (y != next->ending_year())
+            return {next, next->starting_year()};
+        return {next, y};
+    }
+
+    // Inside a multi-year rule: the neighbour above covers the same year
+    // unless it ends later than r (or belongs to another name).
+    if (!ends_group && !(r->ending_year() < r[1].ending_year()))
+        return {r + 1, y};
+
+    // Otherwise wrap to the next year, starting from the first rule that
+    // shares r's name and starting year.
+    while (r > &all_rules.front() && r->name() == r[-1].name() &&
+           r->starting_year() == r[-1].starting_year())
+        --r;
+    ++y;
+    return {r, y};
+}
+
+// Walks the rules in [eqr.first, eqr.second) in chronological order and
+// returns the first one whose save() is zero minutes.
+// Throws std::runtime_error when the walk runs out of rules first.
+static
+const Rule*
+find_first_std_rule(const std::pair<const Rule*, const Rule*>& eqr)
+{
+    auto cur = eqr.first;
+    auto cur_year = cur->starting_year();
+    for (;;)
+    {
+        if (!(cur->save() != std::chrono::minutes{0}))
+            return cur;
+        std::tie(cur, cur_year) = find_next_rule(eqr.first, eqr.second, cur, cur_year);
+        if (cur == nullptr)
+            throw std::runtime_error("Could not find standard offset in rule "
+                                     + eqr.first->name());
+    }
+}
+
+// Walks the rules in [eqr.first, eqr.second) chronologically and returns the
+// last rule (with its year) visited before mdt.compare(...) reports <= 0 for
+// the candidate. The save of the previously visited rule (initially zero) is
+// passed to each comparison.
+// Returns {nullptr, year::min()} when the very first rule already stops the walk.
+static
+std::pair<const Rule*, date::year>
+find_rule_for_zone(const std::pair<const Rule*, const Rule*>& eqr,
+                   const date::year& y, const std::chrono::seconds& offset,
+                   const MonthDayTime& mdt)
+{
+    assert(eqr.first != nullptr);
+    assert(eqr.second != nullptr);
+
+    using namespace std::chrono;
+    using namespace date;
+    const Rule* best_rule = nullptr;
+    auto best_year = year::min();
+    auto save_before = minutes{0};
+    auto cand = eqr.first;
+    auto cand_year = cand->starting_year();
+    while (cand != nullptr &&
+           !(mdt.compare(y, cand->mdt(), cand_year, offset, save_before) <= 0))
+    {
+        best_rule = cand;
+        best_year = cand_year;
+        save_before = best_rule->save();
+        std::tie(cand, cand_year) = find_next_rule(eqr.first, eqr.second, cand, cand_year);
+    }
+    return {best_rule, best_year};
+}
+
+// Walks the rules in [eqr.first, eqr.second) chronologically and returns the
+// last rule (with its year) whose transition time is not after the given
+// instant. The instant is supplied in three forms; each rule is compared
+// against the form matching the zone of its own MonthDayTime:
+//     tz::utc      -> tp_utc
+//     tz::standard -> tp_std
+//     tz::local    -> tp_loc
+// Returns {nullptr, year::min()} when the first rule is already after the instant.
+static
+std::pair<const Rule*, date::year>
+find_rule_for_zone(const std::pair<const Rule*, const Rule*>& eqr,
+                   const sys_seconds& tp_utc,
+                   const local_seconds& tp_std,
+                   const local_seconds& tp_loc)
+{
+    using namespace std::chrono;
+    using namespace date;
+    auto r = eqr.first;
+    auto ry = r->starting_year();
+    auto prev_year = year::min();
+    const Rule* prev_rule = nullptr;
+    while (r != nullptr)
+    {
+        bool found = false;
+        switch (r->mdt().zone())
+        {
+        case tz::utc:
+            found = tp_utc < r->mdt().to_time_point(ry);
+            break;
+        case tz::standard:
+            found = sys_seconds{tp_std.time_since_epoch()} < r->mdt().to_time_point(ry);
+            break;
+        case tz::local:
+            found = sys_seconds{tp_loc.time_since_epoch()} < r->mdt().to_time_point(ry);
+            break;
+        }
+        if (found)
+            break;
+        // Remember the last rule that is not after the instant.
+        prev_rule = r;
+        prev_year = ry;
+        std::tie(r, ry) = find_next_rule(eqr.first, eqr.second, r, ry);
+    }
+    return {prev_rule, prev_year};
+}
+
+// Builds the sys_info (begin, end, save, abbrev; offset is left at zero) that
+// applies at the instant described by (y, mdt), by walking the rules from
+// first_rule to last_rule. Before the first rule applies, the result carries
+// initial_save and initial_abbrev.
+static
+sys_info
+find_rule(const std::pair<const Rule*, date::year>& first_rule,
+          const std::pair<const Rule*, date::year>& last_rule,
+          const date::year& y, const std::chrono::seconds& offset,
+          const MonthDayTime& mdt, const std::chrono::minutes& initial_save,
+          const std::string& initial_abbrev)
+{
+    using namespace std::chrono;
+    using namespace date;
+    auto rule = first_rule.first;
+    auto rule_year = first_rule.second;
+    sys_info info{sys_days(year::min()/min_day), sys_days(year::max()/max_day),
+                  seconds{0}, initial_save, initial_abbrev};
+    auto at_first = [&]() { return rule == first_rule.first && rule_year == first_rule.second; };
+    auto at_last  = [&]() { return rule == last_rule.first && rule_year == last_rule.second; };
+    while (rule != nullptr)
+    {
+        auto rule_start = rule->mdt().to_sys(rule_year, offset, info.save);
+        auto target = mdt.to_sys(y, offset, info.save);
+        // Find last rule where target >= rule_start
+        if (!(target <= rule_start || at_last()))
+        {
+            // Not there yet: adopt this rule's save and move on.
+            info.save = rule->save();
+            std::tie(rule, rule_year) = find_next_rule(rule, rule_year);  // Can't return nullptr
+            assert(rule != nullptr);
+            continue;
+        }
+        if (target < rule_start && at_first())
+        {
+            // The instant precedes every rule: only the end needs fixing.
+            info.end = rule->mdt().to_sys(rule_year, offset, info.save);
+            break;
+        }
+        if (target < rule_start)
+        {
+            std::tie(rule, rule_year) = find_previous_rule(rule, rule_year);  // can't return nullptr
+            assert(rule != nullptr);
+        }
+        // rule != nullptr && target >= rule_start (if rule_start were to be recomputed)
+        auto save_before = initial_save;
+        if (!at_first())
+            save_before = find_previous_rule(rule, rule_year).first->save();
+        info.begin = rule->mdt().to_sys(rule_year, offset, save_before);
+        info.save = rule->save();
+        info.abbrev = rule->abbrev();
+        if (at_last())
+        {
+            info.end = sys_days(year::max()/max_day);
+        }
+        else
+        {
+            std::tie(rule, rule_year) = find_next_rule(rule, rule_year);  // can't return nullptr
+            assert(rule != nullptr);
+            info.end = rule->mdt().to_sys(rule_year, offset, info.save);
+        }
+        break;
+    }
+    return info;
+}
+
+// zonelet
+
+// Destroys whichever member of the union u is selected by tag_: save_ when
+// the tag is has_save, rule_ otherwise. Compiled out for MSVC before 1900.
+detail::zonelet::~zonelet()
+{
+#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
+    using minutes = std::chrono::minutes;
+    using string = std::string;
+    if (tag_ != has_save)
+    {
+        u.rule_.~string();
+        return;
+    }
+    u.save_.~minutes();
+#endif
+}
+
+// Default constructor: placement-constructs an empty string in the union's
+// rule_ member. Compiled out for MSVC before 1900.
+detail::zonelet::zonelet()
+{
+#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
+    ::new(static_cast<void*>(&u.rule_)) std::string();
+#endif
+}
+
+// Copy constructor: copies every plain member, then copies the union member
+// selected by tag_ (save_ for has_save, rule_ otherwise). On MSVC before 1900
+// the union member is assigned instead of placement-constructed.
+detail::zonelet::zonelet(const zonelet& i)
+    : gmtoff_(i.gmtoff_)
+    , tag_(i.tag_)
+    , format_(i.format_)
+    , until_year_(i.until_year_)
+    , until_date_(i.until_date_)
+    , until_utc_(i.until_utc_)
+    , until_std_(i.until_std_)
+    , until_loc_(i.until_loc_)
+    , initial_save_(i.initial_save_)
+    , initial_abbrev_(i.initial_abbrev_)
+    , first_rule_(i.first_rule_)
+    , last_rule_(i.last_rule_)
+{
+#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
+    if (tag_ != has_save)
+        ::new(&u.rule_) std::string(i.u.rule_);
+    else
+        ::new(&u.save_) std::chrono::minutes(i.u.save_);
+#else
+    if (tag_ != has_save)
+        u.rule_ = i.u.rule_;
+    else
+        u.save_ = i.u.save_;
+#endif
+}
+
+#endif // !USE_OS_TZDB
+
+// time_zone
+
+#if USE_OS_TZDB
+
+// OS-tzdb constructor: stores the zone name and allocates the once_flag that
+// init() passes to std::call_once. Nothing is read from disk here.
+time_zone::time_zone(const std::string& s, detail::undocumented)
+    : name_(s),
+      adjusted_(new std::once_flag{})
+{}
+
+enum class endian  // byte-order tags taken from the compiler's predefined macros
+{
+    native = __BYTE_ORDER__,          // byte order of the target being compiled for
+    little = __ORDER_LITTLE_ENDIAN__, // value __BYTE_ORDER__ has on little-endian targets
+    big    = __ORDER_BIG_ENDIAN__     // value __BYTE_ORDER__ has on big-endian targets
+};
+
+// Returns i with the order of its four bytes reversed.
+static
+inline
+std::uint32_t
+reverse_bytes(std::uint32_t i)
+{
+    std::uint32_t flipped = 0;
+    for (int k = 0; k < 4; ++k)
+    {
+        // Move the lowest remaining byte of i onto the low end of the result.
+        flipped = (flipped << 8) | (i & 0xffu);
+        i >>= 8;
+    }
+    return flipped;
+}
+
+// Returns i with the order of its eight bytes reversed.
+static
+inline
+std::uint64_t
+reverse_bytes(std::uint64_t i)
+{
+    std::uint64_t flipped = 0;
+    for (int k = 0; k < 8; ++k)
+    {
+        // Move the lowest remaining byte of i onto the low end of the result.
+        flipped = (flipped << 8) | (i & 0xffull);
+        i >>= 8;
+    }
+    return flipped;
+}
+
+// Overload chosen when no swap is needed: leaves the value untouched.
+template <class T>
+static
+inline
+void
+maybe_reverse_bytes(T&, std::false_type)
+{
+}
+
+// Overload chosen when a swap is needed: reverses the four bytes of t in place.
+static
+inline
+void
+maybe_reverse_bytes(std::int32_t& t, std::true_type)
+{
+    const auto swapped = reverse_bytes(static_cast<std::uint32_t>(t));
+    t = static_cast<std::int32_t>(swapped);
+}
+
+// Overload chosen when a swap is needed: reverses the eight bytes of t in place.
+static
+inline
+void
+maybe_reverse_bytes(std::int64_t& t, std::true_type)
+{
+    const auto swapped = reverse_bytes(static_cast<std::uint64_t>(t));
+    t = static_cast<std::int64_t>(swapped);
+}
+
+// Reverses the bytes of t in place when the target is little-endian and
+// leaves t untouched otherwise; the choice is made at compile time.
+template <class T>
+static
+inline
+void
+maybe_reverse_bytes(T& t)
+{
+    using needs_swap = std::integral_constant<bool,
+                                              endian::native == endian::little>;
+    maybe_reverse_bytes(t, needs_swap{});
+}
+
+// Consumes four characters from inf. Unless NDEBUG is defined they are
+// asserted to spell "TZif"; with NDEBUG they are read and discarded.
+static
+void
+load_header(std::istream& inf)
+{
+    // Read TZif
+    int magic[4];
+    for (int& ch : magic)
+        ch = inf.get();
+#ifndef NDEBUG
+    assert(magic[0] == 'T');
+    assert(magic[1] == 'Z');
+    assert(magic[2] == 'i');
+    assert(magic[3] == 'f');
+#else
+    (void)magic;
+#endif
+}
+
+// Consumes one character from inf and returns it as the version byte.
+// Asserts (in debug builds) that the stream was not at end of file.
+static
+unsigned char
+load_version(std::istream& inf)
+{
+    // Read version
+    const auto version_byte = inf.get();
+    assert(version_byte != EOF);
+    return static_cast<unsigned char>(version_byte);
+}
+
+// Skips the 15 characters that follow the version byte.
+static
+void
+skip_reserve(std::istream& inf)
+{
+    const std::streamsize reserved_bytes = 15;
+    inf.ignore(reserved_bytes);
+}
+
+// Reads six consecutive 4-byte counts from inf into the output parameters,
+// in the order ttisgmtcnt, ttisstdcnt, leapcnt, timecnt, typecnt, charcnt.
+// Each value is passed through maybe_reverse_bytes() after reading.
+static
+void
+load_counts(std::istream& inf,
+            std::int32_t& tzh_ttisgmtcnt, std::int32_t& tzh_ttisstdcnt,
+            std::int32_t& tzh_leapcnt, std::int32_t& tzh_timecnt,
+            std::int32_t& tzh_typecnt, std::int32_t& tzh_charcnt)
+{
+    // Read counts;
+    std::int32_t* const fields[] = {&tzh_ttisgmtcnt, &tzh_ttisstdcnt, &tzh_leapcnt,
+                                    &tzh_timecnt, &tzh_typecnt, &tzh_charcnt};
+    for (std::int32_t* field : fields)
+    {
+        inf.read(reinterpret_cast<char*>(field), 4);
+        maybe_reverse_bytes(*field);
+    }
+}
+
+// Reads tzh_timecnt transition times of type TimeType from inf and returns
+// them as transitions. Times earlier than min_seconds are raised to min_seconds.
+template <class TimeType>
+static
+std::vector<detail::transition>
+load_transitions(std::istream& inf, std::int32_t tzh_timecnt)
+{
+    // Read transitions
+    using namespace std::chrono;
+    std::vector<detail::transition> result;
+    result.reserve(static_cast<unsigned>(tzh_timecnt));
+    for (std::int32_t n = 0; n < tzh_timecnt; ++n)
+    {
+        TimeType raw;
+        inf.read(reinterpret_cast<char*>(&raw), sizeof(raw));
+        maybe_reverse_bytes(raw);
+        result.emplace_back(sys_seconds{seconds{raw}});
+        auto& added = result.back();
+        if (added.timepoint < min_seconds)
+            added.timepoint = min_seconds;
+    }
+    return result;
+}
+
+// Reads tzh_timecnt one-byte indices from inf and returns them in file order.
+static
+std::vector<std::uint8_t>
+load_indices(std::istream& inf, std::int32_t tzh_timecnt)
+{
+    // Read indices
+    std::vector<std::uint8_t> result;
+    result.reserve(static_cast<unsigned>(tzh_timecnt));
+    for (std::int32_t n = 0; n < tzh_timecnt; ++n)
+    {
+        std::uint8_t one_index;
+        inf.read(reinterpret_cast<char*>(&one_index), sizeof(one_index));
+        result.push_back(one_index);
+    }
+    return result;
+}
+
+// Reads tzh_typecnt records of 6 bytes each from inf into ttinfo objects.
+// The bytes are copied straight over the start of each ttinfo; tt_gmtoff is
+// then passed through maybe_reverse_bytes().
+static
+std::vector<ttinfo>
+load_ttinfo(std::istream& inf, std::int32_t tzh_typecnt)
+{
+    // Read ttinfo
+    std::vector<ttinfo> result;
+    result.reserve(static_cast<unsigned>(tzh_typecnt));
+    for (std::int32_t n = 0; n < tzh_typecnt; ++n)
+    {
+        ttinfo record;
+        inf.read(reinterpret_cast<char*>(&record), 6);
+        maybe_reverse_bytes(record.tt_gmtoff);
+        result.push_back(record);
+    }
+    return result;
+}
+
+// Reads tzh_charcnt characters from inf and returns them as one string
+// (embedded NUL characters included).
+static
+std::string
+load_abbreviations(std::istream& inf, std::int32_t tzh_charcnt)
+{
+    // Read abbreviations
+    std::string table(static_cast<unsigned>(tzh_charcnt), '\0');
+    inf.read(&table[0], tzh_charcnt);
+    return table;
+}
+
+#if !MISSING_LEAP_SECONDS
+
+// Reads tzh_leapcnt (time, count) pairs from inf: a TimeType followed by a
+// 32-bit integer. Each pair becomes a leap_second dated at time - (count - 1).
+template <class TimeType>
+static
+std::vector<leap_second>
+load_leaps(std::istream& inf, std::int32_t tzh_leapcnt)
+{
+    // Read tzh_leapcnt pairs
+    using namespace std::chrono;
+    std::vector<leap_second> result;
+    result.reserve(static_cast<std::size_t>(tzh_leapcnt));
+    for (std::int32_t n = 0; n < tzh_leapcnt; ++n)
+    {
+        TimeType when;
+        std::int32_t total;
+        inf.read(reinterpret_cast<char*>(&when), sizeof(when));
+        inf.read(reinterpret_cast<char*>(&total), sizeof(total));
+        maybe_reverse_bytes(when);
+        maybe_reverse_bytes(total);
+        result.emplace_back(sys_seconds{seconds{when - (total-1)}},
+                            detail::undocumented{});
+    }
+    return result;
+}
+
+// Skips the transition times, transition indices, ttinfo records and
+// abbreviation characters of a data block, then reads its leap-second pairs.
+template <class TimeType>
+static
+std::vector<leap_second>
+load_leap_data(std::istream& inf,
+               std::int32_t tzh_leapcnt, std::int32_t tzh_timecnt,
+               std::int32_t tzh_typecnt, std::int32_t tzh_charcnt)
+{
+    const auto bytes_before_leaps =
+        tzh_timecnt*static_cast<std::int32_t>(sizeof(TimeType)) + tzh_timecnt +
+        tzh_typecnt*6 + tzh_charcnt;
+    inf.ignore(bytes_before_leaps);
+    return load_leaps<TimeType>(inf, tzh_leapcnt);
+}
+
+// Reads only the leap-second table from a TZif stream. For version 0 the
+// table comes from the first data block with 32-bit times; otherwise the
+// first block is skipped and the table is read from the second block with
+// 64-bit times.
+static
+std::vector<leap_second>
+load_just_leaps(std::istream& inf)
+{
+    // Read tzh_leapcnt pairs
+    using namespace std::chrono;
+    load_header(inf);
+    const auto version = load_version(inf);
+    std::int32_t tzh_ttisgmtcnt, tzh_ttisstdcnt, tzh_leapcnt,
+                 tzh_timecnt, tzh_typecnt, tzh_charcnt;
+    skip_reserve(inf);
+    load_counts(inf, tzh_ttisgmtcnt, tzh_ttisstdcnt, tzh_leapcnt,
+                     tzh_timecnt, tzh_typecnt, tzh_charcnt);
+    if (version == 0)
+        return load_leap_data<int32_t>(inf, tzh_leapcnt, tzh_timecnt, tzh_typecnt,
+                                            tzh_charcnt);
+    // Size of the first data block (32-bit times).
+    const auto first_block = (4+1)*tzh_timecnt + 6*tzh_typecnt + tzh_charcnt +
+                             8*tzh_leapcnt + tzh_ttisstdcnt + tzh_ttisgmtcnt;
+#if !defined(NDEBUG)
+    // Debug builds re-check the second header instead of skipping it blindly.
+    inf.ignore(first_block);
+    load_header(inf);
+    const auto version2 = load_version(inf);
+    assert(version == version2);
+    skip_reserve(inf);
+#else  // defined(NDEBUG)
+    inf.ignore(first_block + (4+1+15));
+#endif  // defined(NDEBUG)
+    load_counts(inf, tzh_ttisgmtcnt, tzh_ttisstdcnt, tzh_leapcnt,
+                     tzh_timecnt, tzh_typecnt, tzh_charcnt);
+    return load_leap_data<int64_t>(inf, tzh_leapcnt, tzh_timecnt, tzh_typecnt,
+                                        tzh_charcnt);
+}
+
+#endif // !MISSING_LEAP_SECONDS
+
+// Reads one TZif data block (times of type TimeType) from inf and fills
+// transitions_ and ttinfos_. Unless MISSING_LEAP_SECONDS is set, the leap
+// second table of the front tzdb is loaded too when it is still empty and
+// the block has leap seconds.
+// A transition at min_seconds is inserted at the front when the block does
+// not already start there; it uses the first non-DST ttinfo, or the first
+// ttinfo when all are DST.
+template <class TimeType>
+void
+time_zone::load_data(std::istream& inf,
+                     std::int32_t tzh_leapcnt, std::int32_t tzh_timecnt,
+                     std::int32_t tzh_typecnt, std::int32_t tzh_charcnt)
+{
+    using namespace std::chrono;
+    transitions_ = load_transitions<TimeType>(inf, tzh_timecnt);
+    auto type_index = load_indices(inf, tzh_timecnt);
+    auto raw_infos = load_ttinfo(inf, tzh_typecnt);
+    auto abbrev_table = load_abbreviations(inf, tzh_charcnt);
+#if !MISSING_LEAP_SECONDS
+    auto& shared_leaps = get_tzdb_list().front().leap_seconds;
+    if (shared_leaps.empty() && tzh_leapcnt > 0)
+        shared_leaps = load_leaps<TimeType>(inf, tzh_leapcnt);
+#endif
+    ttinfos_.reserve(raw_infos.size());
+    for (auto& raw : raw_infos)
+    {
+        ttinfos_.push_back({seconds{raw.tt_gmtoff},
+                            abbrev_table.c_str() + raw.tt_abbrind,
+                            raw.tt_isdst != 0});
+    }
+    auto next = 0u;
+    const bool needs_sentinel =
+        transitions_.empty() || transitions_.front().timepoint != min_seconds;
+    if (needs_sentinel)
+    {
+        transitions_.emplace(transitions_.begin(), min_seconds);
+        auto std_info = std::find_if(ttinfos_.begin(), ttinfos_.end(),
+                                     [](const expanded_ttinfo& ti)
+                                         {return ti.is_dst == 0;});
+        if (std_info == ttinfos_.end())
+            std_info = ttinfos_.begin();
+        transitions_[next].info = &*std_info;
+        ++next;
+    }
+    // Attach the remaining transitions to their ttinfo by file index.
+    for (auto src = 0u; next < transitions_.size(); ++next, ++src)
+        transitions_[next].info = ttinfos_.data() + type_index[src];
+}
+
+// Loads this zone's TZif file from get_tz_dir()/name_ and fills the
+// transition table. Steps:
+//   1. read the header; for version 0 load the 32-bit block, otherwise skip
+//      it and load the 64-bit block;
+//   2. unless MISSING_LEAP_SECONDS is set and when the file has leap seconds,
+//      subtract the accumulated leap-second count from transition times;
+//   3. erase transitions whose offset, abbreviation and DST flag equal those
+//      of the preceding transition.
+// Throws std::runtime_error when the file cannot be opened; stream failures
+// throw because failbit and badbit exceptions are enabled.
+void
+time_zone::init_impl()
+{
+    using namespace std;
+    using namespace std::chrono;
+    auto path = get_tz_dir() + ('/' + name_);
+    std::ifstream tzfile(path);
+    if (!tzfile.is_open())
+        throw std::runtime_error{"Unable to open " + path};
+    tzfile.exceptions(std::ios::failbit | std::ios::badbit);
+    load_header(tzfile);
+    auto version = load_version(tzfile);
+    std::int32_t tzh_ttisgmtcnt, tzh_ttisstdcnt, tzh_leapcnt,
+                 tzh_timecnt, tzh_typecnt, tzh_charcnt;
+    skip_reserve(tzfile);
+    load_counts(tzfile, tzh_ttisgmtcnt, tzh_ttisstdcnt, tzh_leapcnt,
+                        tzh_timecnt, tzh_typecnt, tzh_charcnt);
+    if (version == 0)
+    {
+        load_data<int32_t>(tzfile, tzh_leapcnt, tzh_timecnt, tzh_typecnt, tzh_charcnt);
+    }
+    else
+    {
+        // Size of the first data block (32-bit times).
+        const auto first_block = (4+1)*tzh_timecnt + 6*tzh_typecnt + tzh_charcnt +
+                                 8*tzh_leapcnt + tzh_ttisstdcnt + tzh_ttisgmtcnt;
+#if !defined(NDEBUG)
+        // Debug builds re-check the second header instead of skipping it blindly.
+        tzfile.ignore(first_block);
+        load_header(tzfile);
+        auto version2 = load_version(tzfile);
+        assert(version == version2);
+        skip_reserve(tzfile);
+#else  // defined(NDEBUG)
+        tzfile.ignore(first_block + (4+1+15));
+#endif  // defined(NDEBUG)
+        load_counts(tzfile, tzh_ttisgmtcnt, tzh_ttisstdcnt, tzh_leapcnt,
+                            tzh_timecnt, tzh_typecnt, tzh_charcnt);
+        load_data<int64_t>(tzfile, tzh_leapcnt, tzh_timecnt, tzh_typecnt, tzh_charcnt);
+    }
+#if !MISSING_LEAP_SECONDS
+    if (tzh_leapcnt > 0)
+    {
+        auto& leaps = get_tzdb_list().front().leap_seconds;
+        auto leap_it = leaps.begin();
+        auto next_leap = leap_it->date();
+        seconds applied{0};
+        auto after_first_leap = [](const sys_seconds& x, const transition& ct)
+                                {
+                                    return x < ct.timepoint;
+                                };
+        auto tr = std::upper_bound(transitions_.begin(), transitions_.end(),
+                                   next_leap, after_first_leap);
+        for (; tr != transitions_.end(); ++tr)
+        {
+            // Account for every leap second at or before this transition.
+            while (tr->timepoint >= next_leap)
+            {
+                ++applied;
+                if (++leap_it == leaps.end())
+                    next_leap = sys_days(max_year/max_day);
+                else
+                    next_leap = leap_it->date() + applied;
+            }
+            tr->timepoint -= applied;
+        }
+    }
+#endif  // !MISSING_LEAP_SECONDS
+    // Walk backwards, dropping transitions that change nothing.
+    auto front = transitions_.begin();
+    auto cur = transitions_.end();
+    if (cur != front)
+    {
+        for (--cur; cur != front; --cur)
+        {
+            const bool same_as_previous =
+                cur->info->offset == cur[-1].info->offset &&
+                cur->info->abbrev == cur[-1].info->abbrev &&
+                cur->info->is_dst == cur[-1].info->is_dst;
+            if (same_as_previous)
+                cur = transitions_.erase(cur);
+        }
+    }
+}
+
+// Runs init_impl() through std::call_once on the flag held by adjusted_.
+// The method is const, so the object is const_cast to call the loader.
+void
+time_zone::init() const
+{
+    auto load_once = [this]()
+    {
+        const_cast<time_zone*>(this)->init_impl();
+    };
+    std::call_once(*adjusted_, load_once);
+}
+
+// Builds the sys_info for the interval that ends at transition i: it begins
+// at the transition before i and ends at i, or at the maximum representable
+// day when i is the end iterator. Offset, abbreviation and DST state come
+// from the transition before i; save is one minute when DST, zero otherwise.
+// Preconditions (asserted): transitions_ is non-empty and i is not begin().
+sys_info
+time_zone::load_sys_info(std::vector<detail::transition>::const_iterator i) const
+{
+    using namespace std::chrono;
+    assert(!transitions_.empty());
+    assert(i != transitions_.begin());
+    const auto& prev = i[-1];
+    sys_info out;
+    out.begin = prev.timepoint;
+    if (i != transitions_.end())
+        out.end = i->timepoint;
+    else
+        out.end = sys_seconds(sys_days(year::max()/max_day));
+    out.offset = prev.info->offset;
+    out.save = prev.info->is_dst ? minutes{1} : minutes{0};
+    out.abbrev = prev.info->abbrev;
+    return out;
+}
+
+// Returns the sys_info in effect at UTC instant tp: finds the first
+// transition strictly after tp and describes the interval that ends there.
+sys_info
+time_zone::get_info_impl(sys_seconds tp) const
+{
+    using namespace std;
+    init();
+    auto starts_after = [](const sys_seconds& x, const transition& t)
+                        {
+                            return x < t.timepoint;
+                        };
+    auto next = upper_bound(transitions_.begin(), transitions_.end(), tp,
+                            starts_after);
+    return load_sys_info(next);
+}
+
+// Returns the local_info for local time tp. The result is:
+//   unique      - one sys_info applies (in first);
+//   ambiguous   - tp falls in both the earlier and the later interval
+//                 (first is the earlier one, second the later one);
+//   nonexistent - tp falls in the gap between first and second.
+// second is reset to a default sys_info whenever the result stays unique.
+local_info
+time_zone::get_info_impl(local_seconds tp) const
+{
+    using namespace std::chrono;
+    init();
+    local_info answer;
+    answer.result = local_info::unique;
+    // Compare tp, shifted by each transition's own offset, to that transition.
+    auto starts_after = [](const local_seconds& x, const transition& t)
+                        {
+                            return sys_seconds{x.time_since_epoch()} -
+                                       t.info->offset < t.timepoint;
+                        };
+    auto pos = upper_bound(transitions_.begin(), transitions_.end(), tp,
+                           starts_after);
+    answer.first = load_sys_info(pos);
+    auto as_sys = sys_seconds{(tp - answer.first.offset).time_since_epoch()};
+    if (as_sys < answer.first.begin + days{1} && pos != transitions_.begin())
+    {
+        // Close to the start of the interval: check the one before it.
+        answer.second = load_sys_info(--pos);
+        as_sys = sys_seconds{(tp - answer.second.offset).time_since_epoch()};
+        if (as_sys < answer.second.end)
+        {
+            answer.result = local_info::ambiguous;
+            std::swap(answer.first, answer.second);
+        }
+        else
+        {
+            answer.second = {};
+        }
+    }
+    else if (as_sys >= answer.first.end && pos != transitions_.end())
+    {
+        // Past the end of the interval: check the one after it.
+        answer.second = load_sys_info(++pos);
+        as_sys = sys_seconds{(tp - answer.second.offset).time_since_epoch()};
+        if (as_sys < answer.second.begin)
+            answer.result = local_info::nonexistent;
+        else
+            answer.second = {};
+    }
+    return answer;
+}
+
+// Streams a description of zone z: its name, an "Initially:" line with the
+// offset, standard/daylight state and abbreviation of the first transition,
+// then every later transition on its own line.
+std::ostream&
+operator<<(std::ostream& os, const time_zone& z)
+{
+    using namespace std::chrono;
+    z.init();
+    os << z.name_ << '\n';
+    os << "Initially:           ";
+    auto const& initial = z.transitions_.front();
+    if (initial.info->offset >= seconds{0})
+        os << '+';
+    os << make_time(initial.info->offset);
+    os << (initial.info->is_dst > 0 ? "    daylight    " : "    standard    ");
+    os << initial.info->abbrev << '\n';
+    auto it = std::next(z.transitions_.cbegin());
+    const auto stop = z.transitions_.cend();
+    while (it < stop)
+    {
+        os << *it << '\n';
+        ++it;
+    }
+    return os;
+}
+
+#if !MISSING_LEAP_SECONDS
+
+// Constructs a leap second dated at s.
+leap_second::leap_second(const sys_seconds& s, detail::undocumented)
+    : date_(s)
+{}
+
+#endif // !MISSING_LEAP_SECONDS
+
+#else // !USE_OS_TZDB
+
+// Text-tzdb constructor: parses a "Zone" line. The first word is skipped,
+// the second becomes the zone name, and the remainder is handed to
+// parse_info() as the first zonelet.
+// On any failure the offending line and the partially built zone are written
+// to std::cerr, a zonelet appended by the failed parse is removed, and the
+// exception is rethrown.
+time_zone::time_zone(const std::string& s, detail::undocumented)
+    : adjusted_(new std::once_flag{})
+{
+    // Remember the size so the handler can tell whether parse_info() got as
+    // far as appending a zonelet.
+    const auto zonelets_before = zonelets_.size();
+    try
+    {
+        using namespace date;
+        std::istringstream in(s);
+        in.exceptions(std::ios::failbit | std::ios::badbit);
+        std::string word;
+        in >> word >> name_;
+        parse_info(in);
+    }
+    catch (...)
+    {
+        std::cerr << s << '\n';
+        std::cerr << *this << '\n';
+        // Reading the name can throw before anything was appended;
+        // pop_back() on an empty vector is undefined behaviour.
+        if (zonelets_.size() > zonelets_before)
+            zonelets_.pop_back();
+        throw;
+    }
+}
+
+// Returns the sys_info in effect at UTC instant tp by forwarding to the
+// two-argument overload with the tz::utc interpretation.
+sys_info
+time_zone::get_info_impl(sys_seconds tp) const
+{
+    const int as_utc = static_cast<int>(tz::utc);
+    return get_info_impl(tp, as_utc);
+}
+
+// Returns the local_info for local time tp.
+//   - If tp, converted with the offset found for it, lies before that
+//     interval's begin, tp is in a gap: the result is nonexistent, first is
+//     the interval before the gap and second the one after.
+//   - If tp lies within one day of the interval's end, the following interval
+//     is checked as well; when tp also fits there the result is ambiguous.
+//   - Otherwise the value-initialised result is kept with only first filled.
+local_info
+time_zone::get_info_impl(local_seconds tp) const
+{
+    using namespace std::chrono;
+    const int as_local = static_cast<int>(tz::local);
+    const int as_utc = static_cast<int>(tz::utc);
+    local_info answer{};
+    answer.first = get_info_impl(sys_seconds{tp.time_since_epoch()}, as_local);
+    auto as_sys = sys_seconds{(tp - answer.first.offset).time_since_epoch()};
+    if (as_sys < answer.first.begin)
+    {
+        answer.second = std::move(answer.first);
+        answer.first = get_info_impl(answer.second.begin - seconds{1}, as_utc);
+        answer.result = local_info::nonexistent;
+        return answer;
+    }
+    if (answer.first.end - as_sys <= days{1})
+    {
+        answer.second = get_info_impl(answer.first.end, as_utc);
+        as_sys = sys_seconds{(tp - answer.second.offset).time_since_epoch()};
+        if (as_sys >= answer.second.begin)
+            answer.result = local_info::ambiguous;
+        else
+            answer.second = {};
+    }
+    return answer;
+}
+
+// Parses a continuation line of a zone definition and appends the zonelet it
+// describes. Lines that are empty or start with '#' after leading whitespace
+// are ignored.
+// On any failure the offending line and the zone are written to std::cerr, a
+// zonelet appended by the failed parse is removed, and the exception is
+// rethrown.
+void
+time_zone::add(const std::string& s)
+{
+    // Remember the size so the handler only removes a zonelet that this call
+    // appended, never one that was already valid.
+    const auto zonelets_before = zonelets_.size();
+    try
+    {
+        std::istringstream in(s);
+        in.exceptions(std::ios::failbit | std::ios::badbit);
+        ws(in);
+        if (!in.eof() && in.peek() != '#')
+            parse_info(in);
+    }
+    catch (...)
+    {
+        std::cerr << s << '\n';
+        std::cerr << *this << '\n';
+        if (zonelets_.size() > zonelets_before)
+            zonelets_.pop_back();
+        throw;
+    }
+}
+
+// Appends a zonelet and fills it from the stream: signed GMT offset, rule
+// name ("-" is stored as empty), format, and an optional "until" year and
+// date. Without an "until" part the zonelet lasts until year::max().
+// The new zonelet is removed again when it ends before min_year, or when the
+// zonelet before it already extends past max_year.
+void
+time_zone::parse_info(std::istream& in)
+{
+    using namespace date;
+    using namespace std::chrono;
+    zonelets_.emplace_back();
+    auto& zl = zonelets_.back();
+    zl.gmtoff_ = parse_signed_time(in);
+    in >> zl.u.rule_;
+    if (zl.u.rule_ == "-")
+        zl.u.rule_.clear();
+    in >> zl.format_;
+    if (!in.eof())
+        ws(in);
+    const bool has_until = !(in.eof() || in.peek() == '#');
+    if (has_until)
+    {
+        int until_year;
+        in >> until_year;
+        zl.until_year_ = year{until_year};
+        in >> zl.until_date_;
+        zl.until_date_.canonicalize(zl.until_year_);
+    }
+    else
+    {
+        zl.until_year_ = year::max();
+        zl.until_date_ = MonthDayTime(max_day, tz::utc);
+    }
+    const bool ends_too_early = zl.until_year_ < min_year;
+    const bool starts_too_late =
+        zonelets_.size() > 1 && zonelets_.end()[-2].until_year_ > max_year;
+    if (ends_too_early || starts_too_late)
+        zonelets_.pop_back();
+}
+
+// Finishes every zonelet once all rules are known. For each zonelet, in order:
+//   1. classify it: empty rule text -> is_empty; rule text naming no known
+//      rule -> parsed as a fixed save (has_save); otherwise it keeps its rule;
+//   2. compute the UTC, standard and local forms of its "until" instant,
+//      using the save in effect at that instant;
+//   3. for rule-based zonelets, determine the first and last applicable rule
+//      and the initial save/abbreviation inherited at the previous zonelet's end.
+// Rethrows parse failures after writing the zone name and rule text to std::cerr.
+void
+time_zone::adjust_infos(const std::vector<Rule>& rules)
+{
+    using namespace std::chrono;
+    using namespace date;
+    const zonelet* previous = nullptr;
+    for (auto& zl : zonelets_)
+    {
+        std::pair<const Rule*, const Rule*> named_rules{};
+        std::istringstream save_text;
+        save_text.exceptions(std::ios::failbit | std::ios::badbit);
+        // Classify info as rule-based, has save, or neither
+        if (zl.u.rule_.empty())
+        {
+            // This zone::zonelet has no rule and no save
+            zl.tag_ = zonelet::is_empty;
+        }
+        else
+        {
+            // Find out if this zonelet has a rule or a save
+            named_rules = std::equal_range(rules.data(), rules.data() + rules.size(),
+                                           zl.u.rule_);
+            if (named_rules.first == named_rules.second)
+            {
+                // The rule doesn't exist.  Assume this is a save
+                try
+                {
+                    using namespace std::chrono;
+                    using string = std::string;
+                    save_text.str(zl.u.rule_);
+                    auto parsed = duration_cast<minutes>(parse_signed_time(save_text));
+#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
+                    // Switch the active union member from rule_ to save_.
+                    zl.u.rule_.~string();
+                    zl.tag_ = zonelet::has_save;
+                    ::new(&zl.u.save_) minutes(parsed);
+#else
+                    zl.u.rule_.clear();
+                    zl.tag_ = zonelet::has_save;
+                    zl.u.save_ = parsed;
+#endif
+                }
+                catch (...)
+                {
+                    std::cerr << name_ << " : " << zl.u.rule_ << '\n';
+                    throw;
+                }
+            }
+        }
+
+        // Save in effect when this zonelet ends.
+        minutes final_save{0};
+        if (zl.tag_ == zonelet::has_rule)
+        {
+            zl.last_rule_ = find_rule_for_zone(named_rules, zl.until_year_, zl.gmtoff_,
+                                               zl.until_date_);
+            if (zl.last_rule_.first != nullptr)
+                final_save = zl.last_rule_.first->save();
+        }
+        else if (zl.tag_ == zonelet::has_save)
+        {
+            final_save = zl.u.save_;
+        }
+        zl.until_utc_ = zl.until_date_.to_sys(zl.until_year_, zl.gmtoff_, final_save);
+        zl.until_std_ = local_seconds{zl.until_utc_.time_since_epoch()} + zl.gmtoff_;
+        zl.until_loc_ = zl.until_std_ + final_save;
+
+        if (zl.tag_ == zonelet::has_rule)
+        {
+            if (previous != nullptr)
+            {
+                zl.first_rule_ = find_rule_for_zone(named_rules, previous->until_utc_,
+                                                    previous->until_std_,
+                                                    previous->until_loc_);
+                if (zl.first_rule_.first != nullptr)
+                {
+                    zl.initial_save_ = zl.first_rule_.first->save();
+                    zl.initial_abbrev_ = zl.first_rule_.first->abbrev();
+                    if (zl.first_rule_ == zl.last_rule_)
+                    {
+                        // No rule change inside this zonelet.
+                        zl.first_rule_ = std::make_pair(nullptr, year::min());
+                        zl.last_rule_ = std::make_pair(nullptr, year::max());
+                    }
+                    else
+                    {
+                        zl.first_rule_ = find_next_rule(named_rules.first,
+                                                        named_rules.second,
+                                                        zl.first_rule_.first,
+                                                        zl.first_rule_.second);
+                    }
+                }
+            }
+            if (zl.first_rule_.first == nullptr && zl.last_rule_.first != nullptr)
+            {
+                zl.first_rule_ = std::make_pair(named_rules.first,
+                                                named_rules.first->starting_year());
+                zl.initial_abbrev_ = find_first_std_rule(named_rules)->abbrev();
+            }
+        }
+
+#ifndef NDEBUG
+        if (zl.first_rule_.first == nullptr)
+        {
+            assert(zl.first_rule_.second == year::min());
+            assert(zl.last_rule_.first == nullptr);
+            assert(zl.last_rule_.second == year::max());
+        }
+        else
+        {
+            assert(zl.last_rule_.first != nullptr);
+        }
+#endif
+        previous = &zl;
+    }
+}
+
+static
+std::string
+format_abbrev(std::string format, const std::string& variable, std::chrono::seconds off,
+              std::chrono::minutes save)
+{
+    // Expands the first placeholder found in `format`, trying in this order:
+    //   "%s"  -> replaced by `variable`
+    //   "A/B" -> A when `save` is zero, otherwise B
+    //   "%z"  -> replaced by the signed offset `off` as hh, hhmm or hhmmss
+    // A format with none of these is returned unchanged.
+    using namespace std::chrono;
+
+    const auto var_pos = format.find("%s");
+    if (var_pos != std::string::npos)
+    {
+        format.replace(var_pos, 2, variable);
+        return format;
+    }
+
+    const auto slash_pos = format.find('/');
+    if (slash_pos != std::string::npos)
+    {
+        if (save == minutes{0})
+            format.erase(slash_pos);
+        else
+            format.erase(0, slash_pos+1);
+        return format;
+    }
+
+    const auto off_pos = format.find("%z");
+    if (off_pos == std::string::npos)
+        return format;
+
+    // Sign first; the remaining fields are produced from the magnitude.
+    std::string digits;
+    if (off < seconds{0})
+    {
+        digits = '-';
+        off = -off;
+    }
+    else
+        digits = '+';
+
+    // Hours are always emitted, zero padded to two digits.
+    const auto h = date::floor<hours>(off);
+    off -= h;
+    if (h < hours{10})
+        digits += '0';
+    digits += std::to_string(h.count());
+
+    // Minutes are emitted only when something is left after the hours,
+    // seconds only when something is left after the minutes.
+    if (off > seconds{0})
+    {
+        const auto m = date::floor<minutes>(off);
+        off -= m;
+        if (m < minutes{10})
+            digits += '0';
+        digits += std::to_string(m.count());
+        if (off > seconds{0})
+        {
+            if (off < seconds{10})
+                digits += '0';
+            digits += std::to_string(off.count());
+        }
+    }
+    format.replace(off_pos, 2, digits);
+    return format;
+}
+
+// Builds the sys_info (begin, end, offset, save, abbrev) of the zonelet that
+// contains the time point tp.
+//
+// tz_int is converted to the tz enumeration.  tz::standard is rejected by an
+// assert; tz::utc compares tp against each zonelet's until_utc_, any other
+// value compares it against until_loc_.
+//
+// Throws std::runtime_error when the year of tp is outside
+// [min_year, max_year].  Returns a value-initialized sys_info when tp is not
+// before the end of any zonelet.
+sys_info
+time_zone::get_info_impl(sys_seconds tp, int tz_int) const
+{
+ using namespace std::chrono;
+ using namespace date;
+ tz timezone = static_cast<tz>(tz_int);
+ assert(timezone != tz::standard);
+ auto y = year_month_day(floor<days>(tp)).year();
+ if (y < min_year || y > max_year)
+ throw std::runtime_error("The year " + std::to_string(static_cast<int>(y)) +
+ " is out of range:[" + std::to_string(static_cast<int>(min_year)) + ", "
+ + std::to_string(static_cast<int>(max_year)) + "]");
+ // adjust_infos() is run at most once per time_zone, on first use, against
+ // the rules of the current database.
+ std::call_once(*adjusted_,
+ [this]()
+ {
+ const_cast<time_zone*>(this)->adjust_infos(get_tzdb().rules);
+ });
+ // First zonelet whose end lies strictly after tp.
+ auto i = std::upper_bound(zonelets_.begin(), zonelets_.end(), tp,
+ [timezone](sys_seconds t, const zonelet& zl)
+ {
+ return timezone == tz::utc ? t < zl.until_utc_ :
+ t < sys_seconds{zl.until_loc_.time_since_epoch()};
+ });
+
+ sys_info r{};
+ if (i != zonelets_.end())
+ {
+ if (i->tag_ == zonelet::has_save)
+ {
+ // Fixed save: the info spans the whole zonelet.
+ if (i != zonelets_.begin())
+ r.begin = i[-1].until_utc_;
+ else
+ r.begin = sys_days(year::min()/min_day);
+ r.end = i->until_utc_;
+ r.offset = i->gmtoff_ + i->u.save_;
+ r.save = i->u.save_;
+ }
+ else if (i->u.rule_.empty())
+ {
+ // No rule name: the info spans the whole zonelet and r.save keeps
+ // its value-initialized state.
+ if (i != zonelets_.begin())
+ r.begin = i[-1].until_utc_;
+ else
+ r.begin = sys_days(year::min()/min_day);
+ r.end = i->until_utc_;
+ r.offset = i->gmtoff_;
+ }
+ else
+ {
+ // Rule based: find_rule() supplies the info, which is then clamped
+ // to the boundaries of this zonelet.
+ r = find_rule(i->first_rule_, i->last_rule_, y, i->gmtoff_,
+ MonthDayTime(local_seconds{tp.time_since_epoch()}, timezone),
+ i->initial_save_, i->initial_abbrev_);
+ r.offset = i->gmtoff_ + r.save;
+ if (i != zonelets_.begin() && r.begin < i[-1].until_utc_)
+ r.begin = i[-1].until_utc_;
+ if (r.end > i->until_utc_)
+ r.end = i->until_utc_;
+ }
+ // r.abbrev is passed in as the variable part and replaced by the
+ // expanded zonelet format.
+ r.abbrev = format_abbrev(i->format_, r.abbrev, r.offset, r.save);
+ assert(r.begin < r.end);
+ }
+ return r;
+}
+
+// Streams a time_zone: its name followed by one line per zonelet.  The
+// stream's formatting state is captured by save_ostream on entry.
+// adjust_infos() is run first (at most once per time_zone) so that the
+// derived until_* and rule fields are populated before they are printed.
+std::ostream&
+operator<<(std::ostream& os, const time_zone& z)
+{
+ using namespace date;
+ using namespace std::chrono;
+ detail::save_ostream<char> _(os);
+ os.fill(' ');
+ os.flags(std::ios::dec | std::ios::left);
+ std::call_once(*z.adjusted_,
+ [&z]()
+ {
+ const_cast<time_zone&>(z).adjust_infos(get_tzdb().rules);
+ });
+ os.width(35);
+ os << z.name_;
+ // Empty for the first zonelet (it shares the line with the name); set to
+ // 35 spaces at the end of the first iteration so later lines align.
+ std::string indent;
+ for (auto const& s : z.zonelets_)
+ {
+ os << indent;
+ if (s.gmtoff_ >= seconds{0})
+ os << ' ';
+ os << make_time(s.gmtoff_) << " ";
+ os.width(15);
+ // The union holds either a rule name or a fixed save, selected by tag_.
+ if (s.tag_ != zonelet::has_save)
+ os << s.u.rule_;
+ else
+ {
+ // Format into a temporary so the width(15) set above applies to the
+ // formatted save as a single field.
+ std::ostringstream tmp;
+ tmp << make_time(s.u.save_);
+ os << tmp.str();
+ }
+ os.width(8);
+ os << s.format_ << " ";
+ os << s.until_year_ << ' ' << s.until_date_;
+ os << " " << s.until_utc_ << " UTC";
+ os << " " << s.until_std_ << " STD";
+ os << " " << s.until_loc_;
+ os << " " << make_time(s.initial_save_);
+ os << " " << s.initial_abbrev_;
+ // first_rule_ / last_rule_ are (Rule pointer, year) pairs; a null
+ // pointer is printed as the text "nullptr".
+ if (s.first_rule_.first != nullptr)
+ os << " {" << *s.first_rule_.first << ", " << s.first_rule_.second << '}';
+ else
+ os << " {" << "nullptr" << ", " << s.first_rule_.second << '}';
+ if (s.last_rule_.first != nullptr)
+ os << " {" << *s.last_rule_.first << ", " << s.last_rule_.second << '}';
+ else
+ os << " {" << "nullptr" << ", " << s.last_rule_.second << '}';
+ os << '\n';
+ if (indent.empty())
+ indent = std::string(35, ' ');
+ }
+ return os;
+}
+
+#endif // !USE_OS_TZDB
+
+#if !MISSING_LEAP_SECONDS
+
+// Streams the date of a leap second followed by the literal " +".
+std::ostream&
+operator<<(std::ostream& os, const leap_second& x)
+{
+    using namespace date;
+    os << x.date_;
+    os << " +";
+    return os;
+}
+
+#endif // !MISSING_LEAP_SECONDS
+
+#if USE_OS_TZDB
+
+# ifdef __APPLE__
+// Reads the first whitespace-delimited token of "<tz dir>/+VERSION".
+// Throws std::runtime_error when that token cannot be read.
+static
+std::string
+get_version()
+{
+    auto path = get_tz_dir() + std::string("/+VERSION");
+    std::ifstream in{path};
+    std::string version;
+    if (!(in >> version))
+        throw std::runtime_error("Unable to get Timezone database version from " + path);
+    return version;
+}
+# endif
+
+// Builds a tzdb by walking the directory returned by get_tz_dir().
+//
+// Every entry that is not a directory and is not on the ignore list below
+// becomes a time_zone named by its path relative to that directory.
+// Directories are queued and scanned in turn; directories that cannot be
+// opened are skipped.  Unless MISSING_LEAP_SECONDS is set, leap seconds are
+// loaded from "right/UTC", falling back to "UTC"; std::runtime_error is
+// thrown when neither can be opened.
+static
+std::unique_ptr<tzdb>
+init_tzdb()
+{
+    // Owns the DIR* so that it is closed on every exit from the scan,
+    // including an exception thrown while zones are being appended.
+    struct dir_closer
+    {
+        void operator()(DIR* p) const
+        {
+            if (p != nullptr)
+                closedir(p);
+        }
+    };
+
+    std::unique_ptr<tzdb> db(new tzdb);
+
+    //Iterate through folders
+    std::queue<std::string> subfolders;
+    subfolders.emplace(get_tz_dir());
+    struct dirent* d;
+    struct stat s;
+    while (!subfolders.empty())
+    {
+        auto dirname = std::move(subfolders.front());
+        subfolders.pop();
+        std::unique_ptr<DIR, dir_closer> dir(opendir(dirname.c_str()));
+        if (!dir)
+            continue;
+        while ((d = readdir(dir.get())) != nullptr)
+        {
+            // Ignore these files:
+            // strncmp (not memcmp) so that a name shorter than five
+            // characters is never read past its terminator.
+            if (d->d_name[0]                      == '.' || // curdir, prevdir, hidden
+                strncmp(d->d_name, "posix", 5)    == 0   || // starts with posix
+                strcmp(d->d_name, "Factory")      == 0   ||
+                strcmp(d->d_name, "iso3166.tab")  == 0   ||
+                strcmp(d->d_name, "right")        == 0   ||
+                strcmp(d->d_name, "+VERSION")     == 0   ||
+                strcmp(d->d_name, "zone.tab")     == 0   ||
+                strcmp(d->d_name, "zone1970.tab") == 0   ||
+                strcmp(d->d_name, "tzdata.zi")    == 0   ||
+                strcmp(d->d_name, "leapseconds")  == 0   ||
+                strcmp(d->d_name, "leap-seconds.list") == 0   )
+                continue;
+            auto subname = dirname + folder_delimiter + d->d_name;
+            if(stat(subname.c_str(), &s) == 0)
+            {
+                if(S_ISDIR(s.st_mode))
+                {
+                    // NOTE(review): stat() reports on the target of a
+                    // symbolic link, so S_ISLNK is not expected to be true
+                    // here; kept as-is to leave traversal unchanged.
+                    if(!S_ISLNK(s.st_mode))
+                    {
+                        subfolders.push(subname);
+                    }
+                }
+                else
+                {
+                    db->zones.emplace_back(subname.substr(get_tz_dir().size()+1),
+                                           detail::undocumented{});
+                }
+            }
+        }
+    }
+    db->zones.shrink_to_fit();
+    std::sort(db->zones.begin(), db->zones.end());
+#  if !MISSING_LEAP_SECONDS
+    std::ifstream in(get_tz_dir() + std::string(1, folder_delimiter) + "right/UTC",
+                     std::ios_base::binary);
+    if (in)
+    {
+        in.exceptions(std::ios::failbit | std::ios::badbit);
+        db->leap_seconds = load_just_leaps(in);
+    }
+    else
+    {
+        in.clear();
+        in.open(get_tz_dir() + std::string(1, folder_delimiter) +
+                "UTC", std::ios_base::binary);
+        if (!in)
+            throw std::runtime_error("Unable to extract leap second information");
+        in.exceptions(std::ios::failbit | std::ios::badbit);
+        db->leap_seconds = load_just_leaps(in);
+    }
+#  endif  // !MISSING_LEAP_SECONDS
+#  ifdef __APPLE__
+    db->version = get_version();
+#  endif
+    return db;
+}
+
+#else // !USE_OS_TZDB
+
+// time_zone_link
+
+// Parses a link line of the form "<keyword> <target> <name>".  The keyword is
+// read and discarded.  Extraction failures throw because failbit and badbit
+// are enabled as exceptions on the stream.
+time_zone_link::time_zone_link(const std::string& s)
+{
+    using namespace date;
+    std::istringstream in(s);
+    in.exceptions(std::ios::failbit | std::ios::badbit);
+    std::string keyword;
+    in >> keyword;
+    in >> target_;
+    in >> name_;
+}
+
+// Streams a link as "<name> --> <target>", with the name left-aligned in a
+// field of width 35.
+std::ostream&
+operator<<(std::ostream& os, const time_zone_link& x)
+{
+    using namespace date;
+    detail::save_ostream<char> guard(os);
+    os.fill(' ');
+    os.flags(std::ios::dec | std::ios::left);
+    os.width(35);
+    os << x.name_;
+    os << " --> ";
+    os << x.target_;
+    return os;
+}
+
+// leap_second
+
+// Parses a leap second line of the form "<keyword> <year> <MonthDayTime>".
+// The keyword is read and discarded.  Extraction failures throw because
+// failbit and badbit are enabled as exceptions on the stream.
+leap_second::leap_second(const std::string& s, detail::undocumented)
+{
+    using namespace date;
+    std::istringstream in(s);
+    in.exceptions(std::ios::failbit | std::ios::badbit);
+    std::string keyword;
+    int yr;
+    MonthDayTime when;
+    in >> keyword;
+    in >> yr;
+    in >> when;
+    date_ = when.to_time_point(year(yr));
+}
+
+// Returns true when the access check on `filename` succeeds
+// (mode 0 with _access on Windows, F_OK with access elsewhere).
+static
+bool
+file_exists(const std::string& filename)
+{
+    const char* const path = filename.c_str();
+#ifdef _WIN32
+    const int rc = ::_access(path, 0);
+#else
+    const int rc = ::access(path, F_OK);
+#endif
+    return rc == 0;
+}
+
+#if HAS_REMOTE_API
+
+// CURL tools
+
+// Calls curl_global_init(CURL_GLOBAL_DEFAULT).
+// Returns 0 on success and throws std::runtime_error otherwise.
+static
+int
+curl_global()
+{
+    const auto rc = ::curl_global_init(CURL_GLOBAL_DEFAULT);
+    if (rc != 0)
+        throw std::runtime_error("CURL global initialization failed");
+    return 0;
+}
+
+namespace
+{
+
+// Deleter for std::unique_ptr<CURL, ...>: hands the handle to
+// curl_easy_cleanup().
+struct curl_deleter
+{
+    void operator()(CURL* handle) const
+    {
+        ::curl_easy_cleanup(handle);
+    }
+};
+
+}  // unnamed namespace
+
+// Returns a new CURL easy handle owned by a unique_ptr (null when
+// curl_easy_init() returns null).  The function-local static makes
+// curl_global() run on the first call only.
+static
+std::unique_ptr<CURL, curl_deleter>
+curl_init()
+{
+    static const auto global_init_done = curl_global();
+    (void)global_init_done;
+    CURL* const handle = ::curl_easy_init();
+    return std::unique_ptr<CURL, curl_deleter>{handle};
+}
+
+// Downloads `url` and stores the response body in `str`.
+//
+// `str` is cleared first.  Returns true only when curl_easy_perform()
+// reports CURLE_OK; returns false when no CURL handle could be created or
+// the transfer failed (in which case `str` may hold a partial body).
+static
+bool
+download_to_string(const std::string& url, std::string& str)
+{
+    str.clear();
+    auto curl = curl_init();
+    if (!curl)
+        return false;
+    curl_easy_setopt(curl.get(), CURLOPT_USERAGENT, "curl");
+    curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
+    // Appends each received chunk to the std::string passed as user data.
+    curl_write_callback write_cb = [](char* contents, std::size_t size, std::size_t nmemb,
+                                      void* userp) -> std::size_t
+    {
+        auto& userstr = *static_cast<std::string*>(userp);
+        auto realsize = size * nmemb;
+        userstr.append(contents, realsize);
+        return realsize;
+    };
+    curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, write_cb);
+    curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &str);
+    // The option value is read as a long by libcurl, so pass a long literal
+    // rather than a bool (which would be promoted to int).
+    // NOTE(review): peer certificate verification is switched off here, so
+    // the downloaded content is not authenticated.  Left unchanged because
+    // enabling it alters behaviour on hosts without a CA bundle.
+    curl_easy_setopt(curl.get(), CURLOPT_SSL_VERIFYPEER, 0L);
+    auto res = curl_easy_perform(curl.get());
+    return (res == CURLE_OK);
+}
+
+namespace
+{
+    // Selects the mode in which download_to_file() opens its output file.
+    enum class download_file_options
+    {
+        binary,  // opened with std::ofstream::binary
+        text     // opened without std::ofstream::binary
+    };
+}
+
+// Downloads `url` into the file `local_filename`.
+//
+// `opts` selects binary or text mode for the output file.  When
+// `error_buffer` is non-null it is registered as CURLOPT_ERRORBUFFER.
+// Returns true only when the output file could be opened and
+// curl_easy_perform() reports CURLE_OK.
+static
+bool
+download_to_file(const std::string& url, const std::string& local_filename,
+                 download_file_options opts, char* error_buffer)
+{
+    auto curl = curl_init();
+    if (!curl)
+        return false;
+    curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
+    // The option value is read as a long by libcurl, so pass a long literal
+    // rather than a bool (which would be promoted to int).
+    // NOTE(review): peer certificate verification is switched off here, so
+    // the downloaded file is not authenticated.  Left unchanged because
+    // enabling it alters behaviour on hosts without a CA bundle.
+    curl_easy_setopt(curl.get(), CURLOPT_SSL_VERIFYPEER, 0L);
+    if (error_buffer)
+        curl_easy_setopt(curl.get(), CURLOPT_ERRORBUFFER, error_buffer);
+    // Writes each received chunk to the std::ofstream passed as user data.
+    curl_write_callback write_cb = [](char* contents, std::size_t size, std::size_t nmemb,
+                                      void* userp) -> std::size_t
+    {
+        auto& of = *static_cast<std::ofstream*>(userp);
+        auto realsize = size * nmemb;
+        of.write(contents, static_cast<std::streamsize>(realsize));
+        return realsize;
+    };
+    curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, write_cb);
+    decltype(curl_easy_perform(curl.get())) res;
+    {
+        std::ofstream of(local_filename,
+                         opts == download_file_options::binary ?
+                             std::ofstream::out | std::ofstream::binary :
+                             std::ofstream::out);
+        // Without this check every write would be a silent no-op on the
+        // failed stream and the transfer would still be reported as a
+        // success.
+        if (!of.is_open())
+            return false;
+        of.exceptions(std::ios::badbit);
+        curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &of);
+        res = curl_easy_perform(curl.get());
+    }   // the file is closed here, before the result is reported
+    return res == CURLE_OK;
+}
+
+// Downloads https://www.iana.org/time-zones and returns the five characters
+// that follow the first occurrence of "/time-zones/releases/tzdata".
+// Returns an empty string when the download fails, the marker is absent, or
+// fewer than five characters follow it.
+std::string
+remote_version()
+{
+    std::string page;
+    if (!download_to_string("https://www.iana.org/time-zones", page))
+        return std::string();
+
+    CONSTDATA char db[] = "/time-zones/releases/tzdata";
+    CONSTDATA auto db_size = sizeof(db) - 1;
+    const int ver_str_len = 5;
+    const auto p = page.find(db, 0, db_size);
+    if (p == std::string::npos || p + (db_size + ver_str_len) > page.size())
+        return std::string();
+    return page.substr(p + db_size, ver_str_len);
+}
+
+
+// TODO! Using system() creates a process and a console window.
+// This is useful for seeing what errors may occur, but it is slow and distracting.
+// Consider implementing this functionality more directly, such as
+// using _mkdir and CreateProcess etc.
+// But use the current means for now, as it matches the Unix implementations,
+// while in the proof of concept / testing phase.
+// TODO! Use <filesystem> eventually.
+// Recursively deletes `folder` and everything below it.
+// Returns true when the folder was removed.
+//
+// Four implementations are selected at compile time by _WIN32 and
+// USE_SHELL_API; only the POSIX non-shell one walks the tree itself.
+static
+bool
+remove_folder_and_subfolders(const std::string& folder)
+{
+#  ifdef _WIN32
+#    if USE_SHELL_API
+    // Delete the folder contents by deleting the folder.
+    std::string cmd = "rd /s /q \"";
+    cmd += folder;
+    cmd += '\"';
+    return std::system(cmd.c_str()) == EXIT_SUCCESS;
+#    else  // !USE_SHELL_API
+    // Create a buffer containing the path to delete. It must be terminated
+    // by two nuls. Who designs these API's...
+    std::vector<char> from;
+    from.assign(folder.begin(), folder.end());
+    from.push_back('\0');
+    from.push_back('\0');
+    SHFILEOPSTRUCT fo{}; // Zero initialize.
+    fo.wFunc = FO_DELETE;
+    fo.pFrom = from.data();
+    fo.fFlags = FOF_NO_UI;
+    int ret = SHFileOperation(&fo);
+    if (ret == 0 && !fo.fAnyOperationsAborted)
+        return true;
+    return false;
+#    endif  // !USE_SHELL_API
+#  else   // !_WIN32
+#    if USE_SHELL_API
+    return std::system(("rm -R " + folder).c_str()) == EXIT_SUCCESS;
+#    else  // !USE_SHELL_API
+    struct dir_deleter {
+        dir_deleter() {}
+        void operator()(DIR* d) const
+        {
+            if (d != nullptr)
+            {
+                int result = closedir(d);
+                assert(result == 0);
+                (void)result;   // unused when assert() compiles away
+            }
+        }
+    };
+    using closedir_ptr = std::unique_ptr<DIR, dir_deleter>;
+
+    std::string filename;
+    struct stat statbuf;
+    struct dirent* p = nullptr;
+
+    closedir_ptr d(opendir(folder.c_str()));
+    bool r = d.get() != nullptr;
+    // Stop at the first entry that cannot be removed.
+    while (r && (p=readdir(d.get())) != nullptr)
+    {
+        if (strcmp(p->d_name, ".") == 0 || strcmp(p->d_name, "..") == 0)
+            continue;
+
+        filename = folder;
+        filename += '/';
+        filename += p->d_name;
+
+        // lstat(), not stat(): a symbolic link must be unlinked itself.
+        // Following it would recurse into, and empty, a directory that lies
+        // outside `folder`.
+        if (lstat(filename.c_str(), &statbuf) == 0)
+            r = S_ISDIR(statbuf.st_mode)
+              ? remove_folder_and_subfolders(filename)
+              : unlink(filename.c_str()) == 0;
+    }
+    // Close the directory before trying to remove it.
+    d.reset();
+
+    if (r)
+        r = rmdir(folder.c_str()) == 0;
+
+    return r;
+#    endif  // !USE_SHELL_API
+#  endif  // !_WIN32
+}
+
+// Creates the directory `folder`; returns true on success.  The mechanism
+// (shell command or direct library call) is selected at compile time by
+// _WIN32 and USE_SHELL_API.
+static
+bool
+make_directory(const std::string& folder)
+{
+#  ifdef _WIN32
+#    if USE_SHELL_API
+    // Re-create the folder.
+    const std::string cmd = "mkdir \"" + folder + '\"';
+    return std::system(cmd.c_str()) == EXIT_SUCCESS;
+#    else  // !USE_SHELL_API
+    const int rc = _mkdir(folder.c_str());
+    return rc == 0;
+#    endif  // !USE_SHELL_API
+#  else   // !_WIN32
+#    if USE_SHELL_API
+    const std::string cmd = "mkdir -p " + folder;
+    return std::system(cmd.c_str()) == EXIT_SUCCESS;
+#    else  // !USE_SHELL_API
+    const int rc = mkdir(folder.c_str(), 0777);
+    return rc == 0;
+#    endif  // !USE_SHELL_API
+#  endif  // !_WIN32
+}
+
+// Deletes the file `file`; returns true on success.  The mechanism (shell
+// command or direct library call) is selected at compile time by _WIN32 and
+// USE_SHELL_API.
+static
+bool
+delete_file(const std::string& file)
+{
+#  ifdef _WIN32
+#    if USE_SHELL_API
+    const std::string cmd = "del \"" + file + '\"';
+    return std::system(cmd.c_str()) == 0;
+#    else  // !USE_SHELL_API
+    const int rc = _unlink(file.c_str());
+    return rc == 0;
+#    endif  // !USE_SHELL_API
+#  else   // !_WIN32
+#    if USE_SHELL_API
+    const std::string cmd = "rm " + file;
+    return std::system(cmd.c_str()) == EXIT_SUCCESS;
+#    else  // !USE_SHELL_API
+    const int rc = unlink(file.c_str());
+    return rc == 0;
+#    endif  // !USE_SHELL_API
+#  endif  // !_WIN32
+}
+
+# ifdef _WIN32
+
+// Moves the file `from` to `to`; returns true on success.  Uses the shell
+// "move" command when USE_SHELL_API is set and MoveFile otherwise.
+static
+bool
+move_file(const std::string& from, const std::string& to)
+{
+#  if USE_SHELL_API
+    const std::string cmd = "move \"" + from + "\" \"" + to + '\"';
+    return std::system(cmd.c_str()) == EXIT_SUCCESS;
+#  else  // !USE_SHELL_API
+    const auto moved = ::MoveFile(from.c_str(), to.c_str());
+    return !!moved;
+#  endif  // !USE_SHELL_API
+}
+
+// Returns the known folder FOLDERID_ProgramFiles,
+// usually something like "c:\Program Files".
+static
+std::string
+get_program_folder()
+{
+    std::string folder = get_known_folder(FOLDERID_ProgramFiles);
+    return folder;
+}
+
+// Returns the path of the 7-Zip executable used to unpack downloads.
+// The "Path" value under HKEY_LOCAL_MACHINE\SOFTWARE\7-Zip is used when it
+// can be read and is non-empty; otherwise the path is guessed as
+// "<program folder>\7-Zip\7z.exe".
+// Note folder can and usually does contain spaces.
+static
+std::string
+get_unzip_program()
+{
+ std::string path;
+
+ // 7-Zip appears to note its location in the registry.
+ // If that doesn't work, fall through and take a guess, but it will likely be wrong.
+ HKEY hKey = nullptr;
+ if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, "SOFTWARE\\7-Zip", 0, KEY_READ, &hKey) == ERROR_SUCCESS)
+ {
+ char value_buffer[MAX_PATH + 1]; // fyi 260 at time of writing.
+ // in/out parameter. Documentation says that size is a count of bytes not chars.
+ // One element is held back so the terminator written below always fits.
+ DWORD size = sizeof(value_buffer) - sizeof(value_buffer[0]);
+ DWORD tzi_type = REG_SZ;
+ // Testing shows Path key value is "C:\Program Files\7-Zip\" i.e. always with trailing \.
+ bool got_value = (RegQueryValueExA(hKey, "Path", nullptr, &tzi_type,
+ reinterpret_cast<LPBYTE>(value_buffer), &size) == ERROR_SUCCESS);
+ RegCloseKey(hKey); // Close now in case of throw later.
+ if (got_value)
+ {
+ // Function does not guarantee to null terminate.
+ value_buffer[size / sizeof(value_buffer[0])] = '\0';
+ path = value_buffer;
+ if (!path.empty())
+ {
+ path += "7z.exe";
+ return path;
+ }
+ }
+ }
+ // Fallback guess: the default install location below the program folder.
+ path += get_program_folder();
+ path += folder_delimiter;
+ path += "7-Zip\\7z.exe";
+ return path;
+}
+
+# if !USE_SHELL_API
+// Runs `command` as a child process without a console window and waits for
+// it to finish.  Returns the child's exit code, or EXIT_FAILURE when the
+// process could not be created, its exit code could not be read, or the
+// exit code reads as STILL_ACTIVE.
+static
+int
+run_program(const std::string& command)
+{
+ STARTUPINFO si{};
+ si.cb = sizeof(si);
+ PROCESS_INFORMATION pi{};
+
+ // Allegedly CreateProcess overwrites the command line. Ugh.
+ std::string mutable_command(command);
+ if (CreateProcess(nullptr, &mutable_command[0],
+ nullptr, nullptr, FALSE, CREATE_NO_WINDOW, nullptr, nullptr, &si, &pi))
+ {
+ WaitForSingleObject(pi.hProcess, INFINITE);
+ DWORD exit_code;
+ bool got_exit_code = !!GetExitCodeProcess(pi.hProcess, &exit_code);
+ // Both handles are released before the result is inspected.
+ CloseHandle(pi.hProcess);
+ CloseHandle(pi.hThread);
+ // Not 100% sure about this still active thing is correct,
+ // but I'm going with it because I *think* WaitForSingleObject might
+ // return in some cases without INFINITE-ly waiting.
+ // But why/wouldn't GetExitCodeProcess return false in that case?
+ if (got_exit_code && exit_code != STILL_ACTIVE)
+ return static_cast<int>(exit_code);
+ }
+ return EXIT_FAILURE;
+}
+# endif // !USE_SHELL_API
+
+// Returns "<install><delimiter>tzdata<version>.tar", the name of the tar
+// file expected inside the install folder for `version`.
+static
+std::string
+get_download_tar_file(const std::string& version)
+{
+    std::string file(get_install());
+    file.append(1, folder_delimiter);
+    file.append("tzdata");
+    file.append(version);
+    file.append(".tar");
+    return file;
+}
+
+// Unpacks the downloaded archive `gz_file` for `version` in two stages using
+// the program returned by get_unzip_program():
+//   1. extract the .tar from the .gz into `dest_folder`, then delete the .gz;
+//   2. extract the data from the .tar into get_install(), then delete the
+//      .tar.
+// Returns true only when both stages succeed.  A failed stage returns false
+// immediately and leaves that stage's input file in place.
+static
+bool
+extract_gz_file(const std::string& version, const std::string& gz_file,
+                const std::string& dest_folder)
+{
+    // Runs one extraction command and reports whether it exited successfully.
+    const auto run = [](std::string cmd) -> bool
+    {
+#  if USE_SHELL_API
+        // When using shelling out with std::system() extra quotes are required around the
+        // whole command. It's weird but necessary it seems, see:
+        // http://stackoverflow.com/q/27975969/576911
+        cmd = "\"" + cmd + "\"";
+        return std::system(cmd.c_str()) == EXIT_SUCCESS;
+#  else  // !USE_SHELL_API
+        return run_program(cmd) == EXIT_SUCCESS;
+#  endif  // !USE_SHELL_API
+    };
+
+    auto unzip_prog = get_unzip_program();
+
+    // Use the unzip program to extract the tar file from the archive.
+
+    // Aim to create a string like:
+    // "C:\Program Files\7-Zip\7z.exe" x "C:\Users\SomeUser\Downloads\tzdata2016d.tar.gz"
+    //     -o"C:\Users\SomeUser\Downloads\tzdata"
+    std::string cmd;
+    cmd = '\"';
+    cmd += unzip_prog;
+    cmd += "\" x \"";
+    cmd += gz_file;
+    cmd += "\" -o\"";
+    cmd += dest_folder;
+    cmd += '\"';
+    if (!run(cmd))
+        return false;
+    delete_file(gz_file);
+
+    // Use the unzip program extract the data from the tar file that was
+    // just extracted from the archive.
+    auto tar_file = get_download_tar_file(version);
+    cmd = '\"';
+    cmd += unzip_prog;
+    cmd += "\" x \"";
+    cmd += tar_file;
+    cmd += "\" -o\"";
+    cmd += get_install();
+    cmd += '\"';
+    // The outcome of this stage alone decides the result: a failure here
+    // must not be masked by the success of the first stage.
+    if (!run(cmd))
+        return false;
+    delete_file(tar_file);
+
+    return true;
+}
+
+// Returns get_install() + version + "windowsZones.xml".  No folder delimiter
+// is inserted between the parts.
+static
+std::string
+get_download_mapping_file(const std::string& version)
+{
+    return get_install() + version + "windowsZones.xml";
+}
+
+# else // !_WIN32
+
+# if !USE_SHELL_API
+// Runs `prog` with the null-terminated argument vector `args` in a child
+// process and waits for it.  Returns the child's exit status when it exited
+// normally, and EXIT_FAILURE when fork() or waitpid() fails or the child did
+// not exit normally.
+static
+int
+run_program(const char* prog, const char*const args[])
+{
+ pid_t pid = fork();
+ if (pid == -1) // Child failed to start.
+ return EXIT_FAILURE;
+
+ if (pid != 0)
+ {
+ // We are in the parent. Child started. Wait for it.
+ pid_t ret;
+ int status;
+ // Retry the wait when it is interrupted by a signal.
+ while ((ret = waitpid(pid, &status, 0)) == -1)
+ {
+ if (errno != EINTR)
+ break;
+ }
+ if (ret != -1)
+ {
+ if (WIFEXITED(status))
+ return WEXITSTATUS(status);
+ }
+ printf("Child issues!\n");
+
+ return EXIT_FAILURE; // Not sure what status of child is.
+ }
+ else // We are in the child process. Start the program the parent wants to run.
+ {
+
+ if (execv(prog, const_cast<char**>(args)) == -1) // Does not return.
+ {
+ // execv() failed: report it and leave the child with status 127.
+ perror("unreachable 0\n");
+ _Exit(127);
+ }
+ printf("unreachable 2\n");
+ }
+ printf("unreachable 2\n");
+ // Unreachable.
+ assert(false);
+ exit(EXIT_FAILURE);
+ return EXIT_FAILURE;
+}
+
+// Unpacks `gz_file` into get_install() with tar ("-xzf ... -C ...") and, on
+// success, deletes the archive.  Returns true when tar exited successfully.
+// The first and third parameters are unused in this implementation.
+static
+bool
+extract_gz_file(const std::string&, const std::string& gz_file, const std::string&)
+{
+#  if USE_SHELL_API
+    bool unzipped = std::system(("tar -xzf " + gz_file + " -C " + get_install()).c_str()) == EXIT_SUCCESS;
+#  else  // !USE_SHELL_API
+    const char prog[] = {"/usr/bin/tar"};
+    const char*const args[] =
+    {
+        prog, "-xzf", gz_file.c_str(), "-C", get_install().c_str(), nullptr
+    };
+    bool unzipped = (run_program(prog, args) == EXIT_SUCCESS);
+#  endif  // !USE_SHELL_API
+    if (!unzipped)
+        return false;
+    delete_file(gz_file);
+    return true;
+}
+
+# endif // !_WIN32
+
+// Downloads the tzdata archive for `version` from data.iana.org to
+// get_download_gz_file(version).  On Windows, windowsZones.xml is downloaded
+// as well, to get_download_mapping_file(version).
+//
+// `error_buffer` is forwarded to download_to_file().  Returns true when
+// every download succeeded; on non-Windows systems it also returns false
+// when the install folder does not exist and cannot be created.
+bool
+remote_download(const std::string& version, char* error_buffer)
+{
+    assert(!version.empty());
+
+#  ifndef _WIN32
+    // Create download folder if it does not exist on UNIX system
+    // (on Windows the download folder should always be available).
+    const auto download_folder = get_install();
+    if (!file_exists(download_folder) && !make_directory(download_folder))
+        return false;
+#  endif  // !_WIN32
+
+    const auto url = "https://data.iana.org/time-zones/releases/tzdata" + version +
+                     ".tar.gz";
+    if (!download_to_file(url, get_download_gz_file(version),
+                          download_file_options::binary, error_buffer))
+        return false;
+
+#  ifdef _WIN32
+    const auto mapping_file = get_download_mapping_file(version);
+    return download_to_file(
+        "https://raw.githubusercontent.com/unicode-org/cldr/master/"
+        "common/supplemental/windowsZones.xml",
+        mapping_file, download_file_options::text, error_buffer);
+#  else   // !_WIN32
+    return true;
+#  endif  // _WIN32
+}
+
+// Installs a previously downloaded archive for `version`: removes any
+// existing install folder, re-creates it and extracts the archive into it.
+// On Windows the downloaded windowsZones.xml is then moved into the install
+// folder.
+//
+// Returns false when the archive is missing, the install folder cannot be
+// created, extraction fails, or (Windows) the mapping file cannot be moved.
+bool
+remote_install(const std::string& version)
+{
+    assert(!version.empty());
+
+    std::string install = get_install();
+    auto gz_file = get_download_gz_file(version);
+    if (!file_exists(gz_file))
+        return false;
+
+    // The result of the removal is not checked; a failure surfaces through
+    // make_directory() below.
+    if (file_exists(install))
+        remove_folder_and_subfolders(install);
+    if (!make_directory(install))
+        return false;
+
+    bool success = extract_gz_file(version, gz_file, install);
+#  ifdef _WIN32
+    // The move is attempted even when extraction failed.
+    auto mapping_file_source = get_download_mapping_file(version);
+    auto mapping_file_dest = get_install();
+    mapping_file_dest += folder_delimiter;
+    mapping_file_dest += "windowsZones.xml";
+    if (!move_file(mapping_file_source, mapping_file_dest))
+        success = false;
+#  endif  // _WIN32
+    return success;
+}
+
+#endif // HAS_REMOTE_API
+
+// Determines the database version stored below `path` (which is used as a
+// plain string prefix, so it must already end in a delimiter).
+//
+// When "<path>version" can be opened, its first token is the version.
+// Otherwise "<path>NEWS" is scanned and the token following the first
+// "Release" is returned.  Throws std::runtime_error when neither yields a
+// version.
+static
+std::string
+get_version(const std::string& path)
+{
+    std::string token;
+    std::ifstream infile(path + "version");
+    if (infile.is_open())
+    {
+        if (infile >> token)
+            return token;
+    }
+    else
+    {
+        infile.open(path + "NEWS");
+        while (infile)
+        {
+            infile >> token;
+            if (token != "Release")
+                continue;
+            infile >> token;
+            return token;
+        }
+    }
+    throw std::runtime_error("Unable to get Timezone database version from " + path);
+}
+
+// Builds a tzdb from the text database files found in get_install().
+//
+// With AUTO_DOWNLOAD, a missing database is downloaded and installed first,
+// and an existing one is replaced when remote_version() reports a different,
+// non-empty version.  Throws std::runtime_error when the install folder does
+// not exist (after any download attempt) or when a freshly downloaded
+// database fails to install into a missing folder.
+//
+// Each listed file is then read line by line and the Rule, Link, Leap and
+// Zone lines are collected; the resulting containers are sorted before the
+// database is returned.
+static
+std::unique_ptr<tzdb>
+init_tzdb()
+{
+ using namespace date;
+ const std::string install = get_install();
+ const std::string path = install + folder_delimiter;
+ std::string line;
+ // True while the lines being read may continue the most recent Zone.
+ bool continue_zone = false;
+ std::unique_ptr<tzdb> db(new tzdb);
+
+#if AUTO_DOWNLOAD
+ if (!file_exists(install))
+ {
+ // Nothing installed yet: try to fetch whatever version is current.
+ auto rv = remote_version();
+ if (!rv.empty() && remote_download(rv))
+ {
+ if (!remote_install(rv))
+ {
+ std::string msg = "Timezone database version \"";
+ msg += rv;
+ msg += "\" did not install correctly to \"";
+ msg += install;
+ msg += "\"";
+ throw std::runtime_error(msg);
+ }
+ }
+ if (!file_exists(install))
+ {
+ std::string msg = "Timezone database not found at \"";
+ msg += install;
+ msg += "\"";
+ throw std::runtime_error(msg);
+ }
+ db->version = get_version(path);
+ }
+ else
+ {
+ // A database is installed: upgrade it when the remote version differs.
+ db->version = get_version(path);
+ auto rv = remote_version();
+ if (!rv.empty() && db->version != rv)
+ {
+ if (remote_download(rv))
+ {
+ // The result of remote_install() is not checked here; the version
+ // is simply re-read from disk afterwards.
+ remote_install(rv);
+ db->version = get_version(path);
+ }
+ }
+ }
+#else // !AUTO_DOWNLOAD
+ if (!file_exists(install))
+ {
+ std::string msg = "Timezone database not found at \"";
+ msg += install;
+ msg += "\"";
+ throw std::runtime_error(msg);
+ }
+ db->version = get_version(path);
+#endif // !AUTO_DOWNLOAD
+
+ CONSTDATA char*const files[] =
+ {
+ "africa", "antarctica", "asia", "australasia", "backward", "etcetera", "europe",
+ "pacificnew", "northamerica", "southamerica", "systemv", "leapseconds"
+ };
+
+ for (const auto& filename : files)
+ {
+ // A file that cannot be opened yields a stream in a failed state, so the
+ // loop below is skipped for it.
+ std::ifstream infile(path + filename);
+ while (infile)
+ {
+ std::getline(infile, line);
+ // Skip empty lines and lines that start with '#'.
+ if (!line.empty() && line[0] != '#')
+ {
+ std::istringstream in(line);
+ std::string word;
+ in >> word;
+ // The first word selects the record type; the whole line is handed
+ // to the matching constructor.
+ if (word == "Rule")
+ {
+ db->rules.push_back(Rule(line));
+ continue_zone = false;
+ }
+ else if (word == "Link")
+ {
+ db->links.push_back(time_zone_link(line));
+ continue_zone = false;
+ }
+ else if (word == "Leap")
+ {
+ db->leap_seconds.push_back(leap_second(line, detail::undocumented{}));
+ continue_zone = false;
+ }
+ else if (word == "Zone")
+ {
+ db->zones.push_back(time_zone(line, detail::undocumented{}));
+ continue_zone = true;
+ }
+ else if (line[0] == '\t' && continue_zone)
+ {
+ // Tab-led line directly after a Zone: added to the last zone.
+ db->zones.back().add(line);
+ }
+ else
+ {
+ // Unrecognized line: echoed to std::cerr and otherwise ignored.
+ std::cerr << line << '\n';
+ }
+ }
+ }
+ }
+ std::sort(db->rules.begin(), db->rules.end());
+ Rule::split_overlaps(db->rules);
+ std::sort(db->zones.begin(), db->zones.end());
+ db->zones.shrink_to_fit();
+ std::sort(db->links.begin(), db->links.end());
+ db->links.shrink_to_fit();
+ std::sort(db->leap_seconds.begin(), db->leap_seconds.end());
+ db->leap_seconds.shrink_to_fit();
+
+#ifdef _WIN32
+ std::string mapping_file = get_install() + folder_delimiter + "windowsZones.xml";
+ db->mappings = load_timezone_mappings_from_xml_file(mapping_file);
+ sort_zone_mappings(db->mappings);
+#endif // _WIN32
+
+ return db;
+}
+
+// Builds a new database with init_tzdb(), pushes it onto the front of the
+// list returned by get_tzdb_list() and returns a reference to it.
+// With AUTO_DOWNLOAD, the current front database is returned unchanged when
+// its version is non-empty and equal to remote_version().
+const tzdb&
+reload_tzdb()
+{
+#if AUTO_DOWNLOAD
+ auto const& v = get_tzdb_list().front().version;
+ if (!v.empty() && v == remote_version())
+ return get_tzdb_list().front();
+#endif // AUTO_DOWNLOAD
+ // Ownership of the new tzdb is released to the list.
+ tzdb_list::undocumented_helper::push_front(get_tzdb_list(), init_tzdb().release());
+ return get_tzdb_list().front();
+}
+
+#endif // !USE_OS_TZDB
+
+// Returns the database at the front of the list returned by get_tzdb_list().
+const tzdb&
+get_tzdb()
+{
+    auto& databases = get_tzdb_list();
+    return databases.front();
+}
+
+// Looks up the time zone called tz_name by binary search over `zones`.
+// Unless USE_OS_TZDB is set, a name that is not a zone is then looked up in
+// `links` and, when found, resolved to the zone named by the link's target.
+// Throws std::runtime_error when no zone is found.
+const time_zone*
+#if HAS_STRING_VIEW
+tzdb::locate_zone(std::string_view tz_name) const
+#else
+tzdb::locate_zone(const std::string& tz_name) const
+#endif
+{
+ auto zi = std::lower_bound(zones.begin(), zones.end(), tz_name,
+#if HAS_STRING_VIEW
+ [](const time_zone& z, const std::string_view& nm)
+#else
+ [](const time_zone& z, const std::string& nm)
+#endif
+ {
+ return z.name() < nm;
+ });
+ if (zi == zones.end() || zi->name() != tz_name)
+ {
+#if !USE_OS_TZDB
+ // Not a zone name: try it as a link name.
+ auto li = std::lower_bound(links.begin(), links.end(), tz_name,
+#if HAS_STRING_VIEW
+ [](const time_zone_link& z, const std::string_view& nm)
+#else
+ [](const time_zone_link& z, const std::string& nm)
+#endif
+ {
+ return z.name() < nm;
+ });
+ if (li != links.end() && li->name() == tz_name)
+ {
+ // The link target is looked up among the zones only; it is not
+ // looked up as a further link.
+ zi = std::lower_bound(zones.begin(), zones.end(), li->target(),
+ [](const time_zone& z, const std::string& nm)
+ {
+ return z.name() < nm;
+ });
+ if (zi != zones.end() && zi->name() == li->target())
+ return &*zi;
+ }
+#endif // !USE_OS_TZDB
+ throw std::runtime_error(std::string(tz_name) + " not found in timezone database");
+ }
+ return &*zi;
+}
+
+// Convenience wrapper: looks up tz_name in the database returned by
+// get_tzdb() and returns whatever tzdb::locate_zone() returns.
+const time_zone*
+#if HAS_STRING_VIEW
+locate_zone(std::string_view tz_name)
+#else
+locate_zone(const std::string& tz_name)
+#endif
+{
+ return get_tzdb().locate_zone(tz_name);
+}
+
+#if USE_OS_TZDB
+
+// Streams the database: a "Version:" line, then one line per zone and,
+// unless MISSING_LEAP_SECONDS is set, a blank line followed by one line per
+// leap second.
+std::ostream&
+operator<<(std::ostream& os, const tzdb& db)
+{
+    os << "Version: " << db.version << "\n\n";
+    for (auto zone = db.zones.begin(); zone != db.zones.end(); ++zone)
+        os << *zone << '\n';
+#if !MISSING_LEAP_SECONDS
+    os << '\n';
+    for (auto leap = db.leap_seconds.begin(); leap != db.leap_seconds.end(); ++leap)
+        os << *leap << '\n';
+#endif  // !MISSING_LEAP_SECONDS
+    return os;
+}
+
+#else // !USE_OS_TZDB
+
+// Streams the whole database as four sections: rules, zones, links and leap
+// seconds.  A section title is printed before the first entry of the rules,
+// zones and links sections and repeated every 50 rules, 10 zones and 45
+// links; the leap second title is printed once.
+std::ostream&
+operator<<(std::ostream& os, const tzdb& db)
+{
+ os << "Version: " << db.version << '\n';
+ std::string title("--------------------------------------------"
+ "--------------------------------------------\n"
+ "Name ""Start Y ""End Y "
+ "Beginning ""Offset "
+ "Designator\n"
+ "--------------------------------------------"
+ "--------------------------------------------\n");
+ // Counts the entries printed in the current section.
+ int count = 0;
+ for (const auto& x : db.rules)
+ {
+ if (count++ % 50 == 0)
+ os << title;
+ os << x << '\n';
+ }
+ os << '\n';
+ title = std::string("---------------------------------------------------------"
+ "--------------------------------------------------------\n"
+ "Name ""Offset "
+ "Rule ""Abrev ""Until\n"
+ "---------------------------------------------------------"
+ "--------------------------------------------------------\n");
+ count = 0;
+ for (const auto& x : db.zones)
+ {
+ if (count++ % 10 == 0)
+ os << title;
+ os << x << '\n';
+ }
+ os << '\n';
+ title = std::string("---------------------------------------------------------"
+ "--------------------------------------------------------\n"
+ "Alias ""To\n"
+ "---------------------------------------------------------"
+ "--------------------------------------------------------\n");
+ count = 0;
+ for (const auto& x : db.links)
+ {
+ if (count++ % 45 == 0)
+ os << title;
+ os << x << '\n';
+ }
+ os << '\n';
+ title = std::string("---------------------------------------------------------"
+ "--------------------------------------------------------\n"
+ "Leap second on\n"
+ "---------------------------------------------------------"
+ "--------------------------------------------------------\n");
+ os << title;
+ for (const auto& x : db.leap_seconds)
+ os << x << '\n';
+ return os;
+}
+
+#endif // !USE_OS_TZDB
+
+// -----------------------
+
+#ifdef _WIN32
+
+// Returns the TimeZoneKeyName reported by GetDynamicTimeZoneInformation(),
+// converted to a narrow string.  The name "Coordinated Universal Time" is
+// returned as "UTC".  Throws std::runtime_error when the call reports
+// TIME_ZONE_ID_INVALID.
+static
+std::string
+getTimeZoneKeyName()
+{
+ DYNAMIC_TIME_ZONE_INFORMATION dtzi{};
+ auto result = GetDynamicTimeZoneInformation(&dtzi);
+ if (result == TIME_ZONE_ID_INVALID)
+ throw std::runtime_error("current_zone(): GetDynamicTimeZoneInformation()"
+ " reported TIME_ZONE_ID_INVALID.");
+ auto wlen = wcslen(dtzi.TimeZoneKeyName);
+ // Zero-filled, so the converted text stays terminated as long as fewer
+ // than sizeof(buf) bytes are written.
+ char buf[128] = {};
+ assert(sizeof(buf) >= wlen+1);
+ // NOTE(review): the return value of wcstombs() is not checked.
+ wcstombs(buf, dtzi.TimeZoneKeyName, wlen);
+ if (strcmp(buf, "Coordinated Universal Time") == 0)
+ return "UTC";
+ return buf;
+}
+
+// Returns the zone that corresponds to the current Windows time zone.
+// The Windows key name is translated with native_to_standard_timezone_name();
+// std::runtime_error is thrown when no translation exists.
+const time_zone*
+tzdb::current_zone() const
+{
+    const std::string win_tzid = getTimeZoneKeyName();
+    std::string standard_tzid;
+    if (native_to_standard_timezone_name(win_tzid, standard_tzid))
+        return locate_zone(standard_tzid);
+
+    throw std::runtime_error(
+        "current_zone() failed: A mapping from the Windows Time Zone id \"" +
+        win_tzid +
+        "\" was not found in the time zone mapping database.");
+}
+
+#else // !_WIN32
+
+#if HAS_STRING_VIEW
+
+// Returns the part of the path `rp` that follows the first '/' at or after
+// the last occurrence of "zoneinfo".  The result is a view into `rp`.
+// Throws std::runtime_error when "zoneinfo" does not occur in `rp`.
+static
+std::string_view
+extract_tz_name(char const* rp)
+{
+    std::string_view name = rp;
+    CONSTDATA std::string_view zoneinfo = "zoneinfo";
+    const std::size_t marker = name.rfind(zoneinfo);
+    if (marker == std::string_view::npos)
+        throw std::runtime_error(
+            "current_zone() failed to find \"zoneinfo\" in " + std::string(name));
+    const std::size_t slash = name.find('/', marker);
+    name.remove_prefix(slash + 1);
+    return name;
+}
+
+#else // !HAS_STRING_VIEW
+
+// Returns the part of the path `rp` that follows the first '/' at or after
+// the last occurrence of "zoneinfo".
+// Throws std::runtime_error when "zoneinfo" does not occur in `rp`.
+static
+std::string
+extract_tz_name(char const* rp)
+{
+    std::string name = rp;
+    CONSTDATA char zoneinfo[] = "zoneinfo";
+    const std::size_t marker = name.rfind(zoneinfo);
+    if (marker == std::string::npos)
+        throw std::runtime_error(
+            "current_zone() failed to find \"zoneinfo\" in " + name);
+    const std::size_t slash = name.find('/', marker);
+    name.erase(0, slash + 1);
+    return name;
+}
+
+#endif // HAS_STRING_VIEW
+
+// Resolves `timezone` with realpath() and reports whether the zone name that
+// extract_tz_name() derives from the resolved path is something other than
+// "posixrules".
+//
+// Throws std::system_error (carrying errno) if realpath() fails; exceptions
+// from extract_tz_name() propagate.
+static
+bool
+sniff_realpath(const char* timezone)
+{
+    using namespace std;
+    char resolved[PATH_MAX+1] = {};
+    if (realpath(timezone, resolved) != nullptr)
+        return extract_tz_name(resolved) != "posixrules";
+    throw system_error(errno, system_category(), "realpath() failed");
+}
+
+// POSIX implementation: tries a sequence of well-known locations to discover
+// the name of the current time zone and returns locate_zone() for the first
+// name found.  Sources are tried in this order:
+//   1. the symlink /etc/localtime
+//   2. the symlink /etc/TZ
+//   3. the first line of /etc/timezone
+//   4. the first line of /var/db/zoneinfo
+//   5. date::iOSUtils::get_current_timezone() (only if TARGET_OS_IPHONE)
+//   6. a ZONE="..." entry in /etc/sysconfig/clock
+//
+// Throws std::system_error if realpath()/readlink() fails on a symlink that
+// was detected, and std::runtime_error if no source yields a name.
+// Exceptions from locate_zone() and extract_tz_name() propagate.
+const time_zone*
+tzdb::current_zone() const
+{
+    // On some OS's a file called /etc/localtime may
+    // exist and it may be either a real file
+    // containing time zone details or a symlink to such a file.
+    // On MacOS and BSD Unix if this file is a symlink it
+    // might resolve to a path like this:
+    // "/usr/share/zoneinfo/America/Los_Angeles"
+    // If it does, we try to determine the current
+    // timezone from the remainder of the path by removing the prefix
+    // and hoping the rest resolves to a valid timezone.
+    // It may not always work though. If it doesn't then an
+    // exception will be thrown by the zone lookup.
+    // The path may also take a relative form:
+    // "../usr/share/zoneinfo/America/Los_Angeles".
+    {
+        struct stat sb;
+        CONSTDATA auto timezone = "/etc/localtime";
+        if (lstat(timezone, &sb) == 0 && S_ISLNK(sb.st_mode) && sb.st_size > 0)
+        {
+            using namespace std;
+            // Decided once per process: realpath() is used unless it
+            // resolves the link to a zone named "posixrules".
+            static const bool use_realpath = sniff_realpath(timezone);
+            char rp[PATH_MAX+1] = {};
+            if (use_realpath)
+            {
+                if (realpath(timezone, rp) == nullptr)
+                    throw system_error(errno, system_category(), "realpath() failed");
+            }
+            else
+            {
+                // rp is zero-initialised and one byte is held back, so the
+                // link target is NUL-terminated even though readlink() does
+                // not terminate it.
+                if (readlink(timezone, rp, sizeof(rp)-1) <= 0)
+                    throw system_error(errno, system_category(), "readlink() failed");
+            }
+            return locate_zone(extract_tz_name(rp));
+        }
+    }
+    // On embedded systems e.g. buildroot with uclibc the timezone is linked
+    // into /etc/TZ which is a symlink to a path like this:
+    // "/usr/share/zoneinfo/uclibc/America/Los_Angeles"
+    // If it does, we try to determine the current
+    // timezone from the remainder of the path by removing the prefix
+    // and hoping the rest resolves to a valid timezone.
+    // It may not always work though. If it doesn't then an
+    // exception will be thrown by the zone lookup.
+    // The path may also take a relative form:
+    // "../usr/share/zoneinfo/uclibc/America/Los_Angeles".
+    {
+        struct stat sb;
+        CONSTDATA auto timezone = "/etc/TZ";
+        if (lstat(timezone, &sb) == 0 && S_ISLNK(sb.st_mode) && sb.st_size > 0) {
+            using namespace std;
+            string result;
+            char rp[PATH_MAX+1] = {};
+            if (readlink(timezone, rp, sizeof(rp)-1) > 0)
+                result = string(rp);
+            else
+                throw system_error(errno, system_category(), "readlink() failed");
+
+            // Strip everything up to and including "<tz dir>/".
+            const size_t pos = result.find(get_tz_dir());
+            if (pos != result.npos)
+                result.erase(0, get_tz_dir().size() + 1 + pos);
+            return locate_zone(result);
+        }
+    }
+    {
+        // On some versions of some Linux distros (e.g. Ubuntu),
+        // the current timezone might be in the first line of
+        // the /etc/timezone file.
+        std::ifstream timezone_file("/etc/timezone");
+        if (timezone_file.is_open())
+        {
+            std::string result;
+            std::getline(timezone_file, result);
+            if (!result.empty())
+                return locate_zone(result);
+        }
+        // Fall through to try other means.
+    }
+    {
+        // On some versions of some BSD distros (e.g. FreeBSD),
+        // the current timezone might be in the first line of
+        // the /var/db/zoneinfo file.
+        std::ifstream timezone_file("/var/db/zoneinfo");
+        if (timezone_file.is_open())
+        {
+            std::string result;
+            std::getline(timezone_file, result);
+            if (!result.empty())
+                return locate_zone(result);
+        }
+        // Fall through to try other means.
+    }
+    {
+        // On some systems (e.g. iOS) it is not possible to use the
+        // file based approach, so we switch to the system API,
+        // calling functions in the CoreFoundation framework.
+#if TARGET_OS_IPHONE
+        std::string result = date::iOSUtils::get_current_timezone();
+        if (!result.empty())
+            return locate_zone(result);
+#endif
+        // Fall through to try other means.
+    }
+    {
+        // On some versions of some Linux distros (e.g. Red Hat),
+        // the current timezone might be in a line of
+        // the /etc/sysconfig/clock file as:
+        // ZONE="US/Eastern"
+        std::ifstream timezone_file("/etc/sysconfig/clock");
+        std::string result;
+        while (std::getline(timezone_file, result))
+        {
+            auto p = result.find("ZONE=\"");
+            if (p != std::string::npos)
+            {
+                // Drop everything up to and including the opening quote.
+                // (erase(p, p+6) would only be correct when p == 0.)
+                result.erase(0, p+6);
+                // Drop the closing quote and whatever follows it.  A missing
+                // closing quote is tolerated: erase(npos) would throw
+                // std::out_of_range.
+                auto q = result.rfind('"');
+                if (q != std::string::npos)
+                    result.erase(q);
+                return locate_zone(result);
+            }
+        }
+        // Fall through to try other means.
+    }
+    throw std::runtime_error("Could not get current timezone");
+}
+
+#endif // !_WIN32
+
+// Free-function convenience wrapper: forwards to current_zone() on the
+// database returned by get_tzdb().
+const time_zone*
+current_zone()
+{
+    const auto& db = get_tzdb();
+    return db.current_zone();
+}
+
+} // namespace date
+} // namespace arrow_vendored
+
+#if defined(__GNUC__) && __GNUC__ < 5
+# pragma GCC diagnostic pop
+#endif
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime/tz.h b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime/tz.h
index 23c6742143c..ed76b9d9ef4 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime/tz.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime/tz.h
@@ -1,2804 +1,2804 @@
-#ifndef TZ_H
-#define TZ_H
-
-// The MIT License (MIT)
-//
-// Copyright (c) 2015, 2016, 2017 Howard Hinnant
-// Copyright (c) 2017 Jiangang Zhuang
-// Copyright (c) 2017 Aaron Bishop
-// Copyright (c) 2017 Tomasz Kamiński
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in all
-// copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-// SOFTWARE.
-//
-// Our apologies. When the previous paragraph was written, lowercase had not yet
-// been invented (that would involve another several millennia of evolution).
-// We did not mean to shout.
-
-// Get more recent database at http://www.iana.org/time-zones
-
-// The notion of "current timezone" is something the operating system is expected to "just
-// know". How it knows this is system specific. It's often a value set by the user at OS
-// installation time and recorded by the OS somewhere. On Linux and Mac systems the current
-// timezone name is obtained by looking at the name or contents of a particular file on
-// disk. On Windows the current timezone name comes from the registry. In either method,
-// there is no guarantee that the "native" current timezone name obtained will match any
-// of the "Standard" names in this library's "database". On Linux, the names usually do
-// seem to match so mapping functions to map from native to "Standard" are typically not
-// required. On Windows, the names are never "Standard" so mapping is always required.
-// Technically any OS may use the mapping process but currently only Windows does use it.
-
-// NOTE(ARROW): If this is not set, then the library will attempt to
-// use libcurl to obtain a timezone database, and we do not yet have
-// curl in our build toolchain
-#ifndef _WIN32
-#define USE_OS_TZDB 1
-#endif
-
-#ifndef USE_OS_TZDB
-# define USE_OS_TZDB 0
-#endif
-
-#ifndef HAS_REMOTE_API
-# if USE_OS_TZDB == 0
-# ifdef _WIN32
-# define HAS_REMOTE_API 0
-# else
-# define HAS_REMOTE_API 1
-# endif
-# else // HAS_REMOTE_API makes no since when using the OS timezone database
-# define HAS_REMOTE_API 0
-# endif
-#endif
-
-#ifdef __clang__
-# pragma clang diagnostic push
-# pragma clang diagnostic ignored "-Wconstant-logical-operand"
-#endif
-
-static_assert(!(USE_OS_TZDB && HAS_REMOTE_API),
- "USE_OS_TZDB and HAS_REMOTE_API can not be used together");
-
-#ifdef __clang__
-# pragma clang diagnostic pop
-#endif
-
-#ifndef AUTO_DOWNLOAD
-# define AUTO_DOWNLOAD HAS_REMOTE_API
-#endif
-
-static_assert(HAS_REMOTE_API == 0 ? AUTO_DOWNLOAD == 0 : true,
- "AUTO_DOWNLOAD can not be turned on without HAS_REMOTE_API");
-
-#ifndef USE_SHELL_API
-# define USE_SHELL_API 1
-#endif
-
-#if USE_OS_TZDB
-# ifdef _WIN32
-# error "USE_OS_TZDB can not be used on Windows"
-# endif
-# ifndef MISSING_LEAP_SECONDS
-# ifdef __APPLE__
-# define MISSING_LEAP_SECONDS 1
-# else
-# define MISSING_LEAP_SECONDS 0
-# endif
-# endif
-#else
-# define MISSING_LEAP_SECONDS 0
-#endif
-
-#ifndef HAS_DEDUCTION_GUIDES
-# if __cplusplus >= 201703
-# define HAS_DEDUCTION_GUIDES 1
-# else
-# define HAS_DEDUCTION_GUIDES 0
-# endif
-#endif // HAS_DEDUCTION_GUIDES
-
-#include "date.h"
-
-#if defined(_MSC_VER) && (_MSC_VER < 1900)
-#include "tz_private.h"
-#endif
-
-#include <algorithm>
-#include <atomic>
-#include <cassert>
-#include <chrono>
-#include <istream>
-#include <locale>
-#include <memory>
-#include <mutex>
-#include <ostream>
-#include <sstream>
-#include <stdexcept>
-#include <string>
-#include <type_traits>
-#include <utility>
-#include <vector>
-
-#ifdef _WIN32
-# ifdef DATE_BUILD_DLL
-# define DATE_API __declspec(dllexport)
-# elif defined(DATE_USE_DLL)
-# define DATE_API __declspec(dllimport)
-# else
-# define DATE_API
-# endif
-#else
-# ifdef DATE_BUILD_DLL
-# define DATE_API __attribute__ ((visibility ("default")))
-# else
-# define DATE_API
-# endif
-#endif
-
-namespace arrow_vendored
-{
-namespace date
-{
-
-enum class choose {earliest, latest};
-
-namespace detail
-{
- struct undocumented;
-
- template<typename T>
- struct nodeduct
- {
- using type = T;
- };
-
- template<typename T>
- using nodeduct_t = typename nodeduct<T>::type;
-}
-
-struct sys_info
-{
- sys_seconds begin;
- sys_seconds end;
- std::chrono::seconds offset;
- std::chrono::minutes save;
- std::string abbrev;
-};
-
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const sys_info& r)
-{
- os << r.begin << '\n';
- os << r.end << '\n';
- os << make_time(r.offset) << "\n";
- os << make_time(r.save) << "\n";
- os << r.abbrev << '\n';
- return os;
-}
-
-struct local_info
-{
- enum {unique, nonexistent, ambiguous} result;
- sys_info first;
- sys_info second;
-};
-
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const local_info& r)
-{
- if (r.result == local_info::nonexistent)
- os << "nonexistent between\n";
- else if (r.result == local_info::ambiguous)
- os << "ambiguous between\n";
- os << r.first;
- if (r.result != local_info::unique)
- {
- os << "and\n";
- os << r.second;
- }
- return os;
-}
-
-class nonexistent_local_time
- : public std::runtime_error
-{
-public:
- template <class Duration>
- nonexistent_local_time(local_time<Duration> tp, const local_info& i);
-
-private:
- template <class Duration>
- static
- std::string
- make_msg(local_time<Duration> tp, const local_info& i);
-};
-
-template <class Duration>
-inline
-nonexistent_local_time::nonexistent_local_time(local_time<Duration> tp,
- const local_info& i)
- : std::runtime_error(make_msg(tp, i))
-{
-}
-
-template <class Duration>
-std::string
-nonexistent_local_time::make_msg(local_time<Duration> tp, const local_info& i)
-{
- assert(i.result == local_info::nonexistent);
- std::ostringstream os;
- os << tp << " is in a gap between\n"
- << local_seconds{i.first.end.time_since_epoch()} + i.first.offset << ' '
- << i.first.abbrev << " and\n"
- << local_seconds{i.second.begin.time_since_epoch()} + i.second.offset << ' '
- << i.second.abbrev
- << " which are both equivalent to\n"
- << i.first.end << " UTC";
- return os.str();
-}
-
-class ambiguous_local_time
- : public std::runtime_error
-{
-public:
- template <class Duration>
- ambiguous_local_time(local_time<Duration> tp, const local_info& i);
-
-private:
- template <class Duration>
- static
- std::string
- make_msg(local_time<Duration> tp, const local_info& i);
-};
-
-template <class Duration>
-inline
-ambiguous_local_time::ambiguous_local_time(local_time<Duration> tp, const local_info& i)
- : std::runtime_error(make_msg(tp, i))
-{
-}
-
-template <class Duration>
-std::string
-ambiguous_local_time::make_msg(local_time<Duration> tp, const local_info& i)
-{
- assert(i.result == local_info::ambiguous);
- std::ostringstream os;
- os << tp << " is ambiguous. It could be\n"
- << tp << ' ' << i.first.abbrev << " == "
- << tp - i.first.offset << " UTC or\n"
- << tp << ' ' << i.second.abbrev << " == "
- << tp - i.second.offset << " UTC";
- return os.str();
-}
-
-class time_zone;
-
-#if HAS_STRING_VIEW
-DATE_API const time_zone* locate_zone(std::string_view tz_name);
-#else
-DATE_API const time_zone* locate_zone(const std::string& tz_name);
-#endif
-
-DATE_API const time_zone* current_zone();
-
-template <class T>
-struct zoned_traits
-{
-};
-
-template <>
-struct zoned_traits<const time_zone*>
-{
- static
- const time_zone*
- default_zone()
- {
- return date::locate_zone("Etc/UTC");
- }
-
-#if HAS_STRING_VIEW
-
- static
- const time_zone*
- locate_zone(std::string_view name)
- {
- return date::locate_zone(name);
- }
-
-#else // !HAS_STRING_VIEW
-
- static
- const time_zone*
- locate_zone(const std::string& name)
- {
- return date::locate_zone(name);
- }
-
- static
- const time_zone*
- locate_zone(const char* name)
- {
- return date::locate_zone(name);
- }
-
-#endif // !HAS_STRING_VIEW
-};
-
-template <class Duration, class TimeZonePtr>
-class zoned_time;
-
-template <class Duration1, class Duration2, class TimeZonePtr>
-bool
-operator==(const zoned_time<Duration1, TimeZonePtr>& x,
- const zoned_time<Duration2, TimeZonePtr>& y);
-
-template <class Duration, class TimeZonePtr = const time_zone*>
-class zoned_time
-{
-public:
- using duration = typename std::common_type<Duration, std::chrono::seconds>::type;
-
-private:
- TimeZonePtr zone_;
- sys_time<duration> tp_;
-
-public:
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
- template <class T = TimeZonePtr,
- class = decltype(zoned_traits<T>::default_zone())>
-#endif
- zoned_time();
-
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
- template <class T = TimeZonePtr,
- class = decltype(zoned_traits<T>::default_zone())>
-#endif
- zoned_time(const sys_time<Duration>& st);
- explicit zoned_time(TimeZonePtr z);
-
-#if HAS_STRING_VIEW
- template <class T = TimeZonePtr,
- class = typename std::enable_if
- <
- std::is_constructible
- <
- zoned_time,
- decltype(zoned_traits<T>::locate_zone(std::string_view()))
- >::value
- >::type>
- explicit zoned_time(std::string_view name);
-#else
-# if !defined(_MSC_VER) || (_MSC_VER > 1916)
- template <class T = TimeZonePtr,
- class = typename std::enable_if
- <
- std::is_constructible
- <
- zoned_time,
- decltype(zoned_traits<T>::locate_zone(std::string()))
- >::value
- >::type>
-# endif
- explicit zoned_time(const std::string& name);
-#endif
-
- template <class Duration2,
- class = typename std::enable_if
- <
- std::is_convertible<sys_time<Duration2>,
- sys_time<Duration>>::value
- >::type>
- zoned_time(const zoned_time<Duration2, TimeZonePtr>& zt) NOEXCEPT;
-
- zoned_time(TimeZonePtr z, const sys_time<Duration>& st);
-
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
- template <class T = TimeZonePtr,
- class = typename std::enable_if
- <
- std::is_convertible
- <
- decltype(std::declval<T&>()->to_sys(local_time<Duration>{})),
- sys_time<duration>
- >::value
- >::type>
-#endif
- zoned_time(TimeZonePtr z, const local_time<Duration>& tp);
-
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
- template <class T = TimeZonePtr,
- class = typename std::enable_if
- <
- std::is_convertible
- <
- decltype(std::declval<T&>()->to_sys(local_time<Duration>{},
- choose::earliest)),
- sys_time<duration>
- >::value
- >::type>
-#endif
- zoned_time(TimeZonePtr z, const local_time<Duration>& tp, choose c);
-
- template <class Duration2, class TimeZonePtr2,
- class = typename std::enable_if
- <
- std::is_convertible<sys_time<Duration2>,
- sys_time<Duration>>::value
- >::type>
- zoned_time(TimeZonePtr z, const zoned_time<Duration2, TimeZonePtr2>& zt);
-
- template <class Duration2, class TimeZonePtr2,
- class = typename std::enable_if
- <
- std::is_convertible<sys_time<Duration2>,
- sys_time<Duration>>::value
- >::type>
- zoned_time(TimeZonePtr z, const zoned_time<Duration2, TimeZonePtr2>& zt, choose);
-
-#if HAS_STRING_VIEW
-
- template <class T = TimeZonePtr,
- class = typename std::enable_if
- <
- std::is_constructible
- <
- zoned_time,
- decltype(zoned_traits<T>::locate_zone(std::string_view())),
- sys_time<Duration>
- >::value
- >::type>
- zoned_time(std::string_view name, detail::nodeduct_t<const sys_time<Duration>&> st);
-
- template <class T = TimeZonePtr,
- class = typename std::enable_if
- <
- std::is_constructible
- <
- zoned_time,
- decltype(zoned_traits<T>::locate_zone(std::string_view())),
- local_time<Duration>
- >::value
- >::type>
- zoned_time(std::string_view name, detail::nodeduct_t<const local_time<Duration>&> tp);
-
- template <class T = TimeZonePtr,
- class = typename std::enable_if
- <
- std::is_constructible
- <
- zoned_time,
- decltype(zoned_traits<T>::locate_zone(std::string_view())),
- local_time<Duration>,
- choose
- >::value
- >::type>
- zoned_time(std::string_view name, detail::nodeduct_t<const local_time<Duration>&> tp, choose c);
-
- template <class Duration2, class TimeZonePtr2, class T = TimeZonePtr,
- class = typename std::enable_if
- <
- std::is_convertible<sys_time<Duration2>,
- sys_time<Duration>>::value &&
- std::is_constructible
- <
- zoned_time,
- decltype(zoned_traits<T>::locate_zone(std::string_view())),
- zoned_time
- >::value
- >::type>
- zoned_time(std::string_view name, const zoned_time<Duration2, TimeZonePtr2>& zt);
-
- template <class Duration2, class TimeZonePtr2, class T = TimeZonePtr,
- class = typename std::enable_if
- <
- std::is_convertible<sys_time<Duration2>,
- sys_time<Duration>>::value &&
- std::is_constructible
- <
- zoned_time,
- decltype(zoned_traits<T>::locate_zone(std::string_view())),
- zoned_time,
- choose
- >::value
- >::type>
- zoned_time(std::string_view name, const zoned_time<Duration2, TimeZonePtr2>& zt, choose);
-
-#else // !HAS_STRING_VIEW
-
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
- template <class T = TimeZonePtr,
- class = typename std::enable_if
- <
- std::is_constructible
- <
- zoned_time,
- decltype(zoned_traits<T>::locate_zone(std::string())),
- sys_time<Duration>
- >::value
- >::type>
-#endif
- zoned_time(const std::string& name, const sys_time<Duration>& st);
-
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
- template <class T = TimeZonePtr,
- class = typename std::enable_if
- <
- std::is_constructible
- <
- zoned_time,
- decltype(zoned_traits<T>::locate_zone(std::string())),
- sys_time<Duration>
- >::value
- >::type>
-#endif
- zoned_time(const char* name, const sys_time<Duration>& st);
-
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
- template <class T = TimeZonePtr,
- class = typename std::enable_if
- <
- std::is_constructible
- <
- zoned_time,
- decltype(zoned_traits<T>::locate_zone(std::string())),
- local_time<Duration>
- >::value
- >::type>
-#endif
- zoned_time(const std::string& name, const local_time<Duration>& tp);
-
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
- template <class T = TimeZonePtr,
- class = typename std::enable_if
- <
- std::is_constructible
- <
- zoned_time,
- decltype(zoned_traits<T>::locate_zone(std::string())),
- local_time<Duration>
- >::value
- >::type>
-#endif
- zoned_time(const char* name, const local_time<Duration>& tp);
-
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
- template <class T = TimeZonePtr,
- class = typename std::enable_if
- <
- std::is_constructible
- <
- zoned_time,
- decltype(zoned_traits<T>::locate_zone(std::string())),
- local_time<Duration>,
- choose
- >::value
- >::type>
-#endif
- zoned_time(const std::string& name, const local_time<Duration>& tp, choose c);
-
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
- template <class T = TimeZonePtr,
- class = typename std::enable_if
- <
- std::is_constructible
- <
- zoned_time,
- decltype(zoned_traits<T>::locate_zone(std::string())),
- local_time<Duration>,
- choose
- >::value
- >::type>
-#endif
- zoned_time(const char* name, const local_time<Duration>& tp, choose c);
-
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
- template <class Duration2, class TimeZonePtr2, class T = TimeZonePtr,
- class = typename std::enable_if
- <
- std::is_convertible<sys_time<Duration2>,
- sys_time<Duration>>::value &&
- std::is_constructible
- <
- zoned_time,
- decltype(zoned_traits<T>::locate_zone(std::string())),
- zoned_time
- >::value
- >::type>
-#else
- template <class Duration2, class TimeZonePtr2>
-#endif
- zoned_time(const std::string& name, const zoned_time<Duration2, TimeZonePtr2>& zt);
-
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
- template <class Duration2, class TimeZonePtr2, class T = TimeZonePtr,
- class = typename std::enable_if
- <
- std::is_convertible<sys_time<Duration2>,
- sys_time<Duration>>::value &&
- std::is_constructible
- <
- zoned_time,
- decltype(zoned_traits<T>::locate_zone(std::string())),
- zoned_time
- >::value
- >::type>
-#else
- template <class Duration2, class TimeZonePtr2>
-#endif
- zoned_time(const char* name, const zoned_time<Duration2, TimeZonePtr2>& zt);
-
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
- template <class Duration2, class TimeZonePtr2, class T = TimeZonePtr,
- class = typename std::enable_if
- <
- std::is_convertible<sys_time<Duration2>,
- sys_time<Duration>>::value &&
- std::is_constructible
- <
- zoned_time,
- decltype(zoned_traits<T>::locate_zone(std::string())),
- zoned_time,
- choose
- >::value
- >::type>
-#else
- template <class Duration2, class TimeZonePtr2>
-#endif
- zoned_time(const std::string& name, const zoned_time<Duration2, TimeZonePtr2>& zt,
- choose);
-
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
- template <class Duration2, class TimeZonePtr2, class T = TimeZonePtr,
- class = typename std::enable_if
- <
- std::is_convertible<sys_time<Duration2>,
- sys_time<Duration>>::value &&
- std::is_constructible
- <
- zoned_time,
- decltype(zoned_traits<T>::locate_zone(std::string())),
- zoned_time,
- choose
- >::value
- >::type>
-#else
- template <class Duration2, class TimeZonePtr2>
-#endif
- zoned_time(const char* name, const zoned_time<Duration2, TimeZonePtr2>& zt,
- choose);
-
-#endif // !HAS_STRING_VIEW
-
- zoned_time& operator=(const sys_time<Duration>& st);
- zoned_time& operator=(const local_time<Duration>& ut);
-
- explicit operator sys_time<duration>() const;
- explicit operator local_time<duration>() const;
-
- TimeZonePtr get_time_zone() const;
- local_time<duration> get_local_time() const;
- sys_time<duration> get_sys_time() const;
- sys_info get_info() const;
-
- template <class Duration1, class Duration2, class TimeZonePtr1>
- friend
- bool
- operator==(const zoned_time<Duration1, TimeZonePtr1>& x,
- const zoned_time<Duration2, TimeZonePtr1>& y);
-
- template <class CharT, class Traits, class Duration1, class TimeZonePtr1>
- friend
- std::basic_ostream<CharT, Traits>&
- operator<<(std::basic_ostream<CharT, Traits>& os,
- const zoned_time<Duration1, TimeZonePtr1>& t);
-
-private:
- template <class D, class T> friend class zoned_time;
-};
-
-using zoned_seconds = zoned_time<std::chrono::seconds>;
-
-#if HAS_DEDUCTION_GUIDES
-
-namespace detail
-{
- template<typename TimeZonePtrOrName>
- using time_zone_representation =
- std::conditional_t
- <
- std::is_convertible<TimeZonePtrOrName, std::string_view>::value,
- time_zone const*,
- std::remove_cv_t<std::remove_reference_t<TimeZonePtrOrName>>
- >;
-}
-
-zoned_time()
- -> zoned_time<std::chrono::seconds>;
-
-template <class Duration>
-zoned_time(sys_time<Duration>)
- -> zoned_time<std::common_type_t<Duration, std::chrono::seconds>>;
-
-template <class TimeZonePtrOrName>
-zoned_time(TimeZonePtrOrName&&)
- -> zoned_time<std::chrono::seconds, detail::time_zone_representation<TimeZonePtrOrName>>;
-
-template <class TimeZonePtrOrName, class Duration>
-zoned_time(TimeZonePtrOrName&&, sys_time<Duration>)
- -> zoned_time<std::common_type_t<Duration, std::chrono::seconds>, detail::time_zone_representation<TimeZonePtrOrName>>;
-
-template <class TimeZonePtrOrName, class Duration>
-zoned_time(TimeZonePtrOrName&&, local_time<Duration>, choose = choose::earliest)
- -> zoned_time<std::common_type_t<Duration, std::chrono::seconds>, detail::time_zone_representation<TimeZonePtrOrName>>;
-
-template <class Duration, class TimeZonePtrOrName, class TimeZonePtr2>
-zoned_time(TimeZonePtrOrName&&, zoned_time<Duration, TimeZonePtr2>, choose = choose::earliest)
- -> zoned_time<std::common_type_t<Duration, std::chrono::seconds>, detail::time_zone_representation<TimeZonePtrOrName>>;
-
-#endif // HAS_DEDUCTION_GUIDES
-
-template <class Duration1, class Duration2, class TimeZonePtr>
-inline
-bool
-operator==(const zoned_time<Duration1, TimeZonePtr>& x,
- const zoned_time<Duration2, TimeZonePtr>& y)
-{
- return x.zone_ == y.zone_ && x.tp_ == y.tp_;
-}
-
-template <class Duration1, class Duration2, class TimeZonePtr>
-inline
-bool
-operator!=(const zoned_time<Duration1, TimeZonePtr>& x,
- const zoned_time<Duration2, TimeZonePtr>& y)
-{
- return !(x == y);
-}
-
-#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
-
-namespace detail
-{
-# if USE_OS_TZDB
- struct transition;
- struct expanded_ttinfo;
-# else // !USE_OS_TZDB
- struct zonelet;
- class Rule;
-# endif // !USE_OS_TZDB
-}
-
-#endif // !defined(_MSC_VER) || (_MSC_VER >= 1900)
-
-class time_zone
-{
-private:
- std::string name_;
-#if USE_OS_TZDB
- std::vector<detail::transition> transitions_;
- std::vector<detail::expanded_ttinfo> ttinfos_;
-#else // !USE_OS_TZDB
- std::vector<detail::zonelet> zonelets_;
-#endif // !USE_OS_TZDB
- std::unique_ptr<std::once_flag> adjusted_;
-
-public:
-#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
- time_zone(time_zone&&) = default;
- time_zone& operator=(time_zone&&) = default;
-#else // defined(_MSC_VER) && (_MSC_VER < 1900)
- time_zone(time_zone&& src);
- time_zone& operator=(time_zone&& src);
-#endif // defined(_MSC_VER) && (_MSC_VER < 1900)
-
- DATE_API explicit time_zone(const std::string& s, detail::undocumented);
-
- const std::string& name() const NOEXCEPT;
-
- template <class Duration> sys_info get_info(sys_time<Duration> st) const;
- template <class Duration> local_info get_info(local_time<Duration> tp) const;
-
- template <class Duration>
- sys_time<typename std::common_type<Duration, std::chrono::seconds>::type>
- to_sys(local_time<Duration> tp) const;
-
- template <class Duration>
- sys_time<typename std::common_type<Duration, std::chrono::seconds>::type>
- to_sys(local_time<Duration> tp, choose z) const;
-
- template <class Duration>
- local_time<typename std::common_type<Duration, std::chrono::seconds>::type>
- to_local(sys_time<Duration> tp) const;
-
- friend bool operator==(const time_zone& x, const time_zone& y) NOEXCEPT;
- friend bool operator< (const time_zone& x, const time_zone& y) NOEXCEPT;
- friend DATE_API std::ostream& operator<<(std::ostream& os, const time_zone& z);
-
-#if !USE_OS_TZDB
- DATE_API void add(const std::string& s);
-#endif // !USE_OS_TZDB
-
-private:
- DATE_API sys_info get_info_impl(sys_seconds tp) const;
- DATE_API local_info get_info_impl(local_seconds tp) const;
-
- template <class Duration>
- sys_time<typename std::common_type<Duration, std::chrono::seconds>::type>
- to_sys_impl(local_time<Duration> tp, choose z, std::false_type) const;
- template <class Duration>
- sys_time<typename std::common_type<Duration, std::chrono::seconds>::type>
- to_sys_impl(local_time<Duration> tp, choose, std::true_type) const;
-
-#if USE_OS_TZDB
- DATE_API void init() const;
- DATE_API void init_impl();
- DATE_API sys_info
- load_sys_info(std::vector<detail::transition>::const_iterator i) const;
-
- template <class TimeType>
- DATE_API void
- load_data(std::istream& inf, std::int32_t tzh_leapcnt, std::int32_t tzh_timecnt,
- std::int32_t tzh_typecnt, std::int32_t tzh_charcnt);
-#else // !USE_OS_TZDB
- DATE_API sys_info get_info_impl(sys_seconds tp, int timezone) const;
- DATE_API void adjust_infos(const std::vector<detail::Rule>& rules);
- DATE_API void parse_info(std::istream& in);
-#endif // !USE_OS_TZDB
-};
-
-#if defined(_MSC_VER) && (_MSC_VER < 1900)
-
-inline
-time_zone::time_zone(time_zone&& src)
- : name_(std::move(src.name_))
- , zonelets_(std::move(src.zonelets_))
- , adjusted_(std::move(src.adjusted_))
- {}
-
-inline
-time_zone&
-time_zone::operator=(time_zone&& src)
-{
- name_ = std::move(src.name_);
- zonelets_ = std::move(src.zonelets_);
- adjusted_ = std::move(src.adjusted_);
- return *this;
-}
-
-#endif // defined(_MSC_VER) && (_MSC_VER < 1900)
-
-inline
-const std::string&
-time_zone::name() const NOEXCEPT
-{
- return name_;
-}
-
-template <class Duration>
-inline
-sys_info
-time_zone::get_info(sys_time<Duration> st) const
-{
- return get_info_impl(date::floor<std::chrono::seconds>(st));
-}
-
-template <class Duration>
-inline
-local_info
-time_zone::get_info(local_time<Duration> tp) const
-{
- return get_info_impl(date::floor<std::chrono::seconds>(tp));
-}
-
-template <class Duration>
-inline
-sys_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-time_zone::to_sys(local_time<Duration> tp) const
-{
- return to_sys_impl(tp, choose{}, std::true_type{});
-}
-
-template <class Duration>
-inline
-sys_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-time_zone::to_sys(local_time<Duration> tp, choose z) const
-{
- return to_sys_impl(tp, z, std::false_type{});
-}
-
-template <class Duration>
-inline
-local_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-time_zone::to_local(sys_time<Duration> tp) const
-{
- using LT = local_time<typename std::common_type<Duration, std::chrono::seconds>::type>;
- auto i = get_info(tp);
- return LT{(tp + i.offset).time_since_epoch()};
-}
-
-inline bool operator==(const time_zone& x, const time_zone& y) NOEXCEPT {return x.name_ == y.name_;}
-inline bool operator< (const time_zone& x, const time_zone& y) NOEXCEPT {return x.name_ < y.name_;}
-
-inline bool operator!=(const time_zone& x, const time_zone& y) NOEXCEPT {return !(x == y);}
-inline bool operator> (const time_zone& x, const time_zone& y) NOEXCEPT {return y < x;}
-inline bool operator<=(const time_zone& x, const time_zone& y) NOEXCEPT {return !(y < x);}
-inline bool operator>=(const time_zone& x, const time_zone& y) NOEXCEPT {return !(x < y);}
-
-template <class Duration>
-sys_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-time_zone::to_sys_impl(local_time<Duration> tp, choose z, std::false_type) const
-{
- auto i = get_info(tp);
- if (i.result == local_info::nonexistent)
- {
- return i.first.end;
- }
- else if (i.result == local_info::ambiguous)
- {
- if (z == choose::latest)
- return sys_time<Duration>{tp.time_since_epoch()} - i.second.offset;
- }
- return sys_time<Duration>{tp.time_since_epoch()} - i.first.offset;
-}
-
-template <class Duration>
-sys_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-time_zone::to_sys_impl(local_time<Duration> tp, choose, std::true_type) const
-{
- auto i = get_info(tp);
- if (i.result == local_info::nonexistent)
- throw nonexistent_local_time(tp, i);
- else if (i.result == local_info::ambiguous)
- throw ambiguous_local_time(tp, i);
- return sys_time<Duration>{tp.time_since_epoch()} - i.first.offset;
-}
-
-#if !USE_OS_TZDB
-
-class time_zone_link
-{
-private:
- std::string name_;
- std::string target_;
-public:
- DATE_API explicit time_zone_link(const std::string& s);
-
- const std::string& name() const {return name_;}
- const std::string& target() const {return target_;}
-
- friend bool operator==(const time_zone_link& x, const time_zone_link& y) {return x.name_ == y.name_;}
- friend bool operator< (const time_zone_link& x, const time_zone_link& y) {return x.name_ < y.name_;}
-
- friend DATE_API std::ostream& operator<<(std::ostream& os, const time_zone_link& x);
-};
-
-using link = time_zone_link;
-
-inline bool operator!=(const time_zone_link& x, const time_zone_link& y) {return !(x == y);}
-inline bool operator> (const time_zone_link& x, const time_zone_link& y) {return y < x;}
-inline bool operator<=(const time_zone_link& x, const time_zone_link& y) {return !(y < x);}
-inline bool operator>=(const time_zone_link& x, const time_zone_link& y) {return !(x < y);}
-
-#endif // !USE_OS_TZDB
-
-#if !MISSING_LEAP_SECONDS
-
-class leap_second
-{
-private:
- sys_seconds date_;
-
-public:
-#if USE_OS_TZDB
- DATE_API explicit leap_second(const sys_seconds& s, detail::undocumented);
-#else
- DATE_API explicit leap_second(const std::string& s, detail::undocumented);
-#endif
-
- sys_seconds date() const {return date_;}
-
- friend bool operator==(const leap_second& x, const leap_second& y) {return x.date_ == y.date_;}
- friend bool operator< (const leap_second& x, const leap_second& y) {return x.date_ < y.date_;}
-
- template <class Duration>
- friend
- bool
- operator==(const leap_second& x, const sys_time<Duration>& y)
- {
- return x.date_ == y;
- }
-
- template <class Duration>
- friend
- bool
- operator< (const leap_second& x, const sys_time<Duration>& y)
- {
- return x.date_ < y;
- }
-
- template <class Duration>
- friend
- bool
- operator< (const sys_time<Duration>& x, const leap_second& y)
- {
- return x < y.date_;
- }
-
- friend DATE_API std::ostream& operator<<(std::ostream& os, const leap_second& x);
-};
-
-inline bool operator!=(const leap_second& x, const leap_second& y) {return !(x == y);}
-inline bool operator> (const leap_second& x, const leap_second& y) {return y < x;}
-inline bool operator<=(const leap_second& x, const leap_second& y) {return !(y < x);}
-inline bool operator>=(const leap_second& x, const leap_second& y) {return !(x < y);}
-
-template <class Duration>
-inline
-bool
-operator==(const sys_time<Duration>& x, const leap_second& y)
-{
- return y == x;
-}
-
-template <class Duration>
-inline
-bool
-operator!=(const leap_second& x, const sys_time<Duration>& y)
-{
- return !(x == y);
-}
-
-template <class Duration>
-inline
-bool
-operator!=(const sys_time<Duration>& x, const leap_second& y)
-{
- return !(x == y);
-}
-
-template <class Duration>
-inline
-bool
-operator> (const leap_second& x, const sys_time<Duration>& y)
-{
- return y < x;
-}
-
-template <class Duration>
-inline
-bool
-operator> (const sys_time<Duration>& x, const leap_second& y)
-{
- return y < x;
-}
-
-template <class Duration>
-inline
-bool
-operator<=(const leap_second& x, const sys_time<Duration>& y)
-{
- return !(y < x);
-}
-
-template <class Duration>
-inline
-bool
-operator<=(const sys_time<Duration>& x, const leap_second& y)
-{
- return !(y < x);
-}
-
-template <class Duration>
-inline
-bool
-operator>=(const leap_second& x, const sys_time<Duration>& y)
-{
- return !(x < y);
-}
-
-template <class Duration>
-inline
-bool
-operator>=(const sys_time<Duration>& x, const leap_second& y)
-{
- return !(x < y);
-}
-
-using leap = leap_second;
-
-#endif // !MISSING_LEAP_SECONDS
-
-#ifdef _WIN32
-
-namespace detail
-{
-
-// The time zone mapping is modelled after this data file:
-// http://unicode.org/repos/cldr/trunk/common/supplemental/windowsZones.xml
-// and the field names match the element names from the mapZone element
-// of windowsZones.xml.
-// The website displays this file here:
-// http://www.unicode.org/cldr/charts/latest/supplemental/zone_tzid.html
-// The html view is sorted before being displayed but is otherwise the same
-// There is a mapping between the os centric view (in this case windows)
-// the html displays uses and the generic view the xml file.
-// That mapping is this:
-// display column "windows" -> xml field "other".
-// display column "region" -> xml field "territory".
-// display column "tzid" -> xml field "type".
-// This structure uses the generic terminology because it could be
-// used to to support other os/native name conversions, not just windows,
-// and using the same generic names helps retain the connection to the
-// origin of the data that we are using.
-struct timezone_mapping
-{
- timezone_mapping(const char* other, const char* territory, const char* type)
- : other(other), territory(territory), type(type)
- {
- }
- timezone_mapping() = default;
- std::string other;
- std::string territory;
- std::string type;
-};
-
-} // detail
-
-#endif // _WIN32
-
-struct tzdb
-{
- std::string version = "unknown";
- std::vector<time_zone> zones;
-#if !USE_OS_TZDB
- std::vector<time_zone_link> links;
-#endif
-#if !MISSING_LEAP_SECONDS
- std::vector<leap_second> leap_seconds;
-#endif
-#if !USE_OS_TZDB
- std::vector<detail::Rule> rules;
-#endif
-#ifdef _WIN32
- std::vector<detail::timezone_mapping> mappings;
-#endif
- tzdb* next = nullptr;
-
- tzdb() = default;
-#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
- tzdb(tzdb&&) = default;
- tzdb& operator=(tzdb&&) = default;
-#else // defined(_MSC_VER) && (_MSC_VER < 1900)
- tzdb(tzdb&& src)
- : version(std::move(src.version))
- , zones(std::move(src.zones))
- , links(std::move(src.links))
- , leap_seconds(std::move(src.leap_seconds))
- , rules(std::move(src.rules))
- , mappings(std::move(src.mappings))
- {}
-
- tzdb& operator=(tzdb&& src)
- {
- version = std::move(src.version);
- zones = std::move(src.zones);
- links = std::move(src.links);
- leap_seconds = std::move(src.leap_seconds);
- rules = std::move(src.rules);
- mappings = std::move(src.mappings);
- return *this;
- }
-#endif // defined(_MSC_VER) && (_MSC_VER < 1900)
-
-#if HAS_STRING_VIEW
- const time_zone* locate_zone(std::string_view tz_name) const;
-#else
- const time_zone* locate_zone(const std::string& tz_name) const;
-#endif
- const time_zone* current_zone() const;
-};
-
-using TZ_DB = tzdb;
-
-DATE_API std::ostream&
-operator<<(std::ostream& os, const tzdb& db);
-
-DATE_API const tzdb& get_tzdb();
-
-class tzdb_list
-{
- std::atomic<tzdb*> head_{nullptr};
-
-public:
- ~tzdb_list();
- tzdb_list() = default;
- tzdb_list(tzdb_list&& x) noexcept;
-
- const tzdb& front() const noexcept {return *head_;}
- tzdb& front() noexcept {return *head_;}
-
- class const_iterator;
-
- const_iterator begin() const noexcept;
- const_iterator end() const noexcept;
-
- const_iterator cbegin() const noexcept;
- const_iterator cend() const noexcept;
-
- const_iterator erase_after(const_iterator p) noexcept;
-
- struct undocumented_helper;
-private:
- void push_front(tzdb* tzdb) noexcept;
-};
-
-class tzdb_list::const_iterator
-{
- tzdb* p_ = nullptr;
-
- explicit const_iterator(tzdb* p) noexcept : p_{p} {}
-public:
- const_iterator() = default;
-
- using iterator_category = std::forward_iterator_tag;
- using value_type = tzdb;
- using reference = const value_type&;
- using pointer = const value_type*;
- using difference_type = std::ptrdiff_t;
-
- reference operator*() const noexcept {return *p_;}
- pointer operator->() const noexcept {return p_;}
-
- const_iterator& operator++() noexcept {p_ = p_->next; return *this;}
- const_iterator operator++(int) noexcept {auto t = *this; ++(*this); return t;}
-
- friend
- bool
- operator==(const const_iterator& x, const const_iterator& y) noexcept
- {return x.p_ == y.p_;}
-
- friend
- bool
- operator!=(const const_iterator& x, const const_iterator& y) noexcept
- {return !(x == y);}
-
- friend class tzdb_list;
-};
-
-inline
-tzdb_list::const_iterator
-tzdb_list::begin() const noexcept
-{
- return const_iterator{head_};
-}
-
-inline
-tzdb_list::const_iterator
-tzdb_list::end() const noexcept
-{
- return const_iterator{nullptr};
-}
-
-inline
-tzdb_list::const_iterator
-tzdb_list::cbegin() const noexcept
-{
- return begin();
-}
-
-inline
-tzdb_list::const_iterator
-tzdb_list::cend() const noexcept
-{
- return end();
-}
-
-DATE_API tzdb_list& get_tzdb_list();
-
-#if !USE_OS_TZDB
-
-DATE_API const tzdb& reload_tzdb();
-DATE_API void set_install(const std::string& install);
-
-#endif // !USE_OS_TZDB
-
-#if HAS_REMOTE_API
-
-DATE_API std::string remote_version();
-// if provided error_buffer size should be at least CURL_ERROR_SIZE
-DATE_API bool remote_download(const std::string& version, char* error_buffer = nullptr);
-DATE_API bool remote_install(const std::string& version);
-
-#endif
-
-// zoned_time
-
-namespace detail
-{
-
-template <class T>
-inline
-T*
-to_raw_pointer(T* p) noexcept
-{
- return p;
-}
-
-template <class Pointer>
-inline
-auto
-to_raw_pointer(Pointer p) noexcept
- -> decltype(detail::to_raw_pointer(p.operator->()))
-{
- return detail::to_raw_pointer(p.operator->());
-}
-
-} // namespace detail
-
-template <class Duration, class TimeZonePtr>
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
-template <class T, class>
-#endif
-inline
-zoned_time<Duration, TimeZonePtr>::zoned_time()
- : zone_(zoned_traits<TimeZonePtr>::default_zone())
- {}
-
-template <class Duration, class TimeZonePtr>
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
-template <class T, class>
-#endif
-inline
-zoned_time<Duration, TimeZonePtr>::zoned_time(const sys_time<Duration>& st)
- : zone_(zoned_traits<TimeZonePtr>::default_zone())
- , tp_(st)
- {}
-
-template <class Duration, class TimeZonePtr>
-inline
-zoned_time<Duration, TimeZonePtr>::zoned_time(TimeZonePtr z)
- : zone_(std::move(z))
- {assert(detail::to_raw_pointer(zone_) != nullptr);}
-
-#if HAS_STRING_VIEW
-
-template <class Duration, class TimeZonePtr>
-template <class T, class>
-inline
-zoned_time<Duration, TimeZonePtr>::zoned_time(std::string_view name)
- : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name))
- {}
-
-#else // !HAS_STRING_VIEW
-
-template <class Duration, class TimeZonePtr>
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
-template <class T, class>
-#endif
-inline
-zoned_time<Duration, TimeZonePtr>::zoned_time(const std::string& name)
- : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name))
- {}
-
-#endif // !HAS_STRING_VIEW
-
-template <class Duration, class TimeZonePtr>
-template <class Duration2, class>
-inline
-zoned_time<Duration, TimeZonePtr>::zoned_time(const zoned_time<Duration2, TimeZonePtr>& zt) NOEXCEPT
- : zone_(zt.zone_)
- , tp_(zt.tp_)
- {}
-
-template <class Duration, class TimeZonePtr>
-inline
-zoned_time<Duration, TimeZonePtr>::zoned_time(TimeZonePtr z, const sys_time<Duration>& st)
- : zone_(std::move(z))
- , tp_(st)
- {}
-
-template <class Duration, class TimeZonePtr>
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
-template <class T, class>
-#endif
-inline
-zoned_time<Duration, TimeZonePtr>::zoned_time(TimeZonePtr z, const local_time<Duration>& t)
- : zone_(std::move(z))
- , tp_(zone_->to_sys(t))
- {}
-
-template <class Duration, class TimeZonePtr>
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
-template <class T, class>
-#endif
-inline
-zoned_time<Duration, TimeZonePtr>::zoned_time(TimeZonePtr z, const local_time<Duration>& t,
- choose c)
- : zone_(std::move(z))
- , tp_(zone_->to_sys(t, c))
- {}
-
-template <class Duration, class TimeZonePtr>
-template <class Duration2, class TimeZonePtr2, class>
-inline
-zoned_time<Duration, TimeZonePtr>::zoned_time(TimeZonePtr z,
- const zoned_time<Duration2, TimeZonePtr2>& zt)
- : zone_(std::move(z))
- , tp_(zt.tp_)
- {}
-
-template <class Duration, class TimeZonePtr>
-template <class Duration2, class TimeZonePtr2, class>
-inline
-zoned_time<Duration, TimeZonePtr>::zoned_time(TimeZonePtr z,
- const zoned_time<Duration2, TimeZonePtr2>& zt, choose)
- : zoned_time(std::move(z), zt)
- {}
-
-#if HAS_STRING_VIEW
-
-template <class Duration, class TimeZonePtr>
-template <class T, class>
-inline
-zoned_time<Duration, TimeZonePtr>::zoned_time(std::string_view name,
- detail::nodeduct_t<const sys_time<Duration>&> st)
- : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), st)
- {}
-
-template <class Duration, class TimeZonePtr>
-template <class T, class>
-inline
-zoned_time<Duration, TimeZonePtr>::zoned_time(std::string_view name,
- detail::nodeduct_t<const local_time<Duration>&> t)
- : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), t)
- {}
-
-template <class Duration, class TimeZonePtr>
-template <class T, class>
-inline
-zoned_time<Duration, TimeZonePtr>::zoned_time(std::string_view name,
- detail::nodeduct_t<const local_time<Duration>&> t, choose c)
- : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), t, c)
- {}
-
-template <class Duration, class TimeZonePtr>
-template <class Duration2, class TimeZonePtr2, class, class>
-inline
-zoned_time<Duration, TimeZonePtr>::zoned_time(std::string_view name,
- const zoned_time<Duration2, TimeZonePtr2>& zt)
- : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), zt)
- {}
-
-template <class Duration, class TimeZonePtr>
-template <class Duration2, class TimeZonePtr2, class, class>
-inline
-zoned_time<Duration, TimeZonePtr>::zoned_time(std::string_view name,
- const zoned_time<Duration2, TimeZonePtr2>& zt,
- choose c)
- : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), zt, c)
- {}
-
-#else // !HAS_STRING_VIEW
-
-template <class Duration, class TimeZonePtr>
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
-template <class T, class>
-#endif
-inline
-zoned_time<Duration, TimeZonePtr>::zoned_time(const std::string& name,
- const sys_time<Duration>& st)
- : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), st)
- {}
-
-template <class Duration, class TimeZonePtr>
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
-template <class T, class>
-#endif
-inline
-zoned_time<Duration, TimeZonePtr>::zoned_time(const char* name,
- const sys_time<Duration>& st)
- : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), st)
- {}
-
-template <class Duration, class TimeZonePtr>
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
-template <class T, class>
-#endif
-inline
-zoned_time<Duration, TimeZonePtr>::zoned_time(const std::string& name,
- const local_time<Duration>& t)
- : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), t)
- {}
-
-template <class Duration, class TimeZonePtr>
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
-template <class T, class>
-#endif
-inline
-zoned_time<Duration, TimeZonePtr>::zoned_time(const char* name,
- const local_time<Duration>& t)
- : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), t)
- {}
-
-template <class Duration, class TimeZonePtr>
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
-template <class T, class>
-#endif
-inline
-zoned_time<Duration, TimeZonePtr>::zoned_time(const std::string& name,
- const local_time<Duration>& t, choose c)
- : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), t, c)
- {}
-
-template <class Duration, class TimeZonePtr>
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
-template <class T, class>
-#endif
-inline
-zoned_time<Duration, TimeZonePtr>::zoned_time(const char* name,
- const local_time<Duration>& t, choose c)
- : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), t, c)
- {}
-
-template <class Duration, class TimeZonePtr>
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
-template <class Duration2, class TimeZonePtr2, class, class>
-#else
-template <class Duration2, class TimeZonePtr2>
-#endif
-inline
-zoned_time<Duration, TimeZonePtr>::zoned_time(const std::string& name,
- const zoned_time<Duration2, TimeZonePtr2>& zt)
- : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), zt)
- {}
-
-template <class Duration, class TimeZonePtr>
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
-template <class Duration2, class TimeZonePtr2, class, class>
-#else
-template <class Duration2, class TimeZonePtr2>
-#endif
-inline
-zoned_time<Duration, TimeZonePtr>::zoned_time(const char* name,
- const zoned_time<Duration2, TimeZonePtr2>& zt)
- : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), zt)
- {}
-
-template <class Duration, class TimeZonePtr>
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
-template <class Duration2, class TimeZonePtr2, class, class>
-#else
-template <class Duration2, class TimeZonePtr2>
-#endif
-inline
-zoned_time<Duration, TimeZonePtr>::zoned_time(const std::string& name,
- const zoned_time<Duration2, TimeZonePtr2>& zt,
- choose c)
- : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), zt, c)
- {}
-
-template <class Duration, class TimeZonePtr>
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
-template <class Duration2, class TimeZonePtr2, class, class>
-#else
-template <class Duration2, class TimeZonePtr2>
-#endif
-inline
-zoned_time<Duration, TimeZonePtr>::zoned_time(const char* name,
- const zoned_time<Duration2, TimeZonePtr2>& zt,
- choose c)
- : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), zt, c)
- {}
-
-#endif // HAS_STRING_VIEW
-
-template <class Duration, class TimeZonePtr>
-inline
-zoned_time<Duration, TimeZonePtr>&
-zoned_time<Duration, TimeZonePtr>::operator=(const sys_time<Duration>& st)
-{
- tp_ = st;
- return *this;
-}
-
-template <class Duration, class TimeZonePtr>
-inline
-zoned_time<Duration, TimeZonePtr>&
-zoned_time<Duration, TimeZonePtr>::operator=(const local_time<Duration>& ut)
-{
- tp_ = zone_->to_sys(ut);
- return *this;
-}
-
-template <class Duration, class TimeZonePtr>
-inline
-zoned_time<Duration, TimeZonePtr>::operator local_time<typename zoned_time<Duration, TimeZonePtr>::duration>() const
-{
- return get_local_time();
-}
-
-template <class Duration, class TimeZonePtr>
-inline
-zoned_time<Duration, TimeZonePtr>::operator sys_time<typename zoned_time<Duration, TimeZonePtr>::duration>() const
-{
- return get_sys_time();
-}
-
-template <class Duration, class TimeZonePtr>
-inline
-TimeZonePtr
-zoned_time<Duration, TimeZonePtr>::get_time_zone() const
-{
- return zone_;
-}
-
-template <class Duration, class TimeZonePtr>
-inline
-local_time<typename zoned_time<Duration, TimeZonePtr>::duration>
-zoned_time<Duration, TimeZonePtr>::get_local_time() const
-{
- return zone_->to_local(tp_);
-}
-
-template <class Duration, class TimeZonePtr>
-inline
-sys_time<typename zoned_time<Duration, TimeZonePtr>::duration>
-zoned_time<Duration, TimeZonePtr>::get_sys_time() const
-{
- return tp_;
-}
-
-template <class Duration, class TimeZonePtr>
-inline
-sys_info
-zoned_time<Duration, TimeZonePtr>::get_info() const
-{
- return zone_->get_info(tp_);
-}
-
-// make_zoned_time
-
-inline
-zoned_time<std::chrono::seconds>
-make_zoned()
-{
- return zoned_time<std::chrono::seconds>();
-}
-
-template <class Duration>
-inline
-zoned_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-make_zoned(const sys_time<Duration>& tp)
-{
- return zoned_time<typename std::common_type<Duration, std::chrono::seconds>::type>(tp);
-}
-
-template <class TimeZonePtr
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
-#if !defined(__INTEL_COMPILER) || (__INTEL_COMPILER > 1600)
- , class = typename std::enable_if
- <
- std::is_class
- <
- typename std::decay
- <
- decltype(*detail::to_raw_pointer(std::declval<TimeZonePtr&>()))
- >::type
- >{}
- >::type
-#endif
-#endif
- >
-inline
-zoned_time<std::chrono::seconds, TimeZonePtr>
-make_zoned(TimeZonePtr z)
-{
- return zoned_time<std::chrono::seconds, TimeZonePtr>(std::move(z));
-}
-
-inline
-zoned_seconds
-make_zoned(const std::string& name)
-{
- return zoned_seconds(name);
-}
-
-template <class Duration, class TimeZonePtr
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
-#if !defined(__INTEL_COMPILER) || (__INTEL_COMPILER > 1600)
- , class = typename std::enable_if
- <
- std::is_class<typename std::decay<decltype(*std::declval<TimeZonePtr&>())>::type>{}
- >::type
-#endif
-#endif
- >
-inline
-zoned_time<typename std::common_type<Duration, std::chrono::seconds>::type, TimeZonePtr>
-make_zoned(TimeZonePtr zone, const local_time<Duration>& tp)
-{
- return zoned_time<typename std::common_type<Duration, std::chrono::seconds>::type,
- TimeZonePtr>(std::move(zone), tp);
-}
-
-template <class Duration, class TimeZonePtr
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
-#if !defined(__INTEL_COMPILER) || (__INTEL_COMPILER > 1600)
- , class = typename std::enable_if
- <
- std::is_class<typename std::decay<decltype(*std::declval<TimeZonePtr&>())>::type>{}
- >::type
-#endif
-#endif
- >
-inline
-zoned_time<typename std::common_type<Duration, std::chrono::seconds>::type, TimeZonePtr>
-make_zoned(TimeZonePtr zone, const local_time<Duration>& tp, choose c)
-{
- return zoned_time<typename std::common_type<Duration, std::chrono::seconds>::type,
- TimeZonePtr>(std::move(zone), tp, c);
-}
-
-template <class Duration>
-inline
-zoned_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-make_zoned(const std::string& name, const local_time<Duration>& tp)
-{
- return zoned_time<typename std::common_type<Duration,
- std::chrono::seconds>::type>(name, tp);
-}
-
-template <class Duration>
-inline
-zoned_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-make_zoned(const std::string& name, const local_time<Duration>& tp, choose c)
-{
- return zoned_time<typename std::common_type<Duration,
- std::chrono::seconds>::type>(name, tp, c);
-}
-
-template <class Duration, class TimeZonePtr>
-inline
-zoned_time<Duration, TimeZonePtr>
-make_zoned(TimeZonePtr zone, const zoned_time<Duration, TimeZonePtr>& zt)
-{
- return zoned_time<Duration, TimeZonePtr>(std::move(zone), zt);
-}
-
-template <class Duration, class TimeZonePtr>
-inline
-zoned_time<Duration, TimeZonePtr>
-make_zoned(const std::string& name, const zoned_time<Duration, TimeZonePtr>& zt)
-{
- return zoned_time<Duration, TimeZonePtr>(name, zt);
-}
-
-template <class Duration, class TimeZonePtr>
-inline
-zoned_time<Duration, TimeZonePtr>
-make_zoned(TimeZonePtr zone, const zoned_time<Duration, TimeZonePtr>& zt, choose c)
-{
- return zoned_time<Duration, TimeZonePtr>(std::move(zone), zt, c);
-}
-
-template <class Duration, class TimeZonePtr>
-inline
-zoned_time<Duration, TimeZonePtr>
-make_zoned(const std::string& name, const zoned_time<Duration, TimeZonePtr>& zt, choose c)
-{
- return zoned_time<Duration, TimeZonePtr>(name, zt, c);
-}
-
-template <class Duration, class TimeZonePtr
-#if !defined(_MSC_VER) || (_MSC_VER > 1916)
-#if !defined(__INTEL_COMPILER) || (__INTEL_COMPILER > 1600)
- , class = typename std::enable_if
- <
- std::is_class<typename std::decay<decltype(*std::declval<TimeZonePtr&>())>::type>{}
- >::type
-#endif
-#endif
- >
-inline
-zoned_time<typename std::common_type<Duration, std::chrono::seconds>::type, TimeZonePtr>
-make_zoned(TimeZonePtr zone, const sys_time<Duration>& st)
-{
- return zoned_time<typename std::common_type<Duration, std::chrono::seconds>::type,
- TimeZonePtr>(std::move(zone), st);
-}
-
-template <class Duration>
-inline
-zoned_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-make_zoned(const std::string& name, const sys_time<Duration>& st)
-{
- return zoned_time<typename std::common_type<Duration,
- std::chrono::seconds>::type>(name, st);
-}
-
-template <class CharT, class Traits, class Duration, class TimeZonePtr>
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
- const zoned_time<Duration, TimeZonePtr>& tp)
-{
- using duration = typename zoned_time<Duration, TimeZonePtr>::duration;
- using LT = local_time<duration>;
- auto const st = tp.get_sys_time();
- auto const info = tp.get_time_zone()->get_info(st);
- return to_stream(os, fmt, LT{(st+info.offset).time_since_epoch()},
- &info.abbrev, &info.offset);
-}
-
-template <class CharT, class Traits, class Duration, class TimeZonePtr>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const zoned_time<Duration, TimeZonePtr>& t)
-{
- const CharT fmt[] = {'%', 'F', ' ', '%', 'T', ' ', '%', 'Z', CharT{}};
- return to_stream(os, fmt, t);
-}
-
-#if !MISSING_LEAP_SECONDS
-
-class utc_clock
-{
-public:
- using duration = std::chrono::system_clock::duration;
- using rep = duration::rep;
- using period = duration::period;
- using time_point = std::chrono::time_point<utc_clock>;
- static CONSTDATA bool is_steady = false;
-
- static time_point now();
-
- template<typename Duration>
- static
- std::chrono::time_point<std::chrono::system_clock, typename std::common_type<Duration, std::chrono::seconds>::type>
- to_sys(const std::chrono::time_point<utc_clock, Duration>&);
-
- template<typename Duration>
- static
- std::chrono::time_point<utc_clock, typename std::common_type<Duration, std::chrono::seconds>::type>
- from_sys(const std::chrono::time_point<std::chrono::system_clock, Duration>&);
-
- template<typename Duration>
- static
- std::chrono::time_point<local_t, typename std::common_type<Duration, std::chrono::seconds>::type>
- to_local(const std::chrono::time_point<utc_clock, Duration>&);
-
- template<typename Duration>
- static
- std::chrono::time_point<utc_clock, typename std::common_type<Duration, std::chrono::seconds>::type>
- from_local(const std::chrono::time_point<local_t, Duration>&);
-};
-
-template <class Duration>
- using utc_time = std::chrono::time_point<utc_clock, Duration>;
-
-using utc_seconds = utc_time<std::chrono::seconds>;
-
-template <class Duration>
-utc_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-utc_clock::from_sys(const sys_time<Duration>& st)
-{
- using std::chrono::seconds;
- using CD = typename std::common_type<Duration, seconds>::type;
- auto const& leaps = get_tzdb().leap_seconds;
- auto const lt = std::upper_bound(leaps.begin(), leaps.end(), st);
- return utc_time<CD>{st.time_since_epoch() + seconds{lt-leaps.begin()}};
-}
-
-// Return pair<is_leap_second, seconds{number_of_leap_seconds_since_1970}>
-// first is true if ut is during a leap second insertion, otherwise false.
-// If ut is during a leap second insertion, that leap second is included in the count
-template <class Duration>
-std::pair<bool, std::chrono::seconds>
-is_leap_second(date::utc_time<Duration> const& ut)
-{
- using std::chrono::seconds;
- using duration = typename std::common_type<Duration, seconds>::type;
- auto const& leaps = get_tzdb().leap_seconds;
- auto tp = sys_time<duration>{ut.time_since_epoch()};
- auto const lt = std::upper_bound(leaps.begin(), leaps.end(), tp);
- auto ds = seconds{lt-leaps.begin()};
- tp -= ds;
- auto ls = false;
- if (lt > leaps.begin())
- {
- if (tp < lt[-1])
- {
- if (tp >= lt[-1].date() - seconds{1})
- ls = true;
- else
- --ds;
- }
- }
- return {ls, ds};
-}
-
-struct leap_second_info
-{
- bool is_leap_second;
- std::chrono::seconds elapsed;
-};
-
-template <class Duration>
-leap_second_info
-get_leap_second_info(date::utc_time<Duration> const& ut)
-{
- auto p = is_leap_second(ut);
- return {p.first, p.second};
-}
-
-template <class Duration>
-sys_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-utc_clock::to_sys(const utc_time<Duration>& ut)
-{
- using std::chrono::seconds;
- using CD = typename std::common_type<Duration, seconds>::type;
- auto ls = is_leap_second(ut);
- auto tp = sys_time<CD>{ut.time_since_epoch() - ls.second};
- if (ls.first)
- tp = floor<seconds>(tp) + seconds{1} - CD{1};
- return tp;
-}
-
-inline
-utc_clock::time_point
-utc_clock::now()
-{
- return from_sys(std::chrono::system_clock::now());
-}
-
-template <class Duration>
-utc_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-utc_clock::from_local(const local_time<Duration>& st)
-{
- return from_sys(sys_time<Duration>{st.time_since_epoch()});
-}
-
-template <class Duration>
-local_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-utc_clock::to_local(const utc_time<Duration>& ut)
-{
- using CD = typename std::common_type<Duration, std::chrono::seconds>::type;
- return local_time<CD>{to_sys(ut).time_since_epoch()};
-}
-
-template <class CharT, class Traits, class Duration>
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
- const utc_time<Duration>& t)
-{
- using std::chrono::seconds;
- using CT = typename std::common_type<Duration, seconds>::type;
- const std::string abbrev("UTC");
- CONSTDATA seconds offset{0};
- auto ls = is_leap_second(t);
- auto tp = sys_time<CT>{t.time_since_epoch() - ls.second};
- auto const sd = floor<days>(tp);
- year_month_day ymd = sd;
- auto time = make_time(tp - sys_seconds{sd});
- time.seconds(detail::undocumented{}) += seconds{ls.first};
- fields<CT> fds{ymd, time};
- return to_stream(os, fmt, fds, &abbrev, &offset);
-}
-
-template <class CharT, class Traits, class Duration>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const utc_time<Duration>& t)
-{
- const CharT fmt[] = {'%', 'F', ' ', '%', 'T', CharT{}};
- return to_stream(os, fmt, t);
-}
-
-template <class Duration, class CharT, class Traits, class Alloc = std::allocator<CharT>>
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
- utc_time<Duration>& tp, std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
- std::chrono::minutes* offset = nullptr)
-{
- using std::chrono::seconds;
- using std::chrono::minutes;
- using CT = typename std::common_type<Duration, seconds>::type;
- minutes offset_local{};
- auto offptr = offset ? offset : &offset_local;
- fields<CT> fds{};
- fds.has_tod = true;
- from_stream(is, fmt, fds, abbrev, offptr);
- if (!fds.ymd.ok())
- is.setstate(std::ios::failbit);
- if (!is.fail())
- {
- bool is_60_sec = fds.tod.seconds() == seconds{60};
- if (is_60_sec)
- fds.tod.seconds(detail::undocumented{}) -= seconds{1};
- auto tmp = utc_clock::from_sys(sys_days(fds.ymd) - *offptr + fds.tod.to_duration());
- if (is_60_sec)
- tmp += seconds{1};
- if (is_60_sec != is_leap_second(tmp).first || !fds.tod.in_conventional_range())
- {
- is.setstate(std::ios::failbit);
- return is;
- }
- tp = std::chrono::time_point_cast<Duration>(tmp);
- }
- return is;
-}
-
-// tai_clock
-
-class tai_clock
-{
-public:
- using duration = std::chrono::system_clock::duration;
- using rep = duration::rep;
- using period = duration::period;
- using time_point = std::chrono::time_point<tai_clock>;
- static const bool is_steady = false;
-
- static time_point now();
-
- template<typename Duration>
- static
- std::chrono::time_point<utc_clock, typename std::common_type<Duration, std::chrono::seconds>::type>
- to_utc(const std::chrono::time_point<tai_clock, Duration>&) NOEXCEPT;
-
- template<typename Duration>
- static
- std::chrono::time_point<tai_clock, typename std::common_type<Duration, std::chrono::seconds>::type>
- from_utc(const std::chrono::time_point<utc_clock, Duration>&) NOEXCEPT;
-
- template<typename Duration>
- static
- std::chrono::time_point<local_t, typename std::common_type<Duration, date::days>::type>
- to_local(const std::chrono::time_point<tai_clock, Duration>&) NOEXCEPT;
-
- template<typename Duration>
- static
- std::chrono::time_point<tai_clock, typename std::common_type<Duration, date::days>::type>
- from_local(const std::chrono::time_point<local_t, Duration>&) NOEXCEPT;
-};
-
-template <class Duration>
- using tai_time = std::chrono::time_point<tai_clock, Duration>;
-
-using tai_seconds = tai_time<std::chrono::seconds>;
-
-template <class Duration>
-inline
-utc_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-tai_clock::to_utc(const tai_time<Duration>& t) NOEXCEPT
-{
- using std::chrono::seconds;
- using CD = typename std::common_type<Duration, seconds>::type;
- return utc_time<CD>{t.time_since_epoch()} -
- (sys_days(year{1970}/January/1) - sys_days(year{1958}/January/1) + seconds{10});
-}
-
-template <class Duration> // UTC -> TAI: reuse the tick count as a tai_time, then add the same fixed offset that to_utc subtracts.
-inline
-tai_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-tai_clock::from_utc(const utc_time<Duration>& t) NOEXCEPT
-{
- using std::chrono::seconds;
- using CD = typename std::common_type<Duration, seconds>::type; // result duration: common type of Duration and seconds
- return tai_time<CD>{t.time_since_epoch()} +
- (sys_days(year{1970}/January/1) - sys_days(year{1958}/January/1) + seconds{10}); // days from 1958-01-01 to 1970-01-01, plus 10 s
-}
-
-inline // Current TAI time, obtained by converting the current utc_clock time.
-tai_clock::time_point
-tai_clock::now()
-{
- return from_utc(utc_clock::now());
-}
-
-template <class Duration> // TAI -> local_t: reuse the tick count as a local_time, then subtract a whole-day offset (no seconds term here).
-inline
-local_time<typename std::common_type<Duration, date::days>::type>
-tai_clock::to_local(const tai_time<Duration>& t) NOEXCEPT
-{
- using CD = typename std::common_type<Duration, date::days>::type; // result duration: common type of Duration and days
- return local_time<CD>{t.time_since_epoch()} -
- (local_days(year{1970}/January/1) - local_days(year{1958}/January/1)); // days from 1958-01-01 to 1970-01-01
-}
-
-template <class Duration> // local_t -> TAI: reuse the tick count as a tai_time, then add the whole-day offset that to_local subtracts.
-inline
-tai_time<typename std::common_type<Duration, date::days>::type>
-tai_clock::from_local(const local_time<Duration>& t) NOEXCEPT
-{
- using CD = typename std::common_type<Duration, date::days>::type; // result duration: common type of Duration and days
- return tai_time<CD>{t.time_since_epoch()} +
- (local_days(year{1970}/January/1) - local_days(year{1958}/January/1)); // days from 1958-01-01 to 1970-01-01
-}
-
-template <class CharT, class Traits, class Duration> // Formats a tai_time with fmt by forwarding tai_clock::to_local(t) to another to_stream overload.
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
- const tai_time<Duration>& t)
-{
- const std::string abbrev("TAI"); // abbreviation passed along to the formatter
- CONSTDATA std::chrono::seconds offset{0}; // zero offset passed along to the formatter
- return to_stream(os, fmt, tai_clock::to_local(t), &abbrev, &offset);
-}
-
-template <class CharT, class Traits, class Duration> // Streams a tai_time through to_stream using the fixed format "%F %T".
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const tai_time<Duration>& t)
-{
- const CharT fmt[] = {'%', 'F', ' ', '%', 'T', CharT{}}; // "%F %T" spelled per character so it works for any CharT
- return to_stream(os, fmt, t);
-}
-
-template <class Duration, class CharT, class Traits, class Alloc = std::allocator<CharT>> // Parses a tai_time: reads a local_time with fmt, then converts it with tai_clock::from_local.
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
- tai_time<Duration>& tp,
- std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr, // optional out-parameter, forwarded unchanged to the local_time parser
- std::chrono::minutes* offset = nullptr) // optional out-parameter, forwarded unchanged to the local_time parser
-{
- local_time<Duration> lp;
- from_stream(is, fmt, lp, abbrev, offset);
- if (!is.fail()) // tp is left untouched when parsing failed
- tp = tai_clock::from_local(lp);
- return is;
-}
-
-// gps_clock
-
-class gps_clock // Clock type for GPS time points; declares now() plus conversions to/from utc_clock and local_t time points.
-{
-public:
- using duration = std::chrono::system_clock::duration; // tick type taken from system_clock
- using rep = duration::rep;
- using period = duration::period;
- using time_point = std::chrono::time_point<gps_clock>;
- static const bool is_steady = false;
-
- static time_point now(); // defined out of line as from_utc(utc_clock::now())
-
- template<typename Duration>
- static
- std::chrono::time_point<utc_clock, typename std::common_type<Duration, std::chrono::seconds>::type>
- to_utc(const std::chrono::time_point<gps_clock, Duration>&) NOEXCEPT; // GPS -> UTC; result duration is common_type<Duration, seconds>
-
- template<typename Duration>
- static
- std::chrono::time_point<gps_clock, typename std::common_type<Duration, std::chrono::seconds>::type>
- from_utc(const std::chrono::time_point<utc_clock, Duration>&) NOEXCEPT; // UTC -> GPS; inverse shift of to_utc
-
- template<typename Duration>
- static
- std::chrono::time_point<local_t, typename std::common_type<Duration, date::days>::type>
- to_local(const std::chrono::time_point<gps_clock, Duration>&) NOEXCEPT; // GPS -> local_t; result duration is common_type<Duration, days>
-
- template<typename Duration>
- static
- std::chrono::time_point<gps_clock, typename std::common_type<Duration, date::days>::type>
- from_local(const std::chrono::time_point<local_t, Duration>&) NOEXCEPT; // local_t -> GPS; inverse shift of to_local
-};
-
-template <class Duration>
- using gps_time = std::chrono::time_point<gps_clock, Duration>; // time point on gps_clock with a caller-chosen duration
-
-using gps_seconds = gps_time<std::chrono::seconds>; // gps_time at seconds precision
-
-template <class Duration> // GPS -> UTC: reuse the tick count as a utc_time, then add a fixed offset.
-inline
-utc_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-gps_clock::to_utc(const gps_time<Duration>& t) NOEXCEPT
-{
- using std::chrono::seconds;
- using CD = typename std::common_type<Duration, seconds>::type; // result duration: common type of Duration and seconds
- return utc_time<CD>{t.time_since_epoch()} +
- (sys_days(year{1980}/January/Sunday[1]) - sys_days(year{1970}/January/1) + // days from 1970-01-01 to the first Sunday of January 1980
- seconds{9}); // plus 9 s
-}
-
-template <class Duration> // UTC -> GPS: reuse the tick count as a gps_time, then subtract the same fixed offset that to_utc adds.
-inline
-gps_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-gps_clock::from_utc(const utc_time<Duration>& t) NOEXCEPT
-{
- using std::chrono::seconds;
- using CD = typename std::common_type<Duration, seconds>::type; // result duration: common type of Duration and seconds
- return gps_time<CD>{t.time_since_epoch()} -
- (sys_days(year{1980}/January/Sunday[1]) - sys_days(year{1970}/January/1) + // days from 1970-01-01 to the first Sunday of January 1980
- seconds{9}); // plus 9 s
-}
-
-inline // Current GPS time, obtained by converting the current utc_clock time.
-gps_clock::time_point
-gps_clock::now()
-{
- return from_utc(utc_clock::now());
-}
-
-template <class Duration> // GPS -> local_t: reuse the tick count as a local_time, then add a whole-day offset (no seconds term here).
-inline
-local_time<typename std::common_type<Duration, date::days>::type>
-gps_clock::to_local(const gps_time<Duration>& t) NOEXCEPT
-{
- using CD = typename std::common_type<Duration, date::days>::type; // result duration: common type of Duration and days
- return local_time<CD>{t.time_since_epoch()} +
- (local_days(year{1980}/January/Sunday[1]) - local_days(year{1970}/January/1)); // days from 1970-01-01 to the first Sunday of January 1980
-}
-
-template <class Duration> // local_t -> GPS: reuse the tick count as a gps_time, then subtract the whole-day offset that to_local adds.
-inline
-gps_time<typename std::common_type<Duration, date::days>::type>
-gps_clock::from_local(const local_time<Duration>& t) NOEXCEPT
-{
- using CD = typename std::common_type<Duration, date::days>::type; // result duration: common type of Duration and days
- return gps_time<CD>{t.time_since_epoch()} -
- (local_days(year{1980}/January/Sunday[1]) - local_days(year{1970}/January/1)); // days from 1970-01-01 to the first Sunday of January 1980
-}
-
-
-template <class CharT, class Traits, class Duration> // Formats a gps_time with fmt by forwarding gps_clock::to_local(t) to another to_stream overload.
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
- const gps_time<Duration>& t)
-{
- const std::string abbrev("GPS"); // abbreviation passed along to the formatter
- CONSTDATA std::chrono::seconds offset{0}; // zero offset passed along to the formatter
- return to_stream(os, fmt, gps_clock::to_local(t), &abbrev, &offset);
-}
-
-template <class CharT, class Traits, class Duration> // Streams a gps_time through to_stream using the fixed format "%F %T".
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const gps_time<Duration>& t)
-{
- const CharT fmt[] = {'%', 'F', ' ', '%', 'T', CharT{}}; // "%F %T" spelled per character so it works for any CharT
- return to_stream(os, fmt, t);
-}
-
-template <class Duration, class CharT, class Traits, class Alloc = std::allocator<CharT>> // Parses a gps_time: reads a local_time with fmt, then converts it with gps_clock::from_local.
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
- gps_time<Duration>& tp,
- std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr, // optional out-parameter, forwarded unchanged to the local_time parser
- std::chrono::minutes* offset = nullptr) // optional out-parameter, forwarded unchanged to the local_time parser
-{
- local_time<Duration> lp;
- from_stream(is, fmt, lp, abbrev, offset);
- if (!is.fail()) // tp is left untouched when parsing failed
- tp = gps_clock::from_local(lp);
- return is;
-}
-
-// clock_time_conversion
-
-template <class DstClock, class SrcClock> // Primary template: deliberately empty (no operator()); the specializations below supply the conversions.
-struct clock_time_conversion
-{};
-
-template <> // system_clock -> system_clock: identity conversion.
-struct clock_time_conversion<std::chrono::system_clock, std::chrono::system_clock>
-{
- template <class Duration>
- CONSTCD14
- sys_time<Duration>
- operator()(const sys_time<Duration>& st) const
- {
- return st; // same clock, same duration: returned unchanged
- }
-};
-
-template <> // utc_clock -> utc_clock: identity conversion.
-struct clock_time_conversion<utc_clock, utc_clock>
-{
- template <class Duration>
- CONSTCD14
- utc_time<Duration>
- operator()(const utc_time<Duration>& ut) const
- {
- return ut; // same clock, same duration: returned unchanged
- }
-};
-
-template<> // local_t -> local_t: identity conversion.
-struct clock_time_conversion<local_t, local_t>
-{
- template <class Duration>
- CONSTCD14
- local_time<Duration>
- operator()(const local_time<Duration>& lt) const
- {
- return lt; // same clock tag, same duration: returned unchanged
- }
-};
-
-template <> // system_clock -> utc_clock: delegates to utc_clock::from_sys.
-struct clock_time_conversion<utc_clock, std::chrono::system_clock>
-{
- template <class Duration>
- utc_time<typename std::common_type<Duration, std::chrono::seconds>::type> // result duration: common type of Duration and seconds
- operator()(const sys_time<Duration>& st) const
- {
- return utc_clock::from_sys(st);
- }
-};
-
-template <> // utc_clock -> system_clock: delegates to utc_clock::to_sys.
-struct clock_time_conversion<std::chrono::system_clock, utc_clock>
-{
- template <class Duration>
- sys_time<typename std::common_type<Duration, std::chrono::seconds>::type> // result duration: common type of Duration and seconds
- operator()(const utc_time<Duration>& ut) const
- {
- return utc_clock::to_sys(ut);
- }
-};
-
-template<> // system_clock -> local_t: relabels the time point without changing its tick count.
-struct clock_time_conversion<local_t, std::chrono::system_clock>
-{
- template <class Duration>
- CONSTCD14
- local_time<Duration>
- operator()(const sys_time<Duration>& st) const
- {
- return local_time<Duration>{st.time_since_epoch()}; // same duration since epoch, no offset applied
- }
-};
-
-template<> // local_t -> system_clock: relabels the time point without changing its tick count.
-struct clock_time_conversion<std::chrono::system_clock, local_t>
-{
- template <class Duration>
- CONSTCD14
- sys_time<Duration>
- operator()(const local_time<Duration>& lt) const
- {
- return sys_time<Duration>{lt.time_since_epoch()}; // same duration since epoch, no offset applied
- }
-};
-
-template<> // local_t -> utc_clock: delegates to utc_clock::from_local.
-struct clock_time_conversion<utc_clock, local_t>
-{
- template <class Duration>
- utc_time<typename std::common_type<Duration, std::chrono::seconds>::type> // result duration: common type of Duration and seconds
- operator()(const local_time<Duration>& lt) const
- {
- return utc_clock::from_local(lt);
- }
-};
-
-template<> // utc_clock -> local_t: delegates to utc_clock::to_local.
-struct clock_time_conversion<local_t, utc_clock>
-{
- template <class Duration>
- local_time<typename std::common_type<Duration, std::chrono::seconds>::type> // result duration: common type of Duration and seconds
- operator()(const utc_time<Duration>& ut) const
- {
- return utc_clock::to_local(ut);
- }
-};
-
-template<typename Clock> // Any clock to itself: identity conversion for clocks without an explicit full specialization above.
-struct clock_time_conversion<Clock, Clock>
-{
- template <class Duration>
- CONSTCD14
- std::chrono::time_point<Clock, Duration>
- operator()(const std::chrono::time_point<Clock, Duration>& tp) const
- {
- return tp; // same clock, same duration: returned unchanged
- }
-};
-
-namespace ctc_detail // Detection traits used as return types by the generic clock_time_conversion partial specializations.
-{
-
-template <class Clock, class Duration>
- using time_point = std::chrono::time_point<Clock, Duration>; // local shorthand for std::chrono::time_point
-
-using std::declval;
-using std::chrono::system_clock;
-
-//Checks that TimePoint is a time_point of the given Clock (with TimePoint's own duration);
-//if it is not, the static_assert below emits a hard error
-template <class Clock, class TimePoint>
-struct return_clock_time
-{
- using clock_time_point = time_point<Clock, typename TimePoint::duration>; // the type TimePoint is required to be
- using type = TimePoint;
-
- static_assert(std::is_same<TimePoint, clock_time_point>::value,
- "time point with appropariate clock shall be returned");
-};
-
-// Check if Clock has a to_sys method accepting a time_point<Clock, Duration> const& and
-// returning sys_time. If so, has a nested type member equal to the return type of to_sys.
-template <class Clock, class Duration, class = void>
-struct return_to_sys
-{}; // fallback: no nested type when the specialization below does not apply
-
-template <class Clock, class Duration>
-struct return_to_sys
- <
- Clock, Duration,
- decltype(Clock::to_sys(declval<time_point<Clock, Duration> const&>()), void()) // well-formed (and void) only if the call compiles
- >
- : return_clock_time
- <
- system_clock,
- decltype(Clock::to_sys(declval<time_point<Clock, Duration> const&>()))
- >
-{};
-
-// Similar to above, for Clock::from_sys taking a system_clock time point and returning a Clock time point
-template <class Clock, class Duration, class = void>
-struct return_from_sys
-{};
-
-template <class Clock, class Duration>
-struct return_from_sys
- <
- Clock, Duration,
- decltype(Clock::from_sys(declval<time_point<system_clock, Duration> const&>()),
- void())
- >
- : return_clock_time
- <
- Clock,
- decltype(Clock::from_sys(declval<time_point<system_clock, Duration> const&>()))
- >
-{};
-
-// Similar to above, for Clock::to_utc taking a Clock time point and returning a utc_clock time point
-template <class Clock, class Duration, class = void>
-struct return_to_utc
-{};
-
-template <class Clock, class Duration>
-struct return_to_utc
- <
- Clock, Duration,
- decltype(Clock::to_utc(declval<time_point<Clock, Duration> const&>()), void())
- >
- : return_clock_time
- <
- utc_clock,
- decltype(Clock::to_utc(declval<time_point<Clock, Duration> const&>()))>
-{};
-
-// Similar to above, for Clock::from_utc taking a utc_clock time point and returning a Clock time point
-template <class Clock, class Duration, class = void>
-struct return_from_utc
-{};
-
-template <class Clock, class Duration>
-struct return_from_utc
- <
- Clock, Duration,
- decltype(Clock::from_utc(declval<time_point<utc_clock, Duration> const&>()),
- void())
- >
- : return_clock_time
- <
- Clock,
- decltype(Clock::from_utc(declval<time_point<utc_clock, Duration> const&>()))
- >
-{};
-
-// Similar to above, for Clock::to_local taking a Clock time point and returning a local_t time point
-template<typename Clock, typename Duration, typename = void>
-struct return_to_local
-{};
-
-template<typename Clock, typename Duration>
-struct return_to_local
- <
- Clock, Duration,
- decltype(Clock::to_local(declval<time_point<Clock, Duration> const&>()),
- void())
- >
- : return_clock_time
- <
- local_t,
- decltype(Clock::to_local(declval<time_point<Clock, Duration> const&>()))
- >
-{};
-
-// Similar to above, for Clock::from_local taking a local_t time point and returning a Clock time point
-template<typename Clock, typename Duration, typename = void>
-struct return_from_local
-{};
-
-template<typename Clock, typename Duration>
-struct return_from_local
- <
- Clock, Duration,
- decltype(Clock::from_local(declval<time_point<local_t, Duration> const&>()),
- void())
- >
- : return_clock_time
- <
- Clock,
- decltype(Clock::from_local(declval<time_point<local_t, Duration> const&>()))
- >
-{};
-
-} // namespace ctc_detail
-
-template <class SrcClock> // Any SrcClock -> system_clock: calls SrcClock::to_sys.
-struct clock_time_conversion<std::chrono::system_clock, SrcClock>
-{
- template <class Duration>
- CONSTCD14
- typename ctc_detail::return_to_sys<SrcClock, Duration>::type // ::type exists only when SrcClock::to_sys accepts this time point
- operator()(const std::chrono::time_point<SrcClock, Duration>& tp) const
- {
- return SrcClock::to_sys(tp);
- }
-};
-
-template <class DstClock> // system_clock -> any DstClock: calls DstClock::from_sys.
-struct clock_time_conversion<DstClock, std::chrono::system_clock>
-{
- template <class Duration>
- CONSTCD14
- typename ctc_detail::return_from_sys<DstClock, Duration>::type // ::type exists only when DstClock::from_sys accepts this time point
- operator()(const sys_time<Duration>& st) const
- {
- return DstClock::from_sys(st);
- }
-};
-
-template <class SrcClock> // Any SrcClock -> utc_clock: calls SrcClock::to_utc.
-struct clock_time_conversion<utc_clock, SrcClock>
-{
- template <class Duration>
- CONSTCD14
- typename ctc_detail::return_to_utc<SrcClock, Duration>::type // ::type exists only when SrcClock::to_utc accepts this time point
- operator()(const std::chrono::time_point<SrcClock, Duration>& tp) const
- {
- return SrcClock::to_utc(tp);
- }
-};
-
-template <class DstClock> // utc_clock -> any DstClock: calls DstClock::from_utc.
-struct clock_time_conversion<DstClock, utc_clock>
-{
- template <class Duration>
- CONSTCD14
- typename ctc_detail::return_from_utc<DstClock, Duration>::type // ::type exists only when DstClock::from_utc accepts this time point
- operator()(const utc_time<Duration>& ut) const
- {
- return DstClock::from_utc(ut);
- }
-};
-
-template<typename SrcClock> // Any SrcClock -> local_t: calls SrcClock::to_local.
-struct clock_time_conversion<local_t, SrcClock>
-{
- template <class Duration>
- CONSTCD14
- typename ctc_detail::return_to_local<SrcClock, Duration>::type // ::type exists only when SrcClock::to_local accepts this time point
- operator()(const std::chrono::time_point<SrcClock, Duration>& tp) const
- {
- return SrcClock::to_local(tp);
- }
-};
-
-template<typename DstClock> // local_t -> any DstClock: calls DstClock::from_local.
-struct clock_time_conversion<DstClock, local_t>
-{
- template <class Duration>
- CONSTCD14
- typename ctc_detail::return_from_local<DstClock, Duration>::type // ::type exists only when DstClock::from_local accepts this time point
- operator()(const local_time<Duration>& lt) const
- {
- return DstClock::from_local(lt);
- }
-};
-
-namespace clock_cast_detail // Overload set behind clock_cast: tries a direct conversion first, then routes through system_clock and/or utc_clock.
-{
-
-template <class Clock, class Duration>
- using time_point = std::chrono::time_point<Clock, Duration>; // local shorthand for std::chrono::time_point
-using std::chrono::system_clock;
-
-template <class DstClock, class SrcClock, class Duration> // One conversion step: applies clock_time_conversion<DstClock, SrcClock> to t.
-CONSTCD14
-auto
-conv_clock(const time_point<SrcClock, Duration>& t)
- -> decltype(std::declval<clock_time_conversion<DstClock, SrcClock>>()(t)) // drops out of overload resolution when no such operator() exists
-{
- return clock_time_conversion<DstClock, SrcClock>{}(t);
-}
-
-//direct trait conversion, 1st candidate
-template <class DstClock, class SrcClock, class Duration>
-CONSTCD14
-auto
-cc_impl(const time_point<SrcClock, Duration>& t, const time_point<SrcClock, Duration>*) // exact pointer type: best match for the &tp argument passed by clock_cast
- -> decltype(conv_clock<DstClock>(t))
-{
- return conv_clock<DstClock>(t);
-}
-
-//conversion through sys, 2nd candidate
-template <class DstClock, class SrcClock, class Duration>
-CONSTCD14
-auto
-cc_impl(const time_point<SrcClock, Duration>& t, const void*) // const void*: needs a pointer conversion, so ranks below the exact pointer overload
- -> decltype(conv_clock<DstClock>(conv_clock<system_clock>(t)))
-{
- return conv_clock<DstClock>(conv_clock<system_clock>(t));
-}
-
-//conversion through utc, 2nd candidate
-template <class DstClock, class SrcClock, class Duration>
-CONSTCD14
-auto
-cc_impl(const time_point<SrcClock, Duration>& t, const void*) // same rank as the overload routing through sys
- -> decltype(0, // MSVC_WORKAROUND
- conv_clock<DstClock>(conv_clock<utc_clock>(t)))
-{
- return conv_clock<DstClock>(conv_clock<utc_clock>(t));
-}
-
-//conversion through sys and utc, 3rd candidate
-template <class DstClock, class SrcClock, class Duration>
-CONSTCD14
-auto
-cc_impl(const time_point<SrcClock, Duration>& t, ...) // ellipsis: lowest-ranked match for the pointer argument
- -> decltype(conv_clock<DstClock>(conv_clock<utc_clock>(conv_clock<system_clock>(t))))
-{
- return conv_clock<DstClock>(conv_clock<utc_clock>(conv_clock<system_clock>(t)));
-}
-
-//conversion through utc and sys, 3rd candidate
-template <class DstClock, class SrcClock, class Duration>
-CONSTCD14
-auto
-cc_impl(const time_point<SrcClock, Duration>& t, ...) // same rank as the overload routing through sys then utc
- -> decltype(0, // MSVC_WORKAROUND
- conv_clock<DstClock>(conv_clock<system_clock>(conv_clock<utc_clock>(t))))
-{
- return conv_clock<DstClock>(conv_clock<system_clock>(conv_clock<utc_clock>(t)));
-}
-
-} // namespace clock_cast_detail
-
-template <class DstClock, class SrcClock, class Duration> // Converts tp to DstClock via whichever clock_cast_detail::cc_impl overload is viable and best ranked.
-CONSTCD14
-auto
-clock_cast(const std::chrono::time_point<SrcClock, Duration>& tp)
- -> decltype(clock_cast_detail::cc_impl<DstClock>(tp, &tp))
-{
- return clock_cast_detail::cc_impl<DstClock>(tp, &tp); // &tp is only a tag argument used to rank the cc_impl overloads
-}
-
-// Deprecated API
-
-template <class Duration> // to_sys_time overloads: thin wrappers converting utc/tai/gps time points to sys_time.
-inline
-sys_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-to_sys_time(const utc_time<Duration>& t)
-{
- return utc_clock::to_sys(t);
-}
-
-template <class Duration>
-inline
-sys_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-to_sys_time(const tai_time<Duration>& t)
-{
- return utc_clock::to_sys(tai_clock::to_utc(t)); // TAI -> UTC -> sys
-}
-
-template <class Duration>
-inline
-sys_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-to_sys_time(const gps_time<Duration>& t)
-{
- return utc_clock::to_sys(gps_clock::to_utc(t)); // GPS -> UTC -> sys
-}
-
-
-template <class Duration> // to_utc_time overloads: thin wrappers converting sys/tai/gps time points to utc_time.
-inline
-utc_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-to_utc_time(const sys_time<Duration>& t)
-{
- return utc_clock::from_sys(t);
-}
-
-template <class Duration>
-inline
-utc_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-to_utc_time(const tai_time<Duration>& t)
-{
- return tai_clock::to_utc(t);
-}
-
-template <class Duration>
-inline
-utc_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-to_utc_time(const gps_time<Duration>& t)
-{
- return gps_clock::to_utc(t);
-}
-
-
-template <class Duration> // to_tai_time overloads: thin wrappers converting sys/utc/gps time points to tai_time.
-inline
-tai_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-to_tai_time(const sys_time<Duration>& t)
-{
- return tai_clock::from_utc(utc_clock::from_sys(t)); // sys -> UTC -> TAI
-}
-
-template <class Duration>
-inline
-tai_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-to_tai_time(const utc_time<Duration>& t)
-{
- return tai_clock::from_utc(t);
-}
-
-template <class Duration>
-inline
-tai_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-to_tai_time(const gps_time<Duration>& t)
-{
- return tai_clock::from_utc(gps_clock::to_utc(t)); // GPS -> UTC -> TAI
-}
-
-
-template <class Duration> // to_gps_time overloads: thin wrappers converting sys/utc/tai time points to gps_time.
-inline
-gps_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-to_gps_time(const sys_time<Duration>& t)
-{
- return gps_clock::from_utc(utc_clock::from_sys(t)); // sys -> UTC -> GPS
-}
-
-template <class Duration>
-inline
-gps_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-to_gps_time(const utc_time<Duration>& t)
-{
- return gps_clock::from_utc(t);
-}
-
-template <class Duration>
-inline
-gps_time<typename std::common_type<Duration, std::chrono::seconds>::type>
-to_gps_time(const tai_time<Duration>& t)
-{
- return gps_clock::from_utc(tai_clock::to_utc(t)); // TAI -> UTC -> GPS
-}
-
-#endif // !MISSING_LEAP_SECONDS
-
-} // namespace date
-} // namespace arrow_vendored
-
-#endif // TZ_H
+#ifndef TZ_H
+#define TZ_H
+
+// The MIT License (MIT)
+//
+// Copyright (c) 2015, 2016, 2017 Howard Hinnant
+// Copyright (c) 2017 Jiangang Zhuang
+// Copyright (c) 2017 Aaron Bishop
+// Copyright (c) 2017 Tomasz Kamiński
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+// Our apologies. When the previous paragraph was written, lowercase had not yet
+// been invented (that would involve another several millennia of evolution).
+// We did not mean to shout.
+
+// Get more recent database at http://www.iana.org/time-zones
+
+// The notion of "current timezone" is something the operating system is expected to "just
+// know". How it knows this is system specific. It's often a value set by the user at OS
+// installation time and recorded by the OS somewhere. On Linux and Mac systems the current
+// timezone name is obtained by looking at the name or contents of a particular file on
+// disk. On Windows the current timezone name comes from the registry. In either method,
+// there is no guarantee that the "native" current timezone name obtained will match any
+// of the "Standard" names in this library's "database". On Linux, the names usually do
+// seem to match so mapping functions to map from native to "Standard" are typically not
+// required. On Windows, the names are never "Standard" so mapping is always required.
+// Technically any OS may use the mapping process but currently only Windows does use it.
+
+// NOTE(ARROW): If this is not set, then the library will attempt to
+// use libcurl to obtain a timezone database, and we do not yet have
+// curl in our build toolchain
+#ifndef _WIN32 // non-Windows builds always select the OS timezone database
+#define USE_OS_TZDB 1
+#endif
+
+#ifndef USE_OS_TZDB
+# define USE_OS_TZDB 0 // reached only when _WIN32 is defined and USE_OS_TZDB was not predefined
+#endif
+
+#ifndef HAS_REMOTE_API // default for HAS_REMOTE_API when the build did not set it
+# if USE_OS_TZDB == 0
+# ifdef _WIN32
+# define HAS_REMOTE_API 0
+# else
+# define HAS_REMOTE_API 1
+# endif
+# else // HAS_REMOTE_API makes no sense when using the OS timezone database
+# define HAS_REMOTE_API 0
+# endif
+#endif
+
+#ifdef __clang__ // silence clang's -Wconstant-logical-operand for the macro-only && in the static_assert below
+# pragma clang diagnostic push
+# pragma clang diagnostic ignored "-Wconstant-logical-operand"
+#endif
+
+static_assert(!(USE_OS_TZDB && HAS_REMOTE_API), // compile-time rejection of enabling both options
+ "USE_OS_TZDB and HAS_REMOTE_API can not be used together");
+
+#ifdef __clang__
+# pragma clang diagnostic pop // restore the diagnostic state saved by the push above
+#endif
+
+#ifndef AUTO_DOWNLOAD
+# define AUTO_DOWNLOAD HAS_REMOTE_API // defaults to whatever HAS_REMOTE_API is
+#endif
+
+static_assert(HAS_REMOTE_API == 0 ? AUTO_DOWNLOAD == 0 : true, // only constrains AUTO_DOWNLOAD when the remote API is disabled
+ "AUTO_DOWNLOAD can not be turned on without HAS_REMOTE_API");
+
+#ifndef USE_SHELL_API
+# define USE_SHELL_API 1 // default applied when the build did not define USE_SHELL_API
+#endif
+
+#if USE_OS_TZDB // MISSING_LEAP_SECONDS: when 0, the leap-second-dependent API (section closed by "#endif // !MISSING_LEAP_SECONDS") is compiled in
+# ifdef _WIN32
+# error "USE_OS_TZDB can not be used on Windows"
+# endif
+# ifndef MISSING_LEAP_SECONDS
+# ifdef __APPLE__
+# define MISSING_LEAP_SECONDS 1 // default on Apple platforms when using the OS database
+# else
+# define MISSING_LEAP_SECONDS 0
+# endif
+# endif
+#else
+# define MISSING_LEAP_SECONDS 0 // not using the OS database: defined unconditionally (no #ifndef guard on this branch)
+#endif
+
+#ifndef HAS_DEDUCTION_GUIDES // default derived from the language level when the build did not set it
+# if __cplusplus >= 201703
+# define HAS_DEDUCTION_GUIDES 1 // __cplusplus reports C++17 or later
+# else
+# define HAS_DEDUCTION_GUIDES 0
+# endif
+#endif // HAS_DEDUCTION_GUIDES
+
+#include "date.h"
+
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+#include "tz_private.h"
+#endif
+
+#include <algorithm>
+#include <atomic>
+#include <cassert>
+#include <chrono>
+#include <istream>
+#include <locale>
+#include <memory>
+#include <mutex>
+#include <ostream>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#ifdef _WIN32 // DATE_API: decoration placed on exported declarations such as locate_zone and current_zone
+# ifdef DATE_BUILD_DLL
+# define DATE_API __declspec(dllexport)
+# elif defined(DATE_USE_DLL)
+# define DATE_API __declspec(dllimport)
+# else
+# define DATE_API // expands to nothing when neither DATE_BUILD_DLL nor DATE_USE_DLL is defined
+# endif
+#else
+# ifdef DATE_BUILD_DLL
+# define DATE_API __attribute__ ((visibility ("default")))
+# else
+# define DATE_API // expands to nothing when DATE_BUILD_DLL is not defined
+# endif
+#endif
+
+namespace arrow_vendored
+{
+namespace date
+{
+
+enum class choose {earliest, latest}; // selector passed with a local_time to to_sys() / zoned_time constructors; NOTE(review): presumably picks which mapping to use for an ambiguous or nonexistent local time - confirm against time_zone::to_sys
+
+namespace detail // Internal helpers: an opaque tag type and an identity trait used to block template argument deduction.
+{
+ struct undocumented; // declared only; no definition is visible in this part of the file
+
+ template<typename T>
+ struct nodeduct // identity trait: nodeduct<T>::type is T
+ {
+ using type = T;
+ };
+
+ template<typename T>
+ using nodeduct_t = typename nodeduct<T>::type; // naming a parameter type through this alias puts T in a non-deduced context
+}
+
+struct sys_info // Plain aggregate: two sys_seconds bounds, an offset, a save value and an abbreviation string.
+{
+ sys_seconds begin;
+ sys_seconds end;
+ std::chrono::seconds offset; // NOTE(review): make_msg adds this to sys-based times to print local times, so presumably the UTC offset - confirm
+ std::chrono::minutes save;
+ std::string abbrev;
+};
+
+template<class CharT, class Traits> // Streams every field of a sys_info on its own line: begin, end, offset, save, abbrev.
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const sys_info& r)
+{
+ os << r.begin << '\n';
+ os << r.end << '\n';
+ os << make_time(r.offset) << "\n"; // durations are passed through make_time before streaming
+ os << make_time(r.save) << "\n";
+ os << r.abbrev << '\n';
+ return os;
+}
+
+struct local_info // Result classification plus up to two sys_info records; second is printed only when result is not unique.
+{
+ enum {unique, nonexistent, ambiguous} result; // which of the three cases applies
+ sys_info first;
+ sys_info second;
+};
+
+template<class CharT, class Traits> // Streams a local_info: optional headline, first, and (when not unique) "and" followed by second.
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const local_info& r)
+{
+ if (r.result == local_info::nonexistent)
+ os << "nonexistent between\n";
+ else if (r.result == local_info::ambiguous)
+ os << "ambiguous between\n";
+ os << r.first; // always printed; the unique case gets no headline
+ if (r.result != local_info::unique)
+ {
+ os << "and\n";
+ os << r.second;
+ }
+ return os;
+}
+
+class nonexistent_local_time // std::runtime_error whose message is built by make_msg from a local_time and its local_info.
+ : public std::runtime_error
+{
+public:
+ template <class Duration>
+ nonexistent_local_time(local_time<Duration> tp, const local_info& i); // message comes from make_msg(tp, i)
+
+private:
+ template <class Duration>
+ static
+ std::string
+ make_msg(local_time<Duration> tp, const local_info& i); // builds the what() text; asserts i.result == nonexistent
+};
+
+template <class Duration> // Constructor: initializes the std::runtime_error base with the text from make_msg(tp, i).
+inline
+nonexistent_local_time::nonexistent_local_time(local_time<Duration> tp,
+ const local_info& i)
+ : std::runtime_error(make_msg(tp, i))
+{
+}
+
+template <class Duration> // Builds the exception text describing the gap that tp falls into.
+std::string
+nonexistent_local_time::make_msg(local_time<Duration> tp, const local_info& i)
+{
+ assert(i.result == local_info::nonexistent); // precondition: i must describe a nonexistent local time
+ std::ostringstream os;
+ os << tp << " is in a gap between\n"
+ << local_seconds{i.first.end.time_since_epoch()} + i.first.offset << ' ' // end of the first record, shifted by its offset
+ << i.first.abbrev << " and\n"
+ << local_seconds{i.second.begin.time_since_epoch()} + i.second.offset << ' ' // begin of the second record, shifted by its offset
+ << i.second.abbrev
+ << " which are both equivalent to\n"
+ << i.first.end << " UTC";
+ return os.str();
+}
+
+class ambiguous_local_time // std::runtime_error whose message is built by make_msg from a local_time and its local_info.
+ : public std::runtime_error
+{
+public:
+ template <class Duration>
+ ambiguous_local_time(local_time<Duration> tp, const local_info& i); // message comes from make_msg(tp, i)
+
+private:
+ template <class Duration>
+ static
+ std::string
+ make_msg(local_time<Duration> tp, const local_info& i); // builds the what() text; asserts i.result == ambiguous
+};
+
+template <class Duration> // Constructor: initializes the std::runtime_error base with the text from make_msg(tp, i).
+inline
+ambiguous_local_time::ambiguous_local_time(local_time<Duration> tp, const local_info& i)
+ : std::runtime_error(make_msg(tp, i))
+{
+}
+
+template <class Duration> // Builds the exception text listing both candidate interpretations of tp.
+std::string
+ambiguous_local_time::make_msg(local_time<Duration> tp, const local_info& i)
+{
+ assert(i.result == local_info::ambiguous); // precondition: i must describe an ambiguous local time
+ std::ostringstream os;
+ os << tp << " is ambiguous. It could be\n"
+ << tp << ' ' << i.first.abbrev << " == "
+ << tp - i.first.offset << " UTC or\n" // tp with the first record's offset removed
+ << tp << ' ' << i.second.abbrev << " == "
+ << tp - i.second.offset << " UTC"; // tp with the second record's offset removed
+ return os.str();
+}
+
+class time_zone; // forward declaration; only pointers to it are used in the declarations below
+
+#if HAS_STRING_VIEW // the name parameter type depends on std::string_view availability
+DATE_API const time_zone* locate_zone(std::string_view tz_name);
+#else
+DATE_API const time_zone* locate_zone(const std::string& tz_name);
+#endif
+
+DATE_API const time_zone* current_zone(); // declared here, defined elsewhere; returns a time_zone pointer without taking a name
+
+template <class T> // Primary template: intentionally empty; only the specialization for const time_zone* below provides members.
+struct zoned_traits
+{
+};
+
+template <> // Traits for const time_zone*: supplies a default zone and name lookup by forwarding to date::locate_zone.
+struct zoned_traits<const time_zone*>
+{
+ static
+ const time_zone*
+ default_zone()
+ {
+ return date::locate_zone("Etc/UTC"); // the default zone is looked up by the name "Etc/UTC"
+ }
+
+#if HAS_STRING_VIEW
+
+ static
+ const time_zone*
+ locate_zone(std::string_view name)
+ {
+ return date::locate_zone(name);
+ }
+
+#else // !HAS_STRING_VIEW
+
+ static
+ const time_zone*
+ locate_zone(const std::string& name)
+ {
+ return date::locate_zone(name);
+ }
+
+ static
+ const time_zone*
+ locate_zone(const char* name) // separate C-string overload, present only in the non-string_view branch
+ {
+ return date::locate_zone(name);
+ }
+
+#endif // !HAS_STRING_VIEW
+};
+
+template <class Duration, class TimeZonePtr> // forward declaration so operator== below can name zoned_time
+class zoned_time;
+
+template <class Duration1, class Duration2, class TimeZonePtr> // equality between zoned_times of possibly different durations but the same TimeZonePtr type; declared only here
+bool
+operator==(const zoned_time<Duration1, TimeZonePtr>& x,
+ const zoned_time<Duration2, TimeZonePtr>& y);
+
+template <class Duration, class TimeZonePtr = const time_zone*>
+class zoned_time
+{
+public:
+ using duration = typename std::common_type<Duration, std::chrono::seconds>::type;
+
+private:
+ TimeZonePtr zone_;
+ sys_time<duration> tp_;
+
+public:
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+ template <class T = TimeZonePtr,
+ class = decltype(zoned_traits<T>::default_zone())>
+#endif
+ zoned_time();
+
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+ template <class T = TimeZonePtr,
+ class = decltype(zoned_traits<T>::default_zone())>
+#endif
+ zoned_time(const sys_time<Duration>& st);
+ explicit zoned_time(TimeZonePtr z);
+
+#if HAS_STRING_VIEW
+ template <class T = TimeZonePtr,
+ class = typename std::enable_if
+ <
+ std::is_constructible
+ <
+ zoned_time,
+ decltype(zoned_traits<T>::locate_zone(std::string_view()))
+ >::value
+ >::type>
+ explicit zoned_time(std::string_view name);
+#else
+# if !defined(_MSC_VER) || (_MSC_VER > 1916)
+ template <class T = TimeZonePtr,
+ class = typename std::enable_if
+ <
+ std::is_constructible
+ <
+ zoned_time,
+ decltype(zoned_traits<T>::locate_zone(std::string()))
+ >::value
+ >::type>
+# endif
+ explicit zoned_time(const std::string& name);
+#endif
+
+ template <class Duration2,
+ class = typename std::enable_if
+ <
+ std::is_convertible<sys_time<Duration2>,
+ sys_time<Duration>>::value
+ >::type>
+ zoned_time(const zoned_time<Duration2, TimeZonePtr>& zt) NOEXCEPT;
+
+ zoned_time(TimeZonePtr z, const sys_time<Duration>& st);
+
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+ template <class T = TimeZonePtr,
+ class = typename std::enable_if
+ <
+ std::is_convertible
+ <
+ decltype(std::declval<T&>()->to_sys(local_time<Duration>{})),
+ sys_time<duration>
+ >::value
+ >::type>
+#endif
+ zoned_time(TimeZonePtr z, const local_time<Duration>& tp);
+
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+ template <class T = TimeZonePtr,
+ class = typename std::enable_if
+ <
+ std::is_convertible
+ <
+ decltype(std::declval<T&>()->to_sys(local_time<Duration>{},
+ choose::earliest)),
+ sys_time<duration>
+ >::value
+ >::type>
+#endif
+ zoned_time(TimeZonePtr z, const local_time<Duration>& tp, choose c);
+
+ template <class Duration2, class TimeZonePtr2,
+ class = typename std::enable_if
+ <
+ std::is_convertible<sys_time<Duration2>,
+ sys_time<Duration>>::value
+ >::type>
+ zoned_time(TimeZonePtr z, const zoned_time<Duration2, TimeZonePtr2>& zt);
+
+ template <class Duration2, class TimeZonePtr2,
+ class = typename std::enable_if
+ <
+ std::is_convertible<sys_time<Duration2>,
+ sys_time<Duration>>::value
+ >::type>
+ zoned_time(TimeZonePtr z, const zoned_time<Duration2, TimeZonePtr2>& zt, choose);
+
+#if HAS_STRING_VIEW
+
+ // Name-based constructors (std::string_view flavour). Each one is enabled only
+ // when zoned_time can be constructed from the result of
+ // zoned_traits<T>::locate_zone(std::string_view()) plus the remaining arguments.
+
+ // Zone given by name, time given as a sys_time.
+ template <class T = TimeZonePtr,
+ class = typename std::enable_if
+ <
+ std::is_constructible
+ <
+ zoned_time,
+ decltype(zoned_traits<T>::locate_zone(std::string_view())),
+ sys_time<Duration>
+ >::value
+ >::type>
+ zoned_time(std::string_view name, detail::nodeduct_t<const sys_time<Duration>&> st);
+
+ // Zone given by name, time given as a local_time.
+ template <class T = TimeZonePtr,
+ class = typename std::enable_if
+ <
+ std::is_constructible
+ <
+ zoned_time,
+ decltype(zoned_traits<T>::locate_zone(std::string_view())),
+ local_time<Duration>
+ >::value
+ >::type>
+ zoned_time(std::string_view name, detail::nodeduct_t<const local_time<Duration>&> tp);
+
+ // Zone given by name, time given as a local_time together with a `choose` value.
+ template <class T = TimeZonePtr,
+ class = typename std::enable_if
+ <
+ std::is_constructible
+ <
+ zoned_time,
+ decltype(zoned_traits<T>::locate_zone(std::string_view())),
+ local_time<Duration>,
+ choose
+ >::value
+ >::type>
+ zoned_time(std::string_view name, detail::nodeduct_t<const local_time<Duration>&> tp, choose c);
+
+ // Zone given by name, time taken from another zoned_time. Additionally requires
+ // sys_time<Duration2> to be convertible to sys_time<Duration>.
+ template <class Duration2, class TimeZonePtr2, class T = TimeZonePtr,
+ class = typename std::enable_if
+ <
+ std::is_convertible<sys_time<Duration2>,
+ sys_time<Duration>>::value &&
+ std::is_constructible
+ <
+ zoned_time,
+ decltype(zoned_traits<T>::locate_zone(std::string_view())),
+ zoned_time
+ >::value
+ >::type>
+ zoned_time(std::string_view name, const zoned_time<Duration2, TimeZonePtr2>& zt);
+
+ // As above, with a trailing `choose` argument.
+ template <class Duration2, class TimeZonePtr2, class T = TimeZonePtr,
+ class = typename std::enable_if
+ <
+ std::is_convertible<sys_time<Duration2>,
+ sys_time<Duration>>::value &&
+ std::is_constructible
+ <
+ zoned_time,
+ decltype(zoned_traits<T>::locate_zone(std::string_view())),
+ zoned_time,
+ choose
+ >::value
+ >::type>
+ zoned_time(std::string_view name, const zoned_time<Duration2, TimeZonePtr2>& zt, choose);
+
+#else // !HAS_STRING_VIEW
+
+ // Name-based constructors used when std::string_view is unavailable. Every
+ // overload exists twice: once taking `const std::string&` and once taking
+ // `const char*`. The is_constructible constraints are compiled only when the
+ // compiler is not MSVC or when _MSC_VER > 1916; otherwise the constructors are
+ // declared unconstrained.
+
+ // Zone given by name (std::string), time given as a sys_time.
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+ template <class T = TimeZonePtr,
+ class = typename std::enable_if
+ <
+ std::is_constructible
+ <
+ zoned_time,
+ decltype(zoned_traits<T>::locate_zone(std::string())),
+ sys_time<Duration>
+ >::value
+ >::type>
+#endif
+ zoned_time(const std::string& name, const sys_time<Duration>& st);
+
+ // Zone given by name (C string), time given as a sys_time.
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+ template <class T = TimeZonePtr,
+ class = typename std::enable_if
+ <
+ std::is_constructible
+ <
+ zoned_time,
+ decltype(zoned_traits<T>::locate_zone(std::string())),
+ sys_time<Duration>
+ >::value
+ >::type>
+#endif
+ zoned_time(const char* name, const sys_time<Duration>& st);
+
+ // Zone given by name (std::string), time given as a local_time.
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+ template <class T = TimeZonePtr,
+ class = typename std::enable_if
+ <
+ std::is_constructible
+ <
+ zoned_time,
+ decltype(zoned_traits<T>::locate_zone(std::string())),
+ local_time<Duration>
+ >::value
+ >::type>
+#endif
+ zoned_time(const std::string& name, const local_time<Duration>& tp);
+
+ // Zone given by name (C string), time given as a local_time.
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+ template <class T = TimeZonePtr,
+ class = typename std::enable_if
+ <
+ std::is_constructible
+ <
+ zoned_time,
+ decltype(zoned_traits<T>::locate_zone(std::string())),
+ local_time<Duration>
+ >::value
+ >::type>
+#endif
+ zoned_time(const char* name, const local_time<Duration>& tp);
+
+ // Zone given by name (std::string), local_time plus a `choose` value.
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+ template <class T = TimeZonePtr,
+ class = typename std::enable_if
+ <
+ std::is_constructible
+ <
+ zoned_time,
+ decltype(zoned_traits<T>::locate_zone(std::string())),
+ local_time<Duration>,
+ choose
+ >::value
+ >::type>
+#endif
+ zoned_time(const std::string& name, const local_time<Duration>& tp, choose c);
+
+ // Zone given by name (C string), local_time plus a `choose` value.
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+ template <class T = TimeZonePtr,
+ class = typename std::enable_if
+ <
+ std::is_constructible
+ <
+ zoned_time,
+ decltype(zoned_traits<T>::locate_zone(std::string())),
+ local_time<Duration>,
+ choose
+ >::value
+ >::type>
+#endif
+ zoned_time(const char* name, const local_time<Duration>& tp, choose c);
+
+ // Zone given by name (std::string), time taken from another zoned_time.
+ // The constrained form also requires sys_time<Duration2> to be convertible
+ // to sys_time<Duration>.
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+ template <class Duration2, class TimeZonePtr2, class T = TimeZonePtr,
+ class = typename std::enable_if
+ <
+ std::is_convertible<sys_time<Duration2>,
+ sys_time<Duration>>::value &&
+ std::is_constructible
+ <
+ zoned_time,
+ decltype(zoned_traits<T>::locate_zone(std::string())),
+ zoned_time
+ >::value
+ >::type>
+#else
+ template <class Duration2, class TimeZonePtr2>
+#endif
+ zoned_time(const std::string& name, const zoned_time<Duration2, TimeZonePtr2>& zt);
+
+ // Zone given by name (C string), time taken from another zoned_time.
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+ template <class Duration2, class TimeZonePtr2, class T = TimeZonePtr,
+ class = typename std::enable_if
+ <
+ std::is_convertible<sys_time<Duration2>,
+ sys_time<Duration>>::value &&
+ std::is_constructible
+ <
+ zoned_time,
+ decltype(zoned_traits<T>::locate_zone(std::string())),
+ zoned_time
+ >::value
+ >::type>
+#else
+ template <class Duration2, class TimeZonePtr2>
+#endif
+ zoned_time(const char* name, const zoned_time<Duration2, TimeZonePtr2>& zt);
+
+ // Zone given by name (std::string), another zoned_time plus a `choose` value.
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+ template <class Duration2, class TimeZonePtr2, class T = TimeZonePtr,
+ class = typename std::enable_if
+ <
+ std::is_convertible<sys_time<Duration2>,
+ sys_time<Duration>>::value &&
+ std::is_constructible
+ <
+ zoned_time,
+ decltype(zoned_traits<T>::locate_zone(std::string())),
+ zoned_time,
+ choose
+ >::value
+ >::type>
+#else
+ template <class Duration2, class TimeZonePtr2>
+#endif
+ zoned_time(const std::string& name, const zoned_time<Duration2, TimeZonePtr2>& zt,
+ choose);
+
+ // Zone given by name (C string), another zoned_time plus a `choose` value.
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+ template <class Duration2, class TimeZonePtr2, class T = TimeZonePtr,
+ class = typename std::enable_if
+ <
+ std::is_convertible<sys_time<Duration2>,
+ sys_time<Duration>>::value &&
+ std::is_constructible
+ <
+ zoned_time,
+ decltype(zoned_traits<T>::locate_zone(std::string())),
+ zoned_time,
+ choose
+ >::value
+ >::type>
+#else
+ template <class Duration2, class TimeZonePtr2>
+#endif
+ zoned_time(const char* name, const zoned_time<Duration2, TimeZonePtr2>& zt,
+ choose);
+
+#endif // !HAS_STRING_VIEW
+
+ // Assigning a sys_time replaces the stored time point; assigning a local_time
+ // first converts it through the zone's to_sys(). The zone is kept in both cases.
+ zoned_time& operator=(const sys_time<Duration>& st);
+ zoned_time& operator=(const local_time<Duration>& ut);
+
+ // Explicit conversions; they return get_sys_time() / get_local_time().
+ explicit operator sys_time<duration>() const;
+ explicit operator local_time<duration>() const;
+
+ // Accessors: the zone pointer, the time expressed in the zone (via the zone's
+ // to_local()), the stored sys_time, and the zone's get_info() for that time.
+ TimeZonePtr get_time_zone() const;
+ local_time<duration> get_local_time() const;
+ sys_time<duration> get_sys_time() const;
+ sys_info get_info() const;
+
+ // Friend: equality needs access to the private zone_ and tp_ members of both
+ // operands, which may use different durations.
+ template <class Duration1, class Duration2, class TimeZonePtr1>
+ friend
+ bool
+ operator==(const zoned_time<Duration1, TimeZonePtr1>& x,
+ const zoned_time<Duration2, TimeZonePtr1>& y);
+
+ // Friend: stream insertion for any zoned_time specialization (defined elsewhere).
+ template <class CharT, class Traits, class Duration1, class TimeZonePtr1>
+ friend
+ std::basic_ostream<CharT, Traits>&
+ operator<<(std::basic_ostream<CharT, Traits>& os,
+ const zoned_time<Duration1, TimeZonePtr1>& t);
+
+private:
+ template <class D, class T> friend class zoned_time;
+};
+
+// Convenience alias: a zoned_time with seconds precision and the default
+// TimeZonePtr template argument.
+using zoned_seconds = zoned_time<std::chrono::seconds>;
+
+#if HAS_DEDUCTION_GUIDES
+
+namespace detail
+{
+    // Selects the TimeZonePtr type a deduction guide should produce. An argument
+    // convertible to std::string_view is treated as a zone name and maps to
+    // `time_zone const*`; any other argument type is used as the pointer type
+    // itself once its reference and cv-qualifiers have been stripped.
+    template<typename TimeZonePtrOrName>
+    using time_zone_representation =
+        typename std::conditional
+        <
+            std::is_convertible<TimeZonePtrOrName, std::string_view>::value,
+            time_zone const*,
+            typename std::remove_cv
+            <
+                typename std::remove_reference<TimeZonePtrOrName>::type
+            >::type
+        >::type;
+}
+
+// Deduction guides for zoned_time. The duration is always widened to at least
+// seconds via common_type; the zone pointer type comes from
+// detail::time_zone_representation when a zone argument is present.
+
+// No arguments: seconds precision, default zone pointer type.
+zoned_time()
+ -> zoned_time<std::chrono::seconds>;
+
+// From a sys_time only.
+template <class Duration>
+zoned_time(sys_time<Duration>)
+ -> zoned_time<std::common_type_t<Duration, std::chrono::seconds>>;
+
+// From a zone pointer or a zone name only.
+template <class TimeZonePtrOrName>
+zoned_time(TimeZonePtrOrName&&)
+ -> zoned_time<std::chrono::seconds, detail::time_zone_representation<TimeZonePtrOrName>>;
+
+// From a zone pointer or name plus a sys_time.
+template <class TimeZonePtrOrName, class Duration>
+zoned_time(TimeZonePtrOrName&&, sys_time<Duration>)
+ -> zoned_time<std::common_type_t<Duration, std::chrono::seconds>, detail::time_zone_representation<TimeZonePtrOrName>>;
+
+// From a zone pointer or name plus a local_time and an optional `choose`.
+template <class TimeZonePtrOrName, class Duration>
+zoned_time(TimeZonePtrOrName&&, local_time<Duration>, choose = choose::earliest)
+ -> zoned_time<std::common_type_t<Duration, std::chrono::seconds>, detail::time_zone_representation<TimeZonePtrOrName>>;
+
+// From a zone pointer or name plus another zoned_time and an optional `choose`.
+template <class Duration, class TimeZonePtrOrName, class TimeZonePtr2>
+zoned_time(TimeZonePtrOrName&&, zoned_time<Duration, TimeZonePtr2>, choose = choose::earliest)
+ -> zoned_time<std::common_type_t<Duration, std::chrono::seconds>, detail::time_zone_representation<TimeZonePtrOrName>>;
+
+#endif // HAS_DEDUCTION_GUIDES
+
+// Two zoned_time objects are equal when their zone pointers compare equal and
+// their stored time points compare equal. The two durations may differ; the
+// zone pointer type must be the same.
+template <class Duration1, class Duration2, class TimeZonePtr>
+inline
+bool
+operator==(const zoned_time<Duration1, TimeZonePtr>& x,
+           const zoned_time<Duration2, TimeZonePtr>& y)
+{
+    if (!(x.zone_ == y.zone_))
+        return false;
+    return x.tp_ == y.tp_;
+}
+
+// Inequality for zoned_time: the exact negation of operator==.
+template <class Duration1, class Duration2, class TimeZonePtr>
+inline
+bool
+operator!=(const zoned_time<Duration1, TimeZonePtr>& x,
+           const zoned_time<Duration2, TimeZonePtr>& y)
+{
+    const bool equal = (x == y);
+    return !equal;
+}
+
+#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
+
+// Forward declarations of the implementation types that time_zone and tzdb
+// store in vectors. Which pair is declared depends on USE_OS_TZDB.
+namespace detail
+{
+# if USE_OS_TZDB
+ struct transition;
+ struct expanded_ttinfo;
+# else // !USE_OS_TZDB
+ struct zonelet;
+ class Rule;
+# endif // !USE_OS_TZDB
+}
+
+#endif // !defined(_MSC_VER) || (_MSC_VER >= 1900)
+
+// A single time zone: a name plus the data used to answer get_info() queries
+// and to convert between sys_time and local_time. The stored data depends on
+// the build mode: transitions_/ttinfos_ with USE_OS_TZDB, zonelets_ otherwise.
+// The class declares move operations only.
+class time_zone
+{
+private:
+ std::string name_; // returned by name(); the key compared by operator== and operator<
+#if USE_OS_TZDB
+ std::vector<detail::transition> transitions_;
+ std::vector<detail::expanded_ttinfo> ttinfos_;
+#else // !USE_OS_TZDB
+ std::vector<detail::zonelet> zonelets_;
+#endif // !USE_OS_TZDB
+ // NOTE(review): presumably guards a one-time lazy initialization of the zone
+ // data; the code that uses it is not in this header section - confirm.
+ std::unique_ptr<std::once_flag> adjusted_;
+
+public:
+ // Move operations are defaulted, except for MSVC older than 1900 where they
+ // are declared here and defined by hand after the class.
+#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
+ time_zone(time_zone&&) = default;
+ time_zone& operator=(time_zone&&) = default;
+#else // defined(_MSC_VER) && (_MSC_VER < 1900)
+ time_zone(time_zone&& src);
+ time_zone& operator=(time_zone&& src);
+#endif // defined(_MSC_VER) && (_MSC_VER < 1900)
+
+ // The detail::undocumented tag parameter marks this constructor as internal.
+ DATE_API explicit time_zone(const std::string& s, detail::undocumented);
+
+ const std::string& name() const NOEXCEPT;
+
+ // Both overloads floor the argument to seconds and forward to get_info_impl.
+ template <class Duration> sys_info get_info(sys_time<Duration> st) const;
+ template <class Duration> local_info get_info(local_time<Duration> tp) const;
+
+ // Local -> system conversion. Throws nonexistent_local_time or
+ // ambiguous_local_time when get_info reports such a result.
+ template <class Duration>
+ sys_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+ to_sys(local_time<Duration> tp) const;
+
+ // Local -> system conversion that does not throw: `z` picks the offset for an
+ // ambiguous local time, and a nonexistent local time maps to first.end.
+ template <class Duration>
+ sys_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+ to_sys(local_time<Duration> tp, choose z) const;
+
+ // System -> local conversion: adds the offset reported by get_info(tp).
+ template <class Duration>
+ local_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+ to_local(sys_time<Duration> tp) const;
+
+ // Comparison uses name_ only.
+ friend bool operator==(const time_zone& x, const time_zone& y) NOEXCEPT;
+ friend bool operator< (const time_zone& x, const time_zone& y) NOEXCEPT;
+ friend DATE_API std::ostream& operator<<(std::ostream& os, const time_zone& z);
+
+#if !USE_OS_TZDB
+ DATE_API void add(const std::string& s);
+#endif // !USE_OS_TZDB
+
+private:
+ DATE_API sys_info get_info_impl(sys_seconds tp) const;
+ DATE_API local_info get_info_impl(local_seconds tp) const;
+
+ // Tag-dispatched helpers behind to_sys(): std::false_type selects the
+ // non-throwing variant, std::true_type the throwing one.
+ template <class Duration>
+ sys_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+ to_sys_impl(local_time<Duration> tp, choose z, std::false_type) const;
+ template <class Duration>
+ sys_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+ to_sys_impl(local_time<Duration> tp, choose, std::true_type) const;
+
+#if USE_OS_TZDB
+ DATE_API void init() const;
+ DATE_API void init_impl();
+ DATE_API sys_info
+ load_sys_info(std::vector<detail::transition>::const_iterator i) const;
+
+ template <class TimeType>
+ DATE_API void
+ load_data(std::istream& inf, std::int32_t tzh_leapcnt, std::int32_t tzh_timecnt,
+ std::int32_t tzh_typecnt, std::int32_t tzh_charcnt);
+#else // !USE_OS_TZDB
+ DATE_API sys_info get_info_impl(sys_seconds tp, int timezone) const;
+ DATE_API void adjust_infos(const std::vector<detail::Rule>& rules);
+ DATE_API void parse_info(std::istream& in);
+#endif // !USE_OS_TZDB
+};
+
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+
+// Hand-written move constructor, compiled only for MSVC older than 1900 (see
+// the enclosing #if): moves name_, zonelets_ and adjusted_ out of `src`.
+inline
+time_zone::time_zone(time_zone&& src)
+ : name_(std::move(src.name_))
+ , zonelets_(std::move(src.zonelets_))
+ , adjusted_(std::move(src.adjusted_))
+ {}
+
+// Hand-written move assignment, compiled only for MSVC older than 1900 (see the
+// enclosing #if): move-assigns name_, zonelets_ and adjusted_ from `src`.
+inline
+time_zone&
+time_zone::operator=(time_zone&& src)
+{
+ name_ = std::move(src.name_);
+ zonelets_ = std::move(src.zonelets_);
+ adjusted_ = std::move(src.adjusted_);
+ return *this;
+}
+
+#endif // defined(_MSC_VER) && (_MSC_VER < 1900)
+
+// Read-only access to the zone's name.
+inline
+const std::string&
+time_zone::name() const NOEXCEPT
+{
+    return this->name_;
+}
+
+// sys_info lookup for a system time of any precision. get_info_impl takes
+// sys_seconds, so the argument is floored to whole seconds first.
+template <class Duration>
+inline
+sys_info
+time_zone::get_info(sys_time<Duration> st) const
+{
+    const auto whole_seconds = date::floor<std::chrono::seconds>(st);
+    return get_info_impl(whole_seconds);
+}
+
+// local_info lookup for a local time of any precision. get_info_impl takes
+// local_seconds, so the argument is floored to whole seconds first.
+template <class Duration>
+inline
+local_info
+time_zone::get_info(local_time<Duration> tp) const
+{
+    const auto whole_seconds = date::floor<std::chrono::seconds>(tp);
+    return get_info_impl(whole_seconds);
+}
+
+// Converts a local time to system time using the throwing implementation
+// (selected by the std::true_type tag). The `choose` argument is required by
+// the helper's signature but is not named there, so a value-initialized one is
+// passed.
+template <class Duration>
+inline
+sys_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+time_zone::to_sys(local_time<Duration> tp) const
+{
+    using throwing = std::true_type;
+    return to_sys_impl(tp, choose{}, throwing{});
+}
+
+// Converts a local time to system time using the non-throwing implementation
+// (selected by the std::false_type tag); `z` is forwarded to it unchanged.
+template <class Duration>
+inline
+sys_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+time_zone::to_sys(local_time<Duration> tp, choose z) const
+{
+    using non_throwing = std::false_type;
+    return to_sys_impl(tp, z, non_throwing{});
+}
+
+// Converts a system time to local time in this zone by adding the offset that
+// get_info() reports for `tp`. The result has at least seconds precision.
+template <class Duration>
+inline
+local_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+time_zone::to_local(sys_time<Duration> tp) const
+{
+    using result_type =
+        local_time<typename std::common_type<Duration, std::chrono::seconds>::type>;
+    const auto info = get_info(tp);
+    const auto shifted = tp + info.offset;
+    return result_type{shifted.time_since_epoch()};
+}
+
+// Time zones are compared by name only: equality and ordering both look at
+// nothing but the zone name.
+inline
+bool
+operator==(const time_zone& x, const time_zone& y) NOEXCEPT
+{
+    return x.name() == y.name();
+}
+
+inline
+bool
+operator< (const time_zone& x, const time_zone& y) NOEXCEPT
+{
+    return x.name() < y.name();
+}
+
+// Remaining time_zone comparisons, all expressed through the zone names just
+// like operator== and operator<.
+inline
+bool
+operator!=(const time_zone& x, const time_zone& y) NOEXCEPT
+{
+    return !(x.name() == y.name());
+}
+
+inline
+bool
+operator> (const time_zone& x, const time_zone& y) NOEXCEPT
+{
+    return y.name() < x.name();
+}
+
+inline
+bool
+operator<=(const time_zone& x, const time_zone& y) NOEXCEPT
+{
+    return !(y.name() < x.name());
+}
+
+inline
+bool
+operator>=(const time_zone& x, const time_zone& y) NOEXCEPT
+{
+    return !(x.name() < y.name());
+}
+
+// Non-throwing local -> system conversion (std::false_type overload).
+//  - nonexistent local time: returns the `end` of the first sys_info;
+//  - ambiguous local time:   subtracts the second offset when z is
+//                            choose::latest, otherwise the first offset;
+//  - any other result:       subtracts the first offset.
+template <class Duration>
+sys_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+time_zone::to_sys_impl(local_time<Duration> tp, choose z, std::false_type) const
+{
+    auto info = get_info(tp);
+    if (info.result == local_info::nonexistent)
+        return info.first.end;
+
+    const bool take_second =
+        info.result == local_info::ambiguous && z == choose::latest;
+    const auto offset = take_second ? info.second.offset : info.first.offset;
+    return sys_time<Duration>{tp.time_since_epoch()} - offset;
+}
+
+// Throwing local -> system conversion (std::true_type overload). The `choose`
+// parameter is unnamed and unused. Throws nonexistent_local_time or
+// ambiguous_local_time when get_info() reports the corresponding result;
+// otherwise subtracts the first offset from the local time.
+template <class Duration>
+sys_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+time_zone::to_sys_impl(local_time<Duration> tp, choose, std::true_type) const
+{
+    auto info = get_info(tp);
+    if (info.result == local_info::nonexistent)
+        throw nonexistent_local_time(tp, info);
+    if (info.result == local_info::ambiguous)
+        throw ambiguous_local_time(tp, info);
+    return sys_time<Duration>{tp.time_since_epoch()} - info.first.offset;
+}
+
+#if !USE_OS_TZDB
+
+// A link record: a name together with the name of its target. Links are
+// compared by name_ only; target_ takes no part in == or <.
+class time_zone_link
+{
+private:
+    std::string name_;
+    std::string target_;
+
+public:
+    // Defined out of line.
+    DATE_API explicit time_zone_link(const std::string& s);
+
+    const std::string& name() const
+    {
+        return name_;
+    }
+
+    const std::string& target() const
+    {
+        return target_;
+    }
+
+    friend bool operator==(const time_zone_link& x, const time_zone_link& y)
+    {
+        return x.name_ == y.name_;
+    }
+
+    friend bool operator< (const time_zone_link& x, const time_zone_link& y)
+    {
+        return x.name_ < y.name_;
+    }
+
+    friend DATE_API std::ostream& operator<<(std::ostream& os, const time_zone_link& x);
+};
+
+// Alternative name for time_zone_link.
+using link = time_zone_link;
+
+// Remaining time_zone_link comparisons, expressed through the link names just
+// like the in-class operator== and operator<.
+inline
+bool
+operator!=(const time_zone_link& x, const time_zone_link& y)
+{
+    return !(x.name() == y.name());
+}
+
+inline
+bool
+operator> (const time_zone_link& x, const time_zone_link& y)
+{
+    return y.name() < x.name();
+}
+
+inline
+bool
+operator<=(const time_zone_link& x, const time_zone_link& y)
+{
+    return !(y.name() < x.name());
+}
+
+inline
+bool
+operator>=(const time_zone_link& x, const time_zone_link& y)
+{
+    return !(x.name() < y.name());
+}
+
+#endif // !USE_OS_TZDB
+
+#if !MISSING_LEAP_SECONDS
+
+// A leap second entry, represented by a single sys_seconds time point.
+// All comparisons, including the mixed ones against sys_time, use date_ only.
+class leap_second
+{
+private:
+ sys_seconds date_; // returned by date()
+
+public:
+ // Internal constructors (tagged with detail::undocumented): built from a time
+ // point with USE_OS_TZDB, from a string otherwise. Defined out of line.
+#if USE_OS_TZDB
+ DATE_API explicit leap_second(const sys_seconds& s, detail::undocumented);
+#else
+ DATE_API explicit leap_second(const std::string& s, detail::undocumented);
+#endif
+
+ sys_seconds date() const {return date_;}
+
+ friend bool operator==(const leap_second& x, const leap_second& y) {return x.date_ == y.date_;}
+ friend bool operator< (const leap_second& x, const leap_second& y) {return x.date_ < y.date_;}
+
+ // Mixed comparisons with a sys_time of any duration. Only these three are
+ // friends; the other mixed operators are free functions built on top of them.
+ template <class Duration>
+ friend
+ bool
+ operator==(const leap_second& x, const sys_time<Duration>& y)
+ {
+ return x.date_ == y;
+ }
+
+ template <class Duration>
+ friend
+ bool
+ operator< (const leap_second& x, const sys_time<Duration>& y)
+ {
+ return x.date_ < y;
+ }
+
+ template <class Duration>
+ friend
+ bool
+ operator< (const sys_time<Duration>& x, const leap_second& y)
+ {
+ return x < y.date_;
+ }
+
+ friend DATE_API std::ostream& operator<<(std::ostream& os, const leap_second& x);
+};
+
+// Remaining leap_second comparisons, expressed through date() just like the
+// in-class operator== and operator<.
+inline
+bool
+operator!=(const leap_second& x, const leap_second& y)
+{
+    return !(x.date() == y.date());
+}
+
+inline
+bool
+operator> (const leap_second& x, const leap_second& y)
+{
+    return y.date() < x.date();
+}
+
+inline
+bool
+operator<=(const leap_second& x, const leap_second& y)
+{
+    return !(y.date() < x.date());
+}
+
+inline
+bool
+operator>=(const leap_second& x, const leap_second& y)
+{
+    return !(x.date() < y.date());
+}
+
+// sys_time == leap_second: compares the leap second's date with the time point.
+template <class Duration>
+inline
+bool
+operator==(const sys_time<Duration>& x, const leap_second& y)
+{
+    return y.date() == x;
+}
+
+// leap_second != sys_time: true when the leap second's date differs from y.
+template <class Duration>
+inline
+bool
+operator!=(const leap_second& x, const sys_time<Duration>& y)
+{
+    return !(x.date() == y);
+}
+
+// sys_time != leap_second: true when the leap second's date differs from x.
+template <class Duration>
+inline
+bool
+operator!=(const sys_time<Duration>& x, const leap_second& y)
+{
+    return !(y.date() == x);
+}
+
+// leap_second > sys_time: true when y is earlier than the leap second's date.
+template <class Duration>
+inline
+bool
+operator> (const leap_second& x, const sys_time<Duration>& y)
+{
+    return y < x.date();
+}
+
+// sys_time > leap_second: true when the leap second's date is earlier than x.
+template <class Duration>
+inline
+bool
+operator> (const sys_time<Duration>& x, const leap_second& y)
+{
+    return y.date() < x;
+}
+
+// leap_second <= sys_time: true unless y is earlier than the leap second's date.
+template <class Duration>
+inline
+bool
+operator<=(const leap_second& x, const sys_time<Duration>& y)
+{
+    return !(y < x.date());
+}
+
+// sys_time <= leap_second: true unless the leap second's date is earlier than x.
+template <class Duration>
+inline
+bool
+operator<=(const sys_time<Duration>& x, const leap_second& y)
+{
+    return !(y.date() < x);
+}
+
+// leap_second >= sys_time: true unless the leap second's date is earlier than y.
+template <class Duration>
+inline
+bool
+operator>=(const leap_second& x, const sys_time<Duration>& y)
+{
+    return !(x.date() < y);
+}
+
+// sys_time >= leap_second: true unless x is earlier than the leap second's date.
+template <class Duration>
+inline
+bool
+operator>=(const sys_time<Duration>& x, const leap_second& y)
+{
+    return !(x < y.date());
+}
+
+// Alternative name for leap_second.
+using leap = leap_second;
+
+#endif // !MISSING_LEAP_SECONDS
+
+#ifdef _WIN32
+
+namespace detail
+{
+
+// The time zone mapping is modelled after this data file:
+// http://unicode.org/repos/cldr/trunk/common/supplemental/windowsZones.xml
+// and the field names match the element names from the mapZone element
+// of windowsZones.xml.
+// The website displays this file here:
+// http://www.unicode.org/cldr/charts/latest/supplemental/zone_tzid.html
+// The html view is sorted before being displayed but is otherwise the same
+// There is a mapping between the OS-centric view (in this case Windows)
+// that the html display uses and the generic view that the xml file uses.
+// That mapping is this:
+// display column "windows" -> xml field "other".
+// display column "region" -> xml field "territory".
+// display column "tzid" -> xml field "type".
+// This structure uses the generic terminology because it could be
+// used to support other os/native name conversions, not just windows,
+// and using the same generic names helps retain the connection to the
+// origin of the data that we are using.
+// One zone-name mapping entry made of three strings: `other`, `territory`
+// and `type`.
+struct timezone_mapping
+{
+    std::string other;
+    std::string territory;
+    std::string type;
+
+    timezone_mapping() = default;
+
+    // Copies the three C strings into the corresponding members.
+    timezone_mapping(const char* other_name, const char* territory_name,
+                     const char* type_name)
+        : other(other_name)
+        , territory(territory_name)
+        , type(type_name)
+    {
+    }
+};
+
+} // detail
+
+#endif // _WIN32
+
+// One loaded time zone database: a version string plus the vectors of zones
+// and, depending on the build configuration, links, leap seconds, rules and
+// Windows name mappings. `next` chains databases into the singly linked list
+// that tzdb_list iterates over. Only move operations are declared.
+struct tzdb
+{
+ std::string version = "unknown";
+ std::vector<time_zone> zones;
+#if !USE_OS_TZDB
+ std::vector<time_zone_link> links;
+#endif
+#if !MISSING_LEAP_SECONDS
+ std::vector<leap_second> leap_seconds;
+#endif
+#if !USE_OS_TZDB
+ std::vector<detail::Rule> rules;
+#endif
+#ifdef _WIN32
+ std::vector<detail::timezone_mapping> mappings;
+#endif
+ tzdb* next = nullptr; // followed by tzdb_list::const_iterator::operator++
+
+ tzdb() = default;
+#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
+ tzdb(tzdb&&) = default;
+ tzdb& operator=(tzdb&&) = default;
+#else // defined(_MSC_VER) && (_MSC_VER < 1900)
+ // Hand-written move operations for MSVC older than 1900. They name links,
+ // leap_seconds, rules and mappings unconditionally, and they do not touch
+ // `next` (the constructor leaves it at its default, the assignment leaves
+ // it unchanged).
+ tzdb(tzdb&& src)
+ : version(std::move(src.version))
+ , zones(std::move(src.zones))
+ , links(std::move(src.links))
+ , leap_seconds(std::move(src.leap_seconds))
+ , rules(std::move(src.rules))
+ , mappings(std::move(src.mappings))
+ {}
+
+ tzdb& operator=(tzdb&& src)
+ {
+ version = std::move(src.version);
+ zones = std::move(src.zones);
+ links = std::move(src.links);
+ leap_seconds = std::move(src.leap_seconds);
+ rules = std::move(src.rules);
+ mappings = std::move(src.mappings);
+ return *this;
+ }
+#endif // defined(_MSC_VER) && (_MSC_VER < 1900)
+
+ // Zone lookup by name; the parameter type depends on HAS_STRING_VIEW.
+ // Both lookups are defined out of line.
+#if HAS_STRING_VIEW
+ const time_zone* locate_zone(std::string_view tz_name) const;
+#else
+ const time_zone* locate_zone(const std::string& tz_name) const;
+#endif
+ const time_zone* current_zone() const;
+};
+
+// Alternative name for tzdb.
+using TZ_DB = tzdb;
+
+// Stream insertion for a whole database (defined out of line).
+DATE_API std::ostream&
+operator<<(std::ostream& os, const tzdb& db);
+
+// Returns a reference to a tzdb (defined out of line).
+DATE_API const tzdb& get_tzdb();
+
+// A singly linked list of tzdb objects, reached through an atomic head pointer
+// and chained via tzdb::next. Iteration is read-only (const_iterator only).
+class tzdb_list
+{
+ std::atomic<tzdb*> head_{nullptr};
+
+public:
+ ~tzdb_list();
+ tzdb_list() = default;
+ tzdb_list(tzdb_list&& x) noexcept;
+
+ // Both overloads dereference head_ without checking it for null.
+ const tzdb& front() const noexcept {return *head_;}
+ tzdb& front() noexcept {return *head_;}
+
+ class const_iterator;
+
+ const_iterator begin() const noexcept;
+ const_iterator end() const noexcept;
+
+ const_iterator cbegin() const noexcept;
+ const_iterator cend() const noexcept;
+
+ const_iterator erase_after(const_iterator p) noexcept;
+
+ struct undocumented_helper;
+private:
+ void push_front(tzdb* tzdb) noexcept;
+};
+
+// Forward iterator over the tzdb objects of a tzdb_list. It wraps a plain
+// tzdb pointer; a null pointer is the end position.
+class tzdb_list::const_iterator
+{
+    tzdb* p_ = nullptr;
+
+    // Private: only tzdb_list (a friend) creates an iterator at a given node.
+    explicit const_iterator(tzdb* p) noexcept
+        : p_{p}
+    {
+    }
+
+public:
+    const_iterator() = default;
+
+    using iterator_category = std::forward_iterator_tag;
+    using value_type        = tzdb;
+    using reference         = const value_type&;
+    using pointer           = const value_type*;
+    using difference_type   = std::ptrdiff_t;
+
+    reference operator*() const noexcept
+    {
+        return *p_;
+    }
+
+    pointer operator->() const noexcept
+    {
+        return p_;
+    }
+
+    // Pre-increment: step to the node linked through tzdb::next.
+    const_iterator& operator++() noexcept
+    {
+        p_ = p_->next;
+        return *this;
+    }
+
+    // Post-increment: advance, but return the position held before the step.
+    const_iterator operator++(int) noexcept
+    {
+        const_iterator previous = *this;
+        ++(*this);
+        return previous;
+    }
+
+    friend
+    bool
+    operator==(const const_iterator& x, const const_iterator& y) noexcept
+    {
+        return x.p_ == y.p_;
+    }
+
+    friend
+    bool
+    operator!=(const const_iterator& x, const const_iterator& y) noexcept
+    {
+        return !(x == y);
+    }
+
+    friend class tzdb_list;
+};
+
+// Iterator positioned at the current head of the list.
+inline
+tzdb_list::const_iterator
+tzdb_list::begin() const noexcept
+{
+    tzdb* const first = head_.load();
+    return const_iterator{first};
+}
+
+// End iterator: wraps a null pointer.
+inline
+tzdb_list::const_iterator
+tzdb_list::end() const noexcept
+{
+    tzdb* const past_the_end = nullptr;
+    return const_iterator{past_the_end};
+}
+
+// Same as begin().
+inline
+tzdb_list::const_iterator
+tzdb_list::cbegin() const noexcept
+{
+    return this->begin();
+}
+
+// Same as end().
+inline
+tzdb_list::const_iterator
+tzdb_list::cend() const noexcept
+{
+    return this->end();
+}
+
+// Returns a reference to a tzdb_list (defined out of line).
+DATE_API tzdb_list& get_tzdb_list();
+
+#if !USE_OS_TZDB
+
+// Available only when the library does not use the OS-provided database (see
+// the enclosing #if !USE_OS_TZDB). Both functions are defined out of line.
+DATE_API const tzdb& reload_tzdb();
+DATE_API void set_install(const std::string& install);
+
+#endif // !USE_OS_TZDB
+
+#if HAS_REMOTE_API
+
+// Remote database API, declared only when HAS_REMOTE_API is set (see the
+// enclosing #if). All three functions are defined out of line.
+DATE_API std::string remote_version();
+// if provided error_buffer size should be at least CURL_ERROR_SIZE
+DATE_API bool remote_download(const std::string& version, char* error_buffer = nullptr);
+DATE_API bool remote_install(const std::string& version);
+
+#endif
+
+// zoned_time
+
+namespace detail
+{
+
+// A built-in pointer is already raw, so it is handed back unchanged.
+template <class T>
+inline
+T*
+to_raw_pointer(T* ptr) noexcept
+{
+    return ptr;
+}
+
+// Any other pointer-like object is unwrapped through its operator->(). The
+// result is passed to to_raw_pointer again, so unwrapping continues until a
+// built-in pointer is reached.
+template <class Pointer>
+inline
+auto
+to_raw_pointer(Pointer ptr) noexcept
+    -> decltype(detail::to_raw_pointer(ptr.operator->()))
+{
+    return detail::to_raw_pointer(ptr.operator->());
+}
+
+} // namespace detail
+
+// Default constructor: the zone comes from zoned_traits<TimeZonePtr>::default_zone();
+// tp_ is not mentioned in the initializer list.
+template <class Duration, class TimeZonePtr>
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+template <class T, class>
+#endif
+inline
+zoned_time<Duration, TimeZonePtr>::zoned_time()
+ : zone_(zoned_traits<TimeZonePtr>::default_zone())
+ {}
+
+// Constructs from a sys_time only: the zone comes from
+// zoned_traits<TimeZonePtr>::default_zone() and `st` becomes the stored time point.
+template <class Duration, class TimeZonePtr>
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+template <class T, class>
+#endif
+inline
+zoned_time<Duration, TimeZonePtr>::zoned_time(const sys_time<Duration>& st)
+ : zone_(zoned_traits<TimeZonePtr>::default_zone())
+ , tp_(st)
+ {}
+
+// Constructs from a zone pointer only. The pointer is moved into zone_ and, in
+// builds where assert is active, checked to be non-null after unwrapping it
+// with detail::to_raw_pointer.
+template <class Duration, class TimeZonePtr>
+inline
+zoned_time<Duration, TimeZonePtr>::zoned_time(TimeZonePtr z)
+ : zone_(std::move(z))
+ {assert(detail::to_raw_pointer(zone_) != nullptr);}
+
+#if HAS_STRING_VIEW
+
+// Constructs from a zone name: resolves it with
+// zoned_traits<TimeZonePtr>::locate_zone and delegates to the zone-pointer
+// constructor.
+template <class Duration, class TimeZonePtr>
+template <class T, class>
+inline
+zoned_time<Duration, TimeZonePtr>::zoned_time(std::string_view name)
+ : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name))
+ {}
+
+#else // !HAS_STRING_VIEW
+
+// Constructs from a zone name (std::string variant used when std::string_view
+// is unavailable): resolves it with zoned_traits<TimeZonePtr>::locate_zone and
+// delegates to the zone-pointer constructor.
+template <class Duration, class TimeZonePtr>
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+template <class T, class>
+#endif
+inline
+zoned_time<Duration, TimeZonePtr>::zoned_time(const std::string& name)
+ : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name))
+ {}
+
+#endif // !HAS_STRING_VIEW
+
+// Converting constructor from a zoned_time with a different duration but the
+// same zone pointer type: copies both the zone and the time point.
+template <class Duration, class TimeZonePtr>
+template <class Duration2, class>
+inline
+zoned_time<Duration, TimeZonePtr>::zoned_time(const zoned_time<Duration2, TimeZonePtr>& zt) NOEXCEPT
+ : zone_(zt.zone_)
+ , tp_(zt.tp_)
+ {}
+
+// Constructs from a zone pointer and a sys_time. Unlike the zone-only
+// constructor, this one contains no assert on the pointer.
+template <class Duration, class TimeZonePtr>
+inline
+zoned_time<Duration, TimeZonePtr>::zoned_time(TimeZonePtr z, const sys_time<Duration>& st)
+ : zone_(std::move(z))
+ , tp_(st)
+ {}
+
+// Constructs from a zone pointer and a local_time; the time point is obtained
+// from the zone's one-argument to_sys().
+// NOTE(review): tp_ is computed through zone_, so this relies on zone_ being
+// declared before tp_; the member declarations are outside this section - confirm.
+template <class Duration, class TimeZonePtr>
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+template <class T, class>
+#endif
+inline
+zoned_time<Duration, TimeZonePtr>::zoned_time(TimeZonePtr z, const local_time<Duration>& t)
+ : zone_(std::move(z))
+ , tp_(zone_->to_sys(t))
+ {}
+
+// Constructs from a zone pointer, a local_time and a `choose` value; the time
+// point is obtained from the zone's two-argument to_sys().
+// NOTE(review): tp_ is computed through zone_, so this relies on zone_ being
+// declared before tp_; the member declarations are outside this section - confirm.
+template <class Duration, class TimeZonePtr>
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+template <class T, class>
+#endif
+inline
+zoned_time<Duration, TimeZonePtr>::zoned_time(TimeZonePtr z, const local_time<Duration>& t,
+ choose c)
+ : zone_(std::move(z))
+ , tp_(zone_->to_sys(t, c))
+ {}
+
+// Constructs from a zone pointer and another zoned_time: takes `z` as the zone
+// and copies only the time point from `zt`; the zone of `zt` is not used.
+template <class Duration, class TimeZonePtr>
+template <class Duration2, class TimeZonePtr2, class>
+inline
+zoned_time<Duration, TimeZonePtr>::zoned_time(TimeZonePtr z,
+ const zoned_time<Duration2, TimeZonePtr2>& zt)
+ : zone_(std::move(z))
+ , tp_(zt.tp_)
+ {}
+
+// Same as the (z, zt) constructor; the trailing `choose` parameter is unnamed
+// and ignored, and the work is delegated to the two-argument form.
+template <class Duration, class TimeZonePtr>
+template <class Duration2, class TimeZonePtr2, class>
+inline
+zoned_time<Duration, TimeZonePtr>::zoned_time(TimeZonePtr z,
+ const zoned_time<Duration2, TimeZonePtr2>& zt, choose)
+ : zoned_time(std::move(z), zt)
+ {}
+
+#if HAS_STRING_VIEW
+
+// Name-based constructors (std::string_view flavour). Each one resolves the
+// name with zoned_traits<TimeZonePtr>::locate_zone and delegates to the
+// corresponding zone-pointer constructor with the remaining arguments.
+
+// (name, sys_time)
+template <class Duration, class TimeZonePtr>
+template <class T, class>
+inline
+zoned_time<Duration, TimeZonePtr>::zoned_time(std::string_view name,
+ detail::nodeduct_t<const sys_time<Duration>&> st)
+ : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), st)
+ {}
+
+// (name, local_time)
+template <class Duration, class TimeZonePtr>
+template <class T, class>
+inline
+zoned_time<Duration, TimeZonePtr>::zoned_time(std::string_view name,
+ detail::nodeduct_t<const local_time<Duration>&> t)
+ : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), t)
+ {}
+
+// (name, local_time, choose)
+template <class Duration, class TimeZonePtr>
+template <class T, class>
+inline
+zoned_time<Duration, TimeZonePtr>::zoned_time(std::string_view name,
+ detail::nodeduct_t<const local_time<Duration>&> t, choose c)
+ : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), t, c)
+ {}
+
+// (name, zoned_time)
+template <class Duration, class TimeZonePtr>
+template <class Duration2, class TimeZonePtr2, class, class>
+inline
+zoned_time<Duration, TimeZonePtr>::zoned_time(std::string_view name,
+ const zoned_time<Duration2, TimeZonePtr2>& zt)
+ : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), zt)
+ {}
+
+// (name, zoned_time, choose)
+template <class Duration, class TimeZonePtr>
+template <class Duration2, class TimeZonePtr2, class, class>
+inline
+zoned_time<Duration, TimeZonePtr>::zoned_time(std::string_view name,
+ const zoned_time<Duration2, TimeZonePtr2>& zt,
+ choose c)
+ : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), zt, c)
+ {}
+
+#else // !HAS_STRING_VIEW
+
+// Name-based constructors used when std::string_view is unavailable, in
+// `const std::string&` and `const char*` pairs. Each one resolves the name with
+// zoned_traits<TimeZonePtr>::locate_zone and delegates to the corresponding
+// zone-pointer constructor. The inner template header mirrors the declaration
+// and therefore depends on the same _MSC_VER check.
+
+// (std::string name, sys_time)
+template <class Duration, class TimeZonePtr>
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+template <class T, class>
+#endif
+inline
+zoned_time<Duration, TimeZonePtr>::zoned_time(const std::string& name,
+ const sys_time<Duration>& st)
+ : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), st)
+ {}
+
+// (C-string name, sys_time)
+template <class Duration, class TimeZonePtr>
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+template <class T, class>
+#endif
+inline
+zoned_time<Duration, TimeZonePtr>::zoned_time(const char* name,
+ const sys_time<Duration>& st)
+ : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), st)
+ {}
+
+// (std::string name, local_time)
+template <class Duration, class TimeZonePtr>
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+template <class T, class>
+#endif
+inline
+zoned_time<Duration, TimeZonePtr>::zoned_time(const std::string& name,
+ const local_time<Duration>& t)
+ : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), t)
+ {}
+
+// (C-string name, local_time)
+template <class Duration, class TimeZonePtr>
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+template <class T, class>
+#endif
+inline
+zoned_time<Duration, TimeZonePtr>::zoned_time(const char* name,
+ const local_time<Duration>& t)
+ : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), t)
+ {}
+
+// (std::string name, local_time, choose)
+template <class Duration, class TimeZonePtr>
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+template <class T, class>
+#endif
+inline
+zoned_time<Duration, TimeZonePtr>::zoned_time(const std::string& name,
+ const local_time<Duration>& t, choose c)
+ : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), t, c)
+ {}
+
+// (C-string name, local_time, choose)
+template <class Duration, class TimeZonePtr>
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+template <class T, class>
+#endif
+inline
+zoned_time<Duration, TimeZonePtr>::zoned_time(const char* name,
+ const local_time<Duration>& t, choose c)
+ : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), t, c)
+ {}
+
+// (std::string name, zoned_time)
+template <class Duration, class TimeZonePtr>
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+template <class Duration2, class TimeZonePtr2, class, class>
+#else
+template <class Duration2, class TimeZonePtr2>
+#endif
+inline
+zoned_time<Duration, TimeZonePtr>::zoned_time(const std::string& name,
+ const zoned_time<Duration2, TimeZonePtr2>& zt)
+ : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), zt)
+ {}
+
+// (C-string name, zoned_time)
+template <class Duration, class TimeZonePtr>
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+template <class Duration2, class TimeZonePtr2, class, class>
+#else
+template <class Duration2, class TimeZonePtr2>
+#endif
+inline
+zoned_time<Duration, TimeZonePtr>::zoned_time(const char* name,
+ const zoned_time<Duration2, TimeZonePtr2>& zt)
+ : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), zt)
+ {}
+
+// (std::string name, zoned_time, choose)
+template <class Duration, class TimeZonePtr>
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+template <class Duration2, class TimeZonePtr2, class, class>
+#else
+template <class Duration2, class TimeZonePtr2>
+#endif
+inline
+zoned_time<Duration, TimeZonePtr>::zoned_time(const std::string& name,
+ const zoned_time<Duration2, TimeZonePtr2>& zt,
+ choose c)
+ : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), zt, c)
+ {}
+
+// (C-string name, zoned_time, choose)
+template <class Duration, class TimeZonePtr>
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+template <class Duration2, class TimeZonePtr2, class, class>
+#else
+template <class Duration2, class TimeZonePtr2>
+#endif
+inline
+zoned_time<Duration, TimeZonePtr>::zoned_time(const char* name,
+ const zoned_time<Duration2, TimeZonePtr2>& zt,
+ choose c)
+ : zoned_time(zoned_traits<TimeZonePtr>::locate_zone(name), zt, c)
+ {}
+
+#endif // !HAS_STRING_VIEW
+
+// Replaces the stored time point with `st`; the zone is left untouched.
+template <class Duration, class TimeZonePtr>
+inline
+zoned_time<Duration, TimeZonePtr>&
+zoned_time<Duration, TimeZonePtr>::operator=(const sys_time<Duration>& st)
+{
+    this->tp_ = st;
+    return *this;
+}
+
+// Replaces the stored time point with the system time that the current zone's
+// one-argument to_sys() yields for the local time `ut`; the zone is left untouched.
+template <class Duration, class TimeZonePtr>
+inline
+zoned_time<Duration, TimeZonePtr>&
+zoned_time<Duration, TimeZonePtr>::operator=(const local_time<Duration>& ut)
+{
+    this->tp_ = this->zone_->to_sys(ut);
+    return *this;
+}
+
+// Explicit conversion to local_time: the stored time point expressed in the
+// zone's local time, exactly what get_local_time() returns.
+template <class Duration, class TimeZonePtr>
+inline
+zoned_time<Duration, TimeZonePtr>::operator local_time<typename zoned_time<Duration, TimeZonePtr>::duration>() const
+{
+    return zone_->to_local(tp_);
+}
+
+// Explicit conversion to sys_time: the stored time point, exactly what
+// get_sys_time() returns.
+template <class Duration, class TimeZonePtr>
+inline
+zoned_time<Duration, TimeZonePtr>::operator sys_time<typename zoned_time<Duration, TimeZonePtr>::duration>() const
+{
+    return tp_;
+}
+
+// Returns a copy of the stored zone pointer.
+template <class Duration, class TimeZonePtr>
+inline
+TimeZonePtr
+zoned_time<Duration, TimeZonePtr>::get_time_zone() const
+{
+    return this->zone_;
+}
+
+// Returns the stored time point converted by the zone's to_local().
+template <class Duration, class TimeZonePtr>
+inline
+local_time<typename zoned_time<Duration, TimeZonePtr>::duration>
+zoned_time<Duration, TimeZonePtr>::get_local_time() const
+{
+    return this->zone_->to_local(this->tp_);
+}
+
+// Returns the stored time point as a sys_time.
+template <class Duration, class TimeZonePtr>
+inline
+sys_time<typename zoned_time<Duration, TimeZonePtr>::duration>
+zoned_time<Duration, TimeZonePtr>::get_sys_time() const
+{
+    return this->tp_;
+}
+/* Returns the sys_info that zone_->get_info reports for the stored time point. */
+template <class Duration, class TimeZonePtr>
+inline
+sys_info
+zoned_time<Duration, TimeZonePtr>::get_info() const
+{
+ return zone_->get_info(tp_);
+}
+
+// make_zoned: factory functions that deduce the zoned_time template arguments
+/* Returns a default-constructed zoned_time with seconds precision. */
+inline
+zoned_time<std::chrono::seconds>
+make_zoned()
+{
+ return zoned_time<std::chrono::seconds>();
+}
+/* Builds a zoned_time from a system time point only. The result's duration is
+ * the common type of Duration and seconds, so it is never coarser than seconds.
+ */
+template <class Duration>
+inline
+zoned_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+make_zoned(const sys_time<Duration>& tp)
+{
+ return zoned_time<typename std::common_type<Duration, std::chrono::seconds>::type>(tp);
+}
+/* Builds a seconds-precision zoned_time from a time zone pointer, which is
+ * moved into the result. The defaulted template parameter (compiled out for
+ * MSVC up to 1916 and Intel up to 1600) restricts this overload to pointer-like
+ * types for which dereferencing detail::to_raw_pointer(z) yields a class type.
+ */
+template <class TimeZonePtr
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+#if !defined(__INTEL_COMPILER) || (__INTEL_COMPILER > 1600)
+ , class = typename std::enable_if
+ <
+ std::is_class
+ <
+ typename std::decay
+ <
+ decltype(*detail::to_raw_pointer(std::declval<TimeZonePtr&>()))
+ >::type
+ >{}
+ >::type
+#endif
+#endif
+ >
+inline
+zoned_time<std::chrono::seconds, TimeZonePtr>
+make_zoned(TimeZonePtr z)
+{
+ return zoned_time<std::chrono::seconds, TimeZonePtr>(std::move(z));
+}
+/* Builds a zoned_seconds from a zone name by forwarding the name to the
+ * zoned_seconds constructor.
+ */
+inline
+zoned_seconds
+make_zoned(const std::string& name)
+{
+ return zoned_seconds(name);
+}
+/* Builds a zoned_time from a time zone pointer (moved into the result) and a
+ * local time. The result's duration is the common type of Duration and
+ * seconds. The defaulted template parameter (compiled out for MSVC up to 1916
+ * and Intel up to 1600) requires that dereferencing TimeZonePtr yields a class.
+ */
+template <class Duration, class TimeZonePtr
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+#if !defined(__INTEL_COMPILER) || (__INTEL_COMPILER > 1600)
+ , class = typename std::enable_if
+ <
+ std::is_class<typename std::decay<decltype(*std::declval<TimeZonePtr&>())>::type>{}
+ >::type
+#endif
+#endif
+ >
+inline
+zoned_time<typename std::common_type<Duration, std::chrono::seconds>::type, TimeZonePtr>
+make_zoned(TimeZonePtr zone, const local_time<Duration>& tp)
+{
+ return zoned_time<typename std::common_type<Duration, std::chrono::seconds>::type,
+ TimeZonePtr>(std::move(zone), tp);
+}
+/* Same as the (zone, local_time) overload, but also forwards the choose value
+ * c to the zoned_time constructor. The zone pointer is moved into the result
+ * and the same class-type constraint on the dereferenced TimeZonePtr applies.
+ */
+template <class Duration, class TimeZonePtr
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+#if !defined(__INTEL_COMPILER) || (__INTEL_COMPILER > 1600)
+ , class = typename std::enable_if
+ <
+ std::is_class<typename std::decay<decltype(*std::declval<TimeZonePtr&>())>::type>{}
+ >::type
+#endif
+#endif
+ >
+inline
+zoned_time<typename std::common_type<Duration, std::chrono::seconds>::type, TimeZonePtr>
+make_zoned(TimeZonePtr zone, const local_time<Duration>& tp, choose c)
+{
+ return zoned_time<typename std::common_type<Duration, std::chrono::seconds>::type,
+ TimeZonePtr>(std::move(zone), tp, c);
+}
+/* Builds a zoned_time (default TimeZonePtr) from a zone name and a local time.
+ * The result's duration is the common type of Duration and seconds.
+ */
+template <class Duration>
+inline
+zoned_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+make_zoned(const std::string& name, const local_time<Duration>& tp)
+{
+ return zoned_time<typename std::common_type<Duration,
+ std::chrono::seconds>::type>(name, tp);
+}
+/* Same as the (name, local_time) overload, but also forwards the choose value
+ * c to the zoned_time constructor.
+ */
+template <class Duration>
+inline
+zoned_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+make_zoned(const std::string& name, const local_time<Duration>& tp, choose c)
+{
+ return zoned_time<typename std::common_type<Duration,
+ std::chrono::seconds>::type>(name, tp, c);
+}
+/* Builds a zoned_time from a time zone pointer (moved into the result) and an
+ * existing zoned_time with the same Duration and TimeZonePtr.
+ */
+template <class Duration, class TimeZonePtr>
+inline
+zoned_time<Duration, TimeZonePtr>
+make_zoned(TimeZonePtr zone, const zoned_time<Duration, TimeZonePtr>& zt)
+{
+ return zoned_time<Duration, TimeZonePtr>(std::move(zone), zt);
+}
+/* Builds a zoned_time from a zone name and an existing zoned_time with the
+ * same Duration and TimeZonePtr.
+ */
+template <class Duration, class TimeZonePtr>
+inline
+zoned_time<Duration, TimeZonePtr>
+make_zoned(const std::string& name, const zoned_time<Duration, TimeZonePtr>& zt)
+{
+ return zoned_time<Duration, TimeZonePtr>(name, zt);
+}
+/* Same as the (zone, zoned_time) overload, but also forwards the choose value
+ * c to the zoned_time constructor.
+ */
+template <class Duration, class TimeZonePtr>
+inline
+zoned_time<Duration, TimeZonePtr>
+make_zoned(TimeZonePtr zone, const zoned_time<Duration, TimeZonePtr>& zt, choose c)
+{
+ return zoned_time<Duration, TimeZonePtr>(std::move(zone), zt, c);
+}
+/* Same as the (name, zoned_time) overload, but also forwards the choose value
+ * c to the zoned_time constructor.
+ */
+template <class Duration, class TimeZonePtr>
+inline
+zoned_time<Duration, TimeZonePtr>
+make_zoned(const std::string& name, const zoned_time<Duration, TimeZonePtr>& zt, choose c)
+{
+ return zoned_time<Duration, TimeZonePtr>(name, zt, c);
+}
+/* Builds a zoned_time from a time zone pointer (moved into the result) and a
+ * system time. The result's duration is the common type of Duration and
+ * seconds. The defaulted template parameter (compiled out for MSVC up to 1916
+ * and Intel up to 1600) requires that dereferencing TimeZonePtr yields a class.
+ */
+template <class Duration, class TimeZonePtr
+#if !defined(_MSC_VER) || (_MSC_VER > 1916)
+#if !defined(__INTEL_COMPILER) || (__INTEL_COMPILER > 1600)
+ , class = typename std::enable_if
+ <
+ std::is_class<typename std::decay<decltype(*std::declval<TimeZonePtr&>())>::type>{}
+ >::type
+#endif
+#endif
+ >
+inline
+zoned_time<typename std::common_type<Duration, std::chrono::seconds>::type, TimeZonePtr>
+make_zoned(TimeZonePtr zone, const sys_time<Duration>& st)
+{
+ return zoned_time<typename std::common_type<Duration, std::chrono::seconds>::type,
+ TimeZonePtr>(std::move(zone), st);
+}
+/* Builds a zoned_time (default TimeZonePtr) from a zone name and a system
+ * time. The result's duration is the common type of Duration and seconds.
+ */
+template <class Duration>
+inline
+zoned_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+make_zoned(const std::string& name, const sys_time<Duration>& st)
+{
+ return zoned_time<typename std::common_type<Duration,
+ std::chrono::seconds>::type>(name, st);
+}
+
+// Formats a zoned_time according to fmt. The zone's sys_info for the stored
+// system time supplies the UTC offset and abbreviation; the system time is
+// shifted by that offset, re-labelled as a local_time, and handed to the
+// local_time overload of to_stream together with the abbreviation and offset.
+template <class CharT, class Traits, class Duration, class TimeZonePtr>
+std::basic_ostream<CharT, Traits>&
+to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
+          const zoned_time<Duration, TimeZonePtr>& tp)
+{
+    using zt_duration = typename zoned_time<Duration, TimeZonePtr>::duration;
+    const auto sys_tp = tp.get_sys_time();
+    const auto zone_info = tp.get_time_zone()->get_info(sys_tp);
+    const local_time<zt_duration> shifted{(sys_tp + zone_info.offset).time_since_epoch()};
+    return to_stream(os, fmt, shifted, &zone_info.abbrev, &zone_info.offset);
+}
+/* Streams a zoned_time using the fixed format "%F %T %Z". The format is built
+ * character by character so that it works for any CharT.
+ */
+template <class CharT, class Traits, class Duration, class TimeZonePtr>
+inline
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const zoned_time<Duration, TimeZonePtr>& t)
+{
+ const CharT fmt[] = {'%', 'F', ' ', '%', 'T', ' ', '%', 'Z', CharT{}};
+ return to_stream(os, fmt, t);
+}
+
+#if !MISSING_LEAP_SECONDS
+/* Clock type used as the tag for utc_time time points. duration, rep and
+ * period are taken from std::chrono::system_clock and is_steady is false.
+ * Declares now() and four static conversion templates (to_sys, from_sys,
+ * to_local, from_local); each returns a time point whose duration is the
+ * common type of the argument's Duration and seconds. The definitions follow
+ * the class.
+ */
+class utc_clock
+{
+public:
+ using duration = std::chrono::system_clock::duration;
+ using rep = duration::rep;
+ using period = duration::period;
+ using time_point = std::chrono::time_point<utc_clock>;
+ static CONSTDATA bool is_steady = false;
+
+ static time_point now();
+
+ template<typename Duration>
+ static
+ std::chrono::time_point<std::chrono::system_clock, typename std::common_type<Duration, std::chrono::seconds>::type>
+ to_sys(const std::chrono::time_point<utc_clock, Duration>&);
+
+ template<typename Duration>
+ static
+ std::chrono::time_point<utc_clock, typename std::common_type<Duration, std::chrono::seconds>::type>
+ from_sys(const std::chrono::time_point<std::chrono::system_clock, Duration>&);
+
+ template<typename Duration>
+ static
+ std::chrono::time_point<local_t, typename std::common_type<Duration, std::chrono::seconds>::type>
+ to_local(const std::chrono::time_point<utc_clock, Duration>&);
+
+ template<typename Duration>
+ static
+ std::chrono::time_point<utc_clock, typename std::common_type<Duration, std::chrono::seconds>::type>
+ from_local(const std::chrono::time_point<local_t, Duration>&);
+};
+/* utc_time<Duration> is a std::chrono::time_point measured on utc_clock;
+ * utc_seconds is utc_time with std::chrono::seconds precision.
+ */
+template <class Duration>
+ using utc_time = std::chrono::time_point<utc_clock, Duration>;
+
+using utc_seconds = utc_time<std::chrono::seconds>;
+/* Converts a system time to UTC time. std::upper_bound locates the first
+ * entry of get_tzdb().leap_seconds that compares greater than st; the number
+ * of entries before it is added, as seconds, to st's time since epoch.
+ * Assumes leap_seconds is sorted ascending, as upper_bound requires.
+ */
+template <class Duration>
+utc_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+utc_clock::from_sys(const sys_time<Duration>& st)
+{
+ using std::chrono::seconds;
+ using CD = typename std::common_type<Duration, seconds>::type;
+ auto const& leaps = get_tzdb().leap_seconds;
+ auto const lt = std::upper_bound(leaps.begin(), leaps.end(), st);
+ return utc_time<CD>{st.time_since_epoch() + seconds{lt-leaps.begin()}};
+}
+
+// Returns {in_leap_second, elapsed} for the UTC time point ut.
+// `in_leap_second` is true when ut lies inside a leap second insertion.
+// `elapsed` is the number of leap seconds counted up to ut; a leap second
+// that ut is currently inside is included in that count.
+template <class Duration>
+std::pair<bool, std::chrono::seconds>
+is_leap_second(date::utc_time<Duration> const& ut)
+{
+    using std::chrono::seconds;
+    using CD = typename std::common_type<Duration, seconds>::type;
+
+    auto const& table = get_tzdb().leap_seconds;
+    // Reinterpret the UTC count as a sys_time so it can be compared to the table.
+    sys_time<CD> shifted{ut.time_since_epoch()};
+    auto const next = std::upper_bound(table.begin(), table.end(), shifted);
+    seconds count{next - table.begin()};
+    shifted -= count;
+
+    bool inside = false;
+    // After removing the counted leap seconds the point may fall before the
+    // last counted entry: either it is inside that insertion (its final
+    // second), or that entry was counted one too early.
+    if (next > table.begin() && shifted < next[-1])
+    {
+        inside = shifted >= next[-1].date() - seconds{1};
+        if (!inside)
+            --count;
+    }
+    return {inside, count};
+}
+/* Result type of get_leap_second_info. The two members are filled from the
+ * pair returned by is_leap_second: the flag, then the elapsed seconds count.
+ */
+struct leap_second_info
+{
+ bool is_leap_second;
+ std::chrono::seconds elapsed;
+};
+
+// Repackages the pair returned by is_leap_second(ut) as a leap_second_info:
+// .first becomes is_leap_second and .second becomes elapsed.
+template <class Duration>
+leap_second_info
+get_leap_second_info(date::utc_time<Duration> const& ut)
+{
+    const auto result = is_leap_second(ut);
+    leap_second_info info{result.first, result.second};
+    return info;
+}
+
+// Converts a UTC time to system time by subtracting the leap seconds counted
+// by is_leap_second. A time inside a leap second insertion is mapped to the
+// last representable instant (one tick of CD) before the next whole second.
+template <class Duration>
+sys_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+utc_clock::to_sys(const utc_time<Duration>& ut)
+{
+    using std::chrono::seconds;
+    using CD = typename std::common_type<Duration, seconds>::type;
+
+    const auto leap = is_leap_second(ut);
+    sys_time<CD> result{ut.time_since_epoch() - leap.second};
+    if (leap.first)
+    {
+        result = floor<seconds>(result) + seconds{1} - CD{1};
+    }
+    return result;
+}
+/* Returns the current time as a utc_clock time point, obtained by passing
+ * std::chrono::system_clock::now() through from_sys.
+ */
+inline
+utc_clock::time_point
+utc_clock::now()
+{
+ return from_sys(std::chrono::system_clock::now());
+}
+/* Converts a local_time to UTC time: the local time's epoch count is
+ * re-labelled as a sys_time with the same Duration and passed to from_sys.
+ */
+template <class Duration>
+utc_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+utc_clock::from_local(const local_time<Duration>& st)
+{
+ return from_sys(sys_time<Duration>{st.time_since_epoch()});
+}
+/* Converts a UTC time to local_time: the value is converted with to_sys and
+ * the resulting epoch count is re-labelled as a local_time.
+ */
+template <class Duration>
+local_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+utc_clock::to_local(const utc_time<Duration>& ut)
+{
+ using CD = typename std::common_type<Duration, std::chrono::seconds>::type;
+ return local_time<CD>{to_sys(ut).time_since_epoch()};
+}
+
+// Formats a utc_time according to fmt, with abbreviation "UTC" and a zero
+// offset. The leap seconds counted by is_leap_second are removed to get a
+// calendar date and time of day; when t lies inside a leap second insertion
+// one second is added back to the seconds field of the time of day.
+template <class CharT, class Traits, class Duration>
+std::basic_ostream<CharT, Traits>&
+to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
+          const utc_time<Duration>& t)
+{
+    using std::chrono::seconds;
+    using CT = typename std::common_type<Duration, seconds>::type;
+
+    const std::string abbrev("UTC");
+    CONSTDATA seconds offset{0};
+
+    auto leap = is_leap_second(t);
+    auto sys_tp = sys_time<CT>{t.time_since_epoch() - leap.second};
+    auto const day_start = floor<days>(sys_tp);
+    year_month_day ymd = day_start;
+
+    auto tod = make_time(sys_tp - sys_seconds{day_start});
+    tod.seconds(detail::undocumented{}) += seconds{leap.first};
+
+    fields<CT> fds{ymd, tod};
+    return to_stream(os, fmt, fds, &abbrev, &offset);
+}
+/* Streams a utc_time using the fixed format "%F %T". The format is built
+ * character by character so that it works for any CharT.
+ */
+template <class CharT, class Traits, class Duration>
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const utc_time<Duration>& t)
+{
+ const CharT fmt[] = {'%', 'F', ' ', '%', 'T', CharT{}};
+ return to_stream(os, fmt, t);
+}
+
+// Parses a utc_time from `is` according to fmt. On success tp receives the
+// parsed value cast to Duration; on failure failbit is set and tp is left
+// unchanged. A parsed UTC offset is subtracted before the conversion; when the
+// caller passes no `offset`, a local variable receives it instead.
+// A seconds field of 60 is accepted only if the resulting instant really is a
+// leap second, and any other value must not land on one.
+template <class Duration, class CharT, class Traits, class Alloc = std::allocator<CharT>>
+std::basic_istream<CharT, Traits>&
+from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
+            utc_time<Duration>& tp, std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
+            std::chrono::minutes* offset = nullptr)
+{
+    using std::chrono::seconds;
+    using std::chrono::minutes;
+    using CT = typename std::common_type<Duration, seconds>::type;
+
+    minutes fallback_offset{};
+    minutes* const off = offset ? offset : &fallback_offset;
+
+    fields<CT> parsed{};
+    parsed.has_tod = true;
+    from_stream(is, fmt, parsed, abbrev, off);
+    if (!parsed.ymd.ok())
+        is.setstate(std::ios::failbit);
+    if (is.fail())
+        return is;
+
+    // Convert as if the field were 59, then add the 60th second back in UTC.
+    const bool sixty = parsed.tod.seconds() == seconds{60};
+    if (sixty)
+        parsed.tod.seconds(detail::undocumented{}) -= seconds{1};
+    auto candidate = utc_clock::from_sys(sys_days(parsed.ymd) - *off + parsed.tod.to_duration());
+    if (sixty)
+        candidate += seconds{1};
+
+    const bool consistent = sixty == is_leap_second(candidate).first
+                            && parsed.tod.in_conventional_range();
+    if (!consistent)
+    {
+        is.setstate(std::ios::failbit);
+        return is;
+    }
+    tp = std::chrono::time_point_cast<Duration>(candidate);
+    return is;
+}
+
+// tai_clock
+/* Clock type used as the tag for tai_time time points. duration, rep and
+ * period are taken from std::chrono::system_clock and is_steady is false.
+ * Declares now() and four static NOEXCEPT conversion templates: to_utc and
+ * from_utc return a duration that is the common type of Duration and seconds,
+ * to_local and from_local one that is the common type of Duration and
+ * date::days. The definitions follow the class.
+ */
+class tai_clock
+{
+public:
+ using duration = std::chrono::system_clock::duration;
+ using rep = duration::rep;
+ using period = duration::period;
+ using time_point = std::chrono::time_point<tai_clock>;
+ static const bool is_steady = false;
+
+ static time_point now();
+
+ template<typename Duration>
+ static
+ std::chrono::time_point<utc_clock, typename std::common_type<Duration, std::chrono::seconds>::type>
+ to_utc(const std::chrono::time_point<tai_clock, Duration>&) NOEXCEPT;
+
+ template<typename Duration>
+ static
+ std::chrono::time_point<tai_clock, typename std::common_type<Duration, std::chrono::seconds>::type>
+ from_utc(const std::chrono::time_point<utc_clock, Duration>&) NOEXCEPT;
+
+ template<typename Duration>
+ static
+ std::chrono::time_point<local_t, typename std::common_type<Duration, date::days>::type>
+ to_local(const std::chrono::time_point<tai_clock, Duration>&) NOEXCEPT;
+
+ template<typename Duration>
+ static
+ std::chrono::time_point<tai_clock, typename std::common_type<Duration, date::days>::type>
+ from_local(const std::chrono::time_point<local_t, Duration>&) NOEXCEPT;
+};
+/* tai_time<Duration> is a std::chrono::time_point measured on tai_clock;
+ * tai_seconds is tai_time with std::chrono::seconds precision.
+ */
+template <class Duration>
+ using tai_time = std::chrono::time_point<tai_clock, Duration>;
+
+using tai_seconds = tai_time<std::chrono::seconds>;
+/* Converts a TAI time to UTC time by subtracting a fixed amount: the number
+ * of days from 1958-01-01 to 1970-01-01 plus 10 seconds. Exact inverse of
+ * from_utc.
+ */
+template <class Duration>
+inline
+utc_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+tai_clock::to_utc(const tai_time<Duration>& t) NOEXCEPT
+{
+ using std::chrono::seconds;
+ using CD = typename std::common_type<Duration, seconds>::type;
+ return utc_time<CD>{t.time_since_epoch()} -
+ (sys_days(year{1970}/January/1) - sys_days(year{1958}/January/1) + seconds{10});
+}
+/* Converts a UTC time to TAI time by adding a fixed amount: the number of
+ * days from 1958-01-01 to 1970-01-01 plus 10 seconds. Exact inverse of to_utc.
+ */
+template <class Duration>
+inline
+tai_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+tai_clock::from_utc(const utc_time<Duration>& t) NOEXCEPT
+{
+ using std::chrono::seconds;
+ using CD = typename std::common_type<Duration, seconds>::type;
+ return tai_time<CD>{t.time_since_epoch()} +
+ (sys_days(year{1970}/January/1) - sys_days(year{1958}/January/1) + seconds{10});
+}
+/* Returns the current time as a tai_clock time point, obtained by passing
+ * utc_clock::now() through from_utc.
+ */
+inline
+tai_clock::time_point
+tai_clock::now()
+{
+ return from_utc(utc_clock::now());
+}
+/* Re-labels a TAI time as a local_time and subtracts the number of days from
+ * 1958-01-01 to 1970-01-01. No seconds correction is applied here, unlike in
+ * to_utc.
+ */
+template <class Duration>
+inline
+local_time<typename std::common_type<Duration, date::days>::type>
+tai_clock::to_local(const tai_time<Duration>& t) NOEXCEPT
+{
+ using CD = typename std::common_type<Duration, date::days>::type;
+ return local_time<CD>{t.time_since_epoch()} -
+ (local_days(year{1970}/January/1) - local_days(year{1958}/January/1));
+}
+/* Re-labels a local_time as a TAI time and adds the number of days from
+ * 1958-01-01 to 1970-01-01. Exact inverse of to_local.
+ */
+template <class Duration>
+inline
+tai_time<typename std::common_type<Duration, date::days>::type>
+tai_clock::from_local(const local_time<Duration>& t) NOEXCEPT
+{
+ using CD = typename std::common_type<Duration, date::days>::type;
+ return tai_time<CD>{t.time_since_epoch()} +
+ (local_days(year{1970}/January/1) - local_days(year{1958}/January/1));
+}
+/* Formats a tai_time according to fmt: the value is converted with
+ * tai_clock::to_local and forwarded to the local_time overload of to_stream
+ * with abbreviation "TAI" and a zero offset.
+ */
+template <class CharT, class Traits, class Duration>
+std::basic_ostream<CharT, Traits>&
+to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
+ const tai_time<Duration>& t)
+{
+ const std::string abbrev("TAI");
+ CONSTDATA std::chrono::seconds offset{0};
+ return to_stream(os, fmt, tai_clock::to_local(t), &abbrev, &offset);
+}
+/* Streams a tai_time using the fixed format "%F %T". The format is built
+ * character by character so that it works for any CharT.
+ */
+template <class CharT, class Traits, class Duration>
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const tai_time<Duration>& t)
+{
+ const CharT fmt[] = {'%', 'F', ' ', '%', 'T', CharT{}};
+ return to_stream(os, fmt, t);
+}
+
+// Parses a tai_time from `is` according to fmt. The text is parsed as a
+// local_time and converted with tai_clock::from_local; tp is assigned only
+// when the stream has not failed.
+template <class Duration, class CharT, class Traits, class Alloc = std::allocator<CharT>>
+std::basic_istream<CharT, Traits>&
+from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
+            tai_time<Duration>& tp,
+            std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
+            std::chrono::minutes* offset = nullptr)
+{
+    local_time<Duration> parsed;
+    from_stream(is, fmt, parsed, abbrev, offset);
+    if (is.fail())
+        return is;
+    tp = tai_clock::from_local(parsed);
+    return is;
+}
+
+// gps_clock
+/* Clock type used as the tag for gps_time time points. duration, rep and
+ * period are taken from std::chrono::system_clock and is_steady is false.
+ * Declares now() and four static NOEXCEPT conversion templates: to_utc and
+ * from_utc return a duration that is the common type of Duration and seconds,
+ * to_local and from_local one that is the common type of Duration and
+ * date::days. The definitions follow the class.
+ */
+class gps_clock
+{
+public:
+ using duration = std::chrono::system_clock::duration;
+ using rep = duration::rep;
+ using period = duration::period;
+ using time_point = std::chrono::time_point<gps_clock>;
+ static const bool is_steady = false;
+
+ static time_point now();
+
+ template<typename Duration>
+ static
+ std::chrono::time_point<utc_clock, typename std::common_type<Duration, std::chrono::seconds>::type>
+ to_utc(const std::chrono::time_point<gps_clock, Duration>&) NOEXCEPT;
+
+ template<typename Duration>
+ static
+ std::chrono::time_point<gps_clock, typename std::common_type<Duration, std::chrono::seconds>::type>
+ from_utc(const std::chrono::time_point<utc_clock, Duration>&) NOEXCEPT;
+
+ template<typename Duration>
+ static
+ std::chrono::time_point<local_t, typename std::common_type<Duration, date::days>::type>
+ to_local(const std::chrono::time_point<gps_clock, Duration>&) NOEXCEPT;
+
+ template<typename Duration>
+ static
+ std::chrono::time_point<gps_clock, typename std::common_type<Duration, date::days>::type>
+ from_local(const std::chrono::time_point<local_t, Duration>&) NOEXCEPT;
+};
+/* gps_time<Duration> is a std::chrono::time_point measured on gps_clock;
+ * gps_seconds is gps_time with std::chrono::seconds precision.
+ */
+template <class Duration>
+ using gps_time = std::chrono::time_point<gps_clock, Duration>;
+
+using gps_seconds = gps_time<std::chrono::seconds>;
+/* Converts a GPS time to UTC time by adding a fixed amount: the number of
+ * days from 1970-01-01 to the first Sunday of January 1980, plus 9 seconds.
+ * Exact inverse of from_utc.
+ */
+template <class Duration>
+inline
+utc_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+gps_clock::to_utc(const gps_time<Duration>& t) NOEXCEPT
+{
+ using std::chrono::seconds;
+ using CD = typename std::common_type<Duration, seconds>::type;
+ return utc_time<CD>{t.time_since_epoch()} +
+ (sys_days(year{1980}/January/Sunday[1]) - sys_days(year{1970}/January/1) +
+ seconds{9});
+}
+/* Converts a UTC time to GPS time by subtracting a fixed amount: the number
+ * of days from 1970-01-01 to the first Sunday of January 1980, plus 9 seconds.
+ * Exact inverse of to_utc.
+ */
+template <class Duration>
+inline
+gps_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+gps_clock::from_utc(const utc_time<Duration>& t) NOEXCEPT
+{
+ using std::chrono::seconds;
+ using CD = typename std::common_type<Duration, seconds>::type;
+ return gps_time<CD>{t.time_since_epoch()} -
+ (sys_days(year{1980}/January/Sunday[1]) - sys_days(year{1970}/January/1) +
+ seconds{9});
+}
+/* Returns the current time as a gps_clock time point, obtained by passing
+ * utc_clock::now() through from_utc.
+ */
+inline
+gps_clock::time_point
+gps_clock::now()
+{
+ return from_utc(utc_clock::now());
+}
+/* Re-labels a GPS time as a local_time and adds the number of days from
+ * 1970-01-01 to the first Sunday of January 1980. No seconds correction is
+ * applied here, unlike in to_utc.
+ */
+template <class Duration>
+inline
+local_time<typename std::common_type<Duration, date::days>::type>
+gps_clock::to_local(const gps_time<Duration>& t) NOEXCEPT
+{
+ using CD = typename std::common_type<Duration, date::days>::type;
+ return local_time<CD>{t.time_since_epoch()} +
+ (local_days(year{1980}/January/Sunday[1]) - local_days(year{1970}/January/1));
+}
+/* Re-labels a local_time as a GPS time and subtracts the number of days from
+ * 1970-01-01 to the first Sunday of January 1980. Exact inverse of to_local.
+ */
+template <class Duration>
+inline
+gps_time<typename std::common_type<Duration, date::days>::type>
+gps_clock::from_local(const local_time<Duration>& t) NOEXCEPT
+{
+ using CD = typename std::common_type<Duration, date::days>::type;
+ return gps_time<CD>{t.time_since_epoch()} -
+ (local_days(year{1980}/January/Sunday[1]) - local_days(year{1970}/January/1));
+}
+
+/* Formats a gps_time according to fmt: the value is converted with
+ * gps_clock::to_local and forwarded to the local_time overload of to_stream
+ * with abbreviation "GPS" and a zero offset.
+ */
+template <class CharT, class Traits, class Duration>
+std::basic_ostream<CharT, Traits>&
+to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
+ const gps_time<Duration>& t)
+{
+ const std::string abbrev("GPS");
+ CONSTDATA std::chrono::seconds offset{0};
+ return to_stream(os, fmt, gps_clock::to_local(t), &abbrev, &offset);
+}
+/* Streams a gps_time using the fixed format "%F %T". The format is built
+ * character by character so that it works for any CharT.
+ */
+template <class CharT, class Traits, class Duration>
+std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const gps_time<Duration>& t)
+{
+ const CharT fmt[] = {'%', 'F', ' ', '%', 'T', CharT{}};
+ return to_stream(os, fmt, t);
+}
+
+// Parses a gps_time from `is` according to fmt. The text is parsed as a
+// local_time and converted with gps_clock::from_local; tp is assigned only
+// when the stream has not failed.
+template <class Duration, class CharT, class Traits, class Alloc = std::allocator<CharT>>
+std::basic_istream<CharT, Traits>&
+from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
+            gps_time<Duration>& tp,
+            std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
+            std::chrono::minutes* offset = nullptr)
+{
+    local_time<Duration> parsed;
+    from_stream(is, fmt, parsed, abbrev, offset);
+    if (is.fail())
+        return is;
+    tp = gps_clock::from_local(parsed);
+    return is;
+}
+
+// clock_time_conversion
+/* Primary template: deliberately empty, so a DstClock/SrcClock pair has no
+ * call operator unless one of the specializations that follow matches it.
+ */
+template <class DstClock, class SrcClock>
+struct clock_time_conversion
+{};
+/* system_clock to system_clock: identity, returns the argument unchanged. */
+template <>
+struct clock_time_conversion<std::chrono::system_clock, std::chrono::system_clock>
+{
+ template <class Duration>
+ CONSTCD14
+ sys_time<Duration>
+ operator()(const sys_time<Duration>& st) const
+ {
+ return st;
+ }
+};
+/* utc_clock to utc_clock: identity, returns the argument unchanged. */
+template <>
+struct clock_time_conversion<utc_clock, utc_clock>
+{
+ template <class Duration>
+ CONSTCD14
+ utc_time<Duration>
+ operator()(const utc_time<Duration>& ut) const
+ {
+ return ut;
+ }
+};
+/* local_t to local_t: identity, returns the argument unchanged. */
+template<>
+struct clock_time_conversion<local_t, local_t>
+{
+ template <class Duration>
+ CONSTCD14
+ local_time<Duration>
+ operator()(const local_time<Duration>& lt) const
+ {
+ return lt;
+ }
+};
+/* system_clock to utc_clock: forwards to utc_clock::from_sys. */
+template <>
+struct clock_time_conversion<utc_clock, std::chrono::system_clock>
+{
+ template <class Duration>
+ utc_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+ operator()(const sys_time<Duration>& st) const
+ {
+ return utc_clock::from_sys(st);
+ }
+};
+/* utc_clock to system_clock: forwards to utc_clock::to_sys. */
+template <>
+struct clock_time_conversion<std::chrono::system_clock, utc_clock>
+{
+ template <class Duration>
+ sys_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+ operator()(const utc_time<Duration>& ut) const
+ {
+ return utc_clock::to_sys(ut);
+ }
+};
+/* system_clock to local_t: re-labels the epoch count as a local_time with the
+ * same Duration; no offset is applied.
+ */
+template<>
+struct clock_time_conversion<local_t, std::chrono::system_clock>
+{
+ template <class Duration>
+ CONSTCD14
+ local_time<Duration>
+ operator()(const sys_time<Duration>& st) const
+ {
+ return local_time<Duration>{st.time_since_epoch()};
+ }
+};
+/* local_t to system_clock: re-labels the epoch count as a sys_time with the
+ * same Duration; no offset is applied.
+ */
+template<>
+struct clock_time_conversion<std::chrono::system_clock, local_t>
+{
+ template <class Duration>
+ CONSTCD14
+ sys_time<Duration>
+ operator()(const local_time<Duration>& lt) const
+ {
+ return sys_time<Duration>{lt.time_since_epoch()};
+ }
+};
+/* local_t to utc_clock: forwards to utc_clock::from_local. */
+template<>
+struct clock_time_conversion<utc_clock, local_t>
+{
+ template <class Duration>
+ utc_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+ operator()(const local_time<Duration>& lt) const
+ {
+ return utc_clock::from_local(lt);
+ }
+};
+/* utc_clock to local_t: forwards to utc_clock::to_local. */
+template<>
+struct clock_time_conversion<local_t, utc_clock>
+{
+ template <class Duration>
+ local_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+ operator()(const utc_time<Duration>& ut) const
+ {
+ return utc_clock::to_local(ut);
+ }
+};
+/* Any clock to itself: identity, returns the argument unchanged. */
+template<typename Clock>
+struct clock_time_conversion<Clock, Clock>
+{
+ template <class Duration>
+ CONSTCD14
+ std::chrono::time_point<Clock, Duration>
+ operator()(const std::chrono::time_point<Clock, Duration>& tp) const
+ {
+ return tp;
+ }
+};
+
+// Helper traits for the generic clock_time_conversion specializations. Each
+// return_* trait has a nested `type` only when the corresponding static
+// member function of Clock is callable with the given time point; otherwise
+// the trait is empty and the conversion drops out of overload resolution.
+namespace ctc_detail
+{
+
+template <class Clock, class Duration>
+    using time_point = std::chrono::time_point<Clock, Duration>;
+
+using std::declval;
+using std::chrono::system_clock;
+
+// Checks that TimePoint is a time_point of the given Clock (using TimePoint's
+// own duration) and exposes it as `type`. Any other type is a hard error.
+template <class Clock, class TimePoint>
+struct return_clock_time
+{
+    using clock_time_point = time_point<Clock, typename TimePoint::duration>;
+    using type = TimePoint;
+
+    static_assert(std::is_same<TimePoint, clock_time_point>::value,
+                  "time point with appropriate clock shall be returned");
+};
+
+// Detects Clock::to_sys taking a `time_point<Clock, Duration> const&`.
+// When the call is well-formed, `type` is its return type, which must be a
+// system_clock time point (enforced by return_clock_time).
+template <class Clock, class Duration, class = void>
+struct return_to_sys
+{};
+
+template <class Clock, class Duration>
+struct return_to_sys
+       <
+           Clock, Duration,
+           decltype(Clock::to_sys(declval<time_point<Clock, Duration> const&>()), void())
+       >
+    : return_clock_time
+      <
+          system_clock,
+          decltype(Clock::to_sys(declval<time_point<Clock, Duration> const&>()))
+      >
+{};
+
+// Detects Clock::from_sys taking a `time_point<system_clock, Duration> const&`.
+// When well-formed, `type` is its return type, which must be a Clock time point.
+template <class Clock, class Duration, class = void>
+struct return_from_sys
+{};
+
+template <class Clock, class Duration>
+struct return_from_sys
+       <
+           Clock, Duration,
+           decltype(Clock::from_sys(declval<time_point<system_clock, Duration> const&>()),
+                    void())
+       >
+    : return_clock_time
+      <
+          Clock,
+          decltype(Clock::from_sys(declval<time_point<system_clock, Duration> const&>()))
+      >
+{};
+
+// Detects Clock::to_utc taking a `time_point<Clock, Duration> const&`.
+// When well-formed, `type` is its return type, which must be a utc_clock time point.
+template <class Clock, class Duration, class = void>
+struct return_to_utc
+{};
+
+template <class Clock, class Duration>
+struct return_to_utc
+       <
+           Clock, Duration,
+           decltype(Clock::to_utc(declval<time_point<Clock, Duration> const&>()), void())
+       >
+    : return_clock_time
+      <
+          utc_clock,
+          decltype(Clock::to_utc(declval<time_point<Clock, Duration> const&>()))>
+{};
+
+// Detects Clock::from_utc taking a `time_point<utc_clock, Duration> const&`.
+// When well-formed, `type` is its return type, which must be a Clock time point.
+template <class Clock, class Duration, class = void>
+struct return_from_utc
+{};
+
+template <class Clock, class Duration>
+struct return_from_utc
+       <
+           Clock, Duration,
+           decltype(Clock::from_utc(declval<time_point<utc_clock, Duration> const&>()),
+                    void())
+       >
+    : return_clock_time
+      <
+          Clock,
+          decltype(Clock::from_utc(declval<time_point<utc_clock, Duration> const&>()))
+      >
+{};
+
+// Detects Clock::to_local taking a `time_point<Clock, Duration> const&`.
+// When well-formed, `type` is its return type, which must be a local_t time point.
+template<typename Clock, typename Duration, typename = void>
+struct return_to_local
+{};
+
+template<typename Clock, typename Duration>
+struct return_to_local
+       <
+           Clock, Duration,
+           decltype(Clock::to_local(declval<time_point<Clock, Duration> const&>()),
+                    void())
+       >
+    : return_clock_time
+      <
+          local_t,
+          decltype(Clock::to_local(declval<time_point<Clock, Duration> const&>()))
+      >
+{};
+
+// Detects Clock::from_local taking a `time_point<local_t, Duration> const&`.
+// When well-formed, `type` is its return type, which must be a Clock time point.
+template<typename Clock, typename Duration, typename = void>
+struct return_from_local
+{};
+
+template<typename Clock, typename Duration>
+struct return_from_local
+       <
+           Clock, Duration,
+           decltype(Clock::from_local(declval<time_point<local_t, Duration> const&>()),
+                    void())
+       >
+    : return_clock_time
+      <
+          Clock,
+          decltype(Clock::from_local(declval<time_point<local_t, Duration> const&>()))
+      >
+{};
+
+}  // namespace ctc_detail
+/* Any SrcClock to system_clock: forwards to SrcClock::to_sys. The call
+ * operator takes part in overload resolution only when
+ * ctc_detail::return_to_sys<SrcClock, Duration> has a nested type.
+ */
+template <class SrcClock>
+struct clock_time_conversion<std::chrono::system_clock, SrcClock>
+{
+ template <class Duration>
+ CONSTCD14
+ typename ctc_detail::return_to_sys<SrcClock, Duration>::type
+ operator()(const std::chrono::time_point<SrcClock, Duration>& tp) const
+ {
+ return SrcClock::to_sys(tp);
+ }
+};
+/* system_clock to any DstClock: forwards to DstClock::from_sys. The call
+ * operator takes part in overload resolution only when
+ * ctc_detail::return_from_sys<DstClock, Duration> has a nested type.
+ */
+template <class DstClock>
+struct clock_time_conversion<DstClock, std::chrono::system_clock>
+{
+ template <class Duration>
+ CONSTCD14
+ typename ctc_detail::return_from_sys<DstClock, Duration>::type
+ operator()(const sys_time<Duration>& st) const
+ {
+ return DstClock::from_sys(st);
+ }
+};
+/* Any SrcClock to utc_clock: forwards to SrcClock::to_utc. The call operator
+ * takes part in overload resolution only when
+ * ctc_detail::return_to_utc<SrcClock, Duration> has a nested type.
+ */
+template <class SrcClock>
+struct clock_time_conversion<utc_clock, SrcClock>
+{
+ template <class Duration>
+ CONSTCD14
+ typename ctc_detail::return_to_utc<SrcClock, Duration>::type
+ operator()(const std::chrono::time_point<SrcClock, Duration>& tp) const
+ {
+ return SrcClock::to_utc(tp);
+ }
+};
+/* utc_clock to any DstClock: forwards to DstClock::from_utc. The call
+ * operator takes part in overload resolution only when
+ * ctc_detail::return_from_utc<DstClock, Duration> has a nested type.
+ */
+template <class DstClock>
+struct clock_time_conversion<DstClock, utc_clock>
+{
+ template <class Duration>
+ CONSTCD14
+ typename ctc_detail::return_from_utc<DstClock, Duration>::type
+ operator()(const utc_time<Duration>& ut) const
+ {
+ return DstClock::from_utc(ut);
+ }
+};
+/* Any SrcClock to local_t: forwards to SrcClock::to_local. The call operator
+ * takes part in overload resolution only when
+ * ctc_detail::return_to_local<SrcClock, Duration> has a nested type.
+ */
+template<typename SrcClock>
+struct clock_time_conversion<local_t, SrcClock>
+{
+ template <class Duration>
+ CONSTCD14
+ typename ctc_detail::return_to_local<SrcClock, Duration>::type
+ operator()(const std::chrono::time_point<SrcClock, Duration>& tp) const
+ {
+ return SrcClock::to_local(tp);
+ }
+};
+/* local_t to any DstClock: forwards to DstClock::from_local. The call
+ * operator takes part in overload resolution only when
+ * ctc_detail::return_from_local<DstClock, Duration> has a nested type.
+ */
+template<typename DstClock>
+struct clock_time_conversion<DstClock, local_t>
+{
+ template <class Duration>
+ CONSTCD14
+ typename ctc_detail::return_from_local<DstClock, Duration>::type
+ operator()(const local_time<Duration>& lt) const
+ {
+ return DstClock::from_local(lt);
+ }
+};
+/* Implementation of clock_cast. conv_clock applies one clock_time_conversion
+ * step. The cc_impl overloads are ranked by their second parameter:
+ * clock_cast passes the address of its argument, which is an exact match for
+ * the pointer-to-time_point parameter, needs a pointer conversion for
+ * const void*, and matches the ellipsis last. Each overload is removed by
+ * SFINAE when the expression in its trailing return type is ill-formed. If
+ * both overloads of the same rank are viable, the call is ambiguous.
+ */
+namespace clock_cast_detail
+{
+
+template <class Clock, class Duration>
+ using time_point = std::chrono::time_point<Clock, Duration>;
+using std::chrono::system_clock;
+
+template <class DstClock, class SrcClock, class Duration>
+CONSTCD14
+auto
+conv_clock(const time_point<SrcClock, Duration>& t)
+ -> decltype(std::declval<clock_time_conversion<DstClock, SrcClock>>()(t))
+{
+ return clock_time_conversion<DstClock, SrcClock>{}(t);
+}
+
+// Rank 1: direct conversion SrcClock -> DstClock through clock_time_conversion.
+template <class DstClock, class SrcClock, class Duration>
+CONSTCD14
+auto
+cc_impl(const time_point<SrcClock, Duration>& t, const time_point<SrcClock, Duration>*)
+ -> decltype(conv_clock<DstClock>(t))
+{
+ return conv_clock<DstClock>(t);
+}
+
+// Rank 2: two steps, SrcClock -> system_clock -> DstClock.
+template <class DstClock, class SrcClock, class Duration>
+CONSTCD14
+auto
+cc_impl(const time_point<SrcClock, Duration>& t, const void*)
+ -> decltype(conv_clock<DstClock>(conv_clock<system_clock>(t)))
+{
+ return conv_clock<DstClock>(conv_clock<system_clock>(t));
+}
+
+// Rank 2: two steps, SrcClock -> utc_clock -> DstClock.
+template <class DstClock, class SrcClock, class Duration>
+CONSTCD14
+auto
+cc_impl(const time_point<SrcClock, Duration>& t, const void*)
+ -> decltype(0, // MSVC_WORKAROUND
+ conv_clock<DstClock>(conv_clock<utc_clock>(t)))
+{
+ return conv_clock<DstClock>(conv_clock<utc_clock>(t));
+}
+
+// Rank 3: three steps, SrcClock -> system_clock -> utc_clock -> DstClock.
+template <class DstClock, class SrcClock, class Duration>
+CONSTCD14
+auto
+cc_impl(const time_point<SrcClock, Duration>& t, ...)
+ -> decltype(conv_clock<DstClock>(conv_clock<utc_clock>(conv_clock<system_clock>(t))))
+{
+ return conv_clock<DstClock>(conv_clock<utc_clock>(conv_clock<system_clock>(t)));
+}
+
+// Rank 3: three steps, SrcClock -> utc_clock -> system_clock -> DstClock.
+template <class DstClock, class SrcClock, class Duration>
+CONSTCD14
+auto
+cc_impl(const time_point<SrcClock, Duration>& t, ...)
+ -> decltype(0, // MSVC_WORKAROUND
+ conv_clock<DstClock>(conv_clock<system_clock>(conv_clock<utc_clock>(t))))
+{
+ return conv_clock<DstClock>(conv_clock<system_clock>(conv_clock<utc_clock>(t)));
+}
+
+} // namespace clock_cast_detail
+/* Converts tp to a time point of DstClock by calling
+ * clock_cast_detail::cc_impl. The address of tp is passed as a second
+ * argument purely to rank the cc_impl overloads. The function is removed from
+ * overload resolution when no cc_impl overload is viable.
+ */
+template <class DstClock, class SrcClock, class Duration>
+CONSTCD14
+auto
+clock_cast(const std::chrono::time_point<SrcClock, Duration>& tp)
+ -> decltype(clock_cast_detail::cc_impl<DstClock>(tp, &tp))
+{
+ return clock_cast_detail::cc_impl<DstClock>(tp, &tp);
+}
+
+// Deprecated API
+/* Deprecated: UTC time to system time; forwards to utc_clock::to_sys. */
+template <class Duration>
+inline
+sys_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+to_sys_time(const utc_time<Duration>& t)
+{
+ return utc_clock::to_sys(t);
+}
+/* Deprecated: TAI time to system time, going through UTC
+ * (tai_clock::to_utc, then utc_clock::to_sys).
+ */
+template <class Duration>
+inline
+sys_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+to_sys_time(const tai_time<Duration>& t)
+{
+ return utc_clock::to_sys(tai_clock::to_utc(t));
+}
+/* Deprecated: GPS time to system time, going through UTC
+ * (gps_clock::to_utc, then utc_clock::to_sys).
+ */
+template <class Duration>
+inline
+sys_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+to_sys_time(const gps_time<Duration>& t)
+{
+ return utc_clock::to_sys(gps_clock::to_utc(t));
+}
+
+/* Deprecated: system time to UTC time; forwards to utc_clock::from_sys. */
+template <class Duration>
+inline
+utc_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+to_utc_time(const sys_time<Duration>& t)
+{
+ return utc_clock::from_sys(t);
+}
+/* Deprecated: TAI time to UTC time; forwards to tai_clock::to_utc. */
+template <class Duration>
+inline
+utc_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+to_utc_time(const tai_time<Duration>& t)
+{
+ return tai_clock::to_utc(t);
+}
+/* Deprecated: GPS time to UTC time; forwards to gps_clock::to_utc. */
+template <class Duration>
+inline
+utc_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+to_utc_time(const gps_time<Duration>& t)
+{
+ return gps_clock::to_utc(t);
+}
+
+/* Deprecated: system time to TAI time, going through UTC
+ * (utc_clock::from_sys, then tai_clock::from_utc).
+ */
+template <class Duration>
+inline
+tai_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+to_tai_time(const sys_time<Duration>& t)
+{
+ return tai_clock::from_utc(utc_clock::from_sys(t));
+}
+/* Deprecated: UTC time to TAI time; forwards to tai_clock::from_utc. */
+template <class Duration>
+inline
+tai_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+to_tai_time(const utc_time<Duration>& t)
+{
+ return tai_clock::from_utc(t);
+}
+/* Deprecated: GPS time to TAI time, going through UTC
+ * (gps_clock::to_utc, then tai_clock::from_utc).
+ */
+template <class Duration>
+inline
+tai_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+to_tai_time(const gps_time<Duration>& t)
+{
+ return tai_clock::from_utc(gps_clock::to_utc(t));
+}
+
+/* Deprecated: system time to GPS time, going through UTC
+ * (utc_clock::from_sys, then gps_clock::from_utc).
+ */
+template <class Duration>
+inline
+gps_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+to_gps_time(const sys_time<Duration>& t)
+{
+ return gps_clock::from_utc(utc_clock::from_sys(t));
+}
+/* Deprecated: UTC time to GPS time; forwards to gps_clock::from_utc. */
+template <class Duration>
+inline
+gps_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+to_gps_time(const utc_time<Duration>& t)
+{
+ return gps_clock::from_utc(t);
+}
+/* Deprecated: TAI time to GPS time, going through UTC
+ * (tai_clock::to_utc, then gps_clock::from_utc).
+ */
+template <class Duration>
+inline
+gps_time<typename std::common_type<Duration, std::chrono::seconds>::type>
+to_gps_time(const tai_time<Duration>& t)
+{
+ return gps_clock::from_utc(tai_clock::to_utc(t));
+}
+
+#endif // !MISSING_LEAP_SECONDS
+
+} // namespace date
+} // namespace arrow_vendored
+
+#endif // TZ_H
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime/tz_private.h b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime/tz_private.h
index 282842e7441..4a65dbcdd7d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime/tz_private.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime/tz_private.h
@@ -1,319 +1,319 @@
-#ifndef TZ_PRIVATE_H
-#define TZ_PRIVATE_H
-
-// The MIT License (MIT)
-//
-// Copyright (c) 2015, 2016 Howard Hinnant
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in all
-// copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-// SOFTWARE.
-//
-// Our apologies. When the previous paragraph was written, lowercase had not yet
-// been invented (that would involve another several millennia of evolution).
-// We did not mean to shout.
-
-#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
-#include "tz.h"
-#else
-#include "date.h"
-#include <vector>
-#endif
-
-namespace arrow_vendored
-{
-namespace date
-{
-
-namespace detail
-{
-
-#if !USE_OS_TZDB
-
-enum class tz {utc, local, standard};
-
-//forward declare to avoid warnings in gcc 6.2
-class MonthDayTime;
-std::istream& operator>>(std::istream& is, MonthDayTime& x);
-std::ostream& operator<<(std::ostream& os, const MonthDayTime& x);
-
-
-class MonthDayTime
-{
-private:
- struct pair
- {
-#if defined(_MSC_VER) && (_MSC_VER < 1900)
- pair() : month_day_(date::jan / 1), weekday_(0U) {}
-
- pair(const date::month_day& month_day, const date::weekday& weekday)
- : month_day_(month_day), weekday_(weekday) {}
-#endif
-
- date::month_day month_day_;
- date::weekday weekday_;
- };
-
- enum Type {month_day, month_last_dow, lteq, gteq};
-
- Type type_{month_day};
-
-#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
- union U
-#else
- struct U
-#endif
- {
- date::month_day month_day_;
- date::month_weekday_last month_weekday_last_;
- pair month_day_weekday_;
-
-#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
- U() : month_day_{date::jan/1} {}
-#else
- U() :
- month_day_(date::jan/1),
- month_weekday_last_(date::month(0U), date::weekday_last(date::weekday(0U)))
- {}
-
-#endif // !defined(_MSC_VER) || (_MSC_VER >= 1900)
-
- U& operator=(const date::month_day& x);
- U& operator=(const date::month_weekday_last& x);
- U& operator=(const pair& x);
- } u;
-
- std::chrono::hours h_{};
- std::chrono::minutes m_{};
- std::chrono::seconds s_{};
- tz zone_{tz::local};
-
-public:
- MonthDayTime() = default;
- MonthDayTime(local_seconds tp, tz timezone);
- MonthDayTime(const date::month_day& md, tz timezone);
-
- date::day day() const;
- date::month month() const;
- tz zone() const {return zone_;}
-
- void canonicalize(date::year y);
-
- sys_seconds
- to_sys(date::year y, std::chrono::seconds offset, std::chrono::seconds save) const;
- sys_days to_sys_days(date::year y) const;
-
- sys_seconds to_time_point(date::year y) const;
- int compare(date::year y, const MonthDayTime& x, date::year yx,
- std::chrono::seconds offset, std::chrono::minutes prev_save) const;
-
- friend std::istream& operator>>(std::istream& is, MonthDayTime& x);
- friend std::ostream& operator<<(std::ostream& os, const MonthDayTime& x);
-};
-
-// A Rule specifies one or more set of datetimes without using an offset.
-// Multiple dates are specified with multiple years. The years in effect
-// go from starting_year_ to ending_year_, inclusive. starting_year_ <=
-// ending_year_. save_ is in effect for times from the specified time
-// onward, including the specified time. When the specified time is
-// local, it uses the save_ from the chronologically previous Rule, or if
-// there is none, 0.
-
-//forward declare to avoid warnings in gcc 6.2
-class Rule;
-bool operator==(const Rule& x, const Rule& y);
-bool operator<(const Rule& x, const Rule& y);
-bool operator==(const Rule& x, const date::year& y);
-bool operator<(const Rule& x, const date::year& y);
-bool operator==(const date::year& x, const Rule& y);
-bool operator<(const date::year& x, const Rule& y);
-bool operator==(const Rule& x, const std::string& y);
-bool operator<(const Rule& x, const std::string& y);
-bool operator==(const std::string& x, const Rule& y);
-bool operator<(const std::string& x, const Rule& y);
-std::ostream& operator<<(std::ostream& os, const Rule& r);
-
-class Rule
-{
-private:
- std::string name_;
- date::year starting_year_{0};
- date::year ending_year_{0};
- MonthDayTime starting_at_;
- std::chrono::minutes save_{0};
- std::string abbrev_;
-
-public:
- Rule() = default;
- explicit Rule(const std::string& s);
- Rule(const Rule& r, date::year starting_year, date::year ending_year);
-
- const std::string& name() const {return name_;}
- const std::string& abbrev() const {return abbrev_;}
-
- const MonthDayTime& mdt() const {return starting_at_;}
- const date::year& starting_year() const {return starting_year_;}
- const date::year& ending_year() const {return ending_year_;}
- const std::chrono::minutes& save() const {return save_;}
-
- static void split_overlaps(std::vector<Rule>& rules);
-
- friend bool operator==(const Rule& x, const Rule& y);
- friend bool operator<(const Rule& x, const Rule& y);
- friend bool operator==(const Rule& x, const date::year& y);
- friend bool operator<(const Rule& x, const date::year& y);
- friend bool operator==(const date::year& x, const Rule& y);
- friend bool operator<(const date::year& x, const Rule& y);
- friend bool operator==(const Rule& x, const std::string& y);
- friend bool operator<(const Rule& x, const std::string& y);
- friend bool operator==(const std::string& x, const Rule& y);
- friend bool operator<(const std::string& x, const Rule& y);
-
- friend std::ostream& operator<<(std::ostream& os, const Rule& r);
-
-private:
- date::day day() const;
- date::month month() const;
- static void split_overlaps(std::vector<Rule>& rules, std::size_t i, std::size_t& e);
- static bool overlaps(const Rule& x, const Rule& y);
- static void split(std::vector<Rule>& rules, std::size_t i, std::size_t k,
- std::size_t& e);
-};
-
-inline bool operator!=(const Rule& x, const Rule& y) {return !(x == y);}
-inline bool operator> (const Rule& x, const Rule& y) {return y < x;}
-inline bool operator<=(const Rule& x, const Rule& y) {return !(y < x);}
-inline bool operator>=(const Rule& x, const Rule& y) {return !(x < y);}
-
-inline bool operator!=(const Rule& x, const date::year& y) {return !(x == y);}
-inline bool operator> (const Rule& x, const date::year& y) {return y < x;}
-inline bool operator<=(const Rule& x, const date::year& y) {return !(y < x);}
-inline bool operator>=(const Rule& x, const date::year& y) {return !(x < y);}
-
-inline bool operator!=(const date::year& x, const Rule& y) {return !(x == y);}
-inline bool operator> (const date::year& x, const Rule& y) {return y < x;}
-inline bool operator<=(const date::year& x, const Rule& y) {return !(y < x);}
-inline bool operator>=(const date::year& x, const Rule& y) {return !(x < y);}
-
-inline bool operator!=(const Rule& x, const std::string& y) {return !(x == y);}
-inline bool operator> (const Rule& x, const std::string& y) {return y < x;}
-inline bool operator<=(const Rule& x, const std::string& y) {return !(y < x);}
-inline bool operator>=(const Rule& x, const std::string& y) {return !(x < y);}
-
-inline bool operator!=(const std::string& x, const Rule& y) {return !(x == y);}
-inline bool operator> (const std::string& x, const Rule& y) {return y < x;}
-inline bool operator<=(const std::string& x, const Rule& y) {return !(y < x);}
-inline bool operator>=(const std::string& x, const Rule& y) {return !(x < y);}
-
-struct zonelet
-{
- enum tag {has_rule, has_save, is_empty};
-
- std::chrono::seconds gmtoff_;
- tag tag_ = has_rule;
-
-#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
- union U
-#else
- struct U
-#endif
- {
- std::string rule_;
- std::chrono::minutes save_;
-
- ~U() {}
- U() {}
- U(const U&) {}
- U& operator=(const U&) = delete;
- } u;
-
- std::string format_;
- date::year until_year_{0};
- MonthDayTime until_date_;
- sys_seconds until_utc_;
- local_seconds until_std_;
- local_seconds until_loc_;
- std::chrono::minutes initial_save_{};
- std::string initial_abbrev_;
- std::pair<const Rule*, date::year> first_rule_{nullptr, date::year::min()};
- std::pair<const Rule*, date::year> last_rule_{nullptr, date::year::max()};
-
- ~zonelet();
- zonelet();
- zonelet(const zonelet& i);
- zonelet& operator=(const zonelet&) = delete;
-};
-
-#else // USE_OS_TZDB
-
-struct ttinfo
-{
- std::int32_t tt_gmtoff;
- unsigned char tt_isdst;
- unsigned char tt_abbrind;
- unsigned char pad[2];
-};
-
-static_assert(sizeof(ttinfo) == 8, "");
-
-struct expanded_ttinfo
-{
- std::chrono::seconds offset;
- std::string abbrev;
- bool is_dst;
-};
-
-struct transition
-{
- sys_seconds timepoint;
- const expanded_ttinfo* info;
-
- transition(sys_seconds tp, const expanded_ttinfo* i = nullptr)
- : timepoint(tp)
- , info(i)
- {}
-
- friend
- std::ostream&
- operator<<(std::ostream& os, const transition& t)
- {
- using date::operator<<;
- os << t.timepoint << "Z ";
- if (t.info->offset >= std::chrono::seconds{0})
- os << '+';
- os << make_time(t.info->offset);
- if (t.info->is_dst > 0)
- os << " daylight ";
- else
- os << " standard ";
- os << t.info->abbrev;
- return os;
- }
-};
-
-#endif // USE_OS_TZDB
-
-} // namespace detail
-
-} // namespace date
-} // namespace arrow_vendored
-
-#if defined(_MSC_VER) && (_MSC_VER < 1900)
-#include "tz.h"
-#endif
-
-#endif // TZ_PRIVATE_H
+#ifndef TZ_PRIVATE_H
+#define TZ_PRIVATE_H
+
+// The MIT License (MIT)
+//
+// Copyright (c) 2015, 2016 Howard Hinnant
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+// Our apologies. When the previous paragraph was written, lowercase had not yet
+// been invented (that would involve another several millennia of evolution).
+// We did not mean to shout.
+
+#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
+#include "tz.h"
+#else
+#include "date.h"
+#include <vector>
+#endif
+
+namespace arrow_vendored
+{
+namespace date
+{
+
+namespace detail
+{
+
+#if !USE_OS_TZDB
+
+enum class tz {utc, local, standard};
+
+//forward declare to avoid warnings in gcc 6.2
+class MonthDayTime;
+std::istream& operator>>(std::istream& is, MonthDayTime& x);
+std::ostream& operator<<(std::ostream& os, const MonthDayTime& x);
+
+
+class MonthDayTime // holds a Type tag, a U with one member per representation, h_/m_/s_ duration fields and a tz tag
+{
+private:
+ struct pair // a month_day together with a weekday
+ {
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+ pair() : month_day_(date::jan / 1), weekday_(0U) {} // explicit constructors are only compiled for MSVC older than 1900
+
+ pair(const date::month_day& month_day, const date::weekday& weekday)
+ : month_day_(month_day), weekday_(weekday) {}
+#endif
+
+ date::month_day month_day_;
+ date::weekday weekday_;
+ };
+
+ enum Type {month_day, month_last_dow, lteq, gteq}; // NOTE(review): presumably selects which member of U is in use - confirm in tz.cpp
+
+ Type type_{month_day}; // default agrees with U() initialising month_day_
+
+#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
+ union U
+#else
+ struct U // MSVC older than 1900 gets a plain struct instead of a union
+#endif
+ {
+ date::month_day month_day_;
+ date::month_weekday_last month_weekday_last_;
+ pair month_day_weekday_;
+
+#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
+ U() : month_day_{date::jan/1} {} // union form: only month_day_ is initialised (January 1)
+#else
+ U() :
+ month_day_(date::jan/1),
+ month_weekday_last_(date::month(0U), date::weekday_last(date::weekday(0U)))
+ {}
+
+#endif // !defined(_MSC_VER) || (_MSC_VER >= 1900)
+
+ U& operator=(const date::month_day& x); // one assignment overload per member type; defined outside this header
+ U& operator=(const date::month_weekday_last& x);
+ U& operator=(const pair& x);
+ } u;
+
+ std::chrono::hours h_{}; // h_, m_ and s_ are value-initialised
+ std::chrono::minutes m_{};
+ std::chrono::seconds s_{};
+ tz zone_{tz::local}; // defaults to tz::local
+
+public:
+ MonthDayTime() = default; // relies on the default member initialisers above
+ MonthDayTime(local_seconds tp, tz timezone);
+ MonthDayTime(const date::month_day& md, tz timezone);
+
+ date::day day() const;
+ date::month month() const;
+ tz zone() const {return zone_;}
+
+ void canonicalize(date::year y);
+
+ sys_seconds
+ to_sys(date::year y, std::chrono::seconds offset, std::chrono::seconds save) const;
+ sys_days to_sys_days(date::year y) const;
+
+ sys_seconds to_time_point(date::year y) const;
+ int compare(date::year y, const MonthDayTime& x, date::year yx,
+ std::chrono::seconds offset, std::chrono::minutes prev_save) const;
+
+ friend std::istream& operator>>(std::istream& is, MonthDayTime& x); // stream operators are friends: they need the private representation
+ friend std::ostream& operator<<(std::ostream& os, const MonthDayTime& x);
+};
+
+// A Rule specifies one or more set of datetimes without using an offset.
+// Multiple dates are specified with multiple years. The years in effect
+// go from starting_year_ to ending_year_, inclusive. starting_year_ <=
+// ending_year_. save_ is in effect for times from the specified time
+// onward, including the specified time. When the specified time is
+// local, it uses the save_ from the chronologically previous Rule, or if
+// there is none, 0.
+
+//forward declare to avoid warnings in gcc 6.2
+class Rule;
+bool operator==(const Rule& x, const Rule& y);
+bool operator<(const Rule& x, const Rule& y);
+bool operator==(const Rule& x, const date::year& y);
+bool operator<(const Rule& x, const date::year& y);
+bool operator==(const date::year& x, const Rule& y);
+bool operator<(const date::year& x, const Rule& y);
+bool operator==(const Rule& x, const std::string& y);
+bool operator<(const Rule& x, const std::string& y);
+bool operator==(const std::string& x, const Rule& y);
+bool operator<(const std::string& x, const Rule& y);
+std::ostream& operator<<(std::ostream& os, const Rule& r);
+
+class Rule // the year-range / save_ contract is described in the comment block above the forward declarations
+{
+private:
+ std::string name_;
+ date::year starting_year_{0}; // first year in effect (inclusive)
+ date::year ending_year_{0}; // last year in effect (inclusive)
+ MonthDayTime starting_at_;
+ std::chrono::minutes save_{0}; // stored in minutes
+ std::string abbrev_;
+
+public:
+ Rule() = default;
+ explicit Rule(const std::string& s);
+ Rule(const Rule& r, date::year starting_year, date::year ending_year);
+
+ const std::string& name() const {return name_;} // accessors below return const references to the private members
+ const std::string& abbrev() const {return abbrev_;}
+
+ const MonthDayTime& mdt() const {return starting_at_;}
+ const date::year& starting_year() const {return starting_year_;}
+ const date::year& ending_year() const {return ending_year_;}
+ const std::chrono::minutes& save() const {return save_;}
+
+ static void split_overlaps(std::vector<Rule>& rules); // public overload; a private three-argument overload is declared below
+
+ friend bool operator==(const Rule& x, const Rule& y); // friends mirror the namespace-scope forward declarations
+ friend bool operator<(const Rule& x, const Rule& y);
+ friend bool operator==(const Rule& x, const date::year& y);
+ friend bool operator<(const Rule& x, const date::year& y);
+ friend bool operator==(const date::year& x, const Rule& y);
+ friend bool operator<(const date::year& x, const Rule& y);
+ friend bool operator==(const Rule& x, const std::string& y);
+ friend bool operator<(const Rule& x, const std::string& y);
+ friend bool operator==(const std::string& x, const Rule& y);
+ friend bool operator<(const std::string& x, const Rule& y);
+
+ friend std::ostream& operator<<(std::ostream& os, const Rule& r);
+
+private:
+ date::day day() const;
+ date::month month() const;
+ static void split_overlaps(std::vector<Rule>& rules, std::size_t i, std::size_t& e); // private helpers; definitions are not in this header
+ static bool overlaps(const Rule& x, const Rule& y);
+ static void split(std::vector<Rule>& rules, std::size_t i, std::size_t k,
+ std::size_t& e);
+};
+
+inline bool operator!=(const Rule& x, const Rule& y) {return !(x == y);} // Rule vs Rule: !=, >, <=, >= are derived from the declared == and <
+inline bool operator> (const Rule& x, const Rule& y) {return y < x;}
+inline bool operator<=(const Rule& x, const Rule& y) {return !(y < x);}
+inline bool operator>=(const Rule& x, const Rule& y) {return !(x < y);}
+
+inline bool operator!=(const Rule& x, const date::year& y) {return !(x == y);} // Rule vs year: same derivation
+inline bool operator> (const Rule& x, const date::year& y) {return y < x;}
+inline bool operator<=(const Rule& x, const date::year& y) {return !(y < x);}
+inline bool operator>=(const Rule& x, const date::year& y) {return !(x < y);}
+
+inline bool operator!=(const date::year& x, const Rule& y) {return !(x == y);} // year vs Rule: same derivation
+inline bool operator> (const date::year& x, const Rule& y) {return y < x;}
+inline bool operator<=(const date::year& x, const Rule& y) {return !(y < x);}
+inline bool operator>=(const date::year& x, const Rule& y) {return !(x < y);}
+
+inline bool operator!=(const Rule& x, const std::string& y) {return !(x == y);} // Rule vs string: same derivation
+inline bool operator> (const Rule& x, const std::string& y) {return y < x;}
+inline bool operator<=(const Rule& x, const std::string& y) {return !(y < x);}
+inline bool operator>=(const Rule& x, const std::string& y) {return !(x < y);}
+
+inline bool operator!=(const std::string& x, const Rule& y) {return !(x == y);} // string vs Rule: same derivation
+inline bool operator> (const std::string& x, const Rule& y) {return y < x;}
+inline bool operator<=(const std::string& x, const Rule& y) {return !(y < x);}
+inline bool operator>=(const std::string& x, const Rule& y) {return !(x < y);}
+
+struct zonelet // NOTE(review): appears to hold one segment of a zone definition (offset, rule-or-save, format, until) - confirm against the parser
+{
+ enum tag {has_rule, has_save, is_empty}; // NOTE(review): presumably records which member of u (if any) is live - confirm in tz.cpp
+
+ std::chrono::seconds gmtoff_;
+ tag tag_ = has_rule;
+
+#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
+ union U
+#else
+ struct U // MSVC older than 1900 gets a plain struct instead of a union
+#endif
+ {
+ std::string rule_;
+ std::chrono::minutes save_;
+
+ ~U() {} // empty bodies here and in the two constructors below: in the union form they neither construct nor destroy rule_/save_
+ U() {}
+ U(const U&) {}
+ U& operator=(const U&) = delete;
+ } u; // NOTE(review): presumably zonelet's own ctor/copy-ctor/dtor (declared below, defined elsewhere) manage the live member - confirm
+
+ std::string format_;
+ date::year until_year_{0};
+ MonthDayTime until_date_;
+ sys_seconds until_utc_;
+ local_seconds until_std_;
+ local_seconds until_loc_;
+ std::chrono::minutes initial_save_{};
+ std::string initial_abbrev_;
+ std::pair<const Rule*, date::year> first_rule_{nullptr, date::year::min()}; // starts as (no rule, year::min())
+ std::pair<const Rule*, date::year> last_rule_{nullptr, date::year::max()}; // starts as (no rule, year::max())
+
+ ~zonelet();
+ zonelet();
+ zonelet(const zonelet& i);
+ zonelet& operator=(const zonelet&) = delete; // copy-constructible but not copy-assignable
+};
+
+#else // USE_OS_TZDB
+
+struct ttinfo // fixed-size record: the static_assert that follows pins sizeof(ttinfo) to 8
+{
+ std::int32_t tt_gmtoff; // NOTE(review): field names mirror the ttinfo record of the TZif (tzfile) format - presumably UTC offset in seconds; confirm against the reader
+ unsigned char tt_isdst; // presumably a DST flag - TODO confirm
+ unsigned char tt_abbrind; // presumably an index into an abbreviation table - TODO confirm
+ unsigned char pad[2]; // two explicit padding bytes: 4 + 1 + 1 + 2 = 8
+};
+
+static_assert(sizeof(ttinfo) == 8, "");
+
+struct expanded_ttinfo // offset as chrono::seconds, abbreviation as an owned string, DST flag as bool
+{
+ std::chrono::seconds offset;
+ std::string abbrev;
+ bool is_dst;
+};
+
+struct transition // a time point paired with a non-owning pointer to an expanded_ttinfo
+{
+ sys_seconds timepoint;
+ const expanded_ttinfo* info; // not owned; may be null (see the constructor default)
+
+ transition(sys_seconds tp, const expanded_ttinfo* i = nullptr) // non-explicit: a bare sys_seconds converts to a transition with info == nullptr
+ : timepoint(tp)
+ , info(i)
+ {}
+
+ friend
+ std::ostream&
+ operator<<(std::ostream& os, const transition& t) // NOTE(review): dereferences t.info without a null check although the constructor defaults it to nullptr
+ {
+ using date::operator<<;
+ os << t.timepoint << "Z ";
+ if (t.info->offset >= std::chrono::seconds{0}) // explicit '+' for non-negative offsets
+ os << '+';
+ os << make_time(t.info->offset);
+ if (t.info->is_dst > 0) // is_dst is a bool, so this is true exactly when it is set
+ os << " daylight ";
+ else
+ os << " standard ";
+ os << t.info->abbrev;
+ return os;
+ }
+};
+
+#endif // USE_OS_TZDB
+
+} // namespace detail
+
+} // namespace date
+} // namespace arrow_vendored
+
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+#include "tz.h"
+#endif
+
+#endif // TZ_PRIVATE_H
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime/visibility.h b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime/visibility.h
index ae031238d85..08e85657efb 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime/visibility.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/datetime/visibility.h
@@ -1,26 +1,26 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#if defined(ARROW_STATIC)
-// intentially empty
-#elif defined(ARROW_EXPORTING)
-#define DATE_BUILD_DLL
-#else
-#define DATE_USE_DLL
-#endif
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#if defined(ARROW_STATIC)
+// intentionally empty
+#elif defined(ARROW_EXPORTING)
+#define DATE_BUILD_DLL
+#else
+#define DATE_USE_DLL
+#endif
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/musl/strptime.c b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/musl/strptime.c
index e8111f57679..4a41787c9f8 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/musl/strptime.c
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/musl/strptime.c
@@ -1,237 +1,237 @@
-// Vendored from musl git commit 593caa456309714402ca4cb77c3770f4c24da9da
-// + adaptations
-
-#include "arrow/vendored/strptime.h"
-
-#include <ctype.h>
-#include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
-
-#ifdef _WIN32
-#define strncasecmp _strnicmp
-#define strcasecmp _stricmp
-#else
-#include <strings.h>
-#endif
-
-#undef HAVE_LANGINFO
-
-#ifndef _WIN32
-#define HAVE_LANGINFO 1
-#endif
-
-#ifdef HAVE_LANGINFO
-#include <langinfo.h>
-#endif
-
-#define strptime arrow_strptime
-
-char *strptime(const char *__restrict s, const char *__restrict f, struct tm *__restrict tm)
-{
- int i, w, neg, adj, min, range, *dest, dummy;
-#ifdef HAVE_LANGINFO
- const char *ex;
- size_t len;
-#endif
- int want_century = 0, century = 0, relyear = 0;
- while (*f) {
- if (*f != '%') {
- if (isspace(*f)) for (; *s && isspace(*s); s++);
- else if (*s != *f) return 0;
- else s++;
- f++;
- continue;
- }
- f++;
- if (*f == '+') f++;
- if (isdigit(*f)) {
- char *new_f;
- w=strtoul(f, &new_f, 10);
- f = new_f;
- } else {
- w=-1;
- }
- adj=0;
- switch (*f++) {
-#ifdef HAVE_LANGINFO
- case 'a': case 'A':
- dest = &tm->tm_wday;
- min = ABDAY_1;
- range = 7;
- goto symbolic_range;
- case 'b': case 'B': case 'h':
- dest = &tm->tm_mon;
- min = ABMON_1;
- range = 12;
- goto symbolic_range;
- case 'c':
- s = strptime(s, nl_langinfo(D_T_FMT), tm);
- if (!s) return 0;
- break;
-#endif
- case 'C':
- dest = &century;
- if (w<0) w=2;
- want_century |= 2;
- goto numeric_digits;
- case 'd': case 'e':
- dest = &tm->tm_mday;
- min = 1;
- range = 31;
- goto numeric_range;
- case 'D':
- s = strptime(s, "%m/%d/%y", tm);
- if (!s) return 0;
- break;
- case 'H':
- dest = &tm->tm_hour;
- min = 0;
- range = 24;
- goto numeric_range;
- case 'I':
- dest = &tm->tm_hour;
- min = 1;
- range = 12;
- goto numeric_range;
- case 'j':
- dest = &tm->tm_yday;
- min = 1;
- range = 366;
- adj = 1;
- goto numeric_range;
- case 'm':
- dest = &tm->tm_mon;
- min = 1;
- range = 12;
- adj = 1;
- goto numeric_range;
- case 'M':
- dest = &tm->tm_min;
- min = 0;
- range = 60;
- goto numeric_range;
- case 'n': case 't':
- for (; *s && isspace(*s); s++);
- break;
-#ifdef HAVE_LANGINFO
- case 'p':
- ex = nl_langinfo(AM_STR);
- len = strlen(ex);
- if (!strncasecmp(s, ex, len)) {
- tm->tm_hour %= 12;
- s += len;
- break;
- }
- ex = nl_langinfo(PM_STR);
- len = strlen(ex);
- if (!strncasecmp(s, ex, len)) {
- tm->tm_hour %= 12;
- tm->tm_hour += 12;
- s += len;
- break;
- }
- return 0;
- case 'r':
- s = strptime(s, nl_langinfo(T_FMT_AMPM), tm);
- if (!s) return 0;
- break;
-#endif
- case 'R':
- s = strptime(s, "%H:%M", tm);
- if (!s) return 0;
- break;
- case 'S':
- dest = &tm->tm_sec;
- min = 0;
- range = 61;
- goto numeric_range;
- case 'T':
- s = strptime(s, "%H:%M:%S", tm);
- if (!s) return 0;
- break;
- case 'U':
- case 'W':
- /* Throw away result, for now. (FIXME?) */
- dest = &dummy;
- min = 0;
- range = 54;
- goto numeric_range;
- case 'w':
- dest = &tm->tm_wday;
- min = 0;
- range = 7;
- goto numeric_range;
-#ifdef HAVE_LANGINFO
- case 'x':
- s = strptime(s, nl_langinfo(D_FMT), tm);
- if (!s) return 0;
- break;
- case 'X':
- s = strptime(s, nl_langinfo(T_FMT), tm);
- if (!s) return 0;
- break;
-#endif
- case 'y':
- dest = &relyear;
- w = 2;
- want_century |= 1;
- goto numeric_digits;
- case 'Y':
- dest = &tm->tm_year;
- if (w<0) w=4;
- adj = 1900;
- want_century = 0;
- goto numeric_digits;
- case '%':
- if (*s++ != '%') return 0;
- break;
- default:
- return 0;
- numeric_range:
- if (!isdigit(*s)) return 0;
- *dest = 0;
- for (i=1; i<=min+range && isdigit(*s); i*=10)
- *dest = *dest * 10 + *s++ - '0';
- if (*dest - min >= range) return 0;
- *dest -= adj;
- switch((char *)dest - (char *)tm) {
- case offsetof(struct tm, tm_yday):
- ;
- }
- goto update;
- numeric_digits:
- neg = 0;
- if (*s == '+') s++;
- else if (*s == '-') neg=1, s++;
- if (!isdigit(*s)) return 0;
- for (*dest=i=0; i<w && isdigit(*s); i++)
- *dest = *dest * 10 + *s++ - '0';
- if (neg) *dest = -*dest;
- *dest -= adj;
- goto update;
-#ifdef HAVE_LANGINFO
- symbolic_range:
- for (i=2*range-1; i>=0; i--) {
- ex = nl_langinfo(min+i);
- len = strlen(ex);
- if (strncasecmp(s, ex, len)) continue;
- s += len;
- *dest = i % range;
- break;
- }
- if (i<0) return 0;
- goto update;
-#endif
- update:
- //FIXME
- ;
- }
- }
- if (want_century) {
- tm->tm_year = relyear;
- if (want_century & 2) tm->tm_year += century * 100 - 1900;
- else if (tm->tm_year <= 68) tm->tm_year += 100;
- }
- return (char *)s;
-}
+// Vendored from musl git commit 593caa456309714402ca4cb77c3770f4c24da9da
+// + adaptations
+
+#include "arrow/vendored/strptime.h"
+
+#include <ctype.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef _WIN32
+#define strncasecmp _strnicmp
+#define strcasecmp _stricmp
+#else
+#include <strings.h>
+#endif
+
+#undef HAVE_LANGINFO
+
+#ifndef _WIN32
+#define HAVE_LANGINFO 1
+#endif
+
+#ifdef HAVE_LANGINFO
+#include <langinfo.h>
+#endif
+
+#define strptime arrow_strptime
+
+char *strptime(const char *__restrict s, const char *__restrict f, struct tm *__restrict tm)
+{ /* Parse s according to format f into *tm; returns a pointer just past the consumed input, or 0 on mismatch or unsupported conversion. */
+ int i, w, neg, adj, min, range, *dest, dummy;
+#ifdef HAVE_LANGINFO
+ const char *ex;
+ size_t len;
+#endif
+ int want_century = 0, century = 0, relyear = 0; /* want_century: bit 0 set by %y, bit 1 set by %C, cleared by %Y */
+ while (*f) {
+ if (*f != '%') {
+ if (isspace((unsigned char)*f)) for (; *s && isspace((unsigned char)*s); s++); /* <ctype.h> arguments must be representable as unsigned char */
+ else if (*s != *f) return 0;
+ else s++;
+ f++;
+ continue;
+ }
+ f++;
+ if (*f == '+') f++;
+ if (isdigit((unsigned char)*f)) {
+ char *new_f;
+ w=strtoul(f, &new_f, 10); /* explicit field width */
+ f = new_f;
+ } else {
+ w=-1; /* no width given; %C and %Y substitute their own default */
+ }
+ adj=0; /* subtracted from the parsed number before it is stored */
+ switch (*f++) {
+#ifdef HAVE_LANGINFO
+ case 'a': case 'A':
+ dest = &tm->tm_wday;
+ min = ABDAY_1;
+ range = 7;
+ goto symbolic_range;
+ case 'b': case 'B': case 'h':
+ dest = &tm->tm_mon;
+ min = ABMON_1;
+ range = 12;
+ goto symbolic_range;
+ case 'c':
+ s = strptime(s, nl_langinfo(D_T_FMT), tm);
+ if (!s) return 0;
+ break;
+#endif
+ case 'C':
+ dest = &century;
+ if (w<0) w=2;
+ want_century |= 2;
+ goto numeric_digits;
+ case 'd': case 'e':
+ dest = &tm->tm_mday;
+ min = 1;
+ range = 31;
+ goto numeric_range;
+ case 'D':
+ s = strptime(s, "%m/%d/%y", tm);
+ if (!s) return 0;
+ break;
+ case 'H':
+ dest = &tm->tm_hour;
+ min = 0;
+ range = 24;
+ goto numeric_range;
+ case 'I':
+ dest = &tm->tm_hour;
+ min = 1;
+ range = 12;
+ goto numeric_range;
+ case 'j':
+ dest = &tm->tm_yday;
+ min = 1;
+ range = 366;
+ adj = 1; /* tm_yday is stored 0-based */
+ goto numeric_range;
+ case 'm':
+ dest = &tm->tm_mon;
+ min = 1;
+ range = 12;
+ adj = 1; /* tm_mon is stored 0-based */
+ goto numeric_range;
+ case 'M':
+ dest = &tm->tm_min;
+ min = 0;
+ range = 60;
+ goto numeric_range;
+ case 'n': case 't':
+ for (; *s && isspace((unsigned char)*s); s++);
+ break;
+#ifdef HAVE_LANGINFO
+ case 'p':
+ ex = nl_langinfo(AM_STR);
+ len = strlen(ex);
+ if (!strncasecmp(s, ex, len)) {
+ tm->tm_hour %= 12;
+ s += len;
+ break;
+ }
+ ex = nl_langinfo(PM_STR);
+ len = strlen(ex);
+ if (!strncasecmp(s, ex, len)) {
+ tm->tm_hour %= 12;
+ tm->tm_hour += 12;
+ s += len;
+ break;
+ }
+ return 0;
+ case 'r':
+ s = strptime(s, nl_langinfo(T_FMT_AMPM), tm);
+ if (!s) return 0;
+ break;
+#endif
+ case 'R':
+ s = strptime(s, "%H:%M", tm);
+ if (!s) return 0;
+ break;
+ case 'S':
+ dest = &tm->tm_sec;
+ min = 0;
+ range = 61;
+ goto numeric_range;
+ case 'T':
+ s = strptime(s, "%H:%M:%S", tm);
+ if (!s) return 0;
+ break;
+ case 'U':
+ case 'W':
+ /* Throw away result, for now. (FIXME?) */
+ dest = &dummy;
+ min = 0;
+ range = 54;
+ goto numeric_range;
+ case 'w':
+ dest = &tm->tm_wday;
+ min = 0;
+ range = 7;
+ goto numeric_range;
+#ifdef HAVE_LANGINFO
+ case 'x':
+ s = strptime(s, nl_langinfo(D_FMT), tm);
+ if (!s) return 0;
+ break;
+ case 'X':
+ s = strptime(s, nl_langinfo(T_FMT), tm);
+ if (!s) return 0;
+ break;
+#endif
+ case 'y':
+ dest = &relyear;
+ w = 2;
+ want_century |= 1;
+ goto numeric_digits;
+ case 'Y':
+ dest = &tm->tm_year;
+ if (w<0) w=4;
+ adj = 1900; /* tm_year counts from 1900 */
+ want_century = 0;
+ goto numeric_digits;
+ case '%':
+ if (*s++ != '%') return 0;
+ break;
+ default:
+ return 0;
+ numeric_range:
+ if (!isdigit((unsigned char)*s)) return 0;
+ *dest = 0;
+ for (i=1; i<=min+range && isdigit((unsigned char)*s); i*=10)
+ *dest = *dest * 10 + *s++ - '0';
+ if (*dest < min || *dest - min >= range) return 0; /* accept only min .. min+range-1; the lower bound rejects e.g. "00" for %d, %m, %j, %I */
+ *dest -= adj;
+ switch((char *)dest - (char *)tm) {
+ case offsetof(struct tm, tm_yday):
+ ;
+ }
+ goto update;
+ numeric_digits:
+ neg = 0;
+ if (*s == '+') s++;
+ else if (*s == '-') neg=1, s++;
+ if (!isdigit((unsigned char)*s)) return 0;
+ for (*dest=i=0; i<w && isdigit((unsigned char)*s); i++) /* reads at most w digits */
+ *dest = *dest * 10 + *s++ - '0';
+ if (neg) *dest = -*dest;
+ *dest -= adj;
+ goto update;
+#ifdef HAVE_LANGINFO
+ symbolic_range:
+ for (i=2*range-1; i>=0; i--) { /* scans 2*range langinfo items starting at min, highest index first */
+ ex = nl_langinfo(min+i);
+ len = strlen(ex);
+ if (strncasecmp(s, ex, len)) continue;
+ s += len;
+ *dest = i % range; /* both halves of the item table map onto 0 .. range-1 */
+ break;
+ }
+ if (i<0) return 0;
+ goto update;
+#endif
+ update:
+ //FIXME
+ ;
+ }
+ }
+ if (want_century) {
+ tm->tm_year = relyear;
+ if (want_century & 2) tm->tm_year += century * 100 - 1900;
+ else if (tm->tm_year <= 68) tm->tm_year += 100; /* %y without %C: 00-68 map to 2000-2068, 69-99 stay in the 1900s */
+ }
+ return (char *)s;
+}
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/portable-snippets/safe-math.h b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/portable-snippets/safe-math.h
index 7f6426ac765..41ab493c548 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/portable-snippets/safe-math.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/portable-snippets/safe-math.h
@@ -1,1072 +1,1072 @@
-/* Overflow-safe math functions
- * Portable Snippets - https://github.com/nemequ/portable-snippets
- * Created by Evan Nemerson <evan@nemerson.com>
- *
- * To the extent possible under law, the authors have waived all
- * copyright and related or neighboring rights to this code. For
- * details, see the Creative Commons Zero 1.0 Universal license at
- * https://creativecommons.org/publicdomain/zero/1.0/
- */
-
-#if !defined(PSNIP_SAFE_H)
-#define PSNIP_SAFE_H
-
-#if !defined(PSNIP_SAFE_FORCE_PORTABLE)
-# if defined(__has_builtin)
-# if __has_builtin(__builtin_add_overflow) && !defined(__ibmxl__)
-# define PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW
-# endif
-# elif defined(__GNUC__) && (__GNUC__ >= 5) && !defined(__INTEL_COMPILER)
-# define PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW
-# endif
-# if defined(__has_include)
-# if __has_include(<intsafe.h>)
-# define PSNIP_SAFE_HAVE_INTSAFE_H
-# endif
-# elif defined(_WIN32)
-# define PSNIP_SAFE_HAVE_INTSAFE_H
-# endif
-#endif /* !defined(PSNIP_SAFE_FORCE_PORTABLE) */
-
-#if defined(__GNUC__)
-# define PSNIP_SAFE_LIKELY(expr) __builtin_expect(!!(expr), 1)
-# define PSNIP_SAFE_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
-#else
-# define PSNIP_SAFE_LIKELY(expr) !!(expr)
-# define PSNIP_SAFE_UNLIKELY(expr) !!(expr)
-#endif /* defined(__GNUC__) */
-
-#if !defined(PSNIP_SAFE_STATIC_INLINE)
-# if defined(__GNUC__)
-# define PSNIP_SAFE__COMPILER_ATTRIBUTES __attribute__((__unused__))
-# else
-# define PSNIP_SAFE__COMPILER_ATTRIBUTES
-# endif
-
-# if defined(HEDLEY_INLINE)
-# define PSNIP_SAFE__INLINE HEDLEY_INLINE
-# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
-# define PSNIP_SAFE__INLINE inline
-# elif defined(__GNUC_STDC_INLINE__)
-# define PSNIP_SAFE__INLINE __inline__
-# elif defined(_MSC_VER) && _MSC_VER >= 1200
-# define PSNIP_SAFE__INLINE __inline
-# else
-# define PSNIP_SAFE__INLINE
-# endif
-
-# define PSNIP_SAFE__FUNCTION PSNIP_SAFE__COMPILER_ATTRIBUTES static PSNIP_SAFE__INLINE
-#endif
-
+/* Overflow-safe math functions
+ * Portable Snippets - https://github.com/nemequ/portable-snippets
+ * Created by Evan Nemerson <evan@nemerson.com>
+ *
+ * To the extent possible under law, the authors have waived all
+ * copyright and related or neighboring rights to this code. For
+ * details, see the Creative Commons Zero 1.0 Universal license at
+ * https://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+#if !defined(PSNIP_SAFE_H)
+#define PSNIP_SAFE_H
+
+#if !defined(PSNIP_SAFE_FORCE_PORTABLE)
+# if defined(__has_builtin)
+# if __has_builtin(__builtin_add_overflow) && !defined(__ibmxl__)
+# define PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW
+# endif
+# elif defined(__GNUC__) && (__GNUC__ >= 5) && !defined(__INTEL_COMPILER)
+# define PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW
+# endif
+# if defined(__has_include)
+# if __has_include(<intsafe.h>)
+# define PSNIP_SAFE_HAVE_INTSAFE_H
+# endif
+# elif defined(_WIN32)
+# define PSNIP_SAFE_HAVE_INTSAFE_H
+# endif
+#endif /* !defined(PSNIP_SAFE_FORCE_PORTABLE) */
+
+#if defined(__GNUC__)
+# define PSNIP_SAFE_LIKELY(expr) __builtin_expect(!!(expr), 1)
+# define PSNIP_SAFE_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
+#else
+# define PSNIP_SAFE_LIKELY(expr) !!(expr)
+# define PSNIP_SAFE_UNLIKELY(expr) !!(expr)
+#endif /* defined(__GNUC__) */
+
+#if !defined(PSNIP_SAFE_STATIC_INLINE)
+# if defined(__GNUC__)
+# define PSNIP_SAFE__COMPILER_ATTRIBUTES __attribute__((__unused__))
+# else
+# define PSNIP_SAFE__COMPILER_ATTRIBUTES
+# endif
+
+# if defined(HEDLEY_INLINE)
+# define PSNIP_SAFE__INLINE HEDLEY_INLINE
+# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+# define PSNIP_SAFE__INLINE inline
+# elif defined(__GNUC_STDC_INLINE__)
+# define PSNIP_SAFE__INLINE __inline__
+# elif defined(_MSC_VER) && _MSC_VER >= 1200
+# define PSNIP_SAFE__INLINE __inline
+# else
+# define PSNIP_SAFE__INLINE
+# endif
+
+# define PSNIP_SAFE__FUNCTION PSNIP_SAFE__COMPILER_ATTRIBUTES static PSNIP_SAFE__INLINE
+#endif
+
// !defined(__cplusplus) added for Solaris support
#if !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
-# define psnip_safe_bool _Bool
-#else
-# define psnip_safe_bool int
-#endif
-
-#if !defined(PSNIP_SAFE_NO_FIXED)
-/* For maximum portability include the exact-int module from
- portable snippets. */
-# if \
- !defined(psnip_int64_t) || !defined(psnip_uint64_t) || \
- !defined(psnip_int32_t) || !defined(psnip_uint32_t) || \
- !defined(psnip_int16_t) || !defined(psnip_uint16_t) || \
- !defined(psnip_int8_t) || !defined(psnip_uint8_t)
-# include <stdint.h>
-# if !defined(psnip_int64_t)
-# define psnip_int64_t int64_t
-# endif
-# if !defined(psnip_uint64_t)
-# define psnip_uint64_t uint64_t
-# endif
-# if !defined(psnip_int32_t)
-# define psnip_int32_t int32_t
-# endif
-# if !defined(psnip_uint32_t)
-# define psnip_uint32_t uint32_t
-# endif
-# if !defined(psnip_int16_t)
-# define psnip_int16_t int16_t
-# endif
-# if !defined(psnip_uint16_t)
-# define psnip_uint16_t uint16_t
-# endif
-# if !defined(psnip_int8_t)
-# define psnip_int8_t int8_t
-# endif
-# if !defined(psnip_uint8_t)
-# define psnip_uint8_t uint8_t
-# endif
-# endif
-#endif /* !defined(PSNIP_SAFE_NO_FIXED) */
-#include <limits.h>
-#include <stdlib.h>
-
-#if !defined(PSNIP_SAFE_SIZE_MAX)
-# if defined(__SIZE_MAX__)
-# define PSNIP_SAFE_SIZE_MAX __SIZE_MAX__
-# elif defined(PSNIP_EXACT_INT_HAVE_STDINT)
-# include <stdint.h>
-# endif
-#endif
-
-#if defined(PSNIP_SAFE_SIZE_MAX)
-# define PSNIP_SAFE__SIZE_MAX_RT PSNIP_SAFE_SIZE_MAX
-#else
-# define PSNIP_SAFE__SIZE_MAX_RT (~((size_t) 0))
-#endif
-
-#if defined(PSNIP_SAFE_HAVE_INTSAFE_H)
-/* In VS 10, stdint.h and intsafe.h both define (U)INTN_MIN/MAX, which
- triggers warning C4005 (level 1). */
-# if defined(_MSC_VER) && (_MSC_VER == 1600)
-# pragma warning(push)
-# pragma warning(disable:4005)
-# endif
-# include <intsafe.h>
-# if defined(_MSC_VER) && (_MSC_VER == 1600)
-# pragma warning(pop)
-# endif
-#endif /* defined(PSNIP_SAFE_HAVE_INTSAFE_H) */
-
-/* If there is a type larger than the one we're concerned with it's
- * likely much faster to simply promote the operands, perform the
- * requested operation, verify that the result falls within the
- * original type, then cast the result back to the original type. */
-
-#if !defined(PSNIP_SAFE_NO_PROMOTIONS)
-
-#define PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, op_name, op) \
- PSNIP_SAFE__FUNCTION psnip_safe_##name##_larger \
- psnip_safe_larger_##name##_##op_name (T a, T b) { \
- return ((psnip_safe_##name##_larger) a) op ((psnip_safe_##name##_larger) b); \
- }
-
-#define PSNIP_SAFE_DEFINE_LARGER_UNARY_OP(T, name, op_name, op) \
- PSNIP_SAFE__FUNCTION psnip_safe_##name##_larger \
- psnip_safe_larger_##name##_##op_name (T value) { \
- return (op ((psnip_safe_##name##_larger) value)); \
- }
-
-#define PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(T, name) \
- PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, add, +) \
- PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, sub, -) \
- PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, mul, *) \
- PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, div, /) \
- PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, mod, %) \
- PSNIP_SAFE_DEFINE_LARGER_UNARY_OP (T, name, neg, -)
-
-#define PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(T, name) \
- PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, add, +) \
- PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, sub, -) \
- PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, mul, *) \
- PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, div, /) \
- PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, mod, %)
-
-#define PSNIP_SAFE_IS_LARGER(ORIG_MAX, DEST_MAX) ((DEST_MAX / ORIG_MAX) >= ORIG_MAX)
-
-#if defined(__GNUC__) && ((__GNUC__ >= 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__SIZEOF_INT128__) && !defined(__ibmxl__)
-#define PSNIP_SAFE_HAVE_128
-typedef __int128 psnip_safe_int128_t;
-typedef unsigned __int128 psnip_safe_uint128_t;
-#endif /* defined(__GNUC__) */
-
-#if !defined(PSNIP_SAFE_NO_FIXED)
-#define PSNIP_SAFE_HAVE_INT8_LARGER
-#define PSNIP_SAFE_HAVE_UINT8_LARGER
-typedef psnip_int16_t psnip_safe_int8_larger;
-typedef psnip_uint16_t psnip_safe_uint8_larger;
-
-#define PSNIP_SAFE_HAVE_INT16_LARGER
-typedef psnip_int32_t psnip_safe_int16_larger;
-typedef psnip_uint32_t psnip_safe_uint16_larger;
-
-#define PSNIP_SAFE_HAVE_INT32_LARGER
-typedef psnip_int64_t psnip_safe_int32_larger;
-typedef psnip_uint64_t psnip_safe_uint32_larger;
-
-#if defined(PSNIP_SAFE_HAVE_128)
-#define PSNIP_SAFE_HAVE_INT64_LARGER
-typedef psnip_safe_int128_t psnip_safe_int64_larger;
-typedef psnip_safe_uint128_t psnip_safe_uint64_larger;
-#endif /* defined(PSNIP_SAFE_HAVE_128) */
-#endif /* !defined(PSNIP_SAFE_NO_FIXED) */
-
-#define PSNIP_SAFE_HAVE_LARGER_SCHAR
-#if PSNIP_SAFE_IS_LARGER(SCHAR_MAX, SHRT_MAX)
-typedef short psnip_safe_schar_larger;
-#elif PSNIP_SAFE_IS_LARGER(SCHAR_MAX, INT_MAX)
-typedef int psnip_safe_schar_larger;
-#elif PSNIP_SAFE_IS_LARGER(SCHAR_MAX, LONG_MAX)
-typedef long psnip_safe_schar_larger;
-#elif PSNIP_SAFE_IS_LARGER(SCHAR_MAX, LLONG_MAX)
-typedef long long psnip_safe_schar_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(SCHAR_MAX, 0x7fff)
-typedef psnip_int16_t psnip_safe_schar_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(SCHAR_MAX, 0x7fffffffLL)
-typedef psnip_int32_t psnip_safe_schar_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(SCHAR_MAX, 0x7fffffffffffffffLL)
-typedef psnip_int64_t psnip_safe_schar_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (SCHAR_MAX <= 0x7fffffffffffffffLL)
-typedef psnip_safe_int128_t psnip_safe_schar_larger;
-#else
-#undef PSNIP_SAFE_HAVE_LARGER_SCHAR
-#endif
-
-#define PSNIP_SAFE_HAVE_LARGER_UCHAR
-#if PSNIP_SAFE_IS_LARGER(UCHAR_MAX, USHRT_MAX)
-typedef unsigned short psnip_safe_uchar_larger;
-#elif PSNIP_SAFE_IS_LARGER(UCHAR_MAX, UINT_MAX)
-typedef unsigned int psnip_safe_uchar_larger;
-#elif PSNIP_SAFE_IS_LARGER(UCHAR_MAX, ULONG_MAX)
-typedef unsigned long psnip_safe_uchar_larger;
-#elif PSNIP_SAFE_IS_LARGER(UCHAR_MAX, ULLONG_MAX)
-typedef unsigned long long psnip_safe_uchar_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(UCHAR_MAX, 0xffffU)
-typedef psnip_uint16_t psnip_safe_uchar_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(UCHAR_MAX, 0xffffffffUL)
-typedef psnip_uint32_t psnip_safe_uchar_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(UCHAR_MAX, 0xffffffffffffffffULL)
-typedef psnip_uint64_t psnip_safe_uchar_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (UCHAR_MAX <= 0xffffffffffffffffULL)
-typedef psnip_safe_uint128_t psnip_safe_uchar_larger;
-#else
-#undef PSNIP_SAFE_HAVE_LARGER_UCHAR
-#endif
-
-#if CHAR_MIN == 0 && defined(PSNIP_SAFE_HAVE_LARGER_UCHAR)
-#define PSNIP_SAFE_HAVE_LARGER_CHAR
-typedef psnip_safe_uchar_larger psnip_safe_char_larger;
-#elif CHAR_MIN < 0 && defined(PSNIP_SAFE_HAVE_LARGER_SCHAR)
-#define PSNIP_SAFE_HAVE_LARGER_CHAR
-typedef psnip_safe_schar_larger psnip_safe_char_larger;
-#endif
-
-#define PSNIP_SAFE_HAVE_LARGER_SHRT
-#if PSNIP_SAFE_IS_LARGER(SHRT_MAX, INT_MAX)
-typedef int psnip_safe_short_larger;
-#elif PSNIP_SAFE_IS_LARGER(SHRT_MAX, LONG_MAX)
-typedef long psnip_safe_short_larger;
-#elif PSNIP_SAFE_IS_LARGER(SHRT_MAX, LLONG_MAX)
-typedef long long psnip_safe_short_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(SHRT_MAX, 0x7fff)
-typedef psnip_int16_t psnip_safe_short_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(SHRT_MAX, 0x7fffffffLL)
-typedef psnip_int32_t psnip_safe_short_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(SHRT_MAX, 0x7fffffffffffffffLL)
-typedef psnip_int64_t psnip_safe_short_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (SHRT_MAX <= 0x7fffffffffffffffLL)
-typedef psnip_safe_int128_t psnip_safe_short_larger;
-#else
-#undef PSNIP_SAFE_HAVE_LARGER_SHRT
-#endif
-
-#define PSNIP_SAFE_HAVE_LARGER_USHRT
-#if PSNIP_SAFE_IS_LARGER(USHRT_MAX, UINT_MAX)
-typedef unsigned int psnip_safe_ushort_larger;
-#elif PSNIP_SAFE_IS_LARGER(USHRT_MAX, ULONG_MAX)
-typedef unsigned long psnip_safe_ushort_larger;
-#elif PSNIP_SAFE_IS_LARGER(USHRT_MAX, ULLONG_MAX)
-typedef unsigned long long psnip_safe_ushort_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(USHRT_MAX, 0xffff)
-typedef psnip_uint16_t psnip_safe_ushort_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(USHRT_MAX, 0xffffffffUL)
-typedef psnip_uint32_t psnip_safe_ushort_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(USHRT_MAX, 0xffffffffffffffffULL)
-typedef psnip_uint64_t psnip_safe_ushort_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (USHRT_MAX <= 0xffffffffffffffffULL)
-typedef psnip_safe_uint128_t psnip_safe_ushort_larger;
-#else
-#undef PSNIP_SAFE_HAVE_LARGER_USHRT
-#endif
-
-#define PSNIP_SAFE_HAVE_LARGER_INT
-#if PSNIP_SAFE_IS_LARGER(INT_MAX, LONG_MAX)
-typedef long psnip_safe_int_larger;
-#elif PSNIP_SAFE_IS_LARGER(INT_MAX, LLONG_MAX)
-typedef long long psnip_safe_int_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(INT_MAX, 0x7fff)
-typedef psnip_int16_t psnip_safe_int_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(INT_MAX, 0x7fffffffLL)
-typedef psnip_int32_t psnip_safe_int_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(INT_MAX, 0x7fffffffffffffffLL)
-typedef psnip_int64_t psnip_safe_int_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (INT_MAX <= 0x7fffffffffffffffLL)
-typedef psnip_safe_int128_t psnip_safe_int_larger;
-#else
-#undef PSNIP_SAFE_HAVE_LARGER_INT
-#endif
-
-#define PSNIP_SAFE_HAVE_LARGER_UINT
-#if PSNIP_SAFE_IS_LARGER(UINT_MAX, ULONG_MAX)
-typedef unsigned long psnip_safe_uint_larger;
-#elif PSNIP_SAFE_IS_LARGER(UINT_MAX, ULLONG_MAX)
-typedef unsigned long long psnip_safe_uint_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(UINT_MAX, 0xffff)
-typedef psnip_uint16_t psnip_safe_uint_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(UINT_MAX, 0xffffffffUL)
-typedef psnip_uint32_t psnip_safe_uint_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(UINT_MAX, 0xffffffffffffffffULL)
-typedef psnip_uint64_t psnip_safe_uint_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (UINT_MAX <= 0xffffffffffffffffULL)
-typedef psnip_safe_uint128_t psnip_safe_uint_larger;
-#else
-#undef PSNIP_SAFE_HAVE_LARGER_UINT
-#endif
-
-#define PSNIP_SAFE_HAVE_LARGER_LONG
-#if PSNIP_SAFE_IS_LARGER(LONG_MAX, LLONG_MAX)
-typedef long long psnip_safe_long_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(LONG_MAX, 0x7fff)
-typedef psnip_int16_t psnip_safe_long_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(LONG_MAX, 0x7fffffffLL)
-typedef psnip_int32_t psnip_safe_long_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(LONG_MAX, 0x7fffffffffffffffLL)
-typedef psnip_int64_t psnip_safe_long_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (LONG_MAX <= 0x7fffffffffffffffLL)
-typedef psnip_safe_int128_t psnip_safe_long_larger;
-#else
-#undef PSNIP_SAFE_HAVE_LARGER_LONG
-#endif
-
-#define PSNIP_SAFE_HAVE_LARGER_ULONG
-#if PSNIP_SAFE_IS_LARGER(ULONG_MAX, ULLONG_MAX)
-typedef unsigned long long psnip_safe_ulong_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(ULONG_MAX, 0xffff)
-typedef psnip_uint16_t psnip_safe_ulong_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(ULONG_MAX, 0xffffffffUL)
-typedef psnip_uint32_t psnip_safe_ulong_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(ULONG_MAX, 0xffffffffffffffffULL)
-typedef psnip_uint64_t psnip_safe_ulong_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (ULONG_MAX <= 0xffffffffffffffffULL)
-typedef psnip_safe_uint128_t psnip_safe_ulong_larger;
-#else
-#undef PSNIP_SAFE_HAVE_LARGER_ULONG
-#endif
-
-#define PSNIP_SAFE_HAVE_LARGER_LLONG
-#if !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(LLONG_MAX, 0x7fff)
-typedef psnip_int16_t psnip_safe_llong_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(LLONG_MAX, 0x7fffffffLL)
-typedef psnip_int32_t psnip_safe_llong_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(LLONG_MAX, 0x7fffffffffffffffLL)
-typedef psnip_int64_t psnip_safe_llong_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (LLONG_MAX <= 0x7fffffffffffffffLL)
-typedef psnip_safe_int128_t psnip_safe_llong_larger;
-#else
-#undef PSNIP_SAFE_HAVE_LARGER_LLONG
-#endif
-
-#define PSNIP_SAFE_HAVE_LARGER_ULLONG
-#if !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(ULLONG_MAX, 0xffff)
-typedef psnip_uint16_t psnip_safe_ullong_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(ULLONG_MAX, 0xffffffffUL)
-typedef psnip_uint32_t psnip_safe_ullong_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(ULLONG_MAX, 0xffffffffffffffffULL)
-typedef psnip_uint64_t psnip_safe_ullong_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (ULLONG_MAX <= 0xffffffffffffffffULL)
-typedef psnip_safe_uint128_t psnip_safe_ullong_larger;
-#else
-#undef PSNIP_SAFE_HAVE_LARGER_ULLONG
-#endif
-
-#if defined(PSNIP_SAFE_SIZE_MAX)
-#define PSNIP_SAFE_HAVE_LARGER_SIZE
-#if PSNIP_SAFE_IS_LARGER(PSNIP_SAFE_SIZE_MAX, USHRT_MAX)
-typedef unsigned short psnip_safe_size_larger;
-#elif PSNIP_SAFE_IS_LARGER(PSNIP_SAFE_SIZE_MAX, UINT_MAX)
-typedef unsigned int psnip_safe_size_larger;
-#elif PSNIP_SAFE_IS_LARGER(PSNIP_SAFE_SIZE_MAX, ULONG_MAX)
-typedef unsigned long psnip_safe_size_larger;
-#elif PSNIP_SAFE_IS_LARGER(PSNIP_SAFE_SIZE_MAX, ULLONG_MAX)
-typedef unsigned long long psnip_safe_size_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(PSNIP_SAFE_SIZE_MAX, 0xffff)
-typedef psnip_uint16_t psnip_safe_size_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(PSNIP_SAFE_SIZE_MAX, 0xffffffffUL)
-typedef psnip_uint32_t psnip_safe_size_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(PSNIP_SAFE_SIZE_MAX, 0xffffffffffffffffULL)
-typedef psnip_uint64_t psnip_safe_size_larger;
-#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (PSNIP_SAFE_SIZE_MAX <= 0xffffffffffffffffULL)
-typedef psnip_safe_uint128_t psnip_safe_size_larger;
-#else
-#undef PSNIP_SAFE_HAVE_LARGER_SIZE
-#endif
-#endif
-
-#if defined(PSNIP_SAFE_HAVE_LARGER_SCHAR)
-PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(signed char, schar)
-#endif
-
-#if defined(PSNIP_SAFE_HAVE_LARGER_UCHAR)
-PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(unsigned char, uchar)
-#endif
-
-#if defined(PSNIP_SAFE_HAVE_LARGER_CHAR)
-#if CHAR_MIN == 0
-PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(char, char)
-#else
-PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(char, char)
-#endif
-#endif
-
-#if defined(PSNIP_SAFE_HAVE_LARGER_SHORT)
-PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(short, short)
-#endif
-
-#if defined(PSNIP_SAFE_HAVE_LARGER_USHORT)
-PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(unsigned short, ushort)
-#endif
-
-#if defined(PSNIP_SAFE_HAVE_LARGER_INT)
-PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(int, int)
-#endif
-
-#if defined(PSNIP_SAFE_HAVE_LARGER_UINT)
-PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(unsigned int, uint)
-#endif
-
-#if defined(PSNIP_SAFE_HAVE_LARGER_LONG)
-PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(long, long)
-#endif
-
-#if defined(PSNIP_SAFE_HAVE_LARGER_ULONG)
-PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(unsigned long, ulong)
-#endif
-
-#if defined(PSNIP_SAFE_HAVE_LARGER_LLONG)
-PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(long long, llong)
-#endif
-
-#if defined(PSNIP_SAFE_HAVE_LARGER_ULLONG)
-PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(unsigned long long, ullong)
-#endif
-
-#if defined(PSNIP_SAFE_HAVE_LARGER_SIZE)
-PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(size_t, size)
-#endif
-
-#if !defined(PSNIP_SAFE_NO_FIXED)
-PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(psnip_int8_t, int8)
-PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(psnip_uint8_t, uint8)
-PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(psnip_int16_t, int16)
-PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(psnip_uint16_t, uint16)
-PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(psnip_int32_t, int32)
-PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(psnip_uint32_t, uint32)
-#if defined(PSNIP_SAFE_HAVE_128)
-PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(psnip_int64_t, int64)
-PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(psnip_uint64_t, uint64)
-#endif
-#endif
-
-#endif /* !defined(PSNIP_SAFE_NO_PROMOTIONS) */
-
-#define PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(T, name, op_name) \
- PSNIP_SAFE__FUNCTION psnip_safe_bool \
- psnip_safe_##name##_##op_name(T* res, T a, T b) { \
- return !__builtin_##op_name##_overflow(a, b, res); \
- }
-
-#define PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(T, name, op_name, min, max) \
- PSNIP_SAFE__FUNCTION psnip_safe_bool \
- psnip_safe_##name##_##op_name(T* res, T a, T b) { \
- const psnip_safe_##name##_larger r = psnip_safe_larger_##name##_##op_name(a, b); \
- *res = (T) r; \
- return (r >= min) && (r <= max); \
- }
-
-#define PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(T, name, op_name, max) \
- PSNIP_SAFE__FUNCTION psnip_safe_bool \
- psnip_safe_##name##_##op_name(T* res, T a, T b) { \
- const psnip_safe_##name##_larger r = psnip_safe_larger_##name##_##op_name(a, b); \
- *res = (T) r; \
- return (r <= max); \
- }
-
-#define PSNIP_SAFE_DEFINE_SIGNED_ADD(T, name, min, max) \
- PSNIP_SAFE__FUNCTION psnip_safe_bool \
- psnip_safe_##name##_add (T* res, T a, T b) { \
- psnip_safe_bool r = !( ((b > 0) && (a > (max - b))) || \
- ((b < 0) && (a < (min - b))) ); \
- if(PSNIP_SAFE_LIKELY(r)) \
- *res = a + b; \
- return r; \
- }
-
-#define PSNIP_SAFE_DEFINE_UNSIGNED_ADD(T, name, max) \
- PSNIP_SAFE__FUNCTION psnip_safe_bool \
- psnip_safe_##name##_add (T* res, T a, T b) { \
- *res = (T) (a + b); \
- return !PSNIP_SAFE_UNLIKELY((b > 0) && (a > (max - b))); \
- }
-
-#define PSNIP_SAFE_DEFINE_SIGNED_SUB(T, name, min, max) \
- PSNIP_SAFE__FUNCTION psnip_safe_bool \
- psnip_safe_##name##_sub (T* res, T a, T b) { \
- psnip_safe_bool r = !((b > 0 && a < (min + b)) || \
- (b < 0 && a > (max + b))); \
- if(PSNIP_SAFE_LIKELY(r)) \
- *res = a - b; \
- return r; \
- }
-
-#define PSNIP_SAFE_DEFINE_UNSIGNED_SUB(T, name, max) \
- PSNIP_SAFE__FUNCTION psnip_safe_bool \
- psnip_safe_##name##_sub (T* res, T a, T b) { \
- *res = a - b; \
- return !PSNIP_SAFE_UNLIKELY(b > a); \
- }
-
-#define PSNIP_SAFE_DEFINE_SIGNED_MUL(T, name, min, max) \
- PSNIP_SAFE__FUNCTION psnip_safe_bool \
- psnip_safe_##name##_mul (T* res, T a, T b) { \
- psnip_safe_bool r = 1; \
- if (a > 0) { \
- if (b > 0) { \
- if (a > (max / b)) { \
- r = 0; \
- } \
- } else { \
- if (b < (min / a)) { \
- r = 0; \
- } \
- } \
- } else { \
- if (b > 0) { \
- if (a < (min / b)) { \
- r = 0; \
- } \
- } else { \
- if ( (a != 0) && (b < (max / a))) { \
- r = 0; \
- } \
- } \
- } \
- if(PSNIP_SAFE_LIKELY(r)) \
- *res = a * b; \
- return r; \
- }
-
-#define PSNIP_SAFE_DEFINE_UNSIGNED_MUL(T, name, max) \
- PSNIP_SAFE__FUNCTION psnip_safe_bool \
- psnip_safe_##name##_mul (T* res, T a, T b) { \
- *res = (T) (a * b); \
- return !PSNIP_SAFE_UNLIKELY((a > 0) && (b > 0) && (a > (max / b))); \
- }
-
-#define PSNIP_SAFE_DEFINE_SIGNED_DIV(T, name, min, max) \
- PSNIP_SAFE__FUNCTION psnip_safe_bool \
- psnip_safe_##name##_div (T* res, T a, T b) { \
- if (PSNIP_SAFE_UNLIKELY(b == 0)) { \
- *res = 0; \
- return 0; \
- } else if (PSNIP_SAFE_UNLIKELY(a == min && b == -1)) { \
- *res = min; \
- return 0; \
- } else { \
- *res = (T) (a / b); \
- return 1; \
- } \
- }
-
-#define PSNIP_SAFE_DEFINE_UNSIGNED_DIV(T, name, max) \
- PSNIP_SAFE__FUNCTION psnip_safe_bool \
- psnip_safe_##name##_div (T* res, T a, T b) { \
- if (PSNIP_SAFE_UNLIKELY(b == 0)) { \
- *res = 0; \
- return 0; \
- } else { \
- *res = a / b; \
- return 1; \
- } \
- }
-
-#define PSNIP_SAFE_DEFINE_SIGNED_MOD(T, name, min, max) \
- PSNIP_SAFE__FUNCTION psnip_safe_bool \
- psnip_safe_##name##_mod (T* res, T a, T b) { \
- if (PSNIP_SAFE_UNLIKELY(b == 0)) { \
- *res = 0; \
- return 0; \
- } else if (PSNIP_SAFE_UNLIKELY(a == min && b == -1)) { \
- *res = min; \
- return 0; \
- } else { \
- *res = (T) (a % b); \
- return 1; \
- } \
- }
-
-#define PSNIP_SAFE_DEFINE_UNSIGNED_MOD(T, name, max) \
- PSNIP_SAFE__FUNCTION psnip_safe_bool \
- psnip_safe_##name##_mod (T* res, T a, T b) { \
- if (PSNIP_SAFE_UNLIKELY(b == 0)) { \
- *res = 0; \
- return 0; \
- } else { \
- *res = a % b; \
- return 1; \
- } \
- }
-
-#define PSNIP_SAFE_DEFINE_SIGNED_NEG(T, name, min, max) \
- PSNIP_SAFE__FUNCTION psnip_safe_bool \
- psnip_safe_##name##_neg (T* res, T value) { \
- psnip_safe_bool r = value != min; \
- *res = PSNIP_SAFE_LIKELY(r) ? -value : max; \
- return r; \
- }
-
-#define PSNIP_SAFE_DEFINE_INTSAFE(T, name, op, isf) \
- PSNIP_SAFE__FUNCTION psnip_safe_bool \
- psnip_safe_##name##_##op (T* res, T a, T b) { \
- return isf(a, b, res) == S_OK; \
- }
-
-#if CHAR_MIN == 0
-#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(char, char, add)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(char, char, sub)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(char, char, mul)
-#elif defined(PSNIP_SAFE_HAVE_LARGER_CHAR)
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(char, char, add, CHAR_MAX)
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(char, char, sub, CHAR_MAX)
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(char, char, mul, CHAR_MAX)
-#else
-PSNIP_SAFE_DEFINE_UNSIGNED_ADD(char, char, CHAR_MAX)
-PSNIP_SAFE_DEFINE_UNSIGNED_SUB(char, char, CHAR_MAX)
-PSNIP_SAFE_DEFINE_UNSIGNED_MUL(char, char, CHAR_MAX)
-#endif
-PSNIP_SAFE_DEFINE_UNSIGNED_DIV(char, char, CHAR_MAX)
-PSNIP_SAFE_DEFINE_UNSIGNED_MOD(char, char, CHAR_MAX)
-#else /* CHAR_MIN != 0 */
-#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(char, char, add)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(char, char, sub)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(char, char, mul)
-#elif defined(PSNIP_SAFE_HAVE_LARGER_CHAR)
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(char, char, add, CHAR_MIN, CHAR_MAX)
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(char, char, sub, CHAR_MIN, CHAR_MAX)
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(char, char, mul, CHAR_MIN, CHAR_MAX)
-#else
-PSNIP_SAFE_DEFINE_SIGNED_ADD(char, char, CHAR_MIN, CHAR_MAX)
-PSNIP_SAFE_DEFINE_SIGNED_SUB(char, char, CHAR_MIN, CHAR_MAX)
-PSNIP_SAFE_DEFINE_SIGNED_MUL(char, char, CHAR_MIN, CHAR_MAX)
-#endif
-PSNIP_SAFE_DEFINE_SIGNED_DIV(char, char, CHAR_MIN, CHAR_MAX)
-PSNIP_SAFE_DEFINE_SIGNED_MOD(char, char, CHAR_MIN, CHAR_MAX)
-PSNIP_SAFE_DEFINE_SIGNED_NEG(char, char, CHAR_MIN, CHAR_MAX)
-#endif
-
-#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(signed char, schar, add)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(signed char, schar, sub)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(signed char, schar, mul)
-#elif defined(PSNIP_SAFE_HAVE_LARGER_SCHAR)
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(signed char, schar, add, SCHAR_MIN, SCHAR_MAX)
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(signed char, schar, sub, SCHAR_MIN, SCHAR_MAX)
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(signed char, schar, mul, SCHAR_MIN, SCHAR_MAX)
-#else
-PSNIP_SAFE_DEFINE_SIGNED_ADD(signed char, schar, SCHAR_MIN, SCHAR_MAX)
-PSNIP_SAFE_DEFINE_SIGNED_SUB(signed char, schar, SCHAR_MIN, SCHAR_MAX)
-PSNIP_SAFE_DEFINE_SIGNED_MUL(signed char, schar, SCHAR_MIN, SCHAR_MAX)
-#endif
-PSNIP_SAFE_DEFINE_SIGNED_DIV(signed char, schar, SCHAR_MIN, SCHAR_MAX)
-PSNIP_SAFE_DEFINE_SIGNED_MOD(signed char, schar, SCHAR_MIN, SCHAR_MAX)
-PSNIP_SAFE_DEFINE_SIGNED_NEG(signed char, schar, SCHAR_MIN, SCHAR_MAX)
-
-#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned char, uchar, add)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned char, uchar, sub)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned char, uchar, mul)
-#elif defined(PSNIP_SAFE_HAVE_LARGER_UCHAR)
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned char, uchar, add, UCHAR_MAX)
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned char, uchar, sub, UCHAR_MAX)
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned char, uchar, mul, UCHAR_MAX)
-#else
-PSNIP_SAFE_DEFINE_UNSIGNED_ADD(unsigned char, uchar, UCHAR_MAX)
-PSNIP_SAFE_DEFINE_UNSIGNED_SUB(unsigned char, uchar, UCHAR_MAX)
-PSNIP_SAFE_DEFINE_UNSIGNED_MUL(unsigned char, uchar, UCHAR_MAX)
-#endif
-PSNIP_SAFE_DEFINE_UNSIGNED_DIV(unsigned char, uchar, UCHAR_MAX)
-PSNIP_SAFE_DEFINE_UNSIGNED_MOD(unsigned char, uchar, UCHAR_MAX)
-
-#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(short, short, add)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(short, short, sub)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(short, short, mul)
-#elif defined(PSNIP_SAFE_HAVE_LARGER_SHORT)
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(short, short, add, SHRT_MIN, SHRT_MAX)
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(short, short, sub, SHRT_MIN, SHRT_MAX)
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(short, short, mul, SHRT_MIN, SHRT_MAX)
-#else
-PSNIP_SAFE_DEFINE_SIGNED_ADD(short, short, SHRT_MIN, SHRT_MAX)
-PSNIP_SAFE_DEFINE_SIGNED_SUB(short, short, SHRT_MIN, SHRT_MAX)
-PSNIP_SAFE_DEFINE_SIGNED_MUL(short, short, SHRT_MIN, SHRT_MAX)
-#endif
-PSNIP_SAFE_DEFINE_SIGNED_DIV(short, short, SHRT_MIN, SHRT_MAX)
-PSNIP_SAFE_DEFINE_SIGNED_MOD(short, short, SHRT_MIN, SHRT_MAX)
-PSNIP_SAFE_DEFINE_SIGNED_NEG(short, short, SHRT_MIN, SHRT_MAX)
-
-#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned short, ushort, add)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned short, ushort, sub)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned short, ushort, mul)
-#elif defined(PSNIP_SAFE_HAVE_INTSAFE_H)
-PSNIP_SAFE_DEFINE_INTSAFE(unsigned short, ushort, add, UShortAdd)
-PSNIP_SAFE_DEFINE_INTSAFE(unsigned short, ushort, sub, UShortSub)
-PSNIP_SAFE_DEFINE_INTSAFE(unsigned short, ushort, mul, UShortMult)
-#elif defined(PSNIP_SAFE_HAVE_LARGER_USHORT)
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned short, ushort, add, USHRT_MAX)
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned short, ushort, sub, USHRT_MAX)
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned short, ushort, mul, USHRT_MAX)
-#else
-PSNIP_SAFE_DEFINE_UNSIGNED_ADD(unsigned short, ushort, USHRT_MAX)
-PSNIP_SAFE_DEFINE_UNSIGNED_SUB(unsigned short, ushort, USHRT_MAX)
-PSNIP_SAFE_DEFINE_UNSIGNED_MUL(unsigned short, ushort, USHRT_MAX)
-#endif
-PSNIP_SAFE_DEFINE_UNSIGNED_DIV(unsigned short, ushort, USHRT_MAX)
-PSNIP_SAFE_DEFINE_UNSIGNED_MOD(unsigned short, ushort, USHRT_MAX)
-
-#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) /* int: add/sub/mul are instantiated from exactly one macro family, chosen by feature macro */
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(int, int, add)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(int, int, sub)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(int, int, mul)
-#elif defined(PSNIP_SAFE_HAVE_LARGER_INT) /* promoted family, bounded by INT_MIN..INT_MAX */
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(int, int, add, INT_MIN, INT_MAX)
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(int, int, sub, INT_MIN, INT_MAX)
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(int, int, mul, INT_MIN, INT_MAX)
-#else /* neither feature macro is defined: plain signed family */
-PSNIP_SAFE_DEFINE_SIGNED_ADD(int, int, INT_MIN, INT_MAX)
-PSNIP_SAFE_DEFINE_SIGNED_SUB(int, int, INT_MIN, INT_MAX)
-PSNIP_SAFE_DEFINE_SIGNED_MUL(int, int, INT_MIN, INT_MAX)
-#endif
-PSNIP_SAFE_DEFINE_SIGNED_DIV(int, int, INT_MIN, INT_MAX) /* div, mod and neg are instantiated unconditionally */
-PSNIP_SAFE_DEFINE_SIGNED_MOD(int, int, INT_MIN, INT_MAX)
-PSNIP_SAFE_DEFINE_SIGNED_NEG(int, int, INT_MIN, INT_MAX)
-
-#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) /* unsigned int: same selection, with an extra intsafe.h option */
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned int, uint, add)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned int, uint, sub)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned int, uint, mul)
-#elif defined(PSNIP_SAFE_HAVE_INTSAFE_H) /* wraps the intsafe.h routines UIntAdd / UIntSub / UIntMult */
-PSNIP_SAFE_DEFINE_INTSAFE(unsigned int, uint, add, UIntAdd)
-PSNIP_SAFE_DEFINE_INTSAFE(unsigned int, uint, sub, UIntSub)
-PSNIP_SAFE_DEFINE_INTSAFE(unsigned int, uint, mul, UIntMult)
-#elif defined(PSNIP_SAFE_HAVE_LARGER_UINT) /* promoted family, bounded by UINT_MAX */
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned int, uint, add, UINT_MAX)
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned int, uint, sub, UINT_MAX)
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned int, uint, mul, UINT_MAX)
-#else /* no feature macro is defined: plain unsigned family */
-PSNIP_SAFE_DEFINE_UNSIGNED_ADD(unsigned int, uint, UINT_MAX)
-PSNIP_SAFE_DEFINE_UNSIGNED_SUB(unsigned int, uint, UINT_MAX)
-PSNIP_SAFE_DEFINE_UNSIGNED_MUL(unsigned int, uint, UINT_MAX)
-#endif
-PSNIP_SAFE_DEFINE_UNSIGNED_DIV(unsigned int, uint, UINT_MAX) /* div and mod are instantiated unconditionally; unsigned types get no neg */
-PSNIP_SAFE_DEFINE_UNSIGNED_MOD(unsigned int, uint, UINT_MAX)
-
-#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) /* long: add/sub/mul are instantiated from exactly one macro family, chosen by feature macro */
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(long, long, add)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(long, long, sub)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(long, long, mul)
-#elif defined(PSNIP_SAFE_HAVE_LARGER_LONG) /* promoted family, bounded by LONG_MIN..LONG_MAX */
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(long, long, add, LONG_MIN, LONG_MAX)
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(long, long, sub, LONG_MIN, LONG_MAX)
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(long, long, mul, LONG_MIN, LONG_MAX)
-#else /* neither feature macro is defined: plain signed family */
-PSNIP_SAFE_DEFINE_SIGNED_ADD(long, long, LONG_MIN, LONG_MAX)
-PSNIP_SAFE_DEFINE_SIGNED_SUB(long, long, LONG_MIN, LONG_MAX)
-PSNIP_SAFE_DEFINE_SIGNED_MUL(long, long, LONG_MIN, LONG_MAX)
-#endif
-PSNIP_SAFE_DEFINE_SIGNED_DIV(long, long, LONG_MIN, LONG_MAX) /* div, mod and neg are instantiated unconditionally */
-PSNIP_SAFE_DEFINE_SIGNED_MOD(long, long, LONG_MIN, LONG_MAX)
-PSNIP_SAFE_DEFINE_SIGNED_NEG(long, long, LONG_MIN, LONG_MAX)
-
-#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) /* unsigned long: same selection, with an extra intsafe.h option */
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned long, ulong, add)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned long, ulong, sub)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned long, ulong, mul)
-#elif defined(PSNIP_SAFE_HAVE_INTSAFE_H) /* wraps the intsafe.h routines ULongAdd / ULongSub / ULongMult */
-PSNIP_SAFE_DEFINE_INTSAFE(unsigned long, ulong, add, ULongAdd)
-PSNIP_SAFE_DEFINE_INTSAFE(unsigned long, ulong, sub, ULongSub)
-PSNIP_SAFE_DEFINE_INTSAFE(unsigned long, ulong, mul, ULongMult)
-#elif defined(PSNIP_SAFE_HAVE_LARGER_ULONG) /* promoted family, bounded by ULONG_MAX */
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned long, ulong, add, ULONG_MAX)
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned long, ulong, sub, ULONG_MAX)
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned long, ulong, mul, ULONG_MAX)
-#else /* no feature macro is defined: plain unsigned family */
-PSNIP_SAFE_DEFINE_UNSIGNED_ADD(unsigned long, ulong, ULONG_MAX)
-PSNIP_SAFE_DEFINE_UNSIGNED_SUB(unsigned long, ulong, ULONG_MAX)
-PSNIP_SAFE_DEFINE_UNSIGNED_MUL(unsigned long, ulong, ULONG_MAX)
-#endif
-PSNIP_SAFE_DEFINE_UNSIGNED_DIV(unsigned long, ulong, ULONG_MAX) /* div and mod are instantiated unconditionally; unsigned types get no neg */
-PSNIP_SAFE_DEFINE_UNSIGNED_MOD(unsigned long, ulong, ULONG_MAX)
-
-#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) /* long long: add/sub/mul are instantiated from exactly one macro family, chosen by feature macro */
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(long long, llong, add)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(long long, llong, sub)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(long long, llong, mul)
-#elif defined(PSNIP_SAFE_HAVE_LARGER_LLONG) /* promoted family, bounded by LLONG_MIN..LLONG_MAX */
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(long long, llong, add, LLONG_MIN, LLONG_MAX)
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(long long, llong, sub, LLONG_MIN, LLONG_MAX)
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(long long, llong, mul, LLONG_MIN, LLONG_MAX)
-#else /* neither feature macro is defined: plain signed family */
-PSNIP_SAFE_DEFINE_SIGNED_ADD(long long, llong, LLONG_MIN, LLONG_MAX)
-PSNIP_SAFE_DEFINE_SIGNED_SUB(long long, llong, LLONG_MIN, LLONG_MAX)
-PSNIP_SAFE_DEFINE_SIGNED_MUL(long long, llong, LLONG_MIN, LLONG_MAX)
-#endif
-PSNIP_SAFE_DEFINE_SIGNED_DIV(long long, llong, LLONG_MIN, LLONG_MAX) /* div, mod and neg are instantiated unconditionally */
-PSNIP_SAFE_DEFINE_SIGNED_MOD(long long, llong, LLONG_MIN, LLONG_MAX)
-PSNIP_SAFE_DEFINE_SIGNED_NEG(long long, llong, LLONG_MIN, LLONG_MAX)
-
-#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) /* unsigned long long: same selection, with an extra intsafe.h option */
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned long long, ullong, add)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned long long, ullong, sub)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned long long, ullong, mul)
-#elif defined(PSNIP_SAFE_HAVE_INTSAFE_H) /* wraps the intsafe.h routines ULongLongAdd / ULongLongSub / ULongLongMult */
-PSNIP_SAFE_DEFINE_INTSAFE(unsigned long long, ullong, add, ULongLongAdd)
-PSNIP_SAFE_DEFINE_INTSAFE(unsigned long long, ullong, sub, ULongLongSub)
-PSNIP_SAFE_DEFINE_INTSAFE(unsigned long long, ullong, mul, ULongLongMult)
-#elif defined(PSNIP_SAFE_HAVE_LARGER_ULLONG) /* promoted family, bounded by ULLONG_MAX */
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned long long, ullong, add, ULLONG_MAX)
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned long long, ullong, sub, ULLONG_MAX)
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned long long, ullong, mul, ULLONG_MAX)
-#else /* no feature macro is defined: plain unsigned family */
-PSNIP_SAFE_DEFINE_UNSIGNED_ADD(unsigned long long, ullong, ULLONG_MAX)
-PSNIP_SAFE_DEFINE_UNSIGNED_SUB(unsigned long long, ullong, ULLONG_MAX)
-PSNIP_SAFE_DEFINE_UNSIGNED_MUL(unsigned long long, ullong, ULLONG_MAX)
-#endif
-PSNIP_SAFE_DEFINE_UNSIGNED_DIV(unsigned long long, ullong, ULLONG_MAX) /* div and mod are instantiated unconditionally; unsigned types get no neg */
-PSNIP_SAFE_DEFINE_UNSIGNED_MOD(unsigned long long, ullong, ULLONG_MAX)
-
-#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) /* size_t: add/sub/mul are instantiated from exactly one macro family, chosen by feature macro */
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(size_t, size, add)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(size_t, size, sub)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(size_t, size, mul)
-#elif defined(PSNIP_SAFE_HAVE_INTSAFE_H) /* wraps the intsafe.h routines SizeTAdd / SizeTSub / SizeTMult */
-PSNIP_SAFE_DEFINE_INTSAFE(size_t, size, add, SizeTAdd)
-PSNIP_SAFE_DEFINE_INTSAFE(size_t, size, sub, SizeTSub)
-PSNIP_SAFE_DEFINE_INTSAFE(size_t, size, mul, SizeTMult)
-#elif defined(PSNIP_SAFE_HAVE_LARGER_SIZE) /* promoted family; the bound is PSNIP_SAFE__SIZE_MAX_RT rather than a <limits.h> constant */
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(size_t, size, add, PSNIP_SAFE__SIZE_MAX_RT)
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(size_t, size, sub, PSNIP_SAFE__SIZE_MAX_RT)
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(size_t, size, mul, PSNIP_SAFE__SIZE_MAX_RT)
-#else /* no feature macro is defined: plain unsigned family */
-PSNIP_SAFE_DEFINE_UNSIGNED_ADD(size_t, size, PSNIP_SAFE__SIZE_MAX_RT)
-PSNIP_SAFE_DEFINE_UNSIGNED_SUB(size_t, size, PSNIP_SAFE__SIZE_MAX_RT)
-PSNIP_SAFE_DEFINE_UNSIGNED_MUL(size_t, size, PSNIP_SAFE__SIZE_MAX_RT)
-#endif
-PSNIP_SAFE_DEFINE_UNSIGNED_DIV(size_t, size, PSNIP_SAFE__SIZE_MAX_RT) /* div and mod are instantiated unconditionally */
-PSNIP_SAFE_DEFINE_UNSIGNED_MOD(size_t, size, PSNIP_SAFE__SIZE_MAX_RT)
-
-#if !defined(PSNIP_SAFE_NO_FIXED)
-
-#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) /* psnip_int8_t: add/sub/mul are instantiated from exactly one macro family, chosen by feature macro */
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int8_t, int8, add)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int8_t, int8, sub)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int8_t, int8, mul)
-#elif defined(PSNIP_SAFE_HAVE_LARGER_INT8) /* NOTE(review): the only spelling defined in the visible typedef section is PSNIP_SAFE_HAVE_INT8_LARGER -- confirm this branch can be taken */
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int8_t, int8, add, (-0x7fLL-1), 0x7f)
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int8_t, int8, sub, (-0x7fLL-1), 0x7f)
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int8_t, int8, mul, (-0x7fLL-1), 0x7f)
-#else /* neither feature macro is defined: plain signed family */
-PSNIP_SAFE_DEFINE_SIGNED_ADD(psnip_int8_t, int8, (-0x7fLL-1), 0x7f)
-PSNIP_SAFE_DEFINE_SIGNED_SUB(psnip_int8_t, int8, (-0x7fLL-1), 0x7f)
-PSNIP_SAFE_DEFINE_SIGNED_MUL(psnip_int8_t, int8, (-0x7fLL-1), 0x7f)
-#endif
-PSNIP_SAFE_DEFINE_SIGNED_DIV(psnip_int8_t, int8, (-0x7fLL-1), 0x7f) /* bounds are literal -128..127; div, mod and neg are instantiated unconditionally */
-PSNIP_SAFE_DEFINE_SIGNED_MOD(psnip_int8_t, int8, (-0x7fLL-1), 0x7f)
-PSNIP_SAFE_DEFINE_SIGNED_NEG(psnip_int8_t, int8, (-0x7fLL-1), 0x7f)
-
-#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) /* psnip_uint8_t: same selection, upper bound 0xff */
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint8_t, uint8, add)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint8_t, uint8, sub)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint8_t, uint8, mul)
-#elif defined(PSNIP_SAFE_HAVE_LARGER_UINT8) /* NOTE(review): the only spelling defined in the visible typedef section is PSNIP_SAFE_HAVE_UINT8_LARGER -- confirm this branch can be taken */
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint8_t, uint8, add, 0xff)
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint8_t, uint8, sub, 0xff)
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint8_t, uint8, mul, 0xff)
-#else /* neither feature macro is defined: plain unsigned family */
-PSNIP_SAFE_DEFINE_UNSIGNED_ADD(psnip_uint8_t, uint8, 0xff)
-PSNIP_SAFE_DEFINE_UNSIGNED_SUB(psnip_uint8_t, uint8, 0xff)
-PSNIP_SAFE_DEFINE_UNSIGNED_MUL(psnip_uint8_t, uint8, 0xff)
-#endif
-PSNIP_SAFE_DEFINE_UNSIGNED_DIV(psnip_uint8_t, uint8, 0xff) /* div and mod are instantiated unconditionally */
-PSNIP_SAFE_DEFINE_UNSIGNED_MOD(psnip_uint8_t, uint8, 0xff)
-
-#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) /* psnip_int16_t: add/sub/mul are instantiated from exactly one macro family, chosen by feature macro */
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int16_t, int16, add)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int16_t, int16, sub)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int16_t, int16, mul)
-#elif defined(PSNIP_SAFE_HAVE_LARGER_INT16) /* NOTE(review): the only spelling defined in the visible typedef section is PSNIP_SAFE_HAVE_INT16_LARGER -- confirm this branch can be taken */
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int16_t, int16, add, (-32767-1), 0x7fff)
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int16_t, int16, sub, (-32767-1), 0x7fff)
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int16_t, int16, mul, (-32767-1), 0x7fff)
-#else /* neither feature macro is defined: plain signed family */
-PSNIP_SAFE_DEFINE_SIGNED_ADD(psnip_int16_t, int16, (-32767-1), 0x7fff)
-PSNIP_SAFE_DEFINE_SIGNED_SUB(psnip_int16_t, int16, (-32767-1), 0x7fff)
-PSNIP_SAFE_DEFINE_SIGNED_MUL(psnip_int16_t, int16, (-32767-1), 0x7fff)
-#endif
-PSNIP_SAFE_DEFINE_SIGNED_DIV(psnip_int16_t, int16, (-32767-1), 0x7fff) /* bounds are literal -32768..32767; div, mod and neg are instantiated unconditionally */
-PSNIP_SAFE_DEFINE_SIGNED_MOD(psnip_int16_t, int16, (-32767-1), 0x7fff)
-PSNIP_SAFE_DEFINE_SIGNED_NEG(psnip_int16_t, int16, (-32767-1), 0x7fff)
-
-#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) /* psnip_uint16_t: same selection, upper bound 0xffff */
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint16_t, uint16, add)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint16_t, uint16, sub)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint16_t, uint16, mul)
-#elif defined(PSNIP_SAFE_HAVE_INTSAFE_H) && defined(_WIN32) /* intsafe.h UShort* routines; this fixed-width variant additionally requires _WIN32 */
-PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint16_t, uint16, add, UShortAdd)
-PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint16_t, uint16, sub, UShortSub)
-PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint16_t, uint16, mul, UShortMult)
-#elif defined(PSNIP_SAFE_HAVE_LARGER_UINT16) /* NOTE(review): no definition of this macro is visible in this chunk -- confirm this branch can be taken */
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint16_t, uint16, add, 0xffff)
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint16_t, uint16, sub, 0xffff)
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint16_t, uint16, mul, 0xffff)
-#else /* no feature macro is defined: plain unsigned family */
-PSNIP_SAFE_DEFINE_UNSIGNED_ADD(psnip_uint16_t, uint16, 0xffff)
-PSNIP_SAFE_DEFINE_UNSIGNED_SUB(psnip_uint16_t, uint16, 0xffff)
-PSNIP_SAFE_DEFINE_UNSIGNED_MUL(psnip_uint16_t, uint16, 0xffff)
-#endif
-PSNIP_SAFE_DEFINE_UNSIGNED_DIV(psnip_uint16_t, uint16, 0xffff) /* div and mod are instantiated unconditionally */
-PSNIP_SAFE_DEFINE_UNSIGNED_MOD(psnip_uint16_t, uint16, 0xffff)
-
-#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) /* psnip_int32_t: add/sub/mul are instantiated from exactly one macro family, chosen by feature macro */
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int32_t, int32, add)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int32_t, int32, sub)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int32_t, int32, mul)
-#elif defined(PSNIP_SAFE_HAVE_LARGER_INT32) /* NOTE(review): the only spelling defined in the visible typedef section is PSNIP_SAFE_HAVE_INT32_LARGER -- confirm this branch can be taken */
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int32_t, int32, add, (-0x7fffffffLL-1), 0x7fffffffLL)
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int32_t, int32, sub, (-0x7fffffffLL-1), 0x7fffffffLL)
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int32_t, int32, mul, (-0x7fffffffLL-1), 0x7fffffffLL)
-#else /* neither feature macro is defined: plain signed family */
-PSNIP_SAFE_DEFINE_SIGNED_ADD(psnip_int32_t, int32, (-0x7fffffffLL-1), 0x7fffffffLL)
-PSNIP_SAFE_DEFINE_SIGNED_SUB(psnip_int32_t, int32, (-0x7fffffffLL-1), 0x7fffffffLL)
-PSNIP_SAFE_DEFINE_SIGNED_MUL(psnip_int32_t, int32, (-0x7fffffffLL-1), 0x7fffffffLL)
-#endif
-PSNIP_SAFE_DEFINE_SIGNED_DIV(psnip_int32_t, int32, (-0x7fffffffLL-1), 0x7fffffffLL) /* bounds are literal 32-bit limits; div, mod and neg are instantiated unconditionally */
-PSNIP_SAFE_DEFINE_SIGNED_MOD(psnip_int32_t, int32, (-0x7fffffffLL-1), 0x7fffffffLL)
-PSNIP_SAFE_DEFINE_SIGNED_NEG(psnip_int32_t, int32, (-0x7fffffffLL-1), 0x7fffffffLL)
-
-#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) /* psnip_uint32_t: same selection, upper bound 0xffffffffUL */
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint32_t, uint32, add)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint32_t, uint32, sub)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint32_t, uint32, mul)
-#elif defined(PSNIP_SAFE_HAVE_INTSAFE_H) && defined(_WIN32) /* intsafe.h UInt* routines; this fixed-width variant additionally requires _WIN32 */
-PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint32_t, uint32, add, UIntAdd)
-PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint32_t, uint32, sub, UIntSub)
-PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint32_t, uint32, mul, UIntMult)
-#elif defined(PSNIP_SAFE_HAVE_LARGER_UINT32) /* NOTE(review): no definition of this macro is visible in this chunk -- confirm this branch can be taken */
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint32_t, uint32, add, 0xffffffffUL)
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint32_t, uint32, sub, 0xffffffffUL)
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint32_t, uint32, mul, 0xffffffffUL)
-#else /* no feature macro is defined: plain unsigned family */
-PSNIP_SAFE_DEFINE_UNSIGNED_ADD(psnip_uint32_t, uint32, 0xffffffffUL)
-PSNIP_SAFE_DEFINE_UNSIGNED_SUB(psnip_uint32_t, uint32, 0xffffffffUL)
-PSNIP_SAFE_DEFINE_UNSIGNED_MUL(psnip_uint32_t, uint32, 0xffffffffUL)
-#endif
-PSNIP_SAFE_DEFINE_UNSIGNED_DIV(psnip_uint32_t, uint32, 0xffffffffUL) /* div and mod are instantiated unconditionally */
-PSNIP_SAFE_DEFINE_UNSIGNED_MOD(psnip_uint32_t, uint32, 0xffffffffUL)
-
-#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) /* psnip_int64_t: add/sub/mul are instantiated from exactly one macro family, chosen by feature macro */
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int64_t, int64, add)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int64_t, int64, sub)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int64_t, int64, mul)
-#elif defined(PSNIP_SAFE_HAVE_LARGER_INT64) /* NOTE(review): the only spelling defined in the visible typedef section is PSNIP_SAFE_HAVE_INT64_LARGER -- confirm this branch can be taken */
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int64_t, int64, add, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL)
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int64_t, int64, sub, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL)
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int64_t, int64, mul, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL)
-#else /* neither feature macro is defined: plain signed family */
-PSNIP_SAFE_DEFINE_SIGNED_ADD(psnip_int64_t, int64, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL)
-PSNIP_SAFE_DEFINE_SIGNED_SUB(psnip_int64_t, int64, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL)
-PSNIP_SAFE_DEFINE_SIGNED_MUL(psnip_int64_t, int64, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL)
-#endif
-PSNIP_SAFE_DEFINE_SIGNED_DIV(psnip_int64_t, int64, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL) /* bounds are literal 64-bit limits; div, mod and neg are instantiated unconditionally */
-PSNIP_SAFE_DEFINE_SIGNED_MOD(psnip_int64_t, int64, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL)
-PSNIP_SAFE_DEFINE_SIGNED_NEG(psnip_int64_t, int64, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL)
-
-#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) /* psnip_uint64_t: same selection, upper bound 0xffffffffffffffffULL */
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint64_t, uint64, add)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint64_t, uint64, sub)
-PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint64_t, uint64, mul)
-#elif defined(PSNIP_SAFE_HAVE_INTSAFE_H) && defined(_WIN32) /* intsafe.h ULongLong* routines; this fixed-width variant additionally requires _WIN32 */
-PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint64_t, uint64, add, ULongLongAdd)
-PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint64_t, uint64, sub, ULongLongSub)
-PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint64_t, uint64, mul, ULongLongMult)
-#elif defined(PSNIP_SAFE_HAVE_LARGER_UINT64) /* NOTE(review): no definition of this macro is visible in this chunk -- confirm this branch can be taken */
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint64_t, uint64, add, 0xffffffffffffffffULL)
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint64_t, uint64, sub, 0xffffffffffffffffULL)
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint64_t, uint64, mul, 0xffffffffffffffffULL)
-#else /* no feature macro is defined: plain unsigned family */
-PSNIP_SAFE_DEFINE_UNSIGNED_ADD(psnip_uint64_t, uint64, 0xffffffffffffffffULL)
-PSNIP_SAFE_DEFINE_UNSIGNED_SUB(psnip_uint64_t, uint64, 0xffffffffffffffffULL)
-PSNIP_SAFE_DEFINE_UNSIGNED_MUL(psnip_uint64_t, uint64, 0xffffffffffffffffULL)
-#endif
-PSNIP_SAFE_DEFINE_UNSIGNED_DIV(psnip_uint64_t, uint64, 0xffffffffffffffffULL) /* div and mod are instantiated unconditionally */
-PSNIP_SAFE_DEFINE_UNSIGNED_MOD(psnip_uint64_t, uint64, 0xffffffffffffffffULL)
-
-#endif /* !defined(PSNIP_SAFE_NO_FIXED) */
-
-#define PSNIP_SAFE_C11_GENERIC_SELECTION(res, op) \
- _Generic((*res), \
- char: psnip_safe_char_##op, \
- unsigned char: psnip_safe_uchar_##op, \
- short: psnip_safe_short_##op, \
- unsigned short: psnip_safe_ushort_##op, \
- int: psnip_safe_int_##op, \
- unsigned int: psnip_safe_uint_##op, \
- long: psnip_safe_long_##op, \
- unsigned long: psnip_safe_ulong_##op, \
- long long: psnip_safe_llong_##op, \
- unsigned long long: psnip_safe_ullong_##op)
-
-#define PSNIP_SAFE_C11_GENERIC_BINARY_OP(op, res, a, b) /* select the function for op by the type of *res, then call it as f(res, a, b) */ \
- PSNIP_SAFE_C11_GENERIC_SELECTION(res, op)(res, a, b)
-#define PSNIP_SAFE_C11_GENERIC_UNARY_OP(op, res, v) /* same dispatch for one-operand operations: f(res, v) */ \
- PSNIP_SAFE_C11_GENERIC_SELECTION(res, op)(res, v)
-
-#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) /* type-generic entry points: the overflow builtins are already generic over their operand types */
-#define psnip_safe_add(res, a, b) (!__builtin_add_overflow(a, b, res))
-#define psnip_safe_sub(res, a, b) (!__builtin_sub_overflow(a, b, res))
-#define psnip_safe_mul(res, a, b) (!__builtin_mul_overflow(a, b, res))
-#define psnip_safe_div(res, a, b) PSNIP_SAFE_C11_GENERIC_BINARY_OP(div, res, a, b) /* the compilers provide no div overflow builtin, so dispatch on *res as neg does */
-#define psnip_safe_mod(res, a, b) PSNIP_SAFE_C11_GENERIC_BINARY_OP(mod, res, a, b) /* likewise: there is no mod overflow builtin */
-#define psnip_safe_neg(res, v) PSNIP_SAFE_C11_GENERIC_UNARY_OP (neg, res, v)
-
-#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 or later: every operation dispatches through _Generic */
-/* There are no fixed-length or size selections because they cause an
- * error about _Generic specifying two compatible types. Hopefully
- * this doesn't cause problems on exotic platforms, but if it does
- * please let me know and I'll try to figure something out. */
-
-#define psnip_safe_add(res, a, b) PSNIP_SAFE_C11_GENERIC_BINARY_OP(add, res, a, b)
-#define psnip_safe_sub(res, a, b) PSNIP_SAFE_C11_GENERIC_BINARY_OP(sub, res, a, b)
-#define psnip_safe_mul(res, a, b) PSNIP_SAFE_C11_GENERIC_BINARY_OP(mul, res, a, b)
-#define psnip_safe_div(res, a, b) PSNIP_SAFE_C11_GENERIC_BINARY_OP(div, res, a, b)
-#define psnip_safe_mod(res, a, b) PSNIP_SAFE_C11_GENERIC_BINARY_OP(mod, res, a, b)
-#define psnip_safe_neg(res, v) PSNIP_SAFE_C11_GENERIC_UNARY_OP (neg, res, v)
-#endif
-
-#if !defined(PSNIP_SAFE_HAVE_BUILTINS) && (defined(PSNIP_SAFE_EMULATE_NATIVE) || defined(PSNIP_BUILTIN_EMULATE_NATIVE)) /* opt-in emulation of the typed overflow builtins. NOTE(review): the rest of this chunk tests PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW, not PSNIP_SAFE_HAVE_BUILTINS -- confirm which macro is meant */
-# define __builtin_sadd_overflow(a, b, res) (!psnip_safe_int_add(res, a, b)) /* each emulation negates the psnip_safe_* result and reorders the arguments to (res, a, b) */
-# define __builtin_saddl_overflow(a, b, res) (!psnip_safe_long_add(res, a, b))
-# define __builtin_saddll_overflow(a, b, res) (!psnip_safe_llong_add(res, a, b))
-# define __builtin_uadd_overflow(a, b, res) (!psnip_safe_uint_add(res, a, b))
-# define __builtin_uaddl_overflow(a, b, res) (!psnip_safe_ulong_add(res, a, b))
-# define __builtin_uaddll_overflow(a, b, res) (!psnip_safe_ullong_add(res, a, b))
-
-# define __builtin_ssub_overflow(a, b, res) (!psnip_safe_int_sub(res, a, b))
-# define __builtin_ssubl_overflow(a, b, res) (!psnip_safe_long_sub(res, a, b))
-# define __builtin_ssubll_overflow(a, b, res) (!psnip_safe_llong_sub(res, a, b))
-# define __builtin_usub_overflow(a, b, res) (!psnip_safe_uint_sub(res, a, b))
-# define __builtin_usubl_overflow(a, b, res) (!psnip_safe_ulong_sub(res, a, b))
-# define __builtin_usubll_overflow(a, b, res) (!psnip_safe_ullong_sub(res, a, b))
-
-# define __builtin_smul_overflow(a, b, res) (!psnip_safe_int_mul(res, a, b))
-# define __builtin_smull_overflow(a, b, res) (!psnip_safe_long_mul(res, a, b))
-# define __builtin_smulll_overflow(a, b, res) (!psnip_safe_llong_mul(res, a, b))
-# define __builtin_umul_overflow(a, b, res) (!psnip_safe_uint_mul(res, a, b))
-# define __builtin_umull_overflow(a, b, res) (!psnip_safe_ulong_mul(res, a, b))
-# define __builtin_umulll_overflow(a, b, res) (!psnip_safe_ullong_mul(res, a, b))
-#endif
-
-#endif /* !defined(PSNIP_SAFE_H) */
+# define psnip_safe_bool _Bool
+#else
+# define psnip_safe_bool int
+#endif
+
+#if !defined(PSNIP_SAFE_NO_FIXED) /* skipped entirely when PSNIP_SAFE_NO_FIXED is defined */
+/* For maximum portability include the exact-int module from
+ portable snippets. Any psnip_(u)intN_t macro still undefined here falls back to the <stdint.h> type. */
+# if \
+ !defined(psnip_int64_t) || !defined(psnip_uint64_t) || \
+ !defined(psnip_int32_t) || !defined(psnip_uint32_t) || \
+ !defined(psnip_int16_t) || !defined(psnip_uint16_t) || \
+ !defined(psnip_int8_t) || !defined(psnip_uint8_t)
+# include <stdint.h> /* pulled in only when at least one name is missing */
+# if !defined(psnip_int64_t) /* detection is by macro, so a name provided as a typedef is not seen by these tests */
+# define psnip_int64_t int64_t
+# endif
+# if !defined(psnip_uint64_t)
+# define psnip_uint64_t uint64_t
+# endif
+# if !defined(psnip_int32_t)
+# define psnip_int32_t int32_t
+# endif
+# if !defined(psnip_uint32_t)
+# define psnip_uint32_t uint32_t
+# endif
+# if !defined(psnip_int16_t)
+# define psnip_int16_t int16_t
+# endif
+# if !defined(psnip_uint16_t)
+# define psnip_uint16_t uint16_t
+# endif
+# if !defined(psnip_int8_t)
+# define psnip_int8_t int8_t
+# endif
+# if !defined(psnip_uint8_t)
+# define psnip_uint8_t uint8_t
+# endif
+# endif
+#endif /* !defined(PSNIP_SAFE_NO_FIXED) */
+#include <limits.h>
+#include <stdlib.h>
+
+#if !defined(PSNIP_SAFE_SIZE_MAX) /* a PSNIP_SAFE_SIZE_MAX predefined by the includer is kept as is */
+# if defined(__SIZE_MAX__)
+# define PSNIP_SAFE_SIZE_MAX __SIZE_MAX__
+# elif defined(PSNIP_EXACT_INT_HAVE_STDINT) /* NOTE(review): this branch only includes <stdint.h>; it never defines PSNIP_SAFE_SIZE_MAX (for example from SIZE_MAX) -- confirm that is intended */
+# include <stdint.h>
+# endif
+#endif
+
+#if defined(PSNIP_SAFE_SIZE_MAX) /* PSNIP_SAFE__SIZE_MAX_RT is the bound handed to the size_t operation macros */
+# define PSNIP_SAFE__SIZE_MAX_RT PSNIP_SAFE_SIZE_MAX
+#else
+# define PSNIP_SAFE__SIZE_MAX_RT (~((size_t) 0)) /* all-ones size_t; contains a cast, so it cannot be evaluated in #if */
+#endif
+
+#if defined(PSNIP_SAFE_HAVE_INTSAFE_H) /* include intsafe.h only when this feature macro is set */
+/* In VS 10, stdint.h and intsafe.h both define (U)INTN_MIN/MAX, which
+ triggers warning C4005 (level 1). */
+# if defined(_MSC_VER) && (_MSC_VER == 1600) /* 1600 is the only _MSC_VER value for which the warning is silenced */
+# pragma warning(push)
+# pragma warning(disable:4005)
+# endif
+# include <intsafe.h>
+# if defined(_MSC_VER) && (_MSC_VER == 1600) /* restore the warning state saved by the push above */
+# pragma warning(pop)
+# endif
+#endif /* defined(PSNIP_SAFE_HAVE_INTSAFE_H) */
+
+/* If there is a type larger than the one we're concerned with it's
+ * likely much faster to simply promote the operands, perform the
+ * requested operation, verify that the result falls within the
+ * original type, then cast the result back to the original type. */
+
+#if !defined(PSNIP_SAFE_NO_PROMOTIONS)
+
+#define PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, op_name, op) /* emits psnip_safe_larger_<name>_<op_name>(T a, T b), returning psnip_safe_<name>_larger */ \
+ PSNIP_SAFE__FUNCTION psnip_safe_##name##_larger \
+ psnip_safe_larger_##name##_##op_name (T a, T b) { \
+ return ((psnip_safe_##name##_larger) a) op ((psnip_safe_##name##_larger) b); /* both operands are cast to the larger type before op is applied */ \
+ }
+
+#define PSNIP_SAFE_DEFINE_LARGER_UNARY_OP(T, name, op_name, op) /* emits psnip_safe_larger_<name>_<op_name>(T value), returning psnip_safe_<name>_larger */ \
+ PSNIP_SAFE__FUNCTION psnip_safe_##name##_larger \
+ psnip_safe_larger_##name##_##op_name (T value) { \
+ return (op ((psnip_safe_##name##_larger) value)); /* the operand is cast to the larger type before op is applied */ \
+ }
+
+#define PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(T, name) /* emits the widened add, sub, mul, div, mod and neg helpers for signed type T */ \
+ PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, add, +) \
+ PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, sub, -) \
+ PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, mul, *) \
+ PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, div, /) \
+ PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, mod, %) \
+ PSNIP_SAFE_DEFINE_LARGER_UNARY_OP (T, name, neg, -)
+
+#define PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(T, name) /* emits the widened add, sub, mul, div and mod helpers for unsigned type T; no neg helper */ \
+ PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, add, +) \
+ PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, sub, -) \
+ PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, mul, *) \
+ PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, div, /) \
+ PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, mod, %)
+
+#define PSNIP_SAFE_IS_LARGER(ORIG_MAX, DEST_MAX) (((DEST_MAX) / (ORIG_MAX)) >= (ORIG_MAX)) /* true when DEST_MAX / ORIG_MAX >= ORIG_MAX, i.e. ORIG_MAX * ORIG_MAX fits in DEST; arguments are parenthesised so expression-valued limits group correctly */
+
+#if defined(__GNUC__) && ((__GNUC__ >= 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__SIZEOF_INT128__) && !defined(__ibmxl__) /* NOTE(review): the second alternative is already covered by __GNUC__ >= 4, so the effective version test is just __GNUC__ >= 4; tightening it to 4.6+ would exclude compilers that report an older 4.x version yet define __SIZEOF_INT128__ -- confirm before changing */
+#define PSNIP_SAFE_HAVE_128
+typedef __int128 psnip_safe_int128_t;
+typedef unsigned __int128 psnip_safe_uint128_t;
+#endif /* defined(__GNUC__) */
+
+#if !defined(PSNIP_SAFE_NO_FIXED) /* each fixed-width type is paired with the type of twice its width */
+#define PSNIP_SAFE_HAVE_INT8_LARGER /* NOTE(review): these flags are spelled HAVE_<type>_LARGER, while the native-type flags in this file are spelled HAVE_LARGER_<type> -- confirm which spelling the consumers test */
+#define PSNIP_SAFE_HAVE_UINT8_LARGER
+typedef psnip_int16_t psnip_safe_int8_larger;
+typedef psnip_uint16_t psnip_safe_uint8_larger;
+
+#define PSNIP_SAFE_HAVE_INT16_LARGER /* NOTE(review): from here on only the signed flag is defined although both typedefs are emitted -- confirm that no unsigned flag is needed */
+typedef psnip_int32_t psnip_safe_int16_larger;
+typedef psnip_uint32_t psnip_safe_uint16_larger;
+
+#define PSNIP_SAFE_HAVE_INT32_LARGER
+typedef psnip_int64_t psnip_safe_int32_larger;
+typedef psnip_uint64_t psnip_safe_uint32_larger;
+
+#if defined(PSNIP_SAFE_HAVE_128) /* the 64-bit types get a larger type only when the 128-bit typedefs exist */
+#define PSNIP_SAFE_HAVE_INT64_LARGER
+typedef psnip_safe_int128_t psnip_safe_int64_larger;
+typedef psnip_safe_uint128_t psnip_safe_uint64_larger;
+#endif /* defined(PSNIP_SAFE_HAVE_128) */
+#endif /* !defined(PSNIP_SAFE_NO_FIXED) */
+
+#define PSNIP_SAFE_HAVE_LARGER_SCHAR /* defined up front; the final #else removes it when no candidate qualifies */
+#if PSNIP_SAFE_IS_LARGER(SCHAR_MAX, SHRT_MAX) /* candidates are tried in order and the first that passes is used: native types first */
+typedef short psnip_safe_schar_larger;
+#elif PSNIP_SAFE_IS_LARGER(SCHAR_MAX, INT_MAX)
+typedef int psnip_safe_schar_larger;
+#elif PSNIP_SAFE_IS_LARGER(SCHAR_MAX, LONG_MAX)
+typedef long psnip_safe_schar_larger;
+#elif PSNIP_SAFE_IS_LARGER(SCHAR_MAX, LLONG_MAX)
+typedef long long psnip_safe_schar_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(SCHAR_MAX, 0x7fff) /* then the fixed-width types, unless PSNIP_SAFE_NO_FIXED is defined */
+typedef psnip_int16_t psnip_safe_schar_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(SCHAR_MAX, 0x7fffffffLL)
+typedef psnip_int32_t psnip_safe_schar_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(SCHAR_MAX, 0x7fffffffffffffffLL)
+typedef psnip_int64_t psnip_safe_schar_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (SCHAR_MAX <= 0x7fffffffffffffffLL) /* finally the 128-bit type, accepted when the original fits in 64 bits */
+typedef psnip_safe_int128_t psnip_safe_schar_larger;
+#else
+#undef PSNIP_SAFE_HAVE_LARGER_SCHAR
+#endif
+
+#define PSNIP_SAFE_HAVE_LARGER_UCHAR /* defined up front; the final #else removes it when no candidate qualifies */
+#if PSNIP_SAFE_IS_LARGER(UCHAR_MAX, USHRT_MAX) /* candidates are tried in order and the first that passes is used: native types first */
+typedef unsigned short psnip_safe_uchar_larger;
+#elif PSNIP_SAFE_IS_LARGER(UCHAR_MAX, UINT_MAX)
+typedef unsigned int psnip_safe_uchar_larger;
+#elif PSNIP_SAFE_IS_LARGER(UCHAR_MAX, ULONG_MAX)
+typedef unsigned long psnip_safe_uchar_larger;
+#elif PSNIP_SAFE_IS_LARGER(UCHAR_MAX, ULLONG_MAX)
+typedef unsigned long long psnip_safe_uchar_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(UCHAR_MAX, 0xffffU) /* then the fixed-width types, unless PSNIP_SAFE_NO_FIXED is defined */
+typedef psnip_uint16_t psnip_safe_uchar_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(UCHAR_MAX, 0xffffffffUL)
+typedef psnip_uint32_t psnip_safe_uchar_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(UCHAR_MAX, 0xffffffffffffffffULL)
+typedef psnip_uint64_t psnip_safe_uchar_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (UCHAR_MAX <= 0xffffffffffffffffULL) /* finally the 128-bit type, accepted when the original fits in 64 bits */
+typedef psnip_safe_uint128_t psnip_safe_uchar_larger;
+#else
+#undef PSNIP_SAFE_HAVE_LARGER_UCHAR
+#endif
+
+#if CHAR_MIN == 0 && defined(PSNIP_SAFE_HAVE_LARGER_UCHAR) /* CHAR_MIN == 0 means plain char is unsigned: reuse the unsigned char choice */
+#define PSNIP_SAFE_HAVE_LARGER_CHAR
+typedef psnip_safe_uchar_larger psnip_safe_char_larger;
+#elif CHAR_MIN < 0 && defined(PSNIP_SAFE_HAVE_LARGER_SCHAR) /* negative CHAR_MIN means plain char is signed: reuse the signed char choice */
+#define PSNIP_SAFE_HAVE_LARGER_CHAR
+typedef psnip_safe_schar_larger psnip_safe_char_larger;
+#endif
+
+#define PSNIP_SAFE_HAVE_LARGER_SHRT /* original spelling, kept for any existing user */
+#define PSNIP_SAFE_HAVE_LARGER_SHORT /* spelling tested where the short helpers are instantiated; without it that guard never fires */
+#if PSNIP_SAFE_IS_LARGER(SHRT_MAX, INT_MAX) /* candidates are tried in order and the first that passes is used: native types first */
+typedef int psnip_safe_short_larger;
+#elif PSNIP_SAFE_IS_LARGER(SHRT_MAX, LONG_MAX)
+typedef long psnip_safe_short_larger;
+#elif PSNIP_SAFE_IS_LARGER(SHRT_MAX, LLONG_MAX)
+typedef long long psnip_safe_short_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(SHRT_MAX, 0x7fff) /* then the fixed-width types, unless PSNIP_SAFE_NO_FIXED is defined */
+typedef psnip_int16_t psnip_safe_short_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(SHRT_MAX, 0x7fffffffLL)
+typedef psnip_int32_t psnip_safe_short_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(SHRT_MAX, 0x7fffffffffffffffLL)
+typedef psnip_int64_t psnip_safe_short_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (SHRT_MAX <= 0x7fffffffffffffffLL) /* finally the 128-bit type, accepted when the original fits in 64 bits */
+typedef psnip_safe_int128_t psnip_safe_short_larger;
+#else /* no candidate qualified: retract both spellings */
+#undef PSNIP_SAFE_HAVE_LARGER_SHRT
+#undef PSNIP_SAFE_HAVE_LARGER_SHORT
+#endif
+
+#define PSNIP_SAFE_HAVE_LARGER_USHRT /* original spelling, kept for any existing user */
+#define PSNIP_SAFE_HAVE_LARGER_USHORT /* spelling tested where the unsigned short helpers and promoted operations are instantiated; without it those guards never fire */
+#if PSNIP_SAFE_IS_LARGER(USHRT_MAX, UINT_MAX) /* candidates are tried in order and the first that passes is used: native types first */
+typedef unsigned int psnip_safe_ushort_larger;
+#elif PSNIP_SAFE_IS_LARGER(USHRT_MAX, ULONG_MAX)
+typedef unsigned long psnip_safe_ushort_larger;
+#elif PSNIP_SAFE_IS_LARGER(USHRT_MAX, ULLONG_MAX)
+typedef unsigned long long psnip_safe_ushort_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(USHRT_MAX, 0xffff) /* then the fixed-width types, unless PSNIP_SAFE_NO_FIXED is defined */
+typedef psnip_uint16_t psnip_safe_ushort_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(USHRT_MAX, 0xffffffffUL)
+typedef psnip_uint32_t psnip_safe_ushort_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(USHRT_MAX, 0xffffffffffffffffULL)
+typedef psnip_uint64_t psnip_safe_ushort_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (USHRT_MAX <= 0xffffffffffffffffULL) /* finally the 128-bit type, accepted when the original fits in 64 bits */
+typedef psnip_safe_uint128_t psnip_safe_ushort_larger;
+#else /* no candidate qualified: retract both spellings */
+#undef PSNIP_SAFE_HAVE_LARGER_USHRT
+#undef PSNIP_SAFE_HAVE_LARGER_USHORT
+#endif
+
+#define PSNIP_SAFE_HAVE_LARGER_INT /* defined up front; the final #else removes it when no candidate qualifies */
+#if PSNIP_SAFE_IS_LARGER(INT_MAX, LONG_MAX) /* candidates are tried in order and the first that passes is used: native types first */
+typedef long psnip_safe_int_larger;
+#elif PSNIP_SAFE_IS_LARGER(INT_MAX, LLONG_MAX)
+typedef long long psnip_safe_int_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(INT_MAX, 0x7fff) /* then the fixed-width types, unless PSNIP_SAFE_NO_FIXED is defined */
+typedef psnip_int16_t psnip_safe_int_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(INT_MAX, 0x7fffffffLL)
+typedef psnip_int32_t psnip_safe_int_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(INT_MAX, 0x7fffffffffffffffLL)
+typedef psnip_int64_t psnip_safe_int_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (INT_MAX <= 0x7fffffffffffffffLL) /* finally the 128-bit type, accepted when the original fits in 64 bits */
+typedef psnip_safe_int128_t psnip_safe_int_larger;
+#else
+#undef PSNIP_SAFE_HAVE_LARGER_INT
+#endif
+
+#define PSNIP_SAFE_HAVE_LARGER_UINT /* defined up front; the final #else removes it when no candidate qualifies */
+#if PSNIP_SAFE_IS_LARGER(UINT_MAX, ULONG_MAX) /* candidates are tried in order and the first that passes is used: native types first */
+typedef unsigned long psnip_safe_uint_larger;
+#elif PSNIP_SAFE_IS_LARGER(UINT_MAX, ULLONG_MAX)
+typedef unsigned long long psnip_safe_uint_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(UINT_MAX, 0xffff) /* then the fixed-width types, unless PSNIP_SAFE_NO_FIXED is defined */
+typedef psnip_uint16_t psnip_safe_uint_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(UINT_MAX, 0xffffffffUL)
+typedef psnip_uint32_t psnip_safe_uint_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(UINT_MAX, 0xffffffffffffffffULL)
+typedef psnip_uint64_t psnip_safe_uint_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (UINT_MAX <= 0xffffffffffffffffULL) /* finally the 128-bit type, accepted when the original fits in 64 bits */
+typedef psnip_safe_uint128_t psnip_safe_uint_larger;
+#else
+#undef PSNIP_SAFE_HAVE_LARGER_UINT
+#endif
+
+#define PSNIP_SAFE_HAVE_LARGER_LONG /* defined up front; the final #else removes it when no candidate qualifies */
+#if PSNIP_SAFE_IS_LARGER(LONG_MAX, LLONG_MAX) /* long long is the only native candidate */
+typedef long long psnip_safe_long_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(LONG_MAX, 0x7fff) /* then the fixed-width types, unless PSNIP_SAFE_NO_FIXED is defined */
+typedef psnip_int16_t psnip_safe_long_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(LONG_MAX, 0x7fffffffLL)
+typedef psnip_int32_t psnip_safe_long_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(LONG_MAX, 0x7fffffffffffffffLL)
+typedef psnip_int64_t psnip_safe_long_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (LONG_MAX <= 0x7fffffffffffffffLL) /* finally the 128-bit type, accepted when the original fits in 64 bits */
+typedef psnip_safe_int128_t psnip_safe_long_larger;
+#else
+#undef PSNIP_SAFE_HAVE_LARGER_LONG
+#endif
+
+#define PSNIP_SAFE_HAVE_LARGER_ULONG /* defined up front; the final #else removes it when no candidate qualifies */
+#if PSNIP_SAFE_IS_LARGER(ULONG_MAX, ULLONG_MAX) /* unsigned long long is the only native candidate */
+typedef unsigned long long psnip_safe_ulong_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(ULONG_MAX, 0xffff) /* then the fixed-width types, unless PSNIP_SAFE_NO_FIXED is defined */
+typedef psnip_uint16_t psnip_safe_ulong_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(ULONG_MAX, 0xffffffffUL)
+typedef psnip_uint32_t psnip_safe_ulong_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(ULONG_MAX, 0xffffffffffffffffULL)
+typedef psnip_uint64_t psnip_safe_ulong_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (ULONG_MAX <= 0xffffffffffffffffULL) /* finally the 128-bit type, accepted when the original fits in 64 bits */
+typedef psnip_safe_uint128_t psnip_safe_ulong_larger;
+#else
+#undef PSNIP_SAFE_HAVE_LARGER_ULONG
+#endif
+
+#define PSNIP_SAFE_HAVE_LARGER_LLONG /* defined up front; the final #else removes it when no candidate qualifies */
+#if !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(LLONG_MAX, 0x7fff) /* no native candidate is tried for long long; only fixed-width types and the 128-bit type */
+typedef psnip_int16_t psnip_safe_llong_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(LLONG_MAX, 0x7fffffffLL)
+typedef psnip_int32_t psnip_safe_llong_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(LLONG_MAX, 0x7fffffffffffffffLL)
+typedef psnip_int64_t psnip_safe_llong_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (LLONG_MAX <= 0x7fffffffffffffffLL) /* 128-bit type, accepted when long long fits in 64 bits */
+typedef psnip_safe_int128_t psnip_safe_llong_larger;
+#else
+#undef PSNIP_SAFE_HAVE_LARGER_LLONG
+#endif
+
+#define PSNIP_SAFE_HAVE_LARGER_ULLONG /* defined up front; the final #else removes it when no candidate qualifies */
+#if !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(ULLONG_MAX, 0xffff) /* no native candidate is tried for unsigned long long; only fixed-width types and the 128-bit type */
+typedef psnip_uint16_t psnip_safe_ullong_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(ULLONG_MAX, 0xffffffffUL)
+typedef psnip_uint32_t psnip_safe_ullong_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(ULLONG_MAX, 0xffffffffffffffffULL)
+typedef psnip_uint64_t psnip_safe_ullong_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (ULLONG_MAX <= 0xffffffffffffffffULL) /* 128-bit type, accepted when unsigned long long fits in 64 bits */
+typedef psnip_safe_uint128_t psnip_safe_ullong_larger;
+#else
+#undef PSNIP_SAFE_HAVE_LARGER_ULLONG
+#endif
+
+#if defined(PSNIP_SAFE_SIZE_MAX) /* a larger type for size_t is searched for only when its maximum is known to the preprocessor */
+#define PSNIP_SAFE_HAVE_LARGER_SIZE /* defined up front; the final #else removes it when no candidate qualifies */
+#if PSNIP_SAFE_IS_LARGER(PSNIP_SAFE_SIZE_MAX, USHRT_MAX) /* candidates are tried in order and the first that passes is used: native unsigned types first */
+typedef unsigned short psnip_safe_size_larger;
+#elif PSNIP_SAFE_IS_LARGER(PSNIP_SAFE_SIZE_MAX, UINT_MAX)
+typedef unsigned int psnip_safe_size_larger;
+#elif PSNIP_SAFE_IS_LARGER(PSNIP_SAFE_SIZE_MAX, ULONG_MAX)
+typedef unsigned long psnip_safe_size_larger;
+#elif PSNIP_SAFE_IS_LARGER(PSNIP_SAFE_SIZE_MAX, ULLONG_MAX)
+typedef unsigned long long psnip_safe_size_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(PSNIP_SAFE_SIZE_MAX, 0xffff) /* then the fixed-width types, unless PSNIP_SAFE_NO_FIXED is defined */
+typedef psnip_uint16_t psnip_safe_size_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(PSNIP_SAFE_SIZE_MAX, 0xffffffffUL)
+typedef psnip_uint32_t psnip_safe_size_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(PSNIP_SAFE_SIZE_MAX, 0xffffffffffffffffULL)
+typedef psnip_uint64_t psnip_safe_size_larger;
+#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (PSNIP_SAFE_SIZE_MAX <= 0xffffffffffffffffULL) /* finally the 128-bit type, accepted when size_t fits in 64 bits */
+typedef psnip_safe_uint128_t psnip_safe_size_larger;
+#else
+#undef PSNIP_SAFE_HAVE_LARGER_SIZE
+#endif
+#endif
+/* Instantiate the wider-type helpers for every type that has a companion
+ * "larger" type, using PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS and
+ * PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS (defined outside this section;
+ * presumably they emit the psnip_safe_larger_<name>_<op> functions that the
+ * promoted ops call). Plain `char` takes the unsigned variant when
+ * CHAR_MIN == 0. The 64-bit fixed-width helpers are only emitted when a
+ * 128-bit type is available (PSNIP_SAFE_HAVE_128). */
+#if defined(PSNIP_SAFE_HAVE_LARGER_SCHAR)
+PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(signed char, schar)
+#endif
+
+#if defined(PSNIP_SAFE_HAVE_LARGER_UCHAR)
+PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(unsigned char, uchar)
+#endif
+
+#if defined(PSNIP_SAFE_HAVE_LARGER_CHAR)
+#if CHAR_MIN == 0
+PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(char, char)
+#else
+PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(char, char)
+#endif
+#endif
+
+#if defined(PSNIP_SAFE_HAVE_LARGER_SHORT)
+PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(short, short)
+#endif
+
+#if defined(PSNIP_SAFE_HAVE_LARGER_USHORT)
+PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(unsigned short, ushort)
+#endif
+
+#if defined(PSNIP_SAFE_HAVE_LARGER_INT)
+PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(int, int)
+#endif
+
+#if defined(PSNIP_SAFE_HAVE_LARGER_UINT)
+PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(unsigned int, uint)
+#endif
+
+#if defined(PSNIP_SAFE_HAVE_LARGER_LONG)
+PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(long, long)
+#endif
+
+#if defined(PSNIP_SAFE_HAVE_LARGER_ULONG)
+PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(unsigned long, ulong)
+#endif
+
+#if defined(PSNIP_SAFE_HAVE_LARGER_LLONG)
+PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(long long, llong)
+#endif
+
+#if defined(PSNIP_SAFE_HAVE_LARGER_ULLONG)
+PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(unsigned long long, ullong)
+#endif
+
+#if defined(PSNIP_SAFE_HAVE_LARGER_SIZE)
+PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(size_t, size)
+#endif
+
+#if !defined(PSNIP_SAFE_NO_FIXED)
+PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(psnip_int8_t, int8)
+PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(psnip_uint8_t, uint8)
+PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(psnip_int16_t, int16)
+PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(psnip_uint16_t, uint16)
+PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(psnip_int32_t, int32)
+PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(psnip_uint32_t, uint32)
+#if defined(PSNIP_SAFE_HAVE_128)
+PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(psnip_int64_t, int64)
+PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(psnip_uint64_t, uint64)
+#endif
+#endif
+
+#endif /* !defined(PSNIP_SAFE_NO_PROMOTIONS) */
+
+/* Emit psnip_safe_<name>_<op_name>() as a thin wrapper around the compiler's
+ * __builtin_<op_name>_overflow(a, b, res). The builtin's overflow flag is
+ * inverted, so a non-zero return means the operation succeeded. */
+#define PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(T, name, op_name) \
+  PSNIP_SAFE__FUNCTION psnip_safe_bool \
+  psnip_safe_##name##_##op_name(T* res, T a, T b) { \
+    if (__builtin_##op_name##_overflow(a, b, res)) \
+      return 0; \
+    return 1; \
+  }
+
+/* Emit psnip_safe_<name>_<op_name>() for a signed type that has a wider
+ * companion type: the operation is carried out by
+ * psnip_safe_larger_<name>_<op_name>(), the result is narrowed into *res
+ * unconditionally, and success is reported when the wide result lies in
+ * [min, max]. */
+#define PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(T, name, op_name, min, max) \
+  PSNIP_SAFE__FUNCTION psnip_safe_bool \
+  psnip_safe_##name##_##op_name(T* res, T a, T b) { \
+    const psnip_safe_##name##_larger wide = psnip_safe_larger_##name##_##op_name(a, b); \
+    *res = (T) wide; \
+    if (wide < min) \
+      return 0; \
+    return (wide <= max); \
+  }
+
+/* Unsigned counterpart of the promoted op: compute in the wider companion
+ * type through psnip_safe_larger_<name>_<op_name>(), always store the
+ * narrowed value in *res, and report success when the wide result does not
+ * exceed max. */
+#define PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(T, name, op_name, max) \
+  PSNIP_SAFE__FUNCTION psnip_safe_bool \
+  psnip_safe_##name##_##op_name(T* res, T a, T b) { \
+    const psnip_safe_##name##_larger wide = psnip_safe_larger_##name##_##op_name(a, b); \
+    *res = (T) wide; \
+    return !(wide > max); \
+  }
+
+/* Portable checked addition for a signed type T with range [min, max].
+ * The operands are range-tested before adding, so the sum is only computed
+ * and stored in *res when it fits; on overflow *res is left untouched and
+ * 0 is returned. */
+#define PSNIP_SAFE_DEFINE_SIGNED_ADD(T, name, min, max) \
+  PSNIP_SAFE__FUNCTION psnip_safe_bool \
+  psnip_safe_##name##_add (T* res, T a, T b) { \
+    psnip_safe_bool fits; \
+    if (b > 0) { \
+      fits = !(a > (max - b)); \
+    } else if (b < 0) { \
+      fits = !(a < (min - b)); \
+    } else { \
+      fits = 1; \
+    } \
+    if (PSNIP_SAFE_LIKELY(fits)) \
+      *res = a + b; \
+    return fits; \
+  }
+
+/* Portable checked addition for an unsigned type T with maximum max.
+ * The sum, narrowed to T, is always stored in *res; the return value is 0
+ * when a + b would exceed max. */
+#define PSNIP_SAFE_DEFINE_UNSIGNED_ADD(T, name, max) \
+  PSNIP_SAFE__FUNCTION psnip_safe_bool \
+  psnip_safe_##name##_add (T* res, T a, T b) { \
+    const psnip_safe_bool wrapped = PSNIP_SAFE_UNLIKELY((b > 0) && (a > (max - b))); \
+    *res = (T) (a + b); \
+    return !wrapped; \
+  }
+
+/* Portable checked subtraction for a signed type T with range [min, max].
+ * The difference is only computed and stored in *res when the range test
+ * passes; on overflow *res is left untouched and 0 is returned. */
+#define PSNIP_SAFE_DEFINE_SIGNED_SUB(T, name, min, max) \
+  PSNIP_SAFE__FUNCTION psnip_safe_bool \
+  psnip_safe_##name##_sub (T* res, T a, T b) { \
+    psnip_safe_bool fits; \
+    if (b > 0) { \
+      fits = !(a < (min + b)); \
+    } else if (b < 0) { \
+      fits = !(a > (max + b)); \
+    } else { \
+      fits = 1; \
+    } \
+    if (PSNIP_SAFE_LIKELY(fits)) \
+      *res = a - b; \
+    return fits; \
+  }
+
+/* Portable checked subtraction for an unsigned type T. The difference,
+ * narrowed to T, is always stored in *res; the return value is 0 when
+ * b > a. The max argument is accepted for symmetry with the other
+ * generators and is not used. */
+#define PSNIP_SAFE_DEFINE_UNSIGNED_SUB(T, name, max) \
+  PSNIP_SAFE__FUNCTION psnip_safe_bool \
+  psnip_safe_##name##_sub (T* res, T a, T b) { \
+    const psnip_safe_bool underflow = PSNIP_SAFE_UNLIKELY(b > a); \
+    *res = (T) (a - b); \
+    return !underflow; \
+  }
+
+/* Portable checked multiplication for a signed type T with range
+ * [min, max]. The four sign combinations of (a, b) are each tested with a
+ * division against the relevant bound before multiplying; the a != 0 guard
+ * keeps the last case from dividing by zero. The product is only computed
+ * and stored in *res when it fits; otherwise *res is left untouched and 0
+ * is returned. */
+#define PSNIP_SAFE_DEFINE_SIGNED_MUL(T, name, min, max) \
+  PSNIP_SAFE__FUNCTION psnip_safe_bool \
+  psnip_safe_##name##_mul (T* res, T a, T b) { \
+    psnip_safe_bool overflow; \
+    if (a > 0) { \
+      overflow = (b > 0) ? (a > (max / b)) : (b < (min / a)); \
+    } else { \
+      overflow = (b > 0) ? (a < (min / b)) \
+                         : ((a != 0) && (b < (max / a))); \
+    } \
+    if (PSNIP_SAFE_LIKELY(!overflow)) \
+      *res = a * b; \
+    return !overflow; \
+  }
+
+/* Portable checked multiplication for an unsigned type T with maximum max.
+ * The product, reduced modulo the width of T, is always stored in *res; the
+ * return value is 0 when a * b would exceed max.
+ *
+ * The multiplication is forced into unsigned arithmetic with the leading
+ * 1U factor: when T is narrower than int (e.g. a 16-bit unsigned short),
+ * a plain a * b is promoted to signed int and 0xffff * 0xffff overflows
+ * it, which is undefined behaviour. */
+#define PSNIP_SAFE_DEFINE_UNSIGNED_MUL(T, name, max) \
+ PSNIP_SAFE__FUNCTION psnip_safe_bool \
+ psnip_safe_##name##_mul (T* res, T a, T b) { \
+ *res = (T) ((1U * a) * b); \
+ return !PSNIP_SAFE_UNLIKELY((a > 0) && (b > 0) && (a > (max / b))); \
+ }
+
+/* Checked division for a signed type T with range [min, max].
+ * Fails (returns 0) on division by zero, storing 0 in *res, and on
+ * min / -1, storing min in *res; otherwise stores the quotient and
+ * returns 1. */
+#define PSNIP_SAFE_DEFINE_SIGNED_DIV(T, name, min, max) \
+  PSNIP_SAFE__FUNCTION psnip_safe_bool \
+  psnip_safe_##name##_div (T* res, T a, T b) { \
+    if (PSNIP_SAFE_UNLIKELY(b == 0)) { \
+      *res = 0; \
+      return 0; \
+    } \
+    if (PSNIP_SAFE_UNLIKELY(a == min && b == -1)) { \
+      *res = min; \
+      return 0; \
+    } \
+    *res = (T) (a / b); \
+    return 1; \
+  }
+
+/* Checked division for an unsigned type T. Division by zero stores 0 in
+ * *res and returns 0; any other input stores the quotient and returns 1.
+ * The max argument is not used. */
+#define PSNIP_SAFE_DEFINE_UNSIGNED_DIV(T, name, max) \
+  PSNIP_SAFE__FUNCTION psnip_safe_bool \
+  psnip_safe_##name##_div (T* res, T a, T b) { \
+    if (PSNIP_SAFE_UNLIKELY(b == 0)) { \
+      *res = 0; \
+      return 0; \
+    } \
+    *res = a / b; \
+    return 1; \
+  }
+
+/* Checked remainder for a signed type T with range [min, max].
+ * Fails (returns 0) when b is zero, storing 0 in *res, and for min % -1,
+ * storing min in *res; otherwise stores a % b and returns 1. */
+#define PSNIP_SAFE_DEFINE_SIGNED_MOD(T, name, min, max) \
+  PSNIP_SAFE__FUNCTION psnip_safe_bool \
+  psnip_safe_##name##_mod (T* res, T a, T b) { \
+    if (PSNIP_SAFE_UNLIKELY(b == 0)) { \
+      *res = 0; \
+      return 0; \
+    } \
+    if (PSNIP_SAFE_UNLIKELY(a == min && b == -1)) { \
+      *res = min; \
+      return 0; \
+    } \
+    *res = (T) (a % b); \
+    return 1; \
+  }
+
+/* Checked remainder for an unsigned type T. A zero divisor stores 0 in
+ * *res and returns 0; any other input stores a % b and returns 1.
+ * The max argument is not used. */
+#define PSNIP_SAFE_DEFINE_UNSIGNED_MOD(T, name, max) \
+  PSNIP_SAFE__FUNCTION psnip_safe_bool \
+  psnip_safe_##name##_mod (T* res, T a, T b) { \
+    if (PSNIP_SAFE_UNLIKELY(b == 0)) { \
+      *res = 0; \
+      return 0; \
+    } \
+    *res = a % b; \
+    return 1; \
+  }
+
+/* Checked negation for a signed type T with range [min, max].
+ * Negating min is the only failing input: *res is then set to max and 0 is
+ * returned. Every other value stores -value and returns 1. */
+#define PSNIP_SAFE_DEFINE_SIGNED_NEG(T, name, min, max) \
+  PSNIP_SAFE__FUNCTION psnip_safe_bool \
+  psnip_safe_##name##_neg (T* res, T value) { \
+    const psnip_safe_bool negatable = value != min; \
+    if (PSNIP_SAFE_LIKELY(negatable)) { \
+      *res = -value; \
+    } else { \
+      *res = max; \
+    } \
+    return negatable; \
+  }
+
+/* Emit psnip_safe_<name>_<op>() as a wrapper around the helper isf, which
+ * is called as isf(a, b, res); the operation counts as successful exactly
+ * when the helper returns S_OK. */
+#define PSNIP_SAFE_DEFINE_INTSAFE(T, name, op, isf) \
+  PSNIP_SAFE__FUNCTION psnip_safe_bool \
+  psnip_safe_##name##_##op (T* res, T a, T b) { \
+    if (isf(a, b, res) != S_OK) \
+      return 0; \
+    return 1; \
+  }
+/* Plain `char`. Its signedness is taken from CHAR_MIN: when CHAR_MIN == 0
+ * the unsigned generators are used (and no neg is emitted), otherwise the
+ * signed ones. In both cases add/sub/mul use the overflow builtins when
+ * available, else the wider-type versions, else the portable range checks;
+ * div/mod (and neg) always use the portable versions. */
+#if CHAR_MIN == 0
+#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(char, char, add)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(char, char, sub)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(char, char, mul)
+#elif defined(PSNIP_SAFE_HAVE_LARGER_CHAR)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(char, char, add, CHAR_MAX)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(char, char, sub, CHAR_MAX)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(char, char, mul, CHAR_MAX)
+#else
+PSNIP_SAFE_DEFINE_UNSIGNED_ADD(char, char, CHAR_MAX)
+PSNIP_SAFE_DEFINE_UNSIGNED_SUB(char, char, CHAR_MAX)
+PSNIP_SAFE_DEFINE_UNSIGNED_MUL(char, char, CHAR_MAX)
+#endif
+PSNIP_SAFE_DEFINE_UNSIGNED_DIV(char, char, CHAR_MAX)
+PSNIP_SAFE_DEFINE_UNSIGNED_MOD(char, char, CHAR_MAX)
+#else /* CHAR_MIN != 0 */
+#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(char, char, add)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(char, char, sub)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(char, char, mul)
+#elif defined(PSNIP_SAFE_HAVE_LARGER_CHAR)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(char, char, add, CHAR_MIN, CHAR_MAX)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(char, char, sub, CHAR_MIN, CHAR_MAX)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(char, char, mul, CHAR_MIN, CHAR_MAX)
+#else
+PSNIP_SAFE_DEFINE_SIGNED_ADD(char, char, CHAR_MIN, CHAR_MAX)
+PSNIP_SAFE_DEFINE_SIGNED_SUB(char, char, CHAR_MIN, CHAR_MAX)
+PSNIP_SAFE_DEFINE_SIGNED_MUL(char, char, CHAR_MIN, CHAR_MAX)
+#endif
+PSNIP_SAFE_DEFINE_SIGNED_DIV(char, char, CHAR_MIN, CHAR_MAX)
+PSNIP_SAFE_DEFINE_SIGNED_MOD(char, char, CHAR_MIN, CHAR_MAX)
+PSNIP_SAFE_DEFINE_SIGNED_NEG(char, char, CHAR_MIN, CHAR_MAX)
+#endif
+/* `signed char`: add/sub/mul use the overflow builtins, else the wider-type
+ * versions, else the portable checks; div/mod/neg are always portable. */
+#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(signed char, schar, add)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(signed char, schar, sub)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(signed char, schar, mul)
+#elif defined(PSNIP_SAFE_HAVE_LARGER_SCHAR)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(signed char, schar, add, SCHAR_MIN, SCHAR_MAX)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(signed char, schar, sub, SCHAR_MIN, SCHAR_MAX)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(signed char, schar, mul, SCHAR_MIN, SCHAR_MAX)
+#else
+PSNIP_SAFE_DEFINE_SIGNED_ADD(signed char, schar, SCHAR_MIN, SCHAR_MAX)
+PSNIP_SAFE_DEFINE_SIGNED_SUB(signed char, schar, SCHAR_MIN, SCHAR_MAX)
+PSNIP_SAFE_DEFINE_SIGNED_MUL(signed char, schar, SCHAR_MIN, SCHAR_MAX)
+#endif
+PSNIP_SAFE_DEFINE_SIGNED_DIV(signed char, schar, SCHAR_MIN, SCHAR_MAX)
+PSNIP_SAFE_DEFINE_SIGNED_MOD(signed char, schar, SCHAR_MIN, SCHAR_MAX)
+PSNIP_SAFE_DEFINE_SIGNED_NEG(signed char, schar, SCHAR_MIN, SCHAR_MAX)
+/* `unsigned char`: add/sub/mul use the overflow builtins, else the
+ * wider-type versions, else the portable checks; div/mod are always
+ * portable. */
+#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned char, uchar, add)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned char, uchar, sub)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned char, uchar, mul)
+#elif defined(PSNIP_SAFE_HAVE_LARGER_UCHAR)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned char, uchar, add, UCHAR_MAX)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned char, uchar, sub, UCHAR_MAX)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned char, uchar, mul, UCHAR_MAX)
+#else
+PSNIP_SAFE_DEFINE_UNSIGNED_ADD(unsigned char, uchar, UCHAR_MAX)
+PSNIP_SAFE_DEFINE_UNSIGNED_SUB(unsigned char, uchar, UCHAR_MAX)
+PSNIP_SAFE_DEFINE_UNSIGNED_MUL(unsigned char, uchar, UCHAR_MAX)
+#endif
+PSNIP_SAFE_DEFINE_UNSIGNED_DIV(unsigned char, uchar, UCHAR_MAX)
+PSNIP_SAFE_DEFINE_UNSIGNED_MOD(unsigned char, uchar, UCHAR_MAX)
+/* `short`: add/sub/mul use the overflow builtins, else the wider-type
+ * versions, else the portable checks; div/mod/neg are always portable. */
+#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(short, short, add)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(short, short, sub)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(short, short, mul)
+#elif defined(PSNIP_SAFE_HAVE_LARGER_SHORT)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(short, short, add, SHRT_MIN, SHRT_MAX)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(short, short, sub, SHRT_MIN, SHRT_MAX)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(short, short, mul, SHRT_MIN, SHRT_MAX)
+#else
+PSNIP_SAFE_DEFINE_SIGNED_ADD(short, short, SHRT_MIN, SHRT_MAX)
+PSNIP_SAFE_DEFINE_SIGNED_SUB(short, short, SHRT_MIN, SHRT_MAX)
+PSNIP_SAFE_DEFINE_SIGNED_MUL(short, short, SHRT_MIN, SHRT_MAX)
+#endif
+PSNIP_SAFE_DEFINE_SIGNED_DIV(short, short, SHRT_MIN, SHRT_MAX)
+PSNIP_SAFE_DEFINE_SIGNED_MOD(short, short, SHRT_MIN, SHRT_MAX)
+PSNIP_SAFE_DEFINE_SIGNED_NEG(short, short, SHRT_MIN, SHRT_MAX)
+/* `unsigned short`: add/sub/mul use the overflow builtins, else the
+ * UShortAdd/UShortSub/UShortMult helpers when PSNIP_SAFE_HAVE_INTSAFE_H is
+ * defined, else the wider-type versions, else the portable checks; div/mod
+ * are always portable. */
+#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned short, ushort, add)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned short, ushort, sub)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned short, ushort, mul)
+#elif defined(PSNIP_SAFE_HAVE_INTSAFE_H)
+PSNIP_SAFE_DEFINE_INTSAFE(unsigned short, ushort, add, UShortAdd)
+PSNIP_SAFE_DEFINE_INTSAFE(unsigned short, ushort, sub, UShortSub)
+PSNIP_SAFE_DEFINE_INTSAFE(unsigned short, ushort, mul, UShortMult)
+#elif defined(PSNIP_SAFE_HAVE_LARGER_USHORT)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned short, ushort, add, USHRT_MAX)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned short, ushort, sub, USHRT_MAX)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned short, ushort, mul, USHRT_MAX)
+#else
+PSNIP_SAFE_DEFINE_UNSIGNED_ADD(unsigned short, ushort, USHRT_MAX)
+PSNIP_SAFE_DEFINE_UNSIGNED_SUB(unsigned short, ushort, USHRT_MAX)
+PSNIP_SAFE_DEFINE_UNSIGNED_MUL(unsigned short, ushort, USHRT_MAX)
+#endif
+PSNIP_SAFE_DEFINE_UNSIGNED_DIV(unsigned short, ushort, USHRT_MAX)
+PSNIP_SAFE_DEFINE_UNSIGNED_MOD(unsigned short, ushort, USHRT_MAX)
+/* `int`: add/sub/mul use the overflow builtins, else the wider-type
+ * versions, else the portable checks; div/mod/neg are always portable. */
+#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(int, int, add)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(int, int, sub)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(int, int, mul)
+#elif defined(PSNIP_SAFE_HAVE_LARGER_INT)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(int, int, add, INT_MIN, INT_MAX)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(int, int, sub, INT_MIN, INT_MAX)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(int, int, mul, INT_MIN, INT_MAX)
+#else
+PSNIP_SAFE_DEFINE_SIGNED_ADD(int, int, INT_MIN, INT_MAX)
+PSNIP_SAFE_DEFINE_SIGNED_SUB(int, int, INT_MIN, INT_MAX)
+PSNIP_SAFE_DEFINE_SIGNED_MUL(int, int, INT_MIN, INT_MAX)
+#endif
+PSNIP_SAFE_DEFINE_SIGNED_DIV(int, int, INT_MIN, INT_MAX)
+PSNIP_SAFE_DEFINE_SIGNED_MOD(int, int, INT_MIN, INT_MAX)
+PSNIP_SAFE_DEFINE_SIGNED_NEG(int, int, INT_MIN, INT_MAX)
+/* `unsigned int`: add/sub/mul use the overflow builtins, else the
+ * UIntAdd/UIntSub/UIntMult helpers when PSNIP_SAFE_HAVE_INTSAFE_H is
+ * defined, else the wider-type versions, else the portable checks; div/mod
+ * are always portable. */
+#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned int, uint, add)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned int, uint, sub)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned int, uint, mul)
+#elif defined(PSNIP_SAFE_HAVE_INTSAFE_H)
+PSNIP_SAFE_DEFINE_INTSAFE(unsigned int, uint, add, UIntAdd)
+PSNIP_SAFE_DEFINE_INTSAFE(unsigned int, uint, sub, UIntSub)
+PSNIP_SAFE_DEFINE_INTSAFE(unsigned int, uint, mul, UIntMult)
+#elif defined(PSNIP_SAFE_HAVE_LARGER_UINT)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned int, uint, add, UINT_MAX)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned int, uint, sub, UINT_MAX)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned int, uint, mul, UINT_MAX)
+#else
+PSNIP_SAFE_DEFINE_UNSIGNED_ADD(unsigned int, uint, UINT_MAX)
+PSNIP_SAFE_DEFINE_UNSIGNED_SUB(unsigned int, uint, UINT_MAX)
+PSNIP_SAFE_DEFINE_UNSIGNED_MUL(unsigned int, uint, UINT_MAX)
+#endif
+PSNIP_SAFE_DEFINE_UNSIGNED_DIV(unsigned int, uint, UINT_MAX)
+PSNIP_SAFE_DEFINE_UNSIGNED_MOD(unsigned int, uint, UINT_MAX)
+/* `long`: add/sub/mul use the overflow builtins, else the wider-type
+ * versions, else the portable checks; div/mod/neg are always portable. */
+#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(long, long, add)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(long, long, sub)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(long, long, mul)
+#elif defined(PSNIP_SAFE_HAVE_LARGER_LONG)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(long, long, add, LONG_MIN, LONG_MAX)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(long, long, sub, LONG_MIN, LONG_MAX)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(long, long, mul, LONG_MIN, LONG_MAX)
+#else
+PSNIP_SAFE_DEFINE_SIGNED_ADD(long, long, LONG_MIN, LONG_MAX)
+PSNIP_SAFE_DEFINE_SIGNED_SUB(long, long, LONG_MIN, LONG_MAX)
+PSNIP_SAFE_DEFINE_SIGNED_MUL(long, long, LONG_MIN, LONG_MAX)
+#endif
+PSNIP_SAFE_DEFINE_SIGNED_DIV(long, long, LONG_MIN, LONG_MAX)
+PSNIP_SAFE_DEFINE_SIGNED_MOD(long, long, LONG_MIN, LONG_MAX)
+PSNIP_SAFE_DEFINE_SIGNED_NEG(long, long, LONG_MIN, LONG_MAX)
+/* `unsigned long`: add/sub/mul use the overflow builtins, else the
+ * ULongAdd/ULongSub/ULongMult helpers when PSNIP_SAFE_HAVE_INTSAFE_H is
+ * defined, else the wider-type versions, else the portable checks; div/mod
+ * are always portable. */
+#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned long, ulong, add)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned long, ulong, sub)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned long, ulong, mul)
+#elif defined(PSNIP_SAFE_HAVE_INTSAFE_H)
+PSNIP_SAFE_DEFINE_INTSAFE(unsigned long, ulong, add, ULongAdd)
+PSNIP_SAFE_DEFINE_INTSAFE(unsigned long, ulong, sub, ULongSub)
+PSNIP_SAFE_DEFINE_INTSAFE(unsigned long, ulong, mul, ULongMult)
+#elif defined(PSNIP_SAFE_HAVE_LARGER_ULONG)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned long, ulong, add, ULONG_MAX)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned long, ulong, sub, ULONG_MAX)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned long, ulong, mul, ULONG_MAX)
+#else
+PSNIP_SAFE_DEFINE_UNSIGNED_ADD(unsigned long, ulong, ULONG_MAX)
+PSNIP_SAFE_DEFINE_UNSIGNED_SUB(unsigned long, ulong, ULONG_MAX)
+PSNIP_SAFE_DEFINE_UNSIGNED_MUL(unsigned long, ulong, ULONG_MAX)
+#endif
+PSNIP_SAFE_DEFINE_UNSIGNED_DIV(unsigned long, ulong, ULONG_MAX)
+PSNIP_SAFE_DEFINE_UNSIGNED_MOD(unsigned long, ulong, ULONG_MAX)
+/* `long long`: add/sub/mul use the overflow builtins, else the wider-type
+ * versions, else the portable checks; div/mod/neg are always portable. */
+#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(long long, llong, add)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(long long, llong, sub)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(long long, llong, mul)
+#elif defined(PSNIP_SAFE_HAVE_LARGER_LLONG)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(long long, llong, add, LLONG_MIN, LLONG_MAX)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(long long, llong, sub, LLONG_MIN, LLONG_MAX)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(long long, llong, mul, LLONG_MIN, LLONG_MAX)
+#else
+PSNIP_SAFE_DEFINE_SIGNED_ADD(long long, llong, LLONG_MIN, LLONG_MAX)
+PSNIP_SAFE_DEFINE_SIGNED_SUB(long long, llong, LLONG_MIN, LLONG_MAX)
+PSNIP_SAFE_DEFINE_SIGNED_MUL(long long, llong, LLONG_MIN, LLONG_MAX)
+#endif
+PSNIP_SAFE_DEFINE_SIGNED_DIV(long long, llong, LLONG_MIN, LLONG_MAX)
+PSNIP_SAFE_DEFINE_SIGNED_MOD(long long, llong, LLONG_MIN, LLONG_MAX)
+PSNIP_SAFE_DEFINE_SIGNED_NEG(long long, llong, LLONG_MIN, LLONG_MAX)
+/* `unsigned long long`: add/sub/mul use the overflow builtins, else the
+ * ULongLongAdd/ULongLongSub/ULongLongMult helpers when
+ * PSNIP_SAFE_HAVE_INTSAFE_H is defined, else the wider-type versions, else
+ * the portable checks; div/mod are always portable. */
+#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned long long, ullong, add)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned long long, ullong, sub)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned long long, ullong, mul)
+#elif defined(PSNIP_SAFE_HAVE_INTSAFE_H)
+PSNIP_SAFE_DEFINE_INTSAFE(unsigned long long, ullong, add, ULongLongAdd)
+PSNIP_SAFE_DEFINE_INTSAFE(unsigned long long, ullong, sub, ULongLongSub)
+PSNIP_SAFE_DEFINE_INTSAFE(unsigned long long, ullong, mul, ULongLongMult)
+#elif defined(PSNIP_SAFE_HAVE_LARGER_ULLONG)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned long long, ullong, add, ULLONG_MAX)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned long long, ullong, sub, ULLONG_MAX)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned long long, ullong, mul, ULLONG_MAX)
+#else
+PSNIP_SAFE_DEFINE_UNSIGNED_ADD(unsigned long long, ullong, ULLONG_MAX)
+PSNIP_SAFE_DEFINE_UNSIGNED_SUB(unsigned long long, ullong, ULLONG_MAX)
+PSNIP_SAFE_DEFINE_UNSIGNED_MUL(unsigned long long, ullong, ULLONG_MAX)
+#endif
+PSNIP_SAFE_DEFINE_UNSIGNED_DIV(unsigned long long, ullong, ULLONG_MAX)
+PSNIP_SAFE_DEFINE_UNSIGNED_MOD(unsigned long long, ullong, ULLONG_MAX)
+/* `size_t`: add/sub/mul use the overflow builtins, else the
+ * SizeTAdd/SizeTSub/SizeTMult helpers when PSNIP_SAFE_HAVE_INTSAFE_H is
+ * defined, else the wider-type versions, else the portable checks; div/mod
+ * are always portable. The upper bound is PSNIP_SAFE__SIZE_MAX_RT (defined
+ * outside this section). */
+#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(size_t, size, add)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(size_t, size, sub)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(size_t, size, mul)
+#elif defined(PSNIP_SAFE_HAVE_INTSAFE_H)
+PSNIP_SAFE_DEFINE_INTSAFE(size_t, size, add, SizeTAdd)
+PSNIP_SAFE_DEFINE_INTSAFE(size_t, size, sub, SizeTSub)
+PSNIP_SAFE_DEFINE_INTSAFE(size_t, size, mul, SizeTMult)
+#elif defined(PSNIP_SAFE_HAVE_LARGER_SIZE)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(size_t, size, add, PSNIP_SAFE__SIZE_MAX_RT)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(size_t, size, sub, PSNIP_SAFE__SIZE_MAX_RT)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(size_t, size, mul, PSNIP_SAFE__SIZE_MAX_RT)
+#else
+PSNIP_SAFE_DEFINE_UNSIGNED_ADD(size_t, size, PSNIP_SAFE__SIZE_MAX_RT)
+PSNIP_SAFE_DEFINE_UNSIGNED_SUB(size_t, size, PSNIP_SAFE__SIZE_MAX_RT)
+PSNIP_SAFE_DEFINE_UNSIGNED_MUL(size_t, size, PSNIP_SAFE__SIZE_MAX_RT)
+#endif
+PSNIP_SAFE_DEFINE_UNSIGNED_DIV(size_t, size, PSNIP_SAFE__SIZE_MAX_RT)
+PSNIP_SAFE_DEFINE_UNSIGNED_MOD(size_t, size, PSNIP_SAFE__SIZE_MAX_RT)
+
+#if !defined(PSNIP_SAFE_NO_FIXED)
+/* psnip_int8_t, range [-0x80, 0x7f]: add/sub/mul use the overflow builtins,
+ * else the wider-type versions, else the portable checks; div/mod/neg are
+ * always portable. */
+#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int8_t, int8, add)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int8_t, int8, sub)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int8_t, int8, mul)
+#elif defined(PSNIP_SAFE_HAVE_LARGER_INT8)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int8_t, int8, add, (-0x7fLL-1), 0x7f)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int8_t, int8, sub, (-0x7fLL-1), 0x7f)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int8_t, int8, mul, (-0x7fLL-1), 0x7f)
+#else
+PSNIP_SAFE_DEFINE_SIGNED_ADD(psnip_int8_t, int8, (-0x7fLL-1), 0x7f)
+PSNIP_SAFE_DEFINE_SIGNED_SUB(psnip_int8_t, int8, (-0x7fLL-1), 0x7f)
+PSNIP_SAFE_DEFINE_SIGNED_MUL(psnip_int8_t, int8, (-0x7fLL-1), 0x7f)
+#endif
+PSNIP_SAFE_DEFINE_SIGNED_DIV(psnip_int8_t, int8, (-0x7fLL-1), 0x7f)
+PSNIP_SAFE_DEFINE_SIGNED_MOD(psnip_int8_t, int8, (-0x7fLL-1), 0x7f)
+PSNIP_SAFE_DEFINE_SIGNED_NEG(psnip_int8_t, int8, (-0x7fLL-1), 0x7f)
+/* psnip_uint8_t, maximum 0xff: add/sub/mul use the overflow builtins, else
+ * the wider-type versions, else the portable checks; div/mod are always
+ * portable. */
+#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint8_t, uint8, add)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint8_t, uint8, sub)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint8_t, uint8, mul)
+#elif defined(PSNIP_SAFE_HAVE_LARGER_UINT8)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint8_t, uint8, add, 0xff)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint8_t, uint8, sub, 0xff)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint8_t, uint8, mul, 0xff)
+#else
+PSNIP_SAFE_DEFINE_UNSIGNED_ADD(psnip_uint8_t, uint8, 0xff)
+PSNIP_SAFE_DEFINE_UNSIGNED_SUB(psnip_uint8_t, uint8, 0xff)
+PSNIP_SAFE_DEFINE_UNSIGNED_MUL(psnip_uint8_t, uint8, 0xff)
+#endif
+PSNIP_SAFE_DEFINE_UNSIGNED_DIV(psnip_uint8_t, uint8, 0xff)
+PSNIP_SAFE_DEFINE_UNSIGNED_MOD(psnip_uint8_t, uint8, 0xff)
+/* psnip_int16_t, range [-32768, 0x7fff]: add/sub/mul use the overflow
+ * builtins, else the wider-type versions, else the portable checks;
+ * div/mod/neg are always portable. */
+#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int16_t, int16, add)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int16_t, int16, sub)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int16_t, int16, mul)
+#elif defined(PSNIP_SAFE_HAVE_LARGER_INT16)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int16_t, int16, add, (-32767-1), 0x7fff)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int16_t, int16, sub, (-32767-1), 0x7fff)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int16_t, int16, mul, (-32767-1), 0x7fff)
+#else
+PSNIP_SAFE_DEFINE_SIGNED_ADD(psnip_int16_t, int16, (-32767-1), 0x7fff)
+PSNIP_SAFE_DEFINE_SIGNED_SUB(psnip_int16_t, int16, (-32767-1), 0x7fff)
+PSNIP_SAFE_DEFINE_SIGNED_MUL(psnip_int16_t, int16, (-32767-1), 0x7fff)
+#endif
+PSNIP_SAFE_DEFINE_SIGNED_DIV(psnip_int16_t, int16, (-32767-1), 0x7fff)
+PSNIP_SAFE_DEFINE_SIGNED_MOD(psnip_int16_t, int16, (-32767-1), 0x7fff)
+PSNIP_SAFE_DEFINE_SIGNED_NEG(psnip_int16_t, int16, (-32767-1), 0x7fff)
+/* psnip_uint16_t, maximum 0xffff: add/sub/mul use the overflow builtins,
+ * else the UShort* helpers (only when both PSNIP_SAFE_HAVE_INTSAFE_H and
+ * _WIN32 are defined), else the wider-type versions, else the portable
+ * checks; div/mod are always portable. */
+#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint16_t, uint16, add)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint16_t, uint16, sub)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint16_t, uint16, mul)
+#elif defined(PSNIP_SAFE_HAVE_INTSAFE_H) && defined(_WIN32)
+PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint16_t, uint16, add, UShortAdd)
+PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint16_t, uint16, sub, UShortSub)
+PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint16_t, uint16, mul, UShortMult)
+#elif defined(PSNIP_SAFE_HAVE_LARGER_UINT16)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint16_t, uint16, add, 0xffff)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint16_t, uint16, sub, 0xffff)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint16_t, uint16, mul, 0xffff)
+#else
+PSNIP_SAFE_DEFINE_UNSIGNED_ADD(psnip_uint16_t, uint16, 0xffff)
+PSNIP_SAFE_DEFINE_UNSIGNED_SUB(psnip_uint16_t, uint16, 0xffff)
+PSNIP_SAFE_DEFINE_UNSIGNED_MUL(psnip_uint16_t, uint16, 0xffff)
+#endif
+PSNIP_SAFE_DEFINE_UNSIGNED_DIV(psnip_uint16_t, uint16, 0xffff)
+PSNIP_SAFE_DEFINE_UNSIGNED_MOD(psnip_uint16_t, uint16, 0xffff)
+/* psnip_int32_t, range [-0x80000000, 0x7fffffff]: add/sub/mul use the
+ * overflow builtins, else the wider-type versions, else the portable
+ * checks; div/mod/neg are always portable. */
+#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int32_t, int32, add)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int32_t, int32, sub)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int32_t, int32, mul)
+#elif defined(PSNIP_SAFE_HAVE_LARGER_INT32)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int32_t, int32, add, (-0x7fffffffLL-1), 0x7fffffffLL)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int32_t, int32, sub, (-0x7fffffffLL-1), 0x7fffffffLL)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int32_t, int32, mul, (-0x7fffffffLL-1), 0x7fffffffLL)
+#else
+PSNIP_SAFE_DEFINE_SIGNED_ADD(psnip_int32_t, int32, (-0x7fffffffLL-1), 0x7fffffffLL)
+PSNIP_SAFE_DEFINE_SIGNED_SUB(psnip_int32_t, int32, (-0x7fffffffLL-1), 0x7fffffffLL)
+PSNIP_SAFE_DEFINE_SIGNED_MUL(psnip_int32_t, int32, (-0x7fffffffLL-1), 0x7fffffffLL)
+#endif
+PSNIP_SAFE_DEFINE_SIGNED_DIV(psnip_int32_t, int32, (-0x7fffffffLL-1), 0x7fffffffLL)
+PSNIP_SAFE_DEFINE_SIGNED_MOD(psnip_int32_t, int32, (-0x7fffffffLL-1), 0x7fffffffLL)
+PSNIP_SAFE_DEFINE_SIGNED_NEG(psnip_int32_t, int32, (-0x7fffffffLL-1), 0x7fffffffLL)
+/* psnip_uint32_t, maximum 0xffffffff: add/sub/mul use the overflow
+ * builtins, else the UInt* helpers (only when both
+ * PSNIP_SAFE_HAVE_INTSAFE_H and _WIN32 are defined), else the wider-type
+ * versions, else the portable checks; div/mod are always portable. */
+#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint32_t, uint32, add)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint32_t, uint32, sub)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint32_t, uint32, mul)
+#elif defined(PSNIP_SAFE_HAVE_INTSAFE_H) && defined(_WIN32)
+PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint32_t, uint32, add, UIntAdd)
+PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint32_t, uint32, sub, UIntSub)
+PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint32_t, uint32, mul, UIntMult)
+#elif defined(PSNIP_SAFE_HAVE_LARGER_UINT32)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint32_t, uint32, add, 0xffffffffUL)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint32_t, uint32, sub, 0xffffffffUL)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint32_t, uint32, mul, 0xffffffffUL)
+#else
+PSNIP_SAFE_DEFINE_UNSIGNED_ADD(psnip_uint32_t, uint32, 0xffffffffUL)
+PSNIP_SAFE_DEFINE_UNSIGNED_SUB(psnip_uint32_t, uint32, 0xffffffffUL)
+PSNIP_SAFE_DEFINE_UNSIGNED_MUL(psnip_uint32_t, uint32, 0xffffffffUL)
+#endif
+PSNIP_SAFE_DEFINE_UNSIGNED_DIV(psnip_uint32_t, uint32, 0xffffffffUL)
+PSNIP_SAFE_DEFINE_UNSIGNED_MOD(psnip_uint32_t, uint32, 0xffffffffUL)
+/* psnip_int64_t, range [-0x8000000000000000, 0x7fffffffffffffff]:
+ * add/sub/mul use the overflow builtins, else the wider-type versions, else
+ * the portable checks; div/mod/neg are always portable. */
+#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int64_t, int64, add)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int64_t, int64, sub)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int64_t, int64, mul)
+#elif defined(PSNIP_SAFE_HAVE_LARGER_INT64)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int64_t, int64, add, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int64_t, int64, sub, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL)
+PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int64_t, int64, mul, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL)
+#else
+PSNIP_SAFE_DEFINE_SIGNED_ADD(psnip_int64_t, int64, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL)
+PSNIP_SAFE_DEFINE_SIGNED_SUB(psnip_int64_t, int64, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL)
+PSNIP_SAFE_DEFINE_SIGNED_MUL(psnip_int64_t, int64, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL)
+#endif
+PSNIP_SAFE_DEFINE_SIGNED_DIV(psnip_int64_t, int64, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL)
+PSNIP_SAFE_DEFINE_SIGNED_MOD(psnip_int64_t, int64, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL)
+PSNIP_SAFE_DEFINE_SIGNED_NEG(psnip_int64_t, int64, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL)
+/* psnip_uint64_t, maximum 0xffffffffffffffff: add/sub/mul use the overflow
+ * builtins, else the ULongLong* helpers (only when both
+ * PSNIP_SAFE_HAVE_INTSAFE_H and _WIN32 are defined), else the wider-type
+ * versions, else the portable checks; div/mod are always portable. */
+#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint64_t, uint64, add)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint64_t, uint64, sub)
+PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint64_t, uint64, mul)
+#elif defined(PSNIP_SAFE_HAVE_INTSAFE_H) && defined(_WIN32)
+PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint64_t, uint64, add, ULongLongAdd)
+PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint64_t, uint64, sub, ULongLongSub)
+PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint64_t, uint64, mul, ULongLongMult)
+#elif defined(PSNIP_SAFE_HAVE_LARGER_UINT64)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint64_t, uint64, add, 0xffffffffffffffffULL)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint64_t, uint64, sub, 0xffffffffffffffffULL)
+PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint64_t, uint64, mul, 0xffffffffffffffffULL)
+#else
+PSNIP_SAFE_DEFINE_UNSIGNED_ADD(psnip_uint64_t, uint64, 0xffffffffffffffffULL)
+PSNIP_SAFE_DEFINE_UNSIGNED_SUB(psnip_uint64_t, uint64, 0xffffffffffffffffULL)
+PSNIP_SAFE_DEFINE_UNSIGNED_MUL(psnip_uint64_t, uint64, 0xffffffffffffffffULL)
+#endif
+PSNIP_SAFE_DEFINE_UNSIGNED_DIV(psnip_uint64_t, uint64, 0xffffffffffffffffULL)
+PSNIP_SAFE_DEFINE_UNSIGNED_MOD(psnip_uint64_t, uint64, 0xffffffffffffffffULL)
+
+#endif /* !defined(PSNIP_SAFE_NO_FIXED) */
+
+/* _Generic dispatch on the type that `res` points to.
+ *
+ * PSNIP_SAFE_C11_GENERIC_SELECTION picks psnip_safe_<type>_<op> for every
+ * native integer type and is used for the binary operations, which exist
+ * for all of them. `signed char` is a type distinct from both `char` and
+ * `unsigned char`, so it needs its own association. */
+#define PSNIP_SAFE_C11_GENERIC_SELECTION(res, op) \
+ _Generic((*res), \
+ char: psnip_safe_char_##op, \
+ signed char: psnip_safe_schar_##op, \
+ unsigned char: psnip_safe_uchar_##op, \
+ short: psnip_safe_short_##op, \
+ unsigned short: psnip_safe_ushort_##op, \
+ int: psnip_safe_int_##op, \
+ unsigned int: psnip_safe_uint_##op, \
+ long: psnip_safe_long_##op, \
+ unsigned long: psnip_safe_ulong_##op, \
+ long long: psnip_safe_llong_##op, \
+ unsigned long long: psnip_safe_ullong_##op)
+
+/* Unary operations (neg) are only generated for signed types, and every
+ * association in a _Generic must name a declared function even when it is
+ * not the one selected. The unary table therefore lists signed types only;
+ * plain `char` is included only when it is signed (CHAR_MIN != 0), because
+ * psnip_safe_char_neg is not defined otherwise. */
+#if CHAR_MIN == 0
+#define PSNIP_SAFE__C11_GENERIC_CHAR_IF_SIGNED(op)
+#else
+#define PSNIP_SAFE__C11_GENERIC_CHAR_IF_SIGNED(op) char: psnip_safe_char_##op,
+#endif
+
+#define PSNIP_SAFE_C11_GENERIC_SIGNED_SELECTION(res, op) \
+ _Generic((*res), \
+ PSNIP_SAFE__C11_GENERIC_CHAR_IF_SIGNED(op) \
+ signed char: psnip_safe_schar_##op, \
+ short: psnip_safe_short_##op, \
+ int: psnip_safe_int_##op, \
+ long: psnip_safe_long_##op, \
+ long long: psnip_safe_llong_##op)
+
+#define PSNIP_SAFE_C11_GENERIC_BINARY_OP(op, res, a, b) \
+ PSNIP_SAFE_C11_GENERIC_SELECTION(res, op)(res, a, b)
+#define PSNIP_SAFE_C11_GENERIC_UNARY_OP(op, res, v) \
+ PSNIP_SAFE_C11_GENERIC_SIGNED_SELECTION(res, op)(res, v)
+
+/* Type-generic front ends: psnip_safe_<op>(res, a, b) evaluates to non-zero
+ * when the operation succeeded. With the compiler overflow builtins,
+ * add/sub/mul map straight onto them. There are no div or mod overflow
+ * builtins, so those (like neg) are dispatched on the result type through
+ * the _Generic tables. Every expansion is parenthesized or a single call,
+ * so it can be combined safely with surrounding operators. */
+#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW)
+#define psnip_safe_add(res, a, b) (!__builtin_add_overflow(a, b, res))
+#define psnip_safe_sub(res, a, b) (!__builtin_sub_overflow(a, b, res))
+#define psnip_safe_mul(res, a, b) (!__builtin_mul_overflow(a, b, res))
+#define psnip_safe_div(res, a, b) PSNIP_SAFE_C11_GENERIC_BINARY_OP(div, res, a, b)
+#define psnip_safe_mod(res, a, b) PSNIP_SAFE_C11_GENERIC_BINARY_OP(mod, res, a, b)
+#define psnip_safe_neg(res, v) PSNIP_SAFE_C11_GENERIC_UNARY_OP (neg, res, v)
+
+#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
+/* There are no fixed-length or size selections because they cause an
+ * error about _Generic specifying two compatible types. Hopefully
+ * this doesn't cause problems on exotic platforms, but if it does
+ * please let me know and I'll try to figure something out. */
+
+#define psnip_safe_add(res, a, b) PSNIP_SAFE_C11_GENERIC_BINARY_OP(add, res, a, b)
+#define psnip_safe_sub(res, a, b) PSNIP_SAFE_C11_GENERIC_BINARY_OP(sub, res, a, b)
+#define psnip_safe_mul(res, a, b) PSNIP_SAFE_C11_GENERIC_BINARY_OP(mul, res, a, b)
+#define psnip_safe_div(res, a, b) PSNIP_SAFE_C11_GENERIC_BINARY_OP(div, res, a, b)
+#define psnip_safe_mod(res, a, b) PSNIP_SAFE_C11_GENERIC_BINARY_OP(mod, res, a, b)
+#define psnip_safe_neg(res, v) PSNIP_SAFE_C11_GENERIC_UNARY_OP (neg, res, v)
+#endif
+/* Optional emulation of the typed overflow builtins. When emulation is
+ * requested (PSNIP_SAFE_EMULATE_NATIVE or PSNIP_BUILTIN_EMULATE_NATIVE) and
+ * PSNIP_SAFE_HAVE_BUILTINS is not defined, the
+ * __builtin_{s,u}{add,sub,mul}{,l,ll}_overflow names become macros over the
+ * int/long/long long psnip_safe_* functions, with the result inverted so
+ * that non-zero means overflow.
+ * NOTE(review): the rest of this section tests
+ * PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW, not PSNIP_SAFE_HAVE_BUILTINS; confirm
+ * the latter is defined somewhere, otherwise these macros are also defined
+ * on compilers that already provide the real builtins. */
+#if !defined(PSNIP_SAFE_HAVE_BUILTINS) && (defined(PSNIP_SAFE_EMULATE_NATIVE) || defined(PSNIP_BUILTIN_EMULATE_NATIVE))
+# define __builtin_sadd_overflow(a, b, res) (!psnip_safe_int_add(res, a, b))
+# define __builtin_saddl_overflow(a, b, res) (!psnip_safe_long_add(res, a, b))
+# define __builtin_saddll_overflow(a, b, res) (!psnip_safe_llong_add(res, a, b))
+# define __builtin_uadd_overflow(a, b, res) (!psnip_safe_uint_add(res, a, b))
+# define __builtin_uaddl_overflow(a, b, res) (!psnip_safe_ulong_add(res, a, b))
+# define __builtin_uaddll_overflow(a, b, res) (!psnip_safe_ullong_add(res, a, b))
+
+# define __builtin_ssub_overflow(a, b, res) (!psnip_safe_int_sub(res, a, b))
+# define __builtin_ssubl_overflow(a, b, res) (!psnip_safe_long_sub(res, a, b))
+# define __builtin_ssubll_overflow(a, b, res) (!psnip_safe_llong_sub(res, a, b))
+# define __builtin_usub_overflow(a, b, res) (!psnip_safe_uint_sub(res, a, b))
+# define __builtin_usubl_overflow(a, b, res) (!psnip_safe_ulong_sub(res, a, b))
+# define __builtin_usubll_overflow(a, b, res) (!psnip_safe_ullong_sub(res, a, b))
+
+# define __builtin_smul_overflow(a, b, res) (!psnip_safe_int_mul(res, a, b))
+# define __builtin_smull_overflow(a, b, res) (!psnip_safe_long_mul(res, a, b))
+# define __builtin_smulll_overflow(a, b, res) (!psnip_safe_llong_mul(res, a, b))
+# define __builtin_umul_overflow(a, b, res) (!psnip_safe_uint_mul(res, a, b))
+# define __builtin_umull_overflow(a, b, res) (!psnip_safe_ulong_mul(res, a, b))
+# define __builtin_umulll_overflow(a, b, res) (!psnip_safe_ullong_mul(res, a, b))
+#endif
+
+#endif /* !defined(PSNIP_SAFE_H) */
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/string_view.hpp b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/string_view.hpp
index a2d5567854f..3b8a7144f54 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/string_view.hpp
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/string_view.hpp
@@ -1,1531 +1,1531 @@
-// Vendored from git changeset v1.4.0
-
-// Copyright 2017-2020 by Martin Moene
-//
-// string-view lite, a C++17-like string_view for C++98 and later.
-// For more information see https://github.com/martinmoene/string-view-lite
-//
-// Distributed under the Boost Software License, Version 1.0.
-// (See accompanying file LICENSE.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
-
-#pragma once
-
-#ifndef NONSTD_SV_LITE_H_INCLUDED
-#define NONSTD_SV_LITE_H_INCLUDED
-
-#define string_view_lite_MAJOR 1
-#define string_view_lite_MINOR 4
-#define string_view_lite_PATCH 0
-
-#define string_view_lite_VERSION nssv_STRINGIFY(string_view_lite_MAJOR) "." nssv_STRINGIFY(string_view_lite_MINOR) "." nssv_STRINGIFY(string_view_lite_PATCH)
-
-#define nssv_STRINGIFY( x ) nssv_STRINGIFY_( x )
-#define nssv_STRINGIFY_( x ) #x
-
-// string-view lite configuration:
-
-#define nssv_STRING_VIEW_DEFAULT 0
-#define nssv_STRING_VIEW_NONSTD 1
-#define nssv_STRING_VIEW_STD 2
-
-#if !defined( nssv_CONFIG_SELECT_STRING_VIEW )
-# define nssv_CONFIG_SELECT_STRING_VIEW ( nssv_HAVE_STD_STRING_VIEW ? nssv_STRING_VIEW_STD : nssv_STRING_VIEW_NONSTD )
-#endif
-
-#if defined( nssv_CONFIG_SELECT_STD_STRING_VIEW ) || defined( nssv_CONFIG_SELECT_NONSTD_STRING_VIEW )
-# error nssv_CONFIG_SELECT_STD_STRING_VIEW and nssv_CONFIG_SELECT_NONSTD_STRING_VIEW are deprecated and removed, please use nssv_CONFIG_SELECT_STRING_VIEW=nssv_STRING_VIEW_...
-#endif
-
-#ifndef nssv_CONFIG_STD_SV_OPERATOR
-# define nssv_CONFIG_STD_SV_OPERATOR 0
-#endif
-
-#ifndef nssv_CONFIG_USR_SV_OPERATOR
-# define nssv_CONFIG_USR_SV_OPERATOR 1
-#endif
-
-#ifdef nssv_CONFIG_CONVERSION_STD_STRING
-# define nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS nssv_CONFIG_CONVERSION_STD_STRING
-# define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS nssv_CONFIG_CONVERSION_STD_STRING
-#endif
-
-#ifndef nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS
-# define nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS 1
-#endif
-
-#ifndef nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS
-# define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS 1
-#endif
-
-// Control presence of exception handling (try and auto discover):
-
-#ifndef nssv_CONFIG_NO_EXCEPTIONS
-# if defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND)
-# define nssv_CONFIG_NO_EXCEPTIONS 0
-# else
-# define nssv_CONFIG_NO_EXCEPTIONS 1
-# endif
-#endif
-
-// C++ language version detection (C++20 is speculative):
-// Note: VC14.0/1900 (VS2015) lacks too much from C++14.
-
-#ifndef nssv_CPLUSPLUS
-# if defined(_MSVC_LANG ) && !defined(__clang__)
-# define nssv_CPLUSPLUS (_MSC_VER == 1900 ? 201103L : _MSVC_LANG )
-# else
-# define nssv_CPLUSPLUS __cplusplus
-# endif
-#endif
-
-#define nssv_CPP98_OR_GREATER ( nssv_CPLUSPLUS >= 199711L )
-#define nssv_CPP11_OR_GREATER ( nssv_CPLUSPLUS >= 201103L )
-#define nssv_CPP11_OR_GREATER_ ( nssv_CPLUSPLUS >= 201103L )
-#define nssv_CPP14_OR_GREATER ( nssv_CPLUSPLUS >= 201402L )
-#define nssv_CPP17_OR_GREATER ( nssv_CPLUSPLUS >= 201703L )
-#define nssv_CPP20_OR_GREATER ( nssv_CPLUSPLUS >= 202000L )
-
-// use C++17 std::string_view if available and requested:
-
-#if nssv_CPP17_OR_GREATER && defined(__has_include )
-# if __has_include( <string_view> )
-# define nssv_HAVE_STD_STRING_VIEW 1
-# else
-# define nssv_HAVE_STD_STRING_VIEW 0
-# endif
-#else
-# define nssv_HAVE_STD_STRING_VIEW 0
-#endif
-
-#define nssv_USES_STD_STRING_VIEW ( (nssv_CONFIG_SELECT_STRING_VIEW == nssv_STRING_VIEW_STD) || ((nssv_CONFIG_SELECT_STRING_VIEW == nssv_STRING_VIEW_DEFAULT) && nssv_HAVE_STD_STRING_VIEW) )
-
-#define nssv_HAVE_STARTS_WITH ( nssv_CPP20_OR_GREATER || !nssv_USES_STD_STRING_VIEW )
-#define nssv_HAVE_ENDS_WITH nssv_HAVE_STARTS_WITH
-
-//
-// Use C++17 std::string_view:
-//
-
-#if nssv_USES_STD_STRING_VIEW
-
-#include <string_view>
-
-// Extensions for std::string:
-
-#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS
-
-namespace nonstd {
-
-template< class CharT, class Traits, class Allocator = std::allocator<CharT> >
-std::basic_string<CharT, Traits, Allocator>
-to_string( std::basic_string_view<CharT, Traits> v, Allocator const & a = Allocator() )
-{
- return std::basic_string<CharT,Traits, Allocator>( v.begin(), v.end(), a );
-}
-
-template< class CharT, class Traits, class Allocator >
-std::basic_string_view<CharT, Traits>
-to_string_view( std::basic_string<CharT, Traits, Allocator> const & s )
-{
- return std::basic_string_view<CharT, Traits>( s.data(), s.size() );
-}
-
-// Literal operators sv and _sv:
-
-#if nssv_CONFIG_STD_SV_OPERATOR
-
-using namespace std::literals::string_view_literals;
-
-#endif
-
-#if nssv_CONFIG_USR_SV_OPERATOR
-
-inline namespace literals {
-inline namespace string_view_literals {
-
-
-constexpr std::string_view operator "" _sv( const char* str, size_t len ) noexcept // (1)
-{
- return std::string_view{ str, len };
-}
-
-constexpr std::u16string_view operator "" _sv( const char16_t* str, size_t len ) noexcept // (2)
-{
- return std::u16string_view{ str, len };
-}
-
-constexpr std::u32string_view operator "" _sv( const char32_t* str, size_t len ) noexcept // (3)
-{
- return std::u32string_view{ str, len };
-}
-
-constexpr std::wstring_view operator "" _sv( const wchar_t* str, size_t len ) noexcept // (4)
-{
- return std::wstring_view{ str, len };
-}
-
-}} // namespace literals::string_view_literals
-
-#endif // nssv_CONFIG_USR_SV_OPERATOR
-
-} // namespace nonstd
-
-#endif // nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS
-
-namespace nonstd {
-
-using std::string_view;
-using std::wstring_view;
-using std::u16string_view;
-using std::u32string_view;
-using std::basic_string_view;
-
-// literal "sv" and "_sv", see above
-
-using std::operator==;
-using std::operator!=;
-using std::operator<;
-using std::operator<=;
-using std::operator>;
-using std::operator>=;
-
-using std::operator<<;
-
-} // namespace nonstd
-
-#else // nssv_HAVE_STD_STRING_VIEW
-
-//
-// Before C++17: use string_view lite:
-//
-
-// Compiler versions:
-//
-// MSVC++ 6.0 _MSC_VER == 1200 nssv_COMPILER_MSVC_VERSION == 60 (Visual Studio 6.0)
-// MSVC++ 7.0 _MSC_VER == 1300 nssv_COMPILER_MSVC_VERSION == 70 (Visual Studio .NET 2002)
-// MSVC++ 7.1 _MSC_VER == 1310 nssv_COMPILER_MSVC_VERSION == 71 (Visual Studio .NET 2003)
-// MSVC++ 8.0 _MSC_VER == 1400 nssv_COMPILER_MSVC_VERSION == 80 (Visual Studio 2005)
-// MSVC++ 9.0 _MSC_VER == 1500 nssv_COMPILER_MSVC_VERSION == 90 (Visual Studio 2008)
-// MSVC++ 10.0 _MSC_VER == 1600 nssv_COMPILER_MSVC_VERSION == 100 (Visual Studio 2010)
-// MSVC++ 11.0 _MSC_VER == 1700 nssv_COMPILER_MSVC_VERSION == 110 (Visual Studio 2012)
-// MSVC++ 12.0 _MSC_VER == 1800 nssv_COMPILER_MSVC_VERSION == 120 (Visual Studio 2013)
-// MSVC++ 14.0 _MSC_VER == 1900 nssv_COMPILER_MSVC_VERSION == 140 (Visual Studio 2015)
-// MSVC++ 14.1 _MSC_VER >= 1910 nssv_COMPILER_MSVC_VERSION == 141 (Visual Studio 2017)
-// MSVC++ 14.2 _MSC_VER >= 1920 nssv_COMPILER_MSVC_VERSION == 142 (Visual Studio 2019)
-
-#if defined(_MSC_VER ) && !defined(__clang__)
-# define nssv_COMPILER_MSVC_VER (_MSC_VER )
-# define nssv_COMPILER_MSVC_VERSION (_MSC_VER / 10 - 10 * ( 5 + (_MSC_VER < 1900 ) ) )
-#else
-# define nssv_COMPILER_MSVC_VER 0
-# define nssv_COMPILER_MSVC_VERSION 0
-#endif
-
-#define nssv_COMPILER_VERSION( major, minor, patch ) ( 10 * ( 10 * (major) + (minor) ) + (patch) )
-
-#if defined(__clang__)
-# define nssv_COMPILER_CLANG_VERSION nssv_COMPILER_VERSION(__clang_major__, __clang_minor__, __clang_patchlevel__)
-#else
-# define nssv_COMPILER_CLANG_VERSION 0
-#endif
-
-#if defined(__GNUC__) && !defined(__clang__)
-# define nssv_COMPILER_GNUC_VERSION nssv_COMPILER_VERSION(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__)
-#else
-# define nssv_COMPILER_GNUC_VERSION 0
-#endif
-
-// half-open range [lo..hi):
-#define nssv_BETWEEN( v, lo, hi ) ( (lo) <= (v) && (v) < (hi) )
-
-// Presence of language and library features:
-
-#ifdef _HAS_CPP0X
-# define nssv_HAS_CPP0X _HAS_CPP0X
-#else
-# define nssv_HAS_CPP0X 0
-#endif
-
-// Unless defined otherwise below, consider VC14 as C++11 for variant-lite:
-
-#if nssv_COMPILER_MSVC_VER >= 1900
-# undef nssv_CPP11_OR_GREATER
-# define nssv_CPP11_OR_GREATER 1
-#endif
-
-#define nssv_CPP11_90 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1500)
-#define nssv_CPP11_100 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1600)
-#define nssv_CPP11_110 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1700)
-#define nssv_CPP11_120 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1800)
-#define nssv_CPP11_140 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1900)
-#define nssv_CPP11_141 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1910)
-
-#define nssv_CPP14_000 (nssv_CPP14_OR_GREATER)
-#define nssv_CPP17_000 (nssv_CPP17_OR_GREATER)
-
-// Presence of C++11 language features:
-
-#define nssv_HAVE_CONSTEXPR_11 nssv_CPP11_140
-#define nssv_HAVE_EXPLICIT_CONVERSION nssv_CPP11_140
-#define nssv_HAVE_INLINE_NAMESPACE nssv_CPP11_140
-#define nssv_HAVE_NOEXCEPT nssv_CPP11_140
-#define nssv_HAVE_NULLPTR nssv_CPP11_100
-#define nssv_HAVE_REF_QUALIFIER nssv_CPP11_140
-#define nssv_HAVE_UNICODE_LITERALS nssv_CPP11_140
-#define nssv_HAVE_USER_DEFINED_LITERALS nssv_CPP11_140
-#define nssv_HAVE_WCHAR16_T nssv_CPP11_100
-#define nssv_HAVE_WCHAR32_T nssv_CPP11_100
-
-#if ! ( ( nssv_CPP11_OR_GREATER && nssv_COMPILER_CLANG_VERSION ) || nssv_BETWEEN( nssv_COMPILER_CLANG_VERSION, 300, 400 ) )
-# define nssv_HAVE_STD_DEFINED_LITERALS nssv_CPP11_140
-#else
-# define nssv_HAVE_STD_DEFINED_LITERALS 0
-#endif
-
-// Presence of C++14 language features:
-
-#define nssv_HAVE_CONSTEXPR_14 nssv_CPP14_000
-
-// Presence of C++17 language features:
-
-#define nssv_HAVE_NODISCARD nssv_CPP17_000
-
-// Presence of C++ library features:
-
-#define nssv_HAVE_STD_HASH nssv_CPP11_120
-
-// C++ feature usage:
-
-#if nssv_HAVE_CONSTEXPR_11
-# define nssv_constexpr constexpr
-#else
-# define nssv_constexpr /*constexpr*/
-#endif
-
-#if nssv_HAVE_CONSTEXPR_14
-# define nssv_constexpr14 constexpr
-#else
-# define nssv_constexpr14 /*constexpr*/
-#endif
-
-#if nssv_HAVE_EXPLICIT_CONVERSION
-# define nssv_explicit explicit
-#else
-# define nssv_explicit /*explicit*/
-#endif
-
-#if nssv_HAVE_INLINE_NAMESPACE
-# define nssv_inline_ns inline
-#else
-# define nssv_inline_ns /*inline*/
-#endif
-
-#if nssv_HAVE_NOEXCEPT
-# define nssv_noexcept noexcept
-#else
-# define nssv_noexcept /*noexcept*/
-#endif
-
-//#if nssv_HAVE_REF_QUALIFIER
-//# define nssv_ref_qual &
-//# define nssv_refref_qual &&
-//#else
-//# define nssv_ref_qual /*&*/
-//# define nssv_refref_qual /*&&*/
-//#endif
-
-#if nssv_HAVE_NULLPTR
-# define nssv_nullptr nullptr
-#else
-# define nssv_nullptr NULL
-#endif
-
-#if nssv_HAVE_NODISCARD
-# define nssv_nodiscard [[nodiscard]]
-#else
-# define nssv_nodiscard /*[[nodiscard]]*/
-#endif
-
-// Additional includes:
-
-#include <algorithm>
-#include <cassert>
-#include <iterator>
-#include <limits>
-#include <ostream>
-#include <string> // std::char_traits<>
-
-#if ! nssv_CONFIG_NO_EXCEPTIONS
-# include <stdexcept>
-#endif
-
-#if nssv_CPP11_OR_GREATER
-# include <type_traits>
-#endif
-
-// Clang, GNUC, MSVC warning suppression macros:
-
-#if defined(__clang__)
-# pragma clang diagnostic ignored "-Wreserved-user-defined-literal"
-# pragma clang diagnostic push
-# pragma clang diagnostic ignored "-Wuser-defined-literals"
-#elif defined(__GNUC__)
-# pragma GCC diagnostic push
-# pragma GCC diagnostic ignored "-Wliteral-suffix"
-#endif // __clang__
-
-#if nssv_COMPILER_MSVC_VERSION >= 140
-# define nssv_SUPPRESS_MSGSL_WARNING(expr) [[gsl::suppress(expr)]]
-# define nssv_SUPPRESS_MSVC_WARNING(code, descr) __pragma(warning(suppress: code) )
-# define nssv_DISABLE_MSVC_WARNINGS(codes) __pragma(warning(push)) __pragma(warning(disable: codes))
-#else
-# define nssv_SUPPRESS_MSGSL_WARNING(expr)
-# define nssv_SUPPRESS_MSVC_WARNING(code, descr)
-# define nssv_DISABLE_MSVC_WARNINGS(codes)
-#endif
-
-#if defined(__clang__)
-# define nssv_RESTORE_WARNINGS() _Pragma("clang diagnostic pop")
-#elif defined(__GNUC__)
-# define nssv_RESTORE_WARNINGS() _Pragma("GCC diagnostic pop")
-#elif nssv_COMPILER_MSVC_VERSION >= 140
-# define nssv_RESTORE_WARNINGS() __pragma(warning(pop ))
-#else
-# define nssv_RESTORE_WARNINGS()
-#endif
-
-// Suppress the following MSVC (GSL) warnings:
-// - C4455, non-gsl : 'operator ""sv': literal suffix identifiers that do not
-// start with an underscore are reserved
-// - C26472, gsl::t.1 : don't use a static_cast for arithmetic conversions;
-// use brace initialization, gsl::narrow_cast or gsl::narow
-// - C26481: gsl::b.1 : don't use pointer arithmetic. Use span instead
-
-nssv_DISABLE_MSVC_WARNINGS( 4455 26481 26472 )
-//nssv_DISABLE_CLANG_WARNINGS( "-Wuser-defined-literals" )
-//nssv_DISABLE_GNUC_WARNINGS( -Wliteral-suffix )
-
-namespace nonstd { namespace sv_lite {
-
-#if nssv_CPP11_OR_GREATER
-
-namespace detail {
-
-#if nssv_CPP14_OR_GREATER
-
-template< typename CharT >
-inline constexpr std::size_t length( CharT * s, std::size_t result = 0 )
-{
- CharT * v = s;
- std::size_t r = result;
- while ( *v != '\0' ) {
- ++v;
- ++r;
- }
- return r;
-}
-
-#else // nssv_CPP14_OR_GREATER
-
-// Expect tail call optimization to make length() non-recursive:
-
-template< typename CharT >
-inline constexpr std::size_t length( CharT * s, std::size_t result = 0 )
-{
- return *s == '\0' ? result : length( s + 1, result + 1 );
-}
-
-#endif // nssv_CPP14_OR_GREATER
-
-} // namespace detail
-
-#endif // nssv_CPP11_OR_GREATER
-
-template
-<
- class CharT,
- class Traits = std::char_traits<CharT>
->
-class basic_string_view;
-
-//
-// basic_string_view:
-//
-
-template
-<
- class CharT,
- class Traits /* = std::char_traits<CharT> */
->
-class basic_string_view
-{
-public:
- // Member types:
-
- typedef Traits traits_type;
- typedef CharT value_type;
-
- typedef CharT * pointer;
- typedef CharT const * const_pointer;
- typedef CharT & reference;
- typedef CharT const & const_reference;
-
- typedef const_pointer iterator;
- typedef const_pointer const_iterator;
- typedef std::reverse_iterator< const_iterator > reverse_iterator;
- typedef std::reverse_iterator< const_iterator > const_reverse_iterator;
-
- typedef std::size_t size_type;
- typedef std::ptrdiff_t difference_type;
-
- // 24.4.2.1 Construction and assignment:
-
- nssv_constexpr basic_string_view() nssv_noexcept
- : data_( nssv_nullptr )
- , size_( 0 )
- {}
-
-#if nssv_CPP11_OR_GREATER
- nssv_constexpr basic_string_view( basic_string_view const & other ) nssv_noexcept = default;
-#else
- nssv_constexpr basic_string_view( basic_string_view const & other ) nssv_noexcept
- : data_( other.data_)
- , size_( other.size_)
- {}
-#endif
-
- nssv_constexpr basic_string_view( CharT const * s, size_type count ) nssv_noexcept // non-standard noexcept
- : data_( s )
- , size_( count )
- {}
-
- nssv_constexpr basic_string_view( CharT const * s) nssv_noexcept // non-standard noexcept
- : data_( s )
-#if nssv_CPP17_OR_GREATER
- , size_( Traits::length(s) )
-#elif nssv_CPP11_OR_GREATER
- , size_( detail::length(s) )
-#else
- , size_( Traits::length(s) )
-#endif
- {}
-
- // Assignment:
-
-#if nssv_CPP11_OR_GREATER
- nssv_constexpr14 basic_string_view & operator=( basic_string_view const & other ) nssv_noexcept = default;
-#else
- nssv_constexpr14 basic_string_view & operator=( basic_string_view const & other ) nssv_noexcept
- {
- data_ = other.data_;
- size_ = other.size_;
- return *this;
- }
-#endif
-
- // 24.4.2.2 Iterator support:
-
- nssv_constexpr const_iterator begin() const nssv_noexcept { return data_; }
- nssv_constexpr const_iterator end() const nssv_noexcept { return data_ + size_; }
-
- nssv_constexpr const_iterator cbegin() const nssv_noexcept { return begin(); }
- nssv_constexpr const_iterator cend() const nssv_noexcept { return end(); }
-
- nssv_constexpr const_reverse_iterator rbegin() const nssv_noexcept { return const_reverse_iterator( end() ); }
- nssv_constexpr const_reverse_iterator rend() const nssv_noexcept { return const_reverse_iterator( begin() ); }
-
- nssv_constexpr const_reverse_iterator crbegin() const nssv_noexcept { return rbegin(); }
- nssv_constexpr const_reverse_iterator crend() const nssv_noexcept { return rend(); }
-
- // 24.4.2.3 Capacity:
-
- nssv_constexpr size_type size() const nssv_noexcept { return size_; }
- nssv_constexpr size_type length() const nssv_noexcept { return size_; }
- nssv_constexpr size_type max_size() const nssv_noexcept { return (std::numeric_limits< size_type >::max)(); }
-
- // since C++20
- nssv_nodiscard nssv_constexpr bool empty() const nssv_noexcept
- {
- return 0 == size_;
- }
-
- // 24.4.2.4 Element access:
-
- nssv_constexpr const_reference operator[]( size_type pos ) const
- {
- return data_at( pos );
- }
-
- nssv_constexpr14 const_reference at( size_type pos ) const
- {
-#if nssv_CONFIG_NO_EXCEPTIONS
- assert( pos < size() );
-#else
- if ( pos >= size() )
- {
- throw std::out_of_range("nonstd::string_view::at()");
- }
-#endif
- return data_at( pos );
- }
-
- nssv_constexpr const_reference front() const { return data_at( 0 ); }
- nssv_constexpr const_reference back() const { return data_at( size() - 1 ); }
-
- nssv_constexpr const_pointer data() const nssv_noexcept { return data_; }
-
- // 24.4.2.5 Modifiers:
-
- nssv_constexpr14 void remove_prefix( size_type n )
- {
- assert( n <= size() );
- data_ += n;
- size_ -= n;
- }
-
- nssv_constexpr14 void remove_suffix( size_type n )
- {
- assert( n <= size() );
- size_ -= n;
- }
-
- nssv_constexpr14 void swap( basic_string_view & other ) nssv_noexcept
- {
- using std::swap;
- swap( data_, other.data_ );
- swap( size_, other.size_ );
- }
-
- // 24.4.2.6 String operations:
-
- size_type copy( CharT * dest, size_type n, size_type pos = 0 ) const
- {
-#if nssv_CONFIG_NO_EXCEPTIONS
- assert( pos <= size() );
-#else
- if ( pos > size() )
- {
- throw std::out_of_range("nonstd::string_view::copy()");
- }
-#endif
- const size_type rlen = (std::min)( n, size() - pos );
-
- (void) Traits::copy( dest, data() + pos, rlen );
-
- return rlen;
- }
-
- nssv_constexpr14 basic_string_view substr( size_type pos = 0, size_type n = npos ) const
- {
-#if nssv_CONFIG_NO_EXCEPTIONS
- assert( pos <= size() );
-#else
- if ( pos > size() )
- {
- throw std::out_of_range("nonstd::string_view::substr()");
- }
-#endif
- return basic_string_view( data() + pos, (std::min)( n, size() - pos ) );
- }
-
- // compare(), 6x:
-
- nssv_constexpr14 int compare( basic_string_view other ) const nssv_noexcept // (1)
- {
- if ( const int result = Traits::compare( data(), other.data(), (std::min)( size(), other.size() ) ) )
- {
- return result;
- }
-
- return size() == other.size() ? 0 : size() < other.size() ? -1 : 1;
- }
-
- nssv_constexpr int compare( size_type pos1, size_type n1, basic_string_view other ) const // (2)
- {
- return substr( pos1, n1 ).compare( other );
- }
-
- nssv_constexpr int compare( size_type pos1, size_type n1, basic_string_view other, size_type pos2, size_type n2 ) const // (3)
- {
- return substr( pos1, n1 ).compare( other.substr( pos2, n2 ) );
- }
-
- nssv_constexpr int compare( CharT const * s ) const // (4)
- {
- return compare( basic_string_view( s ) );
- }
-
- nssv_constexpr int compare( size_type pos1, size_type n1, CharT const * s ) const // (5)
- {
- return substr( pos1, n1 ).compare( basic_string_view( s ) );
- }
-
- nssv_constexpr int compare( size_type pos1, size_type n1, CharT const * s, size_type n2 ) const // (6)
- {
- return substr( pos1, n1 ).compare( basic_string_view( s, n2 ) );
- }
-
- // 24.4.2.7 Searching:
-
- // starts_with(), 3x, since C++20:
-
- nssv_constexpr bool starts_with( basic_string_view v ) const nssv_noexcept // (1)
- {
- return size() >= v.size() && compare( 0, v.size(), v ) == 0;
- }
-
- nssv_constexpr bool starts_with( CharT c ) const nssv_noexcept // (2)
- {
- return starts_with( basic_string_view( &c, 1 ) );
- }
-
- nssv_constexpr bool starts_with( CharT const * s ) const // (3)
- {
- return starts_with( basic_string_view( s ) );
- }
-
- // ends_with(), 3x, since C++20:
-
- nssv_constexpr bool ends_with( basic_string_view v ) const nssv_noexcept // (1)
- {
- return size() >= v.size() && compare( size() - v.size(), npos, v ) == 0;
- }
-
- nssv_constexpr bool ends_with( CharT c ) const nssv_noexcept // (2)
- {
- return ends_with( basic_string_view( &c, 1 ) );
- }
-
- nssv_constexpr bool ends_with( CharT const * s ) const // (3)
- {
- return ends_with( basic_string_view( s ) );
- }
-
- // find(), 4x:
-
- nssv_constexpr14 size_type find( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1)
- {
- return assert( v.size() == 0 || v.data() != nssv_nullptr )
- , pos >= size()
- ? npos
- : to_pos( std::search( cbegin() + pos, cend(), v.cbegin(), v.cend(), Traits::eq ) );
- }
-
- nssv_constexpr14 size_type find( CharT c, size_type pos = 0 ) const nssv_noexcept // (2)
- {
- return find( basic_string_view( &c, 1 ), pos );
- }
-
- nssv_constexpr14 size_type find( CharT const * s, size_type pos, size_type n ) const // (3)
- {
- return find( basic_string_view( s, n ), pos );
- }
-
- nssv_constexpr14 size_type find( CharT const * s, size_type pos = 0 ) const // (4)
- {
- return find( basic_string_view( s ), pos );
- }
-
- // rfind(), 4x:
-
- nssv_constexpr14 size_type rfind( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1)
- {
- if ( size() < v.size() )
- {
- return npos;
- }
-
- if ( v.empty() )
- {
- return (std::min)( size(), pos );
- }
-
- const_iterator last = cbegin() + (std::min)( size() - v.size(), pos ) + v.size();
- const_iterator result = std::find_end( cbegin(), last, v.cbegin(), v.cend(), Traits::eq );
-
- return result != last ? size_type( result - cbegin() ) : npos;
- }
-
- nssv_constexpr14 size_type rfind( CharT c, size_type pos = npos ) const nssv_noexcept // (2)
- {
- return rfind( basic_string_view( &c, 1 ), pos );
- }
-
- nssv_constexpr14 size_type rfind( CharT const * s, size_type pos, size_type n ) const // (3)
- {
- return rfind( basic_string_view( s, n ), pos );
- }
-
- nssv_constexpr14 size_type rfind( CharT const * s, size_type pos = npos ) const // (4)
- {
- return rfind( basic_string_view( s ), pos );
- }
-
- // find_first_of(), 4x:
-
- nssv_constexpr size_type find_first_of( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1)
- {
- return pos >= size()
- ? npos
- : to_pos( std::find_first_of( cbegin() + pos, cend(), v.cbegin(), v.cend(), Traits::eq ) );
- }
-
- nssv_constexpr size_type find_first_of( CharT c, size_type pos = 0 ) const nssv_noexcept // (2)
- {
- return find_first_of( basic_string_view( &c, 1 ), pos );
- }
-
- nssv_constexpr size_type find_first_of( CharT const * s, size_type pos, size_type n ) const // (3)
- {
- return find_first_of( basic_string_view( s, n ), pos );
- }
-
- nssv_constexpr size_type find_first_of( CharT const * s, size_type pos = 0 ) const // (4)
- {
- return find_first_of( basic_string_view( s ), pos );
- }
-
- // find_last_of(), 4x:
-
- nssv_constexpr size_type find_last_of( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1)
- {
- return empty()
- ? npos
- : pos >= size()
- ? find_last_of( v, size() - 1 )
- : to_pos( std::find_first_of( const_reverse_iterator( cbegin() + pos + 1 ), crend(), v.cbegin(), v.cend(), Traits::eq ) );
- }
-
- nssv_constexpr size_type find_last_of( CharT c, size_type pos = npos ) const nssv_noexcept // (2)
- {
- return find_last_of( basic_string_view( &c, 1 ), pos );
- }
-
- nssv_constexpr size_type find_last_of( CharT const * s, size_type pos, size_type count ) const // (3)
- {
- return find_last_of( basic_string_view( s, count ), pos );
- }
-
- nssv_constexpr size_type find_last_of( CharT const * s, size_type pos = npos ) const // (4)
- {
- return find_last_of( basic_string_view( s ), pos );
- }
-
- // find_first_not_of(), 4x:
-
- nssv_constexpr size_type find_first_not_of( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1)
- {
- return pos >= size()
- ? npos
- : to_pos( std::find_if( cbegin() + pos, cend(), not_in_view( v ) ) );
- }
-
- nssv_constexpr size_type find_first_not_of( CharT c, size_type pos = 0 ) const nssv_noexcept // (2)
- {
- return find_first_not_of( basic_string_view( &c, 1 ), pos );
- }
-
- nssv_constexpr size_type find_first_not_of( CharT const * s, size_type pos, size_type count ) const // (3)
- {
- return find_first_not_of( basic_string_view( s, count ), pos );
- }
-
- nssv_constexpr size_type find_first_not_of( CharT const * s, size_type pos = 0 ) const // (4)
- {
- return find_first_not_of( basic_string_view( s ), pos );
- }
-
- // find_last_not_of(), 4x:
-
- nssv_constexpr size_type find_last_not_of( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1)
- {
- return empty()
- ? npos
- : pos >= size()
- ? find_last_not_of( v, size() - 1 )
- : to_pos( std::find_if( const_reverse_iterator( cbegin() + pos + 1 ), crend(), not_in_view( v ) ) );
- }
-
- nssv_constexpr size_type find_last_not_of( CharT c, size_type pos = npos ) const nssv_noexcept // (2)
- {
- return find_last_not_of( basic_string_view( &c, 1 ), pos );
- }
-
- nssv_constexpr size_type find_last_not_of( CharT const * s, size_type pos, size_type count ) const // (3)
- {
- return find_last_not_of( basic_string_view( s, count ), pos );
- }
-
- nssv_constexpr size_type find_last_not_of( CharT const * s, size_type pos = npos ) const // (4)
- {
- return find_last_not_of( basic_string_view( s ), pos );
- }
-
- // Constants:
-
-#if nssv_CPP17_OR_GREATER
- static nssv_constexpr size_type npos = size_type(-1);
-#elif nssv_CPP11_OR_GREATER
- enum : size_type { npos = size_type(-1) };
-#else
- enum { npos = size_type(-1) };
-#endif
-
-private:
- struct not_in_view
- {
- const basic_string_view v;
-
- nssv_constexpr explicit not_in_view( basic_string_view v_ ) : v( v_ ) {}
-
- nssv_constexpr bool operator()( CharT c ) const
- {
- return npos == v.find_first_of( c );
- }
- };
-
- nssv_constexpr size_type to_pos( const_iterator it ) const
- {
- return it == cend() ? npos : size_type( it - cbegin() );
- }
-
- nssv_constexpr size_type to_pos( const_reverse_iterator it ) const
- {
- return it == crend() ? npos : size_type( crend() - it - 1 );
- }
-
- nssv_constexpr const_reference data_at( size_type pos ) const
- {
-#if nssv_BETWEEN( nssv_COMPILER_GNUC_VERSION, 1, 500 )
- return data_[pos];
-#else
- return assert( pos < size() ), data_[pos];
-#endif
- }
-
-private:
- const_pointer data_;
- size_type size_;
-
-public:
-#if nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS
-
- template< class Allocator >
- basic_string_view( std::basic_string<CharT, Traits, Allocator> const & s ) nssv_noexcept
- : data_( s.data() )
- , size_( s.size() )
- {}
-
-#if nssv_HAVE_EXPLICIT_CONVERSION
-
- template< class Allocator >
- explicit operator std::basic_string<CharT, Traits, Allocator>() const
- {
- return to_string( Allocator() );
- }
-
-#endif // nssv_HAVE_EXPLICIT_CONVERSION
-
-#if nssv_CPP11_OR_GREATER
-
- template< class Allocator = std::allocator<CharT> >
- std::basic_string<CharT, Traits, Allocator>
- to_string( Allocator const & a = Allocator() ) const
- {
- return std::basic_string<CharT, Traits, Allocator>( begin(), end(), a );
- }
-
-#else
-
- std::basic_string<CharT, Traits>
- to_string() const
- {
- return std::basic_string<CharT, Traits>( begin(), end() );
- }
-
- template< class Allocator >
- std::basic_string<CharT, Traits, Allocator>
- to_string( Allocator const & a ) const
- {
- return std::basic_string<CharT, Traits, Allocator>( begin(), end(), a );
- }
-
-#endif // nssv_CPP11_OR_GREATER
-
-#endif // nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS
-};
-
-//
-// Non-member functions:
-//
-
-// 24.4.3 Non-member comparison functions:
-// lexicographically compare two string views (function template):
-
-template< class CharT, class Traits >
-nssv_constexpr bool operator== (
- basic_string_view <CharT, Traits> lhs,
- basic_string_view <CharT, Traits> rhs ) nssv_noexcept
-{ return lhs.compare( rhs ) == 0 ; }
-
-template< class CharT, class Traits >
-nssv_constexpr bool operator!= (
- basic_string_view <CharT, Traits> lhs,
- basic_string_view <CharT, Traits> rhs ) nssv_noexcept
-{ return lhs.compare( rhs ) != 0 ; }
-
-template< class CharT, class Traits >
-nssv_constexpr bool operator< (
- basic_string_view <CharT, Traits> lhs,
- basic_string_view <CharT, Traits> rhs ) nssv_noexcept
-{ return lhs.compare( rhs ) < 0 ; }
-
-template< class CharT, class Traits >
-nssv_constexpr bool operator<= (
- basic_string_view <CharT, Traits> lhs,
- basic_string_view <CharT, Traits> rhs ) nssv_noexcept
-{ return lhs.compare( rhs ) <= 0 ; }
-
-template< class CharT, class Traits >
-nssv_constexpr bool operator> (
- basic_string_view <CharT, Traits> lhs,
- basic_string_view <CharT, Traits> rhs ) nssv_noexcept
-{ return lhs.compare( rhs ) > 0 ; }
-
-template< class CharT, class Traits >
-nssv_constexpr bool operator>= (
- basic_string_view <CharT, Traits> lhs,
- basic_string_view <CharT, Traits> rhs ) nssv_noexcept
-{ return lhs.compare( rhs ) >= 0 ; }
-
-// Let S be basic_string_view<CharT, Traits>, and sv be an instance of S.
-// Implementations shall provide sufficient additional overloads marked
-// constexpr and noexcept so that an object t with an implicit conversion
-// to S can be compared according to Table 67.
-
-#if ! nssv_CPP11_OR_GREATER || nssv_BETWEEN( nssv_COMPILER_MSVC_VERSION, 100, 141 )
-
-// accomodate for older compilers:
-
-// ==
-
-template< class CharT, class Traits>
-nssv_constexpr bool operator==(
- basic_string_view<CharT, Traits> lhs,
- CharT const * rhs ) nssv_noexcept
-{ return lhs.compare( rhs ) == 0; }
-
-template< class CharT, class Traits>
-nssv_constexpr bool operator==(
- CharT const * lhs,
- basic_string_view<CharT, Traits> rhs ) nssv_noexcept
-{ return rhs.compare( lhs ) == 0; }
-
-template< class CharT, class Traits>
-nssv_constexpr bool operator==(
- basic_string_view<CharT, Traits> lhs,
- std::basic_string<CharT, Traits> rhs ) nssv_noexcept
-{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; }
-
-template< class CharT, class Traits>
-nssv_constexpr bool operator==(
- std::basic_string<CharT, Traits> rhs,
- basic_string_view<CharT, Traits> lhs ) nssv_noexcept
-{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; }
-
-// !=
-
-template< class CharT, class Traits>
-nssv_constexpr bool operator!=(
- basic_string_view<CharT, Traits> lhs,
- char const * rhs ) nssv_noexcept
-{ return lhs.compare( rhs ) != 0; }
-
-template< class CharT, class Traits>
-nssv_constexpr bool operator!=(
- char const * lhs,
- basic_string_view<CharT, Traits> rhs ) nssv_noexcept
-{ return rhs.compare( lhs ) != 0; }
-
-template< class CharT, class Traits>
-nssv_constexpr bool operator!=(
- basic_string_view<CharT, Traits> lhs,
- std::basic_string<CharT, Traits> rhs ) nssv_noexcept
-{ return lhs.size() != rhs.size() && lhs.compare( rhs ) != 0; }
-
-template< class CharT, class Traits>
-nssv_constexpr bool operator!=(
- std::basic_string<CharT, Traits> rhs,
- basic_string_view<CharT, Traits> lhs ) nssv_noexcept
-{ return lhs.size() != rhs.size() || rhs.compare( lhs ) != 0; }
-
-// <
-
-template< class CharT, class Traits>
-nssv_constexpr bool operator<(
- basic_string_view<CharT, Traits> lhs,
- char const * rhs ) nssv_noexcept
-{ return lhs.compare( rhs ) < 0; }
-
-template< class CharT, class Traits>
-nssv_constexpr bool operator<(
- char const * lhs,
- basic_string_view<CharT, Traits> rhs ) nssv_noexcept
-{ return rhs.compare( lhs ) > 0; }
-
-template< class CharT, class Traits>
-nssv_constexpr bool operator<(
- basic_string_view<CharT, Traits> lhs,
- std::basic_string<CharT, Traits> rhs ) nssv_noexcept
-{ return lhs.compare( rhs ) < 0; }
-
-template< class CharT, class Traits>
-nssv_constexpr bool operator<(
- std::basic_string<CharT, Traits> rhs,
- basic_string_view<CharT, Traits> lhs ) nssv_noexcept
-{ return rhs.compare( lhs ) > 0; }
-
-// <=
-
-template< class CharT, class Traits>
-nssv_constexpr bool operator<=(
- basic_string_view<CharT, Traits> lhs,
- char const * rhs ) nssv_noexcept
-{ return lhs.compare( rhs ) <= 0; }
-
-template< class CharT, class Traits>
-nssv_constexpr bool operator<=(
- char const * lhs,
- basic_string_view<CharT, Traits> rhs ) nssv_noexcept
-{ return rhs.compare( lhs ) >= 0; }
-
-template< class CharT, class Traits>
-nssv_constexpr bool operator<=(
- basic_string_view<CharT, Traits> lhs,
- std::basic_string<CharT, Traits> rhs ) nssv_noexcept
-{ return lhs.compare( rhs ) <= 0; }
-
-template< class CharT, class Traits>
-nssv_constexpr bool operator<=(
- std::basic_string<CharT, Traits> rhs,
- basic_string_view<CharT, Traits> lhs ) nssv_noexcept
-{ return rhs.compare( lhs ) >= 0; }
-
-// >
-
-template< class CharT, class Traits>
-nssv_constexpr bool operator>(
- basic_string_view<CharT, Traits> lhs,
- char const * rhs ) nssv_noexcept
-{ return lhs.compare( rhs ) > 0; }
-
-template< class CharT, class Traits>
-nssv_constexpr bool operator>(
- char const * lhs,
- basic_string_view<CharT, Traits> rhs ) nssv_noexcept
-{ return rhs.compare( lhs ) < 0; }
-
-template< class CharT, class Traits>
-nssv_constexpr bool operator>(
- basic_string_view<CharT, Traits> lhs,
- std::basic_string<CharT, Traits> rhs ) nssv_noexcept
-{ return lhs.compare( rhs ) > 0; }
-
-template< class CharT, class Traits>
-nssv_constexpr bool operator>(
- std::basic_string<CharT, Traits> rhs,
- basic_string_view<CharT, Traits> lhs ) nssv_noexcept
-{ return rhs.compare( lhs ) < 0; }
-
-// >=
-
-template< class CharT, class Traits>
-nssv_constexpr bool operator>=(
- basic_string_view<CharT, Traits> lhs,
- char const * rhs ) nssv_noexcept
-{ return lhs.compare( rhs ) >= 0; }
-
-template< class CharT, class Traits>
-nssv_constexpr bool operator>=(
- char const * lhs,
- basic_string_view<CharT, Traits> rhs ) nssv_noexcept
-{ return rhs.compare( lhs ) <= 0; }
-
-template< class CharT, class Traits>
-nssv_constexpr bool operator>=(
- basic_string_view<CharT, Traits> lhs,
- std::basic_string<CharT, Traits> rhs ) nssv_noexcept
-{ return lhs.compare( rhs ) >= 0; }
-
-template< class CharT, class Traits>
-nssv_constexpr bool operator>=(
- std::basic_string<CharT, Traits> rhs,
- basic_string_view<CharT, Traits> lhs ) nssv_noexcept
-{ return rhs.compare( lhs ) <= 0; }
-
-#else // newer compilers:
-
-#define nssv_BASIC_STRING_VIEW_I(T,U) typename std::decay< basic_string_view<T,U> >::type
-
-#if nssv_BETWEEN( nssv_COMPILER_MSVC_VERSION, 140, 150 )
-# define nssv_MSVC_ORDER(x) , int=x
-#else
-# define nssv_MSVC_ORDER(x) /*, int=x*/
-#endif
-
-// ==
-
-template< class CharT, class Traits nssv_MSVC_ORDER(1) >
-nssv_constexpr bool operator==(
- basic_string_view <CharT, Traits> lhs,
- nssv_BASIC_STRING_VIEW_I(CharT, Traits) rhs ) nssv_noexcept
-{ return lhs.compare( rhs ) == 0; }
-
-template< class CharT, class Traits nssv_MSVC_ORDER(2) >
-nssv_constexpr bool operator==(
- nssv_BASIC_STRING_VIEW_I(CharT, Traits) lhs,
- basic_string_view <CharT, Traits> rhs ) nssv_noexcept
-{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; }
-
-// !=
-
-template< class CharT, class Traits nssv_MSVC_ORDER(1) >
-nssv_constexpr bool operator!= (
- basic_string_view < CharT, Traits > lhs,
- nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept
-{ return lhs.size() != rhs.size() || lhs.compare( rhs ) != 0 ; }
-
-template< class CharT, class Traits nssv_MSVC_ORDER(2) >
-nssv_constexpr bool operator!= (
- nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs,
- basic_string_view < CharT, Traits > rhs ) nssv_noexcept
-{ return lhs.compare( rhs ) != 0 ; }
-
-// <
-
-template< class CharT, class Traits nssv_MSVC_ORDER(1) >
-nssv_constexpr bool operator< (
- basic_string_view < CharT, Traits > lhs,
- nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept
-{ return lhs.compare( rhs ) < 0 ; }
-
-template< class CharT, class Traits nssv_MSVC_ORDER(2) >
-nssv_constexpr bool operator< (
- nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs,
- basic_string_view < CharT, Traits > rhs ) nssv_noexcept
-{ return lhs.compare( rhs ) < 0 ; }
-
-// <=
-
-template< class CharT, class Traits nssv_MSVC_ORDER(1) >
-nssv_constexpr bool operator<= (
- basic_string_view < CharT, Traits > lhs,
- nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept
-{ return lhs.compare( rhs ) <= 0 ; }
-
-template< class CharT, class Traits nssv_MSVC_ORDER(2) >
-nssv_constexpr bool operator<= (
- nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs,
- basic_string_view < CharT, Traits > rhs ) nssv_noexcept
-{ return lhs.compare( rhs ) <= 0 ; }
-
-// >
-
-template< class CharT, class Traits nssv_MSVC_ORDER(1) >
-nssv_constexpr bool operator> (
- basic_string_view < CharT, Traits > lhs,
- nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept
-{ return lhs.compare( rhs ) > 0 ; }
-
-template< class CharT, class Traits nssv_MSVC_ORDER(2) >
-nssv_constexpr bool operator> (
- nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs,
- basic_string_view < CharT, Traits > rhs ) nssv_noexcept
-{ return lhs.compare( rhs ) > 0 ; }
-
-// >=
-
-template< class CharT, class Traits nssv_MSVC_ORDER(1) >
-nssv_constexpr bool operator>= (
- basic_string_view < CharT, Traits > lhs,
- nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept
-{ return lhs.compare( rhs ) >= 0 ; }
-
-template< class CharT, class Traits nssv_MSVC_ORDER(2) >
-nssv_constexpr bool operator>= (
- nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs,
- basic_string_view < CharT, Traits > rhs ) nssv_noexcept
-{ return lhs.compare( rhs ) >= 0 ; }
-
-#undef nssv_MSVC_ORDER
-#undef nssv_BASIC_STRING_VIEW_I
-
-#endif // compiler-dependent approach to comparisons
-
-// 24.4.4 Inserters and extractors:
-
-namespace detail {
-
-template< class Stream >
-void write_padding( Stream & os, std::streamsize n )
-{
- for ( std::streamsize i = 0; i < n; ++i )
- os.rdbuf()->sputc( os.fill() );
-}
-
-template< class Stream, class View >
-Stream & write_to_stream( Stream & os, View const & sv )
-{
- typename Stream::sentry sentry( os );
-
- if ( !os )
- return os;
-
- const std::streamsize length = static_cast<std::streamsize>( sv.length() );
-
- // Whether, and how, to pad:
- const bool pad = ( length < os.width() );
- const bool left_pad = pad && ( os.flags() & std::ios_base::adjustfield ) == std::ios_base::right;
-
- if ( left_pad )
- write_padding( os, os.width() - length );
-
- // Write span characters:
- os.rdbuf()->sputn( sv.begin(), length );
-
- if ( pad && !left_pad )
- write_padding( os, os.width() - length );
-
- // Reset output stream width:
- os.width( 0 );
-
- return os;
-}
-
-} // namespace detail
-
-template< class CharT, class Traits >
-std::basic_ostream<CharT, Traits> &
-operator<<(
- std::basic_ostream<CharT, Traits>& os,
- basic_string_view <CharT, Traits> sv )
-{
- return detail::write_to_stream( os, sv );
-}
-
-// Several typedefs for common character types are provided:
-
-typedef basic_string_view<char> string_view;
-typedef basic_string_view<wchar_t> wstring_view;
-#if nssv_HAVE_WCHAR16_T
-typedef basic_string_view<char16_t> u16string_view;
-typedef basic_string_view<char32_t> u32string_view;
-#endif
-
-}} // namespace nonstd::sv_lite
-
-//
-// 24.4.6 Suffix for basic_string_view literals:
-//
-
-#if nssv_HAVE_USER_DEFINED_LITERALS
-
-namespace nonstd {
-nssv_inline_ns namespace literals {
-nssv_inline_ns namespace string_view_literals {
-
-#if nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS
-
-nssv_constexpr nonstd::sv_lite::string_view operator "" sv( const char* str, size_t len ) nssv_noexcept // (1)
-{
- return nonstd::sv_lite::string_view{ str, len };
-}
-
-nssv_constexpr nonstd::sv_lite::u16string_view operator "" sv( const char16_t* str, size_t len ) nssv_noexcept // (2)
-{
- return nonstd::sv_lite::u16string_view{ str, len };
-}
-
-nssv_constexpr nonstd::sv_lite::u32string_view operator "" sv( const char32_t* str, size_t len ) nssv_noexcept // (3)
-{
- return nonstd::sv_lite::u32string_view{ str, len };
-}
-
-nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, size_t len ) nssv_noexcept // (4)
-{
- return nonstd::sv_lite::wstring_view{ str, len };
-}
-
-#endif // nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS
-
-#if nssv_CONFIG_USR_SV_OPERATOR
-
-nssv_constexpr nonstd::sv_lite::string_view operator "" _sv( const char* str, size_t len ) nssv_noexcept // (1)
-{
- return nonstd::sv_lite::string_view{ str, len };
-}
-
-nssv_constexpr nonstd::sv_lite::u16string_view operator "" _sv( const char16_t* str, size_t len ) nssv_noexcept // (2)
-{
- return nonstd::sv_lite::u16string_view{ str, len };
-}
-
-nssv_constexpr nonstd::sv_lite::u32string_view operator "" _sv( const char32_t* str, size_t len ) nssv_noexcept // (3)
-{
- return nonstd::sv_lite::u32string_view{ str, len };
-}
-
-nssv_constexpr nonstd::sv_lite::wstring_view operator "" _sv( const wchar_t* str, size_t len ) nssv_noexcept // (4)
-{
- return nonstd::sv_lite::wstring_view{ str, len };
-}
-
-#endif // nssv_CONFIG_USR_SV_OPERATOR
-
-}}} // namespace nonstd::literals::string_view_literals
-
-#endif
-
-//
-// Extensions for std::string:
-//
-
-#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS
-
-namespace nonstd {
-namespace sv_lite {
-
-// Exclude MSVC 14 (19.00): it yields ambiguous to_string():
-
-#if nssv_CPP11_OR_GREATER && nssv_COMPILER_MSVC_VERSION != 140
-
-template< class CharT, class Traits, class Allocator = std::allocator<CharT> >
-std::basic_string<CharT, Traits, Allocator>
-to_string( basic_string_view<CharT, Traits> v, Allocator const & a = Allocator() )
-{
- return std::basic_string<CharT,Traits, Allocator>( v.begin(), v.end(), a );
-}
-
-#else
-
-template< class CharT, class Traits >
-std::basic_string<CharT, Traits>
-to_string( basic_string_view<CharT, Traits> v )
-{
- return std::basic_string<CharT, Traits>( v.begin(), v.end() );
-}
-
-template< class CharT, class Traits, class Allocator >
-std::basic_string<CharT, Traits, Allocator>
-to_string( basic_string_view<CharT, Traits> v, Allocator const & a )
-{
- return std::basic_string<CharT, Traits, Allocator>( v.begin(), v.end(), a );
-}
-
-#endif // nssv_CPP11_OR_GREATER
-
-template< class CharT, class Traits, class Allocator >
-basic_string_view<CharT, Traits>
-to_string_view( std::basic_string<CharT, Traits, Allocator> const & s )
-{
- return basic_string_view<CharT, Traits>( s.data(), s.size() );
-}
-
-}} // namespace nonstd::sv_lite
-
-#endif // nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS
-
-//
-// make types and algorithms available in namespace nonstd:
-//
-
-namespace nonstd {
-
-using sv_lite::basic_string_view;
-using sv_lite::string_view;
-using sv_lite::wstring_view;
-
-#if nssv_HAVE_WCHAR16_T
-using sv_lite::u16string_view;
-#endif
-#if nssv_HAVE_WCHAR32_T
-using sv_lite::u32string_view;
-#endif
-
-// literal "sv"
-
-using sv_lite::operator==;
-using sv_lite::operator!=;
-using sv_lite::operator<;
-using sv_lite::operator<=;
-using sv_lite::operator>;
-using sv_lite::operator>=;
-
-using sv_lite::operator<<;
-
-#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS
-using sv_lite::to_string;
-using sv_lite::to_string_view;
-#endif
-
-} // namespace nonstd
-
-// 24.4.5 Hash support (C++11):
-
-// Note: The hash value of a string view object is equal to the hash value of
-// the corresponding string object.
-
-#if nssv_HAVE_STD_HASH
-
-#include <functional>
-
-namespace std {
-
-template<>
-struct hash< nonstd::string_view >
-{
-public:
- std::size_t operator()( nonstd::string_view v ) const nssv_noexcept
- {
- return std::hash<std::string>()( std::string( v.data(), v.size() ) );
- }
-};
-
-template<>
-struct hash< nonstd::wstring_view >
-{
-public:
- std::size_t operator()( nonstd::wstring_view v ) const nssv_noexcept
- {
- return std::hash<std::wstring>()( std::wstring( v.data(), v.size() ) );
- }
-};
-
-template<>
-struct hash< nonstd::u16string_view >
-{
-public:
- std::size_t operator()( nonstd::u16string_view v ) const nssv_noexcept
- {
- return std::hash<std::u16string>()( std::u16string( v.data(), v.size() ) );
- }
-};
-
-template<>
-struct hash< nonstd::u32string_view >
-{
-public:
- std::size_t operator()( nonstd::u32string_view v ) const nssv_noexcept
- {
- return std::hash<std::u32string>()( std::u32string( v.data(), v.size() ) );
- }
-};
-
-} // namespace std
-
-#endif // nssv_HAVE_STD_HASH
-
-nssv_RESTORE_WARNINGS()
-
-#endif // nssv_HAVE_STD_STRING_VIEW
-#endif // NONSTD_SV_LITE_H_INCLUDED
+// Vendored from git changeset v1.4.0
+
+// Copyright 2017-2020 by Martin Moene
+//
+// string-view lite, a C++17-like string_view for C++98 and later.
+// For more information see https://github.com/martinmoene/string-view-lite
+//
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#pragma once
+
+#ifndef NONSTD_SV_LITE_H_INCLUDED
+#define NONSTD_SV_LITE_H_INCLUDED
+
+#define string_view_lite_MAJOR 1
+#define string_view_lite_MINOR 4
+#define string_view_lite_PATCH 0
+
+#define string_view_lite_VERSION nssv_STRINGIFY(string_view_lite_MAJOR) "." nssv_STRINGIFY(string_view_lite_MINOR) "." nssv_STRINGIFY(string_view_lite_PATCH)
+
+#define nssv_STRINGIFY( x ) nssv_STRINGIFY_( x )
+#define nssv_STRINGIFY_( x ) #x
+
+// string-view lite configuration:
+
+#define nssv_STRING_VIEW_DEFAULT 0
+#define nssv_STRING_VIEW_NONSTD 1
+#define nssv_STRING_VIEW_STD 2
+
+#if !defined( nssv_CONFIG_SELECT_STRING_VIEW )
+# define nssv_CONFIG_SELECT_STRING_VIEW ( nssv_HAVE_STD_STRING_VIEW ? nssv_STRING_VIEW_STD : nssv_STRING_VIEW_NONSTD )
+#endif
+
+#if defined( nssv_CONFIG_SELECT_STD_STRING_VIEW ) || defined( nssv_CONFIG_SELECT_NONSTD_STRING_VIEW )
+# error nssv_CONFIG_SELECT_STD_STRING_VIEW and nssv_CONFIG_SELECT_NONSTD_STRING_VIEW are deprecated and removed, please use nssv_CONFIG_SELECT_STRING_VIEW=nssv_STRING_VIEW_...
+#endif
+
+#ifndef nssv_CONFIG_STD_SV_OPERATOR
+# define nssv_CONFIG_STD_SV_OPERATOR 0
+#endif
+
+#ifndef nssv_CONFIG_USR_SV_OPERATOR
+# define nssv_CONFIG_USR_SV_OPERATOR 1
+#endif
+
+#ifdef nssv_CONFIG_CONVERSION_STD_STRING
+# define nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS nssv_CONFIG_CONVERSION_STD_STRING
+# define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS nssv_CONFIG_CONVERSION_STD_STRING
+#endif
+
+#ifndef nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS
+# define nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS 1
+#endif
+
+#ifndef nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS
+# define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS 1
+#endif
+
+// Control presence of exception handling (try and auto discover):
+
+#ifndef nssv_CONFIG_NO_EXCEPTIONS
+# if defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND)
+# define nssv_CONFIG_NO_EXCEPTIONS 0
+# else
+# define nssv_CONFIG_NO_EXCEPTIONS 1
+# endif
+#endif
+
+// C++ language version detection (C++20 is speculative):
+// Note: VC14.0/1900 (VS2015) lacks too much from C++14.
+
+#ifndef nssv_CPLUSPLUS
+# if defined(_MSVC_LANG ) && !defined(__clang__)
+# define nssv_CPLUSPLUS (_MSC_VER == 1900 ? 201103L : _MSVC_LANG )
+# else
+# define nssv_CPLUSPLUS __cplusplus
+# endif
+#endif
+
+#define nssv_CPP98_OR_GREATER ( nssv_CPLUSPLUS >= 199711L )
+#define nssv_CPP11_OR_GREATER ( nssv_CPLUSPLUS >= 201103L )
+#define nssv_CPP11_OR_GREATER_ ( nssv_CPLUSPLUS >= 201103L )
+#define nssv_CPP14_OR_GREATER ( nssv_CPLUSPLUS >= 201402L )
+#define nssv_CPP17_OR_GREATER ( nssv_CPLUSPLUS >= 201703L )
+#define nssv_CPP20_OR_GREATER ( nssv_CPLUSPLUS >= 202000L )
+
+// use C++17 std::string_view if available and requested:
+
+#if nssv_CPP17_OR_GREATER && defined(__has_include )
+# if __has_include( <string_view> )
+# define nssv_HAVE_STD_STRING_VIEW 1
+# else
+# define nssv_HAVE_STD_STRING_VIEW 0
+# endif
+#else
+# define nssv_HAVE_STD_STRING_VIEW 0
+#endif
+
+#define nssv_USES_STD_STRING_VIEW ( (nssv_CONFIG_SELECT_STRING_VIEW == nssv_STRING_VIEW_STD) || ((nssv_CONFIG_SELECT_STRING_VIEW == nssv_STRING_VIEW_DEFAULT) && nssv_HAVE_STD_STRING_VIEW) )
+
+#define nssv_HAVE_STARTS_WITH ( nssv_CPP20_OR_GREATER || !nssv_USES_STD_STRING_VIEW )
+#define nssv_HAVE_ENDS_WITH nssv_HAVE_STARTS_WITH
+
+//
+// Use C++17 std::string_view:
+//
+
+#if nssv_USES_STD_STRING_VIEW
+
+#include <string_view>
+
+// Extensions for std::string:
+
+#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS
+
+namespace nonstd {
+
+// Copy the characters viewed by `v` into a newly constructed std::basic_string
+// that uses allocator `a` (a default-constructed Allocator when omitted).
+template< class CharT, class Traits, class Allocator = std::allocator<CharT> >
+std::basic_string<CharT, Traits, Allocator>
+to_string( std::basic_string_view<CharT, Traits> v, Allocator const & a = Allocator() )
+{
+    typedef std::basic_string<CharT, Traits, Allocator> string_type;
+
+    return string_type( v.begin(), v.end(), a );
+}
+
+// Produce a view over the characters currently held by `s`. The view is built
+// from s.data() and s.size(), so it refers to the string's storage.
+template< class CharT, class Traits, class Allocator >
+std::basic_string_view<CharT, Traits>
+to_string_view( std::basic_string<CharT, Traits, Allocator> const & s )
+{
+    typedef std::basic_string_view<CharT, Traits> view_type;
+
+    return view_type( s.data(), s.size() );
+}
+
+// Literal operators sv and _sv:
+
+#if nssv_CONFIG_STD_SV_OPERATOR
+
+using namespace std::literals::string_view_literals;
+
+#endif
+
+#if nssv_CONFIG_USR_SV_OPERATOR
+
+inline namespace literals {
+inline namespace string_view_literals {
+
+
+// User-defined literal "_sv", one overload per character type. Each overload
+// wraps the literal's characters and length in the matching std view type.
+
+constexpr std::string_view operator "" _sv( const char* str, size_t len ) noexcept // (1)
+{
+    return std::string_view( str, len );
+}
+
+constexpr std::u16string_view operator "" _sv( const char16_t* str, size_t len ) noexcept // (2)
+{
+    return std::u16string_view( str, len );
+}
+
+constexpr std::u32string_view operator "" _sv( const char32_t* str, size_t len ) noexcept // (3)
+{
+    return std::u32string_view( str, len );
+}
+
+constexpr std::wstring_view operator "" _sv( const wchar_t* str, size_t len ) noexcept // (4)
+{
+    return std::wstring_view( str, len );
+}
+
+}} // namespace literals::string_view_literals
+
+#endif // nssv_CONFIG_USR_SV_OPERATOR
+
+} // namespace nonstd
+
+#endif // nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS
+
+// Re-export the standard string_view facilities under namespace nonstd.
+namespace nonstd {
+
+// class template and its character-type aliases:
+
+using std::basic_string_view;
+using std::string_view;
+using std::wstring_view;
+using std::u16string_view;
+using std::u32string_view;
+
+// literal "sv" and "_sv", see above
+
+// comparison operators:
+
+using std::operator==;
+using std::operator!=;
+using std::operator<;
+using std::operator<=;
+using std::operator>;
+using std::operator>=;
+
+// stream insertion:
+
+using std::operator<<;
+
+} // namespace nonstd
+
+#else // nssv_HAVE_STD_STRING_VIEW
+
+//
+// Before C++17: use string_view lite:
+//
+
+// Compiler versions:
+//
+// MSVC++ 6.0 _MSC_VER == 1200 nssv_COMPILER_MSVC_VERSION == 60 (Visual Studio 6.0)
+// MSVC++ 7.0 _MSC_VER == 1300 nssv_COMPILER_MSVC_VERSION == 70 (Visual Studio .NET 2002)
+// MSVC++ 7.1 _MSC_VER == 1310 nssv_COMPILER_MSVC_VERSION == 71 (Visual Studio .NET 2003)
+// MSVC++ 8.0 _MSC_VER == 1400 nssv_COMPILER_MSVC_VERSION == 80 (Visual Studio 2005)
+// MSVC++ 9.0 _MSC_VER == 1500 nssv_COMPILER_MSVC_VERSION == 90 (Visual Studio 2008)
+// MSVC++ 10.0 _MSC_VER == 1600 nssv_COMPILER_MSVC_VERSION == 100 (Visual Studio 2010)
+// MSVC++ 11.0 _MSC_VER == 1700 nssv_COMPILER_MSVC_VERSION == 110 (Visual Studio 2012)
+// MSVC++ 12.0 _MSC_VER == 1800 nssv_COMPILER_MSVC_VERSION == 120 (Visual Studio 2013)
+// MSVC++ 14.0 _MSC_VER == 1900 nssv_COMPILER_MSVC_VERSION == 140 (Visual Studio 2015)
+// MSVC++ 14.1 _MSC_VER >= 1910 nssv_COMPILER_MSVC_VERSION == 141 (Visual Studio 2017)
+// MSVC++ 14.2 _MSC_VER >= 1920 nssv_COMPILER_MSVC_VERSION == 142 (Visual Studio 2019)
+
+#if defined(_MSC_VER ) && !defined(__clang__)
+# define nssv_COMPILER_MSVC_VER (_MSC_VER )
+# define nssv_COMPILER_MSVC_VERSION (_MSC_VER / 10 - 10 * ( 5 + (_MSC_VER < 1900 ) ) )
+#else
+# define nssv_COMPILER_MSVC_VER 0
+# define nssv_COMPILER_MSVC_VERSION 0
+#endif
+
+#define nssv_COMPILER_VERSION( major, minor, patch ) ( 10 * ( 10 * (major) + (minor) ) + (patch) )
+
+#if defined(__clang__)
+# define nssv_COMPILER_CLANG_VERSION nssv_COMPILER_VERSION(__clang_major__, __clang_minor__, __clang_patchlevel__)
+#else
+# define nssv_COMPILER_CLANG_VERSION 0
+#endif
+
+#if defined(__GNUC__) && !defined(__clang__)
+# define nssv_COMPILER_GNUC_VERSION nssv_COMPILER_VERSION(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__)
+#else
+# define nssv_COMPILER_GNUC_VERSION 0
+#endif
+
+// half-open range [lo..hi):
+#define nssv_BETWEEN( v, lo, hi ) ( (lo) <= (v) && (v) < (hi) )
+
+// Presence of language and library features:
+
+#ifdef _HAS_CPP0X
+# define nssv_HAS_CPP0X _HAS_CPP0X
+#else
+# define nssv_HAS_CPP0X 0
+#endif
+
+// Unless defined otherwise below, consider VC14 as C++11 for string-view lite:
+
+#if nssv_COMPILER_MSVC_VER >= 1900
+# undef nssv_CPP11_OR_GREATER
+# define nssv_CPP11_OR_GREATER 1
+#endif
+
+#define nssv_CPP11_90 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1500)
+#define nssv_CPP11_100 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1600)
+#define nssv_CPP11_110 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1700)
+#define nssv_CPP11_120 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1800)
+#define nssv_CPP11_140 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1900)
+#define nssv_CPP11_141 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1910)
+
+#define nssv_CPP14_000 (nssv_CPP14_OR_GREATER)
+#define nssv_CPP17_000 (nssv_CPP17_OR_GREATER)
+
+// Presence of C++11 language features:
+
+#define nssv_HAVE_CONSTEXPR_11 nssv_CPP11_140
+#define nssv_HAVE_EXPLICIT_CONVERSION nssv_CPP11_140
+#define nssv_HAVE_INLINE_NAMESPACE nssv_CPP11_140
+#define nssv_HAVE_NOEXCEPT nssv_CPP11_140
+#define nssv_HAVE_NULLPTR nssv_CPP11_100
+#define nssv_HAVE_REF_QUALIFIER nssv_CPP11_140
+#define nssv_HAVE_UNICODE_LITERALS nssv_CPP11_140
+#define nssv_HAVE_USER_DEFINED_LITERALS nssv_CPP11_140
+#define nssv_HAVE_WCHAR16_T nssv_CPP11_100
+#define nssv_HAVE_WCHAR32_T nssv_CPP11_100
+
+#if ! ( ( nssv_CPP11_OR_GREATER && nssv_COMPILER_CLANG_VERSION ) || nssv_BETWEEN( nssv_COMPILER_CLANG_VERSION, 300, 400 ) )
+# define nssv_HAVE_STD_DEFINED_LITERALS nssv_CPP11_140
+#else
+# define nssv_HAVE_STD_DEFINED_LITERALS 0
+#endif
+
+// Presence of C++14 language features:
+
+#define nssv_HAVE_CONSTEXPR_14 nssv_CPP14_000
+
+// Presence of C++17 language features:
+
+#define nssv_HAVE_NODISCARD nssv_CPP17_000
+
+// Presence of C++ library features:
+
+#define nssv_HAVE_STD_HASH nssv_CPP11_120
+
+// C++ feature usage:
+
+#if nssv_HAVE_CONSTEXPR_11
+# define nssv_constexpr constexpr
+#else
+# define nssv_constexpr /*constexpr*/
+#endif
+
+#if nssv_HAVE_CONSTEXPR_14
+# define nssv_constexpr14 constexpr
+#else
+# define nssv_constexpr14 /*constexpr*/
+#endif
+
+#if nssv_HAVE_EXPLICIT_CONVERSION
+# define nssv_explicit explicit
+#else
+# define nssv_explicit /*explicit*/
+#endif
+
+#if nssv_HAVE_INLINE_NAMESPACE
+# define nssv_inline_ns inline
+#else
+# define nssv_inline_ns /*inline*/
+#endif
+
+#if nssv_HAVE_NOEXCEPT
+# define nssv_noexcept noexcept
+#else
+# define nssv_noexcept /*noexcept*/
+#endif
+
+//#if nssv_HAVE_REF_QUALIFIER
+//# define nssv_ref_qual &
+//# define nssv_refref_qual &&
+//#else
+//# define nssv_ref_qual /*&*/
+//# define nssv_refref_qual /*&&*/
+//#endif
+
+#if nssv_HAVE_NULLPTR
+# define nssv_nullptr nullptr
+#else
+# define nssv_nullptr NULL
+#endif
+
+#if nssv_HAVE_NODISCARD
+# define nssv_nodiscard [[nodiscard]]
+#else
+# define nssv_nodiscard /*[[nodiscard]]*/
+#endif
+
+// Additional includes:
+
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <limits>
+#include <ostream>
+#include <string> // std::char_traits<>
+
+#if ! nssv_CONFIG_NO_EXCEPTIONS
+# include <stdexcept>
+#endif
+
+#if nssv_CPP11_OR_GREATER
+# include <type_traits>
+#endif
+
+// Clang, GNUC, MSVC warning suppression macros:
+
+#if defined(__clang__)
+# pragma clang diagnostic ignored "-Wreserved-user-defined-literal"
+# pragma clang diagnostic push
+# pragma clang diagnostic ignored "-Wuser-defined-literals"
+#elif defined(__GNUC__)
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Wliteral-suffix"
+#endif // __clang__
+
+#if nssv_COMPILER_MSVC_VERSION >= 140
+# define nssv_SUPPRESS_MSGSL_WARNING(expr) [[gsl::suppress(expr)]]
+# define nssv_SUPPRESS_MSVC_WARNING(code, descr) __pragma(warning(suppress: code) )
+# define nssv_DISABLE_MSVC_WARNINGS(codes) __pragma(warning(push)) __pragma(warning(disable: codes))
+#else
+# define nssv_SUPPRESS_MSGSL_WARNING(expr)
+# define nssv_SUPPRESS_MSVC_WARNING(code, descr)
+# define nssv_DISABLE_MSVC_WARNINGS(codes)
+#endif
+
+#if defined(__clang__)
+# define nssv_RESTORE_WARNINGS() _Pragma("clang diagnostic pop")
+#elif defined(__GNUC__)
+# define nssv_RESTORE_WARNINGS() _Pragma("GCC diagnostic pop")
+#elif nssv_COMPILER_MSVC_VERSION >= 140
+# define nssv_RESTORE_WARNINGS() __pragma(warning(pop ))
+#else
+# define nssv_RESTORE_WARNINGS()
+#endif
+
+// Suppress the following MSVC (GSL) warnings:
+// - C4455, non-gsl : 'operator ""sv': literal suffix identifiers that do not
+// start with an underscore are reserved
+// - C26472, gsl::t.1 : don't use a static_cast for arithmetic conversions;
+// use brace initialization, gsl::narrow_cast or gsl::narrow
+// - C26481: gsl::b.1 : don't use pointer arithmetic. Use span instead
+
+nssv_DISABLE_MSVC_WARNINGS( 4455 26481 26472 )
+//nssv_DISABLE_CLANG_WARNINGS( "-Wuser-defined-literals" )
+//nssv_DISABLE_GNUC_WARNINGS( -Wliteral-suffix )
+
+namespace nonstd { namespace sv_lite {
+
+#if nssv_CPP11_OR_GREATER
+
+namespace detail {
+
+#if nssv_CPP14_OR_GREATER
+
+// Count the characters that precede the first '\0' in `s`, added to `result`.
+template< typename CharT >
+inline constexpr std::size_t length( CharT * s, std::size_t result = 0 )
+{
+    std::size_t count = result;
+
+    for ( CharT * cursor = s; *cursor != '\0'; ++cursor )
+    {
+        ++count;
+    }
+
+    return count;
+}
+
+#else // nssv_CPP14_OR_GREATER
+
+// Expect tail call optimization to make length() non-recursive:
+
+template< typename CharT >
+inline constexpr std::size_t length( CharT * s, std::size_t result = 0 )
+{
+    // Kept as a single return statement, as C++11 constexpr requires: recurse
+    // while the current character is not the terminator, else yield the count.
+    return *s != '\0' ? length( s + 1, result + 1 ) : result;
+}
+
+#endif // nssv_CPP14_OR_GREATER
+
+} // namespace detail
+
+#endif // nssv_CPP11_OR_GREATER
+
+// Forward declaration; this is where the default for Traits is specified.
+template< class CharT, class Traits = std::char_traits<CharT> >
+class basic_string_view;
+
+//
+// basic_string_view:
+//
+
+template
+<
+ class CharT,
+ class Traits /* = std::char_traits<CharT> */
+>
+class basic_string_view
+{
+public:
+ // Member types:
+
+ typedef Traits traits_type;
+ typedef CharT value_type;
+
+ typedef CharT * pointer;
+ typedef CharT const * const_pointer;
+ typedef CharT & reference;
+ typedef CharT const & const_reference;
+
+ typedef const_pointer iterator;
+ typedef const_pointer const_iterator;
+ typedef std::reverse_iterator< const_iterator > reverse_iterator;
+ typedef std::reverse_iterator< const_iterator > const_reverse_iterator;
+
+ typedef std::size_t size_type;
+ typedef std::ptrdiff_t difference_type;
+
+ // 24.4.2.1 Construction and assignment:
+
+ nssv_constexpr basic_string_view() nssv_noexcept // default constructor: an empty view
+ : data_( nssv_nullptr ) // no character sequence is referenced
+ , size_( 0 ) // length zero
+ {}
+
+#if nssv_CPP11_OR_GREATER // C++11 and later: compiler-generated copy constructor
+ nssv_constexpr basic_string_view( basic_string_view const & other ) nssv_noexcept = default;
+#else // otherwise: member-wise copy spelled out by hand
+ nssv_constexpr basic_string_view( basic_string_view const & other ) nssv_noexcept
+ : data_( other.data_) // refer to the same characters as `other`
+ , size_( other.size_) // with the same length
+ {}
+#endif
+
+ nssv_constexpr basic_string_view( CharT const * s, size_type count ) nssv_noexcept // non-standard noexcept; views `count` characters starting at `s`
+ : data_( s ) // the pointer is stored as given, nothing is copied
+ , size_( count ) // the length is taken from the caller, `s` is not scanned
+ {}
+
+ nssv_constexpr basic_string_view( CharT const * s) nssv_noexcept // non-standard noexcept; the length is computed from `s` itself
+ : data_( s )
+#if nssv_CPP17_OR_GREATER // C++17 and later: length via Traits::length()
+ , size_( Traits::length(s) )
+#elif nssv_CPP11_OR_GREATER // C++11/14: length via the constexpr helper detail::length()
+ , size_( detail::length(s) )
+#else // before C++11: length via Traits::length()
+ , size_( Traits::length(s) )
+#endif
+ {}
+
+ // Assignment:
+
+#if nssv_CPP11_OR_GREATER // C++11 and later: defaulted copy assignment
+ nssv_constexpr14 basic_string_view & operator=( basic_string_view const & other ) nssv_noexcept = default;
+#else // otherwise: hand-written member-wise assignment
+ nssv_constexpr14 basic_string_view & operator=( basic_string_view const & other ) nssv_noexcept
+ {
+ data_ = other.data_; // refer to the same characters as `other`
+ size_ = other.size_; // with the same length
+ return *this;
+ }
+#endif
+
+ // 24.4.2.2 Iterator support:
+
+ nssv_constexpr const_iterator begin() const nssv_noexcept { return data_; } // pointer to the first viewed character
+ nssv_constexpr const_iterator end() const nssv_noexcept { return data_ + size_; } // pointer one past the last viewed character
+
+ nssv_constexpr const_iterator cbegin() const nssv_noexcept { return begin(); } // same as begin()
+ nssv_constexpr const_iterator cend() const nssv_noexcept { return end(); } // same as end()
+
+ nssv_constexpr const_reverse_iterator rbegin() const nssv_noexcept { return const_reverse_iterator( end() ); } // reverse iterator built from end()
+ nssv_constexpr const_reverse_iterator rend() const nssv_noexcept { return const_reverse_iterator( begin() ); } // reverse iterator built from begin()
+
+ nssv_constexpr const_reverse_iterator crbegin() const nssv_noexcept { return rbegin(); } // same as rbegin()
+ nssv_constexpr const_reverse_iterator crend() const nssv_noexcept { return rend(); } // same as rend()
+
+ // 24.4.2.3 Capacity:
+
+ nssv_constexpr size_type size() const nssv_noexcept { return size_; } // number of characters in the view
+ nssv_constexpr size_type length() const nssv_noexcept { return size_; } // same value as size()
+ nssv_constexpr size_type max_size() const nssv_noexcept { return (std::numeric_limits< size_type >::max)(); } // largest size_type value; (max) is parenthesized, presumably to avoid expansion of a max() macro
+
+ // since C++20
+ nssv_nodiscard nssv_constexpr bool empty() const nssv_noexcept
+ {
+     // True exactly when the view refers to zero characters.
+     return size_ == 0;
+ }
+
+ // 24.4.2.4 Element access:
+
+ nssv_constexpr const_reference operator[]( size_type pos ) const // element access; `pos` is not range-checked in this function
+ {
+ return data_at( pos ); // delegates to data_at(), which is defined elsewhere in this class
+ }
+
+ nssv_constexpr14 const_reference at( size_type pos ) const
+ {
+     // Bounds-checked access: an out-of-range `pos` trips an assert when
+     // exceptions are disabled and throws std::out_of_range otherwise.
+#if nssv_CONFIG_NO_EXCEPTIONS
+     assert( pos < size() );
+#else
+     if ( !( pos < size() ) )
+     {
+         throw std::out_of_range("nonstd::string_view::at()");
+     }
+#endif
+     return data_at( pos );
+ }
+
+ nssv_constexpr const_reference front() const { return data_at( 0 ); } // element at index 0
+ nssv_constexpr const_reference back() const { return data_at( size() - 1 ); } // element at index size() - 1; on an empty view that index wraps around
+
+ nssv_constexpr const_pointer data() const nssv_noexcept { return data_; } // pointer to the first viewed character, exactly as stored
+
+ // 24.4.2.5 Modifiers:
+
+ nssv_constexpr14 void remove_prefix( size_type n )
+ {
+     // Drop the first `n` characters: shrink the length and advance the start.
+     // `n` must not exceed size(); this is only checked by the assert.
+     assert( n <= size() );
+     size_ -= n;
+     data_ += n;
+ }
+
+ nssv_constexpr14 void remove_suffix( size_type n )
+ {
+     // Drop the last `n` characters by shortening the view; the start pointer
+     // is unchanged. `n` must not exceed size(), checked only by the assert.
+     assert( n <= size() );
+     size_ = size_ - n;
+ }
+
+ nssv_constexpr14 void swap( basic_string_view & other ) nssv_noexcept
+ {
+     // Exchange both members with `other`; the two exchanges are independent.
+     using std::swap;
+     swap( size_, other.size_ );
+     swap( data_, other.data_ );
+ }
+
+ // 24.4.2.6 String operations:
+
+ size_type copy( CharT * dest, size_type n, size_type pos = 0 ) const
+ {
+     // Copy up to `n` characters, starting at index `pos`, into `dest` and
+     // return how many were copied. A `pos` beyond size() trips an assert when
+     // exceptions are disabled and throws std::out_of_range otherwise.
+#if nssv_CONFIG_NO_EXCEPTIONS
+     assert( pos <= size() );
+#else
+     if ( size() < pos )
+     {
+         throw std::out_of_range("nonstd::string_view::copy()");
+     }
+#endif
+     // Never read past the end of the view.
+     const size_type available = size() - pos;
+     const size_type rlen = n < available ? n : available;
+
+     (void) Traits::copy( dest, data() + pos, rlen );
+
+     return rlen;
+ }
+
+ nssv_constexpr14 basic_string_view substr( size_type pos = 0, size_type n = npos ) const
+ {
+     // View of at most `n` characters starting at index `pos`. A `pos` beyond
+     // size() trips an assert when exceptions are disabled and throws
+     // std::out_of_range otherwise.
+#if nssv_CONFIG_NO_EXCEPTIONS
+     assert( pos <= size() );
+#else
+     if ( size() < pos )
+     {
+         throw std::out_of_range("nonstd::string_view::substr()");
+     }
+#endif
+     // Clamp the requested length to what remains after `pos`.
+     const size_type remaining = size() - pos;
+
+     return basic_string_view( data() + pos, n < remaining ? n : remaining );
+ }
+
+ // compare(), 6x:
+
+ nssv_constexpr14 int compare( basic_string_view other ) const nssv_noexcept // (1)
+ {
+     // Order by the common prefix first; the lengths decide only on a tie.
+     const size_type common = (std::min)( size(), other.size() );
+     const int prefix_order = Traits::compare( data(), other.data(), common );
+
+     if ( prefix_order != 0 )
+     {
+         return prefix_order;
+     }
+
+     if ( size() < other.size() )
+     {
+         return -1;
+     }
+
+     return size() == other.size() ? 0 : 1;
+ }
+
+ nssv_constexpr int compare( size_type pos1, size_type n1, basic_string_view other ) const // (2) compares substr( pos1, n1 ) with `other`
+ {
+ return substr( pos1, n1 ).compare( other );
+ }
+
+ nssv_constexpr int compare( size_type pos1, size_type n1, basic_string_view other, size_type pos2, size_type n2 ) const // (3) compares substr( pos1, n1 ) with other.substr( pos2, n2 )
+ {
+ return substr( pos1, n1 ).compare( other.substr( pos2, n2 ) );
+ }
+
+ nssv_constexpr int compare( CharT const * s ) const // (4) compares the whole view with a view constructed from `s`
+ {
+ return compare( basic_string_view( s ) );
+ }
+
+ nssv_constexpr int compare( size_type pos1, size_type n1, CharT const * s ) const // (5) compares substr( pos1, n1 ) with a view constructed from `s`
+ {
+ return substr( pos1, n1 ).compare( basic_string_view( s ) );
+ }
+
+ nssv_constexpr int compare( size_type pos1, size_type n1, CharT const * s, size_type n2 ) const // (6) compares substr( pos1, n1 ) with the first `n2` characters at `s`
+ {
+ return substr( pos1, n1 ).compare( basic_string_view( s, n2 ) );
+ }
+
+ // 24.4.2.7 Searching:
+
+ // starts_with(), 3x, since C++20:
+
+ nssv_constexpr bool starts_with( basic_string_view v ) const nssv_noexcept // (1) true when the view is at least as long as `v` and its first v.size() characters compare equal to `v`
+ {
+ return size() >= v.size() && compare( 0, v.size(), v ) == 0; // the size test runs first, so compare() is skipped for a too-short view
+ }
+
+ nssv_constexpr bool starts_with( CharT c ) const nssv_noexcept // (2) single character, tested as a one-element view
+ {
+ return starts_with( basic_string_view( &c, 1 ) );
+ }
+
+ nssv_constexpr bool starts_with( CharT const * s ) const // (3) `s` is wrapped in a view first, then tested via overload (1)
+ {
+ return starts_with( basic_string_view( s ) );
+ }
+
+ // ends_with(), 3x, since C++20:
+
+ // (1) True when this view is at least as long as v and its last
+ // v.size() characters compare equal to v. The size check comes first so
+ // that size() - v.size() cannot wrap around.
+ nssv_constexpr bool ends_with( basic_string_view v ) const nssv_noexcept // (1)
+ {
+ return size() >= v.size() && compare( size() - v.size(), npos, v ) == 0;
+ }
+
+ // (2) True when the view ends with the single character c.
+ nssv_constexpr bool ends_with( CharT c ) const nssv_noexcept // (2)
+ {
+ return ends_with( basic_string_view( &c, 1 ) );
+ }
+
+ // (3) True when the view ends with the character sequence s.
+ nssv_constexpr bool ends_with( CharT const * s ) const // (3)
+ {
+ return ends_with( basic_string_view( s ) );
+ }
+
+ // find(), 4x:
+
+    // (1) Returns the lowest offset >= pos at which v occurs in this view,
+    // or npos when there is no such offset.
+    //
+    // An empty v matches at every offset up to and including size(), so it
+    // is answered directly: search() would report cend() for a match at the
+    // very end, and to_pos() maps cend() to npos. This agrees with rfind(),
+    // which also treats an empty needle as found.
+    nssv_constexpr14 size_type find( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1)
+    {
+        return assert( v.size() == 0 || v.data() != nssv_nullptr )
+            , v.empty()
+            ? ( pos <= size() ? pos : npos )
+            : pos >= size()
+            ? npos
+            : to_pos( std::search( cbegin() + pos, cend(), v.cbegin(), v.cend(), Traits::eq ) );
+    }
+
+ // (2) Finds the first occurrence of the character c at or after pos by
+ // searching for a one-character view over c.
+ nssv_constexpr14 size_type find( CharT c, size_type pos = 0 ) const nssv_noexcept // (2)
+ {
+ return find( basic_string_view( &c, 1 ), pos );
+ }
+
+ // (3) Finds the first occurrence, at or after pos, of the n characters
+ // starting at s.
+ nssv_constexpr14 size_type find( CharT const * s, size_type pos, size_type n ) const // (3)
+ {
+ return find( basic_string_view( s, n ), pos );
+ }
+
+ // (4) Finds the first occurrence, at or after pos, of the character
+ // sequence s.
+ nssv_constexpr14 size_type find( CharT const * s, size_type pos = 0 ) const // (4)
+ {
+ return find( basic_string_view( s ), pos );
+ }
+
+ // rfind(), 4x:
+
+    // (1) Returns the highest offset <= pos at which v occurs in this view,
+    // or npos when v does not occur there. A needle longer than the view is
+    // never found; an empty needle is found at min( size(), pos ).
+    nssv_constexpr14 size_type rfind( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1)
+    {
+        if ( v.size() > size() )
+            return npos;
+
+        if ( v.empty() )
+            return (std::min)( size(), pos );
+
+        // Highest offset at which a match may still start.
+        const size_type start_limit = (std::min)( size() - v.size(), pos );
+
+        // A match starting at start_limit ends here, so the search range
+        // stops here.
+        const const_iterator stop = cbegin() + start_limit + v.size();
+        const const_iterator hit = std::find_end( cbegin(), stop, v.cbegin(), v.cend(), Traits::eq );
+
+        if ( hit == stop )
+            return npos;
+
+        return size_type( hit - cbegin() );
+    }
+
+ // (2) Finds the last occurrence of the character c that starts at or
+ // before pos.
+ nssv_constexpr14 size_type rfind( CharT c, size_type pos = npos ) const nssv_noexcept // (2)
+ {
+ return rfind( basic_string_view( &c, 1 ), pos );
+ }
+
+ // (3) Finds the last occurrence, starting at or before pos, of the n
+ // characters at s.
+ nssv_constexpr14 size_type rfind( CharT const * s, size_type pos, size_type n ) const // (3)
+ {
+ return rfind( basic_string_view( s, n ), pos );
+ }
+
+ // (4) Finds the last occurrence, starting at or before pos, of the
+ // character sequence s.
+ nssv_constexpr14 size_type rfind( CharT const * s, size_type pos = npos ) const // (4)
+ {
+ return rfind( basic_string_view( s ), pos );
+ }
+
+ // find_first_of(), 4x:
+
+ // (1) Returns the offset of the first character at or after pos that is
+ // equal (per Traits::eq) to any character of v, or npos when there is
+ // none or pos is not inside the view.
+ nssv_constexpr size_type find_first_of( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1)
+ {
+ return pos >= size()
+ ? npos
+ : to_pos( std::find_first_of( cbegin() + pos, cend(), v.cbegin(), v.cend(), Traits::eq ) );
+ }
+
+ // (2) Same search with the single character c as the set.
+ nssv_constexpr size_type find_first_of( CharT c, size_type pos = 0 ) const nssv_noexcept // (2)
+ {
+ return find_first_of( basic_string_view( &c, 1 ), pos );
+ }
+
+ // (3) Same search with the n characters at s as the set.
+ nssv_constexpr size_type find_first_of( CharT const * s, size_type pos, size_type n ) const // (3)
+ {
+ return find_first_of( basic_string_view( s, n ), pos );
+ }
+
+ // (4) Same search with the character sequence s as the set.
+ nssv_constexpr size_type find_first_of( CharT const * s, size_type pos = 0 ) const // (4)
+ {
+ return find_first_of( basic_string_view( s ), pos );
+ }
+
+ // find_last_of(), 4x:
+
+ // (1) Returns the offset of the last character at or before pos that is
+ // equal (per Traits::eq) to any character of v, or npos when there is
+ // none. An empty view yields npos. A pos at or beyond the end is clamped
+ // by re-entering with size() - 1. Otherwise the view is scanned backwards
+ // through reverse iterators, starting with the character at pos.
+ nssv_constexpr size_type find_last_of( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1)
+ {
+ return empty()
+ ? npos
+ : pos >= size()
+ ? find_last_of( v, size() - 1 )
+ : to_pos( std::find_first_of( const_reverse_iterator( cbegin() + pos + 1 ), crend(), v.cbegin(), v.cend(), Traits::eq ) );
+ }
+
+ // (2) Same search with the single character c as the set.
+ nssv_constexpr size_type find_last_of( CharT c, size_type pos = npos ) const nssv_noexcept // (2)
+ {
+ return find_last_of( basic_string_view( &c, 1 ), pos );
+ }
+
+ // (3) Same search with the count characters at s as the set.
+ nssv_constexpr size_type find_last_of( CharT const * s, size_type pos, size_type count ) const // (3)
+ {
+ return find_last_of( basic_string_view( s, count ), pos );
+ }
+
+ // (4) Same search with the character sequence s as the set.
+ nssv_constexpr size_type find_last_of( CharT const * s, size_type pos = npos ) const // (4)
+ {
+ return find_last_of( basic_string_view( s ), pos );
+ }
+
+ // find_first_not_of(), 4x:
+
+ // (1) Returns the offset of the first character at or after pos that
+ // does NOT occur in v (tested with the not_in_view predicate), or npos
+ // when every character occurs in v or pos is not inside the view.
+ nssv_constexpr size_type find_first_not_of( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1)
+ {
+ return pos >= size()
+ ? npos
+ : to_pos( std::find_if( cbegin() + pos, cend(), not_in_view( v ) ) );
+ }
+
+ // (2) Same search with the single character c as the set.
+ nssv_constexpr size_type find_first_not_of( CharT c, size_type pos = 0 ) const nssv_noexcept // (2)
+ {
+ return find_first_not_of( basic_string_view( &c, 1 ), pos );
+ }
+
+ // (3) Same search with the count characters at s as the set.
+ nssv_constexpr size_type find_first_not_of( CharT const * s, size_type pos, size_type count ) const // (3)
+ {
+ return find_first_not_of( basic_string_view( s, count ), pos );
+ }
+
+ // (4) Same search with the character sequence s as the set.
+ nssv_constexpr size_type find_first_not_of( CharT const * s, size_type pos = 0 ) const // (4)
+ {
+ return find_first_not_of( basic_string_view( s ), pos );
+ }
+
+ // find_last_not_of(), 4x:
+
+ // (1) Returns the offset of the last character at or before pos that
+ // does NOT occur in v, or npos when there is none. An empty view yields
+ // npos. A pos at or beyond the end is clamped by re-entering with
+ // size() - 1. Otherwise the view is scanned backwards through reverse
+ // iterators, starting with the character at pos.
+ nssv_constexpr size_type find_last_not_of( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1)
+ {
+ return empty()
+ ? npos
+ : pos >= size()
+ ? find_last_not_of( v, size() - 1 )
+ : to_pos( std::find_if( const_reverse_iterator( cbegin() + pos + 1 ), crend(), not_in_view( v ) ) );
+ }
+
+ // (2) Same search with the single character c as the set.
+ nssv_constexpr size_type find_last_not_of( CharT c, size_type pos = npos ) const nssv_noexcept // (2)
+ {
+ return find_last_not_of( basic_string_view( &c, 1 ), pos );
+ }
+
+ // (3) Same search with the count characters at s as the set.
+ nssv_constexpr size_type find_last_not_of( CharT const * s, size_type pos, size_type count ) const // (3)
+ {
+ return find_last_not_of( basic_string_view( s, count ), pos );
+ }
+
+ // (4) Same search with the character sequence s as the set.
+ nssv_constexpr size_type find_last_not_of( CharT const * s, size_type pos = npos ) const // (4)
+ {
+ return find_last_not_of( basic_string_view( s ), pos );
+ }
+
+ // Constants:
+
+// npos is size_type(-1). The search functions of this class return it for
+// "not found", and it serves as the default "as many as possible" count.
+// It is a static constexpr member for C++17 and later, an enum with a fixed
+// underlying type for C++11/14, and a plain enum otherwise.
+#if nssv_CPP17_OR_GREATER
+ static nssv_constexpr size_type npos = size_type(-1);
+#elif nssv_CPP11_OR_GREATER
+ enum : size_type { npos = size_type(-1) };
+#else
+ enum { npos = size_type(-1) };
+#endif
+
+private:
+ // Unary predicate used by find_first_not_of() and find_last_not_of():
+ // holds a copy of a view and reports whether a character is absent from it.
+ struct not_in_view
+ {
+ const basic_string_view v;
+
+ nssv_constexpr explicit not_in_view( basic_string_view v_ ) : v( v_ ) {}
+
+ // True when c does not occur anywhere in v.
+ nssv_constexpr bool operator()( CharT c ) const
+ {
+ return npos == v.find_first_of( c );
+ }
+ };
+
+ // Converts a forward iterator into an offset from the start of the view;
+ // the end iterator (the algorithms' "not found" result) becomes npos.
+ nssv_constexpr size_type to_pos( const_iterator it ) const
+ {
+ return it == cend() ? npos : size_type( it - cbegin() );
+ }
+
+ // Converts a reverse iterator into an offset from the start of the view;
+ // crend() becomes npos. The "- 1" accounts for a reverse iterator
+ // designating the element just before its base position.
+ nssv_constexpr size_type to_pos( const_reverse_iterator it ) const
+ {
+ return it == crend() ? npos : size_type( crend() - it - 1 );
+ }
+
+ // Returns a reference to the character at offset pos. The bounds assert is
+ // skipped for the compiler range selected by
+ // nssv_BETWEEN( nssv_COMPILER_GNUC_VERSION, 1, 500 ).
+ // NOTE(review): presumably GCC before 5.0, which cannot handle assert() in
+ // this constexpr context -- confirm against the macro definitions.
+ nssv_constexpr const_reference data_at( size_type pos ) const
+ {
+#if nssv_BETWEEN( nssv_COMPILER_GNUC_VERSION, 1, 500 )
+ return data_[pos];
+#else
+ return assert( pos < size() ), data_[pos];
+#endif
+ }
+
+private:
+ const_pointer data_;
+ size_type size_;
+
+public:
+// Member-function conversions between basic_string_view and
+// std::basic_string; compiled only when
+// nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS is enabled.
+#if nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS
+
+ // Implicit conversion from a std::basic_string with any allocator. The
+ // view refers to the string's own buffer; no characters are copied.
+ template< class Allocator >
+ basic_string_view( std::basic_string<CharT, Traits, Allocator> const & s ) nssv_noexcept
+ : data_( s.data() )
+ , size_( s.size() )
+ {}
+
+#if nssv_HAVE_EXPLICIT_CONVERSION
+
+ // Explicit conversion to std::basic_string; copies the characters using
+ // a default-constructed Allocator.
+ template< class Allocator >
+ explicit operator std::basic_string<CharT, Traits, Allocator>() const
+ {
+ return to_string( Allocator() );
+ }
+
+#endif // nssv_HAVE_EXPLICIT_CONVERSION
+
+#if nssv_CPP11_OR_GREATER
+
+ // Returns a std::basic_string holding a copy of the viewed characters,
+ // constructed with allocator a.
+ template< class Allocator = std::allocator<CharT> >
+ std::basic_string<CharT, Traits, Allocator>
+ to_string( Allocator const & a = Allocator() ) const
+ {
+ return std::basic_string<CharT, Traits, Allocator>( begin(), end(), a );
+ }
+
+#else
+
+ // Pre-C++11 replacement for the defaulted template argument above:
+ // one overload without an allocator and one with.
+ std::basic_string<CharT, Traits>
+ to_string() const
+ {
+ return std::basic_string<CharT, Traits>( begin(), end() );
+ }
+
+ template< class Allocator >
+ std::basic_string<CharT, Traits, Allocator>
+ to_string( Allocator const & a ) const
+ {
+ return std::basic_string<CharT, Traits, Allocator>( begin(), end(), a );
+ }
+
+#endif // nssv_CPP11_OR_GREATER
+
+#endif // nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS
+};
+
+//
+// Non-member functions:
+//
+
+// 24.4.3 Non-member comparison functions:
+// lexicographically compare two string views (function template):
+
+// The six relational operators for two views of the same character and
+// traits type. Each one maps the three-way result of lhs.compare( rhs )
+// onto the corresponding relation.
+
+// Equal when compare() reports 0.
+template< class CharT, class Traits >
+nssv_constexpr bool operator== (
+ basic_string_view <CharT, Traits> lhs,
+ basic_string_view <CharT, Traits> rhs ) nssv_noexcept
+{ return lhs.compare( rhs ) == 0 ; }
+
+// Different when compare() reports a non-zero value.
+template< class CharT, class Traits >
+nssv_constexpr bool operator!= (
+ basic_string_view <CharT, Traits> lhs,
+ basic_string_view <CharT, Traits> rhs ) nssv_noexcept
+{ return lhs.compare( rhs ) != 0 ; }
+
+// lhs orders before rhs.
+template< class CharT, class Traits >
+nssv_constexpr bool operator< (
+ basic_string_view <CharT, Traits> lhs,
+ basic_string_view <CharT, Traits> rhs ) nssv_noexcept
+{ return lhs.compare( rhs ) < 0 ; }
+
+// lhs orders before rhs or is equal to it.
+template< class CharT, class Traits >
+nssv_constexpr bool operator<= (
+ basic_string_view <CharT, Traits> lhs,
+ basic_string_view <CharT, Traits> rhs ) nssv_noexcept
+{ return lhs.compare( rhs ) <= 0 ; }
+
+// lhs orders after rhs.
+template< class CharT, class Traits >
+nssv_constexpr bool operator> (
+ basic_string_view <CharT, Traits> lhs,
+ basic_string_view <CharT, Traits> rhs ) nssv_noexcept
+{ return lhs.compare( rhs ) > 0 ; }
+
+// lhs orders after rhs or is equal to it.
+template< class CharT, class Traits >
+nssv_constexpr bool operator>= (
+ basic_string_view <CharT, Traits> lhs,
+ basic_string_view <CharT, Traits> rhs ) nssv_noexcept
+{ return lhs.compare( rhs ) >= 0 ; }
+
+// Let S be basic_string_view<CharT, Traits>, and sv be an instance of S.
+// Implementations shall provide sufficient additional overloads marked
+// constexpr and noexcept so that an object t with an implicit conversion
+// to S can be compared according to Table 67.
+
+#if ! nssv_CPP11_OR_GREATER || nssv_BETWEEN( nssv_COMPILER_MSVC_VERSION, 100, 141 )
+
+// accommodate older compilers:
+
+// ==
+
+// Explicit mixed-type overloads of operator== for compilers that take this
+// branch: view against character pointer and view against std::basic_string,
+// in both operand orders. Every overload compares through the view.
+
+// view == pointer
+template< class CharT, class Traits>
+nssv_constexpr bool operator==(
+ basic_string_view<CharT, Traits> lhs,
+ CharT const * rhs ) nssv_noexcept
+{ return lhs.compare( rhs ) == 0; }
+
+// pointer == view
+template< class CharT, class Traits>
+nssv_constexpr bool operator==(
+ CharT const * lhs,
+ basic_string_view<CharT, Traits> rhs ) nssv_noexcept
+{ return rhs.compare( lhs ) == 0; }
+
+// view == string; the size test short-circuits before the character
+// comparison.
+template< class CharT, class Traits>
+nssv_constexpr bool operator==(
+ basic_string_view<CharT, Traits> lhs,
+ std::basic_string<CharT, Traits> rhs ) nssv_noexcept
+{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; }
+
+// string == view. Note that the parameter names are swapped relative to
+// their positions: the first operand (the string) is called rhs and the
+// second (the view) lhs. Equality is symmetric, so the result is unaffected.
+template< class CharT, class Traits>
+nssv_constexpr bool operator==(
+ std::basic_string<CharT, Traits> rhs,
+ basic_string_view<CharT, Traits> lhs ) nssv_noexcept
+{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; }
+
+// !=
+
+// Explicit mixed-type overloads of operator!= for older compilers: view
+// against character pointer and view against std::basic_string, in both
+// operand orders. Parameters are named by position (lhs first, rhs second)
+// and every overload compares through the view. The pointer overloads take
+// CharT const * so that they work for every character type, matching the
+// operator== overloads.
+
+// view != pointer
+template< class CharT, class Traits>
+nssv_constexpr bool operator!=(
+    basic_string_view<CharT, Traits> lhs,
+    CharT const * rhs ) nssv_noexcept
+{ return lhs.compare( rhs ) != 0; }
+
+// pointer != view
+template< class CharT, class Traits>
+nssv_constexpr bool operator!=(
+    CharT const * lhs,
+    basic_string_view<CharT, Traits> rhs ) nssv_noexcept
+{ return rhs.compare( lhs ) != 0; }
+
+// view != string: different when the sizes differ OR the characters differ.
+template< class CharT, class Traits>
+nssv_constexpr bool operator!=(
+    basic_string_view<CharT, Traits> lhs,
+    std::basic_string<CharT, Traits> rhs ) nssv_noexcept
+{ return lhs.size() != rhs.size() || lhs.compare( rhs ) != 0; }
+
+// string != view
+template< class CharT, class Traits>
+nssv_constexpr bool operator!=(
+    std::basic_string<CharT, Traits> lhs,
+    basic_string_view<CharT, Traits> rhs ) nssv_noexcept
+{ return lhs.size() != rhs.size() || rhs.compare( lhs ) != 0; }
+
+// <
+
+// Explicit mixed-type overloads of operator< for older compilers.
+// Parameters are named by position (lhs first, rhs second) and every
+// overload compares through the view; when the view is the right-hand
+// operand the relation is mirrored (a < b  <=>  b.compare( a ) > 0).
+// The pointer overloads take CharT const * so that they work for every
+// character type.
+
+// view < pointer
+template< class CharT, class Traits>
+nssv_constexpr bool operator<(
+    basic_string_view<CharT, Traits> lhs,
+    CharT const * rhs ) nssv_noexcept
+{ return lhs.compare( rhs ) < 0; }
+
+// pointer < view
+template< class CharT, class Traits>
+nssv_constexpr bool operator<(
+    CharT const * lhs,
+    basic_string_view<CharT, Traits> rhs ) nssv_noexcept
+{ return rhs.compare( lhs ) > 0; }
+
+// view < string
+template< class CharT, class Traits>
+nssv_constexpr bool operator<(
+    basic_string_view<CharT, Traits> lhs,
+    std::basic_string<CharT, Traits> rhs ) nssv_noexcept
+{ return lhs.compare( rhs ) < 0; }
+
+// string < view
+template< class CharT, class Traits>
+nssv_constexpr bool operator<(
+    std::basic_string<CharT, Traits> lhs,
+    basic_string_view<CharT, Traits> rhs ) nssv_noexcept
+{ return rhs.compare( lhs ) > 0; }
+
+// <=
+
+// Explicit mixed-type overloads of operator<= for older compilers.
+// Parameters are named by position (lhs first, rhs second) and every
+// overload compares through the view; when the view is the right-hand
+// operand the relation is mirrored (a <= b  <=>  b.compare( a ) >= 0).
+// The pointer overloads take CharT const * so that they work for every
+// character type.
+
+// view <= pointer
+template< class CharT, class Traits>
+nssv_constexpr bool operator<=(
+    basic_string_view<CharT, Traits> lhs,
+    CharT const * rhs ) nssv_noexcept
+{ return lhs.compare( rhs ) <= 0; }
+
+// pointer <= view
+template< class CharT, class Traits>
+nssv_constexpr bool operator<=(
+    CharT const * lhs,
+    basic_string_view<CharT, Traits> rhs ) nssv_noexcept
+{ return rhs.compare( lhs ) >= 0; }
+
+// view <= string
+template< class CharT, class Traits>
+nssv_constexpr bool operator<=(
+    basic_string_view<CharT, Traits> lhs,
+    std::basic_string<CharT, Traits> rhs ) nssv_noexcept
+{ return lhs.compare( rhs ) <= 0; }
+
+// string <= view
+template< class CharT, class Traits>
+nssv_constexpr bool operator<=(
+    std::basic_string<CharT, Traits> lhs,
+    basic_string_view<CharT, Traits> rhs ) nssv_noexcept
+{ return rhs.compare( lhs ) >= 0; }
+
+// >
+
+// Explicit mixed-type overloads of operator> for older compilers.
+// Parameters are named by position (lhs first, rhs second) and every
+// overload compares through the view; when the view is the right-hand
+// operand the relation is mirrored (a > b  <=>  b.compare( a ) < 0).
+// The pointer overloads take CharT const * so that they work for every
+// character type.
+
+// view > pointer
+template< class CharT, class Traits>
+nssv_constexpr bool operator>(
+    basic_string_view<CharT, Traits> lhs,
+    CharT const * rhs ) nssv_noexcept
+{ return lhs.compare( rhs ) > 0; }
+
+// pointer > view
+template< class CharT, class Traits>
+nssv_constexpr bool operator>(
+    CharT const * lhs,
+    basic_string_view<CharT, Traits> rhs ) nssv_noexcept
+{ return rhs.compare( lhs ) < 0; }
+
+// view > string
+template< class CharT, class Traits>
+nssv_constexpr bool operator>(
+    basic_string_view<CharT, Traits> lhs,
+    std::basic_string<CharT, Traits> rhs ) nssv_noexcept
+{ return lhs.compare( rhs ) > 0; }
+
+// string > view
+template< class CharT, class Traits>
+nssv_constexpr bool operator>(
+    std::basic_string<CharT, Traits> lhs,
+    basic_string_view<CharT, Traits> rhs ) nssv_noexcept
+{ return rhs.compare( lhs ) < 0; }
+
+// >=
+
+// Explicit mixed-type overloads of operator>= for older compilers.
+// Parameters are named by position (lhs first, rhs second) and every
+// overload compares through the view; when the view is the right-hand
+// operand the relation is mirrored (a >= b  <=>  b.compare( a ) <= 0).
+// The pointer overloads take CharT const * so that they work for every
+// character type.
+
+// view >= pointer
+template< class CharT, class Traits>
+nssv_constexpr bool operator>=(
+    basic_string_view<CharT, Traits> lhs,
+    CharT const * rhs ) nssv_noexcept
+{ return lhs.compare( rhs ) >= 0; }
+
+// pointer >= view
+template< class CharT, class Traits>
+nssv_constexpr bool operator>=(
+    CharT const * lhs,
+    basic_string_view<CharT, Traits> rhs ) nssv_noexcept
+{ return rhs.compare( lhs ) <= 0; }
+
+// view >= string
+template< class CharT, class Traits>
+nssv_constexpr bool operator>=(
+    basic_string_view<CharT, Traits> lhs,
+    std::basic_string<CharT, Traits> rhs ) nssv_noexcept
+{ return lhs.compare( rhs ) >= 0; }
+
+// string >= view
+template< class CharT, class Traits>
+nssv_constexpr bool operator>=(
+    std::basic_string<CharT, Traits> lhs,
+    basic_string_view<CharT, Traits> rhs ) nssv_noexcept
+{ return rhs.compare( lhs ) <= 0; }
+
+#else // newer compilers:
+
+// Mixed-type comparisons for newer compilers. One operand is declared as a
+// plain basic_string_view and takes part in template argument deduction; the
+// other is declared through nssv_BASIC_STRING_VIEW_I, which names the same
+// type via std::decay<...>::type. A nested ::type is a non-deduced context,
+// so that operand may be any object implicitly convertible to the view.
+#define nssv_BASIC_STRING_VIEW_I(T,U) typename std::decay< basic_string_view<T,U> >::type
+
+// For the MSVC versions selected below, nssv_MSVC_ORDER(x) appends an extra
+// defaulted "int" template parameter; elsewhere it expands to a comment only.
+// NOTE(review): presumably this keeps the two overloads of each operator
+// distinct for that compiler -- confirm.
+#if nssv_BETWEEN( nssv_COMPILER_MSVC_VERSION, 140, 150 )
+# define nssv_MSVC_ORDER(x) , int=x
+#else
+# define nssv_MSVC_ORDER(x) /*, int=x*/
+#endif
+
+// ==
+
+// view == convertible
+template< class CharT, class Traits nssv_MSVC_ORDER(1) >
+nssv_constexpr bool operator==(
+ basic_string_view <CharT, Traits> lhs,
+ nssv_BASIC_STRING_VIEW_I(CharT, Traits) rhs ) nssv_noexcept
+{ return lhs.compare( rhs ) == 0; }
+
+// convertible == view; the size test short-circuits before the character
+// comparison.
+template< class CharT, class Traits nssv_MSVC_ORDER(2) >
+nssv_constexpr bool operator==(
+ nssv_BASIC_STRING_VIEW_I(CharT, Traits) lhs,
+ basic_string_view <CharT, Traits> rhs ) nssv_noexcept
+{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; }
+
+// !=
+
+// view != convertible; a size mismatch alone already means "different".
+template< class CharT, class Traits nssv_MSVC_ORDER(1) >
+nssv_constexpr bool operator!= (
+ basic_string_view < CharT, Traits > lhs,
+ nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept
+{ return lhs.size() != rhs.size() || lhs.compare( rhs ) != 0 ; }
+
+// convertible != view
+template< class CharT, class Traits nssv_MSVC_ORDER(2) >
+nssv_constexpr bool operator!= (
+ nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs,
+ basic_string_view < CharT, Traits > rhs ) nssv_noexcept
+{ return lhs.compare( rhs ) != 0 ; }
+
+// <
+
+// view < convertible
+template< class CharT, class Traits nssv_MSVC_ORDER(1) >
+nssv_constexpr bool operator< (
+ basic_string_view < CharT, Traits > lhs,
+ nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept
+{ return lhs.compare( rhs ) < 0 ; }
+
+// convertible < view
+template< class CharT, class Traits nssv_MSVC_ORDER(2) >
+nssv_constexpr bool operator< (
+ nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs,
+ basic_string_view < CharT, Traits > rhs ) nssv_noexcept
+{ return lhs.compare( rhs ) < 0 ; }
+
+// <=
+
+// view <= convertible
+template< class CharT, class Traits nssv_MSVC_ORDER(1) >
+nssv_constexpr bool operator<= (
+ basic_string_view < CharT, Traits > lhs,
+ nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept
+{ return lhs.compare( rhs ) <= 0 ; }
+
+// convertible <= view
+template< class CharT, class Traits nssv_MSVC_ORDER(2) >
+nssv_constexpr bool operator<= (
+ nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs,
+ basic_string_view < CharT, Traits > rhs ) nssv_noexcept
+{ return lhs.compare( rhs ) <= 0 ; }
+
+// >
+
+// view > convertible
+template< class CharT, class Traits nssv_MSVC_ORDER(1) >
+nssv_constexpr bool operator> (
+ basic_string_view < CharT, Traits > lhs,
+ nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept
+{ return lhs.compare( rhs ) > 0 ; }
+
+// convertible > view
+template< class CharT, class Traits nssv_MSVC_ORDER(2) >
+nssv_constexpr bool operator> (
+ nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs,
+ basic_string_view < CharT, Traits > rhs ) nssv_noexcept
+{ return lhs.compare( rhs ) > 0 ; }
+
+// >=
+
+// view >= convertible
+template< class CharT, class Traits nssv_MSVC_ORDER(1) >
+nssv_constexpr bool operator>= (
+ basic_string_view < CharT, Traits > lhs,
+ nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept
+{ return lhs.compare( rhs ) >= 0 ; }
+
+// convertible >= view
+template< class CharT, class Traits nssv_MSVC_ORDER(2) >
+nssv_constexpr bool operator>= (
+ nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs,
+ basic_string_view < CharT, Traits > rhs ) nssv_noexcept
+{ return lhs.compare( rhs ) >= 0 ; }
+
+// The helper macros are local to this section.
+#undef nssv_MSVC_ORDER
+#undef nssv_BASIC_STRING_VIEW_I
+
+#endif // compiler-dependent approach to comparisons
+
+// 24.4.4 Inserters and extractors:
+
+namespace detail {
+
+// Writes n copies of the stream's fill character straight into the stream
+// buffer. Nothing is written when n is zero or negative.
+template< class Stream >
+void write_padding( Stream & os, std::streamsize n )
+{
+    std::streamsize remaining = n;
+
+    while ( remaining > 0 )
+    {
+        os.rdbuf()->sputc( os.fill() );
+        --remaining;
+    }
+}
+
+// Writes the characters of sv to os, honouring the stream's field width:
+// when the view is shorter than width(), fill characters are written before
+// the text if the adjustfield flags select std::ios_base::right, and after
+// it otherwise. The width is reset to 0 afterwards. Nothing is written when
+// the stream is not in a good state once its sentry has been constructed.
+template< class Stream, class View >
+Stream & write_to_stream( Stream & os, View const & sv )
+{
+    typename Stream::sentry guard( os );
+
+    if ( os )
+    {
+        const std::streamsize count = static_cast<std::streamsize>( sv.length() );
+
+        // Whether, and on which side, to pad:
+        const bool needs_padding = count < os.width();
+        const bool pad_before =
+            needs_padding && ( os.flags() & std::ios_base::adjustfield ) == std::ios_base::right;
+
+        if ( pad_before )
+        {
+            write_padding( os, os.width() - count );
+        }
+
+        // The characters themselves:
+        os.rdbuf()->sputn( sv.begin(), count );
+
+        if ( needs_padding && !pad_before )
+        {
+            write_padding( os, os.width() - count );
+        }
+
+        // A field width applies to a single insertion only:
+        os.width( 0 );
+    }
+
+    return os;
+}
+
+} // namespace detail
+
+// Stream inserter for views: forwards to detail::write_to_stream(), which
+// writes the characters of sv to os, applies the stream's width and fill
+// padding, and returns os.
+template< class CharT, class Traits >
+std::basic_ostream<CharT, Traits> &
+operator<<(
+ std::basic_ostream<CharT, Traits>& os,
+ basic_string_view <CharT, Traits> sv )
+{
+ return detail::write_to_stream( os, sv );
+}
+
+// Several typedefs for common character types are provided:
+
+// View typedefs for the standard character types. Each of the char16_t and
+// char32_t typedefs is guarded by its own feature macro, matching the guards
+// on the corresponding using-declarations in namespace nonstd.
+typedef basic_string_view<char> string_view;
+typedef basic_string_view<wchar_t> wstring_view;
+#if nssv_HAVE_WCHAR16_T
+typedef basic_string_view<char16_t> u16string_view;
+#endif
+#if nssv_HAVE_WCHAR32_T
+typedef basic_string_view<char32_t> u32string_view;
+#endif
+
+}} // namespace nonstd::sv_lite
+
+//
+// 24.4.6 Suffix for basic_string_view literals:
+//
+
+#if nssv_HAVE_USER_DEFINED_LITERALS
+
+namespace nonstd {
+nssv_inline_ns namespace literals {
+nssv_inline_ns namespace string_view_literals {
+
+#if nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS
+
+// Literal operators with the standard-style suffix "sv", one per character
+// type. Each returns a view over exactly the len characters at str, the
+// pointer and length supplied for the string literal.
+
+// (1) narrow character literal
+nssv_constexpr nonstd::sv_lite::string_view operator "" sv( const char* str, size_t len ) nssv_noexcept // (1)
+{
+ return nonstd::sv_lite::string_view{ str, len };
+}
+
+// (2) char16_t literal
+nssv_constexpr nonstd::sv_lite::u16string_view operator "" sv( const char16_t* str, size_t len ) nssv_noexcept // (2)
+{
+ return nonstd::sv_lite::u16string_view{ str, len };
+}
+
+// (3) char32_t literal
+nssv_constexpr nonstd::sv_lite::u32string_view operator "" sv( const char32_t* str, size_t len ) nssv_noexcept // (3)
+{
+ return nonstd::sv_lite::u32string_view{ str, len };
+}
+
+// (4) wide character literal
+nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, size_t len ) nssv_noexcept // (4)
+{
+ return nonstd::sv_lite::wstring_view{ str, len };
+}
+
+#endif // nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS
+
+#if nssv_CONFIG_USR_SV_OPERATOR
+
+// Literal operators with the user-style suffix "_sv", one per character
+// type. Each returns a view over exactly the len characters at str, the
+// pointer and length supplied for the string literal.
+
+// (1) narrow character literal
+nssv_constexpr nonstd::sv_lite::string_view operator "" _sv( const char* str, size_t len ) nssv_noexcept // (1)
+{
+ return nonstd::sv_lite::string_view{ str, len };
+}
+
+// (2) char16_t literal
+nssv_constexpr nonstd::sv_lite::u16string_view operator "" _sv( const char16_t* str, size_t len ) nssv_noexcept // (2)
+{
+ return nonstd::sv_lite::u16string_view{ str, len };
+}
+
+// (3) char32_t literal
+nssv_constexpr nonstd::sv_lite::u32string_view operator "" _sv( const char32_t* str, size_t len ) nssv_noexcept // (3)
+{
+ return nonstd::sv_lite::u32string_view{ str, len };
+}
+
+// (4) wide character literal
+nssv_constexpr nonstd::sv_lite::wstring_view operator "" _sv( const wchar_t* str, size_t len ) nssv_noexcept // (4)
+{
+ return nonstd::sv_lite::wstring_view{ str, len };
+}
+
+#endif // nssv_CONFIG_USR_SV_OPERATOR
+
+}}} // namespace nonstd::literals::string_view_literals
+
+#endif
+
+//
+// Extensions for std::string:
+//
+
+#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS
+
+namespace nonstd {
+namespace sv_lite {
+
+// Exclude MSVC 14 (19.00): it yields ambiguous to_string():
+
+// Free-function conversion from a view to std::basic_string. The C++11 form
+// uses a defaulted allocator template argument; the fallback (also used for
+// MSVC 14, see the remark above) provides two separate overloads instead.
+#if nssv_CPP11_OR_GREATER && nssv_COMPILER_MSVC_VERSION != 140
+
+// Returns a std::basic_string holding a copy of the characters of v,
+// constructed with allocator a.
+template< class CharT, class Traits, class Allocator = std::allocator<CharT> >
+std::basic_string<CharT, Traits, Allocator>
+to_string( basic_string_view<CharT, Traits> v, Allocator const & a = Allocator() )
+{
+ return std::basic_string<CharT,Traits, Allocator>( v.begin(), v.end(), a );
+}
+
+#else
+
+// Returns a std::basic_string holding a copy of the characters of v.
+template< class CharT, class Traits >
+std::basic_string<CharT, Traits>
+to_string( basic_string_view<CharT, Traits> v )
+{
+ return std::basic_string<CharT, Traits>( v.begin(), v.end() );
+}
+
+// As above, constructed with allocator a.
+template< class CharT, class Traits, class Allocator >
+std::basic_string<CharT, Traits, Allocator>
+to_string( basic_string_view<CharT, Traits> v, Allocator const & a )
+{
+ return std::basic_string<CharT, Traits, Allocator>( v.begin(), v.end(), a );
+}
+
+#endif // nssv_CPP11_OR_GREATER
+
+// Returns a view over the characters of s. No characters are copied: the
+// view is built from s.data() and s.size(), so it refers to the string's
+// own storage.
+template< class CharT, class Traits, class Allocator >
+basic_string_view<CharT, Traits>
+to_string_view( std::basic_string<CharT, Traits, Allocator> const & s )
+{
+ return basic_string_view<CharT, Traits>( s.data(), s.size() );
+}
+
+}} // namespace nonstd::sv_lite
+
+#endif // nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS
+
+//
+// make types and algorithms available in namespace nonstd:
+//
+
+namespace nonstd {
+
+using sv_lite::basic_string_view;
+using sv_lite::string_view;
+using sv_lite::wstring_view;
+
+#if nssv_HAVE_WCHAR16_T
+using sv_lite::u16string_view;
+#endif
+#if nssv_HAVE_WCHAR32_T
+using sv_lite::u32string_view;
+#endif
+
+// literal "sv"
+
+using sv_lite::operator==;
+using sv_lite::operator!=;
+using sv_lite::operator<;
+using sv_lite::operator<=;
+using sv_lite::operator>;
+using sv_lite::operator>=;
+
+using sv_lite::operator<<;
+
+#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS
+using sv_lite::to_string;
+using sv_lite::to_string_view;
+#endif
+
+} // namespace nonstd
+
+// 24.4.5 Hash support (C++11):
+
+// Note: The hash value of a string view object is equal to the hash value of
+// the corresponding string object.
+
+#if nssv_HAVE_STD_HASH
+
+#include <functional>
+
+namespace std {
+
+// std::hash for nonstd::string_view. The viewed characters are copied into
+// a std::string and hashed with std::hash<std::string>, so a view and a
+// string with the same contents hash to the same value.
+template<>
+struct hash< nonstd::string_view >
+{
+    std::size_t operator()( nonstd::string_view v ) const nssv_noexcept
+    {
+        const std::string owned( v.data(), v.size() );
+        return std::hash<std::string>()( owned );
+    }
+};
+
+// std::hash for nonstd::wstring_view. The viewed characters are copied into
+// a std::wstring and hashed with std::hash<std::wstring>, so a view and a
+// string with the same contents hash to the same value.
+template<>
+struct hash< nonstd::wstring_view >
+{
+    std::size_t operator()( nonstd::wstring_view v ) const nssv_noexcept
+    {
+        const std::wstring owned( v.data(), v.size() );
+        return std::hash<std::wstring>()( owned );
+    }
+};
+
+// std::hash for nonstd::u16string_view. The viewed characters are copied
+// into a std::u16string and hashed with std::hash<std::u16string>, so a view
+// and a string with the same contents hash to the same value.
+template<>
+struct hash< nonstd::u16string_view >
+{
+    std::size_t operator()( nonstd::u16string_view v ) const nssv_noexcept
+    {
+        const std::u16string owned( v.data(), v.size() );
+        return std::hash<std::u16string>()( owned );
+    }
+};
+
+// std::hash for nonstd::u32string_view. The viewed characters are copied
+// into a std::u32string and hashed with std::hash<std::u32string>, so a view
+// and a string with the same contents hash to the same value.
+template<>
+struct hash< nonstd::u32string_view >
+{
+    std::size_t operator()( nonstd::u32string_view v ) const nssv_noexcept
+    {
+        const std::u32string owned( v.data(), v.size() );
+        return std::hash<std::u32string>()( owned );
+    }
+};
+
+} // namespace std
+
+#endif // nssv_HAVE_STD_HASH
+
+nssv_RESTORE_WARNINGS()
+
+#endif // nssv_HAVE_STD_STRING_VIEW
+#endif // NONSTD_SV_LITE_H_INCLUDED
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/strptime.h b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/strptime.h
index 764a4440ee4..dedb1e28b24 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/strptime.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/strptime.h
@@ -1,35 +1,35 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <time.h>
-
-#include "arrow/util/visibility.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// A less featureful implementation of strptime() for platforms lacking
-// a standard implementation (e.g. Windows).
-ARROW_EXPORT char* arrow_strptime(const char* __restrict, const char* __restrict,
- struct tm* __restrict);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <time.h>
+
+#include "arrow/util/visibility.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// A less featureful implementation of strptime() for platforms lacking
+// a standard implementation (e.g. Windows).
+ARROW_EXPORT char* arrow_strptime(const char* __restrict, const char* __restrict,
+ struct tm* __restrict);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/utfcpp/checked.h b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/utfcpp/checked.h
index 648636e4686..a9efce4c94d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/utfcpp/checked.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/utfcpp/checked.h
@@ -1,333 +1,333 @@
-// Copyright 2006-2016 Nemanja Trifunovic
-
-/*
-Permission is hereby granted, free of charge, to any person or organization
-obtaining a copy of the software and accompanying documentation covered by
-this license (the "Software") to use, reproduce, display, distribute,
-execute, and transmit the Software, and to prepare derivative works of the
-Software, and to permit third-parties to whom the Software is furnished to
-do so, all subject to the following:
-
-The copyright notices in the Software and this entire statement, including
-the above license grant, this restriction and the following disclaimer,
-must be included in all copies of the Software, in whole or in part, and
-all derivative works of the Software, unless such copies or derivative
-works are solely in the form of machine-executable object code generated by
-a source language processor.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
-SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
-FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-*/
-
-
-#ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
-#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
-
-#include "core.h"
-#include <stdexcept>
-
-namespace utf8
-{
- // Base for the exceptions that may be thrown from the library
- class exception : public ::std::exception {
- };
-
- // Exceptions that may be thrown from the library functions.
- class invalid_code_point : public exception {
- uint32_t cp;
- public:
- invalid_code_point(uint32_t codepoint) : cp(codepoint) {}
- virtual const char* what() const NOEXCEPT OVERRIDE { return "Invalid code point"; }
- uint32_t code_point() const {return cp;}
- };
-
- class invalid_utf8 : public exception {
- uint8_t u8;
- public:
- invalid_utf8 (uint8_t u) : u8(u) {}
- virtual const char* what() const NOEXCEPT OVERRIDE { return "Invalid UTF-8"; }
- uint8_t utf8_octet() const {return u8;}
- };
-
- class invalid_utf16 : public exception {
- uint16_t u16;
- public:
- invalid_utf16 (uint16_t u) : u16(u) {}
- virtual const char* what() const NOEXCEPT OVERRIDE { return "Invalid UTF-16"; }
- uint16_t utf16_word() const {return u16;}
- };
-
- class not_enough_room : public exception {
- public:
- virtual const char* what() const NOEXCEPT OVERRIDE { return "Not enough space"; }
- };
-
- /// The library API - functions intended to be called by the users
-
- template <typename octet_iterator>
- octet_iterator append(uint32_t cp, octet_iterator result)
- {
- if (!utf8::internal::is_code_point_valid(cp))
- throw invalid_code_point(cp);
-
- if (cp < 0x80) // one octet
- *(result++) = static_cast<uint8_t>(cp);
- else if (cp < 0x800) { // two octets
- *(result++) = static_cast<uint8_t>((cp >> 6) | 0xc0);
- *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
- }
- else if (cp < 0x10000) { // three octets
- *(result++) = static_cast<uint8_t>((cp >> 12) | 0xe0);
- *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
- *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
- }
- else { // four octets
- *(result++) = static_cast<uint8_t>((cp >> 18) | 0xf0);
- *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f) | 0x80);
- *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
- *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
- }
- return result;
- }
-
- template <typename octet_iterator, typename output_iterator>
- output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement)
- {
- while (start != end) {
- octet_iterator sequence_start = start;
- internal::utf_error err_code = utf8::internal::validate_next(start, end);
- switch (err_code) {
- case internal::UTF8_OK :
- for (octet_iterator it = sequence_start; it != start; ++it)
- *out++ = *it;
- break;
- case internal::NOT_ENOUGH_ROOM:
- out = utf8::append (replacement, out);
- start = end;
- break;
- case internal::INVALID_LEAD:
- out = utf8::append (replacement, out);
- ++start;
- break;
- case internal::INCOMPLETE_SEQUENCE:
- case internal::OVERLONG_SEQUENCE:
- case internal::INVALID_CODE_POINT:
- out = utf8::append (replacement, out);
- ++start;
- // just one replacement mark for the sequence
- while (start != end && utf8::internal::is_trail(*start))
- ++start;
- break;
- }
- }
- return out;
- }
-
- template <typename octet_iterator, typename output_iterator>
- inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out)
- {
- static const uint32_t replacement_marker = utf8::internal::mask16(0xfffd);
- return utf8::replace_invalid(start, end, out, replacement_marker);
- }
-
- template <typename octet_iterator>
- uint32_t next(octet_iterator& it, octet_iterator end)
- {
- uint32_t cp = 0;
- internal::utf_error err_code = utf8::internal::validate_next(it, end, cp);
- switch (err_code) {
- case internal::UTF8_OK :
- break;
- case internal::NOT_ENOUGH_ROOM :
- throw not_enough_room();
- case internal::INVALID_LEAD :
- case internal::INCOMPLETE_SEQUENCE :
- case internal::OVERLONG_SEQUENCE :
- throw invalid_utf8(*it);
- case internal::INVALID_CODE_POINT :
- throw invalid_code_point(cp);
- }
- return cp;
- }
-
- template <typename octet_iterator>
- uint32_t peek_next(octet_iterator it, octet_iterator end)
- {
- return utf8::next(it, end);
- }
-
- template <typename octet_iterator>
- uint32_t prior(octet_iterator& it, octet_iterator start)
- {
- // can't do much if it == start
- if (it == start)
- throw not_enough_room();
-
- octet_iterator end = it;
- // Go back until we hit either a lead octet or start
- while (utf8::internal::is_trail(*(--it)))
- if (it == start)
- throw invalid_utf8(*it); // error - no lead byte in the sequence
- return utf8::peek_next(it, end);
- }
-
- template <typename octet_iterator, typename distance_type>
- void advance (octet_iterator& it, distance_type n, octet_iterator end)
- {
- const distance_type zero(0);
- if (n < zero) {
- // backward
- for (distance_type i = n; i < zero; ++i)
- utf8::prior(it, end);
- } else {
- // forward
- for (distance_type i = zero; i < n; ++i)
- utf8::next(it, end);
- }
- }
-
- template <typename octet_iterator>
- typename std::iterator_traits<octet_iterator>::difference_type
- distance (octet_iterator first, octet_iterator last)
- {
- typename std::iterator_traits<octet_iterator>::difference_type dist;
- for (dist = 0; first < last; ++dist)
- utf8::next(first, last);
- return dist;
- }
-
- template <typename u16bit_iterator, typename octet_iterator>
- octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
- {
- while (start != end) {
- uint32_t cp = utf8::internal::mask16(*start++);
- // Take care of surrogate pairs first
- if (utf8::internal::is_lead_surrogate(cp)) {
- if (start != end) {
- uint32_t trail_surrogate = utf8::internal::mask16(*start++);
- if (utf8::internal::is_trail_surrogate(trail_surrogate))
- cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
- else
- throw invalid_utf16(static_cast<uint16_t>(trail_surrogate));
- }
- else
- throw invalid_utf16(static_cast<uint16_t>(cp));
-
- }
- // Lone trail surrogate
- else if (utf8::internal::is_trail_surrogate(cp))
- throw invalid_utf16(static_cast<uint16_t>(cp));
-
- result = utf8::append(cp, result);
- }
- return result;
- }
-
- template <typename u16bit_iterator, typename octet_iterator>
- u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
- {
- while (start < end) {
- uint32_t cp = utf8::next(start, end);
- if (cp > 0xffff) { //make a surrogate pair
- *result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET);
- *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
- }
- else
- *result++ = static_cast<uint16_t>(cp);
- }
- return result;
- }
-
- template <typename octet_iterator, typename u32bit_iterator>
- octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
- {
- while (start != end)
- result = utf8::append(*(start++), result);
-
- return result;
- }
-
- template <typename octet_iterator, typename u32bit_iterator>
- u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
- {
- while (start < end)
- (*result++) = utf8::next(start, end);
-
- return result;
- }
-
- // The iterator class
- template <typename octet_iterator>
- class iterator {
- octet_iterator it;
- octet_iterator range_start;
- octet_iterator range_end;
- public:
- typedef uint32_t value_type;
- typedef uint32_t* pointer;
- typedef uint32_t& reference;
- typedef std::ptrdiff_t difference_type;
- typedef std::bidirectional_iterator_tag iterator_category;
- iterator () {}
- explicit iterator (const octet_iterator& octet_it,
- const octet_iterator& rangestart,
- const octet_iterator& rangeend) :
- it(octet_it), range_start(rangestart), range_end(rangeend)
- {
- if (it < range_start || it > range_end)
- throw std::out_of_range("Invalid utf-8 iterator position");
- }
- // the default "big three" are OK
- octet_iterator base () const { return it; }
- uint32_t operator * () const
- {
- octet_iterator temp = it;
- return utf8::next(temp, range_end);
- }
- bool operator == (const iterator& rhs) const
- {
- if (range_start != rhs.range_start || range_end != rhs.range_end)
- throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
- return (it == rhs.it);
- }
- bool operator != (const iterator& rhs) const
- {
- return !(operator == (rhs));
- }
- iterator& operator ++ ()
- {
- utf8::next(it, range_end);
- return *this;
- }
- iterator operator ++ (int)
- {
- iterator temp = *this;
- utf8::next(it, range_end);
- return temp;
- }
- iterator& operator -- ()
- {
- utf8::prior(it, range_start);
- return *this;
- }
- iterator operator -- (int)
- {
- iterator temp = *this;
- utf8::prior(it, range_start);
- return temp;
- }
- }; // class iterator
-
-} // namespace utf8
-
-#if UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later
-#include "cpp11.h"
-#endif // C++ 11 or later
-
-#endif //header guard
-
+// Copyright 2006-2016 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include "core.h"
+#include <stdexcept>
+
+namespace utf8
+{
+ // Base for the exceptions that may be thrown from the library
+ class exception : public ::std::exception {
+ };
+
+ // Exceptions that may be thrown from the library functions.
+ class invalid_code_point : public exception {
+ uint32_t cp;
+ public:
+ invalid_code_point(uint32_t codepoint) : cp(codepoint) {}
+ virtual const char* what() const NOEXCEPT OVERRIDE { return "Invalid code point"; }
+ uint32_t code_point() const {return cp;}
+ };
+
+ class invalid_utf8 : public exception {
+ uint8_t u8;
+ public:
+ invalid_utf8 (uint8_t u) : u8(u) {}
+ virtual const char* what() const NOEXCEPT OVERRIDE { return "Invalid UTF-8"; }
+ uint8_t utf8_octet() const {return u8;}
+ };
+
+ class invalid_utf16 : public exception {
+ uint16_t u16;
+ public:
+ invalid_utf16 (uint16_t u) : u16(u) {}
+ virtual const char* what() const NOEXCEPT OVERRIDE { return "Invalid UTF-16"; }
+ uint16_t utf16_word() const {return u16;}
+ };
+
+ class not_enough_room : public exception {
+ public:
+ virtual const char* what() const NOEXCEPT OVERRIDE { return "Not enough space"; }
+ };
+
+ /// The library API - functions intended to be called by the users
+
+ template <typename octet_iterator>
+ octet_iterator append(uint32_t cp, octet_iterator result)
+ {
+ if (!utf8::internal::is_code_point_valid(cp))
+ throw invalid_code_point(cp);
+
+ if (cp < 0x80) // one octet
+ *(result++) = static_cast<uint8_t>(cp);
+ else if (cp < 0x800) { // two octets
+ *(result++) = static_cast<uint8_t>((cp >> 6) | 0xc0);
+ *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
+ }
+ else if (cp < 0x10000) { // three octets
+ *(result++) = static_cast<uint8_t>((cp >> 12) | 0xe0);
+ *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
+ *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
+ }
+ else { // four octets
+ *(result++) = static_cast<uint8_t>((cp >> 18) | 0xf0);
+ *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f) | 0x80);
+ *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
+ *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
+ }
+ return result;
+ }
+
+ template <typename octet_iterator, typename output_iterator>
+ output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement)
+ {
+ while (start != end) {
+ octet_iterator sequence_start = start;
+ internal::utf_error err_code = utf8::internal::validate_next(start, end);
+ switch (err_code) {
+ case internal::UTF8_OK :
+ for (octet_iterator it = sequence_start; it != start; ++it)
+ *out++ = *it;
+ break;
+ case internal::NOT_ENOUGH_ROOM:
+ out = utf8::append (replacement, out);
+ start = end;
+ break;
+ case internal::INVALID_LEAD:
+ out = utf8::append (replacement, out);
+ ++start;
+ break;
+ case internal::INCOMPLETE_SEQUENCE:
+ case internal::OVERLONG_SEQUENCE:
+ case internal::INVALID_CODE_POINT:
+ out = utf8::append (replacement, out);
+ ++start;
+ // just one replacement mark for the sequence
+ while (start != end && utf8::internal::is_trail(*start))
+ ++start;
+ break;
+ }
+ }
+ return out;
+ }
+
+ template <typename octet_iterator, typename output_iterator>
+ inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out)
+ {
+ static const uint32_t replacement_marker = utf8::internal::mask16(0xfffd);
+ return utf8::replace_invalid(start, end, out, replacement_marker);
+ }
+
+ template <typename octet_iterator>
+ uint32_t next(octet_iterator& it, octet_iterator end)
+ {
+ uint32_t cp = 0;
+ internal::utf_error err_code = utf8::internal::validate_next(it, end, cp);
+ switch (err_code) {
+ case internal::UTF8_OK :
+ break;
+ case internal::NOT_ENOUGH_ROOM :
+ throw not_enough_room();
+ case internal::INVALID_LEAD :
+ case internal::INCOMPLETE_SEQUENCE :
+ case internal::OVERLONG_SEQUENCE :
+ throw invalid_utf8(*it);
+ case internal::INVALID_CODE_POINT :
+ throw invalid_code_point(cp);
+ }
+ return cp;
+ }
+
+ template <typename octet_iterator>
+ uint32_t peek_next(octet_iterator it, octet_iterator end)
+ {
+ return utf8::next(it, end);
+ }
+
+ template <typename octet_iterator>
+ uint32_t prior(octet_iterator& it, octet_iterator start)
+ {
+ // can't do much if it == start
+ if (it == start)
+ throw not_enough_room();
+
+ octet_iterator end = it;
+ // Go back until we hit either a lead octet or start
+ while (utf8::internal::is_trail(*(--it)))
+ if (it == start)
+ throw invalid_utf8(*it); // error - no lead byte in the sequence
+ return utf8::peek_next(it, end);
+ }
+
+ template <typename octet_iterator, typename distance_type>
+ void advance (octet_iterator& it, distance_type n, octet_iterator end)
+ {
+ const distance_type zero(0);
+ if (n < zero) {
+ // backward
+ for (distance_type i = n; i < zero; ++i)
+ utf8::prior(it, end);
+ } else {
+ // forward
+ for (distance_type i = zero; i < n; ++i)
+ utf8::next(it, end);
+ }
+ }
+
+ template <typename octet_iterator>
+ typename std::iterator_traits<octet_iterator>::difference_type
+ distance (octet_iterator first, octet_iterator last)
+ {
+ typename std::iterator_traits<octet_iterator>::difference_type dist;
+ for (dist = 0; first < last; ++dist)
+ utf8::next(first, last);
+ return dist;
+ }
+
+ template <typename u16bit_iterator, typename octet_iterator>
+ octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
+ {
+ while (start != end) {
+ uint32_t cp = utf8::internal::mask16(*start++);
+ // Take care of surrogate pairs first
+ if (utf8::internal::is_lead_surrogate(cp)) {
+ if (start != end) {
+ uint32_t trail_surrogate = utf8::internal::mask16(*start++);
+ if (utf8::internal::is_trail_surrogate(trail_surrogate))
+ cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
+ else
+ throw invalid_utf16(static_cast<uint16_t>(trail_surrogate));
+ }
+ else
+ throw invalid_utf16(static_cast<uint16_t>(cp));
+
+ }
+ // Lone trail surrogate
+ else if (utf8::internal::is_trail_surrogate(cp))
+ throw invalid_utf16(static_cast<uint16_t>(cp));
+
+ result = utf8::append(cp, result);
+ }
+ return result;
+ }
+
+ template <typename u16bit_iterator, typename octet_iterator>
+ u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
+ {
+ while (start < end) {
+ uint32_t cp = utf8::next(start, end);
+ if (cp > 0xffff) { //make a surrogate pair
+ *result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET);
+ *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
+ }
+ else
+ *result++ = static_cast<uint16_t>(cp);
+ }
+ return result;
+ }
+
+ template <typename octet_iterator, typename u32bit_iterator>
+ octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
+ {
+ while (start != end)
+ result = utf8::append(*(start++), result);
+
+ return result;
+ }
+
+ template <typename octet_iterator, typename u32bit_iterator>
+ u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
+ {
+ while (start < end)
+ (*result++) = utf8::next(start, end);
+
+ return result;
+ }
+
+ // The iterator class
+ template <typename octet_iterator>
+ class iterator {
+ octet_iterator it;
+ octet_iterator range_start;
+ octet_iterator range_end;
+ public:
+ typedef uint32_t value_type;
+ typedef uint32_t* pointer;
+ typedef uint32_t& reference;
+ typedef std::ptrdiff_t difference_type;
+ typedef std::bidirectional_iterator_tag iterator_category;
+ iterator () {}
+ explicit iterator (const octet_iterator& octet_it,
+ const octet_iterator& rangestart,
+ const octet_iterator& rangeend) :
+ it(octet_it), range_start(rangestart), range_end(rangeend)
+ {
+ if (it < range_start || it > range_end)
+ throw std::out_of_range("Invalid utf-8 iterator position");
+ }
+ // the default "big three" are OK
+ octet_iterator base () const { return it; }
+ uint32_t operator * () const
+ {
+ octet_iterator temp = it;
+ return utf8::next(temp, range_end);
+ }
+ bool operator == (const iterator& rhs) const
+ {
+ if (range_start != rhs.range_start || range_end != rhs.range_end)
+ throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
+ return (it == rhs.it);
+ }
+ bool operator != (const iterator& rhs) const
+ {
+ return !(operator == (rhs));
+ }
+ iterator& operator ++ ()
+ {
+ utf8::next(it, range_end);
+ return *this;
+ }
+ iterator operator ++ (int)
+ {
+ iterator temp = *this;
+ utf8::next(it, range_end);
+ return temp;
+ }
+ iterator& operator -- ()
+ {
+ utf8::prior(it, range_start);
+ return *this;
+ }
+ iterator operator -- (int)
+ {
+ iterator temp = *this;
+ utf8::prior(it, range_start);
+ return temp;
+ }
+ }; // class iterator
+
+} // namespace utf8
+
+#if UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later
+#include "cpp11.h"
+#endif // C++ 11 or later
+
+#endif //header guard
+
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/utfcpp/core.h b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/utfcpp/core.h
index 244e8923112..459b2486685 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/utfcpp/core.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/utfcpp/core.h
@@ -1,338 +1,338 @@
-// Copyright 2006 Nemanja Trifunovic
-
-/*
-Permission is hereby granted, free of charge, to any person or organization
-obtaining a copy of the software and accompanying documentation covered by
-this license (the "Software") to use, reproduce, display, distribute,
-execute, and transmit the Software, and to prepare derivative works of the
-Software, and to permit third-parties to whom the Software is furnished to
-do so, all subject to the following:
-
-The copyright notices in the Software and this entire statement, including
-the above license grant, this restriction and the following disclaimer,
-must be included in all copies of the Software, in whole or in part, and
-all derivative works of the Software, unless such copies or derivative
-works are solely in the form of machine-executable object code generated by
-a source language processor.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
-SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
-FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-*/
-
-
-#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
-#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
-
-#include <iterator>
-
-// Determine the C++ standard version.
-// If the user defines UTF_CPP_CPLUSPLUS, use that.
-// Otherwise, trust the unreliable predefined macro __cplusplus
-
-#if !defined UTF_CPP_CPLUSPLUS
- #define UTF_CPP_CPLUSPLUS __cplusplus
-#endif
-
-#if UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later
- #define OVERRIDE override
- #define NOEXCEPT noexcept
-#else // C++ 98/03
- #define OVERRIDE
- #define NOEXCEPT throw()
-#endif // C++ 11 or later
-
-
-namespace utf8
-{
- // The typedefs for 8-bit, 16-bit and 32-bit unsigned integers
- // You may need to change them to match your system.
- // These typedefs have the same names as ones from cstdint, or boost/cstdint
- typedef unsigned char uint8_t;
- typedef unsigned short uint16_t;
- typedef unsigned int uint32_t;
-
-// Helper code - not intended to be directly called by the library users. May be changed at any time
-namespace internal
-{
- // Unicode constants
- // Leading (high) surrogates: 0xd800 - 0xdbff
- // Trailing (low) surrogates: 0xdc00 - 0xdfff
- const uint16_t LEAD_SURROGATE_MIN = 0xd800u;
- const uint16_t LEAD_SURROGATE_MAX = 0xdbffu;
- const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u;
- const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu;
- const uint16_t LEAD_OFFSET = 0xd7c0u; // LEAD_SURROGATE_MIN - (0x10000 >> 10)
- const uint32_t SURROGATE_OFFSET = 0xfca02400u; // 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN
-
- // Maximum valid value for a Unicode code point
- const uint32_t CODE_POINT_MAX = 0x0010ffffu;
-
- template<typename octet_type>
- inline uint8_t mask8(octet_type oc)
- {
- return static_cast<uint8_t>(0xff & oc);
- }
- template<typename u16_type>
- inline uint16_t mask16(u16_type oc)
- {
- return static_cast<uint16_t>(0xffff & oc);
- }
- template<typename octet_type>
- inline bool is_trail(octet_type oc)
- {
- return ((utf8::internal::mask8(oc) >> 6) == 0x2);
- }
-
- template <typename u16>
- inline bool is_lead_surrogate(u16 cp)
- {
- return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX);
- }
-
- template <typename u16>
- inline bool is_trail_surrogate(u16 cp)
- {
- return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
- }
-
- template <typename u16>
- inline bool is_surrogate(u16 cp)
- {
- return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
- }
-
- template <typename u32>
- inline bool is_code_point_valid(u32 cp)
- {
- return (cp <= CODE_POINT_MAX && !utf8::internal::is_surrogate(cp));
- }
-
- template <typename octet_iterator>
- inline typename std::iterator_traits<octet_iterator>::difference_type
- sequence_length(octet_iterator lead_it)
- {
- uint8_t lead = utf8::internal::mask8(*lead_it);
- if (lead < 0x80)
- return 1;
- else if ((lead >> 5) == 0x6)
- return 2;
- else if ((lead >> 4) == 0xe)
- return 3;
- else if ((lead >> 3) == 0x1e)
- return 4;
- else
- return 0;
- }
-
- template <typename octet_difference_type>
- inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length)
- {
- if (cp < 0x80) {
- if (length != 1)
- return true;
- }
- else if (cp < 0x800) {
- if (length != 2)
- return true;
- }
- else if (cp < 0x10000) {
- if (length != 3)
- return true;
- }
-
- return false;
- }
-
- enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT};
-
- /// Helper for get_sequence_x
- template <typename octet_iterator>
- utf_error increase_safely(octet_iterator& it, octet_iterator end)
- {
- if (++it == end)
- return NOT_ENOUGH_ROOM;
-
- if (!utf8::internal::is_trail(*it))
- return INCOMPLETE_SEQUENCE;
-
- return UTF8_OK;
- }
-
- #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;}
-
- /// get_sequence_x functions decode utf-8 sequences of the length x
- template <typename octet_iterator>
- utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t& code_point)
- {
- if (it == end)
- return NOT_ENOUGH_ROOM;
-
- code_point = utf8::internal::mask8(*it);
-
- return UTF8_OK;
- }
-
- template <typename octet_iterator>
- utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t& code_point)
- {
+// Copyright 2006 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include <iterator>
+
+// Determine the C++ standard version.
+// If the user defines UTF_CPP_CPLUSPLUS, use that.
+// Otherwise, trust the unreliable predefined macro __cplusplus
+
+#if !defined UTF_CPP_CPLUSPLUS
+ #define UTF_CPP_CPLUSPLUS __cplusplus
+#endif
+
+#if UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later
+ #define OVERRIDE override
+ #define NOEXCEPT noexcept
+#else // C++ 98/03
+ #define OVERRIDE
+ #define NOEXCEPT throw()
+#endif // C++ 11 or later
+
+
+namespace utf8
+{
+ // The typedefs for 8-bit, 16-bit and 32-bit unsigned integers
+ // You may need to change them to match your system.
+ // These typedefs have the same names as ones from cstdint, or boost/cstdint
+ typedef unsigned char uint8_t;
+ typedef unsigned short uint16_t;
+ typedef unsigned int uint32_t;
+
+// Helper code - not intended to be directly called by the library users. May be changed at any time
+namespace internal
+{
+ // Unicode constants
+ // Leading (high) surrogates: 0xd800 - 0xdbff
+ // Trailing (low) surrogates: 0xdc00 - 0xdfff
+ const uint16_t LEAD_SURROGATE_MIN = 0xd800u;
+ const uint16_t LEAD_SURROGATE_MAX = 0xdbffu;
+ const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u;
+ const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu;
+ const uint16_t LEAD_OFFSET = 0xd7c0u; // LEAD_SURROGATE_MIN - (0x10000 >> 10)
+ const uint32_t SURROGATE_OFFSET = 0xfca02400u; // 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN
+
+ // Maximum valid value for a Unicode code point
+ const uint32_t CODE_POINT_MAX = 0x0010ffffu;
+
+ template<typename octet_type>
+ inline uint8_t mask8(octet_type oc)
+ {
+ return static_cast<uint8_t>(0xff & oc);
+ }
+ template<typename u16_type>
+ inline uint16_t mask16(u16_type oc)
+ {
+ return static_cast<uint16_t>(0xffff & oc);
+ }
+ template<typename octet_type>
+ inline bool is_trail(octet_type oc)
+ {
+ return ((utf8::internal::mask8(oc) >> 6) == 0x2);
+ }
+
+ template <typename u16>
+ inline bool is_lead_surrogate(u16 cp)
+ {
+ return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX);
+ }
+
+ template <typename u16>
+ inline bool is_trail_surrogate(u16 cp)
+ {
+ return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
+ }
+
+ template <typename u16>
+ inline bool is_surrogate(u16 cp)
+ {
+ return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
+ }
+
+ template <typename u32>
+ inline bool is_code_point_valid(u32 cp)
+ {
+ return (cp <= CODE_POINT_MAX && !utf8::internal::is_surrogate(cp));
+ }
+
+ template <typename octet_iterator>
+ inline typename std::iterator_traits<octet_iterator>::difference_type
+ sequence_length(octet_iterator lead_it)
+ {
+ uint8_t lead = utf8::internal::mask8(*lead_it);
+ if (lead < 0x80)
+ return 1;
+ else if ((lead >> 5) == 0x6)
+ return 2;
+ else if ((lead >> 4) == 0xe)
+ return 3;
+ else if ((lead >> 3) == 0x1e)
+ return 4;
+ else
+ return 0;
+ }
+
+ template <typename octet_difference_type>
+ inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length)
+ {
+ if (cp < 0x80) {
+ if (length != 1)
+ return true;
+ }
+ else if (cp < 0x800) {
+ if (length != 2)
+ return true;
+ }
+ else if (cp < 0x10000) {
+ if (length != 3)
+ return true;
+ }
+
+ return false;
+ }
+
+ enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT};
+
+ /// Helper for get_sequence_x
+ template <typename octet_iterator>
+ utf_error increase_safely(octet_iterator& it, octet_iterator end)
+ {
+ if (++it == end)
+ return NOT_ENOUGH_ROOM;
+
+ if (!utf8::internal::is_trail(*it))
+ return INCOMPLETE_SEQUENCE;
+
+ return UTF8_OK;
+ }
+
+ #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;}
+
+ /// get_sequence_x functions decode utf-8 sequences of the length x
+ template <typename octet_iterator>
+ utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+ {
if (it == end)
- return NOT_ENOUGH_ROOM;
-
- code_point = utf8::internal::mask8(*it);
-
- UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
-
- code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f);
-
- return UTF8_OK;
- }
-
- template <typename octet_iterator>
- utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t& code_point)
- {
- if (it == end)
- return NOT_ENOUGH_ROOM;
-
- code_point = utf8::internal::mask8(*it);
-
- UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
-
- code_point = ((code_point << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
-
- UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
-
- code_point += (*it) & 0x3f;
-
- return UTF8_OK;
- }
-
- template <typename octet_iterator>
- utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t& code_point)
- {
- if (it == end)
- return NOT_ENOUGH_ROOM;
-
- code_point = utf8::internal::mask8(*it);
-
- UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
-
- code_point = ((code_point << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
-
- UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
-
- code_point += (utf8::internal::mask8(*it) << 6) & 0xfff;
-
- UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
-
- code_point += (*it) & 0x3f;
-
- return UTF8_OK;
- }
-
- #undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR
-
- template <typename octet_iterator>
- utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point)
- {
- if (it == end)
- return NOT_ENOUGH_ROOM;
-
- // Save the original value of it so we can go back in case of failure
- // Of course, it does not make much sense with i.e. stream iterators
- octet_iterator original_it = it;
-
- uint32_t cp = 0;
- // Determine the sequence length based on the lead octet
- typedef typename std::iterator_traits<octet_iterator>::difference_type octet_difference_type;
- const octet_difference_type length = utf8::internal::sequence_length(it);
-
- // Get trail octets and calculate the code point
- utf_error err = UTF8_OK;
- switch (length) {
- case 0:
- return INVALID_LEAD;
- case 1:
- err = utf8::internal::get_sequence_1(it, end, cp);
- break;
- case 2:
- err = utf8::internal::get_sequence_2(it, end, cp);
- break;
- case 3:
- err = utf8::internal::get_sequence_3(it, end, cp);
- break;
- case 4:
- err = utf8::internal::get_sequence_4(it, end, cp);
- break;
- }
-
- if (err == UTF8_OK) {
- // Decoding succeeded. Now, security checks...
- if (utf8::internal::is_code_point_valid(cp)) {
- if (!utf8::internal::is_overlong_sequence(cp, length)){
- // Passed! Return here.
- code_point = cp;
- ++it;
- return UTF8_OK;
- }
- else
- err = OVERLONG_SEQUENCE;
- }
- else
- err = INVALID_CODE_POINT;
- }
-
- // Failure branch - restore the original value of the iterator
- it = original_it;
- return err;
- }
-
- template <typename octet_iterator>
- inline utf_error validate_next(octet_iterator& it, octet_iterator end) {
- uint32_t ignored;
- return utf8::internal::validate_next(it, end, ignored);
- }
-
-} // namespace internal
-
- /// The library API - functions intended to be called by the users
-
- // Byte order mark
- const uint8_t bom[] = {0xef, 0xbb, 0xbf};
-
- template <typename octet_iterator>
- octet_iterator find_invalid(octet_iterator start, octet_iterator end)
- {
- octet_iterator result = start;
- while (result != end) {
- utf8::internal::utf_error err_code = utf8::internal::validate_next(result, end);
- if (err_code != internal::UTF8_OK)
- return result;
- }
- return result;
- }
-
- template <typename octet_iterator>
- inline bool is_valid(octet_iterator start, octet_iterator end)
- {
- return (utf8::find_invalid(start, end) == end);
- }
-
- template <typename octet_iterator>
- inline bool starts_with_bom (octet_iterator it, octet_iterator end)
- {
- return (
- ((it != end) && (utf8::internal::mask8(*it++)) == bom[0]) &&
- ((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) &&
- ((it != end) && (utf8::internal::mask8(*it)) == bom[2])
- );
- }
-} // namespace utf8
-
-#endif // header guard
-
-
+ return NOT_ENOUGH_ROOM;
+
+ code_point = utf8::internal::mask8(*it);
+
+ return UTF8_OK;
+ }
+
+ template <typename octet_iterator>
+ utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+ {
+ if (it == end)
+ return NOT_ENOUGH_ROOM;
+
+ code_point = utf8::internal::mask8(*it);
+
+ UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+ code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f);
+
+ return UTF8_OK;
+ }
+
+ template <typename octet_iterator>
+ utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+ {
+ if (it == end)
+ return NOT_ENOUGH_ROOM;
+
+ code_point = utf8::internal::mask8(*it);
+
+ UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+ code_point = ((code_point << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
+
+ UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+ code_point += (*it) & 0x3f;
+
+ return UTF8_OK;
+ }
+
+ template <typename octet_iterator>
+ utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+ {
+ if (it == end)
+ return NOT_ENOUGH_ROOM;
+
+ code_point = utf8::internal::mask8(*it);
+
+ UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+ code_point = ((code_point << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
+
+ UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+ code_point += (utf8::internal::mask8(*it) << 6) & 0xfff;
+
+ UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+ code_point += (*it) & 0x3f;
+
+ return UTF8_OK;
+ }
+
+ #undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR
+
+ template <typename octet_iterator>
+ utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+ {
+ if (it == end)
+ return NOT_ENOUGH_ROOM;
+
+ // Save the original value of it so we can go back in case of failure
+ // Of course, it does not make much sense with i.e. stream iterators
+ octet_iterator original_it = it;
+
+ uint32_t cp = 0;
+ // Determine the sequence length based on the lead octet
+ typedef typename std::iterator_traits<octet_iterator>::difference_type octet_difference_type;
+ const octet_difference_type length = utf8::internal::sequence_length(it);
+
+ // Get trail octets and calculate the code point
+ utf_error err = UTF8_OK;
+ switch (length) {
+ case 0:
+ return INVALID_LEAD;
+ case 1:
+ err = utf8::internal::get_sequence_1(it, end, cp);
+ break;
+ case 2:
+ err = utf8::internal::get_sequence_2(it, end, cp);
+ break;
+ case 3:
+ err = utf8::internal::get_sequence_3(it, end, cp);
+ break;
+ case 4:
+ err = utf8::internal::get_sequence_4(it, end, cp);
+ break;
+ }
+
+ if (err == UTF8_OK) {
+ // Decoding succeeded. Now, security checks...
+ if (utf8::internal::is_code_point_valid(cp)) {
+ if (!utf8::internal::is_overlong_sequence(cp, length)){
+ // Passed! Return here.
+ code_point = cp;
+ ++it;
+ return UTF8_OK;
+ }
+ else
+ err = OVERLONG_SEQUENCE;
+ }
+ else
+ err = INVALID_CODE_POINT;
+ }
+
+ // Failure branch - restore the original value of the iterator
+ it = original_it;
+ return err;
+ }
+
+ template <typename octet_iterator>
+ inline utf_error validate_next(octet_iterator& it, octet_iterator end) {
+ uint32_t ignored;
+ return utf8::internal::validate_next(it, end, ignored);
+ }
+
+} // namespace internal
+
+ /// The library API - functions intended to be called by the users
+
+ // Byte order mark
+ const uint8_t bom[] = {0xef, 0xbb, 0xbf};
+
+ template <typename octet_iterator>
+ octet_iterator find_invalid(octet_iterator start, octet_iterator end)
+ {
+ octet_iterator result = start;
+ while (result != end) {
+ utf8::internal::utf_error err_code = utf8::internal::validate_next(result, end);
+ if (err_code != internal::UTF8_OK)
+ return result;
+ }
+ return result;
+ }
+
+ template <typename octet_iterator>
+ inline bool is_valid(octet_iterator start, octet_iterator end)
+ {
+ return (utf8::find_invalid(start, end) == end);
+ }
+
+ template <typename octet_iterator>
+ inline bool starts_with_bom (octet_iterator it, octet_iterator end)
+ {
+ return (
+ ((it != end) && (utf8::internal::mask8(*it++)) == bom[0]) &&
+ ((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) &&
+ ((it != end) && (utf8::internal::mask8(*it)) == bom[2])
+ );
+ }
+} // namespace utf8
+
+#endif // header guard
+
+
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/utfcpp/cpp11.h b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/utfcpp/cpp11.h
index d93961b04f8..f4379ff1fb8 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/utfcpp/cpp11.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/utfcpp/cpp11.h
@@ -1,103 +1,103 @@
-// Copyright 2018 Nemanja Trifunovic
-
-/*
-Permission is hereby granted, free of charge, to any person or organization
-obtaining a copy of the software and accompanying documentation covered by
-this license (the "Software") to use, reproduce, display, distribute,
-execute, and transmit the Software, and to prepare derivative works of the
-Software, and to permit third-parties to whom the Software is furnished to
-do so, all subject to the following:
-
-The copyright notices in the Software and this entire statement, including
-the above license grant, this restriction and the following disclaimer,
-must be included in all copies of the Software, in whole or in part, and
-all derivative works of the Software, unless such copies or derivative
-works are solely in the form of machine-executable object code generated by
-a source language processor.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
-SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
-FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-*/
-
-
-#ifndef UTF8_FOR_CPP_a184c22c_d012_11e8_a8d5_f2801f1b9fd1
-#define UTF8_FOR_CPP_a184c22c_d012_11e8_a8d5_f2801f1b9fd1
-
-#include "checked.h"
-#include <string>
-
-namespace utf8
-{
-
- inline void append(char32_t cp, std::string& s)
- {
- append(uint32_t(cp), std::back_inserter(s));
- }
-
- inline std::string utf16to8(const std::u16string& s)
- {
- std::string result;
- utf16to8(s.begin(), s.end(), std::back_inserter(result));
- return result;
- }
-
- inline std::u16string utf8to16(const std::string& s)
- {
- std::u16string result;
- utf8to16(s.begin(), s.end(), std::back_inserter(result));
- return result;
- }
-
- inline std::string utf32to8(const std::u32string& s)
- {
- std::string result;
- utf32to8(s.begin(), s.end(), std::back_inserter(result));
- return result;
- }
-
- inline std::u32string utf8to32(const std::string& s)
- {
- std::u32string result;
- utf8to32(s.begin(), s.end(), std::back_inserter(result));
- return result;
- }
-
- inline std::size_t find_invalid(const std::string& s)
- {
- std::string::const_iterator invalid = find_invalid(s.begin(), s.end());
- return (invalid == s.end()) ? std::string::npos : (invalid - s.begin());
- }
-
- inline bool is_valid(const std::string& s)
- {
- return is_valid(s.begin(), s.end());
- }
-
- inline std::string replace_invalid(const std::string& s, char32_t replacement)
- {
- std::string result;
- replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement);
- return result;
- }
-
- inline std::string replace_invalid(const std::string& s)
- {
- std::string result;
- replace_invalid(s.begin(), s.end(), std::back_inserter(result));
- return result;
- }
-
- inline bool starts_with_bom(const std::string& s)
- {
- return starts_with_bom(s.begin(), s.end());
- }
-
-} // namespace utf8
-
-#endif // header guard
-
+// Copyright 2018 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_a184c22c_d012_11e8_a8d5_f2801f1b9fd1
+#define UTF8_FOR_CPP_a184c22c_d012_11e8_a8d5_f2801f1b9fd1
+
+#include "checked.h"
+#include <string>
+
+namespace utf8
+{
+
+ inline void append(char32_t cp, std::string& s)
+ {
+ append(uint32_t(cp), std::back_inserter(s));
+ }
+
+ inline std::string utf16to8(const std::u16string& s)
+ {
+ std::string result;
+ utf16to8(s.begin(), s.end(), std::back_inserter(result));
+ return result;
+ }
+
+ inline std::u16string utf8to16(const std::string& s)
+ {
+ std::u16string result;
+ utf8to16(s.begin(), s.end(), std::back_inserter(result));
+ return result;
+ }
+
+ inline std::string utf32to8(const std::u32string& s)
+ {
+ std::string result;
+ utf32to8(s.begin(), s.end(), std::back_inserter(result));
+ return result;
+ }
+
+ inline std::u32string utf8to32(const std::string& s)
+ {
+ std::u32string result;
+ utf8to32(s.begin(), s.end(), std::back_inserter(result));
+ return result;
+ }
+
+ inline std::size_t find_invalid(const std::string& s)
+ {
+ std::string::const_iterator invalid = find_invalid(s.begin(), s.end());
+ return (invalid == s.end()) ? std::string::npos : (invalid - s.begin());
+ }
+
+ inline bool is_valid(const std::string& s)
+ {
+ return is_valid(s.begin(), s.end());
+ }
+
+ inline std::string replace_invalid(const std::string& s, char32_t replacement)
+ {
+ std::string result;
+ replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement);
+ return result;
+ }
+
+ inline std::string replace_invalid(const std::string& s)
+ {
+ std::string result;
+ replace_invalid(s.begin(), s.end(), std::back_inserter(result));
+ return result;
+ }
+
+ inline bool starts_with_bom(const std::string& s)
+ {
+ return starts_with_bom(s.begin(), s.end());
+ }
+
+} // namespace utf8
+
+#endif // header guard
+
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/visitor.cc b/contrib/libs/apache/arrow/cpp/src/arrow/visitor.cc
index 851785081c7..52963706a3e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/visitor.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/visitor.cc
@@ -1,169 +1,169 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/visitor.h"
-
-#include <memory>
-
-#include "arrow/array.h" // IWYU pragma: keep
-#include "arrow/extension_type.h"
-#include "arrow/scalar.h" // IWYU pragma: keep
-#include "arrow/status.h"
-#include "arrow/type.h"
-
-namespace arrow {
-
-#define ARRAY_VISITOR_DEFAULT(ARRAY_CLASS) \
- Status ArrayVisitor::Visit(const ARRAY_CLASS& array) { \
- return Status::NotImplemented(array.type()->ToString()); \
- }
-
-ARRAY_VISITOR_DEFAULT(NullArray)
-ARRAY_VISITOR_DEFAULT(BooleanArray)
-ARRAY_VISITOR_DEFAULT(Int8Array)
-ARRAY_VISITOR_DEFAULT(Int16Array)
-ARRAY_VISITOR_DEFAULT(Int32Array)
-ARRAY_VISITOR_DEFAULT(Int64Array)
-ARRAY_VISITOR_DEFAULT(UInt8Array)
-ARRAY_VISITOR_DEFAULT(UInt16Array)
-ARRAY_VISITOR_DEFAULT(UInt32Array)
-ARRAY_VISITOR_DEFAULT(UInt64Array)
-ARRAY_VISITOR_DEFAULT(HalfFloatArray)
-ARRAY_VISITOR_DEFAULT(FloatArray)
-ARRAY_VISITOR_DEFAULT(DoubleArray)
-ARRAY_VISITOR_DEFAULT(BinaryArray)
-ARRAY_VISITOR_DEFAULT(StringArray)
-ARRAY_VISITOR_DEFAULT(LargeBinaryArray)
-ARRAY_VISITOR_DEFAULT(LargeStringArray)
-ARRAY_VISITOR_DEFAULT(FixedSizeBinaryArray)
-ARRAY_VISITOR_DEFAULT(Date32Array)
-ARRAY_VISITOR_DEFAULT(Date64Array)
-ARRAY_VISITOR_DEFAULT(Time32Array)
-ARRAY_VISITOR_DEFAULT(Time64Array)
-ARRAY_VISITOR_DEFAULT(TimestampArray)
-ARRAY_VISITOR_DEFAULT(DayTimeIntervalArray)
-ARRAY_VISITOR_DEFAULT(MonthIntervalArray)
-ARRAY_VISITOR_DEFAULT(DurationArray)
-ARRAY_VISITOR_DEFAULT(ListArray)
-ARRAY_VISITOR_DEFAULT(LargeListArray)
-ARRAY_VISITOR_DEFAULT(MapArray)
-ARRAY_VISITOR_DEFAULT(FixedSizeListArray)
-ARRAY_VISITOR_DEFAULT(StructArray)
-ARRAY_VISITOR_DEFAULT(SparseUnionArray)
-ARRAY_VISITOR_DEFAULT(DenseUnionArray)
-ARRAY_VISITOR_DEFAULT(DictionaryArray)
-ARRAY_VISITOR_DEFAULT(Decimal128Array)
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/visitor.h"
+
+#include <memory>
+
+#include "arrow/array.h" // IWYU pragma: keep
+#include "arrow/extension_type.h"
+#include "arrow/scalar.h" // IWYU pragma: keep
+#include "arrow/status.h"
+#include "arrow/type.h"
+
+namespace arrow {
+
+#define ARRAY_VISITOR_DEFAULT(ARRAY_CLASS) \
+ Status ArrayVisitor::Visit(const ARRAY_CLASS& array) { \
+ return Status::NotImplemented(array.type()->ToString()); \
+ }
+
+ARRAY_VISITOR_DEFAULT(NullArray)
+ARRAY_VISITOR_DEFAULT(BooleanArray)
+ARRAY_VISITOR_DEFAULT(Int8Array)
+ARRAY_VISITOR_DEFAULT(Int16Array)
+ARRAY_VISITOR_DEFAULT(Int32Array)
+ARRAY_VISITOR_DEFAULT(Int64Array)
+ARRAY_VISITOR_DEFAULT(UInt8Array)
+ARRAY_VISITOR_DEFAULT(UInt16Array)
+ARRAY_VISITOR_DEFAULT(UInt32Array)
+ARRAY_VISITOR_DEFAULT(UInt64Array)
+ARRAY_VISITOR_DEFAULT(HalfFloatArray)
+ARRAY_VISITOR_DEFAULT(FloatArray)
+ARRAY_VISITOR_DEFAULT(DoubleArray)
+ARRAY_VISITOR_DEFAULT(BinaryArray)
+ARRAY_VISITOR_DEFAULT(StringArray)
+ARRAY_VISITOR_DEFAULT(LargeBinaryArray)
+ARRAY_VISITOR_DEFAULT(LargeStringArray)
+ARRAY_VISITOR_DEFAULT(FixedSizeBinaryArray)
+ARRAY_VISITOR_DEFAULT(Date32Array)
+ARRAY_VISITOR_DEFAULT(Date64Array)
+ARRAY_VISITOR_DEFAULT(Time32Array)
+ARRAY_VISITOR_DEFAULT(Time64Array)
+ARRAY_VISITOR_DEFAULT(TimestampArray)
+ARRAY_VISITOR_DEFAULT(DayTimeIntervalArray)
+ARRAY_VISITOR_DEFAULT(MonthIntervalArray)
+ARRAY_VISITOR_DEFAULT(DurationArray)
+ARRAY_VISITOR_DEFAULT(ListArray)
+ARRAY_VISITOR_DEFAULT(LargeListArray)
+ARRAY_VISITOR_DEFAULT(MapArray)
+ARRAY_VISITOR_DEFAULT(FixedSizeListArray)
+ARRAY_VISITOR_DEFAULT(StructArray)
+ARRAY_VISITOR_DEFAULT(SparseUnionArray)
+ARRAY_VISITOR_DEFAULT(DenseUnionArray)
+ARRAY_VISITOR_DEFAULT(DictionaryArray)
+ARRAY_VISITOR_DEFAULT(Decimal128Array)
ARRAY_VISITOR_DEFAULT(Decimal256Array)
-ARRAY_VISITOR_DEFAULT(ExtensionArray)
-
-#undef ARRAY_VISITOR_DEFAULT
-
-// ----------------------------------------------------------------------
-// Default implementations of TypeVisitor methods
-
-#define TYPE_VISITOR_DEFAULT(TYPE_CLASS) \
- Status TypeVisitor::Visit(const TYPE_CLASS& type) { \
- return Status::NotImplemented(type.ToString()); \
- }
-
-TYPE_VISITOR_DEFAULT(NullType)
-TYPE_VISITOR_DEFAULT(BooleanType)
-TYPE_VISITOR_DEFAULT(Int8Type)
-TYPE_VISITOR_DEFAULT(Int16Type)
-TYPE_VISITOR_DEFAULT(Int32Type)
-TYPE_VISITOR_DEFAULT(Int64Type)
-TYPE_VISITOR_DEFAULT(UInt8Type)
-TYPE_VISITOR_DEFAULT(UInt16Type)
-TYPE_VISITOR_DEFAULT(UInt32Type)
-TYPE_VISITOR_DEFAULT(UInt64Type)
-TYPE_VISITOR_DEFAULT(HalfFloatType)
-TYPE_VISITOR_DEFAULT(FloatType)
-TYPE_VISITOR_DEFAULT(DoubleType)
-TYPE_VISITOR_DEFAULT(StringType)
-TYPE_VISITOR_DEFAULT(BinaryType)
-TYPE_VISITOR_DEFAULT(LargeStringType)
-TYPE_VISITOR_DEFAULT(LargeBinaryType)
-TYPE_VISITOR_DEFAULT(FixedSizeBinaryType)
-TYPE_VISITOR_DEFAULT(Date64Type)
-TYPE_VISITOR_DEFAULT(Date32Type)
-TYPE_VISITOR_DEFAULT(Time32Type)
-TYPE_VISITOR_DEFAULT(Time64Type)
-TYPE_VISITOR_DEFAULT(TimestampType)
-TYPE_VISITOR_DEFAULT(DayTimeIntervalType)
-TYPE_VISITOR_DEFAULT(MonthIntervalType)
-TYPE_VISITOR_DEFAULT(DurationType)
-TYPE_VISITOR_DEFAULT(Decimal128Type)
+ARRAY_VISITOR_DEFAULT(ExtensionArray)
+
+#undef ARRAY_VISITOR_DEFAULT
+
+// ----------------------------------------------------------------------
+// Default implementations of TypeVisitor methods
+
+#define TYPE_VISITOR_DEFAULT(TYPE_CLASS) \
+ Status TypeVisitor::Visit(const TYPE_CLASS& type) { \
+ return Status::NotImplemented(type.ToString()); \
+ }
+
+TYPE_VISITOR_DEFAULT(NullType)
+TYPE_VISITOR_DEFAULT(BooleanType)
+TYPE_VISITOR_DEFAULT(Int8Type)
+TYPE_VISITOR_DEFAULT(Int16Type)
+TYPE_VISITOR_DEFAULT(Int32Type)
+TYPE_VISITOR_DEFAULT(Int64Type)
+TYPE_VISITOR_DEFAULT(UInt8Type)
+TYPE_VISITOR_DEFAULT(UInt16Type)
+TYPE_VISITOR_DEFAULT(UInt32Type)
+TYPE_VISITOR_DEFAULT(UInt64Type)
+TYPE_VISITOR_DEFAULT(HalfFloatType)
+TYPE_VISITOR_DEFAULT(FloatType)
+TYPE_VISITOR_DEFAULT(DoubleType)
+TYPE_VISITOR_DEFAULT(StringType)
+TYPE_VISITOR_DEFAULT(BinaryType)
+TYPE_VISITOR_DEFAULT(LargeStringType)
+TYPE_VISITOR_DEFAULT(LargeBinaryType)
+TYPE_VISITOR_DEFAULT(FixedSizeBinaryType)
+TYPE_VISITOR_DEFAULT(Date64Type)
+TYPE_VISITOR_DEFAULT(Date32Type)
+TYPE_VISITOR_DEFAULT(Time32Type)
+TYPE_VISITOR_DEFAULT(Time64Type)
+TYPE_VISITOR_DEFAULT(TimestampType)
+TYPE_VISITOR_DEFAULT(DayTimeIntervalType)
+TYPE_VISITOR_DEFAULT(MonthIntervalType)
+TYPE_VISITOR_DEFAULT(DurationType)
+TYPE_VISITOR_DEFAULT(Decimal128Type)
TYPE_VISITOR_DEFAULT(Decimal256Type)
-TYPE_VISITOR_DEFAULT(ListType)
-TYPE_VISITOR_DEFAULT(LargeListType)
-TYPE_VISITOR_DEFAULT(MapType)
-TYPE_VISITOR_DEFAULT(FixedSizeListType)
-TYPE_VISITOR_DEFAULT(StructType)
-TYPE_VISITOR_DEFAULT(SparseUnionType)
-TYPE_VISITOR_DEFAULT(DenseUnionType)
-TYPE_VISITOR_DEFAULT(DictionaryType)
-TYPE_VISITOR_DEFAULT(ExtensionType)
-
-#undef TYPE_VISITOR_DEFAULT
-
-// ----------------------------------------------------------------------
-// Default implementations of ScalarVisitor methods
-
-#define SCALAR_VISITOR_DEFAULT(SCALAR_CLASS) \
- Status ScalarVisitor::Visit(const SCALAR_CLASS& scalar) { \
- return Status::NotImplemented( \
- "ScalarVisitor not implemented for " ARROW_STRINGIFY(SCALAR_CLASS)); \
- }
-
-SCALAR_VISITOR_DEFAULT(NullScalar)
-SCALAR_VISITOR_DEFAULT(BooleanScalar)
-SCALAR_VISITOR_DEFAULT(Int8Scalar)
-SCALAR_VISITOR_DEFAULT(Int16Scalar)
-SCALAR_VISITOR_DEFAULT(Int32Scalar)
-SCALAR_VISITOR_DEFAULT(Int64Scalar)
-SCALAR_VISITOR_DEFAULT(UInt8Scalar)
-SCALAR_VISITOR_DEFAULT(UInt16Scalar)
-SCALAR_VISITOR_DEFAULT(UInt32Scalar)
-SCALAR_VISITOR_DEFAULT(UInt64Scalar)
-SCALAR_VISITOR_DEFAULT(HalfFloatScalar)
-SCALAR_VISITOR_DEFAULT(FloatScalar)
-SCALAR_VISITOR_DEFAULT(DoubleScalar)
-SCALAR_VISITOR_DEFAULT(StringScalar)
-SCALAR_VISITOR_DEFAULT(BinaryScalar)
-SCALAR_VISITOR_DEFAULT(LargeStringScalar)
-SCALAR_VISITOR_DEFAULT(LargeBinaryScalar)
-SCALAR_VISITOR_DEFAULT(FixedSizeBinaryScalar)
-SCALAR_VISITOR_DEFAULT(Date64Scalar)
-SCALAR_VISITOR_DEFAULT(Date32Scalar)
-SCALAR_VISITOR_DEFAULT(Time32Scalar)
-SCALAR_VISITOR_DEFAULT(Time64Scalar)
-SCALAR_VISITOR_DEFAULT(TimestampScalar)
-SCALAR_VISITOR_DEFAULT(DayTimeIntervalScalar)
-SCALAR_VISITOR_DEFAULT(MonthIntervalScalar)
-SCALAR_VISITOR_DEFAULT(DurationScalar)
-SCALAR_VISITOR_DEFAULT(Decimal128Scalar)
+TYPE_VISITOR_DEFAULT(ListType)
+TYPE_VISITOR_DEFAULT(LargeListType)
+TYPE_VISITOR_DEFAULT(MapType)
+TYPE_VISITOR_DEFAULT(FixedSizeListType)
+TYPE_VISITOR_DEFAULT(StructType)
+TYPE_VISITOR_DEFAULT(SparseUnionType)
+TYPE_VISITOR_DEFAULT(DenseUnionType)
+TYPE_VISITOR_DEFAULT(DictionaryType)
+TYPE_VISITOR_DEFAULT(ExtensionType)
+
+#undef TYPE_VISITOR_DEFAULT
+
+// ----------------------------------------------------------------------
+// Default implementations of ScalarVisitor methods
+
+#define SCALAR_VISITOR_DEFAULT(SCALAR_CLASS) \
+ Status ScalarVisitor::Visit(const SCALAR_CLASS& scalar) { \
+ return Status::NotImplemented( \
+ "ScalarVisitor not implemented for " ARROW_STRINGIFY(SCALAR_CLASS)); \
+ }
+
+SCALAR_VISITOR_DEFAULT(NullScalar)
+SCALAR_VISITOR_DEFAULT(BooleanScalar)
+SCALAR_VISITOR_DEFAULT(Int8Scalar)
+SCALAR_VISITOR_DEFAULT(Int16Scalar)
+SCALAR_VISITOR_DEFAULT(Int32Scalar)
+SCALAR_VISITOR_DEFAULT(Int64Scalar)
+SCALAR_VISITOR_DEFAULT(UInt8Scalar)
+SCALAR_VISITOR_DEFAULT(UInt16Scalar)
+SCALAR_VISITOR_DEFAULT(UInt32Scalar)
+SCALAR_VISITOR_DEFAULT(UInt64Scalar)
+SCALAR_VISITOR_DEFAULT(HalfFloatScalar)
+SCALAR_VISITOR_DEFAULT(FloatScalar)
+SCALAR_VISITOR_DEFAULT(DoubleScalar)
+SCALAR_VISITOR_DEFAULT(StringScalar)
+SCALAR_VISITOR_DEFAULT(BinaryScalar)
+SCALAR_VISITOR_DEFAULT(LargeStringScalar)
+SCALAR_VISITOR_DEFAULT(LargeBinaryScalar)
+SCALAR_VISITOR_DEFAULT(FixedSizeBinaryScalar)
+SCALAR_VISITOR_DEFAULT(Date64Scalar)
+SCALAR_VISITOR_DEFAULT(Date32Scalar)
+SCALAR_VISITOR_DEFAULT(Time32Scalar)
+SCALAR_VISITOR_DEFAULT(Time64Scalar)
+SCALAR_VISITOR_DEFAULT(TimestampScalar)
+SCALAR_VISITOR_DEFAULT(DayTimeIntervalScalar)
+SCALAR_VISITOR_DEFAULT(MonthIntervalScalar)
+SCALAR_VISITOR_DEFAULT(DurationScalar)
+SCALAR_VISITOR_DEFAULT(Decimal128Scalar)
SCALAR_VISITOR_DEFAULT(Decimal256Scalar)
-SCALAR_VISITOR_DEFAULT(ListScalar)
-SCALAR_VISITOR_DEFAULT(LargeListScalar)
-SCALAR_VISITOR_DEFAULT(MapScalar)
-SCALAR_VISITOR_DEFAULT(FixedSizeListScalar)
-SCALAR_VISITOR_DEFAULT(StructScalar)
-SCALAR_VISITOR_DEFAULT(DictionaryScalar)
-
-#undef SCALAR_VISITOR_DEFAULT
-
-} // namespace arrow
+SCALAR_VISITOR_DEFAULT(ListScalar)
+SCALAR_VISITOR_DEFAULT(LargeListScalar)
+SCALAR_VISITOR_DEFAULT(MapScalar)
+SCALAR_VISITOR_DEFAULT(FixedSizeListScalar)
+SCALAR_VISITOR_DEFAULT(StructScalar)
+SCALAR_VISITOR_DEFAULT(DictionaryScalar)
+
+#undef SCALAR_VISITOR_DEFAULT
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/visitor.h b/contrib/libs/apache/arrow/cpp/src/arrow/visitor.h
index 0382e461199..f9966454523 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/visitor.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/visitor.h
@@ -1,152 +1,152 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "arrow/status.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class ARROW_EXPORT ArrayVisitor {
- public:
- virtual ~ArrayVisitor() = default;
-
- virtual Status Visit(const NullArray& array);
- virtual Status Visit(const BooleanArray& array);
- virtual Status Visit(const Int8Array& array);
- virtual Status Visit(const Int16Array& array);
- virtual Status Visit(const Int32Array& array);
- virtual Status Visit(const Int64Array& array);
- virtual Status Visit(const UInt8Array& array);
- virtual Status Visit(const UInt16Array& array);
- virtual Status Visit(const UInt32Array& array);
- virtual Status Visit(const UInt64Array& array);
- virtual Status Visit(const HalfFloatArray& array);
- virtual Status Visit(const FloatArray& array);
- virtual Status Visit(const DoubleArray& array);
- virtual Status Visit(const StringArray& array);
- virtual Status Visit(const BinaryArray& array);
- virtual Status Visit(const LargeStringArray& array);
- virtual Status Visit(const LargeBinaryArray& array);
- virtual Status Visit(const FixedSizeBinaryArray& array);
- virtual Status Visit(const Date32Array& array);
- virtual Status Visit(const Date64Array& array);
- virtual Status Visit(const Time32Array& array);
- virtual Status Visit(const Time64Array& array);
- virtual Status Visit(const TimestampArray& array);
- virtual Status Visit(const DayTimeIntervalArray& array);
- virtual Status Visit(const MonthIntervalArray& array);
- virtual Status Visit(const DurationArray& array);
- virtual Status Visit(const Decimal128Array& array);
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/status.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class ARROW_EXPORT ArrayVisitor {
+ public:
+ virtual ~ArrayVisitor() = default;
+
+ virtual Status Visit(const NullArray& array);
+ virtual Status Visit(const BooleanArray& array);
+ virtual Status Visit(const Int8Array& array);
+ virtual Status Visit(const Int16Array& array);
+ virtual Status Visit(const Int32Array& array);
+ virtual Status Visit(const Int64Array& array);
+ virtual Status Visit(const UInt8Array& array);
+ virtual Status Visit(const UInt16Array& array);
+ virtual Status Visit(const UInt32Array& array);
+ virtual Status Visit(const UInt64Array& array);
+ virtual Status Visit(const HalfFloatArray& array);
+ virtual Status Visit(const FloatArray& array);
+ virtual Status Visit(const DoubleArray& array);
+ virtual Status Visit(const StringArray& array);
+ virtual Status Visit(const BinaryArray& array);
+ virtual Status Visit(const LargeStringArray& array);
+ virtual Status Visit(const LargeBinaryArray& array);
+ virtual Status Visit(const FixedSizeBinaryArray& array);
+ virtual Status Visit(const Date32Array& array);
+ virtual Status Visit(const Date64Array& array);
+ virtual Status Visit(const Time32Array& array);
+ virtual Status Visit(const Time64Array& array);
+ virtual Status Visit(const TimestampArray& array);
+ virtual Status Visit(const DayTimeIntervalArray& array);
+ virtual Status Visit(const MonthIntervalArray& array);
+ virtual Status Visit(const DurationArray& array);
+ virtual Status Visit(const Decimal128Array& array);
virtual Status Visit(const Decimal256Array& array);
- virtual Status Visit(const ListArray& array);
- virtual Status Visit(const LargeListArray& array);
- virtual Status Visit(const MapArray& array);
- virtual Status Visit(const FixedSizeListArray& array);
- virtual Status Visit(const StructArray& array);
- virtual Status Visit(const SparseUnionArray& array);
- virtual Status Visit(const DenseUnionArray& array);
- virtual Status Visit(const DictionaryArray& array);
- virtual Status Visit(const ExtensionArray& array);
-};
-
-class ARROW_EXPORT TypeVisitor {
- public:
- virtual ~TypeVisitor() = default;
-
- virtual Status Visit(const NullType& type);
- virtual Status Visit(const BooleanType& type);
- virtual Status Visit(const Int8Type& type);
- virtual Status Visit(const Int16Type& type);
- virtual Status Visit(const Int32Type& type);
- virtual Status Visit(const Int64Type& type);
- virtual Status Visit(const UInt8Type& type);
- virtual Status Visit(const UInt16Type& type);
- virtual Status Visit(const UInt32Type& type);
- virtual Status Visit(const UInt64Type& type);
- virtual Status Visit(const HalfFloatType& type);
- virtual Status Visit(const FloatType& type);
- virtual Status Visit(const DoubleType& type);
- virtual Status Visit(const StringType& type);
- virtual Status Visit(const BinaryType& type);
- virtual Status Visit(const LargeStringType& type);
- virtual Status Visit(const LargeBinaryType& type);
- virtual Status Visit(const FixedSizeBinaryType& type);
- virtual Status Visit(const Date64Type& type);
- virtual Status Visit(const Date32Type& type);
- virtual Status Visit(const Time32Type& type);
- virtual Status Visit(const Time64Type& type);
- virtual Status Visit(const TimestampType& type);
- virtual Status Visit(const MonthIntervalType& type);
- virtual Status Visit(const DayTimeIntervalType& type);
- virtual Status Visit(const DurationType& type);
- virtual Status Visit(const Decimal128Type& type);
+ virtual Status Visit(const ListArray& array);
+ virtual Status Visit(const LargeListArray& array);
+ virtual Status Visit(const MapArray& array);
+ virtual Status Visit(const FixedSizeListArray& array);
+ virtual Status Visit(const StructArray& array);
+ virtual Status Visit(const SparseUnionArray& array);
+ virtual Status Visit(const DenseUnionArray& array);
+ virtual Status Visit(const DictionaryArray& array);
+ virtual Status Visit(const ExtensionArray& array);
+};
+
+class ARROW_EXPORT TypeVisitor {
+ public:
+ virtual ~TypeVisitor() = default;
+
+ virtual Status Visit(const NullType& type);
+ virtual Status Visit(const BooleanType& type);
+ virtual Status Visit(const Int8Type& type);
+ virtual Status Visit(const Int16Type& type);
+ virtual Status Visit(const Int32Type& type);
+ virtual Status Visit(const Int64Type& type);
+ virtual Status Visit(const UInt8Type& type);
+ virtual Status Visit(const UInt16Type& type);
+ virtual Status Visit(const UInt32Type& type);
+ virtual Status Visit(const UInt64Type& type);
+ virtual Status Visit(const HalfFloatType& type);
+ virtual Status Visit(const FloatType& type);
+ virtual Status Visit(const DoubleType& type);
+ virtual Status Visit(const StringType& type);
+ virtual Status Visit(const BinaryType& type);
+ virtual Status Visit(const LargeStringType& type);
+ virtual Status Visit(const LargeBinaryType& type);
+ virtual Status Visit(const FixedSizeBinaryType& type);
+ virtual Status Visit(const Date64Type& type);
+ virtual Status Visit(const Date32Type& type);
+ virtual Status Visit(const Time32Type& type);
+ virtual Status Visit(const Time64Type& type);
+ virtual Status Visit(const TimestampType& type);
+ virtual Status Visit(const MonthIntervalType& type);
+ virtual Status Visit(const DayTimeIntervalType& type);
+ virtual Status Visit(const DurationType& type);
+ virtual Status Visit(const Decimal128Type& type);
virtual Status Visit(const Decimal256Type& type);
- virtual Status Visit(const ListType& type);
- virtual Status Visit(const LargeListType& type);
- virtual Status Visit(const MapType& type);
- virtual Status Visit(const FixedSizeListType& type);
- virtual Status Visit(const StructType& type);
- virtual Status Visit(const SparseUnionType& type);
- virtual Status Visit(const DenseUnionType& type);
- virtual Status Visit(const DictionaryType& type);
- virtual Status Visit(const ExtensionType& type);
-};
-
-class ARROW_EXPORT ScalarVisitor {
- public:
- virtual ~ScalarVisitor() = default;
-
- virtual Status Visit(const NullScalar& scalar);
- virtual Status Visit(const BooleanScalar& scalar);
- virtual Status Visit(const Int8Scalar& scalar);
- virtual Status Visit(const Int16Scalar& scalar);
- virtual Status Visit(const Int32Scalar& scalar);
- virtual Status Visit(const Int64Scalar& scalar);
- virtual Status Visit(const UInt8Scalar& scalar);
- virtual Status Visit(const UInt16Scalar& scalar);
- virtual Status Visit(const UInt32Scalar& scalar);
- virtual Status Visit(const UInt64Scalar& scalar);
- virtual Status Visit(const HalfFloatScalar& scalar);
- virtual Status Visit(const FloatScalar& scalar);
- virtual Status Visit(const DoubleScalar& scalar);
- virtual Status Visit(const StringScalar& scalar);
- virtual Status Visit(const BinaryScalar& scalar);
- virtual Status Visit(const LargeStringScalar& scalar);
- virtual Status Visit(const LargeBinaryScalar& scalar);
- virtual Status Visit(const FixedSizeBinaryScalar& scalar);
- virtual Status Visit(const Date64Scalar& scalar);
- virtual Status Visit(const Date32Scalar& scalar);
- virtual Status Visit(const Time32Scalar& scalar);
- virtual Status Visit(const Time64Scalar& scalar);
- virtual Status Visit(const TimestampScalar& scalar);
- virtual Status Visit(const DayTimeIntervalScalar& scalar);
- virtual Status Visit(const MonthIntervalScalar& scalar);
- virtual Status Visit(const DurationScalar& scalar);
- virtual Status Visit(const Decimal128Scalar& scalar);
+ virtual Status Visit(const ListType& type);
+ virtual Status Visit(const LargeListType& type);
+ virtual Status Visit(const MapType& type);
+ virtual Status Visit(const FixedSizeListType& type);
+ virtual Status Visit(const StructType& type);
+ virtual Status Visit(const SparseUnionType& type);
+ virtual Status Visit(const DenseUnionType& type);
+ virtual Status Visit(const DictionaryType& type);
+ virtual Status Visit(const ExtensionType& type);
+};
+
+class ARROW_EXPORT ScalarVisitor {
+ public:
+ virtual ~ScalarVisitor() = default;
+
+ virtual Status Visit(const NullScalar& scalar);
+ virtual Status Visit(const BooleanScalar& scalar);
+ virtual Status Visit(const Int8Scalar& scalar);
+ virtual Status Visit(const Int16Scalar& scalar);
+ virtual Status Visit(const Int32Scalar& scalar);
+ virtual Status Visit(const Int64Scalar& scalar);
+ virtual Status Visit(const UInt8Scalar& scalar);
+ virtual Status Visit(const UInt16Scalar& scalar);
+ virtual Status Visit(const UInt32Scalar& scalar);
+ virtual Status Visit(const UInt64Scalar& scalar);
+ virtual Status Visit(const HalfFloatScalar& scalar);
+ virtual Status Visit(const FloatScalar& scalar);
+ virtual Status Visit(const DoubleScalar& scalar);
+ virtual Status Visit(const StringScalar& scalar);
+ virtual Status Visit(const BinaryScalar& scalar);
+ virtual Status Visit(const LargeStringScalar& scalar);
+ virtual Status Visit(const LargeBinaryScalar& scalar);
+ virtual Status Visit(const FixedSizeBinaryScalar& scalar);
+ virtual Status Visit(const Date64Scalar& scalar);
+ virtual Status Visit(const Date32Scalar& scalar);
+ virtual Status Visit(const Time32Scalar& scalar);
+ virtual Status Visit(const Time64Scalar& scalar);
+ virtual Status Visit(const TimestampScalar& scalar);
+ virtual Status Visit(const DayTimeIntervalScalar& scalar);
+ virtual Status Visit(const MonthIntervalScalar& scalar);
+ virtual Status Visit(const DurationScalar& scalar);
+ virtual Status Visit(const Decimal128Scalar& scalar);
virtual Status Visit(const Decimal256Scalar& scalar);
- virtual Status Visit(const ListScalar& scalar);
- virtual Status Visit(const LargeListScalar& scalar);
- virtual Status Visit(const MapScalar& scalar);
- virtual Status Visit(const FixedSizeListScalar& scalar);
- virtual Status Visit(const StructScalar& scalar);
- virtual Status Visit(const DictionaryScalar& scalar);
-};
-
-} // namespace arrow
+ virtual Status Visit(const ListScalar& scalar);
+ virtual Status Visit(const LargeListScalar& scalar);
+ virtual Status Visit(const MapScalar& scalar);
+ virtual Status Visit(const FixedSizeListScalar& scalar);
+ virtual Status Visit(const StructScalar& scalar);
+ virtual Status Visit(const DictionaryScalar& scalar);
+};
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/visitor_inline.h b/contrib/libs/apache/arrow/cpp/src/arrow/visitor_inline.h
index 132c35aeaa1..2b22e4af5f9 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/visitor_inline.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/visitor_inline.h
@@ -1,449 +1,449 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Private header, not to be exported
-
-#pragma once
-
-#include <utility>
-
-#include "arrow/array.h"
-#include "arrow/extension_type.h"
-#include "arrow/scalar.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/util/bit_block_counter.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/functional.h"
-#include "arrow/util/string_view.h"
-
-namespace arrow {
-
-#define ARROW_GENERATE_FOR_ALL_INTEGER_TYPES(ACTION) \
- ACTION(Int8); \
- ACTION(UInt8); \
- ACTION(Int16); \
- ACTION(UInt16); \
- ACTION(Int32); \
- ACTION(UInt32); \
- ACTION(Int64); \
- ACTION(UInt64)
-
-#define ARROW_GENERATE_FOR_ALL_NUMERIC_TYPES(ACTION) \
- ARROW_GENERATE_FOR_ALL_INTEGER_TYPES(ACTION); \
- ACTION(HalfFloat); \
- ACTION(Float); \
- ACTION(Double)
-
-#define ARROW_GENERATE_FOR_ALL_TYPES(ACTION) \
- ACTION(Null); \
- ACTION(Boolean); \
- ARROW_GENERATE_FOR_ALL_NUMERIC_TYPES(ACTION); \
- ACTION(String); \
- ACTION(Binary); \
- ACTION(LargeString); \
- ACTION(LargeBinary); \
- ACTION(FixedSizeBinary); \
- ACTION(Duration); \
- ACTION(Date32); \
- ACTION(Date64); \
- ACTION(Timestamp); \
- ACTION(Time32); \
- ACTION(Time64); \
- ACTION(MonthInterval); \
- ACTION(DayTimeInterval); \
- ACTION(Decimal128); \
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Private header, not to be exported
+
+#pragma once
+
+#include <utility>
+
+#include "arrow/array.h"
+#include "arrow/extension_type.h"
+#include "arrow/scalar.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/bit_block_counter.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/functional.h"
+#include "arrow/util/string_view.h"
+
+namespace arrow {
+
+#define ARROW_GENERATE_FOR_ALL_INTEGER_TYPES(ACTION) \
+ ACTION(Int8); \
+ ACTION(UInt8); \
+ ACTION(Int16); \
+ ACTION(UInt16); \
+ ACTION(Int32); \
+ ACTION(UInt32); \
+ ACTION(Int64); \
+ ACTION(UInt64)
+
+#define ARROW_GENERATE_FOR_ALL_NUMERIC_TYPES(ACTION) \
+ ARROW_GENERATE_FOR_ALL_INTEGER_TYPES(ACTION); \
+ ACTION(HalfFloat); \
+ ACTION(Float); \
+ ACTION(Double)
+
+#define ARROW_GENERATE_FOR_ALL_TYPES(ACTION) \
+ ACTION(Null); \
+ ACTION(Boolean); \
+ ARROW_GENERATE_FOR_ALL_NUMERIC_TYPES(ACTION); \
+ ACTION(String); \
+ ACTION(Binary); \
+ ACTION(LargeString); \
+ ACTION(LargeBinary); \
+ ACTION(FixedSizeBinary); \
+ ACTION(Duration); \
+ ACTION(Date32); \
+ ACTION(Date64); \
+ ACTION(Timestamp); \
+ ACTION(Time32); \
+ ACTION(Time64); \
+ ACTION(MonthInterval); \
+ ACTION(DayTimeInterval); \
+ ACTION(Decimal128); \
ACTION(Decimal256); \
- ACTION(List); \
- ACTION(LargeList); \
- ACTION(Map); \
- ACTION(FixedSizeList); \
- ACTION(Struct); \
- ACTION(SparseUnion); \
- ACTION(DenseUnion); \
- ACTION(Dictionary); \
- ACTION(Extension)
-
-#define TYPE_VISIT_INLINE(TYPE_CLASS) \
- case TYPE_CLASS##Type::type_id: \
- return visitor->Visit(internal::checked_cast<const TYPE_CLASS##Type&>(type));
-
-template <typename VISITOR>
-inline Status VisitTypeInline(const DataType& type, VISITOR* visitor) {
- switch (type.id()) {
- ARROW_GENERATE_FOR_ALL_TYPES(TYPE_VISIT_INLINE);
- default:
- break;
- }
- return Status::NotImplemented("Type not implemented");
-}
-
-#undef TYPE_VISIT_INLINE
-
-#define TYPE_ID_VISIT_INLINE(TYPE_CLASS) \
- case TYPE_CLASS##Type::type_id: { \
- const TYPE_CLASS##Type* concrete_ptr = nullptr; \
- return visitor->Visit(concrete_ptr); \
- }
-
-// Calls `visitor` with a nullptr of the corresponding concrete type class
-template <typename VISITOR>
-inline Status VisitTypeIdInline(Type::type id, VISITOR* visitor) {
- switch (id) {
- ARROW_GENERATE_FOR_ALL_TYPES(TYPE_ID_VISIT_INLINE);
- default:
- break;
- }
- return Status::NotImplemented("Type not implemented");
-}
-
-#undef TYPE_ID_VISIT_INLINE
-
-#define ARRAY_VISIT_INLINE(TYPE_CLASS) \
- case TYPE_CLASS##Type::type_id: \
- return visitor->Visit( \
- internal::checked_cast<const typename TypeTraits<TYPE_CLASS##Type>::ArrayType&>( \
- array));
-
-template <typename VISITOR>
-inline Status VisitArrayInline(const Array& array, VISITOR* visitor) {
- switch (array.type_id()) {
- ARROW_GENERATE_FOR_ALL_TYPES(ARRAY_VISIT_INLINE);
- default:
- break;
- }
- return Status::NotImplemented("Type not implemented");
-}
-
-namespace internal {
-
-template <typename T, typename Enable = void>
-struct ArrayDataInlineVisitor {};
-
-// Numeric and primitive C-compatible types
-template <typename T>
-struct ArrayDataInlineVisitor<T, enable_if_has_c_type<T>> {
- using c_type = typename T::c_type;
-
- template <typename ValidFunc, typename NullFunc>
- static Status VisitStatus(const ArrayData& arr, ValidFunc&& valid_func,
- NullFunc&& null_func) {
- const c_type* data = arr.GetValues<c_type>(1);
- auto visit_valid = [&](int64_t i) { return valid_func(data[i]); };
- return VisitBitBlocks(arr.buffers[0], arr.offset, arr.length, std::move(visit_valid),
- std::forward<NullFunc>(null_func));
- }
-
- template <typename ValidFunc, typename NullFunc>
- static void VisitVoid(const ArrayData& arr, ValidFunc&& valid_func,
- NullFunc&& null_func) {
- using c_type = typename T::c_type;
- const c_type* data = arr.GetValues<c_type>(1);
- auto visit_valid = [&](int64_t i) { valid_func(data[i]); };
- VisitBitBlocksVoid(arr.buffers[0], arr.offset, arr.length, std::move(visit_valid),
- std::forward<NullFunc>(null_func));
- }
-};
-
-// Boolean
-template <>
-struct ArrayDataInlineVisitor<BooleanType> {
- using c_type = bool;
-
- template <typename ValidFunc, typename NullFunc>
- static Status VisitStatus(const ArrayData& arr, ValidFunc&& valid_func,
- NullFunc&& null_func) {
- int64_t offset = arr.offset;
- const uint8_t* data = arr.buffers[1]->data();
- return VisitBitBlocks(
- arr.buffers[0], offset, arr.length,
- [&](int64_t i) { return valid_func(BitUtil::GetBit(data, offset + i)); },
- std::forward<NullFunc>(null_func));
- }
-
- template <typename ValidFunc, typename NullFunc>
- static void VisitVoid(const ArrayData& arr, ValidFunc&& valid_func,
- NullFunc&& null_func) {
- int64_t offset = arr.offset;
- const uint8_t* data = arr.buffers[1]->data();
- VisitBitBlocksVoid(
- arr.buffers[0], offset, arr.length,
- [&](int64_t i) { valid_func(BitUtil::GetBit(data, offset + i)); },
- std::forward<NullFunc>(null_func));
- }
-};
-
-// Binary, String...
-template <typename T>
-struct ArrayDataInlineVisitor<T, enable_if_base_binary<T>> {
- using c_type = util::string_view;
-
- template <typename ValidFunc, typename NullFunc>
- static Status VisitStatus(const ArrayData& arr, ValidFunc&& valid_func,
- NullFunc&& null_func) {
- using offset_type = typename T::offset_type;
- constexpr char empty_value = 0;
-
+ ACTION(List); \
+ ACTION(LargeList); \
+ ACTION(Map); \
+ ACTION(FixedSizeList); \
+ ACTION(Struct); \
+ ACTION(SparseUnion); \
+ ACTION(DenseUnion); \
+ ACTION(Dictionary); \
+ ACTION(Extension)
+
+#define TYPE_VISIT_INLINE(TYPE_CLASS) \
+ case TYPE_CLASS##Type::type_id: \
+ return visitor->Visit(internal::checked_cast<const TYPE_CLASS##Type&>(type));
+
+template <typename VISITOR>
+inline Status VisitTypeInline(const DataType& type, VISITOR* visitor) {
+ switch (type.id()) {
+ ARROW_GENERATE_FOR_ALL_TYPES(TYPE_VISIT_INLINE);
+ default:
+ break;
+ }
+ return Status::NotImplemented("Type not implemented");
+}
+
+#undef TYPE_VISIT_INLINE
+
+#define TYPE_ID_VISIT_INLINE(TYPE_CLASS) \
+ case TYPE_CLASS##Type::type_id: { \
+ const TYPE_CLASS##Type* concrete_ptr = nullptr; \
+ return visitor->Visit(concrete_ptr); \
+ }
+
+// Calls `visitor` with a nullptr of the corresponding concrete type class
+template <typename VISITOR>
+inline Status VisitTypeIdInline(Type::type id, VISITOR* visitor) {
+ switch (id) {
+ ARROW_GENERATE_FOR_ALL_TYPES(TYPE_ID_VISIT_INLINE);
+ default:
+ break;
+ }
+ return Status::NotImplemented("Type not implemented");
+}
+
+#undef TYPE_ID_VISIT_INLINE
+
+#define ARRAY_VISIT_INLINE(TYPE_CLASS) \
+ case TYPE_CLASS##Type::type_id: \
+ return visitor->Visit( \
+ internal::checked_cast<const typename TypeTraits<TYPE_CLASS##Type>::ArrayType&>( \
+ array));
+
+template <typename VISITOR>
+inline Status VisitArrayInline(const Array& array, VISITOR* visitor) {
+ switch (array.type_id()) {
+ ARROW_GENERATE_FOR_ALL_TYPES(ARRAY_VISIT_INLINE);
+ default:
+ break;
+ }
+ return Status::NotImplemented("Type not implemented");
+}
+
+namespace internal {
+
+template <typename T, typename Enable = void>
+struct ArrayDataInlineVisitor {};
+
+// Numeric and primitive C-compatible types
+template <typename T>
+struct ArrayDataInlineVisitor<T, enable_if_has_c_type<T>> {
+ using c_type = typename T::c_type;
+
+ template <typename ValidFunc, typename NullFunc>
+ static Status VisitStatus(const ArrayData& arr, ValidFunc&& valid_func,
+ NullFunc&& null_func) {
+ const c_type* data = arr.GetValues<c_type>(1);
+ auto visit_valid = [&](int64_t i) { return valid_func(data[i]); };
+ return VisitBitBlocks(arr.buffers[0], arr.offset, arr.length, std::move(visit_valid),
+ std::forward<NullFunc>(null_func));
+ }
+
+ template <typename ValidFunc, typename NullFunc>
+ static void VisitVoid(const ArrayData& arr, ValidFunc&& valid_func,
+ NullFunc&& null_func) {
+ using c_type = typename T::c_type;
+ const c_type* data = arr.GetValues<c_type>(1);
+ auto visit_valid = [&](int64_t i) { valid_func(data[i]); };
+ VisitBitBlocksVoid(arr.buffers[0], arr.offset, arr.length, std::move(visit_valid),
+ std::forward<NullFunc>(null_func));
+ }
+};
+
+// Boolean
+template <>
+struct ArrayDataInlineVisitor<BooleanType> {
+ using c_type = bool;
+
+ template <typename ValidFunc, typename NullFunc>
+ static Status VisitStatus(const ArrayData& arr, ValidFunc&& valid_func,
+ NullFunc&& null_func) {
+ int64_t offset = arr.offset;
+ const uint8_t* data = arr.buffers[1]->data();
+ return VisitBitBlocks(
+ arr.buffers[0], offset, arr.length,
+ [&](int64_t i) { return valid_func(BitUtil::GetBit(data, offset + i)); },
+ std::forward<NullFunc>(null_func));
+ }
+
+ template <typename ValidFunc, typename NullFunc>
+ static void VisitVoid(const ArrayData& arr, ValidFunc&& valid_func,
+ NullFunc&& null_func) {
+ int64_t offset = arr.offset;
+ const uint8_t* data = arr.buffers[1]->data();
+ VisitBitBlocksVoid(
+ arr.buffers[0], offset, arr.length,
+ [&](int64_t i) { valid_func(BitUtil::GetBit(data, offset + i)); },
+ std::forward<NullFunc>(null_func));
+ }
+};
+
+// Binary, String...
+template <typename T>
+struct ArrayDataInlineVisitor<T, enable_if_base_binary<T>> {
+ using c_type = util::string_view;
+
+ template <typename ValidFunc, typename NullFunc>
+ static Status VisitStatus(const ArrayData& arr, ValidFunc&& valid_func,
+ NullFunc&& null_func) {
+ using offset_type = typename T::offset_type;
+ constexpr char empty_value = 0;
+
if (arr.length == 0) {
return Status::OK();
}
- const offset_type* offsets = arr.GetValues<offset_type>(1);
- const char* data;
- if (!arr.buffers[2]) {
- data = &empty_value;
- } else {
- // Do not apply the array offset to the values array; the value_offsets
- // index the non-sliced values array.
- data = arr.GetValues<char>(2, /*absolute_offset=*/0);
- }
- offset_type cur_offset = *offsets++;
- return VisitBitBlocks(
- arr.buffers[0], arr.offset, arr.length,
- [&](int64_t i) {
- ARROW_UNUSED(i);
- auto value = util::string_view(data + cur_offset, *offsets - cur_offset);
- cur_offset = *offsets++;
- return valid_func(value);
- },
- [&]() {
- cur_offset = *offsets++;
- return null_func();
- });
- }
-
- template <typename ValidFunc, typename NullFunc>
- static void VisitVoid(const ArrayData& arr, ValidFunc&& valid_func,
- NullFunc&& null_func) {
- using offset_type = typename T::offset_type;
- constexpr uint8_t empty_value = 0;
-
+ const offset_type* offsets = arr.GetValues<offset_type>(1);
+ const char* data;
+ if (!arr.buffers[2]) {
+ data = &empty_value;
+ } else {
+ // Do not apply the array offset to the values array; the value_offsets
+ // index the non-sliced values array.
+ data = arr.GetValues<char>(2, /*absolute_offset=*/0);
+ }
+ offset_type cur_offset = *offsets++;
+ return VisitBitBlocks(
+ arr.buffers[0], arr.offset, arr.length,
+ [&](int64_t i) {
+ ARROW_UNUSED(i);
+ auto value = util::string_view(data + cur_offset, *offsets - cur_offset);
+ cur_offset = *offsets++;
+ return valid_func(value);
+ },
+ [&]() {
+ cur_offset = *offsets++;
+ return null_func();
+ });
+ }
+
+ template <typename ValidFunc, typename NullFunc>
+ static void VisitVoid(const ArrayData& arr, ValidFunc&& valid_func,
+ NullFunc&& null_func) {
+ using offset_type = typename T::offset_type;
+ constexpr uint8_t empty_value = 0;
+
if (arr.length == 0) {
return;
}
- const offset_type* offsets = arr.GetValues<offset_type>(1);
- const uint8_t* data;
- if (!arr.buffers[2]) {
- data = &empty_value;
- } else {
- // Do not apply the array offset to the values array; the value_offsets
- // index the non-sliced values array.
- data = arr.GetValues<uint8_t>(2, /*absolute_offset=*/0);
- }
-
- VisitBitBlocksVoid(
- arr.buffers[0], arr.offset, arr.length,
- [&](int64_t i) {
- auto value = util::string_view(reinterpret_cast<const char*>(data + offsets[i]),
- offsets[i + 1] - offsets[i]);
- valid_func(value);
- },
- std::forward<NullFunc>(null_func));
- }
-};
-
-// FixedSizeBinary, Decimal128
-template <typename T>
-struct ArrayDataInlineVisitor<T, enable_if_fixed_size_binary<T>> {
- using c_type = util::string_view;
-
- template <typename ValidFunc, typename NullFunc>
- static Status VisitStatus(const ArrayData& arr, ValidFunc&& valid_func,
- NullFunc&& null_func) {
- const auto& fw_type = internal::checked_cast<const FixedSizeBinaryType&>(*arr.type);
-
- const int32_t byte_width = fw_type.byte_width();
- const char* data = arr.GetValues<char>(1,
- /*absolute_offset=*/arr.offset * byte_width);
-
- return VisitBitBlocks(
- arr.buffers[0], arr.offset, arr.length,
- [&](int64_t i) {
- auto value = util::string_view(data, byte_width);
- data += byte_width;
- return valid_func(value);
- },
- [&]() {
- data += byte_width;
- return null_func();
- });
- }
-
- template <typename ValidFunc, typename NullFunc>
- static void VisitVoid(const ArrayData& arr, ValidFunc&& valid_func,
- NullFunc&& null_func) {
- const auto& fw_type = internal::checked_cast<const FixedSizeBinaryType&>(*arr.type);
-
- const int32_t byte_width = fw_type.byte_width();
- const char* data = arr.GetValues<char>(1,
- /*absolute_offset=*/arr.offset * byte_width);
-
- VisitBitBlocksVoid(
- arr.buffers[0], arr.offset, arr.length,
- [&](int64_t i) {
- valid_func(util::string_view(data, byte_width));
- data += byte_width;
- },
- [&]() {
- data += byte_width;
- null_func();
- });
- }
-};
-
-} // namespace internal
-
-// Visit an array's data values, in order, without overhead.
-//
-// The given `ValidFunc` should be a callable with either of these signatures:
-// - void(scalar_type)
-// - Status(scalar_type)
-//
-// The `NullFunc` should have the same return type as `ValidFunc`.
-//
-// ... where `scalar_type` depends on the array data type:
-// - the type's `c_type`, if any
-// - for boolean arrays, a `bool`
-// - for binary, string and fixed-size binary arrays, a `util::string_view`
-
-template <typename T, typename ValidFunc, typename NullFunc>
-typename internal::call_traits::enable_if_return<ValidFunc, Status>::type
-VisitArrayDataInline(const ArrayData& arr, ValidFunc&& valid_func, NullFunc&& null_func) {
- return internal::ArrayDataInlineVisitor<T>::VisitStatus(
- arr, std::forward<ValidFunc>(valid_func), std::forward<NullFunc>(null_func));
-}
-
-template <typename T, typename ValidFunc, typename NullFunc>
-typename internal::call_traits::enable_if_return<ValidFunc, void>::type
-VisitArrayDataInline(const ArrayData& arr, ValidFunc&& valid_func, NullFunc&& null_func) {
- return internal::ArrayDataInlineVisitor<T>::VisitVoid(
- arr, std::forward<ValidFunc>(valid_func), std::forward<NullFunc>(null_func));
-}
-
-// Visit an array's data values, in order, without overhead.
-//
-// The Visit method's `visitor` argument should be an object with two public methods:
-// - Status VisitNull()
-// - Status VisitValue(<scalar>)
-//
-// The scalar value's type depends on the array data type:
-// - the type's `c_type`, if any
-// - for boolean arrays, a `bool`
-// - for binary, string and fixed-size binary arrays, a `util::string_view`
-
-template <typename T>
-struct ArrayDataVisitor {
- using InlineVisitorType = internal::ArrayDataInlineVisitor<T>;
- using c_type = typename InlineVisitorType::c_type;
-
- template <typename Visitor>
- static Status Visit(const ArrayData& arr, Visitor* visitor) {
- return InlineVisitorType::VisitStatus(
- arr, [visitor](c_type v) { return visitor->VisitValue(v); },
- [visitor]() { return visitor->VisitNull(); });
- }
-};
-
-#define SCALAR_VISIT_INLINE(TYPE_CLASS) \
- case TYPE_CLASS##Type::type_id: \
- return visitor->Visit(internal::checked_cast<const TYPE_CLASS##Scalar&>(scalar));
-
-template <typename VISITOR>
-inline Status VisitScalarInline(const Scalar& scalar, VISITOR* visitor) {
- switch (scalar.type->id()) {
- ARROW_GENERATE_FOR_ALL_TYPES(SCALAR_VISIT_INLINE);
- default:
- break;
- }
- return Status::NotImplemented("Scalar visitor for type not implemented ",
- scalar.type->ToString());
-}
-
-#undef TYPE_VISIT_INLINE
-
-// Visit a null bitmap, in order, without overhead.
-//
-// The given `ValidFunc` should be a callable with either of these signatures:
-// - void()
-// - Status()
-//
-// The `NullFunc` should have the same return type as `ValidFunc`.
-
-template <typename ValidFunc, typename NullFunc>
-typename internal::call_traits::enable_if_return<ValidFunc, Status>::type
-VisitNullBitmapInline(const uint8_t* valid_bits, int64_t valid_bits_offset,
- int64_t num_values, int64_t null_count, ValidFunc&& valid_func,
- NullFunc&& null_func) {
- ARROW_UNUSED(null_count);
- internal::OptionalBitBlockCounter bit_counter(valid_bits, valid_bits_offset,
- num_values);
- int64_t position = 0;
- int64_t offset_position = valid_bits_offset;
- while (position < num_values) {
- internal::BitBlockCount block = bit_counter.NextBlock();
- if (block.AllSet()) {
- for (int64_t i = 0; i < block.length; ++i) {
- ARROW_RETURN_NOT_OK(valid_func());
- }
- } else if (block.NoneSet()) {
- for (int64_t i = 0; i < block.length; ++i) {
- ARROW_RETURN_NOT_OK(null_func());
- }
- } else {
- for (int64_t i = 0; i < block.length; ++i) {
- ARROW_RETURN_NOT_OK(BitUtil::GetBit(valid_bits, offset_position + i)
- ? valid_func()
- : null_func());
- }
- }
- position += block.length;
- offset_position += block.length;
- }
- return Status::OK();
-}
-
-template <typename ValidFunc, typename NullFunc>
-typename internal::call_traits::enable_if_return<ValidFunc, void>::type
-VisitNullBitmapInline(const uint8_t* valid_bits, int64_t valid_bits_offset,
- int64_t num_values, int64_t null_count, ValidFunc&& valid_func,
- NullFunc&& null_func) {
- ARROW_UNUSED(null_count);
- internal::OptionalBitBlockCounter bit_counter(valid_bits, valid_bits_offset,
- num_values);
- int64_t position = 0;
- int64_t offset_position = valid_bits_offset;
- while (position < num_values) {
- internal::BitBlockCount block = bit_counter.NextBlock();
- if (block.AllSet()) {
- for (int64_t i = 0; i < block.length; ++i) {
- valid_func();
- }
- } else if (block.NoneSet()) {
- for (int64_t i = 0; i < block.length; ++i) {
- null_func();
- }
- } else {
- for (int64_t i = 0; i < block.length; ++i) {
- BitUtil::GetBit(valid_bits, offset_position + i) ? valid_func() : null_func();
- }
- }
- position += block.length;
- offset_position += block.length;
- }
-}
-
-} // namespace arrow
+ const offset_type* offsets = arr.GetValues<offset_type>(1);
+ const uint8_t* data;
+ if (!arr.buffers[2]) {
+ data = &empty_value;
+ } else {
+ // Do not apply the array offset to the values array; the value_offsets
+ // index the non-sliced values array.
+ data = arr.GetValues<uint8_t>(2, /*absolute_offset=*/0);
+ }
+
+ VisitBitBlocksVoid(
+ arr.buffers[0], arr.offset, arr.length,
+ [&](int64_t i) {
+ auto value = util::string_view(reinterpret_cast<const char*>(data + offsets[i]),
+ offsets[i + 1] - offsets[i]);
+ valid_func(value);
+ },
+ std::forward<NullFunc>(null_func));
+ }
+};
+
+// FixedSizeBinary, Decimal128: every valid slot is handed to valid_func as a util::string_view of byte_width bytes.
+template <typename T>
+struct ArrayDataInlineVisitor<T, enable_if_fixed_size_binary<T>> {
+ using c_type = util::string_view;  // value type passed to valid_func
+
+ template <typename ValidFunc, typename NullFunc>
+ static Status VisitStatus(const ArrayData& arr, ValidFunc&& valid_func,
+ NullFunc&& null_func) {
+ const auto& fw_type = internal::checked_cast<const FixedSizeBinaryType&>(*arr.type);
+
+ const int32_t byte_width = fw_type.byte_width();  // used both as view length and as cursor stride
+ const char* data = arr.GetValues<char>(1,  // running cursor into buffer 1
+ /*absolute_offset=*/arr.offset * byte_width);  // arr.offset scaled by byte_width for the char-typed view
+
+ return VisitBitBlocks(  // the callbacks' results are returned to VisitBitBlocks, whose result is returned here
+ arr.buffers[0], arr.offset, arr.length,
+ [&](int64_t i) {  // `i` is not used; `data` tracks the current slot instead
+ auto value = util::string_view(data, byte_width);
+ data += byte_width;  // step to the next slot before calling out
+ return valid_func(value);
+ },
+ [&]() {  // null slot
+ data += byte_width;  // the cursor is advanced for null slots too, keeping it in step with the slot index
+ return null_func();
+ });
+ }
+
+ template <typename ValidFunc, typename NullFunc>
+ static void VisitVoid(const ArrayData& arr, ValidFunc&& valid_func,
+ NullFunc&& null_func) {
+ const auto& fw_type = internal::checked_cast<const FixedSizeBinaryType&>(*arr.type);
+
+ const int32_t byte_width = fw_type.byte_width();  // used both as view length and as cursor stride
+ const char* data = arr.GetValues<char>(1,  // running cursor into buffer 1
+ /*absolute_offset=*/arr.offset * byte_width);  // arr.offset scaled by byte_width for the char-typed view
+
+ VisitBitBlocksVoid(  // same traversal as VisitStatus, but callback results are discarded
+ arr.buffers[0], arr.offset, arr.length,
+ [&](int64_t i) {  // `i` is not used; `data` tracks the current slot instead
+ valid_func(util::string_view(data, byte_width));
+ data += byte_width;  // step to the next slot
+ },
+ [&]() {  // null slot
+ data += byte_width;  // the cursor is advanced for null slots too
+ null_func();
+ });
+ }
+};
+
+} // namespace internal
+
+// Visit an array's data values, in order, without overhead.
+// Both overloads forward to internal::ArrayDataInlineVisitor<T> (VisitStatus or VisitVoid).
+// The given `ValidFunc` should be a callable with either of these signatures:
+// - void(scalar_type)
+// - Status(scalar_type)
+//
+// The `NullFunc` takes no arguments and should have the same return type as `ValidFunc`.
+//
+// ... where `scalar_type` depends on the array data type:
+// - the type's `c_type`, if any
+// - for boolean arrays, a `bool`
+// - for binary, string and fixed-size binary arrays, a `util::string_view`
+
+template <typename T, typename ValidFunc, typename NullFunc>
+typename internal::call_traits::enable_if_return<ValidFunc, Status>::type  // overload for the Status(scalar_type) signature
+VisitArrayDataInline(const ArrayData& arr, ValidFunc&& valid_func, NullFunc&& null_func) {
+ return internal::ArrayDataInlineVisitor<T>::VisitStatus(
+ arr, std::forward<ValidFunc>(valid_func), std::forward<NullFunc>(null_func));
+}
+
+template <typename T, typename ValidFunc, typename NullFunc>
+typename internal::call_traits::enable_if_return<ValidFunc, void>::type  // overload for the void(scalar_type) signature
+VisitArrayDataInline(const ArrayData& arr, ValidFunc&& valid_func, NullFunc&& null_func) {
+ return internal::ArrayDataInlineVisitor<T>::VisitVoid(
+ arr, std::forward<ValidFunc>(valid_func), std::forward<NullFunc>(null_func));
+}
+
+// Visit an array's data values, in order, without overhead.
+// Object-based wrapper over internal::ArrayDataInlineVisitor<T>::VisitStatus.
+// The Visit method's `visitor` argument should be an object with two public methods:
+// - Status VisitNull()
+// - Status VisitValue(<scalar>)
+//
+// The scalar value's type depends on the array data type:
+// - the type's `c_type`, if any
+// - for boolean arrays, a `bool`
+// - for binary, string and fixed-size binary arrays, a `util::string_view`
+
+template <typename T>
+struct ArrayDataVisitor {
+ using InlineVisitorType = internal::ArrayDataInlineVisitor<T>;
+ using c_type = typename InlineVisitorType::c_type;  // argument type of the VisitValue lambda below
+
+ template <typename Visitor>
+ static Status Visit(const ArrayData& arr, Visitor* visitor) {
+ return InlineVisitorType::VisitStatus(
+ arr, [visitor](c_type v) { return visitor->VisitValue(v); },  // valid slot -> VisitValue
+ [visitor]() { return visitor->VisitNull(); });  // null slot -> VisitNull
+ }
+};
+
+// Expands to one `case` of the switch in VisitScalarInline: for the given
+// TYPE_CLASS it matches `<TYPE_CLASS>Type::type_id`, downcasts `scalar` to
+// `const <TYPE_CLASS>Scalar&` and returns the result of `visitor->Visit(...)`.
+#define SCALAR_VISIT_INLINE(TYPE_CLASS) \
+ case TYPE_CLASS##Type::type_id: \
+ return visitor->Visit(internal::checked_cast<const TYPE_CLASS##Scalar&>(scalar));
+
+// Dispatch `scalar` to the `Visit` overload of `visitor` that matches its
+// concrete scalar class, selected by `scalar.type->id()`.
+//
+// Returns whatever `visitor->Visit` returns for a handled type id, and
+// Status::NotImplemented (with the type's string form) when no case matches.
+template <typename VISITOR>
+inline Status VisitScalarInline(const Scalar& scalar, VISITOR* visitor) {
+ switch (scalar.type->id()) {
+ ARROW_GENERATE_FOR_ALL_TYPES(SCALAR_VISIT_INLINE);
+ default:
+ break;
+ }
+ return Status::NotImplemented("Scalar visitor for type not implemented ",
+ scalar.type->ToString());
+}
+
+// Remove the helper macro defined above so it does not stay defined for the
+// rest of the translation unit.
+#undef SCALAR_VISIT_INLINE
+// NOTE(review): TYPE_VISIT_INLINE is not defined in this part of the file;
+// this #undef is kept as a harmless no-op in case an earlier definition
+// relies on being cleaned up here. Confirm and drop it if not needed.
+#undef TYPE_VISIT_INLINE
+
+// Visit a null bitmap, in order, without overhead: one callback call per bit.
+//
+// The given `ValidFunc` should be a callable with either of these signatures:
+// - void()
+// - Status()
+//
+// The `NullFunc` takes no arguments and should have the same return type as `ValidFunc`.
+
+template <typename ValidFunc, typename NullFunc>
+typename internal::call_traits::enable_if_return<ValidFunc, Status>::type  // overload for the Status() signature
+VisitNullBitmapInline(const uint8_t* valid_bits, int64_t valid_bits_offset,
+ int64_t num_values, int64_t null_count, ValidFunc&& valid_func,
+ NullFunc&& null_func) {
+ ARROW_UNUSED(null_count);  // accepted in the signature but not consulted
+ internal::OptionalBitBlockCounter bit_counter(valid_bits, valid_bits_offset,
+ num_values);
+ int64_t position = 0;  // number of values covered by the blocks consumed so far
+ int64_t offset_position = valid_bits_offset;  // bit index in valid_bits of the current block's first bit
+ while (position < num_values) {
+ internal::BitBlockCount block = bit_counter.NextBlock();
+ if (block.AllSet()) {  // whole block valid: no per-bit test needed
+ for (int64_t i = 0; i < block.length; ++i) {
+ ARROW_RETURN_NOT_OK(valid_func());
+ }
+ } else if (block.NoneSet()) {  // whole block null
+ for (int64_t i = 0; i < block.length; ++i) {
+ ARROW_RETURN_NOT_OK(null_func());
+ }
+ } else {  // mixed block: test each bit individually
+ for (int64_t i = 0; i < block.length; ++i) {
+ ARROW_RETURN_NOT_OK(BitUtil::GetBit(valid_bits, offset_position + i)
+ ? valid_func()
+ : null_func());
+ }
+ }
+ position += block.length;  // both counters advance by the block just processed
+ offset_position += block.length;
+ }
+ return Status::OK();
+}
+
+template <typename ValidFunc, typename NullFunc>
+typename internal::call_traits::enable_if_return<ValidFunc, void>::type  // overload for callbacks returning void
+VisitNullBitmapInline(const uint8_t* valid_bits, int64_t valid_bits_offset,
+ int64_t num_values, int64_t null_count, ValidFunc&& valid_func,
+ NullFunc&& null_func) {
+ ARROW_UNUSED(null_count);  // accepted in the signature but not consulted
+ internal::OptionalBitBlockCounter bit_counter(valid_bits, valid_bits_offset,
+ num_values);
+ int64_t position = 0;  // number of values covered by the blocks consumed so far
+ int64_t offset_position = valid_bits_offset;  // bit index in valid_bits of the current block's first bit
+ while (position < num_values) {
+ internal::BitBlockCount block = bit_counter.NextBlock();
+ if (block.AllSet()) {  // whole block valid: no per-bit test needed
+ for (int64_t i = 0; i < block.length; ++i) {
+ valid_func();
+ }
+ } else if (block.NoneSet()) {  // whole block null
+ for (int64_t i = 0; i < block.length; ++i) {
+ null_func();
+ }
+ } else {  // mixed block: test each bit individually
+ for (int64_t i = 0; i < block.length; ++i) {
+ BitUtil::GetBit(valid_bits, offset_position + i) ? valid_func() : null_func();
+ }
+ }
+ position += block.length;  // both counters advance by the block just processed
+ offset_position += block.length;
+ }
+}
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/generated/File_generated.h b/contrib/libs/apache/arrow/cpp/src/generated/File_generated.h
index 06953c4a040..9467594c1b3 100644
--- a/contrib/libs/apache/arrow/cpp/src/generated/File_generated.h
+++ b/contrib/libs/apache/arrow/cpp/src/generated/File_generated.h
@@ -1,200 +1,200 @@
-// automatically generated by the FlatBuffers compiler, do not modify
-
-
-#ifndef FLATBUFFERS_GENERATED_FILE_ORG_APACHE_ARROW_FLATBUF_H_
-#define FLATBUFFERS_GENERATED_FILE_ORG_APACHE_ARROW_FLATBUF_H_
-
-#include "flatbuffers/flatbuffers.h"
-
-#include "Schema_generated.h"
-
-namespace org {
-namespace apache {
-namespace arrow {
-namespace flatbuf {
-
-struct Footer;
-struct FooterBuilder;
-
-struct Block;
-
-FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(8) Block FLATBUFFERS_FINAL_CLASS {
- private:
- int64_t offset_;
- int32_t metaDataLength_;
- int32_t padding0__;
- int64_t bodyLength_;
-
- public:
- Block() {
- memset(static_cast<void *>(this), 0, sizeof(Block));
- }
- Block(int64_t _offset, int32_t _metaDataLength, int64_t _bodyLength)
- : offset_(flatbuffers::EndianScalar(_offset)),
- metaDataLength_(flatbuffers::EndianScalar(_metaDataLength)),
- padding0__(0),
- bodyLength_(flatbuffers::EndianScalar(_bodyLength)) {
- (void)padding0__;
- }
- /// Index to the start of the RecordBlock (note this is past the Message header)
- int64_t offset() const {
- return flatbuffers::EndianScalar(offset_);
- }
- /// Length of the metadata
- int32_t metaDataLength() const {
- return flatbuffers::EndianScalar(metaDataLength_);
- }
- /// Length of the data (this is aligned so there can be a gap between this and
- /// the metadata).
- int64_t bodyLength() const {
- return flatbuffers::EndianScalar(bodyLength_);
- }
-};
-FLATBUFFERS_STRUCT_END(Block, 24);
-
-/// ----------------------------------------------------------------------
-/// Arrow File metadata
-///
-struct Footer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef FooterBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_VERSION = 4,
- VT_SCHEMA = 6,
- VT_DICTIONARIES = 8,
- VT_RECORDBATCHES = 10,
- VT_CUSTOM_METADATA = 12
- };
- org::apache::arrow::flatbuf::MetadataVersion version() const {
- return static_cast<org::apache::arrow::flatbuf::MetadataVersion>(GetField<int16_t>(VT_VERSION, 0));
- }
- const org::apache::arrow::flatbuf::Schema *schema() const {
- return GetPointer<const org::apache::arrow::flatbuf::Schema *>(VT_SCHEMA);
- }
- const flatbuffers::Vector<const org::apache::arrow::flatbuf::Block *> *dictionaries() const {
- return GetPointer<const flatbuffers::Vector<const org::apache::arrow::flatbuf::Block *> *>(VT_DICTIONARIES);
- }
- const flatbuffers::Vector<const org::apache::arrow::flatbuf::Block *> *recordBatches() const {
- return GetPointer<const flatbuffers::Vector<const org::apache::arrow::flatbuf::Block *> *>(VT_RECORDBATCHES);
- }
- /// User-defined metadata
- const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>> *custom_metadata() const {
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>> *>(VT_CUSTOM_METADATA);
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyField<int16_t>(verifier, VT_VERSION) &&
- VerifyOffset(verifier, VT_SCHEMA) &&
- verifier.VerifyTable(schema()) &&
- VerifyOffset(verifier, VT_DICTIONARIES) &&
- verifier.VerifyVector(dictionaries()) &&
- VerifyOffset(verifier, VT_RECORDBATCHES) &&
- verifier.VerifyVector(recordBatches()) &&
- VerifyOffset(verifier, VT_CUSTOM_METADATA) &&
- verifier.VerifyVector(custom_metadata()) &&
- verifier.VerifyVectorOfTables(custom_metadata()) &&
- verifier.EndTable();
- }
-};
-
-struct FooterBuilder {
- typedef Footer Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_version(org::apache::arrow::flatbuf::MetadataVersion version) {
- fbb_.AddElement<int16_t>(Footer::VT_VERSION, static_cast<int16_t>(version), 0);
- }
- void add_schema(flatbuffers::Offset<org::apache::arrow::flatbuf::Schema> schema) {
- fbb_.AddOffset(Footer::VT_SCHEMA, schema);
- }
- void add_dictionaries(flatbuffers::Offset<flatbuffers::Vector<const org::apache::arrow::flatbuf::Block *>> dictionaries) {
- fbb_.AddOffset(Footer::VT_DICTIONARIES, dictionaries);
- }
- void add_recordBatches(flatbuffers::Offset<flatbuffers::Vector<const org::apache::arrow::flatbuf::Block *>> recordBatches) {
- fbb_.AddOffset(Footer::VT_RECORDBATCHES, recordBatches);
- }
- void add_custom_metadata(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>> custom_metadata) {
- fbb_.AddOffset(Footer::VT_CUSTOM_METADATA, custom_metadata);
- }
- explicit FooterBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- FooterBuilder &operator=(const FooterBuilder &);
- flatbuffers::Offset<Footer> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Footer>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Footer> CreateFooter(
- flatbuffers::FlatBufferBuilder &_fbb,
- org::apache::arrow::flatbuf::MetadataVersion version = org::apache::arrow::flatbuf::MetadataVersion::V1,
- flatbuffers::Offset<org::apache::arrow::flatbuf::Schema> schema = 0,
- flatbuffers::Offset<flatbuffers::Vector<const org::apache::arrow::flatbuf::Block *>> dictionaries = 0,
- flatbuffers::Offset<flatbuffers::Vector<const org::apache::arrow::flatbuf::Block *>> recordBatches = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>> custom_metadata = 0) {
- FooterBuilder builder_(_fbb);
- builder_.add_custom_metadata(custom_metadata);
- builder_.add_recordBatches(recordBatches);
- builder_.add_dictionaries(dictionaries);
- builder_.add_schema(schema);
- builder_.add_version(version);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Footer> CreateFooterDirect(
- flatbuffers::FlatBufferBuilder &_fbb,
- org::apache::arrow::flatbuf::MetadataVersion version = org::apache::arrow::flatbuf::MetadataVersion::V1,
- flatbuffers::Offset<org::apache::arrow::flatbuf::Schema> schema = 0,
- const std::vector<org::apache::arrow::flatbuf::Block> *dictionaries = nullptr,
- const std::vector<org::apache::arrow::flatbuf::Block> *recordBatches = nullptr,
- const std::vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>> *custom_metadata = nullptr) {
- auto dictionaries__ = dictionaries ? _fbb.CreateVectorOfStructs<org::apache::arrow::flatbuf::Block>(*dictionaries) : 0;
- auto recordBatches__ = recordBatches ? _fbb.CreateVectorOfStructs<org::apache::arrow::flatbuf::Block>(*recordBatches) : 0;
- auto custom_metadata__ = custom_metadata ? _fbb.CreateVector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>(*custom_metadata) : 0;
- return org::apache::arrow::flatbuf::CreateFooter(
- _fbb,
- version,
- schema,
- dictionaries__,
- recordBatches__,
- custom_metadata__);
-}
-
-inline const org::apache::arrow::flatbuf::Footer *GetFooter(const void *buf) {
- return flatbuffers::GetRoot<org::apache::arrow::flatbuf::Footer>(buf);
-}
-
-inline const org::apache::arrow::flatbuf::Footer *GetSizePrefixedFooter(const void *buf) {
- return flatbuffers::GetSizePrefixedRoot<org::apache::arrow::flatbuf::Footer>(buf);
-}
-
-inline bool VerifyFooterBuffer(
- flatbuffers::Verifier &verifier) {
- return verifier.VerifyBuffer<org::apache::arrow::flatbuf::Footer>(nullptr);
-}
-
-inline bool VerifySizePrefixedFooterBuffer(
- flatbuffers::Verifier &verifier) {
- return verifier.VerifySizePrefixedBuffer<org::apache::arrow::flatbuf::Footer>(nullptr);
-}
-
-inline void FinishFooterBuffer(
- flatbuffers::FlatBufferBuilder &fbb,
- flatbuffers::Offset<org::apache::arrow::flatbuf::Footer> root) {
- fbb.Finish(root);
-}
-
-inline void FinishSizePrefixedFooterBuffer(
- flatbuffers::FlatBufferBuilder &fbb,
- flatbuffers::Offset<org::apache::arrow::flatbuf::Footer> root) {
- fbb.FinishSizePrefixed(root);
-}
-
-} // namespace flatbuf
-} // namespace arrow
-} // namespace apache
-} // namespace org
-
-#endif // FLATBUFFERS_GENERATED_FILE_ORG_APACHE_ARROW_FLATBUF_H_
+// automatically generated by the FlatBuffers compiler, do not modify
+
+
+#ifndef FLATBUFFERS_GENERATED_FILE_ORG_APACHE_ARROW_FLATBUF_H_
+#define FLATBUFFERS_GENERATED_FILE_ORG_APACHE_ARROW_FLATBUF_H_
+
+#include "flatbuffers/flatbuffers.h"
+
+#include "Schema_generated.h"
+
+namespace org {
+namespace apache {
+namespace arrow {
+namespace flatbuf {
+
+struct Footer;
+struct FooterBuilder;
+
+struct Block;
+
+FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(8) Block FLATBUFFERS_FINAL_CLASS {
+ private:
+ int64_t offset_;
+ int32_t metaDataLength_;
+ int32_t padding0__;
+ int64_t bodyLength_;
+
+ public:
+ Block() {
+ memset(static_cast<void *>(this), 0, sizeof(Block));
+ }
+ Block(int64_t _offset, int32_t _metaDataLength, int64_t _bodyLength)
+ : offset_(flatbuffers::EndianScalar(_offset)),
+ metaDataLength_(flatbuffers::EndianScalar(_metaDataLength)),
+ padding0__(0),
+ bodyLength_(flatbuffers::EndianScalar(_bodyLength)) {
+ (void)padding0__;
+ }
+ /// Index to the start of the RecordBlock (note this is past the Message header)
+ int64_t offset() const {
+ return flatbuffers::EndianScalar(offset_);
+ }
+ /// Length of the metadata
+ int32_t metaDataLength() const {
+ return flatbuffers::EndianScalar(metaDataLength_);
+ }
+ /// Length of the data (this is aligned so there can be a gap between this and
+ /// the metadata).
+ int64_t bodyLength() const {
+ return flatbuffers::EndianScalar(bodyLength_);
+ }
+};
+FLATBUFFERS_STRUCT_END(Block, 24);
+
+/// ----------------------------------------------------------------------
+/// Arrow File metadata
+///
+struct Footer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef FooterBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+ VT_VERSION = 4,
+ VT_SCHEMA = 6,
+ VT_DICTIONARIES = 8,
+ VT_RECORDBATCHES = 10,
+ VT_CUSTOM_METADATA = 12
+ };
+ org::apache::arrow::flatbuf::MetadataVersion version() const {
+ return static_cast<org::apache::arrow::flatbuf::MetadataVersion>(GetField<int16_t>(VT_VERSION, 0));
+ }
+ const org::apache::arrow::flatbuf::Schema *schema() const {
+ return GetPointer<const org::apache::arrow::flatbuf::Schema *>(VT_SCHEMA);
+ }
+ const flatbuffers::Vector<const org::apache::arrow::flatbuf::Block *> *dictionaries() const {
+ return GetPointer<const flatbuffers::Vector<const org::apache::arrow::flatbuf::Block *> *>(VT_DICTIONARIES);
+ }
+ const flatbuffers::Vector<const org::apache::arrow::flatbuf::Block *> *recordBatches() const {
+ return GetPointer<const flatbuffers::Vector<const org::apache::arrow::flatbuf::Block *> *>(VT_RECORDBATCHES);
+ }
+ /// User-defined metadata
+ const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>> *custom_metadata() const {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>> *>(VT_CUSTOM_METADATA);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int16_t>(verifier, VT_VERSION) &&
+ VerifyOffset(verifier, VT_SCHEMA) &&
+ verifier.VerifyTable(schema()) &&
+ VerifyOffset(verifier, VT_DICTIONARIES) &&
+ verifier.VerifyVector(dictionaries()) &&
+ VerifyOffset(verifier, VT_RECORDBATCHES) &&
+ verifier.VerifyVector(recordBatches()) &&
+ VerifyOffset(verifier, VT_CUSTOM_METADATA) &&
+ verifier.VerifyVector(custom_metadata()) &&
+ verifier.VerifyVectorOfTables(custom_metadata()) &&
+ verifier.EndTable();
+ }
+};
+
+struct FooterBuilder {
+ typedef Footer Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_version(org::apache::arrow::flatbuf::MetadataVersion version) {
+ fbb_.AddElement<int16_t>(Footer::VT_VERSION, static_cast<int16_t>(version), 0);
+ }
+ void add_schema(flatbuffers::Offset<org::apache::arrow::flatbuf::Schema> schema) {
+ fbb_.AddOffset(Footer::VT_SCHEMA, schema);
+ }
+ void add_dictionaries(flatbuffers::Offset<flatbuffers::Vector<const org::apache::arrow::flatbuf::Block *>> dictionaries) {
+ fbb_.AddOffset(Footer::VT_DICTIONARIES, dictionaries);
+ }
+ void add_recordBatches(flatbuffers::Offset<flatbuffers::Vector<const org::apache::arrow::flatbuf::Block *>> recordBatches) {
+ fbb_.AddOffset(Footer::VT_RECORDBATCHES, recordBatches);
+ }
+ void add_custom_metadata(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>> custom_metadata) {
+ fbb_.AddOffset(Footer::VT_CUSTOM_METADATA, custom_metadata);
+ }
+ explicit FooterBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ FooterBuilder &operator=(const FooterBuilder &);
+ flatbuffers::Offset<Footer> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Footer>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Footer> CreateFooter(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ org::apache::arrow::flatbuf::MetadataVersion version = org::apache::arrow::flatbuf::MetadataVersion::V1,
+ flatbuffers::Offset<org::apache::arrow::flatbuf::Schema> schema = 0,
+ flatbuffers::Offset<flatbuffers::Vector<const org::apache::arrow::flatbuf::Block *>> dictionaries = 0,
+ flatbuffers::Offset<flatbuffers::Vector<const org::apache::arrow::flatbuf::Block *>> recordBatches = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>> custom_metadata = 0) {
+ FooterBuilder builder_(_fbb);
+ builder_.add_custom_metadata(custom_metadata);
+ builder_.add_recordBatches(recordBatches);
+ builder_.add_dictionaries(dictionaries);
+ builder_.add_schema(schema);
+ builder_.add_version(version);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Footer> CreateFooterDirect(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ org::apache::arrow::flatbuf::MetadataVersion version = org::apache::arrow::flatbuf::MetadataVersion::V1,
+ flatbuffers::Offset<org::apache::arrow::flatbuf::Schema> schema = 0,
+ const std::vector<org::apache::arrow::flatbuf::Block> *dictionaries = nullptr,
+ const std::vector<org::apache::arrow::flatbuf::Block> *recordBatches = nullptr,
+ const std::vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>> *custom_metadata = nullptr) {
+ auto dictionaries__ = dictionaries ? _fbb.CreateVectorOfStructs<org::apache::arrow::flatbuf::Block>(*dictionaries) : 0;
+ auto recordBatches__ = recordBatches ? _fbb.CreateVectorOfStructs<org::apache::arrow::flatbuf::Block>(*recordBatches) : 0;
+ auto custom_metadata__ = custom_metadata ? _fbb.CreateVector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>(*custom_metadata) : 0;
+ return org::apache::arrow::flatbuf::CreateFooter(
+ _fbb,
+ version,
+ schema,
+ dictionaries__,
+ recordBatches__,
+ custom_metadata__);
+}
+
+inline const org::apache::arrow::flatbuf::Footer *GetFooter(const void *buf) {
+ return flatbuffers::GetRoot<org::apache::arrow::flatbuf::Footer>(buf);
+}
+
+inline const org::apache::arrow::flatbuf::Footer *GetSizePrefixedFooter(const void *buf) {
+ return flatbuffers::GetSizePrefixedRoot<org::apache::arrow::flatbuf::Footer>(buf);
+}
+
+inline bool VerifyFooterBuffer(
+ flatbuffers::Verifier &verifier) {
+ return verifier.VerifyBuffer<org::apache::arrow::flatbuf::Footer>(nullptr);
+}
+
+inline bool VerifySizePrefixedFooterBuffer(
+ flatbuffers::Verifier &verifier) {
+ return verifier.VerifySizePrefixedBuffer<org::apache::arrow::flatbuf::Footer>(nullptr);
+}
+
+inline void FinishFooterBuffer(
+ flatbuffers::FlatBufferBuilder &fbb,
+ flatbuffers::Offset<org::apache::arrow::flatbuf::Footer> root) {
+ fbb.Finish(root);
+}
+
+inline void FinishSizePrefixedFooterBuffer(
+ flatbuffers::FlatBufferBuilder &fbb,
+ flatbuffers::Offset<org::apache::arrow::flatbuf::Footer> root) {
+ fbb.FinishSizePrefixed(root);
+}
+
+} // namespace flatbuf
+} // namespace arrow
+} // namespace apache
+} // namespace org
+
+#endif // FLATBUFFERS_GENERATED_FILE_ORG_APACHE_ARROW_FLATBUF_H_
diff --git a/contrib/libs/apache/arrow/cpp/src/generated/Message_generated.h b/contrib/libs/apache/arrow/cpp/src/generated/Message_generated.h
index 822bec9952b..aaac55801e2 100644
--- a/contrib/libs/apache/arrow/cpp/src/generated/Message_generated.h
+++ b/contrib/libs/apache/arrow/cpp/src/generated/Message_generated.h
@@ -1,659 +1,659 @@
-// automatically generated by the FlatBuffers compiler, do not modify
-
-
-#ifndef FLATBUFFERS_GENERATED_MESSAGE_ORG_APACHE_ARROW_FLATBUF_H_
-#define FLATBUFFERS_GENERATED_MESSAGE_ORG_APACHE_ARROW_FLATBUF_H_
-
-#include "flatbuffers/flatbuffers.h"
-
-#include "Schema_generated.h"
-#include "SparseTensor_generated.h"
-#include "Tensor_generated.h"
-
-namespace org {
-namespace apache {
-namespace arrow {
-namespace flatbuf {
-
-struct FieldNode;
-
-struct BodyCompression;
-struct BodyCompressionBuilder;
-
-struct RecordBatch;
-struct RecordBatchBuilder;
-
-struct DictionaryBatch;
-struct DictionaryBatchBuilder;
-
-struct Message;
-struct MessageBuilder;
-
-enum class CompressionType : int8_t {
- LZ4_FRAME = 0,
- ZSTD = 1,
- MIN = LZ4_FRAME,
- MAX = ZSTD
-};
-
-inline const CompressionType (&EnumValuesCompressionType())[2] {
- static const CompressionType values[] = {
- CompressionType::LZ4_FRAME,
- CompressionType::ZSTD
- };
- return values;
-}
-
-inline const char * const *EnumNamesCompressionType() {
- static const char * const names[3] = {
- "LZ4_FRAME",
- "ZSTD",
- nullptr
- };
- return names;
-}
-
-inline const char *EnumNameCompressionType(CompressionType e) {
- if (flatbuffers::IsOutRange(e, CompressionType::LZ4_FRAME, CompressionType::ZSTD)) return "";
- const size_t index = static_cast<size_t>(e);
- return EnumNamesCompressionType()[index];
-}
-
-/// Provided for forward compatibility in case we need to support different
-/// strategies for compressing the IPC message body (like whole-body
-/// compression rather than buffer-level) in the future
-enum class BodyCompressionMethod : int8_t {
- /// Each constituent buffer is first compressed with the indicated
- /// compressor, and then written with the uncompressed length in the first 8
- /// bytes as a 64-bit little-endian signed integer followed by the compressed
- /// buffer bytes (and then padding as required by the protocol). The
- /// uncompressed length may be set to -1 to indicate that the data that
- /// follows is not compressed, which can be useful for cases where
- /// compression does not yield appreciable savings.
- BUFFER = 0,
- MIN = BUFFER,
- MAX = BUFFER
-};
-
-inline const BodyCompressionMethod (&EnumValuesBodyCompressionMethod())[1] {
- static const BodyCompressionMethod values[] = {
- BodyCompressionMethod::BUFFER
- };
- return values;
-}
-
-inline const char * const *EnumNamesBodyCompressionMethod() {
- static const char * const names[2] = {
- "BUFFER",
- nullptr
- };
- return names;
-}
-
-inline const char *EnumNameBodyCompressionMethod(BodyCompressionMethod e) {
- if (flatbuffers::IsOutRange(e, BodyCompressionMethod::BUFFER, BodyCompressionMethod::BUFFER)) return "";
- const size_t index = static_cast<size_t>(e);
- return EnumNamesBodyCompressionMethod()[index];
-}
-
-/// ----------------------------------------------------------------------
-/// The root Message type
-/// This union enables us to easily send different message types without
-/// redundant storage, and in the future we can easily add new message types.
-///
-/// Arrow implementations do not need to implement all of the message types,
-/// which may include experimental metadata types. For maximum compatibility,
-/// it is best to send data using RecordBatch
-enum class MessageHeader : uint8_t {
- NONE = 0,
- Schema = 1,
- DictionaryBatch = 2,
- RecordBatch = 3,
- Tensor = 4,
- SparseTensor = 5,
- MIN = NONE,
- MAX = SparseTensor
-};
-
-inline const MessageHeader (&EnumValuesMessageHeader())[6] {
- static const MessageHeader values[] = {
- MessageHeader::NONE,
- MessageHeader::Schema,
- MessageHeader::DictionaryBatch,
- MessageHeader::RecordBatch,
- MessageHeader::Tensor,
- MessageHeader::SparseTensor
- };
- return values;
-}
-
-inline const char * const *EnumNamesMessageHeader() {
- static const char * const names[7] = {
- "NONE",
- "Schema",
- "DictionaryBatch",
- "RecordBatch",
- "Tensor",
- "SparseTensor",
- nullptr
- };
- return names;
-}
-
-inline const char *EnumNameMessageHeader(MessageHeader e) {
- if (flatbuffers::IsOutRange(e, MessageHeader::NONE, MessageHeader::SparseTensor)) return "";
- const size_t index = static_cast<size_t>(e);
- return EnumNamesMessageHeader()[index];
-}
-
-template<typename T> struct MessageHeaderTraits {
- static const MessageHeader enum_value = MessageHeader::NONE;
-};
-
-template<> struct MessageHeaderTraits<org::apache::arrow::flatbuf::Schema> {
- static const MessageHeader enum_value = MessageHeader::Schema;
-};
-
-template<> struct MessageHeaderTraits<org::apache::arrow::flatbuf::DictionaryBatch> {
- static const MessageHeader enum_value = MessageHeader::DictionaryBatch;
-};
-
-template<> struct MessageHeaderTraits<org::apache::arrow::flatbuf::RecordBatch> {
- static const MessageHeader enum_value = MessageHeader::RecordBatch;
-};
-
-template<> struct MessageHeaderTraits<org::apache::arrow::flatbuf::Tensor> {
- static const MessageHeader enum_value = MessageHeader::Tensor;
-};
-
-template<> struct MessageHeaderTraits<org::apache::arrow::flatbuf::SparseTensor> {
- static const MessageHeader enum_value = MessageHeader::SparseTensor;
-};
-
-bool VerifyMessageHeader(flatbuffers::Verifier &verifier, const void *obj, MessageHeader type);
-bool VerifyMessageHeaderVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types);
-
-/// ----------------------------------------------------------------------
-/// Data structures for describing a table row batch (a collection of
-/// equal-length Arrow arrays)
-/// Metadata about a field at some level of a nested type tree (but not
-/// its children).
-///
-/// For example, a List<Int16> with values [[1, 2, 3], null, [4], [5, 6], null]
-/// would have {length: 5, null_count: 2} for its List node, and {length: 6,
-/// null_count: 0} for its Int16 node, as separate FieldNode structs
-FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(8) FieldNode FLATBUFFERS_FINAL_CLASS {
- private:
- int64_t length_;
- int64_t null_count_;
-
- public:
- FieldNode() {
- memset(static_cast<void *>(this), 0, sizeof(FieldNode));
- }
- FieldNode(int64_t _length, int64_t _null_count)
- : length_(flatbuffers::EndianScalar(_length)),
- null_count_(flatbuffers::EndianScalar(_null_count)) {
- }
- /// The number of value slots in the Arrow array at this level of a nested
- /// tree
- int64_t length() const {
- return flatbuffers::EndianScalar(length_);
- }
- /// The number of observed nulls. Fields with null_count == 0 may choose not
- /// to write their physical validity bitmap out as a materialized buffer,
- /// instead setting the length of the bitmap buffer to 0.
- int64_t null_count() const {
- return flatbuffers::EndianScalar(null_count_);
- }
-};
-FLATBUFFERS_STRUCT_END(FieldNode, 16);
-
-/// Optional compression for the memory buffers constituting IPC message
-/// bodies. Intended for use with RecordBatch but could be used for other
-/// message types
-struct BodyCompression FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef BodyCompressionBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_CODEC = 4,
- VT_METHOD = 6
- };
- /// Compressor library
- org::apache::arrow::flatbuf::CompressionType codec() const {
- return static_cast<org::apache::arrow::flatbuf::CompressionType>(GetField<int8_t>(VT_CODEC, 0));
- }
- /// Indicates the way the record batch body was compressed
- org::apache::arrow::flatbuf::BodyCompressionMethod method() const {
- return static_cast<org::apache::arrow::flatbuf::BodyCompressionMethod>(GetField<int8_t>(VT_METHOD, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_CODEC) &&
- VerifyField<int8_t>(verifier, VT_METHOD) &&
- verifier.EndTable();
- }
-};
-
-struct BodyCompressionBuilder {
- typedef BodyCompression Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_codec(org::apache::arrow::flatbuf::CompressionType codec) {
- fbb_.AddElement<int8_t>(BodyCompression::VT_CODEC, static_cast<int8_t>(codec), 0);
- }
- void add_method(org::apache::arrow::flatbuf::BodyCompressionMethod method) {
- fbb_.AddElement<int8_t>(BodyCompression::VT_METHOD, static_cast<int8_t>(method), 0);
- }
- explicit BodyCompressionBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- BodyCompressionBuilder &operator=(const BodyCompressionBuilder &);
- flatbuffers::Offset<BodyCompression> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<BodyCompression>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<BodyCompression> CreateBodyCompression(
- flatbuffers::FlatBufferBuilder &_fbb,
- org::apache::arrow::flatbuf::CompressionType codec = org::apache::arrow::flatbuf::CompressionType::LZ4_FRAME,
- org::apache::arrow::flatbuf::BodyCompressionMethod method = org::apache::arrow::flatbuf::BodyCompressionMethod::BUFFER) {
- BodyCompressionBuilder builder_(_fbb);
- builder_.add_method(method);
- builder_.add_codec(codec);
- return builder_.Finish();
-}
-
-/// A data header describing the shared memory layout of a "record" or "row"
-/// batch. Some systems call this a "row batch" internally and others a "record
-/// batch".
-struct RecordBatch FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef RecordBatchBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_LENGTH = 4,
- VT_NODES = 6,
- VT_BUFFERS = 8,
- VT_COMPRESSION = 10
- };
- /// number of records / rows. The arrays in the batch should all have this
- /// length
- int64_t length() const {
- return GetField<int64_t>(VT_LENGTH, 0);
- }
- /// Nodes correspond to the pre-ordered flattened logical schema
- const flatbuffers::Vector<const org::apache::arrow::flatbuf::FieldNode *> *nodes() const {
- return GetPointer<const flatbuffers::Vector<const org::apache::arrow::flatbuf::FieldNode *> *>(VT_NODES);
- }
- /// Buffers correspond to the pre-ordered flattened buffer tree
- ///
- /// The number of buffers appended to this list depends on the schema. For
- /// example, most primitive arrays will have 2 buffers, 1 for the validity
- /// bitmap and 1 for the values. For struct arrays, there will only be a
- /// single buffer for the validity (nulls) bitmap
- const flatbuffers::Vector<const org::apache::arrow::flatbuf::Buffer *> *buffers() const {
- return GetPointer<const flatbuffers::Vector<const org::apache::arrow::flatbuf::Buffer *> *>(VT_BUFFERS);
- }
- /// Optional compression of the message body
- const org::apache::arrow::flatbuf::BodyCompression *compression() const {
- return GetPointer<const org::apache::arrow::flatbuf::BodyCompression *>(VT_COMPRESSION);
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyField<int64_t>(verifier, VT_LENGTH) &&
- VerifyOffset(verifier, VT_NODES) &&
- verifier.VerifyVector(nodes()) &&
- VerifyOffset(verifier, VT_BUFFERS) &&
- verifier.VerifyVector(buffers()) &&
- VerifyOffset(verifier, VT_COMPRESSION) &&
- verifier.VerifyTable(compression()) &&
- verifier.EndTable();
- }
-};
-
-struct RecordBatchBuilder {
- typedef RecordBatch Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_length(int64_t length) {
- fbb_.AddElement<int64_t>(RecordBatch::VT_LENGTH, length, 0);
- }
- void add_nodes(flatbuffers::Offset<flatbuffers::Vector<const org::apache::arrow::flatbuf::FieldNode *>> nodes) {
- fbb_.AddOffset(RecordBatch::VT_NODES, nodes);
- }
- void add_buffers(flatbuffers::Offset<flatbuffers::Vector<const org::apache::arrow::flatbuf::Buffer *>> buffers) {
- fbb_.AddOffset(RecordBatch::VT_BUFFERS, buffers);
- }
- void add_compression(flatbuffers::Offset<org::apache::arrow::flatbuf::BodyCompression> compression) {
- fbb_.AddOffset(RecordBatch::VT_COMPRESSION, compression);
- }
- explicit RecordBatchBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- RecordBatchBuilder &operator=(const RecordBatchBuilder &);
- flatbuffers::Offset<RecordBatch> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<RecordBatch>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<RecordBatch> CreateRecordBatch(
- flatbuffers::FlatBufferBuilder &_fbb,
- int64_t length = 0,
- flatbuffers::Offset<flatbuffers::Vector<const org::apache::arrow::flatbuf::FieldNode *>> nodes = 0,
- flatbuffers::Offset<flatbuffers::Vector<const org::apache::arrow::flatbuf::Buffer *>> buffers = 0,
- flatbuffers::Offset<org::apache::arrow::flatbuf::BodyCompression> compression = 0) {
- RecordBatchBuilder builder_(_fbb);
- builder_.add_length(length);
- builder_.add_compression(compression);
- builder_.add_buffers(buffers);
- builder_.add_nodes(nodes);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<RecordBatch> CreateRecordBatchDirect(
- flatbuffers::FlatBufferBuilder &_fbb,
- int64_t length = 0,
- const std::vector<org::apache::arrow::flatbuf::FieldNode> *nodes = nullptr,
- const std::vector<org::apache::arrow::flatbuf::Buffer> *buffers = nullptr,
- flatbuffers::Offset<org::apache::arrow::flatbuf::BodyCompression> compression = 0) {
- auto nodes__ = nodes ? _fbb.CreateVectorOfStructs<org::apache::arrow::flatbuf::FieldNode>(*nodes) : 0;
- auto buffers__ = buffers ? _fbb.CreateVectorOfStructs<org::apache::arrow::flatbuf::Buffer>(*buffers) : 0;
- return org::apache::arrow::flatbuf::CreateRecordBatch(
- _fbb,
- length,
- nodes__,
- buffers__,
- compression);
-}
-
-/// For sending dictionary encoding information. Any Field can be
-/// dictionary-encoded, but in this case none of its children may be
-/// dictionary-encoded.
-/// There is one vector / column per dictionary, but that vector / column
-/// may be spread across multiple dictionary batches by using the isDelta
-/// flag
-struct DictionaryBatch FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef DictionaryBatchBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_ID = 4,
- VT_DATA = 6,
- VT_ISDELTA = 8
- };
- int64_t id() const {
- return GetField<int64_t>(VT_ID, 0);
- }
- const org::apache::arrow::flatbuf::RecordBatch *data() const {
- return GetPointer<const org::apache::arrow::flatbuf::RecordBatch *>(VT_DATA);
- }
- /// If isDelta is true the values in the dictionary are to be appended to a
- /// dictionary with the indicated id. If isDelta is false this dictionary
- /// should replace the existing dictionary.
- bool isDelta() const {
- return GetField<uint8_t>(VT_ISDELTA, 0) != 0;
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyField<int64_t>(verifier, VT_ID) &&
- VerifyOffset(verifier, VT_DATA) &&
- verifier.VerifyTable(data()) &&
- VerifyField<uint8_t>(verifier, VT_ISDELTA) &&
- verifier.EndTable();
- }
-};
-
-struct DictionaryBatchBuilder {
- typedef DictionaryBatch Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_id(int64_t id) {
- fbb_.AddElement<int64_t>(DictionaryBatch::VT_ID, id, 0);
- }
- void add_data(flatbuffers::Offset<org::apache::arrow::flatbuf::RecordBatch> data) {
- fbb_.AddOffset(DictionaryBatch::VT_DATA, data);
- }
- void add_isDelta(bool isDelta) {
- fbb_.AddElement<uint8_t>(DictionaryBatch::VT_ISDELTA, static_cast<uint8_t>(isDelta), 0);
- }
- explicit DictionaryBatchBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- DictionaryBatchBuilder &operator=(const DictionaryBatchBuilder &);
- flatbuffers::Offset<DictionaryBatch> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<DictionaryBatch>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<DictionaryBatch> CreateDictionaryBatch(
- flatbuffers::FlatBufferBuilder &_fbb,
- int64_t id = 0,
- flatbuffers::Offset<org::apache::arrow::flatbuf::RecordBatch> data = 0,
- bool isDelta = false) {
- DictionaryBatchBuilder builder_(_fbb);
- builder_.add_id(id);
- builder_.add_data(data);
- builder_.add_isDelta(isDelta);
- return builder_.Finish();
-}
-
-struct Message FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef MessageBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_VERSION = 4,
- VT_HEADER_TYPE = 6,
- VT_HEADER = 8,
- VT_BODYLENGTH = 10,
- VT_CUSTOM_METADATA = 12
- };
- org::apache::arrow::flatbuf::MetadataVersion version() const {
- return static_cast<org::apache::arrow::flatbuf::MetadataVersion>(GetField<int16_t>(VT_VERSION, 0));
- }
- org::apache::arrow::flatbuf::MessageHeader header_type() const {
- return static_cast<org::apache::arrow::flatbuf::MessageHeader>(GetField<uint8_t>(VT_HEADER_TYPE, 0));
- }
- const void *header() const {
- return GetPointer<const void *>(VT_HEADER);
- }
- template<typename T> const T *header_as() const;
- const org::apache::arrow::flatbuf::Schema *header_as_Schema() const {
- return header_type() == org::apache::arrow::flatbuf::MessageHeader::Schema ? static_cast<const org::apache::arrow::flatbuf::Schema *>(header()) : nullptr;
- }
- const org::apache::arrow::flatbuf::DictionaryBatch *header_as_DictionaryBatch() const {
- return header_type() == org::apache::arrow::flatbuf::MessageHeader::DictionaryBatch ? static_cast<const org::apache::arrow::flatbuf::DictionaryBatch *>(header()) : nullptr;
- }
- const org::apache::arrow::flatbuf::RecordBatch *header_as_RecordBatch() const {
- return header_type() == org::apache::arrow::flatbuf::MessageHeader::RecordBatch ? static_cast<const org::apache::arrow::flatbuf::RecordBatch *>(header()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Tensor *header_as_Tensor() const {
- return header_type() == org::apache::arrow::flatbuf::MessageHeader::Tensor ? static_cast<const org::apache::arrow::flatbuf::Tensor *>(header()) : nullptr;
- }
- const org::apache::arrow::flatbuf::SparseTensor *header_as_SparseTensor() const {
- return header_type() == org::apache::arrow::flatbuf::MessageHeader::SparseTensor ? static_cast<const org::apache::arrow::flatbuf::SparseTensor *>(header()) : nullptr;
- }
- int64_t bodyLength() const {
- return GetField<int64_t>(VT_BODYLENGTH, 0);
- }
- const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>> *custom_metadata() const {
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>> *>(VT_CUSTOM_METADATA);
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyField<int16_t>(verifier, VT_VERSION) &&
- VerifyField<uint8_t>(verifier, VT_HEADER_TYPE) &&
- VerifyOffset(verifier, VT_HEADER) &&
- VerifyMessageHeader(verifier, header(), header_type()) &&
- VerifyField<int64_t>(verifier, VT_BODYLENGTH) &&
- VerifyOffset(verifier, VT_CUSTOM_METADATA) &&
- verifier.VerifyVector(custom_metadata()) &&
- verifier.VerifyVectorOfTables(custom_metadata()) &&
- verifier.EndTable();
- }
-};
-
-template<> inline const org::apache::arrow::flatbuf::Schema *Message::header_as<org::apache::arrow::flatbuf::Schema>() const {
- return header_as_Schema();
-}
-
-template<> inline const org::apache::arrow::flatbuf::DictionaryBatch *Message::header_as<org::apache::arrow::flatbuf::DictionaryBatch>() const {
- return header_as_DictionaryBatch();
-}
-
-template<> inline const org::apache::arrow::flatbuf::RecordBatch *Message::header_as<org::apache::arrow::flatbuf::RecordBatch>() const {
- return header_as_RecordBatch();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Tensor *Message::header_as<org::apache::arrow::flatbuf::Tensor>() const {
- return header_as_Tensor();
-}
-
-template<> inline const org::apache::arrow::flatbuf::SparseTensor *Message::header_as<org::apache::arrow::flatbuf::SparseTensor>() const {
- return header_as_SparseTensor();
-}
-
-struct MessageBuilder {
- typedef Message Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_version(org::apache::arrow::flatbuf::MetadataVersion version) {
- fbb_.AddElement<int16_t>(Message::VT_VERSION, static_cast<int16_t>(version), 0);
- }
- void add_header_type(org::apache::arrow::flatbuf::MessageHeader header_type) {
- fbb_.AddElement<uint8_t>(Message::VT_HEADER_TYPE, static_cast<uint8_t>(header_type), 0);
- }
- void add_header(flatbuffers::Offset<void> header) {
- fbb_.AddOffset(Message::VT_HEADER, header);
- }
- void add_bodyLength(int64_t bodyLength) {
- fbb_.AddElement<int64_t>(Message::VT_BODYLENGTH, bodyLength, 0);
- }
- void add_custom_metadata(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>> custom_metadata) {
- fbb_.AddOffset(Message::VT_CUSTOM_METADATA, custom_metadata);
- }
- explicit MessageBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- MessageBuilder &operator=(const MessageBuilder &);
- flatbuffers::Offset<Message> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Message>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Message> CreateMessage(
- flatbuffers::FlatBufferBuilder &_fbb,
- org::apache::arrow::flatbuf::MetadataVersion version = org::apache::arrow::flatbuf::MetadataVersion::V1,
- org::apache::arrow::flatbuf::MessageHeader header_type = org::apache::arrow::flatbuf::MessageHeader::NONE,
- flatbuffers::Offset<void> header = 0,
- int64_t bodyLength = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>> custom_metadata = 0) {
- MessageBuilder builder_(_fbb);
- builder_.add_bodyLength(bodyLength);
- builder_.add_custom_metadata(custom_metadata);
- builder_.add_header(header);
- builder_.add_version(version);
- builder_.add_header_type(header_type);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Message> CreateMessageDirect(
- flatbuffers::FlatBufferBuilder &_fbb,
- org::apache::arrow::flatbuf::MetadataVersion version = org::apache::arrow::flatbuf::MetadataVersion::V1,
- org::apache::arrow::flatbuf::MessageHeader header_type = org::apache::arrow::flatbuf::MessageHeader::NONE,
- flatbuffers::Offset<void> header = 0,
- int64_t bodyLength = 0,
- const std::vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>> *custom_metadata = nullptr) {
- auto custom_metadata__ = custom_metadata ? _fbb.CreateVector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>(*custom_metadata) : 0;
- return org::apache::arrow::flatbuf::CreateMessage(
- _fbb,
- version,
- header_type,
- header,
- bodyLength,
- custom_metadata__);
-}
-
-inline bool VerifyMessageHeader(flatbuffers::Verifier &verifier, const void *obj, MessageHeader type) {
- switch (type) {
- case MessageHeader::NONE: {
- return true;
- }
- case MessageHeader::Schema: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Schema *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case MessageHeader::DictionaryBatch: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::DictionaryBatch *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case MessageHeader::RecordBatch: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::RecordBatch *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case MessageHeader::Tensor: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Tensor *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case MessageHeader::SparseTensor: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::SparseTensor *>(obj);
- return verifier.VerifyTable(ptr);
- }
- default: return true;
- }
-}
-
-inline bool VerifyMessageHeaderVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types) {
- if (!values || !types) return !values && !types;
- if (values->size() != types->size()) return false;
- for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) {
- if (!VerifyMessageHeader(
- verifier, values->Get(i), types->GetEnum<MessageHeader>(i))) {
- return false;
- }
- }
- return true;
-}
-
-inline const org::apache::arrow::flatbuf::Message *GetMessage(const void *buf) {
- return flatbuffers::GetRoot<org::apache::arrow::flatbuf::Message>(buf);
-}
-
-inline const org::apache::arrow::flatbuf::Message *GetSizePrefixedMessage(const void *buf) {
- return flatbuffers::GetSizePrefixedRoot<org::apache::arrow::flatbuf::Message>(buf);
-}
-
-inline bool VerifyMessageBuffer(
- flatbuffers::Verifier &verifier) {
- return verifier.VerifyBuffer<org::apache::arrow::flatbuf::Message>(nullptr);
-}
-
-inline bool VerifySizePrefixedMessageBuffer(
- flatbuffers::Verifier &verifier) {
- return verifier.VerifySizePrefixedBuffer<org::apache::arrow::flatbuf::Message>(nullptr);
-}
-
-inline void FinishMessageBuffer(
- flatbuffers::FlatBufferBuilder &fbb,
- flatbuffers::Offset<org::apache::arrow::flatbuf::Message> root) {
- fbb.Finish(root);
-}
-
-inline void FinishSizePrefixedMessageBuffer(
- flatbuffers::FlatBufferBuilder &fbb,
- flatbuffers::Offset<org::apache::arrow::flatbuf::Message> root) {
- fbb.FinishSizePrefixed(root);
-}
-
-} // namespace flatbuf
-} // namespace arrow
-} // namespace apache
-} // namespace org
-
-#endif // FLATBUFFERS_GENERATED_MESSAGE_ORG_APACHE_ARROW_FLATBUF_H_
+// automatically generated by the FlatBuffers compiler, do not modify
+
+
+#ifndef FLATBUFFERS_GENERATED_MESSAGE_ORG_APACHE_ARROW_FLATBUF_H_
+#define FLATBUFFERS_GENERATED_MESSAGE_ORG_APACHE_ARROW_FLATBUF_H_
+
+#include "flatbuffers/flatbuffers.h"
+
+#include "Schema_generated.h"
+#include "SparseTensor_generated.h"
+#include "Tensor_generated.h"
+
+namespace org {
+namespace apache {
+namespace arrow {
+namespace flatbuf {
+
+struct FieldNode;
+
+struct BodyCompression;
+struct BodyCompressionBuilder;
+
+struct RecordBatch;
+struct RecordBatchBuilder;
+
+struct DictionaryBatch;
+struct DictionaryBatchBuilder;
+
+struct Message;
+struct MessageBuilder;
+
+enum class CompressionType : int8_t {
+ LZ4_FRAME = 0,
+ ZSTD = 1,
+ MIN = LZ4_FRAME,
+ MAX = ZSTD
+};
+
+inline const CompressionType (&EnumValuesCompressionType())[2] {
+ static const CompressionType values[] = {
+ CompressionType::LZ4_FRAME,
+ CompressionType::ZSTD
+ };
+ return values;
+}
+
+inline const char * const *EnumNamesCompressionType() {
+ static const char * const names[3] = {
+ "LZ4_FRAME",
+ "ZSTD",
+ nullptr
+ };
+ return names;
+}
+
+inline const char *EnumNameCompressionType(CompressionType e) {
+ if (flatbuffers::IsOutRange(e, CompressionType::LZ4_FRAME, CompressionType::ZSTD)) return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesCompressionType()[index];
+}
+
+/// Provided for forward compatibility in case we need to support different
+/// strategies for compressing the IPC message body (like whole-body
+/// compression rather than buffer-level) in the future
+enum class BodyCompressionMethod : int8_t {
+ /// Each constituent buffer is first compressed with the indicated
+ /// compressor, and then written with the uncompressed length in the first 8
+ /// bytes as a 64-bit little-endian signed integer followed by the compressed
+ /// buffer bytes (and then padding as required by the protocol). The
+ /// uncompressed length may be set to -1 to indicate that the data that
+ /// follows is not compressed, which can be useful for cases where
+ /// compression does not yield appreciable savings.
+ BUFFER = 0,
+ MIN = BUFFER,
+ MAX = BUFFER
+};
+
+inline const BodyCompressionMethod (&EnumValuesBodyCompressionMethod())[1] {
+ static const BodyCompressionMethod values[] = {
+ BodyCompressionMethod::BUFFER
+ };
+ return values;
+}
+
+inline const char * const *EnumNamesBodyCompressionMethod() {
+ static const char * const names[2] = {
+ "BUFFER",
+ nullptr
+ };
+ return names;
+}
+
+inline const char *EnumNameBodyCompressionMethod(BodyCompressionMethod e) {
+ if (flatbuffers::IsOutRange(e, BodyCompressionMethod::BUFFER, BodyCompressionMethod::BUFFER)) return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesBodyCompressionMethod()[index];
+}
+
+/// ----------------------------------------------------------------------
+/// The root Message type
+/// This union enables us to easily send different message types without
+/// redundant storage, and in the future we can easily add new message types.
+///
+/// Arrow implementations do not need to implement all of the message types,
+/// which may include experimental metadata types. For maximum compatibility,
+/// it is best to send data using RecordBatch
+enum class MessageHeader : uint8_t {
+ NONE = 0,
+ Schema = 1,
+ DictionaryBatch = 2,
+ RecordBatch = 3,
+ Tensor = 4,
+ SparseTensor = 5,
+ MIN = NONE,
+ MAX = SparseTensor
+};
+
+inline const MessageHeader (&EnumValuesMessageHeader())[6] {
+ static const MessageHeader values[] = {
+ MessageHeader::NONE,
+ MessageHeader::Schema,
+ MessageHeader::DictionaryBatch,
+ MessageHeader::RecordBatch,
+ MessageHeader::Tensor,
+ MessageHeader::SparseTensor
+ };
+ return values;
+}
+
+inline const char * const *EnumNamesMessageHeader() {
+ static const char * const names[7] = {
+ "NONE",
+ "Schema",
+ "DictionaryBatch",
+ "RecordBatch",
+ "Tensor",
+ "SparseTensor",
+ nullptr
+ };
+ return names;
+}
+
+inline const char *EnumNameMessageHeader(MessageHeader e) {
+ if (flatbuffers::IsOutRange(e, MessageHeader::NONE, MessageHeader::SparseTensor)) return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesMessageHeader()[index];
+}
+
+template<typename T> struct MessageHeaderTraits {
+ static const MessageHeader enum_value = MessageHeader::NONE;
+};
+
+template<> struct MessageHeaderTraits<org::apache::arrow::flatbuf::Schema> {
+ static const MessageHeader enum_value = MessageHeader::Schema;
+};
+
+template<> struct MessageHeaderTraits<org::apache::arrow::flatbuf::DictionaryBatch> {
+ static const MessageHeader enum_value = MessageHeader::DictionaryBatch;
+};
+
+template<> struct MessageHeaderTraits<org::apache::arrow::flatbuf::RecordBatch> {
+ static const MessageHeader enum_value = MessageHeader::RecordBatch;
+};
+
+template<> struct MessageHeaderTraits<org::apache::arrow::flatbuf::Tensor> {
+ static const MessageHeader enum_value = MessageHeader::Tensor;
+};
+
+template<> struct MessageHeaderTraits<org::apache::arrow::flatbuf::SparseTensor> {
+ static const MessageHeader enum_value = MessageHeader::SparseTensor;
+};
+
+bool VerifyMessageHeader(flatbuffers::Verifier &verifier, const void *obj, MessageHeader type);
+bool VerifyMessageHeaderVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types);
+
+/// ----------------------------------------------------------------------
+/// Data structures for describing a table row batch (a collection of
+/// equal-length Arrow arrays)
+/// Metadata about a field at some level of a nested type tree (but not
+/// its children).
+///
+/// For example, a List<Int16> with values [[1, 2, 3], null, [4], [5, 6], null]
+/// would have {length: 5, null_count: 2} for its List node, and {length: 6,
+/// null_count: 0} for its Int16 node, as separate FieldNode structs
+FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(8) FieldNode FLATBUFFERS_FINAL_CLASS {
+ private:
+ int64_t length_;
+ int64_t null_count_;
+
+ public:
+ FieldNode() {
+ memset(static_cast<void *>(this), 0, sizeof(FieldNode));
+ }
+ FieldNode(int64_t _length, int64_t _null_count)
+ : length_(flatbuffers::EndianScalar(_length)),
+ null_count_(flatbuffers::EndianScalar(_null_count)) {
+ }
+ /// The number of value slots in the Arrow array at this level of a nested
+ /// tree
+ int64_t length() const {
+ return flatbuffers::EndianScalar(length_);
+ }
+ /// The number of observed nulls. Fields with null_count == 0 may choose not
+ /// to write their physical validity bitmap out as a materialized buffer,
+ /// instead setting the length of the bitmap buffer to 0.
+ int64_t null_count() const {
+ return flatbuffers::EndianScalar(null_count_);
+ }
+};
+FLATBUFFERS_STRUCT_END(FieldNode, 16);
+
+/// Optional compression for the memory buffers constituting IPC message
+/// bodies. Intended for use with RecordBatch but could be used for other
+/// message types
+struct BodyCompression FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef BodyCompressionBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+ VT_CODEC = 4,
+ VT_METHOD = 6
+ };
+ /// Compressor library
+ org::apache::arrow::flatbuf::CompressionType codec() const {
+ return static_cast<org::apache::arrow::flatbuf::CompressionType>(GetField<int8_t>(VT_CODEC, 0));
+ }
+ /// Indicates the way the record batch body was compressed
+ org::apache::arrow::flatbuf::BodyCompressionMethod method() const {
+ return static_cast<org::apache::arrow::flatbuf::BodyCompressionMethod>(GetField<int8_t>(VT_METHOD, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int8_t>(verifier, VT_CODEC) &&
+ VerifyField<int8_t>(verifier, VT_METHOD) &&
+ verifier.EndTable();
+ }
+};
+
+struct BodyCompressionBuilder {
+ typedef BodyCompression Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_codec(org::apache::arrow::flatbuf::CompressionType codec) {
+ fbb_.AddElement<int8_t>(BodyCompression::VT_CODEC, static_cast<int8_t>(codec), 0);
+ }
+ void add_method(org::apache::arrow::flatbuf::BodyCompressionMethod method) {
+ fbb_.AddElement<int8_t>(BodyCompression::VT_METHOD, static_cast<int8_t>(method), 0);
+ }
+ explicit BodyCompressionBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ BodyCompressionBuilder &operator=(const BodyCompressionBuilder &);
+ flatbuffers::Offset<BodyCompression> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<BodyCompression>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<BodyCompression> CreateBodyCompression(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ org::apache::arrow::flatbuf::CompressionType codec = org::apache::arrow::flatbuf::CompressionType::LZ4_FRAME,
+ org::apache::arrow::flatbuf::BodyCompressionMethod method = org::apache::arrow::flatbuf::BodyCompressionMethod::BUFFER) {
+ BodyCompressionBuilder builder_(_fbb);
+ builder_.add_method(method);
+ builder_.add_codec(codec);
+ return builder_.Finish();
+}
+
+/// A data header describing the shared memory layout of a "record" or "row"
+/// batch. Some systems call this a "row batch" internally and others a "record
+/// batch".
+struct RecordBatch FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef RecordBatchBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+ VT_LENGTH = 4,
+ VT_NODES = 6,
+ VT_BUFFERS = 8,
+ VT_COMPRESSION = 10
+ };
+ /// number of records / rows. The arrays in the batch should all have this
+ /// length
+ int64_t length() const {
+ return GetField<int64_t>(VT_LENGTH, 0);
+ }
+ /// Nodes correspond to the pre-ordered flattened logical schema
+ const flatbuffers::Vector<const org::apache::arrow::flatbuf::FieldNode *> *nodes() const {
+ return GetPointer<const flatbuffers::Vector<const org::apache::arrow::flatbuf::FieldNode *> *>(VT_NODES);
+ }
+ /// Buffers correspond to the pre-ordered flattened buffer tree
+ ///
+ /// The number of buffers appended to this list depends on the schema. For
+ /// example, most primitive arrays will have 2 buffers, 1 for the validity
+ /// bitmap and 1 for the values. For struct arrays, there will only be a
+ /// single buffer for the validity (nulls) bitmap
+ const flatbuffers::Vector<const org::apache::arrow::flatbuf::Buffer *> *buffers() const {
+ return GetPointer<const flatbuffers::Vector<const org::apache::arrow::flatbuf::Buffer *> *>(VT_BUFFERS);
+ }
+ /// Optional compression of the message body
+ const org::apache::arrow::flatbuf::BodyCompression *compression() const {
+ return GetPointer<const org::apache::arrow::flatbuf::BodyCompression *>(VT_COMPRESSION);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int64_t>(verifier, VT_LENGTH) &&
+ VerifyOffset(verifier, VT_NODES) &&
+ verifier.VerifyVector(nodes()) &&
+ VerifyOffset(verifier, VT_BUFFERS) &&
+ verifier.VerifyVector(buffers()) &&
+ VerifyOffset(verifier, VT_COMPRESSION) &&
+ verifier.VerifyTable(compression()) &&
+ verifier.EndTable();
+ }
+};
+
+struct RecordBatchBuilder {
+ typedef RecordBatch Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_length(int64_t length) {
+ fbb_.AddElement<int64_t>(RecordBatch::VT_LENGTH, length, 0);
+ }
+ void add_nodes(flatbuffers::Offset<flatbuffers::Vector<const org::apache::arrow::flatbuf::FieldNode *>> nodes) {
+ fbb_.AddOffset(RecordBatch::VT_NODES, nodes);
+ }
+ void add_buffers(flatbuffers::Offset<flatbuffers::Vector<const org::apache::arrow::flatbuf::Buffer *>> buffers) {
+ fbb_.AddOffset(RecordBatch::VT_BUFFERS, buffers);
+ }
+ void add_compression(flatbuffers::Offset<org::apache::arrow::flatbuf::BodyCompression> compression) {
+ fbb_.AddOffset(RecordBatch::VT_COMPRESSION, compression);
+ }
+ explicit RecordBatchBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ RecordBatchBuilder &operator=(const RecordBatchBuilder &);
+ flatbuffers::Offset<RecordBatch> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<RecordBatch>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<RecordBatch> CreateRecordBatch(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ int64_t length = 0,
+ flatbuffers::Offset<flatbuffers::Vector<const org::apache::arrow::flatbuf::FieldNode *>> nodes = 0,
+ flatbuffers::Offset<flatbuffers::Vector<const org::apache::arrow::flatbuf::Buffer *>> buffers = 0,
+ flatbuffers::Offset<org::apache::arrow::flatbuf::BodyCompression> compression = 0) {
+ RecordBatchBuilder builder_(_fbb);
+ builder_.add_length(length);
+ builder_.add_compression(compression);
+ builder_.add_buffers(buffers);
+ builder_.add_nodes(nodes);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<RecordBatch> CreateRecordBatchDirect(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ int64_t length = 0,
+ const std::vector<org::apache::arrow::flatbuf::FieldNode> *nodes = nullptr,
+ const std::vector<org::apache::arrow::flatbuf::Buffer> *buffers = nullptr,
+ flatbuffers::Offset<org::apache::arrow::flatbuf::BodyCompression> compression = 0) {
+ auto nodes__ = nodes ? _fbb.CreateVectorOfStructs<org::apache::arrow::flatbuf::FieldNode>(*nodes) : 0;
+ auto buffers__ = buffers ? _fbb.CreateVectorOfStructs<org::apache::arrow::flatbuf::Buffer>(*buffers) : 0;
+ return org::apache::arrow::flatbuf::CreateRecordBatch(
+ _fbb,
+ length,
+ nodes__,
+ buffers__,
+ compression);
+}
+
+/// For sending dictionary encoding information. Any Field can be
+/// dictionary-encoded, but in this case none of its children may be
+/// dictionary-encoded.
+/// There is one vector / column per dictionary, but that vector / column
+/// may be spread across multiple dictionary batches by using the isDelta
+/// flag
+struct DictionaryBatch FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef DictionaryBatchBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+ VT_ID = 4,
+ VT_DATA = 6,
+ VT_ISDELTA = 8
+ };
+ int64_t id() const {
+ return GetField<int64_t>(VT_ID, 0);
+ }
+ const org::apache::arrow::flatbuf::RecordBatch *data() const {
+ return GetPointer<const org::apache::arrow::flatbuf::RecordBatch *>(VT_DATA);
+ }
+ /// If isDelta is true the values in the dictionary are to be appended to a
+ /// dictionary with the indicated id. If isDelta is false this dictionary
+ /// should replace the existing dictionary.
+ bool isDelta() const {
+ return GetField<uint8_t>(VT_ISDELTA, 0) != 0;
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int64_t>(verifier, VT_ID) &&
+ VerifyOffset(verifier, VT_DATA) &&
+ verifier.VerifyTable(data()) &&
+ VerifyField<uint8_t>(verifier, VT_ISDELTA) &&
+ verifier.EndTable();
+ }
+};
+
+struct DictionaryBatchBuilder {
+ typedef DictionaryBatch Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_id(int64_t id) {
+ fbb_.AddElement<int64_t>(DictionaryBatch::VT_ID, id, 0);
+ }
+ void add_data(flatbuffers::Offset<org::apache::arrow::flatbuf::RecordBatch> data) {
+ fbb_.AddOffset(DictionaryBatch::VT_DATA, data);
+ }
+ void add_isDelta(bool isDelta) {
+ fbb_.AddElement<uint8_t>(DictionaryBatch::VT_ISDELTA, static_cast<uint8_t>(isDelta), 0);
+ }
+ explicit DictionaryBatchBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ DictionaryBatchBuilder &operator=(const DictionaryBatchBuilder &);
+ flatbuffers::Offset<DictionaryBatch> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<DictionaryBatch>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<DictionaryBatch> CreateDictionaryBatch(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ int64_t id = 0,
+ flatbuffers::Offset<org::apache::arrow::flatbuf::RecordBatch> data = 0,
+ bool isDelta = false) {
+ DictionaryBatchBuilder builder_(_fbb);
+ builder_.add_id(id);
+ builder_.add_data(data);
+ builder_.add_isDelta(isDelta);
+ return builder_.Finish();
+}
+
+struct Message FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef MessageBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+ VT_VERSION = 4,
+ VT_HEADER_TYPE = 6,
+ VT_HEADER = 8,
+ VT_BODYLENGTH = 10,
+ VT_CUSTOM_METADATA = 12
+ };
+ org::apache::arrow::flatbuf::MetadataVersion version() const {
+ return static_cast<org::apache::arrow::flatbuf::MetadataVersion>(GetField<int16_t>(VT_VERSION, 0));
+ }
+ org::apache::arrow::flatbuf::MessageHeader header_type() const {
+ return static_cast<org::apache::arrow::flatbuf::MessageHeader>(GetField<uint8_t>(VT_HEADER_TYPE, 0));
+ }
+ const void *header() const {
+ return GetPointer<const void *>(VT_HEADER);
+ }
+ template<typename T> const T *header_as() const;
+ const org::apache::arrow::flatbuf::Schema *header_as_Schema() const {
+ return header_type() == org::apache::arrow::flatbuf::MessageHeader::Schema ? static_cast<const org::apache::arrow::flatbuf::Schema *>(header()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::DictionaryBatch *header_as_DictionaryBatch() const {
+ return header_type() == org::apache::arrow::flatbuf::MessageHeader::DictionaryBatch ? static_cast<const org::apache::arrow::flatbuf::DictionaryBatch *>(header()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::RecordBatch *header_as_RecordBatch() const {
+ return header_type() == org::apache::arrow::flatbuf::MessageHeader::RecordBatch ? static_cast<const org::apache::arrow::flatbuf::RecordBatch *>(header()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Tensor *header_as_Tensor() const {
+ return header_type() == org::apache::arrow::flatbuf::MessageHeader::Tensor ? static_cast<const org::apache::arrow::flatbuf::Tensor *>(header()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::SparseTensor *header_as_SparseTensor() const {
+ return header_type() == org::apache::arrow::flatbuf::MessageHeader::SparseTensor ? static_cast<const org::apache::arrow::flatbuf::SparseTensor *>(header()) : nullptr;
+ }
+ int64_t bodyLength() const {
+ return GetField<int64_t>(VT_BODYLENGTH, 0);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>> *custom_metadata() const {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>> *>(VT_CUSTOM_METADATA);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int16_t>(verifier, VT_VERSION) &&
+ VerifyField<uint8_t>(verifier, VT_HEADER_TYPE) &&
+ VerifyOffset(verifier, VT_HEADER) &&
+ VerifyMessageHeader(verifier, header(), header_type()) &&
+ VerifyField<int64_t>(verifier, VT_BODYLENGTH) &&
+ VerifyOffset(verifier, VT_CUSTOM_METADATA) &&
+ verifier.VerifyVector(custom_metadata()) &&
+ verifier.VerifyVectorOfTables(custom_metadata()) &&
+ verifier.EndTable();
+ }
+};
+
+template<> inline const org::apache::arrow::flatbuf::Schema *Message::header_as<org::apache::arrow::flatbuf::Schema>() const {
+ return header_as_Schema();
+}
+
+template<> inline const org::apache::arrow::flatbuf::DictionaryBatch *Message::header_as<org::apache::arrow::flatbuf::DictionaryBatch>() const {
+ return header_as_DictionaryBatch();
+}
+
+template<> inline const org::apache::arrow::flatbuf::RecordBatch *Message::header_as<org::apache::arrow::flatbuf::RecordBatch>() const {
+ return header_as_RecordBatch();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Tensor *Message::header_as<org::apache::arrow::flatbuf::Tensor>() const {
+ return header_as_Tensor();
+}
+
+template<> inline const org::apache::arrow::flatbuf::SparseTensor *Message::header_as<org::apache::arrow::flatbuf::SparseTensor>() const {
+ return header_as_SparseTensor();
+}
+
+struct MessageBuilder {
+ typedef Message Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_version(org::apache::arrow::flatbuf::MetadataVersion version) {
+ fbb_.AddElement<int16_t>(Message::VT_VERSION, static_cast<int16_t>(version), 0);
+ }
+ void add_header_type(org::apache::arrow::flatbuf::MessageHeader header_type) {
+ fbb_.AddElement<uint8_t>(Message::VT_HEADER_TYPE, static_cast<uint8_t>(header_type), 0);
+ }
+ void add_header(flatbuffers::Offset<void> header) {
+ fbb_.AddOffset(Message::VT_HEADER, header);
+ }
+ void add_bodyLength(int64_t bodyLength) {
+ fbb_.AddElement<int64_t>(Message::VT_BODYLENGTH, bodyLength, 0);
+ }
+ void add_custom_metadata(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>> custom_metadata) {
+ fbb_.AddOffset(Message::VT_CUSTOM_METADATA, custom_metadata);
+ }
+ explicit MessageBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ MessageBuilder &operator=(const MessageBuilder &);
+ flatbuffers::Offset<Message> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Message>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Message> CreateMessage(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ org::apache::arrow::flatbuf::MetadataVersion version = org::apache::arrow::flatbuf::MetadataVersion::V1,
+ org::apache::arrow::flatbuf::MessageHeader header_type = org::apache::arrow::flatbuf::MessageHeader::NONE,
+ flatbuffers::Offset<void> header = 0,
+ int64_t bodyLength = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>> custom_metadata = 0) {
+ MessageBuilder builder_(_fbb);
+ builder_.add_bodyLength(bodyLength);
+ builder_.add_custom_metadata(custom_metadata);
+ builder_.add_header(header);
+ builder_.add_version(version);
+ builder_.add_header_type(header_type);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Message> CreateMessageDirect(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ org::apache::arrow::flatbuf::MetadataVersion version = org::apache::arrow::flatbuf::MetadataVersion::V1,
+ org::apache::arrow::flatbuf::MessageHeader header_type = org::apache::arrow::flatbuf::MessageHeader::NONE,
+ flatbuffers::Offset<void> header = 0,
+ int64_t bodyLength = 0,
+ const std::vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>> *custom_metadata = nullptr) {
+ auto custom_metadata__ = custom_metadata ? _fbb.CreateVector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>(*custom_metadata) : 0;
+ return org::apache::arrow::flatbuf::CreateMessage(
+ _fbb,
+ version,
+ header_type,
+ header,
+ bodyLength,
+ custom_metadata__);
+}
+
+inline bool VerifyMessageHeader(flatbuffers::Verifier &verifier, const void *obj, MessageHeader type) {
+ switch (type) {
+ case MessageHeader::NONE: {
+ return true;
+ }
+ case MessageHeader::Schema: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Schema *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case MessageHeader::DictionaryBatch: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::DictionaryBatch *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case MessageHeader::RecordBatch: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::RecordBatch *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case MessageHeader::Tensor: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Tensor *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case MessageHeader::SparseTensor: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::SparseTensor *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ default: return true;
+ }
+}
+
+inline bool VerifyMessageHeaderVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types) {
+ if (!values || !types) return !values && !types;
+ if (values->size() != types->size()) return false;
+ for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) {
+ if (!VerifyMessageHeader(
+ verifier, values->Get(i), types->GetEnum<MessageHeader>(i))) {
+ return false;
+ }
+ }
+ return true;
+}
+
+inline const org::apache::arrow::flatbuf::Message *GetMessage(const void *buf) {
+ return flatbuffers::GetRoot<org::apache::arrow::flatbuf::Message>(buf);
+}
+
+inline const org::apache::arrow::flatbuf::Message *GetSizePrefixedMessage(const void *buf) {
+ return flatbuffers::GetSizePrefixedRoot<org::apache::arrow::flatbuf::Message>(buf);
+}
+
+inline bool VerifyMessageBuffer(
+ flatbuffers::Verifier &verifier) {
+ return verifier.VerifyBuffer<org::apache::arrow::flatbuf::Message>(nullptr);
+}
+
+inline bool VerifySizePrefixedMessageBuffer(
+ flatbuffers::Verifier &verifier) {
+ return verifier.VerifySizePrefixedBuffer<org::apache::arrow::flatbuf::Message>(nullptr);
+}
+
+inline void FinishMessageBuffer(
+ flatbuffers::FlatBufferBuilder &fbb,
+ flatbuffers::Offset<org::apache::arrow::flatbuf::Message> root) {
+ fbb.Finish(root);
+}
+
+inline void FinishSizePrefixedMessageBuffer(
+ flatbuffers::FlatBufferBuilder &fbb,
+ flatbuffers::Offset<org::apache::arrow::flatbuf::Message> root) {
+ fbb.FinishSizePrefixed(root);
+}
+
+} // namespace flatbuf
+} // namespace arrow
+} // namespace apache
+} // namespace org
+
+#endif // FLATBUFFERS_GENERATED_MESSAGE_ORG_APACHE_ARROW_FLATBUF_H_
diff --git a/contrib/libs/apache/arrow/cpp/src/generated/Schema_generated.h b/contrib/libs/apache/arrow/cpp/src/generated/Schema_generated.h
index 91e01d33758..4ffb64ecea3 100644
--- a/contrib/libs/apache/arrow/cpp/src/generated/Schema_generated.h
+++ b/contrib/libs/apache/arrow/cpp/src/generated/Schema_generated.h
@@ -1,2265 +1,2265 @@
-// automatically generated by the FlatBuffers compiler, do not modify
-
-
-#ifndef FLATBUFFERS_GENERATED_SCHEMA_ORG_APACHE_ARROW_FLATBUF_H_
-#define FLATBUFFERS_GENERATED_SCHEMA_ORG_APACHE_ARROW_FLATBUF_H_
-
-#include "flatbuffers/flatbuffers.h"
-
-namespace org {
-namespace apache {
-namespace arrow {
-namespace flatbuf {
-
-struct Null;
-struct NullBuilder;
-
-struct Struct_;
-struct Struct_Builder;
-
-struct List;
-struct ListBuilder;
-
-struct LargeList;
-struct LargeListBuilder;
-
-struct FixedSizeList;
-struct FixedSizeListBuilder;
-
-struct Map;
-struct MapBuilder;
-
-struct Union;
-struct UnionBuilder;
-
-struct Int;
-struct IntBuilder;
-
-struct FloatingPoint;
-struct FloatingPointBuilder;
-
-struct Utf8;
-struct Utf8Builder;
-
-struct Binary;
-struct BinaryBuilder;
-
-struct LargeUtf8;
-struct LargeUtf8Builder;
-
-struct LargeBinary;
-struct LargeBinaryBuilder;
-
-struct FixedSizeBinary;
-struct FixedSizeBinaryBuilder;
-
-struct Bool;
-struct BoolBuilder;
-
-struct Decimal;
-struct DecimalBuilder;
-
-struct Date;
-struct DateBuilder;
-
-struct Time;
-struct TimeBuilder;
-
-struct Timestamp;
-struct TimestampBuilder;
-
-struct Interval;
-struct IntervalBuilder;
-
-struct Duration;
-struct DurationBuilder;
-
-struct KeyValue;
-struct KeyValueBuilder;
-
-struct DictionaryEncoding;
-struct DictionaryEncodingBuilder;
-
-struct Field;
-struct FieldBuilder;
-
-struct Buffer;
-
-struct Schema;
-struct SchemaBuilder;
-
-enum class MetadataVersion : int16_t {
- /// 0.1.0 (October 2016).
- V1 = 0,
- /// 0.2.0 (February 2017). Non-backwards compatible with V1.
- V2 = 1,
- /// 0.3.0 -> 0.7.1 (May - December 2017). Non-backwards compatible with V2.
- V3 = 2,
- /// >= 0.8.0 (December 2017). Non-backwards compatible with V3.
- V4 = 3,
- /// >= 1.0.0 (July 2020. Backwards compatible with V4 (V5 readers can read V4
- /// metadata and IPC messages). Implementations are recommended to provide a
- /// V4 compatibility mode with V5 format changes disabled.
- ///
- /// Incompatible changes between V4 and V5:
- /// - Union buffer layout has changed. In V5, Unions don't have a validity
- /// bitmap buffer.
- V5 = 4,
- MIN = V1,
- MAX = V5
-};
-
-inline const MetadataVersion (&EnumValuesMetadataVersion())[5] {
- static const MetadataVersion values[] = {
- MetadataVersion::V1,
- MetadataVersion::V2,
- MetadataVersion::V3,
- MetadataVersion::V4,
- MetadataVersion::V5
- };
- return values;
-}
-
-inline const char * const *EnumNamesMetadataVersion() {
- static const char * const names[6] = {
- "V1",
- "V2",
- "V3",
- "V4",
- "V5",
- nullptr
- };
- return names;
-}
-
-inline const char *EnumNameMetadataVersion(MetadataVersion e) {
- if (flatbuffers::IsOutRange(e, MetadataVersion::V1, MetadataVersion::V5)) return "";
- const size_t index = static_cast<size_t>(e);
- return EnumNamesMetadataVersion()[index];
-}
-
-/// Represents Arrow Features that might not have full support
-/// within implementations. This is intended to be used in
-/// two scenarios:
-/// 1. A mechanism for readers of Arrow Streams
-/// and files to understand that the stream or file makes
-/// use of a feature that isn't supported or unknown to
-/// the implementation (and therefore can meet the Arrow
-/// forward compatibility guarantees).
-/// 2. A means of negotiating between a client and server
-/// what features a stream is allowed to use. The enums
-/// values here are intented to represent higher level
-/// features, additional details maybe negotiated
-/// with key-value pairs specific to the protocol.
-///
-/// Enums added to this list should be assigned power-of-two values
-/// to facilitate exchanging and comparing bitmaps for supported
-/// features.
-enum class Feature : int64_t {
- /// Needed to make flatbuffers happy.
- UNUSED = 0,
- /// The stream makes use of multiple full dictionaries with the
- /// same ID and assumes clients implement dictionary replacement
- /// correctly.
- DICTIONARY_REPLACEMENT = 1LL,
- /// The stream makes use of compressed bodies as described
- /// in Message.fbs.
- COMPRESSED_BODY = 2LL,
- MIN = UNUSED,
- MAX = COMPRESSED_BODY
-};
-
-inline const Feature (&EnumValuesFeature())[3] {
- static const Feature values[] = {
- Feature::UNUSED,
- Feature::DICTIONARY_REPLACEMENT,
- Feature::COMPRESSED_BODY
- };
- return values;
-}
-
-inline const char * const *EnumNamesFeature() {
- static const char * const names[4] = {
- "UNUSED",
- "DICTIONARY_REPLACEMENT",
- "COMPRESSED_BODY",
- nullptr
- };
- return names;
-}
-
-inline const char *EnumNameFeature(Feature e) {
- if (flatbuffers::IsOutRange(e, Feature::UNUSED, Feature::COMPRESSED_BODY)) return "";
- const size_t index = static_cast<size_t>(e);
- return EnumNamesFeature()[index];
-}
-
-enum class UnionMode : int16_t {
- Sparse = 0,
- Dense = 1,
- MIN = Sparse,
- MAX = Dense
-};
-
-inline const UnionMode (&EnumValuesUnionMode())[2] {
- static const UnionMode values[] = {
- UnionMode::Sparse,
- UnionMode::Dense
- };
- return values;
-}
-
-inline const char * const *EnumNamesUnionMode() {
- static const char * const names[3] = {
- "Sparse",
- "Dense",
- nullptr
- };
- return names;
-}
-
-inline const char *EnumNameUnionMode(UnionMode e) {
- if (flatbuffers::IsOutRange(e, UnionMode::Sparse, UnionMode::Dense)) return "";
- const size_t index = static_cast<size_t>(e);
- return EnumNamesUnionMode()[index];
-}
-
-enum class Precision : int16_t {
- HALF = 0,
- SINGLE = 1,
- DOUBLE = 2,
- MIN = HALF,
- MAX = DOUBLE
-};
-
-inline const Precision (&EnumValuesPrecision())[3] {
- static const Precision values[] = {
- Precision::HALF,
- Precision::SINGLE,
- Precision::DOUBLE
- };
- return values;
-}
-
-inline const char * const *EnumNamesPrecision() {
- static const char * const names[4] = {
- "HALF",
- "SINGLE",
- "DOUBLE",
- nullptr
- };
- return names;
-}
-
-inline const char *EnumNamePrecision(Precision e) {
- if (flatbuffers::IsOutRange(e, Precision::HALF, Precision::DOUBLE)) return "";
- const size_t index = static_cast<size_t>(e);
- return EnumNamesPrecision()[index];
-}
-
-enum class DateUnit : int16_t {
- DAY = 0,
- MILLISECOND = 1,
- MIN = DAY,
- MAX = MILLISECOND
-};
-
-inline const DateUnit (&EnumValuesDateUnit())[2] {
- static const DateUnit values[] = {
- DateUnit::DAY,
- DateUnit::MILLISECOND
- };
- return values;
-}
-
-inline const char * const *EnumNamesDateUnit() {
- static const char * const names[3] = {
- "DAY",
- "MILLISECOND",
- nullptr
- };
- return names;
-}
-
-inline const char *EnumNameDateUnit(DateUnit e) {
- if (flatbuffers::IsOutRange(e, DateUnit::DAY, DateUnit::MILLISECOND)) return "";
- const size_t index = static_cast<size_t>(e);
- return EnumNamesDateUnit()[index];
-}
-
-enum class TimeUnit : int16_t {
- SECOND = 0,
- MILLISECOND = 1,
- MICROSECOND = 2,
- NANOSECOND = 3,
- MIN = SECOND,
- MAX = NANOSECOND
-};
-
-inline const TimeUnit (&EnumValuesTimeUnit())[4] {
- static const TimeUnit values[] = {
- TimeUnit::SECOND,
- TimeUnit::MILLISECOND,
- TimeUnit::MICROSECOND,
- TimeUnit::NANOSECOND
- };
- return values;
-}
-
-inline const char * const *EnumNamesTimeUnit() {
- static const char * const names[5] = {
- "SECOND",
- "MILLISECOND",
- "MICROSECOND",
- "NANOSECOND",
- nullptr
- };
- return names;
-}
-
-inline const char *EnumNameTimeUnit(TimeUnit e) {
- if (flatbuffers::IsOutRange(e, TimeUnit::SECOND, TimeUnit::NANOSECOND)) return "";
- const size_t index = static_cast<size_t>(e);
- return EnumNamesTimeUnit()[index];
-}
-
-enum class IntervalUnit : int16_t {
- YEAR_MONTH = 0,
- DAY_TIME = 1,
- MIN = YEAR_MONTH,
- MAX = DAY_TIME
-};
-
-inline const IntervalUnit (&EnumValuesIntervalUnit())[2] {
- static const IntervalUnit values[] = {
- IntervalUnit::YEAR_MONTH,
- IntervalUnit::DAY_TIME
- };
- return values;
-}
-
-inline const char * const *EnumNamesIntervalUnit() {
- static const char * const names[3] = {
- "YEAR_MONTH",
- "DAY_TIME",
- nullptr
- };
- return names;
-}
-
-inline const char *EnumNameIntervalUnit(IntervalUnit e) {
- if (flatbuffers::IsOutRange(e, IntervalUnit::YEAR_MONTH, IntervalUnit::DAY_TIME)) return "";
- const size_t index = static_cast<size_t>(e);
- return EnumNamesIntervalUnit()[index];
-}
-
-/// ----------------------------------------------------------------------
-/// Top-level Type value, enabling extensible type-specific metadata. We can
-/// add new logical types to Type without breaking backwards compatibility
-enum class Type : uint8_t {
- NONE = 0,
- Null = 1,
- Int = 2,
- FloatingPoint = 3,
- Binary = 4,
- Utf8 = 5,
- Bool = 6,
- Decimal = 7,
- Date = 8,
- Time = 9,
- Timestamp = 10,
- Interval = 11,
- List = 12,
- Struct_ = 13,
- Union = 14,
- FixedSizeBinary = 15,
- FixedSizeList = 16,
- Map = 17,
- Duration = 18,
- LargeBinary = 19,
- LargeUtf8 = 20,
- LargeList = 21,
- MIN = NONE,
- MAX = LargeList
-};
-
-inline const Type (&EnumValuesType())[22] {
- static const Type values[] = {
- Type::NONE,
- Type::Null,
- Type::Int,
- Type::FloatingPoint,
- Type::Binary,
- Type::Utf8,
- Type::Bool,
- Type::Decimal,
- Type::Date,
- Type::Time,
- Type::Timestamp,
- Type::Interval,
- Type::List,
- Type::Struct_,
- Type::Union,
- Type::FixedSizeBinary,
- Type::FixedSizeList,
- Type::Map,
- Type::Duration,
- Type::LargeBinary,
- Type::LargeUtf8,
- Type::LargeList
- };
- return values;
-}
-
-inline const char * const *EnumNamesType() {
- static const char * const names[23] = {
- "NONE",
- "Null",
- "Int",
- "FloatingPoint",
- "Binary",
- "Utf8",
- "Bool",
- "Decimal",
- "Date",
- "Time",
- "Timestamp",
- "Interval",
- "List",
- "Struct_",
- "Union",
- "FixedSizeBinary",
- "FixedSizeList",
- "Map",
- "Duration",
- "LargeBinary",
- "LargeUtf8",
- "LargeList",
- nullptr
- };
- return names;
-}
-
-inline const char *EnumNameType(Type e) {
- if (flatbuffers::IsOutRange(e, Type::NONE, Type::LargeList)) return "";
- const size_t index = static_cast<size_t>(e);
- return EnumNamesType()[index];
-}
-
-template<typename T> struct TypeTraits {
- static const Type enum_value = Type::NONE;
-};
-
-template<> struct TypeTraits<org::apache::arrow::flatbuf::Null> {
- static const Type enum_value = Type::Null;
-};
-
-template<> struct TypeTraits<org::apache::arrow::flatbuf::Int> {
- static const Type enum_value = Type::Int;
-};
-
-template<> struct TypeTraits<org::apache::arrow::flatbuf::FloatingPoint> {
- static const Type enum_value = Type::FloatingPoint;
-};
-
-template<> struct TypeTraits<org::apache::arrow::flatbuf::Binary> {
- static const Type enum_value = Type::Binary;
-};
-
-template<> struct TypeTraits<org::apache::arrow::flatbuf::Utf8> {
- static const Type enum_value = Type::Utf8;
-};
-
-template<> struct TypeTraits<org::apache::arrow::flatbuf::Bool> {
- static const Type enum_value = Type::Bool;
-};
-
-template<> struct TypeTraits<org::apache::arrow::flatbuf::Decimal> {
- static const Type enum_value = Type::Decimal;
-};
-
-template<> struct TypeTraits<org::apache::arrow::flatbuf::Date> {
- static const Type enum_value = Type::Date;
-};
-
-template<> struct TypeTraits<org::apache::arrow::flatbuf::Time> {
- static const Type enum_value = Type::Time;
-};
-
-template<> struct TypeTraits<org::apache::arrow::flatbuf::Timestamp> {
- static const Type enum_value = Type::Timestamp;
-};
-
-template<> struct TypeTraits<org::apache::arrow::flatbuf::Interval> {
- static const Type enum_value = Type::Interval;
-};
-
-template<> struct TypeTraits<org::apache::arrow::flatbuf::List> {
- static const Type enum_value = Type::List;
-};
-
-template<> struct TypeTraits<org::apache::arrow::flatbuf::Struct_> {
- static const Type enum_value = Type::Struct_;
-};
-
-template<> struct TypeTraits<org::apache::arrow::flatbuf::Union> {
- static const Type enum_value = Type::Union;
-};
-
-template<> struct TypeTraits<org::apache::arrow::flatbuf::FixedSizeBinary> {
- static const Type enum_value = Type::FixedSizeBinary;
-};
-
-template<> struct TypeTraits<org::apache::arrow::flatbuf::FixedSizeList> {
- static const Type enum_value = Type::FixedSizeList;
-};
-
-template<> struct TypeTraits<org::apache::arrow::flatbuf::Map> {
- static const Type enum_value = Type::Map;
-};
-
-template<> struct TypeTraits<org::apache::arrow::flatbuf::Duration> {
- static const Type enum_value = Type::Duration;
-};
-
-template<> struct TypeTraits<org::apache::arrow::flatbuf::LargeBinary> {
- static const Type enum_value = Type::LargeBinary;
-};
-
-template<> struct TypeTraits<org::apache::arrow::flatbuf::LargeUtf8> {
- static const Type enum_value = Type::LargeUtf8;
-};
-
-template<> struct TypeTraits<org::apache::arrow::flatbuf::LargeList> {
- static const Type enum_value = Type::LargeList;
-};
-
-bool VerifyType(flatbuffers::Verifier &verifier, const void *obj, Type type);
-bool VerifyTypeVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types);
-
-/// ----------------------------------------------------------------------
-/// Dictionary encoding metadata
-/// Maintained for forwards compatibility, in the future
-/// Dictionaries might be explicit maps between integers and values
-/// allowing for non-contiguous index values
-enum class DictionaryKind : int16_t {
- DenseArray = 0,
- MIN = DenseArray,
- MAX = DenseArray
-};
-
-inline const DictionaryKind (&EnumValuesDictionaryKind())[1] {
- static const DictionaryKind values[] = {
- DictionaryKind::DenseArray
- };
- return values;
-}
-
-inline const char * const *EnumNamesDictionaryKind() {
- static const char * const names[2] = {
- "DenseArray",
- nullptr
- };
- return names;
-}
-
-inline const char *EnumNameDictionaryKind(DictionaryKind e) {
- if (flatbuffers::IsOutRange(e, DictionaryKind::DenseArray, DictionaryKind::DenseArray)) return "";
- const size_t index = static_cast<size_t>(e);
- return EnumNamesDictionaryKind()[index];
-}
-
-/// ----------------------------------------------------------------------
-/// Endianness of the platform producing the data
-enum class Endianness : int16_t {
- Little = 0,
- Big = 1,
- MIN = Little,
- MAX = Big
-};
-
-inline const Endianness (&EnumValuesEndianness())[2] {
- static const Endianness values[] = {
- Endianness::Little,
- Endianness::Big
- };
- return values;
-}
-
-inline const char * const *EnumNamesEndianness() {
- static const char * const names[3] = {
- "Little",
- "Big",
- nullptr
- };
- return names;
-}
-
-inline const char *EnumNameEndianness(Endianness e) {
- if (flatbuffers::IsOutRange(e, Endianness::Little, Endianness::Big)) return "";
- const size_t index = static_cast<size_t>(e);
- return EnumNamesEndianness()[index];
-}
-
-/// ----------------------------------------------------------------------
-/// A Buffer represents a single contiguous memory segment
-FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(8) Buffer FLATBUFFERS_FINAL_CLASS {
- private:
- int64_t offset_;
- int64_t length_;
-
- public:
- Buffer() {
- memset(static_cast<void *>(this), 0, sizeof(Buffer));
- }
- Buffer(int64_t _offset, int64_t _length)
- : offset_(flatbuffers::EndianScalar(_offset)),
- length_(flatbuffers::EndianScalar(_length)) {
- }
- /// The relative offset into the shared memory page where the bytes for this
- /// buffer starts
- int64_t offset() const {
- return flatbuffers::EndianScalar(offset_);
- }
- /// The absolute length (in bytes) of the memory buffer. The memory is found
- /// from offset (inclusive) to offset + length (non-inclusive). When building
- /// messages using the encapsulated IPC message, padding bytes may be written
- /// after a buffer, but such padding bytes do not need to be accounted for in
- /// the size here.
- int64_t length() const {
- return flatbuffers::EndianScalar(length_);
- }
-};
-FLATBUFFERS_STRUCT_END(Buffer, 16);
-
-/// These are stored in the flatbuffer in the Type union below
-struct Null FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef NullBuilder Builder;
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- verifier.EndTable();
- }
-};
-
-struct NullBuilder {
- typedef Null Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit NullBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- NullBuilder &operator=(const NullBuilder &);
- flatbuffers::Offset<Null> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Null>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Null> CreateNull(
- flatbuffers::FlatBufferBuilder &_fbb) {
- NullBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-/// A Struct_ in the flatbuffer metadata is the same as an Arrow Struct
-/// (according to the physical memory layout). We used Struct_ here as
-/// Struct is a reserved word in Flatbuffers
-struct Struct_ FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef Struct_Builder Builder;
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- verifier.EndTable();
- }
-};
-
-struct Struct_Builder {
- typedef Struct_ Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit Struct_Builder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- Struct_Builder &operator=(const Struct_Builder &);
- flatbuffers::Offset<Struct_> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Struct_>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Struct_> CreateStruct_(
- flatbuffers::FlatBufferBuilder &_fbb) {
- Struct_Builder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct List FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef ListBuilder Builder;
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- verifier.EndTable();
- }
-};
-
-struct ListBuilder {
- typedef List Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit ListBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- ListBuilder &operator=(const ListBuilder &);
- flatbuffers::Offset<List> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<List>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<List> CreateList(
- flatbuffers::FlatBufferBuilder &_fbb) {
- ListBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-/// Same as List, but with 64-bit offsets, allowing to represent
-/// extremely large data values.
-struct LargeList FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef LargeListBuilder Builder;
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- verifier.EndTable();
- }
-};
-
-struct LargeListBuilder {
- typedef LargeList Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit LargeListBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- LargeListBuilder &operator=(const LargeListBuilder &);
- flatbuffers::Offset<LargeList> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<LargeList>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<LargeList> CreateLargeList(
- flatbuffers::FlatBufferBuilder &_fbb) {
- LargeListBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct FixedSizeList FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef FixedSizeListBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_LISTSIZE = 4
- };
- /// Number of list items per value
- int32_t listSize() const {
- return GetField<int32_t>(VT_LISTSIZE, 0);
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyField<int32_t>(verifier, VT_LISTSIZE) &&
- verifier.EndTable();
- }
-};
-
-struct FixedSizeListBuilder {
- typedef FixedSizeList Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_listSize(int32_t listSize) {
- fbb_.AddElement<int32_t>(FixedSizeList::VT_LISTSIZE, listSize, 0);
- }
- explicit FixedSizeListBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- FixedSizeListBuilder &operator=(const FixedSizeListBuilder &);
- flatbuffers::Offset<FixedSizeList> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<FixedSizeList>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<FixedSizeList> CreateFixedSizeList(
- flatbuffers::FlatBufferBuilder &_fbb,
- int32_t listSize = 0) {
- FixedSizeListBuilder builder_(_fbb);
- builder_.add_listSize(listSize);
- return builder_.Finish();
-}
-
-/// A Map is a logical nested type that is represented as
-///
-/// List<entries: Struct<key: K, value: V>>
-///
-/// In this layout, the keys and values are each respectively contiguous. We do
-/// not constrain the key and value types, so the application is responsible
-/// for ensuring that the keys are hashable and unique. Whether the keys are sorted
-/// may be set in the metadata for this field.
-///
-/// In a field with Map type, the field has a child Struct field, which then
-/// has two children: key type and the second the value type. The names of the
-/// child fields may be respectively "entries", "key", and "value", but this is
-/// not enforced.
-///
-/// Map
-/// - child[0] entries: Struct
-/// - child[0] key: K
-/// - child[1] value: V
-///
-/// Neither the "entries" field nor the "key" field may be nullable.
-///
-/// The metadata is structured so that Arrow systems without special handling
-/// for Map can make Map an alias for List. The "layout" attribute for the Map
-/// field must have the same contents as a List.
-struct Map FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef MapBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_KEYSSORTED = 4
- };
- /// Set to true if the keys within each value are sorted
- bool keysSorted() const {
- return GetField<uint8_t>(VT_KEYSSORTED, 0) != 0;
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyField<uint8_t>(verifier, VT_KEYSSORTED) &&
- verifier.EndTable();
- }
-};
-
-struct MapBuilder {
- typedef Map Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_keysSorted(bool keysSorted) {
- fbb_.AddElement<uint8_t>(Map::VT_KEYSSORTED, static_cast<uint8_t>(keysSorted), 0);
- }
- explicit MapBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- MapBuilder &operator=(const MapBuilder &);
- flatbuffers::Offset<Map> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Map>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Map> CreateMap(
- flatbuffers::FlatBufferBuilder &_fbb,
- bool keysSorted = false) {
- MapBuilder builder_(_fbb);
- builder_.add_keysSorted(keysSorted);
- return builder_.Finish();
-}
-
-/// A union is a complex type with children in Field
-/// By default ids in the type vector refer to the offsets in the children
-/// optionally typeIds provides an indirection between the child offset and the type id
-/// for each child typeIds[offset] is the id used in the type vector
-struct Union FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef UnionBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_MODE = 4,
- VT_TYPEIDS = 6
- };
- org::apache::arrow::flatbuf::UnionMode mode() const {
- return static_cast<org::apache::arrow::flatbuf::UnionMode>(GetField<int16_t>(VT_MODE, 0));
- }
- const flatbuffers::Vector<int32_t> *typeIds() const {
- return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_TYPEIDS);
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyField<int16_t>(verifier, VT_MODE) &&
- VerifyOffset(verifier, VT_TYPEIDS) &&
- verifier.VerifyVector(typeIds()) &&
- verifier.EndTable();
- }
-};
-
-struct UnionBuilder {
- typedef Union Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_mode(org::apache::arrow::flatbuf::UnionMode mode) {
- fbb_.AddElement<int16_t>(Union::VT_MODE, static_cast<int16_t>(mode), 0);
- }
- void add_typeIds(flatbuffers::Offset<flatbuffers::Vector<int32_t>> typeIds) {
- fbb_.AddOffset(Union::VT_TYPEIDS, typeIds);
- }
- explicit UnionBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- UnionBuilder &operator=(const UnionBuilder &);
- flatbuffers::Offset<Union> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Union>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Union> CreateUnion(
- flatbuffers::FlatBufferBuilder &_fbb,
- org::apache::arrow::flatbuf::UnionMode mode = org::apache::arrow::flatbuf::UnionMode::Sparse,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> typeIds = 0) {
- UnionBuilder builder_(_fbb);
- builder_.add_typeIds(typeIds);
- builder_.add_mode(mode);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Union> CreateUnionDirect(
- flatbuffers::FlatBufferBuilder &_fbb,
- org::apache::arrow::flatbuf::UnionMode mode = org::apache::arrow::flatbuf::UnionMode::Sparse,
- const std::vector<int32_t> *typeIds = nullptr) {
- auto typeIds__ = typeIds ? _fbb.CreateVector<int32_t>(*typeIds) : 0;
- return org::apache::arrow::flatbuf::CreateUnion(
- _fbb,
- mode,
- typeIds__);
-}
-
-struct Int FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef IntBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_BITWIDTH = 4,
- VT_IS_SIGNED = 6
- };
- int32_t bitWidth() const {
- return GetField<int32_t>(VT_BITWIDTH, 0);
- }
- bool is_signed() const {
- return GetField<uint8_t>(VT_IS_SIGNED, 0) != 0;
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyField<int32_t>(verifier, VT_BITWIDTH) &&
- VerifyField<uint8_t>(verifier, VT_IS_SIGNED) &&
- verifier.EndTable();
- }
-};
-
-struct IntBuilder {
- typedef Int Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_bitWidth(int32_t bitWidth) {
- fbb_.AddElement<int32_t>(Int::VT_BITWIDTH, bitWidth, 0);
- }
- void add_is_signed(bool is_signed) {
- fbb_.AddElement<uint8_t>(Int::VT_IS_SIGNED, static_cast<uint8_t>(is_signed), 0);
- }
- explicit IntBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- IntBuilder &operator=(const IntBuilder &);
- flatbuffers::Offset<Int> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Int>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Int> CreateInt(
- flatbuffers::FlatBufferBuilder &_fbb,
- int32_t bitWidth = 0,
- bool is_signed = false) {
- IntBuilder builder_(_fbb);
- builder_.add_bitWidth(bitWidth);
- builder_.add_is_signed(is_signed);
- return builder_.Finish();
-}
-
-struct FloatingPoint FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef FloatingPointBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_PRECISION = 4
- };
- org::apache::arrow::flatbuf::Precision precision() const {
- return static_cast<org::apache::arrow::flatbuf::Precision>(GetField<int16_t>(VT_PRECISION, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyField<int16_t>(verifier, VT_PRECISION) &&
- verifier.EndTable();
- }
-};
-
-struct FloatingPointBuilder {
- typedef FloatingPoint Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_precision(org::apache::arrow::flatbuf::Precision precision) {
- fbb_.AddElement<int16_t>(FloatingPoint::VT_PRECISION, static_cast<int16_t>(precision), 0);
- }
- explicit FloatingPointBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- FloatingPointBuilder &operator=(const FloatingPointBuilder &);
- flatbuffers::Offset<FloatingPoint> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<FloatingPoint>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<FloatingPoint> CreateFloatingPoint(
- flatbuffers::FlatBufferBuilder &_fbb,
- org::apache::arrow::flatbuf::Precision precision = org::apache::arrow::flatbuf::Precision::HALF) {
- FloatingPointBuilder builder_(_fbb);
- builder_.add_precision(precision);
- return builder_.Finish();
-}
-
-/// Unicode with UTF-8 encoding
-struct Utf8 FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef Utf8Builder Builder;
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- verifier.EndTable();
- }
-};
-
-struct Utf8Builder {
- typedef Utf8 Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit Utf8Builder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- Utf8Builder &operator=(const Utf8Builder &);
- flatbuffers::Offset<Utf8> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Utf8>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Utf8> CreateUtf8(
- flatbuffers::FlatBufferBuilder &_fbb) {
- Utf8Builder builder_(_fbb);
- return builder_.Finish();
-}
-
-/// Opaque binary data
-struct Binary FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef BinaryBuilder Builder;
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- verifier.EndTable();
- }
-};
-
-struct BinaryBuilder {
- typedef Binary Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit BinaryBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- BinaryBuilder &operator=(const BinaryBuilder &);
- flatbuffers::Offset<Binary> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Binary>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Binary> CreateBinary(
- flatbuffers::FlatBufferBuilder &_fbb) {
- BinaryBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-/// Same as Utf8, but with 64-bit offsets, allowing to represent
-/// extremely large data values.
-struct LargeUtf8 FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef LargeUtf8Builder Builder;
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- verifier.EndTable();
- }
-};
-
-struct LargeUtf8Builder {
- typedef LargeUtf8 Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit LargeUtf8Builder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- LargeUtf8Builder &operator=(const LargeUtf8Builder &);
- flatbuffers::Offset<LargeUtf8> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<LargeUtf8>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<LargeUtf8> CreateLargeUtf8(
- flatbuffers::FlatBufferBuilder &_fbb) {
- LargeUtf8Builder builder_(_fbb);
- return builder_.Finish();
-}
-
-/// Same as Binary, but with 64-bit offsets, allowing to represent
-/// extremely large data values.
-struct LargeBinary FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef LargeBinaryBuilder Builder;
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- verifier.EndTable();
- }
-};
-
-struct LargeBinaryBuilder {
- typedef LargeBinary Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit LargeBinaryBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- LargeBinaryBuilder &operator=(const LargeBinaryBuilder &);
- flatbuffers::Offset<LargeBinary> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<LargeBinary>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<LargeBinary> CreateLargeBinary(
- flatbuffers::FlatBufferBuilder &_fbb) {
- LargeBinaryBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct FixedSizeBinary FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef FixedSizeBinaryBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_BYTEWIDTH = 4
- };
- /// Number of bytes per value
- int32_t byteWidth() const {
- return GetField<int32_t>(VT_BYTEWIDTH, 0);
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyField<int32_t>(verifier, VT_BYTEWIDTH) &&
- verifier.EndTable();
- }
-};
-
-struct FixedSizeBinaryBuilder {
- typedef FixedSizeBinary Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_byteWidth(int32_t byteWidth) {
- fbb_.AddElement<int32_t>(FixedSizeBinary::VT_BYTEWIDTH, byteWidth, 0);
- }
- explicit FixedSizeBinaryBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- FixedSizeBinaryBuilder &operator=(const FixedSizeBinaryBuilder &);
- flatbuffers::Offset<FixedSizeBinary> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<FixedSizeBinary>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<FixedSizeBinary> CreateFixedSizeBinary(
- flatbuffers::FlatBufferBuilder &_fbb,
- int32_t byteWidth = 0) {
- FixedSizeBinaryBuilder builder_(_fbb);
- builder_.add_byteWidth(byteWidth);
- return builder_.Finish();
-}
-
-struct Bool FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef BoolBuilder Builder;
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- verifier.EndTable();
- }
-};
-
-struct BoolBuilder {
- typedef Bool Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit BoolBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- BoolBuilder &operator=(const BoolBuilder &);
- flatbuffers::Offset<Bool> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Bool>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Bool> CreateBool(
- flatbuffers::FlatBufferBuilder &_fbb) {
- BoolBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-/// Exact decimal value represented as an integer value in two's
-/// complement. Currently only 128-bit (16-byte) integers are used but this may
-/// be expanded in the future. The representation uses the endianness indicated
-/// in the Schema.
-struct Decimal FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef DecimalBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_PRECISION = 4,
- VT_SCALE = 6,
- VT_BITWIDTH = 8
- };
- /// Total number of decimal digits
- int32_t precision() const {
- return GetField<int32_t>(VT_PRECISION, 0);
- }
- /// Number of digits after the decimal point "."
- int32_t scale() const {
- return GetField<int32_t>(VT_SCALE, 0);
- }
- /// Number of bits per value. The only accepted width right now is 128 but
- /// this field exists for forward compatibility so that other bit widths may
- /// be supported in future format versions. We use bitWidth for consistency
- /// with Int::bitWidth.
- int32_t bitWidth() const {
- return GetField<int32_t>(VT_BITWIDTH, 128);
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyField<int32_t>(verifier, VT_PRECISION) &&
- VerifyField<int32_t>(verifier, VT_SCALE) &&
- VerifyField<int32_t>(verifier, VT_BITWIDTH) &&
- verifier.EndTable();
- }
-};
-
-struct DecimalBuilder {
- typedef Decimal Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_precision(int32_t precision) {
- fbb_.AddElement<int32_t>(Decimal::VT_PRECISION, precision, 0);
- }
- void add_scale(int32_t scale) {
- fbb_.AddElement<int32_t>(Decimal::VT_SCALE, scale, 0);
- }
- void add_bitWidth(int32_t bitWidth) {
- fbb_.AddElement<int32_t>(Decimal::VT_BITWIDTH, bitWidth, 128);
- }
- explicit DecimalBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- DecimalBuilder &operator=(const DecimalBuilder &);
- flatbuffers::Offset<Decimal> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Decimal>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Decimal> CreateDecimal(
- flatbuffers::FlatBufferBuilder &_fbb,
- int32_t precision = 0,
- int32_t scale = 0,
- int32_t bitWidth = 128) {
- DecimalBuilder builder_(_fbb);
- builder_.add_bitWidth(bitWidth);
- builder_.add_scale(scale);
- builder_.add_precision(precision);
- return builder_.Finish();
-}
-
-/// Date is either a 32-bit or 64-bit type representing elapsed time since UNIX
-/// epoch (1970-01-01), stored in either of two units:
-///
-/// * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no
-/// leap seconds), where the values are evenly divisible by 86400000
-/// * Days (32 bits) since the UNIX epoch
-struct Date FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef DateBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_UNIT = 4
- };
- org::apache::arrow::flatbuf::DateUnit unit() const {
- return static_cast<org::apache::arrow::flatbuf::DateUnit>(GetField<int16_t>(VT_UNIT, 1));
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyField<int16_t>(verifier, VT_UNIT) &&
- verifier.EndTable();
- }
-};
-
-struct DateBuilder {
- typedef Date Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_unit(org::apache::arrow::flatbuf::DateUnit unit) {
- fbb_.AddElement<int16_t>(Date::VT_UNIT, static_cast<int16_t>(unit), 1);
- }
- explicit DateBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- DateBuilder &operator=(const DateBuilder &);
- flatbuffers::Offset<Date> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Date>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Date> CreateDate(
- flatbuffers::FlatBufferBuilder &_fbb,
- org::apache::arrow::flatbuf::DateUnit unit = org::apache::arrow::flatbuf::DateUnit::MILLISECOND) {
- DateBuilder builder_(_fbb);
- builder_.add_unit(unit);
- return builder_.Finish();
-}
-
-/// Time type. The physical storage type depends on the unit
-/// - SECOND and MILLISECOND: 32 bits
-/// - MICROSECOND and NANOSECOND: 64 bits
-struct Time FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef TimeBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_UNIT = 4,
- VT_BITWIDTH = 6
- };
- org::apache::arrow::flatbuf::TimeUnit unit() const {
- return static_cast<org::apache::arrow::flatbuf::TimeUnit>(GetField<int16_t>(VT_UNIT, 1));
- }
- int32_t bitWidth() const {
- return GetField<int32_t>(VT_BITWIDTH, 32);
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyField<int16_t>(verifier, VT_UNIT) &&
- VerifyField<int32_t>(verifier, VT_BITWIDTH) &&
- verifier.EndTable();
- }
-};
-
-struct TimeBuilder {
- typedef Time Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_unit(org::apache::arrow::flatbuf::TimeUnit unit) {
- fbb_.AddElement<int16_t>(Time::VT_UNIT, static_cast<int16_t>(unit), 1);
- }
- void add_bitWidth(int32_t bitWidth) {
- fbb_.AddElement<int32_t>(Time::VT_BITWIDTH, bitWidth, 32);
- }
- explicit TimeBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- TimeBuilder &operator=(const TimeBuilder &);
- flatbuffers::Offset<Time> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Time>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Time> CreateTime(
- flatbuffers::FlatBufferBuilder &_fbb,
- org::apache::arrow::flatbuf::TimeUnit unit = org::apache::arrow::flatbuf::TimeUnit::MILLISECOND,
- int32_t bitWidth = 32) {
- TimeBuilder builder_(_fbb);
- builder_.add_bitWidth(bitWidth);
- builder_.add_unit(unit);
- return builder_.Finish();
-}
-
-/// Time elapsed from the Unix epoch, 00:00:00.000 on 1 January 1970, excluding
-/// leap seconds, as a 64-bit integer. Note that UNIX time does not include
-/// leap seconds.
-///
-/// The Timestamp metadata supports both "time zone naive" and "time zone
-/// aware" timestamps. Read about the timezone attribute for more detail
-struct Timestamp FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef TimestampBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_UNIT = 4,
- VT_TIMEZONE = 6
- };
- org::apache::arrow::flatbuf::TimeUnit unit() const {
- return static_cast<org::apache::arrow::flatbuf::TimeUnit>(GetField<int16_t>(VT_UNIT, 0));
- }
- /// The time zone is a string indicating the name of a time zone, one of:
- ///
- /// * As used in the Olson time zone database (the "tz database" or
- /// "tzdata"), such as "America/New_York"
- /// * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30
- ///
- /// Whether a timezone string is present indicates different semantics about
- /// the data:
- ///
- /// * If the time zone is null or equal to an empty string, the data is "time
- /// zone naive" and shall be displayed *as is* to the user, not localized
- /// to the locale of the user. This data can be though of as UTC but
- /// without having "UTC" as the time zone, it is not considered to be
- /// localized to any time zone
- ///
- /// * If the time zone is set to a valid value, values can be displayed as
- /// "localized" to that time zone, even though the underlying 64-bit
- /// integers are identical to the same data stored in UTC. Converting
- /// between time zones is a metadata-only operation and does not change the
- /// underlying values
- const flatbuffers::String *timezone() const {
- return GetPointer<const flatbuffers::String *>(VT_TIMEZONE);
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyField<int16_t>(verifier, VT_UNIT) &&
- VerifyOffset(verifier, VT_TIMEZONE) &&
- verifier.VerifyString(timezone()) &&
- verifier.EndTable();
- }
-};
-
-struct TimestampBuilder {
- typedef Timestamp Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_unit(org::apache::arrow::flatbuf::TimeUnit unit) {
- fbb_.AddElement<int16_t>(Timestamp::VT_UNIT, static_cast<int16_t>(unit), 0);
- }
- void add_timezone(flatbuffers::Offset<flatbuffers::String> timezone) {
- fbb_.AddOffset(Timestamp::VT_TIMEZONE, timezone);
- }
- explicit TimestampBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- TimestampBuilder &operator=(const TimestampBuilder &);
- flatbuffers::Offset<Timestamp> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Timestamp>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Timestamp> CreateTimestamp(
- flatbuffers::FlatBufferBuilder &_fbb,
- org::apache::arrow::flatbuf::TimeUnit unit = org::apache::arrow::flatbuf::TimeUnit::SECOND,
- flatbuffers::Offset<flatbuffers::String> timezone = 0) {
- TimestampBuilder builder_(_fbb);
- builder_.add_timezone(timezone);
- builder_.add_unit(unit);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Timestamp> CreateTimestampDirect(
- flatbuffers::FlatBufferBuilder &_fbb,
- org::apache::arrow::flatbuf::TimeUnit unit = org::apache::arrow::flatbuf::TimeUnit::SECOND,
- const char *timezone = nullptr) {
- auto timezone__ = timezone ? _fbb.CreateString(timezone) : 0;
- return org::apache::arrow::flatbuf::CreateTimestamp(
- _fbb,
- unit,
- timezone__);
-}
-
-struct Interval FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef IntervalBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_UNIT = 4
- };
- org::apache::arrow::flatbuf::IntervalUnit unit() const {
- return static_cast<org::apache::arrow::flatbuf::IntervalUnit>(GetField<int16_t>(VT_UNIT, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyField<int16_t>(verifier, VT_UNIT) &&
- verifier.EndTable();
- }
-};
-
-struct IntervalBuilder {
- typedef Interval Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_unit(org::apache::arrow::flatbuf::IntervalUnit unit) {
- fbb_.AddElement<int16_t>(Interval::VT_UNIT, static_cast<int16_t>(unit), 0);
- }
- explicit IntervalBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- IntervalBuilder &operator=(const IntervalBuilder &);
- flatbuffers::Offset<Interval> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Interval>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Interval> CreateInterval(
- flatbuffers::FlatBufferBuilder &_fbb,
- org::apache::arrow::flatbuf::IntervalUnit unit = org::apache::arrow::flatbuf::IntervalUnit::YEAR_MONTH) {
- IntervalBuilder builder_(_fbb);
- builder_.add_unit(unit);
- return builder_.Finish();
-}
-
-struct Duration FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef DurationBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_UNIT = 4
- };
- org::apache::arrow::flatbuf::TimeUnit unit() const {
- return static_cast<org::apache::arrow::flatbuf::TimeUnit>(GetField<int16_t>(VT_UNIT, 1));
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyField<int16_t>(verifier, VT_UNIT) &&
- verifier.EndTable();
- }
-};
-
-struct DurationBuilder {
- typedef Duration Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_unit(org::apache::arrow::flatbuf::TimeUnit unit) {
- fbb_.AddElement<int16_t>(Duration::VT_UNIT, static_cast<int16_t>(unit), 1);
- }
- explicit DurationBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- DurationBuilder &operator=(const DurationBuilder &);
- flatbuffers::Offset<Duration> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Duration>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Duration> CreateDuration(
- flatbuffers::FlatBufferBuilder &_fbb,
- org::apache::arrow::flatbuf::TimeUnit unit = org::apache::arrow::flatbuf::TimeUnit::MILLISECOND) {
- DurationBuilder builder_(_fbb);
- builder_.add_unit(unit);
- return builder_.Finish();
-}
-
-/// ----------------------------------------------------------------------
-/// user defined key value pairs to add custom metadata to arrow
-/// key namespacing is the responsibility of the user
-struct KeyValue FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef KeyValueBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_KEY = 4,
- VT_VALUE = 6
- };
- const flatbuffers::String *key() const {
- return GetPointer<const flatbuffers::String *>(VT_KEY);
- }
- const flatbuffers::String *value() const {
- return GetPointer<const flatbuffers::String *>(VT_VALUE);
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyOffset(verifier, VT_KEY) &&
- verifier.VerifyString(key()) &&
- VerifyOffset(verifier, VT_VALUE) &&
- verifier.VerifyString(value()) &&
- verifier.EndTable();
- }
-};
-
-struct KeyValueBuilder {
- typedef KeyValue Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_key(flatbuffers::Offset<flatbuffers::String> key) {
- fbb_.AddOffset(KeyValue::VT_KEY, key);
- }
- void add_value(flatbuffers::Offset<flatbuffers::String> value) {
- fbb_.AddOffset(KeyValue::VT_VALUE, value);
- }
- explicit KeyValueBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- KeyValueBuilder &operator=(const KeyValueBuilder &);
- flatbuffers::Offset<KeyValue> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<KeyValue>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<KeyValue> CreateKeyValue(
- flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::String> key = 0,
- flatbuffers::Offset<flatbuffers::String> value = 0) {
- KeyValueBuilder builder_(_fbb);
- builder_.add_value(value);
- builder_.add_key(key);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<KeyValue> CreateKeyValueDirect(
- flatbuffers::FlatBufferBuilder &_fbb,
- const char *key = nullptr,
- const char *value = nullptr) {
- auto key__ = key ? _fbb.CreateString(key) : 0;
- auto value__ = value ? _fbb.CreateString(value) : 0;
- return org::apache::arrow::flatbuf::CreateKeyValue(
- _fbb,
- key__,
- value__);
-}
-
-struct DictionaryEncoding FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef DictionaryEncodingBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_ID = 4,
- VT_INDEXTYPE = 6,
- VT_ISORDERED = 8,
- VT_DICTIONARYKIND = 10
- };
- /// The known dictionary id in the application where this data is used. In
- /// the file or streaming formats, the dictionary ids are found in the
- /// DictionaryBatch messages
- int64_t id() const {
- return GetField<int64_t>(VT_ID, 0);
- }
- /// The dictionary indices are constrained to be non-negative integers. If
- /// this field is null, the indices must be signed int32. To maximize
- /// cross-language compatibility and performance, implementations are
- /// recommended to prefer signed integer types over unsigned integer types
- /// and to avoid uint64 indices unless they are required by an application.
- const org::apache::arrow::flatbuf::Int *indexType() const {
- return GetPointer<const org::apache::arrow::flatbuf::Int *>(VT_INDEXTYPE);
- }
- /// By default, dictionaries are not ordered, or the order does not have
- /// semantic meaning. In some statistical, applications, dictionary-encoding
- /// is used to represent ordered categorical data, and we provide a way to
- /// preserve that metadata here
- bool isOrdered() const {
- return GetField<uint8_t>(VT_ISORDERED, 0) != 0;
- }
- org::apache::arrow::flatbuf::DictionaryKind dictionaryKind() const {
- return static_cast<org::apache::arrow::flatbuf::DictionaryKind>(GetField<int16_t>(VT_DICTIONARYKIND, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyField<int64_t>(verifier, VT_ID) &&
- VerifyOffset(verifier, VT_INDEXTYPE) &&
- verifier.VerifyTable(indexType()) &&
- VerifyField<uint8_t>(verifier, VT_ISORDERED) &&
- VerifyField<int16_t>(verifier, VT_DICTIONARYKIND) &&
- verifier.EndTable();
- }
-};
-
-struct DictionaryEncodingBuilder {
- typedef DictionaryEncoding Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_id(int64_t id) {
- fbb_.AddElement<int64_t>(DictionaryEncoding::VT_ID, id, 0);
- }
- void add_indexType(flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indexType) {
- fbb_.AddOffset(DictionaryEncoding::VT_INDEXTYPE, indexType);
- }
- void add_isOrdered(bool isOrdered) {
- fbb_.AddElement<uint8_t>(DictionaryEncoding::VT_ISORDERED, static_cast<uint8_t>(isOrdered), 0);
- }
- void add_dictionaryKind(org::apache::arrow::flatbuf::DictionaryKind dictionaryKind) {
- fbb_.AddElement<int16_t>(DictionaryEncoding::VT_DICTIONARYKIND, static_cast<int16_t>(dictionaryKind), 0);
- }
- explicit DictionaryEncodingBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- DictionaryEncodingBuilder &operator=(const DictionaryEncodingBuilder &);
- flatbuffers::Offset<DictionaryEncoding> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<DictionaryEncoding>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<DictionaryEncoding> CreateDictionaryEncoding(
- flatbuffers::FlatBufferBuilder &_fbb,
- int64_t id = 0,
- flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indexType = 0,
- bool isOrdered = false,
- org::apache::arrow::flatbuf::DictionaryKind dictionaryKind = org::apache::arrow::flatbuf::DictionaryKind::DenseArray) {
- DictionaryEncodingBuilder builder_(_fbb);
- builder_.add_id(id);
- builder_.add_indexType(indexType);
- builder_.add_dictionaryKind(dictionaryKind);
- builder_.add_isOrdered(isOrdered);
- return builder_.Finish();
-}
-
-/// ----------------------------------------------------------------------
-/// A field represents a named column in a record / row batch or child of a
-/// nested type.
-struct Field FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef FieldBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_NAME = 4,
- VT_NULLABLE = 6,
- VT_TYPE_TYPE = 8,
- VT_TYPE = 10,
- VT_DICTIONARY = 12,
- VT_CHILDREN = 14,
- VT_CUSTOM_METADATA = 16
- };
- /// Name is not required, in i.e. a List
- const flatbuffers::String *name() const {
- return GetPointer<const flatbuffers::String *>(VT_NAME);
- }
- /// Whether or not this field can contain nulls. Should be true in general.
- bool nullable() const {
- return GetField<uint8_t>(VT_NULLABLE, 0) != 0;
- }
- org::apache::arrow::flatbuf::Type type_type() const {
- return static_cast<org::apache::arrow::flatbuf::Type>(GetField<uint8_t>(VT_TYPE_TYPE, 0));
- }
- /// This is the type of the decoded value if the field is dictionary encoded.
- const void *type() const {
- return GetPointer<const void *>(VT_TYPE);
- }
- template<typename T> const T *type_as() const;
- const org::apache::arrow::flatbuf::Null *type_as_Null() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Null ? static_cast<const org::apache::arrow::flatbuf::Null *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Int *type_as_Int() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Int ? static_cast<const org::apache::arrow::flatbuf::Int *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::FloatingPoint *type_as_FloatingPoint() const {
- return type_type() == org::apache::arrow::flatbuf::Type::FloatingPoint ? static_cast<const org::apache::arrow::flatbuf::FloatingPoint *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Binary *type_as_Binary() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Binary ? static_cast<const org::apache::arrow::flatbuf::Binary *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Utf8 *type_as_Utf8() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Utf8 ? static_cast<const org::apache::arrow::flatbuf::Utf8 *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Bool *type_as_Bool() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Bool ? static_cast<const org::apache::arrow::flatbuf::Bool *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Decimal *type_as_Decimal() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Decimal ? static_cast<const org::apache::arrow::flatbuf::Decimal *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Date *type_as_Date() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Date ? static_cast<const org::apache::arrow::flatbuf::Date *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Time *type_as_Time() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Time ? static_cast<const org::apache::arrow::flatbuf::Time *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Timestamp *type_as_Timestamp() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Timestamp ? static_cast<const org::apache::arrow::flatbuf::Timestamp *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Interval *type_as_Interval() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Interval ? static_cast<const org::apache::arrow::flatbuf::Interval *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::List *type_as_List() const {
- return type_type() == org::apache::arrow::flatbuf::Type::List ? static_cast<const org::apache::arrow::flatbuf::List *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Struct_ *type_as_Struct_() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Struct_ ? static_cast<const org::apache::arrow::flatbuf::Struct_ *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Union *type_as_Union() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Union ? static_cast<const org::apache::arrow::flatbuf::Union *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::FixedSizeBinary *type_as_FixedSizeBinary() const {
- return type_type() == org::apache::arrow::flatbuf::Type::FixedSizeBinary ? static_cast<const org::apache::arrow::flatbuf::FixedSizeBinary *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::FixedSizeList *type_as_FixedSizeList() const {
- return type_type() == org::apache::arrow::flatbuf::Type::FixedSizeList ? static_cast<const org::apache::arrow::flatbuf::FixedSizeList *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Map *type_as_Map() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Map ? static_cast<const org::apache::arrow::flatbuf::Map *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Duration *type_as_Duration() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Duration ? static_cast<const org::apache::arrow::flatbuf::Duration *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::LargeBinary *type_as_LargeBinary() const {
- return type_type() == org::apache::arrow::flatbuf::Type::LargeBinary ? static_cast<const org::apache::arrow::flatbuf::LargeBinary *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::LargeUtf8 *type_as_LargeUtf8() const {
- return type_type() == org::apache::arrow::flatbuf::Type::LargeUtf8 ? static_cast<const org::apache::arrow::flatbuf::LargeUtf8 *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::LargeList *type_as_LargeList() const {
- return type_type() == org::apache::arrow::flatbuf::Type::LargeList ? static_cast<const org::apache::arrow::flatbuf::LargeList *>(type()) : nullptr;
- }
- /// Present only if the field is dictionary encoded.
- const org::apache::arrow::flatbuf::DictionaryEncoding *dictionary() const {
- return GetPointer<const org::apache::arrow::flatbuf::DictionaryEncoding *>(VT_DICTIONARY);
- }
- /// children apply only to nested data types like Struct, List and Union. For
- /// primitive types children will have length 0.
- const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>> *children() const {
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>> *>(VT_CHILDREN);
- }
- /// User-defined metadata
- const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>> *custom_metadata() const {
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>> *>(VT_CUSTOM_METADATA);
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyOffset(verifier, VT_NAME) &&
- verifier.VerifyString(name()) &&
- VerifyField<uint8_t>(verifier, VT_NULLABLE) &&
- VerifyField<uint8_t>(verifier, VT_TYPE_TYPE) &&
- VerifyOffset(verifier, VT_TYPE) &&
- VerifyType(verifier, type(), type_type()) &&
- VerifyOffset(verifier, VT_DICTIONARY) &&
- verifier.VerifyTable(dictionary()) &&
- VerifyOffset(verifier, VT_CHILDREN) &&
- verifier.VerifyVector(children()) &&
- verifier.VerifyVectorOfTables(children()) &&
- VerifyOffset(verifier, VT_CUSTOM_METADATA) &&
- verifier.VerifyVector(custom_metadata()) &&
- verifier.VerifyVectorOfTables(custom_metadata()) &&
- verifier.EndTable();
- }
-};
-
-template<> inline const org::apache::arrow::flatbuf::Null *Field::type_as<org::apache::arrow::flatbuf::Null>() const {
- return type_as_Null();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Int *Field::type_as<org::apache::arrow::flatbuf::Int>() const {
- return type_as_Int();
-}
-
-template<> inline const org::apache::arrow::flatbuf::FloatingPoint *Field::type_as<org::apache::arrow::flatbuf::FloatingPoint>() const {
- return type_as_FloatingPoint();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Binary *Field::type_as<org::apache::arrow::flatbuf::Binary>() const {
- return type_as_Binary();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Utf8 *Field::type_as<org::apache::arrow::flatbuf::Utf8>() const {
- return type_as_Utf8();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Bool *Field::type_as<org::apache::arrow::flatbuf::Bool>() const {
- return type_as_Bool();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Decimal *Field::type_as<org::apache::arrow::flatbuf::Decimal>() const {
- return type_as_Decimal();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Date *Field::type_as<org::apache::arrow::flatbuf::Date>() const {
- return type_as_Date();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Time *Field::type_as<org::apache::arrow::flatbuf::Time>() const {
- return type_as_Time();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Timestamp *Field::type_as<org::apache::arrow::flatbuf::Timestamp>() const {
- return type_as_Timestamp();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Interval *Field::type_as<org::apache::arrow::flatbuf::Interval>() const {
- return type_as_Interval();
-}
-
-template<> inline const org::apache::arrow::flatbuf::List *Field::type_as<org::apache::arrow::flatbuf::List>() const {
- return type_as_List();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Struct_ *Field::type_as<org::apache::arrow::flatbuf::Struct_>() const {
- return type_as_Struct_();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Union *Field::type_as<org::apache::arrow::flatbuf::Union>() const {
- return type_as_Union();
-}
-
-template<> inline const org::apache::arrow::flatbuf::FixedSizeBinary *Field::type_as<org::apache::arrow::flatbuf::FixedSizeBinary>() const {
- return type_as_FixedSizeBinary();
-}
-
-template<> inline const org::apache::arrow::flatbuf::FixedSizeList *Field::type_as<org::apache::arrow::flatbuf::FixedSizeList>() const {
- return type_as_FixedSizeList();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Map *Field::type_as<org::apache::arrow::flatbuf::Map>() const {
- return type_as_Map();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Duration *Field::type_as<org::apache::arrow::flatbuf::Duration>() const {
- return type_as_Duration();
-}
-
-template<> inline const org::apache::arrow::flatbuf::LargeBinary *Field::type_as<org::apache::arrow::flatbuf::LargeBinary>() const {
- return type_as_LargeBinary();
-}
-
-template<> inline const org::apache::arrow::flatbuf::LargeUtf8 *Field::type_as<org::apache::arrow::flatbuf::LargeUtf8>() const {
- return type_as_LargeUtf8();
-}
-
-template<> inline const org::apache::arrow::flatbuf::LargeList *Field::type_as<org::apache::arrow::flatbuf::LargeList>() const {
- return type_as_LargeList();
-}
-
-struct FieldBuilder {
- typedef Field Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_name(flatbuffers::Offset<flatbuffers::String> name) {
- fbb_.AddOffset(Field::VT_NAME, name);
- }
- void add_nullable(bool nullable) {
- fbb_.AddElement<uint8_t>(Field::VT_NULLABLE, static_cast<uint8_t>(nullable), 0);
- }
- void add_type_type(org::apache::arrow::flatbuf::Type type_type) {
- fbb_.AddElement<uint8_t>(Field::VT_TYPE_TYPE, static_cast<uint8_t>(type_type), 0);
- }
- void add_type(flatbuffers::Offset<void> type) {
- fbb_.AddOffset(Field::VT_TYPE, type);
- }
- void add_dictionary(flatbuffers::Offset<org::apache::arrow::flatbuf::DictionaryEncoding> dictionary) {
- fbb_.AddOffset(Field::VT_DICTIONARY, dictionary);
- }
- void add_children(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>>> children) {
- fbb_.AddOffset(Field::VT_CHILDREN, children);
- }
- void add_custom_metadata(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>> custom_metadata) {
- fbb_.AddOffset(Field::VT_CUSTOM_METADATA, custom_metadata);
- }
- explicit FieldBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- FieldBuilder &operator=(const FieldBuilder &);
- flatbuffers::Offset<Field> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Field>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Field> CreateField(
- flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::String> name = 0,
- bool nullable = false,
- org::apache::arrow::flatbuf::Type type_type = org::apache::arrow::flatbuf::Type::NONE,
- flatbuffers::Offset<void> type = 0,
- flatbuffers::Offset<org::apache::arrow::flatbuf::DictionaryEncoding> dictionary = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>>> children = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>> custom_metadata = 0) {
- FieldBuilder builder_(_fbb);
- builder_.add_custom_metadata(custom_metadata);
- builder_.add_children(children);
- builder_.add_dictionary(dictionary);
- builder_.add_type(type);
- builder_.add_name(name);
- builder_.add_type_type(type_type);
- builder_.add_nullable(nullable);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Field> CreateFieldDirect(
- flatbuffers::FlatBufferBuilder &_fbb,
- const char *name = nullptr,
- bool nullable = false,
- org::apache::arrow::flatbuf::Type type_type = org::apache::arrow::flatbuf::Type::NONE,
- flatbuffers::Offset<void> type = 0,
- flatbuffers::Offset<org::apache::arrow::flatbuf::DictionaryEncoding> dictionary = 0,
- const std::vector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>> *children = nullptr,
- const std::vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>> *custom_metadata = nullptr) {
- auto name__ = name ? _fbb.CreateString(name) : 0;
- auto children__ = children ? _fbb.CreateVector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>>(*children) : 0;
- auto custom_metadata__ = custom_metadata ? _fbb.CreateVector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>(*custom_metadata) : 0;
- return org::apache::arrow::flatbuf::CreateField(
- _fbb,
- name__,
- nullable,
- type_type,
- type,
- dictionary,
- children__,
- custom_metadata__);
-}
-
-/// ----------------------------------------------------------------------
-/// A Schema describes the columns in a row batch
-struct Schema FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef SchemaBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_ENDIANNESS = 4,
- VT_FIELDS = 6,
- VT_CUSTOM_METADATA = 8,
- VT_FEATURES = 10
- };
- /// endianness of the buffer
- /// it is Little Endian by default
- /// if endianness doesn't match the underlying system then the vectors need to be converted
- org::apache::arrow::flatbuf::Endianness endianness() const {
- return static_cast<org::apache::arrow::flatbuf::Endianness>(GetField<int16_t>(VT_ENDIANNESS, 0));
- }
- const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>> *fields() const {
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>> *>(VT_FIELDS);
- }
- const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>> *custom_metadata() const {
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>> *>(VT_CUSTOM_METADATA);
- }
- /// Features used in the stream/file.
- const flatbuffers::Vector<int64_t> *features() const {
- return GetPointer<const flatbuffers::Vector<int64_t> *>(VT_FEATURES);
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyField<int16_t>(verifier, VT_ENDIANNESS) &&
- VerifyOffset(verifier, VT_FIELDS) &&
- verifier.VerifyVector(fields()) &&
- verifier.VerifyVectorOfTables(fields()) &&
- VerifyOffset(verifier, VT_CUSTOM_METADATA) &&
- verifier.VerifyVector(custom_metadata()) &&
- verifier.VerifyVectorOfTables(custom_metadata()) &&
- VerifyOffset(verifier, VT_FEATURES) &&
- verifier.VerifyVector(features()) &&
- verifier.EndTable();
- }
-};
-
-struct SchemaBuilder {
- typedef Schema Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_endianness(org::apache::arrow::flatbuf::Endianness endianness) {
- fbb_.AddElement<int16_t>(Schema::VT_ENDIANNESS, static_cast<int16_t>(endianness), 0);
- }
- void add_fields(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>>> fields) {
- fbb_.AddOffset(Schema::VT_FIELDS, fields);
- }
- void add_custom_metadata(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>> custom_metadata) {
- fbb_.AddOffset(Schema::VT_CUSTOM_METADATA, custom_metadata);
- }
- void add_features(flatbuffers::Offset<flatbuffers::Vector<int64_t>> features) {
- fbb_.AddOffset(Schema::VT_FEATURES, features);
- }
- explicit SchemaBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- SchemaBuilder &operator=(const SchemaBuilder &);
- flatbuffers::Offset<Schema> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Schema>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Schema> CreateSchema(
- flatbuffers::FlatBufferBuilder &_fbb,
- org::apache::arrow::flatbuf::Endianness endianness = org::apache::arrow::flatbuf::Endianness::Little,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>>> fields = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>> custom_metadata = 0,
- flatbuffers::Offset<flatbuffers::Vector<int64_t>> features = 0) {
- SchemaBuilder builder_(_fbb);
- builder_.add_features(features);
- builder_.add_custom_metadata(custom_metadata);
- builder_.add_fields(fields);
- builder_.add_endianness(endianness);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Schema> CreateSchemaDirect(
- flatbuffers::FlatBufferBuilder &_fbb,
- org::apache::arrow::flatbuf::Endianness endianness = org::apache::arrow::flatbuf::Endianness::Little,
- const std::vector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>> *fields = nullptr,
- const std::vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>> *custom_metadata = nullptr,
- const std::vector<int64_t> *features = nullptr) {
- auto fields__ = fields ? _fbb.CreateVector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>>(*fields) : 0;
- auto custom_metadata__ = custom_metadata ? _fbb.CreateVector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>(*custom_metadata) : 0;
- auto features__ = features ? _fbb.CreateVector<int64_t>(*features) : 0;
- return org::apache::arrow::flatbuf::CreateSchema(
- _fbb,
- endianness,
- fields__,
- custom_metadata__,
- features__);
-}
-
-inline bool VerifyType(flatbuffers::Verifier &verifier, const void *obj, Type type) {
- switch (type) {
- case Type::NONE: {
- return true;
- }
- case Type::Null: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Null *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case Type::Int: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Int *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case Type::FloatingPoint: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::FloatingPoint *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case Type::Binary: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Binary *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case Type::Utf8: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Utf8 *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case Type::Bool: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Bool *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case Type::Decimal: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Decimal *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case Type::Date: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Date *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case Type::Time: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Time *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case Type::Timestamp: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Timestamp *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case Type::Interval: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Interval *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case Type::List: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::List *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case Type::Struct_: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Struct_ *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case Type::Union: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Union *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case Type::FixedSizeBinary: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::FixedSizeBinary *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case Type::FixedSizeList: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::FixedSizeList *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case Type::Map: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Map *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case Type::Duration: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Duration *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case Type::LargeBinary: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::LargeBinary *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case Type::LargeUtf8: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::LargeUtf8 *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case Type::LargeList: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::LargeList *>(obj);
- return verifier.VerifyTable(ptr);
- }
- default: return true;
- }
-}
-
-inline bool VerifyTypeVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types) {
- if (!values || !types) return !values && !types;
- if (values->size() != types->size()) return false;
- for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) {
- if (!VerifyType(
- verifier, values->Get(i), types->GetEnum<Type>(i))) {
- return false;
- }
- }
- return true;
-}
-
-inline const org::apache::arrow::flatbuf::Schema *GetSchema(const void *buf) {
- return flatbuffers::GetRoot<org::apache::arrow::flatbuf::Schema>(buf);
-}
-
-inline const org::apache::arrow::flatbuf::Schema *GetSizePrefixedSchema(const void *buf) {
- return flatbuffers::GetSizePrefixedRoot<org::apache::arrow::flatbuf::Schema>(buf);
-}
-
-inline bool VerifySchemaBuffer(
- flatbuffers::Verifier &verifier) {
- return verifier.VerifyBuffer<org::apache::arrow::flatbuf::Schema>(nullptr);
-}
-
-inline bool VerifySizePrefixedSchemaBuffer(
- flatbuffers::Verifier &verifier) {
- return verifier.VerifySizePrefixedBuffer<org::apache::arrow::flatbuf::Schema>(nullptr);
-}
-
-inline void FinishSchemaBuffer(
- flatbuffers::FlatBufferBuilder &fbb,
- flatbuffers::Offset<org::apache::arrow::flatbuf::Schema> root) {
- fbb.Finish(root);
-}
-
-inline void FinishSizePrefixedSchemaBuffer(
- flatbuffers::FlatBufferBuilder &fbb,
- flatbuffers::Offset<org::apache::arrow::flatbuf::Schema> root) {
- fbb.FinishSizePrefixed(root);
-}
-
-} // namespace flatbuf
-} // namespace arrow
-} // namespace apache
-} // namespace org
-
-#endif // FLATBUFFERS_GENERATED_SCHEMA_ORG_APACHE_ARROW_FLATBUF_H_
+// automatically generated by the FlatBuffers compiler, do not modify
+
+
+#ifndef FLATBUFFERS_GENERATED_SCHEMA_ORG_APACHE_ARROW_FLATBUF_H_
+#define FLATBUFFERS_GENERATED_SCHEMA_ORG_APACHE_ARROW_FLATBUF_H_
+
+#include "flatbuffers/flatbuffers.h"
+
+namespace org {
+namespace apache {
+namespace arrow {
+namespace flatbuf {
+
+struct Null;
+struct NullBuilder;
+
+struct Struct_;
+struct Struct_Builder;
+
+struct List;
+struct ListBuilder;
+
+struct LargeList;
+struct LargeListBuilder;
+
+struct FixedSizeList;
+struct FixedSizeListBuilder;
+
+struct Map;
+struct MapBuilder;
+
+struct Union;
+struct UnionBuilder;
+
+struct Int;
+struct IntBuilder;
+
+struct FloatingPoint;
+struct FloatingPointBuilder;
+
+struct Utf8;
+struct Utf8Builder;
+
+struct Binary;
+struct BinaryBuilder;
+
+struct LargeUtf8;
+struct LargeUtf8Builder;
+
+struct LargeBinary;
+struct LargeBinaryBuilder;
+
+struct FixedSizeBinary;
+struct FixedSizeBinaryBuilder;
+
+struct Bool;
+struct BoolBuilder;
+
+struct Decimal;
+struct DecimalBuilder;
+
+struct Date;
+struct DateBuilder;
+
+struct Time;
+struct TimeBuilder;
+
+struct Timestamp;
+struct TimestampBuilder;
+
+struct Interval;
+struct IntervalBuilder;
+
+struct Duration;
+struct DurationBuilder;
+
+struct KeyValue;
+struct KeyValueBuilder;
+
+struct DictionaryEncoding;
+struct DictionaryEncodingBuilder;
+
+struct Field;
+struct FieldBuilder;
+
+struct Buffer;
+
+struct Schema;
+struct SchemaBuilder;
+
+enum class MetadataVersion : int16_t {
+ /// 0.1.0 (October 2016).
+ V1 = 0,
+ /// 0.2.0 (February 2017). Non-backwards compatible with V1.
+ V2 = 1,
+ /// 0.3.0 -> 0.7.1 (May - December 2017). Non-backwards compatible with V2.
+ V3 = 2,
+ /// >= 0.8.0 (December 2017). Non-backwards compatible with V3.
+ V4 = 3,
+ /// >= 1.0.0 (July 2020. Backwards compatible with V4 (V5 readers can read V4
+ /// metadata and IPC messages). Implementations are recommended to provide a
+ /// V4 compatibility mode with V5 format changes disabled.
+ ///
+ /// Incompatible changes between V4 and V5:
+ /// - Union buffer layout has changed. In V5, Unions don't have a validity
+ /// bitmap buffer.
+ V5 = 4,
+ MIN = V1,
+ MAX = V5
+};
+
+inline const MetadataVersion (&EnumValuesMetadataVersion())[5] {
+ static const MetadataVersion values[] = {
+ MetadataVersion::V1,
+ MetadataVersion::V2,
+ MetadataVersion::V3,
+ MetadataVersion::V4,
+ MetadataVersion::V5
+ };
+ return values;
+}
+
+inline const char * const *EnumNamesMetadataVersion() {
+ static const char * const names[6] = {
+ "V1",
+ "V2",
+ "V3",
+ "V4",
+ "V5",
+ nullptr
+ };
+ return names;
+}
+
+inline const char *EnumNameMetadataVersion(MetadataVersion e) {
+ if (flatbuffers::IsOutRange(e, MetadataVersion::V1, MetadataVersion::V5)) return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesMetadataVersion()[index];
+}
+
+/// Represents Arrow Features that might not have full support
+/// within implementations. This is intended to be used in
+/// two scenarios:
+/// 1. A mechanism for readers of Arrow Streams
+/// and files to understand that the stream or file makes
+/// use of a feature that isn't supported or unknown to
+/// the implementation (and therefore can meet the Arrow
+/// forward compatibility guarantees).
+/// 2. A means of negotiating between a client and server
+/// what features a stream is allowed to use. The enums
+/// values here are intented to represent higher level
+/// features, additional details maybe negotiated
+/// with key-value pairs specific to the protocol.
+///
+/// Enums added to this list should be assigned power-of-two values
+/// to facilitate exchanging and comparing bitmaps for supported
+/// features.
+enum class Feature : int64_t {
+ /// Needed to make flatbuffers happy.
+ UNUSED = 0,
+ /// The stream makes use of multiple full dictionaries with the
+ /// same ID and assumes clients implement dictionary replacement
+ /// correctly.
+ DICTIONARY_REPLACEMENT = 1LL,
+ /// The stream makes use of compressed bodies as described
+ /// in Message.fbs.
+ COMPRESSED_BODY = 2LL,
+ MIN = UNUSED,
+ MAX = COMPRESSED_BODY
+};
+
+inline const Feature (&EnumValuesFeature())[3] {
+ static const Feature values[] = {
+ Feature::UNUSED,
+ Feature::DICTIONARY_REPLACEMENT,
+ Feature::COMPRESSED_BODY
+ };
+ return values;
+}
+
+inline const char * const *EnumNamesFeature() {
+ static const char * const names[4] = {
+ "UNUSED",
+ "DICTIONARY_REPLACEMENT",
+ "COMPRESSED_BODY",
+ nullptr
+ };
+ return names;
+}
+
+inline const char *EnumNameFeature(Feature e) {
+ if (flatbuffers::IsOutRange(e, Feature::UNUSED, Feature::COMPRESSED_BODY)) return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesFeature()[index];
+}
+
+enum class UnionMode : int16_t {
+ Sparse = 0,
+ Dense = 1,
+ MIN = Sparse,
+ MAX = Dense
+};
+
+inline const UnionMode (&EnumValuesUnionMode())[2] {
+ static const UnionMode values[] = {
+ UnionMode::Sparse,
+ UnionMode::Dense
+ };
+ return values;
+}
+
+inline const char * const *EnumNamesUnionMode() {
+ static const char * const names[3] = {
+ "Sparse",
+ "Dense",
+ nullptr
+ };
+ return names;
+}
+
+inline const char *EnumNameUnionMode(UnionMode e) {
+ if (flatbuffers::IsOutRange(e, UnionMode::Sparse, UnionMode::Dense)) return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesUnionMode()[index];
+}
+
+enum class Precision : int16_t {
+ HALF = 0,
+ SINGLE = 1,
+ DOUBLE = 2,
+ MIN = HALF,
+ MAX = DOUBLE
+};
+
+inline const Precision (&EnumValuesPrecision())[3] {
+ static const Precision values[] = {
+ Precision::HALF,
+ Precision::SINGLE,
+ Precision::DOUBLE
+ };
+ return values;
+}
+
+inline const char * const *EnumNamesPrecision() {
+ static const char * const names[4] = {
+ "HALF",
+ "SINGLE",
+ "DOUBLE",
+ nullptr
+ };
+ return names;
+}
+
+inline const char *EnumNamePrecision(Precision e) {
+ if (flatbuffers::IsOutRange(e, Precision::HALF, Precision::DOUBLE)) return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesPrecision()[index];
+}
+
+enum class DateUnit : int16_t {
+ DAY = 0,
+ MILLISECOND = 1,
+ MIN = DAY,
+ MAX = MILLISECOND
+};
+
+inline const DateUnit (&EnumValuesDateUnit())[2] {
+ static const DateUnit values[] = {
+ DateUnit::DAY,
+ DateUnit::MILLISECOND
+ };
+ return values;
+}
+
+inline const char * const *EnumNamesDateUnit() {
+ static const char * const names[3] = {
+ "DAY",
+ "MILLISECOND",
+ nullptr
+ };
+ return names;
+}
+
+inline const char *EnumNameDateUnit(DateUnit e) {
+ if (flatbuffers::IsOutRange(e, DateUnit::DAY, DateUnit::MILLISECOND)) return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesDateUnit()[index];
+}
+
+enum class TimeUnit : int16_t {
+ SECOND = 0,
+ MILLISECOND = 1,
+ MICROSECOND = 2,
+ NANOSECOND = 3,
+ MIN = SECOND,
+ MAX = NANOSECOND
+};
+
+inline const TimeUnit (&EnumValuesTimeUnit())[4] {
+ static const TimeUnit values[] = {
+ TimeUnit::SECOND,
+ TimeUnit::MILLISECOND,
+ TimeUnit::MICROSECOND,
+ TimeUnit::NANOSECOND
+ };
+ return values;
+}
+
+inline const char * const *EnumNamesTimeUnit() {
+ static const char * const names[5] = {
+ "SECOND",
+ "MILLISECOND",
+ "MICROSECOND",
+ "NANOSECOND",
+ nullptr
+ };
+ return names;
+}
+
+inline const char *EnumNameTimeUnit(TimeUnit e) {
+ if (flatbuffers::IsOutRange(e, TimeUnit::SECOND, TimeUnit::NANOSECOND)) return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesTimeUnit()[index];
+}
+
+enum class IntervalUnit : int16_t {
+ YEAR_MONTH = 0,
+ DAY_TIME = 1,
+ MIN = YEAR_MONTH,
+ MAX = DAY_TIME
+};
+
+inline const IntervalUnit (&EnumValuesIntervalUnit())[2] {
+ static const IntervalUnit values[] = {
+ IntervalUnit::YEAR_MONTH,
+ IntervalUnit::DAY_TIME
+ };
+ return values;
+}
+
+inline const char * const *EnumNamesIntervalUnit() {
+ static const char * const names[3] = {
+ "YEAR_MONTH",
+ "DAY_TIME",
+ nullptr
+ };
+ return names;
+}
+
+inline const char *EnumNameIntervalUnit(IntervalUnit e) {
+ if (flatbuffers::IsOutRange(e, IntervalUnit::YEAR_MONTH, IntervalUnit::DAY_TIME)) return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesIntervalUnit()[index];
+}
+
+/// ----------------------------------------------------------------------
+/// Top-level Type value, enabling extensible type-specific metadata. We can
+/// add new logical types to Type without breaking backwards compatibility
+enum class Type : uint8_t {
+ NONE = 0,
+ Null = 1,
+ Int = 2,
+ FloatingPoint = 3,
+ Binary = 4,
+ Utf8 = 5,
+ Bool = 6,
+ Decimal = 7,
+ Date = 8,
+ Time = 9,
+ Timestamp = 10,
+ Interval = 11,
+ List = 12,
+ Struct_ = 13,
+ Union = 14,
+ FixedSizeBinary = 15,
+ FixedSizeList = 16,
+ Map = 17,
+ Duration = 18,
+ LargeBinary = 19,
+ LargeUtf8 = 20,
+ LargeList = 21,
+ MIN = NONE,
+ MAX = LargeList
+};
+
+inline const Type (&EnumValuesType())[22] {
+ static const Type values[] = {
+ Type::NONE,
+ Type::Null,
+ Type::Int,
+ Type::FloatingPoint,
+ Type::Binary,
+ Type::Utf8,
+ Type::Bool,
+ Type::Decimal,
+ Type::Date,
+ Type::Time,
+ Type::Timestamp,
+ Type::Interval,
+ Type::List,
+ Type::Struct_,
+ Type::Union,
+ Type::FixedSizeBinary,
+ Type::FixedSizeList,
+ Type::Map,
+ Type::Duration,
+ Type::LargeBinary,
+ Type::LargeUtf8,
+ Type::LargeList
+ };
+ return values;
+}
+
+inline const char * const *EnumNamesType() {
+ static const char * const names[23] = {
+ "NONE",
+ "Null",
+ "Int",
+ "FloatingPoint",
+ "Binary",
+ "Utf8",
+ "Bool",
+ "Decimal",
+ "Date",
+ "Time",
+ "Timestamp",
+ "Interval",
+ "List",
+ "Struct_",
+ "Union",
+ "FixedSizeBinary",
+ "FixedSizeList",
+ "Map",
+ "Duration",
+ "LargeBinary",
+ "LargeUtf8",
+ "LargeList",
+ nullptr
+ };
+ return names;
+}
+
+inline const char *EnumNameType(Type e) {
+ if (flatbuffers::IsOutRange(e, Type::NONE, Type::LargeList)) return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesType()[index];
+}
+
+template<typename T> struct TypeTraits {
+ static const Type enum_value = Type::NONE;
+};
+
+template<> struct TypeTraits<org::apache::arrow::flatbuf::Null> {
+ static const Type enum_value = Type::Null;
+};
+
+template<> struct TypeTraits<org::apache::arrow::flatbuf::Int> {
+ static const Type enum_value = Type::Int;
+};
+
+template<> struct TypeTraits<org::apache::arrow::flatbuf::FloatingPoint> {
+ static const Type enum_value = Type::FloatingPoint;
+};
+
+template<> struct TypeTraits<org::apache::arrow::flatbuf::Binary> {
+ static const Type enum_value = Type::Binary;
+};
+
+template<> struct TypeTraits<org::apache::arrow::flatbuf::Utf8> {
+ static const Type enum_value = Type::Utf8;
+};
+
+template<> struct TypeTraits<org::apache::arrow::flatbuf::Bool> {
+ static const Type enum_value = Type::Bool;
+};
+
+template<> struct TypeTraits<org::apache::arrow::flatbuf::Decimal> {
+ static const Type enum_value = Type::Decimal;
+};
+
+template<> struct TypeTraits<org::apache::arrow::flatbuf::Date> {
+ static const Type enum_value = Type::Date;
+};
+
+template<> struct TypeTraits<org::apache::arrow::flatbuf::Time> {
+ static const Type enum_value = Type::Time;
+};
+
+template<> struct TypeTraits<org::apache::arrow::flatbuf::Timestamp> {
+ static const Type enum_value = Type::Timestamp;
+};
+
+template<> struct TypeTraits<org::apache::arrow::flatbuf::Interval> {
+ static const Type enum_value = Type::Interval;
+};
+
+template<> struct TypeTraits<org::apache::arrow::flatbuf::List> {
+ static const Type enum_value = Type::List;
+};
+
+template<> struct TypeTraits<org::apache::arrow::flatbuf::Struct_> {
+ static const Type enum_value = Type::Struct_;
+};
+
+template<> struct TypeTraits<org::apache::arrow::flatbuf::Union> {
+ static const Type enum_value = Type::Union;
+};
+
+template<> struct TypeTraits<org::apache::arrow::flatbuf::FixedSizeBinary> {
+ static const Type enum_value = Type::FixedSizeBinary;
+};
+
+template<> struct TypeTraits<org::apache::arrow::flatbuf::FixedSizeList> {
+ static const Type enum_value = Type::FixedSizeList;
+};
+
+template<> struct TypeTraits<org::apache::arrow::flatbuf::Map> {
+ static const Type enum_value = Type::Map;
+};
+
+template<> struct TypeTraits<org::apache::arrow::flatbuf::Duration> {
+ static const Type enum_value = Type::Duration;
+};
+
+template<> struct TypeTraits<org::apache::arrow::flatbuf::LargeBinary> {
+ static const Type enum_value = Type::LargeBinary;
+};
+
+template<> struct TypeTraits<org::apache::arrow::flatbuf::LargeUtf8> {
+ static const Type enum_value = Type::LargeUtf8;
+};
+
+template<> struct TypeTraits<org::apache::arrow::flatbuf::LargeList> {
+ static const Type enum_value = Type::LargeList;
+};
+
+bool VerifyType(flatbuffers::Verifier &verifier, const void *obj, Type type);
+bool VerifyTypeVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types);
+
+/// ----------------------------------------------------------------------
+/// Dictionary encoding metadata
+/// Maintained for forwards compatibility, in the future
+/// Dictionaries might be explicit maps between integers and values
+/// allowing for non-contiguous index values
+enum class DictionaryKind : int16_t {
+ DenseArray = 0,
+ MIN = DenseArray,
+ MAX = DenseArray
+};
+
+inline const DictionaryKind (&EnumValuesDictionaryKind())[1] {
+ static const DictionaryKind values[] = {
+ DictionaryKind::DenseArray
+ };
+ return values;
+}
+
+inline const char * const *EnumNamesDictionaryKind() {
+ static const char * const names[2] = {
+ "DenseArray",
+ nullptr
+ };
+ return names;
+}
+
+inline const char *EnumNameDictionaryKind(DictionaryKind e) {
+ if (flatbuffers::IsOutRange(e, DictionaryKind::DenseArray, DictionaryKind::DenseArray)) return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesDictionaryKind()[index];
+}
+
+/// ----------------------------------------------------------------------
+/// Endianness of the platform producing the data
+enum class Endianness : int16_t {
+ Little = 0,
+ Big = 1,
+ MIN = Little,
+ MAX = Big
+};
+
+inline const Endianness (&EnumValuesEndianness())[2] {
+ static const Endianness values[] = {
+ Endianness::Little,
+ Endianness::Big
+ };
+ return values;
+}
+
+inline const char * const *EnumNamesEndianness() {
+ static const char * const names[3] = {
+ "Little",
+ "Big",
+ nullptr
+ };
+ return names;
+}
+
+inline const char *EnumNameEndianness(Endianness e) {
+ if (flatbuffers::IsOutRange(e, Endianness::Little, Endianness::Big)) return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesEndianness()[index];
+}
+
+/// ----------------------------------------------------------------------
+/// A Buffer represents a single contiguous memory segment
+FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(8) Buffer FLATBUFFERS_FINAL_CLASS {
+ private:
+ int64_t offset_;
+ int64_t length_;
+
+ public:
+ Buffer() {
+ memset(static_cast<void *>(this), 0, sizeof(Buffer));
+ }
+ Buffer(int64_t _offset, int64_t _length)
+ : offset_(flatbuffers::EndianScalar(_offset)),
+ length_(flatbuffers::EndianScalar(_length)) {
+ }
+ /// The relative offset into the shared memory page where the bytes for this
+ /// buffer starts
+ int64_t offset() const {
+ return flatbuffers::EndianScalar(offset_);
+ }
+ /// The absolute length (in bytes) of the memory buffer. The memory is found
+ /// from offset (inclusive) to offset + length (non-inclusive). When building
+ /// messages using the encapsulated IPC message, padding bytes may be written
+ /// after a buffer, but such padding bytes do not need to be accounted for in
+ /// the size here.
+ int64_t length() const {
+ return flatbuffers::EndianScalar(length_);
+ }
+};
+FLATBUFFERS_STRUCT_END(Buffer, 16);
+
+/// These are stored in the flatbuffer in the Type union below
+struct Null FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef NullBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ verifier.EndTable();
+ }
+};
+
+struct NullBuilder {
+ typedef Null Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit NullBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ NullBuilder &operator=(const NullBuilder &);
+ flatbuffers::Offset<Null> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Null>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Null> CreateNull(
+ flatbuffers::FlatBufferBuilder &_fbb) {
+ NullBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+/// A Struct_ in the flatbuffer metadata is the same as an Arrow Struct
+/// (according to the physical memory layout). We used Struct_ here as
+/// Struct is a reserved word in Flatbuffers
+struct Struct_ FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef Struct_Builder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ verifier.EndTable();
+ }
+};
+
+struct Struct_Builder {
+ typedef Struct_ Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit Struct_Builder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ Struct_Builder &operator=(const Struct_Builder &);
+ flatbuffers::Offset<Struct_> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Struct_>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Struct_> CreateStruct_(
+ flatbuffers::FlatBufferBuilder &_fbb) {
+ Struct_Builder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct List FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef ListBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ verifier.EndTable();
+ }
+};
+
+struct ListBuilder {
+ typedef List Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit ListBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ ListBuilder &operator=(const ListBuilder &);
+ flatbuffers::Offset<List> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<List>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<List> CreateList(
+ flatbuffers::FlatBufferBuilder &_fbb) {
+ ListBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+/// Same as List, but with 64-bit offsets, allowing to represent
+/// extremely large data values.
+struct LargeList FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef LargeListBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ verifier.EndTable();
+ }
+};
+
+struct LargeListBuilder {
+ typedef LargeList Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit LargeListBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ LargeListBuilder &operator=(const LargeListBuilder &);
+ flatbuffers::Offset<LargeList> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<LargeList>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<LargeList> CreateLargeList(
+ flatbuffers::FlatBufferBuilder &_fbb) {
+ LargeListBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct FixedSizeList FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef FixedSizeListBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+ VT_LISTSIZE = 4
+ };
+ /// Number of list items per value
+ int32_t listSize() const {
+ return GetField<int32_t>(VT_LISTSIZE, 0);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int32_t>(verifier, VT_LISTSIZE) &&
+ verifier.EndTable();
+ }
+};
+
+struct FixedSizeListBuilder {
+ typedef FixedSizeList Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_listSize(int32_t listSize) {
+ fbb_.AddElement<int32_t>(FixedSizeList::VT_LISTSIZE, listSize, 0);
+ }
+ explicit FixedSizeListBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ FixedSizeListBuilder &operator=(const FixedSizeListBuilder &);
+ flatbuffers::Offset<FixedSizeList> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<FixedSizeList>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<FixedSizeList> CreateFixedSizeList(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ int32_t listSize = 0) {
+ FixedSizeListBuilder builder_(_fbb);
+ builder_.add_listSize(listSize);
+ return builder_.Finish();
+}
+
+/// A Map is a logical nested type that is represented as
+///
+/// List<entries: Struct<key: K, value: V>>
+///
+/// In this layout, the keys and values are each respectively contiguous. We do
+/// not constrain the key and value types, so the application is responsible
+/// for ensuring that the keys are hashable and unique. Whether the keys are sorted
+/// may be set in the metadata for this field.
+///
+/// In a field with Map type, the field has a child Struct field, which then
+/// has two children: key type and the second the value type. The names of the
+/// child fields may be respectively "entries", "key", and "value", but this is
+/// not enforced.
+///
+/// Map
+/// - child[0] entries: Struct
+/// - child[0] key: K
+/// - child[1] value: V
+///
+/// Neither the "entries" field nor the "key" field may be nullable.
+///
+/// The metadata is structured so that Arrow systems without special handling
+/// for Map can make Map an alias for List. The "layout" attribute for the Map
+/// field must have the same contents as a List.
+struct Map FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef MapBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+ VT_KEYSSORTED = 4
+ };
+ /// Set to true if the keys within each value are sorted
+ bool keysSorted() const {
+ return GetField<uint8_t>(VT_KEYSSORTED, 0) != 0;
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ VerifyField<uint8_t>(verifier, VT_KEYSSORTED) &&
+ verifier.EndTable();
+ }
+};
+
+struct MapBuilder {
+ typedef Map Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_keysSorted(bool keysSorted) {
+ fbb_.AddElement<uint8_t>(Map::VT_KEYSSORTED, static_cast<uint8_t>(keysSorted), 0);
+ }
+ explicit MapBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ MapBuilder &operator=(const MapBuilder &);
+ flatbuffers::Offset<Map> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Map>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Map> CreateMap(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ bool keysSorted = false) {
+ MapBuilder builder_(_fbb);
+ builder_.add_keysSorted(keysSorted);
+ return builder_.Finish();
+}
+
+/// A union is a complex type with children in Field
+/// By default ids in the type vector refer to the offsets in the children
+/// optionally typeIds provides an indirection between the child offset and the type id
+/// for each child typeIds[offset] is the id used in the type vector
+struct Union FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef UnionBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+ VT_MODE = 4,
+ VT_TYPEIDS = 6
+ };
+ org::apache::arrow::flatbuf::UnionMode mode() const {
+ return static_cast<org::apache::arrow::flatbuf::UnionMode>(GetField<int16_t>(VT_MODE, 0));
+ }
+ const flatbuffers::Vector<int32_t> *typeIds() const {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_TYPEIDS);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int16_t>(verifier, VT_MODE) &&
+ VerifyOffset(verifier, VT_TYPEIDS) &&
+ verifier.VerifyVector(typeIds()) &&
+ verifier.EndTable();
+ }
+};
+
+struct UnionBuilder {
+ typedef Union Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_mode(org::apache::arrow::flatbuf::UnionMode mode) {
+ fbb_.AddElement<int16_t>(Union::VT_MODE, static_cast<int16_t>(mode), 0);
+ }
+ void add_typeIds(flatbuffers::Offset<flatbuffers::Vector<int32_t>> typeIds) {
+ fbb_.AddOffset(Union::VT_TYPEIDS, typeIds);
+ }
+ explicit UnionBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ UnionBuilder &operator=(const UnionBuilder &);
+ flatbuffers::Offset<Union> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Union>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Union> CreateUnion(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ org::apache::arrow::flatbuf::UnionMode mode = org::apache::arrow::flatbuf::UnionMode::Sparse,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> typeIds = 0) {
+ UnionBuilder builder_(_fbb);
+ builder_.add_typeIds(typeIds);
+ builder_.add_mode(mode);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Union> CreateUnionDirect(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ org::apache::arrow::flatbuf::UnionMode mode = org::apache::arrow::flatbuf::UnionMode::Sparse,
+ const std::vector<int32_t> *typeIds = nullptr) {
+ auto typeIds__ = typeIds ? _fbb.CreateVector<int32_t>(*typeIds) : 0;
+ return org::apache::arrow::flatbuf::CreateUnion(
+ _fbb,
+ mode,
+ typeIds__);
+}
+
+struct Int FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef IntBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+ VT_BITWIDTH = 4,
+ VT_IS_SIGNED = 6
+ };
+ int32_t bitWidth() const {
+ return GetField<int32_t>(VT_BITWIDTH, 0);
+ }
+ bool is_signed() const {
+ return GetField<uint8_t>(VT_IS_SIGNED, 0) != 0;
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int32_t>(verifier, VT_BITWIDTH) &&
+ VerifyField<uint8_t>(verifier, VT_IS_SIGNED) &&
+ verifier.EndTable();
+ }
+};
+
+struct IntBuilder {
+ typedef Int Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_bitWidth(int32_t bitWidth) {
+ fbb_.AddElement<int32_t>(Int::VT_BITWIDTH, bitWidth, 0);
+ }
+ void add_is_signed(bool is_signed) {
+ fbb_.AddElement<uint8_t>(Int::VT_IS_SIGNED, static_cast<uint8_t>(is_signed), 0);
+ }
+ explicit IntBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ IntBuilder &operator=(const IntBuilder &);
+ flatbuffers::Offset<Int> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Int>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Int> CreateInt(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ int32_t bitWidth = 0,
+ bool is_signed = false) {
+ IntBuilder builder_(_fbb);
+ builder_.add_bitWidth(bitWidth);
+ builder_.add_is_signed(is_signed);
+ return builder_.Finish();
+}
+
+struct FloatingPoint FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef FloatingPointBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+ VT_PRECISION = 4
+ };
+ org::apache::arrow::flatbuf::Precision precision() const {
+ return static_cast<org::apache::arrow::flatbuf::Precision>(GetField<int16_t>(VT_PRECISION, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int16_t>(verifier, VT_PRECISION) &&
+ verifier.EndTable();
+ }
+};
+
+struct FloatingPointBuilder {
+ typedef FloatingPoint Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_precision(org::apache::arrow::flatbuf::Precision precision) {
+ fbb_.AddElement<int16_t>(FloatingPoint::VT_PRECISION, static_cast<int16_t>(precision), 0);
+ }
+ explicit FloatingPointBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ FloatingPointBuilder &operator=(const FloatingPointBuilder &);
+ flatbuffers::Offset<FloatingPoint> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<FloatingPoint>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<FloatingPoint> CreateFloatingPoint(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ org::apache::arrow::flatbuf::Precision precision = org::apache::arrow::flatbuf::Precision::HALF) {
+ FloatingPointBuilder builder_(_fbb);
+ builder_.add_precision(precision);
+ return builder_.Finish();
+}
+
+/// Unicode with UTF-8 encoding
+struct Utf8 FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef Utf8Builder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ verifier.EndTable();
+ }
+};
+
+struct Utf8Builder {
+ typedef Utf8 Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit Utf8Builder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ Utf8Builder &operator=(const Utf8Builder &);
+ flatbuffers::Offset<Utf8> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Utf8>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Utf8> CreateUtf8(
+ flatbuffers::FlatBufferBuilder &_fbb) {
+ Utf8Builder builder_(_fbb);
+ return builder_.Finish();
+}
+
+/// Opaque binary data
+struct Binary FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef BinaryBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ verifier.EndTable();
+ }
+};
+
+struct BinaryBuilder {
+ typedef Binary Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit BinaryBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ BinaryBuilder &operator=(const BinaryBuilder &);
+ flatbuffers::Offset<Binary> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Binary>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Binary> CreateBinary(
+ flatbuffers::FlatBufferBuilder &_fbb) {
+ BinaryBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+/// Same as Utf8, but with 64-bit offsets, allowing to represent
+/// extremely large data values.
+struct LargeUtf8 FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef LargeUtf8Builder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ verifier.EndTable();
+ }
+};
+
+struct LargeUtf8Builder {
+ typedef LargeUtf8 Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit LargeUtf8Builder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ LargeUtf8Builder &operator=(const LargeUtf8Builder &);
+ flatbuffers::Offset<LargeUtf8> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<LargeUtf8>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<LargeUtf8> CreateLargeUtf8(
+ flatbuffers::FlatBufferBuilder &_fbb) {
+ LargeUtf8Builder builder_(_fbb);
+ return builder_.Finish();
+}
+
+/// Same as Binary, but with 64-bit offsets, allowing to represent
+/// extremely large data values.
+struct LargeBinary FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef LargeBinaryBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ verifier.EndTable();
+ }
+};
+
+struct LargeBinaryBuilder {
+ typedef LargeBinary Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit LargeBinaryBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ LargeBinaryBuilder &operator=(const LargeBinaryBuilder &);
+ flatbuffers::Offset<LargeBinary> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<LargeBinary>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<LargeBinary> CreateLargeBinary(
+ flatbuffers::FlatBufferBuilder &_fbb) {
+ LargeBinaryBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct FixedSizeBinary FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef FixedSizeBinaryBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+ VT_BYTEWIDTH = 4
+ };
+ /// Number of bytes per value
+ int32_t byteWidth() const {
+ return GetField<int32_t>(VT_BYTEWIDTH, 0);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int32_t>(verifier, VT_BYTEWIDTH) &&
+ verifier.EndTable();
+ }
+};
+
+/// Writes a FixedSizeBinary table through a FlatBufferBuilder.
+struct FixedSizeBinaryBuilder {
+  using Table = FixedSizeBinary;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  /// Binds to `_fbb` and records the value returned by StartTable().
+  explicit FixedSizeBinaryBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(fbb_.StartTable()) {}
+  /// Copy-assignment is declared only; no definition is provided here.
+  FixedSizeBinaryBuilder &operator=(const FixedSizeBinaryBuilder &);
+  /// Adds the byteWidth field; 0 is passed as the default value.
+  void add_byteWidth(int32_t byteWidth) {
+    const int32_t kDefault = 0;
+    fbb_.AddElement<int32_t>(FixedSizeBinary::VT_BYTEWIDTH, byteWidth, kDefault);
+  }
+  /// Ends the table started in the constructor and returns its typed offset.
+  flatbuffers::Offset<FixedSizeBinary> Finish() {
+    return flatbuffers::Offset<FixedSizeBinary>(fbb_.EndTable(start_));
+  }
+};
+
+/// Builds a FixedSizeBinary table on `_fbb` with the given `byteWidth`
+/// and returns its offset.
+inline flatbuffers::Offset<FixedSizeBinary> CreateFixedSizeBinary(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    int32_t byteWidth = 0) {
+  FixedSizeBinaryBuilder table(_fbb);
+  table.add_byteWidth(byteWidth);
+  const flatbuffers::Offset<FixedSizeBinary> result = table.Finish();
+  return result;
+}
+
+/// Field-less table for the Bool type.
+struct Bool FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using Builder = BoolBuilder;
+  /// Verifies the table start and end; this table declares no fields.
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    if (!VerifyTableStart(verifier)) { return false; }
+    return verifier.EndTable();
+  }
+};
+
+/// Writes a Bool table through a FlatBufferBuilder.
+struct BoolBuilder {
+  using Table = Bool;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  /// Binds to `_fbb` and records the value returned by StartTable().
+  explicit BoolBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(fbb_.StartTable()) {}
+  /// Copy-assignment is declared only; no definition is provided here.
+  BoolBuilder &operator=(const BoolBuilder &);
+  /// Ends the table started in the constructor and returns its typed offset.
+  flatbuffers::Offset<Bool> Finish() {
+    return flatbuffers::Offset<Bool>(fbb_.EndTable(start_));
+  }
+};
+
+/// Starts a Bool table on `_fbb` and finishes it without adding any
+/// fields; returns the resulting offset.
+inline flatbuffers::Offset<Bool> CreateBool(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  return BoolBuilder(_fbb).Finish();
+}
+
+/// Exact decimal value represented as an integer value in two's
+/// complement. Currently only 128-bit (16-byte) integers are used but this may
+/// be expanded in the future. The representation uses the endianness indicated
+/// in the Schema.
+struct Decimal FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using Builder = DecimalBuilder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_PRECISION = 4,
+    VT_SCALE = 6,
+    VT_BITWIDTH = 8
+  };
+  /// Total number of decimal digits
+  int32_t precision() const {
+    const int32_t digits = GetField<int32_t>(VT_PRECISION, 0);
+    return digits;
+  }
+  /// Number of digits after the decimal point "."
+  int32_t scale() const {
+    const int32_t fractional_digits = GetField<int32_t>(VT_SCALE, 0);
+    return fractional_digits;
+  }
+  /// Number of bits per value. The only accepted width right now is 128 but
+  /// this field exists for forward compatibility so that other bit widths may
+  /// be supported in future format versions. We use bitWidth for consistency
+  /// with Int::bitWidth.
+  int32_t bitWidth() const {
+    const int32_t bits = GetField<int32_t>(VT_BITWIDTH, 128);
+    return bits;
+  }
+  /// Verifies the table start, then precision, scale and bitWidth in that
+  /// order, then the table end. Stops at the first failing check.
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    if (!VerifyTableStart(verifier)) { return false; }
+    if (!VerifyField<int32_t>(verifier, VT_PRECISION)) { return false; }
+    if (!VerifyField<int32_t>(verifier, VT_SCALE)) { return false; }
+    if (!VerifyField<int32_t>(verifier, VT_BITWIDTH)) { return false; }
+    return verifier.EndTable();
+  }
+};
+
+/// Writes a Decimal table through a FlatBufferBuilder.
+struct DecimalBuilder {
+  using Table = Decimal;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  /// Binds to `_fbb` and records the value returned by StartTable().
+  explicit DecimalBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(fbb_.StartTable()) {}
+  /// Copy-assignment is declared only; no definition is provided here.
+  DecimalBuilder &operator=(const DecimalBuilder &);
+  /// Adds the precision field; 0 is passed as the default value.
+  void add_precision(int32_t precision) {
+    const int32_t kDefault = 0;
+    fbb_.AddElement<int32_t>(Decimal::VT_PRECISION, precision, kDefault);
+  }
+  /// Adds the scale field; 0 is passed as the default value.
+  void add_scale(int32_t scale) {
+    const int32_t kDefault = 0;
+    fbb_.AddElement<int32_t>(Decimal::VT_SCALE, scale, kDefault);
+  }
+  /// Adds the bitWidth field; 128 is passed as the default value.
+  void add_bitWidth(int32_t bitWidth) {
+    const int32_t kDefault = 128;
+    fbb_.AddElement<int32_t>(Decimal::VT_BITWIDTH, bitWidth, kDefault);
+  }
+  /// Ends the table started in the constructor and returns its typed offset.
+  flatbuffers::Offset<Decimal> Finish() {
+    return flatbuffers::Offset<Decimal>(fbb_.EndTable(start_));
+  }
+};
+
+/// Builds a Decimal table on `_fbb` and returns its offset.
+/// Fields are added in the order bitWidth, scale, precision.
+inline flatbuffers::Offset<Decimal> CreateDecimal(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    int32_t precision = 0,
+    int32_t scale = 0,
+    int32_t bitWidth = 128) {
+  DecimalBuilder table(_fbb);
+  table.add_bitWidth(bitWidth);
+  table.add_scale(scale);
+  table.add_precision(precision);
+  const flatbuffers::Offset<Decimal> result = table.Finish();
+  return result;
+}
+
+/// Date is either a 32-bit or 64-bit type representing elapsed time since UNIX
+/// epoch (1970-01-01), stored in either of two units:
+///
+/// * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no
+///   leap seconds), where the values are evenly divisible by 86400000
+/// * Days (32 bits) since the UNIX epoch
+struct Date FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using Builder = DateBuilder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_UNIT = 4
+  };
+  /// Reads the unit as an int16 (default 1) and casts it to DateUnit.
+  org::apache::arrow::flatbuf::DateUnit unit() const {
+    const int16_t raw = GetField<int16_t>(VT_UNIT, 1);
+    return static_cast<org::apache::arrow::flatbuf::DateUnit>(raw);
+  }
+  /// Verifies the table start, the unit field, then the table end.
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    if (!VerifyTableStart(verifier)) { return false; }
+    if (!VerifyField<int16_t>(verifier, VT_UNIT)) { return false; }
+    return verifier.EndTable();
+  }
+};
+
+/// Writes a Date table through a FlatBufferBuilder.
+struct DateBuilder {
+  using Table = Date;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  /// Binds to `_fbb` and records the value returned by StartTable().
+  explicit DateBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(fbb_.StartTable()) {}
+  /// Copy-assignment is declared only; no definition is provided here.
+  DateBuilder &operator=(const DateBuilder &);
+  /// Adds the unit field as an int16; 1 is passed as the default value.
+  void add_unit(org::apache::arrow::flatbuf::DateUnit unit) {
+    const int16_t raw = static_cast<int16_t>(unit);
+    fbb_.AddElement<int16_t>(Date::VT_UNIT, raw, 1);
+  }
+  /// Ends the table started in the constructor and returns its typed offset.
+  flatbuffers::Offset<Date> Finish() {
+    return flatbuffers::Offset<Date>(fbb_.EndTable(start_));
+  }
+};
+
+/// Builds a Date table on `_fbb` with the given `unit` and returns its offset.
+inline flatbuffers::Offset<Date> CreateDate(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    org::apache::arrow::flatbuf::DateUnit unit = org::apache::arrow::flatbuf::DateUnit::MILLISECOND) {
+  DateBuilder table(_fbb);
+  table.add_unit(unit);
+  const flatbuffers::Offset<Date> result = table.Finish();
+  return result;
+}
+
+/// Time type. The physical storage type depends on the unit
+/// - SECOND and MILLISECOND: 32 bits
+/// - MICROSECOND and NANOSECOND: 64 bits
+struct Time FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using Builder = TimeBuilder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_UNIT = 4,
+    VT_BITWIDTH = 6
+  };
+  /// Reads the unit as an int16 (default 1) and casts it to TimeUnit.
+  org::apache::arrow::flatbuf::TimeUnit unit() const {
+    const int16_t raw = GetField<int16_t>(VT_UNIT, 1);
+    return static_cast<org::apache::arrow::flatbuf::TimeUnit>(raw);
+  }
+  /// Reads the bitWidth field (32 is passed to GetField as the default).
+  int32_t bitWidth() const {
+    const int32_t bits = GetField<int32_t>(VT_BITWIDTH, 32);
+    return bits;
+  }
+  /// Verifies the table start, unit, bitWidth, then the table end.
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    if (!VerifyTableStart(verifier)) { return false; }
+    if (!VerifyField<int16_t>(verifier, VT_UNIT)) { return false; }
+    if (!VerifyField<int32_t>(verifier, VT_BITWIDTH)) { return false; }
+    return verifier.EndTable();
+  }
+};
+
+/// Writes a Time table through a FlatBufferBuilder.
+struct TimeBuilder {
+  using Table = Time;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  /// Binds to `_fbb` and records the value returned by StartTable().
+  explicit TimeBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(fbb_.StartTable()) {}
+  /// Copy-assignment is declared only; no definition is provided here.
+  TimeBuilder &operator=(const TimeBuilder &);
+  /// Adds the unit field as an int16; 1 is passed as the default value.
+  void add_unit(org::apache::arrow::flatbuf::TimeUnit unit) {
+    const int16_t raw = static_cast<int16_t>(unit);
+    fbb_.AddElement<int16_t>(Time::VT_UNIT, raw, 1);
+  }
+  /// Adds the bitWidth field; 32 is passed as the default value.
+  void add_bitWidth(int32_t bitWidth) {
+    const int32_t kDefault = 32;
+    fbb_.AddElement<int32_t>(Time::VT_BITWIDTH, bitWidth, kDefault);
+  }
+  /// Ends the table started in the constructor and returns its typed offset.
+  flatbuffers::Offset<Time> Finish() {
+    return flatbuffers::Offset<Time>(fbb_.EndTable(start_));
+  }
+};
+
+/// Builds a Time table on `_fbb` and returns its offset.
+/// Fields are added in the order bitWidth, unit.
+inline flatbuffers::Offset<Time> CreateTime(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    org::apache::arrow::flatbuf::TimeUnit unit = org::apache::arrow::flatbuf::TimeUnit::MILLISECOND,
+    int32_t bitWidth = 32) {
+  TimeBuilder table(_fbb);
+  table.add_bitWidth(bitWidth);
+  table.add_unit(unit);
+  const flatbuffers::Offset<Time> result = table.Finish();
+  return result;
+}
+
+/// Time elapsed from the Unix epoch, 00:00:00.000 on 1 January 1970, excluding
+/// leap seconds, as a 64-bit integer. Note that UNIX time does not include
+/// leap seconds.
+///
+/// The Timestamp metadata supports both "time zone naive" and "time zone
+/// aware" timestamps. Read about the timezone attribute for more detail.
+struct Timestamp FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using Builder = TimestampBuilder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_UNIT = 4,
+    VT_TIMEZONE = 6
+  };
+  /// Reads the unit as an int16 (default 0) and casts it to TimeUnit.
+  org::apache::arrow::flatbuf::TimeUnit unit() const {
+    const int16_t raw = GetField<int16_t>(VT_UNIT, 0);
+    return static_cast<org::apache::arrow::flatbuf::TimeUnit>(raw);
+  }
+  /// The time zone is a string indicating the name of a time zone, one of:
+  ///
+  /// * As used in the Olson time zone database (the "tz database" or
+  ///   "tzdata"), such as "America/New_York"
+  /// * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30
+  ///
+  /// Whether a timezone string is present indicates different semantics about
+  /// the data:
+  ///
+  /// * If the time zone is null or equal to an empty string, the data is "time
+  ///   zone naive" and shall be displayed *as is* to the user, not localized
+  ///   to the locale of the user. This data can be thought of as UTC but
+  ///   without having "UTC" as the time zone, it is not considered to be
+  ///   localized to any time zone
+  ///
+  /// * If the time zone is set to a valid value, values can be displayed as
+  ///   "localized" to that time zone, even though the underlying 64-bit
+  ///   integers are identical to the same data stored in UTC. Converting
+  ///   between time zones is a metadata-only operation and does not change the
+  ///   underlying values
+  const flatbuffers::String *timezone() const {
+    const flatbuffers::String *tz = GetPointer<const flatbuffers::String *>(VT_TIMEZONE);
+    return tz;
+  }
+  /// Verifies the table start, the unit field, the timezone offset and
+  /// string, then the table end. Stops at the first failing check.
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    if (!VerifyTableStart(verifier)) { return false; }
+    if (!VerifyField<int16_t>(verifier, VT_UNIT)) { return false; }
+    if (!VerifyOffset(verifier, VT_TIMEZONE)) { return false; }
+    if (!verifier.VerifyString(timezone())) { return false; }
+    return verifier.EndTable();
+  }
+};
+
+/// Writes a Timestamp table through a FlatBufferBuilder.
+struct TimestampBuilder {
+  using Table = Timestamp;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  /// Binds to `_fbb` and records the value returned by StartTable().
+  explicit TimestampBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(fbb_.StartTable()) {}
+  /// Copy-assignment is declared only; no definition is provided here.
+  TimestampBuilder &operator=(const TimestampBuilder &);
+  /// Adds the unit field as an int16; 0 is passed as the default value.
+  void add_unit(org::apache::arrow::flatbuf::TimeUnit unit) {
+    const int16_t raw = static_cast<int16_t>(unit);
+    fbb_.AddElement<int16_t>(Timestamp::VT_UNIT, raw, 0);
+  }
+  /// Adds the offset of an already-created timezone string.
+  void add_timezone(flatbuffers::Offset<flatbuffers::String> timezone) {
+    fbb_.AddOffset(Timestamp::VT_TIMEZONE, timezone);
+  }
+  /// Ends the table started in the constructor and returns its typed offset.
+  flatbuffers::Offset<Timestamp> Finish() {
+    return flatbuffers::Offset<Timestamp>(fbb_.EndTable(start_));
+  }
+};
+
+/// Builds a Timestamp table on `_fbb` and returns its offset.
+/// Fields are added in the order timezone, unit.
+inline flatbuffers::Offset<Timestamp> CreateTimestamp(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    org::apache::arrow::flatbuf::TimeUnit unit = org::apache::arrow::flatbuf::TimeUnit::SECOND,
+    flatbuffers::Offset<flatbuffers::String> timezone = 0) {
+  TimestampBuilder table(_fbb);
+  table.add_timezone(timezone);
+  table.add_unit(unit);
+  const flatbuffers::Offset<Timestamp> result = table.Finish();
+  return result;
+}
+
+/// Convenience overload taking the timezone as a C string. A non-null
+/// `timezone` is first serialized with CreateString; a null one yields a
+/// zero offset. The result is forwarded to CreateTimestamp.
+inline flatbuffers::Offset<Timestamp> CreateTimestampDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    org::apache::arrow::flatbuf::TimeUnit unit = org::apache::arrow::flatbuf::TimeUnit::SECOND,
+    const char *timezone = nullptr) {
+  flatbuffers::Offset<flatbuffers::String> tz_offset = 0;
+  if (timezone) {
+    tz_offset = _fbb.CreateString(timezone);
+  }
+  return org::apache::arrow::flatbuf::CreateTimestamp(_fbb, unit, tz_offset);
+}
+
+/// Table with a single field, `unit`, of type IntervalUnit.
+struct Interval FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using Builder = IntervalBuilder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_UNIT = 4
+  };
+  /// Reads the unit as an int16 (default 0) and casts it to IntervalUnit.
+  org::apache::arrow::flatbuf::IntervalUnit unit() const {
+    const int16_t raw = GetField<int16_t>(VT_UNIT, 0);
+    return static_cast<org::apache::arrow::flatbuf::IntervalUnit>(raw);
+  }
+  /// Verifies the table start, the unit field, then the table end.
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    if (!VerifyTableStart(verifier)) { return false; }
+    if (!VerifyField<int16_t>(verifier, VT_UNIT)) { return false; }
+    return verifier.EndTable();
+  }
+};
+
+/// Writes an Interval table through a FlatBufferBuilder.
+struct IntervalBuilder {
+  using Table = Interval;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  /// Binds to `_fbb` and records the value returned by StartTable().
+  explicit IntervalBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(fbb_.StartTable()) {}
+  /// Copy-assignment is declared only; no definition is provided here.
+  IntervalBuilder &operator=(const IntervalBuilder &);
+  /// Adds the unit field as an int16; 0 is passed as the default value.
+  void add_unit(org::apache::arrow::flatbuf::IntervalUnit unit) {
+    const int16_t raw = static_cast<int16_t>(unit);
+    fbb_.AddElement<int16_t>(Interval::VT_UNIT, raw, 0);
+  }
+  /// Ends the table started in the constructor and returns its typed offset.
+  flatbuffers::Offset<Interval> Finish() {
+    return flatbuffers::Offset<Interval>(fbb_.EndTable(start_));
+  }
+};
+
+/// Builds an Interval table on `_fbb` with the given `unit` and returns
+/// its offset.
+inline flatbuffers::Offset<Interval> CreateInterval(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    org::apache::arrow::flatbuf::IntervalUnit unit = org::apache::arrow::flatbuf::IntervalUnit::YEAR_MONTH) {
+  IntervalBuilder table(_fbb);
+  table.add_unit(unit);
+  const flatbuffers::Offset<Interval> result = table.Finish();
+  return result;
+}
+
+/// Table with a single field, `unit`, of type TimeUnit.
+struct Duration FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using Builder = DurationBuilder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_UNIT = 4
+  };
+  /// Reads the unit as an int16 (default 1) and casts it to TimeUnit.
+  org::apache::arrow::flatbuf::TimeUnit unit() const {
+    const int16_t raw = GetField<int16_t>(VT_UNIT, 1);
+    return static_cast<org::apache::arrow::flatbuf::TimeUnit>(raw);
+  }
+  /// Verifies the table start, the unit field, then the table end.
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    if (!VerifyTableStart(verifier)) { return false; }
+    if (!VerifyField<int16_t>(verifier, VT_UNIT)) { return false; }
+    return verifier.EndTable();
+  }
+};
+
+/// Writes a Duration table through a FlatBufferBuilder.
+struct DurationBuilder {
+  using Table = Duration;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  /// Binds to `_fbb` and records the value returned by StartTable().
+  explicit DurationBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(fbb_.StartTable()) {}
+  /// Copy-assignment is declared only; no definition is provided here.
+  DurationBuilder &operator=(const DurationBuilder &);
+  /// Adds the unit field as an int16; 1 is passed as the default value.
+  void add_unit(org::apache::arrow::flatbuf::TimeUnit unit) {
+    const int16_t raw = static_cast<int16_t>(unit);
+    fbb_.AddElement<int16_t>(Duration::VT_UNIT, raw, 1);
+  }
+  /// Ends the table started in the constructor and returns its typed offset.
+  flatbuffers::Offset<Duration> Finish() {
+    return flatbuffers::Offset<Duration>(fbb_.EndTable(start_));
+  }
+};
+
+/// Builds a Duration table on `_fbb` with the given `unit` and returns
+/// its offset.
+inline flatbuffers::Offset<Duration> CreateDuration(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    org::apache::arrow::flatbuf::TimeUnit unit = org::apache::arrow::flatbuf::TimeUnit::MILLISECOND) {
+  DurationBuilder table(_fbb);
+  table.add_unit(unit);
+  const flatbuffers::Offset<Duration> result = table.Finish();
+  return result;
+}
+
+/// ----------------------------------------------------------------------
+/// User-defined key/value pairs used to attach custom metadata to Arrow.
+/// Key namespacing is the responsibility of the user.
+struct KeyValue FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using Builder = KeyValueBuilder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_KEY = 4,
+    VT_VALUE = 6
+  };
+  /// Pointer to the key string as returned by GetPointer.
+  const flatbuffers::String *key() const {
+    const flatbuffers::String *k = GetPointer<const flatbuffers::String *>(VT_KEY);
+    return k;
+  }
+  /// Pointer to the value string as returned by GetPointer.
+  const flatbuffers::String *value() const {
+    const flatbuffers::String *v = GetPointer<const flatbuffers::String *>(VT_VALUE);
+    return v;
+  }
+  /// Verifies the table start, key offset and string, value offset and
+  /// string, then the table end. Stops at the first failing check.
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    if (!VerifyTableStart(verifier)) { return false; }
+    if (!VerifyOffset(verifier, VT_KEY)) { return false; }
+    if (!verifier.VerifyString(key())) { return false; }
+    if (!VerifyOffset(verifier, VT_VALUE)) { return false; }
+    if (!verifier.VerifyString(value())) { return false; }
+    return verifier.EndTable();
+  }
+};
+
+/// Writes a KeyValue table through a FlatBufferBuilder.
+struct KeyValueBuilder {
+  using Table = KeyValue;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  /// Binds to `_fbb` and records the value returned by StartTable().
+  explicit KeyValueBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(fbb_.StartTable()) {}
+  /// Copy-assignment is declared only; no definition is provided here.
+  KeyValueBuilder &operator=(const KeyValueBuilder &);
+  /// Adds the offset of an already-created key string.
+  void add_key(flatbuffers::Offset<flatbuffers::String> key) {
+    fbb_.AddOffset(KeyValue::VT_KEY, key);
+  }
+  /// Adds the offset of an already-created value string.
+  void add_value(flatbuffers::Offset<flatbuffers::String> value) {
+    fbb_.AddOffset(KeyValue::VT_VALUE, value);
+  }
+  /// Ends the table started in the constructor and returns its typed offset.
+  flatbuffers::Offset<KeyValue> Finish() {
+    return flatbuffers::Offset<KeyValue>(fbb_.EndTable(start_));
+  }
+};
+
+/// Builds a KeyValue table on `_fbb` and returns its offset.
+/// Fields are added in the order value, key.
+inline flatbuffers::Offset<KeyValue> CreateKeyValue(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::String> key = 0,
+    flatbuffers::Offset<flatbuffers::String> value = 0) {
+  KeyValueBuilder table(_fbb);
+  table.add_value(value);
+  table.add_key(key);
+  const flatbuffers::Offset<KeyValue> result = table.Finish();
+  return result;
+}
+
+/// Convenience overload taking C strings. Each non-null argument is
+/// serialized with CreateString (key first, then value); a null argument
+/// yields a zero offset. The offsets are forwarded to CreateKeyValue.
+inline flatbuffers::Offset<KeyValue> CreateKeyValueDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const char *key = nullptr,
+    const char *value = nullptr) {
+  flatbuffers::Offset<flatbuffers::String> key_offset = 0;
+  if (key) {
+    key_offset = _fbb.CreateString(key);
+  }
+  flatbuffers::Offset<flatbuffers::String> value_offset = 0;
+  if (value) {
+    value_offset = _fbb.CreateString(value);
+  }
+  return org::apache::arrow::flatbuf::CreateKeyValue(_fbb, key_offset, value_offset);
+}
+
+/// Table describing how a field is dictionary encoded.
+struct DictionaryEncoding FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using Builder = DictionaryEncodingBuilder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_ID = 4,
+    VT_INDEXTYPE = 6,
+    VT_ISORDERED = 8,
+    VT_DICTIONARYKIND = 10
+  };
+  /// The known dictionary id in the application where this data is used. In
+  /// the file or streaming formats, the dictionary ids are found in the
+  /// DictionaryBatch messages
+  int64_t id() const {
+    const int64_t dictionary_id = GetField<int64_t>(VT_ID, 0);
+    return dictionary_id;
+  }
+  /// The dictionary indices are constrained to be non-negative integers. If
+  /// this field is null, the indices must be signed int32. To maximize
+  /// cross-language compatibility and performance, implementations are
+  /// recommended to prefer signed integer types over unsigned integer types
+  /// and to avoid uint64 indices unless they are required by an application.
+  const org::apache::arrow::flatbuf::Int *indexType() const {
+    const org::apache::arrow::flatbuf::Int *index =
+        GetPointer<const org::apache::arrow::flatbuf::Int *>(VT_INDEXTYPE);
+    return index;
+  }
+  /// By default, dictionaries are not ordered, or the order does not have
+  /// semantic meaning. In some statistical applications, dictionary-encoding
+  /// is used to represent ordered categorical data, and we provide a way to
+  /// preserve that metadata here
+  bool isOrdered() const {
+    const uint8_t flag = GetField<uint8_t>(VT_ISORDERED, 0);
+    return flag != 0;
+  }
+  /// Reads the kind as an int16 (default 0) and casts it to DictionaryKind.
+  org::apache::arrow::flatbuf::DictionaryKind dictionaryKind() const {
+    const int16_t raw = GetField<int16_t>(VT_DICTIONARYKIND, 0);
+    return static_cast<org::apache::arrow::flatbuf::DictionaryKind>(raw);
+  }
+  /// Verifies the table start, id, indexType offset and table, isOrdered,
+  /// dictionaryKind, then the table end. Stops at the first failing check.
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    if (!VerifyTableStart(verifier)) { return false; }
+    if (!VerifyField<int64_t>(verifier, VT_ID)) { return false; }
+    if (!VerifyOffset(verifier, VT_INDEXTYPE)) { return false; }
+    if (!verifier.VerifyTable(indexType())) { return false; }
+    if (!VerifyField<uint8_t>(verifier, VT_ISORDERED)) { return false; }
+    if (!VerifyField<int16_t>(verifier, VT_DICTIONARYKIND)) { return false; }
+    return verifier.EndTable();
+  }
+};
+
+/// Writes a DictionaryEncoding table through a FlatBufferBuilder.
+struct DictionaryEncodingBuilder {
+  using Table = DictionaryEncoding;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  /// Binds to `_fbb` and records the value returned by StartTable().
+  explicit DictionaryEncodingBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(fbb_.StartTable()) {}
+  /// Copy-assignment is declared only; no definition is provided here.
+  DictionaryEncodingBuilder &operator=(const DictionaryEncodingBuilder &);
+  /// Adds the id field; 0 is passed as the default value.
+  void add_id(int64_t id) {
+    const int64_t kDefault = 0;
+    fbb_.AddElement<int64_t>(DictionaryEncoding::VT_ID, id, kDefault);
+  }
+  /// Adds the offset of an already-created Int table as the index type.
+  void add_indexType(flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indexType) {
+    fbb_.AddOffset(DictionaryEncoding::VT_INDEXTYPE, indexType);
+  }
+  /// Adds the isOrdered flag as a uint8; 0 is passed as the default value.
+  void add_isOrdered(bool isOrdered) {
+    const uint8_t flag = static_cast<uint8_t>(isOrdered);
+    fbb_.AddElement<uint8_t>(DictionaryEncoding::VT_ISORDERED, flag, 0);
+  }
+  /// Adds the dictionary kind as an int16; 0 is passed as the default value.
+  void add_dictionaryKind(org::apache::arrow::flatbuf::DictionaryKind dictionaryKind) {
+    const int16_t raw = static_cast<int16_t>(dictionaryKind);
+    fbb_.AddElement<int16_t>(DictionaryEncoding::VT_DICTIONARYKIND, raw, 0);
+  }
+  /// Ends the table started in the constructor and returns its typed offset.
+  flatbuffers::Offset<DictionaryEncoding> Finish() {
+    return flatbuffers::Offset<DictionaryEncoding>(fbb_.EndTable(start_));
+  }
+};
+
+/// Builds a DictionaryEncoding table on `_fbb` and returns its offset.
+/// Fields are added in the order id, indexType, dictionaryKind, isOrdered.
+inline flatbuffers::Offset<DictionaryEncoding> CreateDictionaryEncoding(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    int64_t id = 0,
+    flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indexType = 0,
+    bool isOrdered = false,
+    org::apache::arrow::flatbuf::DictionaryKind dictionaryKind = org::apache::arrow::flatbuf::DictionaryKind::DenseArray) {
+  DictionaryEncodingBuilder table(_fbb);
+  table.add_id(id);
+  table.add_indexType(indexType);
+  table.add_dictionaryKind(dictionaryKind);
+  table.add_isOrdered(isOrdered);
+  const flatbuffers::Offset<DictionaryEncoding> result = table.Finish();
+  return result;
+}
+
+/// ----------------------------------------------------------------------
+/// A field represents a named column in a record / row batch or child of a
+/// nested type.
+struct Field FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using Builder = FieldBuilder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_NAME = 4,
+    VT_NULLABLE = 6,
+    VT_TYPE_TYPE = 8,
+    VT_TYPE = 10,
+    VT_DICTIONARY = 12,
+    VT_CHILDREN = 14,
+    VT_CUSTOM_METADATA = 16
+  };
+  /// Name is not required (e.g. in a List).
+  const flatbuffers::String *name() const {
+    const flatbuffers::String *field_name = GetPointer<const flatbuffers::String *>(VT_NAME);
+    return field_name;
+  }
+  /// Whether or not this field can contain nulls. Should be true in general.
+  bool nullable() const {
+    const uint8_t flag = GetField<uint8_t>(VT_NULLABLE, 0);
+    return flag != 0;
+  }
+  /// Discriminator of the `type` union, read as a uint8 (default 0).
+  org::apache::arrow::flatbuf::Type type_type() const {
+    const uint8_t raw = GetField<uint8_t>(VT_TYPE_TYPE, 0);
+    return static_cast<org::apache::arrow::flatbuf::Type>(raw);
+  }
+  /// This is the type of the decoded value if the field is dictionary encoded.
+  const void *type() const {
+    const void *untyped = GetPointer<const void *>(VT_TYPE);
+    return untyped;
+  }
+  /// Typed access to `type()`; specialized per union member outside the class.
+  template<typename T> const T *type_as() const;
+  // Each type_as_X() below returns type() cast to X when type_type() equals
+  // Type::X, and nullptr otherwise.
+  const org::apache::arrow::flatbuf::Null *type_as_Null() const {
+    if (type_type() != org::apache::arrow::flatbuf::Type::Null) { return nullptr; }
+    return static_cast<const org::apache::arrow::flatbuf::Null *>(type());
+  }
+  const org::apache::arrow::flatbuf::Int *type_as_Int() const {
+    if (type_type() != org::apache::arrow::flatbuf::Type::Int) { return nullptr; }
+    return static_cast<const org::apache::arrow::flatbuf::Int *>(type());
+  }
+  const org::apache::arrow::flatbuf::FloatingPoint *type_as_FloatingPoint() const {
+    if (type_type() != org::apache::arrow::flatbuf::Type::FloatingPoint) { return nullptr; }
+    return static_cast<const org::apache::arrow::flatbuf::FloatingPoint *>(type());
+  }
+  const org::apache::arrow::flatbuf::Binary *type_as_Binary() const {
+    if (type_type() != org::apache::arrow::flatbuf::Type::Binary) { return nullptr; }
+    return static_cast<const org::apache::arrow::flatbuf::Binary *>(type());
+  }
+  const org::apache::arrow::flatbuf::Utf8 *type_as_Utf8() const {
+    if (type_type() != org::apache::arrow::flatbuf::Type::Utf8) { return nullptr; }
+    return static_cast<const org::apache::arrow::flatbuf::Utf8 *>(type());
+  }
+  const org::apache::arrow::flatbuf::Bool *type_as_Bool() const {
+    if (type_type() != org::apache::arrow::flatbuf::Type::Bool) { return nullptr; }
+    return static_cast<const org::apache::arrow::flatbuf::Bool *>(type());
+  }
+  const org::apache::arrow::flatbuf::Decimal *type_as_Decimal() const {
+    if (type_type() != org::apache::arrow::flatbuf::Type::Decimal) { return nullptr; }
+    return static_cast<const org::apache::arrow::flatbuf::Decimal *>(type());
+  }
+  const org::apache::arrow::flatbuf::Date *type_as_Date() const {
+    if (type_type() != org::apache::arrow::flatbuf::Type::Date) { return nullptr; }
+    return static_cast<const org::apache::arrow::flatbuf::Date *>(type());
+  }
+  const org::apache::arrow::flatbuf::Time *type_as_Time() const {
+    if (type_type() != org::apache::arrow::flatbuf::Type::Time) { return nullptr; }
+    return static_cast<const org::apache::arrow::flatbuf::Time *>(type());
+  }
+  const org::apache::arrow::flatbuf::Timestamp *type_as_Timestamp() const {
+    if (type_type() != org::apache::arrow::flatbuf::Type::Timestamp) { return nullptr; }
+    return static_cast<const org::apache::arrow::flatbuf::Timestamp *>(type());
+  }
+  const org::apache::arrow::flatbuf::Interval *type_as_Interval() const {
+    if (type_type() != org::apache::arrow::flatbuf::Type::Interval) { return nullptr; }
+    return static_cast<const org::apache::arrow::flatbuf::Interval *>(type());
+  }
+  const org::apache::arrow::flatbuf::List *type_as_List() const {
+    if (type_type() != org::apache::arrow::flatbuf::Type::List) { return nullptr; }
+    return static_cast<const org::apache::arrow::flatbuf::List *>(type());
+  }
+  const org::apache::arrow::flatbuf::Struct_ *type_as_Struct_() const {
+    if (type_type() != org::apache::arrow::flatbuf::Type::Struct_) { return nullptr; }
+    return static_cast<const org::apache::arrow::flatbuf::Struct_ *>(type());
+  }
+  const org::apache::arrow::flatbuf::Union *type_as_Union() const {
+    if (type_type() != org::apache::arrow::flatbuf::Type::Union) { return nullptr; }
+    return static_cast<const org::apache::arrow::flatbuf::Union *>(type());
+  }
+  const org::apache::arrow::flatbuf::FixedSizeBinary *type_as_FixedSizeBinary() const {
+    if (type_type() != org::apache::arrow::flatbuf::Type::FixedSizeBinary) { return nullptr; }
+    return static_cast<const org::apache::arrow::flatbuf::FixedSizeBinary *>(type());
+  }
+  const org::apache::arrow::flatbuf::FixedSizeList *type_as_FixedSizeList() const {
+    if (type_type() != org::apache::arrow::flatbuf::Type::FixedSizeList) { return nullptr; }
+    return static_cast<const org::apache::arrow::flatbuf::FixedSizeList *>(type());
+  }
+  const org::apache::arrow::flatbuf::Map *type_as_Map() const {
+    if (type_type() != org::apache::arrow::flatbuf::Type::Map) { return nullptr; }
+    return static_cast<const org::apache::arrow::flatbuf::Map *>(type());
+  }
+  const org::apache::arrow::flatbuf::Duration *type_as_Duration() const {
+    if (type_type() != org::apache::arrow::flatbuf::Type::Duration) { return nullptr; }
+    return static_cast<const org::apache::arrow::flatbuf::Duration *>(type());
+  }
+  const org::apache::arrow::flatbuf::LargeBinary *type_as_LargeBinary() const {
+    if (type_type() != org::apache::arrow::flatbuf::Type::LargeBinary) { return nullptr; }
+    return static_cast<const org::apache::arrow::flatbuf::LargeBinary *>(type());
+  }
+  const org::apache::arrow::flatbuf::LargeUtf8 *type_as_LargeUtf8() const {
+    if (type_type() != org::apache::arrow::flatbuf::Type::LargeUtf8) { return nullptr; }
+    return static_cast<const org::apache::arrow::flatbuf::LargeUtf8 *>(type());
+  }
+  const org::apache::arrow::flatbuf::LargeList *type_as_LargeList() const {
+    if (type_type() != org::apache::arrow::flatbuf::Type::LargeList) { return nullptr; }
+    return static_cast<const org::apache::arrow::flatbuf::LargeList *>(type());
+  }
+  /// Present only if the field is dictionary encoded.
+  const org::apache::arrow::flatbuf::DictionaryEncoding *dictionary() const {
+    const org::apache::arrow::flatbuf::DictionaryEncoding *encoding =
+        GetPointer<const org::apache::arrow::flatbuf::DictionaryEncoding *>(VT_DICTIONARY);
+    return encoding;
+  }
+  /// children apply only to nested data types like Struct, List and Union. For
+  /// primitive types children will have length 0.
+  const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>> *children() const {
+    const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>> *kids =
+        GetPointer<const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>> *>(VT_CHILDREN);
+    return kids;
+  }
+  /// User-defined metadata
+  const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>> *custom_metadata() const {
+    const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>> *metadata =
+        GetPointer<const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>> *>(VT_CUSTOM_METADATA);
+    return metadata;
+  }
+  /// Verifies, in order: table start, name, nullable, type_type, type (via
+  /// VerifyType), dictionary, children, custom_metadata, table end.
+  /// Stops at the first failing check.
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    if (!VerifyTableStart(verifier)) { return false; }
+    if (!VerifyOffset(verifier, VT_NAME)) { return false; }
+    if (!verifier.VerifyString(name())) { return false; }
+    if (!VerifyField<uint8_t>(verifier, VT_NULLABLE)) { return false; }
+    if (!VerifyField<uint8_t>(verifier, VT_TYPE_TYPE)) { return false; }
+    if (!VerifyOffset(verifier, VT_TYPE)) { return false; }
+    if (!VerifyType(verifier, type(), type_type())) { return false; }
+    if (!VerifyOffset(verifier, VT_DICTIONARY)) { return false; }
+    if (!verifier.VerifyTable(dictionary())) { return false; }
+    if (!VerifyOffset(verifier, VT_CHILDREN)) { return false; }
+    if (!verifier.VerifyVector(children())) { return false; }
+    if (!verifier.VerifyVectorOfTables(children())) { return false; }
+    if (!VerifyOffset(verifier, VT_CUSTOM_METADATA)) { return false; }
+    if (!verifier.VerifyVector(custom_metadata())) { return false; }
+    if (!verifier.VerifyVectorOfTables(custom_metadata())) { return false; }
+    return verifier.EndTable();
+  }
+};
+
+// Explicit specializations of Field::type_as<T>(): each one forwards to the
+// matching Field::type_as_T() accessor.
+template<>
+inline const org::apache::arrow::flatbuf::Null *
+Field::type_as<org::apache::arrow::flatbuf::Null>() const { return type_as_Null(); }
+
+template<>
+inline const org::apache::arrow::flatbuf::Int *
+Field::type_as<org::apache::arrow::flatbuf::Int>() const { return type_as_Int(); }
+
+template<>
+inline const org::apache::arrow::flatbuf::FloatingPoint *
+Field::type_as<org::apache::arrow::flatbuf::FloatingPoint>() const { return type_as_FloatingPoint(); }
+
+template<>
+inline const org::apache::arrow::flatbuf::Binary *
+Field::type_as<org::apache::arrow::flatbuf::Binary>() const { return type_as_Binary(); }
+
+template<>
+inline const org::apache::arrow::flatbuf::Utf8 *
+Field::type_as<org::apache::arrow::flatbuf::Utf8>() const { return type_as_Utf8(); }
+
+template<>
+inline const org::apache::arrow::flatbuf::Bool *
+Field::type_as<org::apache::arrow::flatbuf::Bool>() const { return type_as_Bool(); }
+
+template<>
+inline const org::apache::arrow::flatbuf::Decimal *
+Field::type_as<org::apache::arrow::flatbuf::Decimal>() const { return type_as_Decimal(); }
+
+template<>
+inline const org::apache::arrow::flatbuf::Date *
+Field::type_as<org::apache::arrow::flatbuf::Date>() const { return type_as_Date(); }
+
+template<>
+inline const org::apache::arrow::flatbuf::Time *
+Field::type_as<org::apache::arrow::flatbuf::Time>() const { return type_as_Time(); }
+
+template<>
+inline const org::apache::arrow::flatbuf::Timestamp *
+Field::type_as<org::apache::arrow::flatbuf::Timestamp>() const { return type_as_Timestamp(); }
+
+template<>
+inline const org::apache::arrow::flatbuf::Interval *
+Field::type_as<org::apache::arrow::flatbuf::Interval>() const { return type_as_Interval(); }
+
+template<>
+inline const org::apache::arrow::flatbuf::List *
+Field::type_as<org::apache::arrow::flatbuf::List>() const { return type_as_List(); }
+
+template<>
+inline const org::apache::arrow::flatbuf::Struct_ *
+Field::type_as<org::apache::arrow::flatbuf::Struct_>() const { return type_as_Struct_(); }
+
+template<>
+inline const org::apache::arrow::flatbuf::Union *
+Field::type_as<org::apache::arrow::flatbuf::Union>() const { return type_as_Union(); }
+
+template<>
+inline const org::apache::arrow::flatbuf::FixedSizeBinary *
+Field::type_as<org::apache::arrow::flatbuf::FixedSizeBinary>() const { return type_as_FixedSizeBinary(); }
+
+template<>
+inline const org::apache::arrow::flatbuf::FixedSizeList *
+Field::type_as<org::apache::arrow::flatbuf::FixedSizeList>() const { return type_as_FixedSizeList(); }
+
+template<>
+inline const org::apache::arrow::flatbuf::Map *
+Field::type_as<org::apache::arrow::flatbuf::Map>() const { return type_as_Map(); }
+
+template<>
+inline const org::apache::arrow::flatbuf::Duration *
+Field::type_as<org::apache::arrow::flatbuf::Duration>() const { return type_as_Duration(); }
+
+template<>
+inline const org::apache::arrow::flatbuf::LargeBinary *
+Field::type_as<org::apache::arrow::flatbuf::LargeBinary>() const { return type_as_LargeBinary(); }
+
+template<>
+inline const org::apache::arrow::flatbuf::LargeUtf8 *
+Field::type_as<org::apache::arrow::flatbuf::LargeUtf8>() const { return type_as_LargeUtf8(); }
+
+template<>
+inline const org::apache::arrow::flatbuf::LargeList *
+Field::type_as<org::apache::arrow::flatbuf::LargeList>() const { return type_as_LargeList(); }
+
+/// Writes a Field table through a FlatBufferBuilder.
+struct FieldBuilder {
+  using Table = Field;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  /// Binds to `_fbb` and records the value returned by StartTable().
+  explicit FieldBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(fbb_.StartTable()) {}
+  /// Copy-assignment is declared only; no definition is provided here.
+  FieldBuilder &operator=(const FieldBuilder &);
+  /// Adds the offset of an already-created name string.
+  void add_name(flatbuffers::Offset<flatbuffers::String> name) {
+    fbb_.AddOffset(Field::VT_NAME, name);
+  }
+  /// Adds the nullable flag as a uint8; 0 is passed as the default value.
+  void add_nullable(bool nullable) {
+    const uint8_t flag = static_cast<uint8_t>(nullable);
+    fbb_.AddElement<uint8_t>(Field::VT_NULLABLE, flag, 0);
+  }
+  /// Adds the union discriminator as a uint8; 0 is passed as the default value.
+  void add_type_type(org::apache::arrow::flatbuf::Type type_type) {
+    const uint8_t raw = static_cast<uint8_t>(type_type);
+    fbb_.AddElement<uint8_t>(Field::VT_TYPE_TYPE, raw, 0);
+  }
+  /// Adds the untyped offset of the union value.
+  void add_type(flatbuffers::Offset<void> type) {
+    fbb_.AddOffset(Field::VT_TYPE, type);
+  }
+  /// Adds the offset of an already-created DictionaryEncoding table.
+  void add_dictionary(flatbuffers::Offset<org::apache::arrow::flatbuf::DictionaryEncoding> dictionary) {
+    fbb_.AddOffset(Field::VT_DICTIONARY, dictionary);
+  }
+  /// Adds the offset of an already-created vector of child Field offsets.
+  void add_children(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>>> children) {
+    fbb_.AddOffset(Field::VT_CHILDREN, children);
+  }
+  /// Adds the offset of an already-created vector of KeyValue offsets.
+  void add_custom_metadata(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>> custom_metadata) {
+    fbb_.AddOffset(Field::VT_CUSTOM_METADATA, custom_metadata);
+  }
+  /// Ends the table started in the constructor and returns its typed offset.
+  flatbuffers::Offset<Field> Finish() {
+    return flatbuffers::Offset<Field>(fbb_.EndTable(start_));
+  }
+};
+
+/// Builds a Field table on `_fbb` and returns its offset.
+/// Fields are added in the order custom_metadata, children, dictionary,
+/// type, name, type_type, nullable.
+inline flatbuffers::Offset<Field> CreateField(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::String> name = 0,
+    bool nullable = false,
+    org::apache::arrow::flatbuf::Type type_type = org::apache::arrow::flatbuf::Type::NONE,
+    flatbuffers::Offset<void> type = 0,
+    flatbuffers::Offset<org::apache::arrow::flatbuf::DictionaryEncoding> dictionary = 0,
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>>> children = 0,
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>> custom_metadata = 0) {
+  FieldBuilder table(_fbb);
+  table.add_custom_metadata(custom_metadata);
+  table.add_children(children);
+  table.add_dictionary(dictionary);
+  table.add_type(type);
+  table.add_name(name);
+  table.add_type_type(type_type);
+  table.add_nullable(nullable);
+  const flatbuffers::Offset<Field> result = table.Finish();
+  return result;
+}
+
+inline flatbuffers::Offset<Field> CreateFieldDirect(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ const char *name = nullptr,
+ bool nullable = false,
+ org::apache::arrow::flatbuf::Type type_type = org::apache::arrow::flatbuf::Type::NONE,
+ flatbuffers::Offset<void> type = 0,
+ flatbuffers::Offset<org::apache::arrow::flatbuf::DictionaryEncoding> dictionary = 0,
+ const std::vector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>> *children = nullptr,
+ const std::vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>> *custom_metadata = nullptr) {
+ auto name__ = name ? _fbb.CreateString(name) : 0;
+ auto children__ = children ? _fbb.CreateVector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>>(*children) : 0;
+ auto custom_metadata__ = custom_metadata ? _fbb.CreateVector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>(*custom_metadata) : 0;
+ return org::apache::arrow::flatbuf::CreateField(
+ _fbb,
+ name__,
+ nullable,
+ type_type,
+ type,
+ dictionary,
+ children__,
+ custom_metadata__);
+}
+
+/// ----------------------------------------------------------------------
+/// A Schema describes the columns in a row batch
+struct Schema FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef SchemaBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+ VT_ENDIANNESS = 4,
+ VT_FIELDS = 6,
+ VT_CUSTOM_METADATA = 8,
+ VT_FEATURES = 10
+ };
+ /// endianness of the buffer
+ /// it is Little Endian by default
+ /// if endianness doesn't match the underlying system then the vectors need to be converted
+ org::apache::arrow::flatbuf::Endianness endianness() const {
+ return static_cast<org::apache::arrow::flatbuf::Endianness>(GetField<int16_t>(VT_ENDIANNESS, 0));
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>> *fields() const {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>> *>(VT_FIELDS);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>> *custom_metadata() const {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>> *>(VT_CUSTOM_METADATA);
+ }
+ /// Features used in the stream/file.
+ const flatbuffers::Vector<int64_t> *features() const {
+ return GetPointer<const flatbuffers::Vector<int64_t> *>(VT_FEATURES);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int16_t>(verifier, VT_ENDIANNESS) &&
+ VerifyOffset(verifier, VT_FIELDS) &&
+ verifier.VerifyVector(fields()) &&
+ verifier.VerifyVectorOfTables(fields()) &&
+ VerifyOffset(verifier, VT_CUSTOM_METADATA) &&
+ verifier.VerifyVector(custom_metadata()) &&
+ verifier.VerifyVectorOfTables(custom_metadata()) &&
+ VerifyOffset(verifier, VT_FEATURES) &&
+ verifier.VerifyVector(features()) &&
+ verifier.EndTable();
+ }
+};
+
+struct SchemaBuilder {
+ typedef Schema Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_endianness(org::apache::arrow::flatbuf::Endianness endianness) {
+ fbb_.AddElement<int16_t>(Schema::VT_ENDIANNESS, static_cast<int16_t>(endianness), 0);
+ }
+ void add_fields(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>>> fields) {
+ fbb_.AddOffset(Schema::VT_FIELDS, fields);
+ }
+ void add_custom_metadata(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>> custom_metadata) {
+ fbb_.AddOffset(Schema::VT_CUSTOM_METADATA, custom_metadata);
+ }
+ void add_features(flatbuffers::Offset<flatbuffers::Vector<int64_t>> features) {
+ fbb_.AddOffset(Schema::VT_FEATURES, features);
+ }
+ explicit SchemaBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ SchemaBuilder &operator=(const SchemaBuilder &);
+ flatbuffers::Offset<Schema> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Schema>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Schema> CreateSchema(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ org::apache::arrow::flatbuf::Endianness endianness = org::apache::arrow::flatbuf::Endianness::Little,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>>> fields = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>> custom_metadata = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int64_t>> features = 0) {
+ SchemaBuilder builder_(_fbb);
+ builder_.add_features(features);
+ builder_.add_custom_metadata(custom_metadata);
+ builder_.add_fields(fields);
+ builder_.add_endianness(endianness);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Schema> CreateSchemaDirect(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ org::apache::arrow::flatbuf::Endianness endianness = org::apache::arrow::flatbuf::Endianness::Little,
+ const std::vector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>> *fields = nullptr,
+ const std::vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>> *custom_metadata = nullptr,
+ const std::vector<int64_t> *features = nullptr) {
+ auto fields__ = fields ? _fbb.CreateVector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>>(*fields) : 0;
+ auto custom_metadata__ = custom_metadata ? _fbb.CreateVector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>(*custom_metadata) : 0;
+ auto features__ = features ? _fbb.CreateVector<int64_t>(*features) : 0;
+ return org::apache::arrow::flatbuf::CreateSchema(
+ _fbb,
+ endianness,
+ fields__,
+ custom_metadata__,
+ features__);
+}
+
+inline bool VerifyType(flatbuffers::Verifier &verifier, const void *obj, Type type) {
+ switch (type) {
+ case Type::NONE: {
+ return true;
+ }
+ case Type::Null: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Null *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case Type::Int: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Int *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case Type::FloatingPoint: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::FloatingPoint *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case Type::Binary: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Binary *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case Type::Utf8: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Utf8 *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case Type::Bool: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Bool *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case Type::Decimal: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Decimal *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case Type::Date: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Date *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case Type::Time: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Time *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case Type::Timestamp: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Timestamp *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case Type::Interval: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Interval *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case Type::List: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::List *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case Type::Struct_: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Struct_ *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case Type::Union: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Union *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case Type::FixedSizeBinary: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::FixedSizeBinary *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case Type::FixedSizeList: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::FixedSizeList *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case Type::Map: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Map *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case Type::Duration: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::Duration *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case Type::LargeBinary: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::LargeBinary *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case Type::LargeUtf8: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::LargeUtf8 *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case Type::LargeList: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::LargeList *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ default: return true;
+ }
+}
+
+inline bool VerifyTypeVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types) {
+ if (!values || !types) return !values && !types;
+ if (values->size() != types->size()) return false;
+ for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) {
+ if (!VerifyType(
+ verifier, values->Get(i), types->GetEnum<Type>(i))) {
+ return false;
+ }
+ }
+ return true;
+}
+
+inline const org::apache::arrow::flatbuf::Schema *GetSchema(const void *buf) {
+ return flatbuffers::GetRoot<org::apache::arrow::flatbuf::Schema>(buf);
+}
+
+inline const org::apache::arrow::flatbuf::Schema *GetSizePrefixedSchema(const void *buf) {
+ return flatbuffers::GetSizePrefixedRoot<org::apache::arrow::flatbuf::Schema>(buf);
+}
+
+inline bool VerifySchemaBuffer(
+ flatbuffers::Verifier &verifier) {
+ return verifier.VerifyBuffer<org::apache::arrow::flatbuf::Schema>(nullptr);
+}
+
+inline bool VerifySizePrefixedSchemaBuffer(
+ flatbuffers::Verifier &verifier) {
+ return verifier.VerifySizePrefixedBuffer<org::apache::arrow::flatbuf::Schema>(nullptr);
+}
+
+inline void FinishSchemaBuffer(
+ flatbuffers::FlatBufferBuilder &fbb,
+ flatbuffers::Offset<org::apache::arrow::flatbuf::Schema> root) {
+ fbb.Finish(root);
+}
+
+inline void FinishSizePrefixedSchemaBuffer(
+ flatbuffers::FlatBufferBuilder &fbb,
+ flatbuffers::Offset<org::apache::arrow::flatbuf::Schema> root) {
+ fbb.FinishSizePrefixed(root);
+}
+
+} // namespace flatbuf
+} // namespace arrow
+} // namespace apache
+} // namespace org
+
+#endif // FLATBUFFERS_GENERATED_SCHEMA_ORG_APACHE_ARROW_FLATBUF_H_
diff --git a/contrib/libs/apache/arrow/cpp/src/generated/SparseTensor_generated.h b/contrib/libs/apache/arrow/cpp/src/generated/SparseTensor_generated.h
index ec4d414d4fe..45d4ebc7735 100644
--- a/contrib/libs/apache/arrow/cpp/src/generated/SparseTensor_generated.h
+++ b/contrib/libs/apache/arrow/cpp/src/generated/SparseTensor_generated.h
@@ -1,913 +1,913 @@
-// automatically generated by the FlatBuffers compiler, do not modify
-
-
-#ifndef FLATBUFFERS_GENERATED_SPARSETENSOR_ORG_APACHE_ARROW_FLATBUF_H_
-#define FLATBUFFERS_GENERATED_SPARSETENSOR_ORG_APACHE_ARROW_FLATBUF_H_
-
-#include "flatbuffers/flatbuffers.h"
-
-#include "Schema_generated.h"
-#include "Tensor_generated.h"
-
-namespace org {
-namespace apache {
-namespace arrow {
-namespace flatbuf {
-
-struct SparseTensorIndexCOO;
-struct SparseTensorIndexCOOBuilder;
-
-struct SparseMatrixIndexCSX;
-struct SparseMatrixIndexCSXBuilder;
-
-struct SparseTensorIndexCSF;
-struct SparseTensorIndexCSFBuilder;
-
-struct SparseTensor;
-struct SparseTensorBuilder;
-
-enum class SparseMatrixCompressedAxis : int16_t {
- Row = 0,
- Column = 1,
- MIN = Row,
- MAX = Column
-};
-
-inline const SparseMatrixCompressedAxis (&EnumValuesSparseMatrixCompressedAxis())[2] {
- static const SparseMatrixCompressedAxis values[] = {
- SparseMatrixCompressedAxis::Row,
- SparseMatrixCompressedAxis::Column
- };
- return values;
-}
-
-inline const char * const *EnumNamesSparseMatrixCompressedAxis() {
- static const char * const names[3] = {
- "Row",
- "Column",
- nullptr
- };
- return names;
-}
-
-inline const char *EnumNameSparseMatrixCompressedAxis(SparseMatrixCompressedAxis e) {
- if (flatbuffers::IsOutRange(e, SparseMatrixCompressedAxis::Row, SparseMatrixCompressedAxis::Column)) return "";
- const size_t index = static_cast<size_t>(e);
- return EnumNamesSparseMatrixCompressedAxis()[index];
-}
-
-enum class SparseTensorIndex : uint8_t {
- NONE = 0,
- SparseTensorIndexCOO = 1,
- SparseMatrixIndexCSX = 2,
- SparseTensorIndexCSF = 3,
- MIN = NONE,
- MAX = SparseTensorIndexCSF
-};
-
-inline const SparseTensorIndex (&EnumValuesSparseTensorIndex())[4] {
- static const SparseTensorIndex values[] = {
- SparseTensorIndex::NONE,
- SparseTensorIndex::SparseTensorIndexCOO,
- SparseTensorIndex::SparseMatrixIndexCSX,
- SparseTensorIndex::SparseTensorIndexCSF
- };
- return values;
-}
-
-inline const char * const *EnumNamesSparseTensorIndex() {
- static const char * const names[5] = {
- "NONE",
- "SparseTensorIndexCOO",
- "SparseMatrixIndexCSX",
- "SparseTensorIndexCSF",
- nullptr
- };
- return names;
-}
-
-inline const char *EnumNameSparseTensorIndex(SparseTensorIndex e) {
- if (flatbuffers::IsOutRange(e, SparseTensorIndex::NONE, SparseTensorIndex::SparseTensorIndexCSF)) return "";
- const size_t index = static_cast<size_t>(e);
- return EnumNamesSparseTensorIndex()[index];
-}
-
-template<typename T> struct SparseTensorIndexTraits {
- static const SparseTensorIndex enum_value = SparseTensorIndex::NONE;
-};
-
-template<> struct SparseTensorIndexTraits<org::apache::arrow::flatbuf::SparseTensorIndexCOO> {
- static const SparseTensorIndex enum_value = SparseTensorIndex::SparseTensorIndexCOO;
-};
-
-template<> struct SparseTensorIndexTraits<org::apache::arrow::flatbuf::SparseMatrixIndexCSX> {
- static const SparseTensorIndex enum_value = SparseTensorIndex::SparseMatrixIndexCSX;
-};
-
-template<> struct SparseTensorIndexTraits<org::apache::arrow::flatbuf::SparseTensorIndexCSF> {
- static const SparseTensorIndex enum_value = SparseTensorIndex::SparseTensorIndexCSF;
-};
-
-bool VerifySparseTensorIndex(flatbuffers::Verifier &verifier, const void *obj, SparseTensorIndex type);
-bool VerifySparseTensorIndexVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types);
-
-/// ----------------------------------------------------------------------
-/// EXPERIMENTAL: Data structures for sparse tensors
-/// Coordinate (COO) format of sparse tensor index.
-///
-/// COO's index list are represented as a NxM matrix,
-/// where N is the number of non-zero values,
-/// and M is the number of dimensions of a sparse tensor.
-///
-/// indicesBuffer stores the location and size of the data of this indices
-/// matrix. The value type and the stride of the indices matrix is
-/// specified in indicesType and indicesStrides fields.
-///
-/// For example, let X be a 2x3x4x5 tensor, and it has the following
-/// 6 non-zero values:
-///
-/// X[0, 1, 2, 0] := 1
-/// X[1, 1, 2, 3] := 2
-/// X[0, 2, 1, 0] := 3
-/// X[0, 1, 3, 0] := 4
-/// X[0, 1, 2, 1] := 5
-/// X[1, 2, 0, 4] := 6
-///
-/// In COO format, the index matrix of X is the following 4x6 matrix:
-///
-/// [[0, 0, 0, 0, 1, 1],
-/// [1, 1, 1, 2, 1, 2],
-/// [2, 2, 3, 1, 2, 0],
-/// [0, 1, 0, 0, 3, 4]]
-///
-/// Note that the indices are sorted in lexicographical order.
-struct SparseTensorIndexCOO FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef SparseTensorIndexCOOBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_INDICESTYPE = 4,
- VT_INDICESSTRIDES = 6,
- VT_INDICESBUFFER = 8,
- VT_ISCANONICAL = 10
- };
- /// The type of values in indicesBuffer
- const org::apache::arrow::flatbuf::Int *indicesType() const {
- return GetPointer<const org::apache::arrow::flatbuf::Int *>(VT_INDICESTYPE);
- }
- /// Non-negative byte offsets to advance one value cell along each dimension
- /// If omitted, default to row-major order (C-like).
- const flatbuffers::Vector<int64_t> *indicesStrides() const {
- return GetPointer<const flatbuffers::Vector<int64_t> *>(VT_INDICESSTRIDES);
- }
- /// The location and size of the indices matrix's data
- const org::apache::arrow::flatbuf::Buffer *indicesBuffer() const {
- return GetStruct<const org::apache::arrow::flatbuf::Buffer *>(VT_INDICESBUFFER);
- }
- /// The canonicality flag
- bool isCanonical() const {
- return GetField<uint8_t>(VT_ISCANONICAL, 0) != 0;
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyOffsetRequired(verifier, VT_INDICESTYPE) &&
- verifier.VerifyTable(indicesType()) &&
- VerifyOffset(verifier, VT_INDICESSTRIDES) &&
- verifier.VerifyVector(indicesStrides()) &&
- VerifyFieldRequired<org::apache::arrow::flatbuf::Buffer>(verifier, VT_INDICESBUFFER) &&
- VerifyField<uint8_t>(verifier, VT_ISCANONICAL) &&
- verifier.EndTable();
- }
-};
-
-struct SparseTensorIndexCOOBuilder {
- typedef SparseTensorIndexCOO Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_indicesType(flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indicesType) {
- fbb_.AddOffset(SparseTensorIndexCOO::VT_INDICESTYPE, indicesType);
- }
- void add_indicesStrides(flatbuffers::Offset<flatbuffers::Vector<int64_t>> indicesStrides) {
- fbb_.AddOffset(SparseTensorIndexCOO::VT_INDICESSTRIDES, indicesStrides);
- }
- void add_indicesBuffer(const org::apache::arrow::flatbuf::Buffer *indicesBuffer) {
- fbb_.AddStruct(SparseTensorIndexCOO::VT_INDICESBUFFER, indicesBuffer);
- }
- void add_isCanonical(bool isCanonical) {
- fbb_.AddElement<uint8_t>(SparseTensorIndexCOO::VT_ISCANONICAL, static_cast<uint8_t>(isCanonical), 0);
- }
- explicit SparseTensorIndexCOOBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- SparseTensorIndexCOOBuilder &operator=(const SparseTensorIndexCOOBuilder &);
- flatbuffers::Offset<SparseTensorIndexCOO> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SparseTensorIndexCOO>(end);
- fbb_.Required(o, SparseTensorIndexCOO::VT_INDICESTYPE);
- fbb_.Required(o, SparseTensorIndexCOO::VT_INDICESBUFFER);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SparseTensorIndexCOO> CreateSparseTensorIndexCOO(
- flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indicesType = 0,
- flatbuffers::Offset<flatbuffers::Vector<int64_t>> indicesStrides = 0,
- const org::apache::arrow::flatbuf::Buffer *indicesBuffer = 0,
- bool isCanonical = false) {
- SparseTensorIndexCOOBuilder builder_(_fbb);
- builder_.add_indicesBuffer(indicesBuffer);
- builder_.add_indicesStrides(indicesStrides);
- builder_.add_indicesType(indicesType);
- builder_.add_isCanonical(isCanonical);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<SparseTensorIndexCOO> CreateSparseTensorIndexCOODirect(
- flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indicesType = 0,
- const std::vector<int64_t> *indicesStrides = nullptr,
- const org::apache::arrow::flatbuf::Buffer *indicesBuffer = 0,
- bool isCanonical = false) {
- auto indicesStrides__ = indicesStrides ? _fbb.CreateVector<int64_t>(*indicesStrides) : 0;
- return org::apache::arrow::flatbuf::CreateSparseTensorIndexCOO(
- _fbb,
- indicesType,
- indicesStrides__,
- indicesBuffer,
- isCanonical);
-}
-
-/// Compressed Sparse format, that is matrix-specific.
-struct SparseMatrixIndexCSX FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef SparseMatrixIndexCSXBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_COMPRESSEDAXIS = 4,
- VT_INDPTRTYPE = 6,
- VT_INDPTRBUFFER = 8,
- VT_INDICESTYPE = 10,
- VT_INDICESBUFFER = 12
- };
- /// Which axis, row or column, is compressed
- org::apache::arrow::flatbuf::SparseMatrixCompressedAxis compressedAxis() const {
- return static_cast<org::apache::arrow::flatbuf::SparseMatrixCompressedAxis>(GetField<int16_t>(VT_COMPRESSEDAXIS, 0));
- }
- /// The type of values in indptrBuffer
- const org::apache::arrow::flatbuf::Int *indptrType() const {
- return GetPointer<const org::apache::arrow::flatbuf::Int *>(VT_INDPTRTYPE);
- }
- /// indptrBuffer stores the location and size of indptr array that
- /// represents the range of the rows.
- /// The i-th row spans from indptr[i] to indptr[i+1] in the data.
- /// The length of this array is 1 + (the number of rows), and the type
- /// of index value is long.
- ///
- /// For example, let X be the following 6x4 matrix:
- ///
- /// X := [[0, 1, 2, 0],
- /// [0, 0, 3, 0],
- /// [0, 4, 0, 5],
- /// [0, 0, 0, 0],
- /// [6, 0, 7, 8],
- /// [0, 9, 0, 0]].
- ///
- /// The array of non-zero values in X is:
- ///
- /// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
- ///
- /// And the indptr of X is:
- ///
- /// indptr(X) = [0, 2, 3, 5, 5, 8, 10].
- const org::apache::arrow::flatbuf::Buffer *indptrBuffer() const {
- return GetStruct<const org::apache::arrow::flatbuf::Buffer *>(VT_INDPTRBUFFER);
- }
- /// The type of values in indicesBuffer
- const org::apache::arrow::flatbuf::Int *indicesType() const {
- return GetPointer<const org::apache::arrow::flatbuf::Int *>(VT_INDICESTYPE);
- }
- /// indicesBuffer stores the location and size of the array that
- /// contains the column indices of the corresponding non-zero values.
- /// The type of index value is long.
- ///
- /// For example, the indices of the above X is:
- ///
- /// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
- ///
- /// Note that the indices are sorted in lexicographical order for each row.
- const org::apache::arrow::flatbuf::Buffer *indicesBuffer() const {
- return GetStruct<const org::apache::arrow::flatbuf::Buffer *>(VT_INDICESBUFFER);
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyField<int16_t>(verifier, VT_COMPRESSEDAXIS) &&
- VerifyOffsetRequired(verifier, VT_INDPTRTYPE) &&
- verifier.VerifyTable(indptrType()) &&
- VerifyFieldRequired<org::apache::arrow::flatbuf::Buffer>(verifier, VT_INDPTRBUFFER) &&
- VerifyOffsetRequired(verifier, VT_INDICESTYPE) &&
- verifier.VerifyTable(indicesType()) &&
- VerifyFieldRequired<org::apache::arrow::flatbuf::Buffer>(verifier, VT_INDICESBUFFER) &&
- verifier.EndTable();
- }
-};
-
-struct SparseMatrixIndexCSXBuilder {
- typedef SparseMatrixIndexCSX Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_compressedAxis(org::apache::arrow::flatbuf::SparseMatrixCompressedAxis compressedAxis) {
- fbb_.AddElement<int16_t>(SparseMatrixIndexCSX::VT_COMPRESSEDAXIS, static_cast<int16_t>(compressedAxis), 0);
- }
- void add_indptrType(flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indptrType) {
- fbb_.AddOffset(SparseMatrixIndexCSX::VT_INDPTRTYPE, indptrType);
- }
- void add_indptrBuffer(const org::apache::arrow::flatbuf::Buffer *indptrBuffer) {
- fbb_.AddStruct(SparseMatrixIndexCSX::VT_INDPTRBUFFER, indptrBuffer);
- }
- void add_indicesType(flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indicesType) {
- fbb_.AddOffset(SparseMatrixIndexCSX::VT_INDICESTYPE, indicesType);
- }
- void add_indicesBuffer(const org::apache::arrow::flatbuf::Buffer *indicesBuffer) {
- fbb_.AddStruct(SparseMatrixIndexCSX::VT_INDICESBUFFER, indicesBuffer);
- }
- explicit SparseMatrixIndexCSXBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- SparseMatrixIndexCSXBuilder &operator=(const SparseMatrixIndexCSXBuilder &);
- flatbuffers::Offset<SparseMatrixIndexCSX> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SparseMatrixIndexCSX>(end);
- fbb_.Required(o, SparseMatrixIndexCSX::VT_INDPTRTYPE);
- fbb_.Required(o, SparseMatrixIndexCSX::VT_INDPTRBUFFER);
- fbb_.Required(o, SparseMatrixIndexCSX::VT_INDICESTYPE);
- fbb_.Required(o, SparseMatrixIndexCSX::VT_INDICESBUFFER);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SparseMatrixIndexCSX> CreateSparseMatrixIndexCSX(
- flatbuffers::FlatBufferBuilder &_fbb,
- org::apache::arrow::flatbuf::SparseMatrixCompressedAxis compressedAxis = org::apache::arrow::flatbuf::SparseMatrixCompressedAxis::Row,
- flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indptrType = 0,
- const org::apache::arrow::flatbuf::Buffer *indptrBuffer = 0,
- flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indicesType = 0,
- const org::apache::arrow::flatbuf::Buffer *indicesBuffer = 0) {
- SparseMatrixIndexCSXBuilder builder_(_fbb);
- builder_.add_indicesBuffer(indicesBuffer);
- builder_.add_indicesType(indicesType);
- builder_.add_indptrBuffer(indptrBuffer);
- builder_.add_indptrType(indptrType);
- builder_.add_compressedAxis(compressedAxis);
- return builder_.Finish();
-}
-
-/// Compressed Sparse Fiber (CSF) sparse tensor index.
-struct SparseTensorIndexCSF FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef SparseTensorIndexCSFBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_INDPTRTYPE = 4,
- VT_INDPTRBUFFERS = 6,
- VT_INDICESTYPE = 8,
- VT_INDICESBUFFERS = 10,
- VT_AXISORDER = 12
- };
- /// CSF is a generalization of compressed sparse row (CSR) index.
- /// See [smith2017knl]: http://shaden.io/pub-files/smith2017knl.pdf
- ///
- /// CSF index recursively compresses each dimension of a tensor into a set
- /// of prefix trees. Each path from a root to leaf forms one tensor
- /// non-zero index. CSF is implemented with two arrays of buffers and one
- /// arrays of integers.
- ///
- /// For example, let X be a 2x3x4x5 tensor and let it have the following
- /// 8 non-zero values:
- ///
- /// X[0, 0, 0, 1] := 1
- /// X[0, 0, 0, 2] := 2
- /// X[0, 1, 0, 0] := 3
- /// X[0, 1, 0, 2] := 4
- /// X[0, 1, 1, 0] := 5
- /// X[1, 1, 1, 0] := 6
- /// X[1, 1, 1, 1] := 7
- /// X[1, 1, 1, 2] := 8
- ///
- /// As a prefix tree this would be represented as:
- ///
- /// 0 1
- /// / \ |
- /// 0 1 1
- /// / / \ |
- /// 0 0 1 1
- /// /| /| | /| |
- /// 1 2 0 2 0 0 1 2
- /// The type of values in indptrBuffers
- const org::apache::arrow::flatbuf::Int *indptrType() const {
- return GetPointer<const org::apache::arrow::flatbuf::Int *>(VT_INDPTRTYPE);
- }
- /// indptrBuffers stores the sparsity structure.
- /// Each two consecutive dimensions in a tensor correspond to a buffer in
- /// indptrBuffers. A pair of consecutive values at indptrBuffers[dim][i]
- /// and indptrBuffers[dim][i + 1] signify a range of nodes in
- /// indicesBuffers[dim + 1] who are children of indicesBuffers[dim][i] node.
- ///
- /// For example, the indptrBuffers for the above X is:
- ///
- /// indptrBuffer(X) = [
- /// [0, 2, 3],
- /// [0, 1, 3, 4],
- /// [0, 2, 4, 5, 8]
- /// ].
- ///
- const flatbuffers::Vector<const org::apache::arrow::flatbuf::Buffer *> *indptrBuffers() const {
- return GetPointer<const flatbuffers::Vector<const org::apache::arrow::flatbuf::Buffer *> *>(VT_INDPTRBUFFERS);
- }
- /// The type of values in indicesBuffers
- const org::apache::arrow::flatbuf::Int *indicesType() const {
- return GetPointer<const org::apache::arrow::flatbuf::Int *>(VT_INDICESTYPE);
- }
- /// indicesBuffers stores values of nodes.
- /// Each tensor dimension corresponds to a buffer in indicesBuffers.
- /// For example, the indicesBuffers for the above X is:
- ///
- /// indicesBuffer(X) = [
- /// [0, 1],
- /// [0, 1, 1],
- /// [0, 0, 1, 1],
- /// [1, 2, 0, 2, 0, 0, 1, 2]
- /// ].
- ///
- const flatbuffers::Vector<const org::apache::arrow::flatbuf::Buffer *> *indicesBuffers() const {
- return GetPointer<const flatbuffers::Vector<const org::apache::arrow::flatbuf::Buffer *> *>(VT_INDICESBUFFERS);
- }
- /// axisOrder stores the sequence in which dimensions were traversed to
- /// produce the prefix tree.
- /// For example, the axisOrder for the above X is:
- ///
- /// axisOrder(X) = [0, 1, 2, 3].
- ///
- const flatbuffers::Vector<int32_t> *axisOrder() const {
- return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_AXISORDER);
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyOffsetRequired(verifier, VT_INDPTRTYPE) &&
- verifier.VerifyTable(indptrType()) &&
- VerifyOffsetRequired(verifier, VT_INDPTRBUFFERS) &&
- verifier.VerifyVector(indptrBuffers()) &&
- VerifyOffsetRequired(verifier, VT_INDICESTYPE) &&
- verifier.VerifyTable(indicesType()) &&
- VerifyOffsetRequired(verifier, VT_INDICESBUFFERS) &&
- verifier.VerifyVector(indicesBuffers()) &&
- VerifyOffsetRequired(verifier, VT_AXISORDER) &&
- verifier.VerifyVector(axisOrder()) &&
- verifier.EndTable();
- }
-};
-
-struct SparseTensorIndexCSFBuilder {
- typedef SparseTensorIndexCSF Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_indptrType(flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indptrType) {
- fbb_.AddOffset(SparseTensorIndexCSF::VT_INDPTRTYPE, indptrType);
- }
- void add_indptrBuffers(flatbuffers::Offset<flatbuffers::Vector<const org::apache::arrow::flatbuf::Buffer *>> indptrBuffers) {
- fbb_.AddOffset(SparseTensorIndexCSF::VT_INDPTRBUFFERS, indptrBuffers);
- }
- void add_indicesType(flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indicesType) {
- fbb_.AddOffset(SparseTensorIndexCSF::VT_INDICESTYPE, indicesType);
- }
- void add_indicesBuffers(flatbuffers::Offset<flatbuffers::Vector<const org::apache::arrow::flatbuf::Buffer *>> indicesBuffers) {
- fbb_.AddOffset(SparseTensorIndexCSF::VT_INDICESBUFFERS, indicesBuffers);
- }
- void add_axisOrder(flatbuffers::Offset<flatbuffers::Vector<int32_t>> axisOrder) {
- fbb_.AddOffset(SparseTensorIndexCSF::VT_AXISORDER, axisOrder);
- }
- explicit SparseTensorIndexCSFBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- SparseTensorIndexCSFBuilder &operator=(const SparseTensorIndexCSFBuilder &);
- flatbuffers::Offset<SparseTensorIndexCSF> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SparseTensorIndexCSF>(end);
- fbb_.Required(o, SparseTensorIndexCSF::VT_INDPTRTYPE);
- fbb_.Required(o, SparseTensorIndexCSF::VT_INDPTRBUFFERS);
- fbb_.Required(o, SparseTensorIndexCSF::VT_INDICESTYPE);
- fbb_.Required(o, SparseTensorIndexCSF::VT_INDICESBUFFERS);
- fbb_.Required(o, SparseTensorIndexCSF::VT_AXISORDER);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SparseTensorIndexCSF> CreateSparseTensorIndexCSF(
- flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indptrType = 0,
- flatbuffers::Offset<flatbuffers::Vector<const org::apache::arrow::flatbuf::Buffer *>> indptrBuffers = 0,
- flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indicesType = 0,
- flatbuffers::Offset<flatbuffers::Vector<const org::apache::arrow::flatbuf::Buffer *>> indicesBuffers = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> axisOrder = 0) {
- SparseTensorIndexCSFBuilder builder_(_fbb);
- builder_.add_axisOrder(axisOrder);
- builder_.add_indicesBuffers(indicesBuffers);
- builder_.add_indicesType(indicesType);
- builder_.add_indptrBuffers(indptrBuffers);
- builder_.add_indptrType(indptrType);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<SparseTensorIndexCSF> CreateSparseTensorIndexCSFDirect(
- flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indptrType = 0,
- const std::vector<org::apache::arrow::flatbuf::Buffer> *indptrBuffers = nullptr,
- flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indicesType = 0,
- const std::vector<org::apache::arrow::flatbuf::Buffer> *indicesBuffers = nullptr,
- const std::vector<int32_t> *axisOrder = nullptr) {
- auto indptrBuffers__ = indptrBuffers ? _fbb.CreateVectorOfStructs<org::apache::arrow::flatbuf::Buffer>(*indptrBuffers) : 0;
- auto indicesBuffers__ = indicesBuffers ? _fbb.CreateVectorOfStructs<org::apache::arrow::flatbuf::Buffer>(*indicesBuffers) : 0;
- auto axisOrder__ = axisOrder ? _fbb.CreateVector<int32_t>(*axisOrder) : 0;
- return org::apache::arrow::flatbuf::CreateSparseTensorIndexCSF(
- _fbb,
- indptrType,
- indptrBuffers__,
- indicesType,
- indicesBuffers__,
- axisOrder__);
-}
-
-struct SparseTensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef SparseTensorBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_TYPE_TYPE = 4,
- VT_TYPE = 6,
- VT_SHAPE = 8,
- VT_NON_ZERO_LENGTH = 10,
- VT_SPARSEINDEX_TYPE = 12,
- VT_SPARSEINDEX = 14,
- VT_DATA = 16
- };
- org::apache::arrow::flatbuf::Type type_type() const {
- return static_cast<org::apache::arrow::flatbuf::Type>(GetField<uint8_t>(VT_TYPE_TYPE, 0));
- }
- /// The type of data contained in a value cell.
- /// Currently only fixed-width value types are supported,
- /// no strings or nested types.
- const void *type() const {
- return GetPointer<const void *>(VT_TYPE);
- }
- template<typename T> const T *type_as() const;
- const org::apache::arrow::flatbuf::Null *type_as_Null() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Null ? static_cast<const org::apache::arrow::flatbuf::Null *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Int *type_as_Int() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Int ? static_cast<const org::apache::arrow::flatbuf::Int *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::FloatingPoint *type_as_FloatingPoint() const {
- return type_type() == org::apache::arrow::flatbuf::Type::FloatingPoint ? static_cast<const org::apache::arrow::flatbuf::FloatingPoint *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Binary *type_as_Binary() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Binary ? static_cast<const org::apache::arrow::flatbuf::Binary *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Utf8 *type_as_Utf8() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Utf8 ? static_cast<const org::apache::arrow::flatbuf::Utf8 *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Bool *type_as_Bool() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Bool ? static_cast<const org::apache::arrow::flatbuf::Bool *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Decimal *type_as_Decimal() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Decimal ? static_cast<const org::apache::arrow::flatbuf::Decimal *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Date *type_as_Date() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Date ? static_cast<const org::apache::arrow::flatbuf::Date *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Time *type_as_Time() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Time ? static_cast<const org::apache::arrow::flatbuf::Time *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Timestamp *type_as_Timestamp() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Timestamp ? static_cast<const org::apache::arrow::flatbuf::Timestamp *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Interval *type_as_Interval() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Interval ? static_cast<const org::apache::arrow::flatbuf::Interval *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::List *type_as_List() const {
- return type_type() == org::apache::arrow::flatbuf::Type::List ? static_cast<const org::apache::arrow::flatbuf::List *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Struct_ *type_as_Struct_() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Struct_ ? static_cast<const org::apache::arrow::flatbuf::Struct_ *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Union *type_as_Union() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Union ? static_cast<const org::apache::arrow::flatbuf::Union *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::FixedSizeBinary *type_as_FixedSizeBinary() const {
- return type_type() == org::apache::arrow::flatbuf::Type::FixedSizeBinary ? static_cast<const org::apache::arrow::flatbuf::FixedSizeBinary *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::FixedSizeList *type_as_FixedSizeList() const {
- return type_type() == org::apache::arrow::flatbuf::Type::FixedSizeList ? static_cast<const org::apache::arrow::flatbuf::FixedSizeList *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Map *type_as_Map() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Map ? static_cast<const org::apache::arrow::flatbuf::Map *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Duration *type_as_Duration() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Duration ? static_cast<const org::apache::arrow::flatbuf::Duration *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::LargeBinary *type_as_LargeBinary() const {
- return type_type() == org::apache::arrow::flatbuf::Type::LargeBinary ? static_cast<const org::apache::arrow::flatbuf::LargeBinary *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::LargeUtf8 *type_as_LargeUtf8() const {
- return type_type() == org::apache::arrow::flatbuf::Type::LargeUtf8 ? static_cast<const org::apache::arrow::flatbuf::LargeUtf8 *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::LargeList *type_as_LargeList() const {
- return type_type() == org::apache::arrow::flatbuf::Type::LargeList ? static_cast<const org::apache::arrow::flatbuf::LargeList *>(type()) : nullptr;
- }
- /// The dimensions of the tensor, optionally named.
- const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::TensorDim>> *shape() const {
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::TensorDim>> *>(VT_SHAPE);
- }
- /// The number of non-zero values in a sparse tensor.
- int64_t non_zero_length() const {
- return GetField<int64_t>(VT_NON_ZERO_LENGTH, 0);
- }
- org::apache::arrow::flatbuf::SparseTensorIndex sparseIndex_type() const {
- return static_cast<org::apache::arrow::flatbuf::SparseTensorIndex>(GetField<uint8_t>(VT_SPARSEINDEX_TYPE, 0));
- }
- /// Sparse tensor index
- const void *sparseIndex() const {
- return GetPointer<const void *>(VT_SPARSEINDEX);
- }
- template<typename T> const T *sparseIndex_as() const;
- const org::apache::arrow::flatbuf::SparseTensorIndexCOO *sparseIndex_as_SparseTensorIndexCOO() const {
- return sparseIndex_type() == org::apache::arrow::flatbuf::SparseTensorIndex::SparseTensorIndexCOO ? static_cast<const org::apache::arrow::flatbuf::SparseTensorIndexCOO *>(sparseIndex()) : nullptr;
- }
- const org::apache::arrow::flatbuf::SparseMatrixIndexCSX *sparseIndex_as_SparseMatrixIndexCSX() const {
- return sparseIndex_type() == org::apache::arrow::flatbuf::SparseTensorIndex::SparseMatrixIndexCSX ? static_cast<const org::apache::arrow::flatbuf::SparseMatrixIndexCSX *>(sparseIndex()) : nullptr;
- }
- const org::apache::arrow::flatbuf::SparseTensorIndexCSF *sparseIndex_as_SparseTensorIndexCSF() const {
- return sparseIndex_type() == org::apache::arrow::flatbuf::SparseTensorIndex::SparseTensorIndexCSF ? static_cast<const org::apache::arrow::flatbuf::SparseTensorIndexCSF *>(sparseIndex()) : nullptr;
- }
- /// The location and size of the tensor's data
- const org::apache::arrow::flatbuf::Buffer *data() const {
- return GetStruct<const org::apache::arrow::flatbuf::Buffer *>(VT_DATA);
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyField<uint8_t>(verifier, VT_TYPE_TYPE) &&
- VerifyOffsetRequired(verifier, VT_TYPE) &&
- VerifyType(verifier, type(), type_type()) &&
- VerifyOffsetRequired(verifier, VT_SHAPE) &&
- verifier.VerifyVector(shape()) &&
- verifier.VerifyVectorOfTables(shape()) &&
- VerifyField<int64_t>(verifier, VT_NON_ZERO_LENGTH) &&
- VerifyField<uint8_t>(verifier, VT_SPARSEINDEX_TYPE) &&
- VerifyOffsetRequired(verifier, VT_SPARSEINDEX) &&
- VerifySparseTensorIndex(verifier, sparseIndex(), sparseIndex_type()) &&
- VerifyFieldRequired<org::apache::arrow::flatbuf::Buffer>(verifier, VT_DATA) &&
- verifier.EndTable();
- }
-};
-
-template<> inline const org::apache::arrow::flatbuf::Null *SparseTensor::type_as<org::apache::arrow::flatbuf::Null>() const {
- return type_as_Null();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Int *SparseTensor::type_as<org::apache::arrow::flatbuf::Int>() const {
- return type_as_Int();
-}
-
-template<> inline const org::apache::arrow::flatbuf::FloatingPoint *SparseTensor::type_as<org::apache::arrow::flatbuf::FloatingPoint>() const {
- return type_as_FloatingPoint();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Binary *SparseTensor::type_as<org::apache::arrow::flatbuf::Binary>() const {
- return type_as_Binary();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Utf8 *SparseTensor::type_as<org::apache::arrow::flatbuf::Utf8>() const {
- return type_as_Utf8();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Bool *SparseTensor::type_as<org::apache::arrow::flatbuf::Bool>() const {
- return type_as_Bool();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Decimal *SparseTensor::type_as<org::apache::arrow::flatbuf::Decimal>() const {
- return type_as_Decimal();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Date *SparseTensor::type_as<org::apache::arrow::flatbuf::Date>() const {
- return type_as_Date();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Time *SparseTensor::type_as<org::apache::arrow::flatbuf::Time>() const {
- return type_as_Time();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Timestamp *SparseTensor::type_as<org::apache::arrow::flatbuf::Timestamp>() const {
- return type_as_Timestamp();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Interval *SparseTensor::type_as<org::apache::arrow::flatbuf::Interval>() const {
- return type_as_Interval();
-}
-
-template<> inline const org::apache::arrow::flatbuf::List *SparseTensor::type_as<org::apache::arrow::flatbuf::List>() const {
- return type_as_List();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Struct_ *SparseTensor::type_as<org::apache::arrow::flatbuf::Struct_>() const {
- return type_as_Struct_();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Union *SparseTensor::type_as<org::apache::arrow::flatbuf::Union>() const {
- return type_as_Union();
-}
-
-template<> inline const org::apache::arrow::flatbuf::FixedSizeBinary *SparseTensor::type_as<org::apache::arrow::flatbuf::FixedSizeBinary>() const {
- return type_as_FixedSizeBinary();
-}
-
-template<> inline const org::apache::arrow::flatbuf::FixedSizeList *SparseTensor::type_as<org::apache::arrow::flatbuf::FixedSizeList>() const {
- return type_as_FixedSizeList();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Map *SparseTensor::type_as<org::apache::arrow::flatbuf::Map>() const {
- return type_as_Map();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Duration *SparseTensor::type_as<org::apache::arrow::flatbuf::Duration>() const {
- return type_as_Duration();
-}
-
-template<> inline const org::apache::arrow::flatbuf::LargeBinary *SparseTensor::type_as<org::apache::arrow::flatbuf::LargeBinary>() const {
- return type_as_LargeBinary();
-}
-
-template<> inline const org::apache::arrow::flatbuf::LargeUtf8 *SparseTensor::type_as<org::apache::arrow::flatbuf::LargeUtf8>() const {
- return type_as_LargeUtf8();
-}
-
-template<> inline const org::apache::arrow::flatbuf::LargeList *SparseTensor::type_as<org::apache::arrow::flatbuf::LargeList>() const {
- return type_as_LargeList();
-}
-
-template<> inline const org::apache::arrow::flatbuf::SparseTensorIndexCOO *SparseTensor::sparseIndex_as<org::apache::arrow::flatbuf::SparseTensorIndexCOO>() const {
- return sparseIndex_as_SparseTensorIndexCOO();
-}
-
-template<> inline const org::apache::arrow::flatbuf::SparseMatrixIndexCSX *SparseTensor::sparseIndex_as<org::apache::arrow::flatbuf::SparseMatrixIndexCSX>() const {
- return sparseIndex_as_SparseMatrixIndexCSX();
-}
-
-template<> inline const org::apache::arrow::flatbuf::SparseTensorIndexCSF *SparseTensor::sparseIndex_as<org::apache::arrow::flatbuf::SparseTensorIndexCSF>() const {
- return sparseIndex_as_SparseTensorIndexCSF();
-}
-
-struct SparseTensorBuilder {
- typedef SparseTensor Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_type_type(org::apache::arrow::flatbuf::Type type_type) {
- fbb_.AddElement<uint8_t>(SparseTensor::VT_TYPE_TYPE, static_cast<uint8_t>(type_type), 0);
- }
- void add_type(flatbuffers::Offset<void> type) {
- fbb_.AddOffset(SparseTensor::VT_TYPE, type);
- }
- void add_shape(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::TensorDim>>> shape) {
- fbb_.AddOffset(SparseTensor::VT_SHAPE, shape);
- }
- void add_non_zero_length(int64_t non_zero_length) {
- fbb_.AddElement<int64_t>(SparseTensor::VT_NON_ZERO_LENGTH, non_zero_length, 0);
- }
- void add_sparseIndex_type(org::apache::arrow::flatbuf::SparseTensorIndex sparseIndex_type) {
- fbb_.AddElement<uint8_t>(SparseTensor::VT_SPARSEINDEX_TYPE, static_cast<uint8_t>(sparseIndex_type), 0);
- }
- void add_sparseIndex(flatbuffers::Offset<void> sparseIndex) {
- fbb_.AddOffset(SparseTensor::VT_SPARSEINDEX, sparseIndex);
- }
- void add_data(const org::apache::arrow::flatbuf::Buffer *data) {
- fbb_.AddStruct(SparseTensor::VT_DATA, data);
- }
- explicit SparseTensorBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- SparseTensorBuilder &operator=(const SparseTensorBuilder &);
- flatbuffers::Offset<SparseTensor> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SparseTensor>(end);
- fbb_.Required(o, SparseTensor::VT_TYPE);
- fbb_.Required(o, SparseTensor::VT_SHAPE);
- fbb_.Required(o, SparseTensor::VT_SPARSEINDEX);
- fbb_.Required(o, SparseTensor::VT_DATA);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SparseTensor> CreateSparseTensor(
- flatbuffers::FlatBufferBuilder &_fbb,
- org::apache::arrow::flatbuf::Type type_type = org::apache::arrow::flatbuf::Type::NONE,
- flatbuffers::Offset<void> type = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::TensorDim>>> shape = 0,
- int64_t non_zero_length = 0,
- org::apache::arrow::flatbuf::SparseTensorIndex sparseIndex_type = org::apache::arrow::flatbuf::SparseTensorIndex::NONE,
- flatbuffers::Offset<void> sparseIndex = 0,
- const org::apache::arrow::flatbuf::Buffer *data = 0) {
- SparseTensorBuilder builder_(_fbb);
- builder_.add_non_zero_length(non_zero_length);
- builder_.add_data(data);
- builder_.add_sparseIndex(sparseIndex);
- builder_.add_shape(shape);
- builder_.add_type(type);
- builder_.add_sparseIndex_type(sparseIndex_type);
- builder_.add_type_type(type_type);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<SparseTensor> CreateSparseTensorDirect(
- flatbuffers::FlatBufferBuilder &_fbb,
- org::apache::arrow::flatbuf::Type type_type = org::apache::arrow::flatbuf::Type::NONE,
- flatbuffers::Offset<void> type = 0,
- const std::vector<flatbuffers::Offset<org::apache::arrow::flatbuf::TensorDim>> *shape = nullptr,
- int64_t non_zero_length = 0,
- org::apache::arrow::flatbuf::SparseTensorIndex sparseIndex_type = org::apache::arrow::flatbuf::SparseTensorIndex::NONE,
- flatbuffers::Offset<void> sparseIndex = 0,
- const org::apache::arrow::flatbuf::Buffer *data = 0) {
- auto shape__ = shape ? _fbb.CreateVector<flatbuffers::Offset<org::apache::arrow::flatbuf::TensorDim>>(*shape) : 0;
- return org::apache::arrow::flatbuf::CreateSparseTensor(
- _fbb,
- type_type,
- type,
- shape__,
- non_zero_length,
- sparseIndex_type,
- sparseIndex,
- data);
-}
-
-inline bool VerifySparseTensorIndex(flatbuffers::Verifier &verifier, const void *obj, SparseTensorIndex type) {
- switch (type) {
- case SparseTensorIndex::NONE: {
- return true;
- }
- case SparseTensorIndex::SparseTensorIndexCOO: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::SparseTensorIndexCOO *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case SparseTensorIndex::SparseMatrixIndexCSX: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::SparseMatrixIndexCSX *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case SparseTensorIndex::SparseTensorIndexCSF: {
- auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::SparseTensorIndexCSF *>(obj);
- return verifier.VerifyTable(ptr);
- }
- default: return true;
- }
-}
-
-inline bool VerifySparseTensorIndexVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types) {
- if (!values || !types) return !values && !types;
- if (values->size() != types->size()) return false;
- for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) {
- if (!VerifySparseTensorIndex(
- verifier, values->Get(i), types->GetEnum<SparseTensorIndex>(i))) {
- return false;
- }
- }
- return true;
-}
-
-inline const org::apache::arrow::flatbuf::SparseTensor *GetSparseTensor(const void *buf) {
- return flatbuffers::GetRoot<org::apache::arrow::flatbuf::SparseTensor>(buf);
-}
-
-inline const org::apache::arrow::flatbuf::SparseTensor *GetSizePrefixedSparseTensor(const void *buf) {
- return flatbuffers::GetSizePrefixedRoot<org::apache::arrow::flatbuf::SparseTensor>(buf);
-}
-
-inline bool VerifySparseTensorBuffer(
- flatbuffers::Verifier &verifier) {
- return verifier.VerifyBuffer<org::apache::arrow::flatbuf::SparseTensor>(nullptr);
-}
-
-inline bool VerifySizePrefixedSparseTensorBuffer(
- flatbuffers::Verifier &verifier) {
- return verifier.VerifySizePrefixedBuffer<org::apache::arrow::flatbuf::SparseTensor>(nullptr);
-}
-
-inline void FinishSparseTensorBuffer(
- flatbuffers::FlatBufferBuilder &fbb,
- flatbuffers::Offset<org::apache::arrow::flatbuf::SparseTensor> root) {
- fbb.Finish(root);
-}
-
-inline void FinishSizePrefixedSparseTensorBuffer(
- flatbuffers::FlatBufferBuilder &fbb,
- flatbuffers::Offset<org::apache::arrow::flatbuf::SparseTensor> root) {
- fbb.FinishSizePrefixed(root);
-}
-
-} // namespace flatbuf
-} // namespace arrow
-} // namespace apache
-} // namespace org
-
-#endif // FLATBUFFERS_GENERATED_SPARSETENSOR_ORG_APACHE_ARROW_FLATBUF_H_
+// automatically generated by the FlatBuffers compiler, do not modify
+
+
+#ifndef FLATBUFFERS_GENERATED_SPARSETENSOR_ORG_APACHE_ARROW_FLATBUF_H_
+#define FLATBUFFERS_GENERATED_SPARSETENSOR_ORG_APACHE_ARROW_FLATBUF_H_
+
+#include "flatbuffers/flatbuffers.h"
+
+#include "Schema_generated.h"
+#include "Tensor_generated.h"
+
+namespace org {
+namespace apache {
+namespace arrow {
+namespace flatbuf {
+
+struct SparseTensorIndexCOO;
+struct SparseTensorIndexCOOBuilder;
+
+struct SparseMatrixIndexCSX;
+struct SparseMatrixIndexCSXBuilder;
+
+struct SparseTensorIndexCSF;
+struct SparseTensorIndexCSFBuilder;
+
+struct SparseTensor;
+struct SparseTensorBuilder;
+
+enum class SparseMatrixCompressedAxis : int16_t {
+ Row = 0,
+ Column = 1,
+ MIN = Row,
+ MAX = Column
+};
+
+inline const SparseMatrixCompressedAxis (&EnumValuesSparseMatrixCompressedAxis())[2] {
+ static const SparseMatrixCompressedAxis values[] = {
+ SparseMatrixCompressedAxis::Row,
+ SparseMatrixCompressedAxis::Column
+ };
+ return values;
+}
+
+inline const char * const *EnumNamesSparseMatrixCompressedAxis() {
+ static const char * const names[3] = {
+ "Row",
+ "Column",
+ nullptr
+ };
+ return names;
+}
+
+inline const char *EnumNameSparseMatrixCompressedAxis(SparseMatrixCompressedAxis e) {
+ if (flatbuffers::IsOutRange(e, SparseMatrixCompressedAxis::Row, SparseMatrixCompressedAxis::Column)) return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesSparseMatrixCompressedAxis()[index];
+}
+
+enum class SparseTensorIndex : uint8_t {
+ NONE = 0,
+ SparseTensorIndexCOO = 1,
+ SparseMatrixIndexCSX = 2,
+ SparseTensorIndexCSF = 3,
+ MIN = NONE,
+ MAX = SparseTensorIndexCSF
+};
+
+inline const SparseTensorIndex (&EnumValuesSparseTensorIndex())[4] {
+ static const SparseTensorIndex values[] = {
+ SparseTensorIndex::NONE,
+ SparseTensorIndex::SparseTensorIndexCOO,
+ SparseTensorIndex::SparseMatrixIndexCSX,
+ SparseTensorIndex::SparseTensorIndexCSF
+ };
+ return values;
+}
+
+inline const char * const *EnumNamesSparseTensorIndex() {
+ static const char * const names[5] = {
+ "NONE",
+ "SparseTensorIndexCOO",
+ "SparseMatrixIndexCSX",
+ "SparseTensorIndexCSF",
+ nullptr
+ };
+ return names;
+}
+
+inline const char *EnumNameSparseTensorIndex(SparseTensorIndex e) {
+ if (flatbuffers::IsOutRange(e, SparseTensorIndex::NONE, SparseTensorIndex::SparseTensorIndexCSF)) return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesSparseTensorIndex()[index];
+}
+
+template<typename T> struct SparseTensorIndexTraits {
+ static const SparseTensorIndex enum_value = SparseTensorIndex::NONE;
+};
+
+template<> struct SparseTensorIndexTraits<org::apache::arrow::flatbuf::SparseTensorIndexCOO> {
+ static const SparseTensorIndex enum_value = SparseTensorIndex::SparseTensorIndexCOO;
+};
+
+template<> struct SparseTensorIndexTraits<org::apache::arrow::flatbuf::SparseMatrixIndexCSX> {
+ static const SparseTensorIndex enum_value = SparseTensorIndex::SparseMatrixIndexCSX;
+};
+
+template<> struct SparseTensorIndexTraits<org::apache::arrow::flatbuf::SparseTensorIndexCSF> {
+ static const SparseTensorIndex enum_value = SparseTensorIndex::SparseTensorIndexCSF;
+};
+
+bool VerifySparseTensorIndex(flatbuffers::Verifier &verifier, const void *obj, SparseTensorIndex type);
+bool VerifySparseTensorIndexVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types);
+
+/// ----------------------------------------------------------------------
+/// EXPERIMENTAL: Data structures for sparse tensors
+/// Coordinate (COO) format of sparse tensor index.
+///
+/// COO's index list are represented as a NxM matrix,
+/// where N is the number of non-zero values,
+/// and M is the number of dimensions of a sparse tensor.
+///
+/// indicesBuffer stores the location and size of the data of this indices
+/// matrix. The value type and the stride of the indices matrix is
+/// specified in indicesType and indicesStrides fields.
+///
+/// For example, let X be a 2x3x4x5 tensor, and it has the following
+/// 6 non-zero values:
+///
+/// X[0, 1, 2, 0] := 1
+/// X[1, 1, 2, 3] := 2
+/// X[0, 2, 1, 0] := 3
+/// X[0, 1, 3, 0] := 4
+/// X[0, 1, 2, 1] := 5
+/// X[1, 2, 0, 4] := 6
+///
+/// In COO format, the index matrix of X is the following 4x6 matrix:
+///
+/// [[0, 0, 0, 0, 1, 1],
+/// [1, 1, 1, 2, 1, 2],
+/// [2, 2, 3, 1, 2, 0],
+/// [0, 1, 0, 0, 3, 4]]
+///
+/// Note that the indices are sorted in lexicographical order.
+struct SparseTensorIndexCOO FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef SparseTensorIndexCOOBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+ VT_INDICESTYPE = 4,
+ VT_INDICESSTRIDES = 6,
+ VT_INDICESBUFFER = 8,
+ VT_ISCANONICAL = 10
+ };
+ /// The type of values in indicesBuffer
+ const org::apache::arrow::flatbuf::Int *indicesType() const {
+ return GetPointer<const org::apache::arrow::flatbuf::Int *>(VT_INDICESTYPE);
+ }
+ /// Non-negative byte offsets to advance one value cell along each dimension
+ /// If omitted, default to row-major order (C-like).
+ const flatbuffers::Vector<int64_t> *indicesStrides() const {
+ return GetPointer<const flatbuffers::Vector<int64_t> *>(VT_INDICESSTRIDES);
+ }
+ /// The location and size of the indices matrix's data
+ const org::apache::arrow::flatbuf::Buffer *indicesBuffer() const {
+ return GetStruct<const org::apache::arrow::flatbuf::Buffer *>(VT_INDICESBUFFER);
+ }
+ /// The canonicality flag
+ bool isCanonical() const {
+ return GetField<uint8_t>(VT_ISCANONICAL, 0) != 0;
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ VerifyOffsetRequired(verifier, VT_INDICESTYPE) &&
+ verifier.VerifyTable(indicesType()) &&
+ VerifyOffset(verifier, VT_INDICESSTRIDES) &&
+ verifier.VerifyVector(indicesStrides()) &&
+ VerifyFieldRequired<org::apache::arrow::flatbuf::Buffer>(verifier, VT_INDICESBUFFER) &&
+ VerifyField<uint8_t>(verifier, VT_ISCANONICAL) &&
+ verifier.EndTable();
+ }
+};
+
+struct SparseTensorIndexCOOBuilder {
+ typedef SparseTensorIndexCOO Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_indicesType(flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indicesType) {
+ fbb_.AddOffset(SparseTensorIndexCOO::VT_INDICESTYPE, indicesType);
+ }
+ void add_indicesStrides(flatbuffers::Offset<flatbuffers::Vector<int64_t>> indicesStrides) {
+ fbb_.AddOffset(SparseTensorIndexCOO::VT_INDICESSTRIDES, indicesStrides);
+ }
+ void add_indicesBuffer(const org::apache::arrow::flatbuf::Buffer *indicesBuffer) {
+ fbb_.AddStruct(SparseTensorIndexCOO::VT_INDICESBUFFER, indicesBuffer);
+ }
+ void add_isCanonical(bool isCanonical) {
+ fbb_.AddElement<uint8_t>(SparseTensorIndexCOO::VT_ISCANONICAL, static_cast<uint8_t>(isCanonical), 0);
+ }
+ explicit SparseTensorIndexCOOBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ SparseTensorIndexCOOBuilder &operator=(const SparseTensorIndexCOOBuilder &);
+ flatbuffers::Offset<SparseTensorIndexCOO> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SparseTensorIndexCOO>(end);
+ fbb_.Required(o, SparseTensorIndexCOO::VT_INDICESTYPE);
+ fbb_.Required(o, SparseTensorIndexCOO::VT_INDICESBUFFER);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SparseTensorIndexCOO> CreateSparseTensorIndexCOO(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indicesType = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int64_t>> indicesStrides = 0,
+ const org::apache::arrow::flatbuf::Buffer *indicesBuffer = 0,
+ bool isCanonical = false) {
+ SparseTensorIndexCOOBuilder builder_(_fbb);
+ builder_.add_indicesBuffer(indicesBuffer);
+ builder_.add_indicesStrides(indicesStrides);
+ builder_.add_indicesType(indicesType);
+ builder_.add_isCanonical(isCanonical);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<SparseTensorIndexCOO> CreateSparseTensorIndexCOODirect(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indicesType = 0,
+ const std::vector<int64_t> *indicesStrides = nullptr,
+ const org::apache::arrow::flatbuf::Buffer *indicesBuffer = 0,
+ bool isCanonical = false) {
+ auto indicesStrides__ = indicesStrides ? _fbb.CreateVector<int64_t>(*indicesStrides) : 0;
+ return org::apache::arrow::flatbuf::CreateSparseTensorIndexCOO(
+ _fbb,
+ indicesType,
+ indicesStrides__,
+ indicesBuffer,
+ isCanonical);
+}
+
+/// Compressed Sparse format, that is matrix-specific.
+struct SparseMatrixIndexCSX FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef SparseMatrixIndexCSXBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+ VT_COMPRESSEDAXIS = 4,
+ VT_INDPTRTYPE = 6,
+ VT_INDPTRBUFFER = 8,
+ VT_INDICESTYPE = 10,
+ VT_INDICESBUFFER = 12
+ };
+ /// Which axis, row or column, is compressed
+ org::apache::arrow::flatbuf::SparseMatrixCompressedAxis compressedAxis() const {
+ return static_cast<org::apache::arrow::flatbuf::SparseMatrixCompressedAxis>(GetField<int16_t>(VT_COMPRESSEDAXIS, 0));
+ }
+ /// The type of values in indptrBuffer
+ const org::apache::arrow::flatbuf::Int *indptrType() const {
+ return GetPointer<const org::apache::arrow::flatbuf::Int *>(VT_INDPTRTYPE);
+ }
+ /// indptrBuffer stores the location and size of indptr array that
+ /// represents the range of the rows.
+ /// The i-th row spans from indptr[i] to indptr[i+1] in the data.
+ /// The length of this array is 1 + (the number of rows), and the type
+ /// of index value is long.
+ ///
+ /// For example, let X be the following 6x4 matrix:
+ ///
+ /// X := [[0, 1, 2, 0],
+ /// [0, 0, 3, 0],
+ /// [0, 4, 0, 5],
+ /// [0, 0, 0, 0],
+ /// [6, 0, 7, 8],
+ /// [0, 9, 0, 0]].
+ ///
+ /// The array of non-zero values in X is:
+ ///
+ /// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
+ ///
+ /// And the indptr of X is:
+ ///
+ /// indptr(X) = [0, 2, 3, 5, 5, 8, 10].
+ const org::apache::arrow::flatbuf::Buffer *indptrBuffer() const {
+ return GetStruct<const org::apache::arrow::flatbuf::Buffer *>(VT_INDPTRBUFFER);
+ }
+ /// The type of values in indicesBuffer
+ const org::apache::arrow::flatbuf::Int *indicesType() const {
+ return GetPointer<const org::apache::arrow::flatbuf::Int *>(VT_INDICESTYPE);
+ }
+ /// indicesBuffer stores the location and size of the array that
+ /// contains the column indices of the corresponding non-zero values.
+ /// The type of index value is long.
+ ///
+ /// For example, the indices of the above X is:
+ ///
+ /// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
+ ///
+ /// Note that the indices are sorted in lexicographical order for each row.
+ const org::apache::arrow::flatbuf::Buffer *indicesBuffer() const {
+ return GetStruct<const org::apache::arrow::flatbuf::Buffer *>(VT_INDICESBUFFER);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int16_t>(verifier, VT_COMPRESSEDAXIS) &&
+ VerifyOffsetRequired(verifier, VT_INDPTRTYPE) &&
+ verifier.VerifyTable(indptrType()) &&
+ VerifyFieldRequired<org::apache::arrow::flatbuf::Buffer>(verifier, VT_INDPTRBUFFER) &&
+ VerifyOffsetRequired(verifier, VT_INDICESTYPE) &&
+ verifier.VerifyTable(indicesType()) &&
+ VerifyFieldRequired<org::apache::arrow::flatbuf::Buffer>(verifier, VT_INDICESBUFFER) &&
+ verifier.EndTable();
+ }
+};
+
+struct SparseMatrixIndexCSXBuilder {
+ typedef SparseMatrixIndexCSX Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_compressedAxis(org::apache::arrow::flatbuf::SparseMatrixCompressedAxis compressedAxis) {
+ fbb_.AddElement<int16_t>(SparseMatrixIndexCSX::VT_COMPRESSEDAXIS, static_cast<int16_t>(compressedAxis), 0);
+ }
+ void add_indptrType(flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indptrType) {
+ fbb_.AddOffset(SparseMatrixIndexCSX::VT_INDPTRTYPE, indptrType);
+ }
+ void add_indptrBuffer(const org::apache::arrow::flatbuf::Buffer *indptrBuffer) {
+ fbb_.AddStruct(SparseMatrixIndexCSX::VT_INDPTRBUFFER, indptrBuffer);
+ }
+ void add_indicesType(flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indicesType) {
+ fbb_.AddOffset(SparseMatrixIndexCSX::VT_INDICESTYPE, indicesType);
+ }
+ void add_indicesBuffer(const org::apache::arrow::flatbuf::Buffer *indicesBuffer) {
+ fbb_.AddStruct(SparseMatrixIndexCSX::VT_INDICESBUFFER, indicesBuffer);
+ }
+ explicit SparseMatrixIndexCSXBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ SparseMatrixIndexCSXBuilder &operator=(const SparseMatrixIndexCSXBuilder &);
+ flatbuffers::Offset<SparseMatrixIndexCSX> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SparseMatrixIndexCSX>(end);
+ fbb_.Required(o, SparseMatrixIndexCSX::VT_INDPTRTYPE);
+ fbb_.Required(o, SparseMatrixIndexCSX::VT_INDPTRBUFFER);
+ fbb_.Required(o, SparseMatrixIndexCSX::VT_INDICESTYPE);
+ fbb_.Required(o, SparseMatrixIndexCSX::VT_INDICESBUFFER);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SparseMatrixIndexCSX> CreateSparseMatrixIndexCSX(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ org::apache::arrow::flatbuf::SparseMatrixCompressedAxis compressedAxis = org::apache::arrow::flatbuf::SparseMatrixCompressedAxis::Row,
+ flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indptrType = 0,
+ const org::apache::arrow::flatbuf::Buffer *indptrBuffer = 0,
+ flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indicesType = 0,
+ const org::apache::arrow::flatbuf::Buffer *indicesBuffer = 0) {
+ SparseMatrixIndexCSXBuilder builder_(_fbb);
+ builder_.add_indicesBuffer(indicesBuffer);
+ builder_.add_indicesType(indicesType);
+ builder_.add_indptrBuffer(indptrBuffer);
+ builder_.add_indptrType(indptrType);
+ builder_.add_compressedAxis(compressedAxis);
+ return builder_.Finish();
+}
+
+/// Compressed Sparse Fiber (CSF) sparse tensor index.
+struct SparseTensorIndexCSF FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef SparseTensorIndexCSFBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+ VT_INDPTRTYPE = 4,
+ VT_INDPTRBUFFERS = 6,
+ VT_INDICESTYPE = 8,
+ VT_INDICESBUFFERS = 10,
+ VT_AXISORDER = 12
+ };
+ /// CSF is a generalization of compressed sparse row (CSR) index.
+ /// See [smith2017knl]: http://shaden.io/pub-files/smith2017knl.pdf
+ ///
+ /// CSF index recursively compresses each dimension of a tensor into a set
+ /// of prefix trees. Each path from a root to leaf forms one tensor
+ /// non-zero index. CSF is implemented with two arrays of buffers and one
+ /// arrays of integers.
+ ///
+ /// For example, let X be a 2x3x4x5 tensor and let it have the following
+ /// 8 non-zero values:
+ ///
+ /// X[0, 0, 0, 1] := 1
+ /// X[0, 0, 0, 2] := 2
+ /// X[0, 1, 0, 0] := 3
+ /// X[0, 1, 0, 2] := 4
+ /// X[0, 1, 1, 0] := 5
+ /// X[1, 1, 1, 0] := 6
+ /// X[1, 1, 1, 1] := 7
+ /// X[1, 1, 1, 2] := 8
+ ///
+ /// As a prefix tree this would be represented as:
+ ///
+ /// 0 1
+ /// / \ |
+ /// 0 1 1
+ /// / / \ |
+ /// 0 0 1 1
+ /// /| /| | /| |
+ /// 1 2 0 2 0 0 1 2
+ /// The type of values in indptrBuffers
+ const org::apache::arrow::flatbuf::Int *indptrType() const {
+ return GetPointer<const org::apache::arrow::flatbuf::Int *>(VT_INDPTRTYPE);
+ }
+ /// indptrBuffers stores the sparsity structure.
+ /// Each two consecutive dimensions in a tensor correspond to a buffer in
+ /// indptrBuffers. A pair of consecutive values at indptrBuffers[dim][i]
+ /// and indptrBuffers[dim][i + 1] signify a range of nodes in
+ /// indicesBuffers[dim + 1] who are children of indicesBuffers[dim][i] node.
+ ///
+ /// For example, the indptrBuffers for the above X is:
+ ///
+ /// indptrBuffer(X) = [
+ /// [0, 2, 3],
+ /// [0, 1, 3, 4],
+ /// [0, 2, 4, 5, 8]
+ /// ].
+ ///
+ const flatbuffers::Vector<const org::apache::arrow::flatbuf::Buffer *> *indptrBuffers() const {
+ return GetPointer<const flatbuffers::Vector<const org::apache::arrow::flatbuf::Buffer *> *>(VT_INDPTRBUFFERS);
+ }
+ /// The type of values in indicesBuffers
+ const org::apache::arrow::flatbuf::Int *indicesType() const {
+ return GetPointer<const org::apache::arrow::flatbuf::Int *>(VT_INDICESTYPE);
+ }
+ /// indicesBuffers stores values of nodes.
+ /// Each tensor dimension corresponds to a buffer in indicesBuffers.
+ /// For example, the indicesBuffers for the above X is:
+ ///
+ /// indicesBuffer(X) = [
+ /// [0, 1],
+ /// [0, 1, 1],
+ /// [0, 0, 1, 1],
+ /// [1, 2, 0, 2, 0, 0, 1, 2]
+ /// ].
+ ///
+ const flatbuffers::Vector<const org::apache::arrow::flatbuf::Buffer *> *indicesBuffers() const {
+ return GetPointer<const flatbuffers::Vector<const org::apache::arrow::flatbuf::Buffer *> *>(VT_INDICESBUFFERS);
+ }
+ /// axisOrder stores the sequence in which dimensions were traversed to
+ /// produce the prefix tree.
+ /// For example, the axisOrder for the above X is:
+ ///
+ /// axisOrder(X) = [0, 1, 2, 3].
+ ///
+ const flatbuffers::Vector<int32_t> *axisOrder() const {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_AXISORDER);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ VerifyOffsetRequired(verifier, VT_INDPTRTYPE) &&
+ verifier.VerifyTable(indptrType()) &&
+ VerifyOffsetRequired(verifier, VT_INDPTRBUFFERS) &&
+ verifier.VerifyVector(indptrBuffers()) &&
+ VerifyOffsetRequired(verifier, VT_INDICESTYPE) &&
+ verifier.VerifyTable(indicesType()) &&
+ VerifyOffsetRequired(verifier, VT_INDICESBUFFERS) &&
+ verifier.VerifyVector(indicesBuffers()) &&
+ VerifyOffsetRequired(verifier, VT_AXISORDER) &&
+ verifier.VerifyVector(axisOrder()) &&
+ verifier.EndTable();
+ }
+};
+
+struct SparseTensorIndexCSFBuilder {
+ typedef SparseTensorIndexCSF Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_indptrType(flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indptrType) {
+ fbb_.AddOffset(SparseTensorIndexCSF::VT_INDPTRTYPE, indptrType);
+ }
+ void add_indptrBuffers(flatbuffers::Offset<flatbuffers::Vector<const org::apache::arrow::flatbuf::Buffer *>> indptrBuffers) {
+ fbb_.AddOffset(SparseTensorIndexCSF::VT_INDPTRBUFFERS, indptrBuffers);
+ }
+ void add_indicesType(flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indicesType) {
+ fbb_.AddOffset(SparseTensorIndexCSF::VT_INDICESTYPE, indicesType);
+ }
+ void add_indicesBuffers(flatbuffers::Offset<flatbuffers::Vector<const org::apache::arrow::flatbuf::Buffer *>> indicesBuffers) {
+ fbb_.AddOffset(SparseTensorIndexCSF::VT_INDICESBUFFERS, indicesBuffers);
+ }
+ void add_axisOrder(flatbuffers::Offset<flatbuffers::Vector<int32_t>> axisOrder) {
+ fbb_.AddOffset(SparseTensorIndexCSF::VT_AXISORDER, axisOrder);
+ }
+ explicit SparseTensorIndexCSFBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ SparseTensorIndexCSFBuilder &operator=(const SparseTensorIndexCSFBuilder &);
+ flatbuffers::Offset<SparseTensorIndexCSF> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SparseTensorIndexCSF>(end);
+ fbb_.Required(o, SparseTensorIndexCSF::VT_INDPTRTYPE);
+ fbb_.Required(o, SparseTensorIndexCSF::VT_INDPTRBUFFERS);
+ fbb_.Required(o, SparseTensorIndexCSF::VT_INDICESTYPE);
+ fbb_.Required(o, SparseTensorIndexCSF::VT_INDICESBUFFERS);
+ fbb_.Required(o, SparseTensorIndexCSF::VT_AXISORDER);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SparseTensorIndexCSF> CreateSparseTensorIndexCSF(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indptrType = 0,
+ flatbuffers::Offset<flatbuffers::Vector<const org::apache::arrow::flatbuf::Buffer *>> indptrBuffers = 0,
+ flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indicesType = 0,
+ flatbuffers::Offset<flatbuffers::Vector<const org::apache::arrow::flatbuf::Buffer *>> indicesBuffers = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> axisOrder = 0) {
+ SparseTensorIndexCSFBuilder builder_(_fbb);
+ builder_.add_axisOrder(axisOrder);
+ builder_.add_indicesBuffers(indicesBuffers);
+ builder_.add_indicesType(indicesType);
+ builder_.add_indptrBuffers(indptrBuffers);
+ builder_.add_indptrType(indptrType);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<SparseTensorIndexCSF> CreateSparseTensorIndexCSFDirect(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indptrType = 0,
+ const std::vector<org::apache::arrow::flatbuf::Buffer> *indptrBuffers = nullptr,
+ flatbuffers::Offset<org::apache::arrow::flatbuf::Int> indicesType = 0,
+ const std::vector<org::apache::arrow::flatbuf::Buffer> *indicesBuffers = nullptr,
+ const std::vector<int32_t> *axisOrder = nullptr) {
+ auto indptrBuffers__ = indptrBuffers ? _fbb.CreateVectorOfStructs<org::apache::arrow::flatbuf::Buffer>(*indptrBuffers) : 0;
+ auto indicesBuffers__ = indicesBuffers ? _fbb.CreateVectorOfStructs<org::apache::arrow::flatbuf::Buffer>(*indicesBuffers) : 0;
+ auto axisOrder__ = axisOrder ? _fbb.CreateVector<int32_t>(*axisOrder) : 0;
+ return org::apache::arrow::flatbuf::CreateSparseTensorIndexCSF(
+ _fbb,
+ indptrType,
+ indptrBuffers__,
+ indicesType,
+ indicesBuffers__,
+ axisOrder__);
+}
+
+struct SparseTensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef SparseTensorBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+ VT_TYPE_TYPE = 4,
+ VT_TYPE = 6,
+ VT_SHAPE = 8,
+ VT_NON_ZERO_LENGTH = 10,
+ VT_SPARSEINDEX_TYPE = 12,
+ VT_SPARSEINDEX = 14,
+ VT_DATA = 16
+ };
+ org::apache::arrow::flatbuf::Type type_type() const {
+ return static_cast<org::apache::arrow::flatbuf::Type>(GetField<uint8_t>(VT_TYPE_TYPE, 0));
+ }
+ /// The type of data contained in a value cell.
+ /// Currently only fixed-width value types are supported,
+ /// no strings or nested types.
+ const void *type() const {
+ return GetPointer<const void *>(VT_TYPE);
+ }
+ template<typename T> const T *type_as() const;
+ const org::apache::arrow::flatbuf::Null *type_as_Null() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Null ? static_cast<const org::apache::arrow::flatbuf::Null *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Int *type_as_Int() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Int ? static_cast<const org::apache::arrow::flatbuf::Int *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::FloatingPoint *type_as_FloatingPoint() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::FloatingPoint ? static_cast<const org::apache::arrow::flatbuf::FloatingPoint *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Binary *type_as_Binary() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Binary ? static_cast<const org::apache::arrow::flatbuf::Binary *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Utf8 *type_as_Utf8() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Utf8 ? static_cast<const org::apache::arrow::flatbuf::Utf8 *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Bool *type_as_Bool() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Bool ? static_cast<const org::apache::arrow::flatbuf::Bool *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Decimal *type_as_Decimal() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Decimal ? static_cast<const org::apache::arrow::flatbuf::Decimal *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Date *type_as_Date() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Date ? static_cast<const org::apache::arrow::flatbuf::Date *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Time *type_as_Time() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Time ? static_cast<const org::apache::arrow::flatbuf::Time *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Timestamp *type_as_Timestamp() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Timestamp ? static_cast<const org::apache::arrow::flatbuf::Timestamp *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Interval *type_as_Interval() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Interval ? static_cast<const org::apache::arrow::flatbuf::Interval *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::List *type_as_List() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::List ? static_cast<const org::apache::arrow::flatbuf::List *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Struct_ *type_as_Struct_() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Struct_ ? static_cast<const org::apache::arrow::flatbuf::Struct_ *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Union *type_as_Union() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Union ? static_cast<const org::apache::arrow::flatbuf::Union *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::FixedSizeBinary *type_as_FixedSizeBinary() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::FixedSizeBinary ? static_cast<const org::apache::arrow::flatbuf::FixedSizeBinary *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::FixedSizeList *type_as_FixedSizeList() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::FixedSizeList ? static_cast<const org::apache::arrow::flatbuf::FixedSizeList *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Map *type_as_Map() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Map ? static_cast<const org::apache::arrow::flatbuf::Map *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Duration *type_as_Duration() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Duration ? static_cast<const org::apache::arrow::flatbuf::Duration *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::LargeBinary *type_as_LargeBinary() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::LargeBinary ? static_cast<const org::apache::arrow::flatbuf::LargeBinary *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::LargeUtf8 *type_as_LargeUtf8() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::LargeUtf8 ? static_cast<const org::apache::arrow::flatbuf::LargeUtf8 *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::LargeList *type_as_LargeList() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::LargeList ? static_cast<const org::apache::arrow::flatbuf::LargeList *>(type()) : nullptr;
+ }
+ /// The dimensions of the tensor, optionally named.
+ const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::TensorDim>> *shape() const {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::TensorDim>> *>(VT_SHAPE);
+ }
+ /// The number of non-zero values in a sparse tensor.
+ int64_t non_zero_length() const {
+ return GetField<int64_t>(VT_NON_ZERO_LENGTH, 0);
+ }
+ org::apache::arrow::flatbuf::SparseTensorIndex sparseIndex_type() const {
+ return static_cast<org::apache::arrow::flatbuf::SparseTensorIndex>(GetField<uint8_t>(VT_SPARSEINDEX_TYPE, 0));
+ }
+ /// Sparse tensor index
+ const void *sparseIndex() const {
+ return GetPointer<const void *>(VT_SPARSEINDEX);
+ }
+ template<typename T> const T *sparseIndex_as() const;
+ const org::apache::arrow::flatbuf::SparseTensorIndexCOO *sparseIndex_as_SparseTensorIndexCOO() const {
+ return sparseIndex_type() == org::apache::arrow::flatbuf::SparseTensorIndex::SparseTensorIndexCOO ? static_cast<const org::apache::arrow::flatbuf::SparseTensorIndexCOO *>(sparseIndex()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::SparseMatrixIndexCSX *sparseIndex_as_SparseMatrixIndexCSX() const {
+ return sparseIndex_type() == org::apache::arrow::flatbuf::SparseTensorIndex::SparseMatrixIndexCSX ? static_cast<const org::apache::arrow::flatbuf::SparseMatrixIndexCSX *>(sparseIndex()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::SparseTensorIndexCSF *sparseIndex_as_SparseTensorIndexCSF() const {
+ return sparseIndex_type() == org::apache::arrow::flatbuf::SparseTensorIndex::SparseTensorIndexCSF ? static_cast<const org::apache::arrow::flatbuf::SparseTensorIndexCSF *>(sparseIndex()) : nullptr;
+ }
+ /// The location and size of the tensor's data
+ const org::apache::arrow::flatbuf::Buffer *data() const {
+ return GetStruct<const org::apache::arrow::flatbuf::Buffer *>(VT_DATA);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ VerifyField<uint8_t>(verifier, VT_TYPE_TYPE) &&
+ VerifyOffsetRequired(verifier, VT_TYPE) &&
+ VerifyType(verifier, type(), type_type()) &&
+ VerifyOffsetRequired(verifier, VT_SHAPE) &&
+ verifier.VerifyVector(shape()) &&
+ verifier.VerifyVectorOfTables(shape()) &&
+ VerifyField<int64_t>(verifier, VT_NON_ZERO_LENGTH) &&
+ VerifyField<uint8_t>(verifier, VT_SPARSEINDEX_TYPE) &&
+ VerifyOffsetRequired(verifier, VT_SPARSEINDEX) &&
+ VerifySparseTensorIndex(verifier, sparseIndex(), sparseIndex_type()) &&
+ VerifyFieldRequired<org::apache::arrow::flatbuf::Buffer>(verifier, VT_DATA) &&
+ verifier.EndTable();
+ }
+};
+
+template<> inline const org::apache::arrow::flatbuf::Null *SparseTensor::type_as<org::apache::arrow::flatbuf::Null>() const {
+ return type_as_Null();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Int *SparseTensor::type_as<org::apache::arrow::flatbuf::Int>() const {
+ return type_as_Int();
+}
+
+template<> inline const org::apache::arrow::flatbuf::FloatingPoint *SparseTensor::type_as<org::apache::arrow::flatbuf::FloatingPoint>() const {
+ return type_as_FloatingPoint();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Binary *SparseTensor::type_as<org::apache::arrow::flatbuf::Binary>() const {
+ return type_as_Binary();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Utf8 *SparseTensor::type_as<org::apache::arrow::flatbuf::Utf8>() const {
+ return type_as_Utf8();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Bool *SparseTensor::type_as<org::apache::arrow::flatbuf::Bool>() const {
+ return type_as_Bool();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Decimal *SparseTensor::type_as<org::apache::arrow::flatbuf::Decimal>() const {
+ return type_as_Decimal();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Date *SparseTensor::type_as<org::apache::arrow::flatbuf::Date>() const {
+ return type_as_Date();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Time *SparseTensor::type_as<org::apache::arrow::flatbuf::Time>() const {
+ return type_as_Time();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Timestamp *SparseTensor::type_as<org::apache::arrow::flatbuf::Timestamp>() const {
+ return type_as_Timestamp();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Interval *SparseTensor::type_as<org::apache::arrow::flatbuf::Interval>() const {
+ return type_as_Interval();
+}
+
+template<> inline const org::apache::arrow::flatbuf::List *SparseTensor::type_as<org::apache::arrow::flatbuf::List>() const {
+ return type_as_List();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Struct_ *SparseTensor::type_as<org::apache::arrow::flatbuf::Struct_>() const {
+ return type_as_Struct_();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Union *SparseTensor::type_as<org::apache::arrow::flatbuf::Union>() const {
+ return type_as_Union();
+}
+
+template<> inline const org::apache::arrow::flatbuf::FixedSizeBinary *SparseTensor::type_as<org::apache::arrow::flatbuf::FixedSizeBinary>() const {
+ return type_as_FixedSizeBinary();
+}
+
+template<> inline const org::apache::arrow::flatbuf::FixedSizeList *SparseTensor::type_as<org::apache::arrow::flatbuf::FixedSizeList>() const {
+ return type_as_FixedSizeList();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Map *SparseTensor::type_as<org::apache::arrow::flatbuf::Map>() const {
+ return type_as_Map();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Duration *SparseTensor::type_as<org::apache::arrow::flatbuf::Duration>() const {
+ return type_as_Duration();
+}
+
+template<> inline const org::apache::arrow::flatbuf::LargeBinary *SparseTensor::type_as<org::apache::arrow::flatbuf::LargeBinary>() const {
+ return type_as_LargeBinary();
+}
+
+template<> inline const org::apache::arrow::flatbuf::LargeUtf8 *SparseTensor::type_as<org::apache::arrow::flatbuf::LargeUtf8>() const {
+ return type_as_LargeUtf8();
+}
+
+template<> inline const org::apache::arrow::flatbuf::LargeList *SparseTensor::type_as<org::apache::arrow::flatbuf::LargeList>() const {
+ return type_as_LargeList();
+}
+
+template<> inline const org::apache::arrow::flatbuf::SparseTensorIndexCOO *SparseTensor::sparseIndex_as<org::apache::arrow::flatbuf::SparseTensorIndexCOO>() const {
+ return sparseIndex_as_SparseTensorIndexCOO();
+}
+
+template<> inline const org::apache::arrow::flatbuf::SparseMatrixIndexCSX *SparseTensor::sparseIndex_as<org::apache::arrow::flatbuf::SparseMatrixIndexCSX>() const {
+ return sparseIndex_as_SparseMatrixIndexCSX();
+}
+
+template<> inline const org::apache::arrow::flatbuf::SparseTensorIndexCSF *SparseTensor::sparseIndex_as<org::apache::arrow::flatbuf::SparseTensorIndexCSF>() const {
+ return sparseIndex_as_SparseTensorIndexCSF();
+}
+
+struct SparseTensorBuilder {
+ typedef SparseTensor Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_type_type(org::apache::arrow::flatbuf::Type type_type) {
+ fbb_.AddElement<uint8_t>(SparseTensor::VT_TYPE_TYPE, static_cast<uint8_t>(type_type), 0);
+ }
+ void add_type(flatbuffers::Offset<void> type) {
+ fbb_.AddOffset(SparseTensor::VT_TYPE, type);
+ }
+ void add_shape(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::TensorDim>>> shape) {
+ fbb_.AddOffset(SparseTensor::VT_SHAPE, shape);
+ }
+ void add_non_zero_length(int64_t non_zero_length) {
+ fbb_.AddElement<int64_t>(SparseTensor::VT_NON_ZERO_LENGTH, non_zero_length, 0);
+ }
+ void add_sparseIndex_type(org::apache::arrow::flatbuf::SparseTensorIndex sparseIndex_type) {
+ fbb_.AddElement<uint8_t>(SparseTensor::VT_SPARSEINDEX_TYPE, static_cast<uint8_t>(sparseIndex_type), 0);
+ }
+ void add_sparseIndex(flatbuffers::Offset<void> sparseIndex) {
+ fbb_.AddOffset(SparseTensor::VT_SPARSEINDEX, sparseIndex);
+ }
+ void add_data(const org::apache::arrow::flatbuf::Buffer *data) {
+ fbb_.AddStruct(SparseTensor::VT_DATA, data);
+ }
+ explicit SparseTensorBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ SparseTensorBuilder &operator=(const SparseTensorBuilder &);
+ flatbuffers::Offset<SparseTensor> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SparseTensor>(end);
+ fbb_.Required(o, SparseTensor::VT_TYPE);
+ fbb_.Required(o, SparseTensor::VT_SHAPE);
+ fbb_.Required(o, SparseTensor::VT_SPARSEINDEX);
+ fbb_.Required(o, SparseTensor::VT_DATA);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SparseTensor> CreateSparseTensor(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ org::apache::arrow::flatbuf::Type type_type = org::apache::arrow::flatbuf::Type::NONE,
+ flatbuffers::Offset<void> type = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::TensorDim>>> shape = 0,
+ int64_t non_zero_length = 0,
+ org::apache::arrow::flatbuf::SparseTensorIndex sparseIndex_type = org::apache::arrow::flatbuf::SparseTensorIndex::NONE,
+ flatbuffers::Offset<void> sparseIndex = 0,
+ const org::apache::arrow::flatbuf::Buffer *data = 0) {
+ SparseTensorBuilder builder_(_fbb);
+ builder_.add_non_zero_length(non_zero_length);
+ builder_.add_data(data);
+ builder_.add_sparseIndex(sparseIndex);
+ builder_.add_shape(shape);
+ builder_.add_type(type);
+ builder_.add_sparseIndex_type(sparseIndex_type);
+ builder_.add_type_type(type_type);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<SparseTensor> CreateSparseTensorDirect(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ org::apache::arrow::flatbuf::Type type_type = org::apache::arrow::flatbuf::Type::NONE,
+ flatbuffers::Offset<void> type = 0,
+ const std::vector<flatbuffers::Offset<org::apache::arrow::flatbuf::TensorDim>> *shape = nullptr,
+ int64_t non_zero_length = 0,
+ org::apache::arrow::flatbuf::SparseTensorIndex sparseIndex_type = org::apache::arrow::flatbuf::SparseTensorIndex::NONE,
+ flatbuffers::Offset<void> sparseIndex = 0,
+ const org::apache::arrow::flatbuf::Buffer *data = 0) {
+ auto shape__ = shape ? _fbb.CreateVector<flatbuffers::Offset<org::apache::arrow::flatbuf::TensorDim>>(*shape) : 0;
+ return org::apache::arrow::flatbuf::CreateSparseTensor(
+ _fbb,
+ type_type,
+ type,
+ shape__,
+ non_zero_length,
+ sparseIndex_type,
+ sparseIndex,
+ data);
+}
+
+inline bool VerifySparseTensorIndex(flatbuffers::Verifier &verifier, const void *obj, SparseTensorIndex type) {
+ switch (type) {
+ case SparseTensorIndex::NONE: {
+ return true;
+ }
+ case SparseTensorIndex::SparseTensorIndexCOO: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::SparseTensorIndexCOO *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case SparseTensorIndex::SparseMatrixIndexCSX: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::SparseMatrixIndexCSX *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case SparseTensorIndex::SparseTensorIndexCSF: {
+ auto ptr = reinterpret_cast<const org::apache::arrow::flatbuf::SparseTensorIndexCSF *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ default: return true;
+ }
+}
+
+inline bool VerifySparseTensorIndexVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types) {
+ if (!values || !types) return !values && !types;
+ if (values->size() != types->size()) return false;
+ for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) {
+ if (!VerifySparseTensorIndex(
+ verifier, values->Get(i), types->GetEnum<SparseTensorIndex>(i))) {
+ return false;
+ }
+ }
+ return true;
+}
+
+inline const org::apache::arrow::flatbuf::SparseTensor *GetSparseTensor(const void *buf) {
+ return flatbuffers::GetRoot<org::apache::arrow::flatbuf::SparseTensor>(buf);
+}
+
+inline const org::apache::arrow::flatbuf::SparseTensor *GetSizePrefixedSparseTensor(const void *buf) {
+ return flatbuffers::GetSizePrefixedRoot<org::apache::arrow::flatbuf::SparseTensor>(buf);
+}
+
+inline bool VerifySparseTensorBuffer(
+ flatbuffers::Verifier &verifier) {
+ return verifier.VerifyBuffer<org::apache::arrow::flatbuf::SparseTensor>(nullptr);
+}
+
+inline bool VerifySizePrefixedSparseTensorBuffer(
+ flatbuffers::Verifier &verifier) {
+ return verifier.VerifySizePrefixedBuffer<org::apache::arrow::flatbuf::SparseTensor>(nullptr);
+}
+
+inline void FinishSparseTensorBuffer(
+ flatbuffers::FlatBufferBuilder &fbb,
+ flatbuffers::Offset<org::apache::arrow::flatbuf::SparseTensor> root) {
+ fbb.Finish(root);
+}
+
+inline void FinishSizePrefixedSparseTensorBuffer(
+ flatbuffers::FlatBufferBuilder &fbb,
+ flatbuffers::Offset<org::apache::arrow::flatbuf::SparseTensor> root) {
+ fbb.FinishSizePrefixed(root);
+}
+
+} // namespace flatbuf
+} // namespace arrow
+} // namespace apache
+} // namespace org
+
+#endif // FLATBUFFERS_GENERATED_SPARSETENSOR_ORG_APACHE_ARROW_FLATBUF_H_
diff --git a/contrib/libs/apache/arrow/cpp/src/generated/Tensor_generated.h b/contrib/libs/apache/arrow/cpp/src/generated/Tensor_generated.h
index 062a3b91aaa..e48b2168010 100644
--- a/contrib/libs/apache/arrow/cpp/src/generated/Tensor_generated.h
+++ b/contrib/libs/apache/arrow/cpp/src/generated/Tensor_generated.h
@@ -1,387 +1,387 @@
-// automatically generated by the FlatBuffers compiler, do not modify
-
-
-#ifndef FLATBUFFERS_GENERATED_TENSOR_ORG_APACHE_ARROW_FLATBUF_H_
-#define FLATBUFFERS_GENERATED_TENSOR_ORG_APACHE_ARROW_FLATBUF_H_
-
-#include "flatbuffers/flatbuffers.h"
-
-#include "Schema_generated.h"
-
-namespace org {
-namespace apache {
-namespace arrow {
-namespace flatbuf {
-
-struct TensorDim;
-struct TensorDimBuilder;
-
-struct Tensor;
-struct TensorBuilder;
-
-/// ----------------------------------------------------------------------
-/// Data structures for dense tensors
-/// Shape data for a single axis in a tensor
-struct TensorDim FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef TensorDimBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_SIZE = 4,
- VT_NAME = 6
- };
- /// Length of dimension
- int64_t size() const {
- return GetField<int64_t>(VT_SIZE, 0);
- }
- /// Name of the dimension, optional
- const flatbuffers::String *name() const {
- return GetPointer<const flatbuffers::String *>(VT_NAME);
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyField<int64_t>(verifier, VT_SIZE) &&
- VerifyOffset(verifier, VT_NAME) &&
- verifier.VerifyString(name()) &&
- verifier.EndTable();
- }
-};
-
-struct TensorDimBuilder {
- typedef TensorDim Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_size(int64_t size) {
- fbb_.AddElement<int64_t>(TensorDim::VT_SIZE, size, 0);
- }
- void add_name(flatbuffers::Offset<flatbuffers::String> name) {
- fbb_.AddOffset(TensorDim::VT_NAME, name);
- }
- explicit TensorDimBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- TensorDimBuilder &operator=(const TensorDimBuilder &);
- flatbuffers::Offset<TensorDim> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<TensorDim>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<TensorDim> CreateTensorDim(
- flatbuffers::FlatBufferBuilder &_fbb,
- int64_t size = 0,
- flatbuffers::Offset<flatbuffers::String> name = 0) {
- TensorDimBuilder builder_(_fbb);
- builder_.add_size(size);
- builder_.add_name(name);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<TensorDim> CreateTensorDimDirect(
- flatbuffers::FlatBufferBuilder &_fbb,
- int64_t size = 0,
- const char *name = nullptr) {
- auto name__ = name ? _fbb.CreateString(name) : 0;
- return org::apache::arrow::flatbuf::CreateTensorDim(
- _fbb,
- size,
- name__);
-}
-
-struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef TensorBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_TYPE_TYPE = 4,
- VT_TYPE = 6,
- VT_SHAPE = 8,
- VT_STRIDES = 10,
- VT_DATA = 12
- };
- org::apache::arrow::flatbuf::Type type_type() const {
- return static_cast<org::apache::arrow::flatbuf::Type>(GetField<uint8_t>(VT_TYPE_TYPE, 0));
- }
- /// The type of data contained in a value cell. Currently only fixed-width
- /// value types are supported, no strings or nested types
- const void *type() const {
- return GetPointer<const void *>(VT_TYPE);
- }
- template<typename T> const T *type_as() const;
- const org::apache::arrow::flatbuf::Null *type_as_Null() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Null ? static_cast<const org::apache::arrow::flatbuf::Null *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Int *type_as_Int() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Int ? static_cast<const org::apache::arrow::flatbuf::Int *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::FloatingPoint *type_as_FloatingPoint() const {
- return type_type() == org::apache::arrow::flatbuf::Type::FloatingPoint ? static_cast<const org::apache::arrow::flatbuf::FloatingPoint *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Binary *type_as_Binary() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Binary ? static_cast<const org::apache::arrow::flatbuf::Binary *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Utf8 *type_as_Utf8() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Utf8 ? static_cast<const org::apache::arrow::flatbuf::Utf8 *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Bool *type_as_Bool() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Bool ? static_cast<const org::apache::arrow::flatbuf::Bool *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Decimal *type_as_Decimal() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Decimal ? static_cast<const org::apache::arrow::flatbuf::Decimal *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Date *type_as_Date() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Date ? static_cast<const org::apache::arrow::flatbuf::Date *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Time *type_as_Time() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Time ? static_cast<const org::apache::arrow::flatbuf::Time *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Timestamp *type_as_Timestamp() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Timestamp ? static_cast<const org::apache::arrow::flatbuf::Timestamp *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Interval *type_as_Interval() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Interval ? static_cast<const org::apache::arrow::flatbuf::Interval *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::List *type_as_List() const {
- return type_type() == org::apache::arrow::flatbuf::Type::List ? static_cast<const org::apache::arrow::flatbuf::List *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Struct_ *type_as_Struct_() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Struct_ ? static_cast<const org::apache::arrow::flatbuf::Struct_ *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Union *type_as_Union() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Union ? static_cast<const org::apache::arrow::flatbuf::Union *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::FixedSizeBinary *type_as_FixedSizeBinary() const {
- return type_type() == org::apache::arrow::flatbuf::Type::FixedSizeBinary ? static_cast<const org::apache::arrow::flatbuf::FixedSizeBinary *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::FixedSizeList *type_as_FixedSizeList() const {
- return type_type() == org::apache::arrow::flatbuf::Type::FixedSizeList ? static_cast<const org::apache::arrow::flatbuf::FixedSizeList *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Map *type_as_Map() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Map ? static_cast<const org::apache::arrow::flatbuf::Map *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::Duration *type_as_Duration() const {
- return type_type() == org::apache::arrow::flatbuf::Type::Duration ? static_cast<const org::apache::arrow::flatbuf::Duration *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::LargeBinary *type_as_LargeBinary() const {
- return type_type() == org::apache::arrow::flatbuf::Type::LargeBinary ? static_cast<const org::apache::arrow::flatbuf::LargeBinary *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::LargeUtf8 *type_as_LargeUtf8() const {
- return type_type() == org::apache::arrow::flatbuf::Type::LargeUtf8 ? static_cast<const org::apache::arrow::flatbuf::LargeUtf8 *>(type()) : nullptr;
- }
- const org::apache::arrow::flatbuf::LargeList *type_as_LargeList() const {
- return type_type() == org::apache::arrow::flatbuf::Type::LargeList ? static_cast<const org::apache::arrow::flatbuf::LargeList *>(type()) : nullptr;
- }
- /// The dimensions of the tensor, optionally named
- const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::TensorDim>> *shape() const {
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::TensorDim>> *>(VT_SHAPE);
- }
- /// Non-negative byte offsets to advance one value cell along each dimension
- /// If omitted, default to row-major order (C-like).
- const flatbuffers::Vector<int64_t> *strides() const {
- return GetPointer<const flatbuffers::Vector<int64_t> *>(VT_STRIDES);
- }
- /// The location and size of the tensor's data
- const org::apache::arrow::flatbuf::Buffer *data() const {
- return GetStruct<const org::apache::arrow::flatbuf::Buffer *>(VT_DATA);
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyField<uint8_t>(verifier, VT_TYPE_TYPE) &&
- VerifyOffsetRequired(verifier, VT_TYPE) &&
- VerifyType(verifier, type(), type_type()) &&
- VerifyOffsetRequired(verifier, VT_SHAPE) &&
- verifier.VerifyVector(shape()) &&
- verifier.VerifyVectorOfTables(shape()) &&
- VerifyOffset(verifier, VT_STRIDES) &&
- verifier.VerifyVector(strides()) &&
- VerifyFieldRequired<org::apache::arrow::flatbuf::Buffer>(verifier, VT_DATA) &&
- verifier.EndTable();
- }
-};
-
-template<> inline const org::apache::arrow::flatbuf::Null *Tensor::type_as<org::apache::arrow::flatbuf::Null>() const {
- return type_as_Null();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Int *Tensor::type_as<org::apache::arrow::flatbuf::Int>() const {
- return type_as_Int();
-}
-
-template<> inline const org::apache::arrow::flatbuf::FloatingPoint *Tensor::type_as<org::apache::arrow::flatbuf::FloatingPoint>() const {
- return type_as_FloatingPoint();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Binary *Tensor::type_as<org::apache::arrow::flatbuf::Binary>() const {
- return type_as_Binary();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Utf8 *Tensor::type_as<org::apache::arrow::flatbuf::Utf8>() const {
- return type_as_Utf8();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Bool *Tensor::type_as<org::apache::arrow::flatbuf::Bool>() const {
- return type_as_Bool();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Decimal *Tensor::type_as<org::apache::arrow::flatbuf::Decimal>() const {
- return type_as_Decimal();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Date *Tensor::type_as<org::apache::arrow::flatbuf::Date>() const {
- return type_as_Date();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Time *Tensor::type_as<org::apache::arrow::flatbuf::Time>() const {
- return type_as_Time();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Timestamp *Tensor::type_as<org::apache::arrow::flatbuf::Timestamp>() const {
- return type_as_Timestamp();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Interval *Tensor::type_as<org::apache::arrow::flatbuf::Interval>() const {
- return type_as_Interval();
-}
-
-template<> inline const org::apache::arrow::flatbuf::List *Tensor::type_as<org::apache::arrow::flatbuf::List>() const {
- return type_as_List();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Struct_ *Tensor::type_as<org::apache::arrow::flatbuf::Struct_>() const {
- return type_as_Struct_();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Union *Tensor::type_as<org::apache::arrow::flatbuf::Union>() const {
- return type_as_Union();
-}
-
-template<> inline const org::apache::arrow::flatbuf::FixedSizeBinary *Tensor::type_as<org::apache::arrow::flatbuf::FixedSizeBinary>() const {
- return type_as_FixedSizeBinary();
-}
-
-template<> inline const org::apache::arrow::flatbuf::FixedSizeList *Tensor::type_as<org::apache::arrow::flatbuf::FixedSizeList>() const {
- return type_as_FixedSizeList();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Map *Tensor::type_as<org::apache::arrow::flatbuf::Map>() const {
- return type_as_Map();
-}
-
-template<> inline const org::apache::arrow::flatbuf::Duration *Tensor::type_as<org::apache::arrow::flatbuf::Duration>() const {
- return type_as_Duration();
-}
-
-template<> inline const org::apache::arrow::flatbuf::LargeBinary *Tensor::type_as<org::apache::arrow::flatbuf::LargeBinary>() const {
- return type_as_LargeBinary();
-}
-
-template<> inline const org::apache::arrow::flatbuf::LargeUtf8 *Tensor::type_as<org::apache::arrow::flatbuf::LargeUtf8>() const {
- return type_as_LargeUtf8();
-}
-
-template<> inline const org::apache::arrow::flatbuf::LargeList *Tensor::type_as<org::apache::arrow::flatbuf::LargeList>() const {
- return type_as_LargeList();
-}
-
-struct TensorBuilder {
- typedef Tensor Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_type_type(org::apache::arrow::flatbuf::Type type_type) {
- fbb_.AddElement<uint8_t>(Tensor::VT_TYPE_TYPE, static_cast<uint8_t>(type_type), 0);
- }
- void add_type(flatbuffers::Offset<void> type) {
- fbb_.AddOffset(Tensor::VT_TYPE, type);
- }
- void add_shape(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::TensorDim>>> shape) {
- fbb_.AddOffset(Tensor::VT_SHAPE, shape);
- }
- void add_strides(flatbuffers::Offset<flatbuffers::Vector<int64_t>> strides) {
- fbb_.AddOffset(Tensor::VT_STRIDES, strides);
- }
- void add_data(const org::apache::arrow::flatbuf::Buffer *data) {
- fbb_.AddStruct(Tensor::VT_DATA, data);
- }
- explicit TensorBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- TensorBuilder &operator=(const TensorBuilder &);
- flatbuffers::Offset<Tensor> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Tensor>(end);
- fbb_.Required(o, Tensor::VT_TYPE);
- fbb_.Required(o, Tensor::VT_SHAPE);
- fbb_.Required(o, Tensor::VT_DATA);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Tensor> CreateTensor(
- flatbuffers::FlatBufferBuilder &_fbb,
- org::apache::arrow::flatbuf::Type type_type = org::apache::arrow::flatbuf::Type::NONE,
- flatbuffers::Offset<void> type = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::TensorDim>>> shape = 0,
- flatbuffers::Offset<flatbuffers::Vector<int64_t>> strides = 0,
- const org::apache::arrow::flatbuf::Buffer *data = 0) {
- TensorBuilder builder_(_fbb);
- builder_.add_data(data);
- builder_.add_strides(strides);
- builder_.add_shape(shape);
- builder_.add_type(type);
- builder_.add_type_type(type_type);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Tensor> CreateTensorDirect(
- flatbuffers::FlatBufferBuilder &_fbb,
- org::apache::arrow::flatbuf::Type type_type = org::apache::arrow::flatbuf::Type::NONE,
- flatbuffers::Offset<void> type = 0,
- const std::vector<flatbuffers::Offset<org::apache::arrow::flatbuf::TensorDim>> *shape = nullptr,
- const std::vector<int64_t> *strides = nullptr,
- const org::apache::arrow::flatbuf::Buffer *data = 0) {
- auto shape__ = shape ? _fbb.CreateVector<flatbuffers::Offset<org::apache::arrow::flatbuf::TensorDim>>(*shape) : 0;
- auto strides__ = strides ? _fbb.CreateVector<int64_t>(*strides) : 0;
- return org::apache::arrow::flatbuf::CreateTensor(
- _fbb,
- type_type,
- type,
- shape__,
- strides__,
- data);
-}
-
-inline const org::apache::arrow::flatbuf::Tensor *GetTensor(const void *buf) {
- return flatbuffers::GetRoot<org::apache::arrow::flatbuf::Tensor>(buf);
-}
-
-inline const org::apache::arrow::flatbuf::Tensor *GetSizePrefixedTensor(const void *buf) {
- return flatbuffers::GetSizePrefixedRoot<org::apache::arrow::flatbuf::Tensor>(buf);
-}
-
-inline bool VerifyTensorBuffer(
- flatbuffers::Verifier &verifier) {
- return verifier.VerifyBuffer<org::apache::arrow::flatbuf::Tensor>(nullptr);
-}
-
-inline bool VerifySizePrefixedTensorBuffer(
- flatbuffers::Verifier &verifier) {
- return verifier.VerifySizePrefixedBuffer<org::apache::arrow::flatbuf::Tensor>(nullptr);
-}
-
-inline void FinishTensorBuffer(
- flatbuffers::FlatBufferBuilder &fbb,
- flatbuffers::Offset<org::apache::arrow::flatbuf::Tensor> root) {
- fbb.Finish(root);
-}
-
-inline void FinishSizePrefixedTensorBuffer(
- flatbuffers::FlatBufferBuilder &fbb,
- flatbuffers::Offset<org::apache::arrow::flatbuf::Tensor> root) {
- fbb.FinishSizePrefixed(root);
-}
-
-} // namespace flatbuf
-} // namespace arrow
-} // namespace apache
-} // namespace org
-
-#endif // FLATBUFFERS_GENERATED_TENSOR_ORG_APACHE_ARROW_FLATBUF_H_
+// automatically generated by the FlatBuffers compiler, do not modify
+
+
+#ifndef FLATBUFFERS_GENERATED_TENSOR_ORG_APACHE_ARROW_FLATBUF_H_
+#define FLATBUFFERS_GENERATED_TENSOR_ORG_APACHE_ARROW_FLATBUF_H_
+
+#include "flatbuffers/flatbuffers.h"
+
+#include "Schema_generated.h"
+
+namespace org {
+namespace apache {
+namespace arrow {
+namespace flatbuf {
+
+struct TensorDim;
+struct TensorDimBuilder;
+
+struct Tensor;
+struct TensorBuilder;
+
+/// ----------------------------------------------------------------------
+/// Data structures for dense tensors
+/// Shape data for a single axis in a tensor
+struct TensorDim FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef TensorDimBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+ VT_SIZE = 4,
+ VT_NAME = 6
+ };
+ /// Length of dimension
+ int64_t size() const {
+ return GetField<int64_t>(VT_SIZE, 0);
+ }
+ /// Name of the dimension, optional
+ const flatbuffers::String *name() const {
+ return GetPointer<const flatbuffers::String *>(VT_NAME);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int64_t>(verifier, VT_SIZE) &&
+ VerifyOffset(verifier, VT_NAME) &&
+ verifier.VerifyString(name()) &&
+ verifier.EndTable();
+ }
+};
+
+struct TensorDimBuilder {
+ typedef TensorDim Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_size(int64_t size) {
+ fbb_.AddElement<int64_t>(TensorDim::VT_SIZE, size, 0);
+ }
+ void add_name(flatbuffers::Offset<flatbuffers::String> name) {
+ fbb_.AddOffset(TensorDim::VT_NAME, name);
+ }
+ explicit TensorDimBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ TensorDimBuilder &operator=(const TensorDimBuilder &);
+ flatbuffers::Offset<TensorDim> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<TensorDim>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<TensorDim> CreateTensorDim(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ int64_t size = 0,
+ flatbuffers::Offset<flatbuffers::String> name = 0) {
+ TensorDimBuilder builder_(_fbb);
+ builder_.add_size(size);
+ builder_.add_name(name);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<TensorDim> CreateTensorDimDirect(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ int64_t size = 0,
+ const char *name = nullptr) {
+ auto name__ = name ? _fbb.CreateString(name) : 0;
+ return org::apache::arrow::flatbuf::CreateTensorDim(
+ _fbb,
+ size,
+ name__);
+}
+
+struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef TensorBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+ VT_TYPE_TYPE = 4,
+ VT_TYPE = 6,
+ VT_SHAPE = 8,
+ VT_STRIDES = 10,
+ VT_DATA = 12
+ };
+ org::apache::arrow::flatbuf::Type type_type() const {
+ return static_cast<org::apache::arrow::flatbuf::Type>(GetField<uint8_t>(VT_TYPE_TYPE, 0));
+ }
+ /// The type of data contained in a value cell. Currently only fixed-width
+ /// value types are supported, no strings or nested types
+ const void *type() const {
+ return GetPointer<const void *>(VT_TYPE);
+ }
+ template<typename T> const T *type_as() const;
+ const org::apache::arrow::flatbuf::Null *type_as_Null() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Null ? static_cast<const org::apache::arrow::flatbuf::Null *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Int *type_as_Int() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Int ? static_cast<const org::apache::arrow::flatbuf::Int *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::FloatingPoint *type_as_FloatingPoint() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::FloatingPoint ? static_cast<const org::apache::arrow::flatbuf::FloatingPoint *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Binary *type_as_Binary() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Binary ? static_cast<const org::apache::arrow::flatbuf::Binary *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Utf8 *type_as_Utf8() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Utf8 ? static_cast<const org::apache::arrow::flatbuf::Utf8 *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Bool *type_as_Bool() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Bool ? static_cast<const org::apache::arrow::flatbuf::Bool *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Decimal *type_as_Decimal() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Decimal ? static_cast<const org::apache::arrow::flatbuf::Decimal *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Date *type_as_Date() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Date ? static_cast<const org::apache::arrow::flatbuf::Date *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Time *type_as_Time() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Time ? static_cast<const org::apache::arrow::flatbuf::Time *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Timestamp *type_as_Timestamp() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Timestamp ? static_cast<const org::apache::arrow::flatbuf::Timestamp *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Interval *type_as_Interval() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Interval ? static_cast<const org::apache::arrow::flatbuf::Interval *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::List *type_as_List() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::List ? static_cast<const org::apache::arrow::flatbuf::List *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Struct_ *type_as_Struct_() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Struct_ ? static_cast<const org::apache::arrow::flatbuf::Struct_ *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Union *type_as_Union() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Union ? static_cast<const org::apache::arrow::flatbuf::Union *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::FixedSizeBinary *type_as_FixedSizeBinary() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::FixedSizeBinary ? static_cast<const org::apache::arrow::flatbuf::FixedSizeBinary *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::FixedSizeList *type_as_FixedSizeList() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::FixedSizeList ? static_cast<const org::apache::arrow::flatbuf::FixedSizeList *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Map *type_as_Map() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Map ? static_cast<const org::apache::arrow::flatbuf::Map *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::Duration *type_as_Duration() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::Duration ? static_cast<const org::apache::arrow::flatbuf::Duration *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::LargeBinary *type_as_LargeBinary() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::LargeBinary ? static_cast<const org::apache::arrow::flatbuf::LargeBinary *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::LargeUtf8 *type_as_LargeUtf8() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::LargeUtf8 ? static_cast<const org::apache::arrow::flatbuf::LargeUtf8 *>(type()) : nullptr;
+ }
+ const org::apache::arrow::flatbuf::LargeList *type_as_LargeList() const {
+ return type_type() == org::apache::arrow::flatbuf::Type::LargeList ? static_cast<const org::apache::arrow::flatbuf::LargeList *>(type()) : nullptr;
+ }
+ /// The dimensions of the tensor, optionally named
+ const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::TensorDim>> *shape() const {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::TensorDim>> *>(VT_SHAPE);
+ }
+ /// Non-negative byte offsets to advance one value cell along each dimension
+ /// If omitted, default to row-major order (C-like).
+ const flatbuffers::Vector<int64_t> *strides() const {
+ return GetPointer<const flatbuffers::Vector<int64_t> *>(VT_STRIDES);
+ }
+ /// The location and size of the tensor's data
+ const org::apache::arrow::flatbuf::Buffer *data() const {
+ return GetStruct<const org::apache::arrow::flatbuf::Buffer *>(VT_DATA);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ VerifyField<uint8_t>(verifier, VT_TYPE_TYPE) &&
+ VerifyOffsetRequired(verifier, VT_TYPE) &&
+ VerifyType(verifier, type(), type_type()) &&
+ VerifyOffsetRequired(verifier, VT_SHAPE) &&
+ verifier.VerifyVector(shape()) &&
+ verifier.VerifyVectorOfTables(shape()) &&
+ VerifyOffset(verifier, VT_STRIDES) &&
+ verifier.VerifyVector(strides()) &&
+ VerifyFieldRequired<org::apache::arrow::flatbuf::Buffer>(verifier, VT_DATA) &&
+ verifier.EndTable();
+ }
+};
+
+template<> inline const org::apache::arrow::flatbuf::Null *Tensor::type_as<org::apache::arrow::flatbuf::Null>() const {
+ return type_as_Null();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Int *Tensor::type_as<org::apache::arrow::flatbuf::Int>() const {
+ return type_as_Int();
+}
+
+template<> inline const org::apache::arrow::flatbuf::FloatingPoint *Tensor::type_as<org::apache::arrow::flatbuf::FloatingPoint>() const {
+ return type_as_FloatingPoint();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Binary *Tensor::type_as<org::apache::arrow::flatbuf::Binary>() const {
+ return type_as_Binary();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Utf8 *Tensor::type_as<org::apache::arrow::flatbuf::Utf8>() const {
+ return type_as_Utf8();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Bool *Tensor::type_as<org::apache::arrow::flatbuf::Bool>() const {
+ return type_as_Bool();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Decimal *Tensor::type_as<org::apache::arrow::flatbuf::Decimal>() const {
+ return type_as_Decimal();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Date *Tensor::type_as<org::apache::arrow::flatbuf::Date>() const {
+ return type_as_Date();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Time *Tensor::type_as<org::apache::arrow::flatbuf::Time>() const {
+ return type_as_Time();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Timestamp *Tensor::type_as<org::apache::arrow::flatbuf::Timestamp>() const {
+ return type_as_Timestamp();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Interval *Tensor::type_as<org::apache::arrow::flatbuf::Interval>() const {
+ return type_as_Interval();
+}
+
+template<> inline const org::apache::arrow::flatbuf::List *Tensor::type_as<org::apache::arrow::flatbuf::List>() const {
+ return type_as_List();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Struct_ *Tensor::type_as<org::apache::arrow::flatbuf::Struct_>() const {
+ return type_as_Struct_();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Union *Tensor::type_as<org::apache::arrow::flatbuf::Union>() const {
+ return type_as_Union();
+}
+
+template<> inline const org::apache::arrow::flatbuf::FixedSizeBinary *Tensor::type_as<org::apache::arrow::flatbuf::FixedSizeBinary>() const {
+ return type_as_FixedSizeBinary();
+}
+
+template<> inline const org::apache::arrow::flatbuf::FixedSizeList *Tensor::type_as<org::apache::arrow::flatbuf::FixedSizeList>() const {
+ return type_as_FixedSizeList();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Map *Tensor::type_as<org::apache::arrow::flatbuf::Map>() const {
+ return type_as_Map();
+}
+
+template<> inline const org::apache::arrow::flatbuf::Duration *Tensor::type_as<org::apache::arrow::flatbuf::Duration>() const {
+ return type_as_Duration();
+}
+
+template<> inline const org::apache::arrow::flatbuf::LargeBinary *Tensor::type_as<org::apache::arrow::flatbuf::LargeBinary>() const {
+ return type_as_LargeBinary();
+}
+
+template<> inline const org::apache::arrow::flatbuf::LargeUtf8 *Tensor::type_as<org::apache::arrow::flatbuf::LargeUtf8>() const {
+ return type_as_LargeUtf8();
+}
+
+template<> inline const org::apache::arrow::flatbuf::LargeList *Tensor::type_as<org::apache::arrow::flatbuf::LargeList>() const {
+ return type_as_LargeList();
+}
+
+struct TensorBuilder {
+ typedef Tensor Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_type_type(org::apache::arrow::flatbuf::Type type_type) {
+ fbb_.AddElement<uint8_t>(Tensor::VT_TYPE_TYPE, static_cast<uint8_t>(type_type), 0);
+ }
+ void add_type(flatbuffers::Offset<void> type) {
+ fbb_.AddOffset(Tensor::VT_TYPE, type);
+ }
+ void add_shape(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::TensorDim>>> shape) {
+ fbb_.AddOffset(Tensor::VT_SHAPE, shape);
+ }
+ void add_strides(flatbuffers::Offset<flatbuffers::Vector<int64_t>> strides) {
+ fbb_.AddOffset(Tensor::VT_STRIDES, strides);
+ }
+ void add_data(const org::apache::arrow::flatbuf::Buffer *data) {
+ fbb_.AddStruct(Tensor::VT_DATA, data);
+ }
+ explicit TensorBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ TensorBuilder &operator=(const TensorBuilder &);
+ flatbuffers::Offset<Tensor> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Tensor>(end);
+ fbb_.Required(o, Tensor::VT_TYPE);
+ fbb_.Required(o, Tensor::VT_SHAPE);
+ fbb_.Required(o, Tensor::VT_DATA);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Tensor> CreateTensor(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ org::apache::arrow::flatbuf::Type type_type = org::apache::arrow::flatbuf::Type::NONE,
+ flatbuffers::Offset<void> type = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::TensorDim>>> shape = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int64_t>> strides = 0,
+ const org::apache::arrow::flatbuf::Buffer *data = 0) {
+ TensorBuilder builder_(_fbb);
+ builder_.add_data(data);
+ builder_.add_strides(strides);
+ builder_.add_shape(shape);
+ builder_.add_type(type);
+ builder_.add_type_type(type_type);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Tensor> CreateTensorDirect(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ org::apache::arrow::flatbuf::Type type_type = org::apache::arrow::flatbuf::Type::NONE,
+ flatbuffers::Offset<void> type = 0,
+ const std::vector<flatbuffers::Offset<org::apache::arrow::flatbuf::TensorDim>> *shape = nullptr,
+ const std::vector<int64_t> *strides = nullptr,
+ const org::apache::arrow::flatbuf::Buffer *data = 0) {
+ auto shape__ = shape ? _fbb.CreateVector<flatbuffers::Offset<org::apache::arrow::flatbuf::TensorDim>>(*shape) : 0;
+ auto strides__ = strides ? _fbb.CreateVector<int64_t>(*strides) : 0;
+ return org::apache::arrow::flatbuf::CreateTensor(
+ _fbb,
+ type_type,
+ type,
+ shape__,
+ strides__,
+ data);
+}
+
+inline const org::apache::arrow::flatbuf::Tensor *GetTensor(const void *buf) {
+ return flatbuffers::GetRoot<org::apache::arrow::flatbuf::Tensor>(buf);
+}
+
+inline const org::apache::arrow::flatbuf::Tensor *GetSizePrefixedTensor(const void *buf) {
+ return flatbuffers::GetSizePrefixedRoot<org::apache::arrow::flatbuf::Tensor>(buf);
+}
+
+inline bool VerifyTensorBuffer(
+ flatbuffers::Verifier &verifier) {
+ return verifier.VerifyBuffer<org::apache::arrow::flatbuf::Tensor>(nullptr);
+}
+
+inline bool VerifySizePrefixedTensorBuffer(
+ flatbuffers::Verifier &verifier) {
+ return verifier.VerifySizePrefixedBuffer<org::apache::arrow::flatbuf::Tensor>(nullptr);
+}
+
+inline void FinishTensorBuffer(
+ flatbuffers::FlatBufferBuilder &fbb,
+ flatbuffers::Offset<org::apache::arrow::flatbuf::Tensor> root) {
+ fbb.Finish(root);
+}
+
+inline void FinishSizePrefixedTensorBuffer(
+ flatbuffers::FlatBufferBuilder &fbb,
+ flatbuffers::Offset<org::apache::arrow::flatbuf::Tensor> root) {
+ fbb.FinishSizePrefixed(root);
+}
+
+} // namespace flatbuf
+} // namespace arrow
+} // namespace apache
+} // namespace org
+
+#endif // FLATBUFFERS_GENERATED_TENSOR_ORG_APACHE_ARROW_FLATBUF_H_
diff --git a/contrib/libs/apache/arrow/cpp/src/generated/feather_generated.h b/contrib/libs/apache/arrow/cpp/src/generated/feather_generated.h
index b925eb2bc6a..5353e444f0e 100644
--- a/contrib/libs/apache/arrow/cpp/src/generated/feather_generated.h
+++ b/contrib/libs/apache/arrow/cpp/src/generated/feather_generated.h
@@ -1,863 +1,863 @@
-// automatically generated by the FlatBuffers compiler, do not modify
-
-
-#ifndef FLATBUFFERS_GENERATED_FEATHER_ARROW_IPC_FEATHER_FBS_H_
-#define FLATBUFFERS_GENERATED_FEATHER_ARROW_IPC_FEATHER_FBS_H_
-
-#include "flatbuffers/flatbuffers.h"
-
-namespace arrow {
-namespace ipc {
-namespace feather {
-namespace fbs {
-
-struct PrimitiveArray;
-struct PrimitiveArrayBuilder;
-
-struct CategoryMetadata;
-struct CategoryMetadataBuilder;
-
-struct TimestampMetadata;
-struct TimestampMetadataBuilder;
-
-struct DateMetadata;
-struct DateMetadataBuilder;
-
-struct TimeMetadata;
-struct TimeMetadataBuilder;
-
-struct Column;
-struct ColumnBuilder;
-
-struct CTable;
-struct CTableBuilder;
-
-/// Feather is an experimental serialization format implemented using
-/// techniques from Apache Arrow. It was created as a proof-of-concept of an
-/// interoperable file format for storing data frames originating in Python or
-/// R. It enabled the developers to sidestep some of the open design questions
-/// in Arrow from early 2016 and instead create something simple and useful for
-/// the intended use cases.
-enum class Type : int8_t {
- BOOL = 0,
- INT8 = 1,
- INT16 = 2,
- INT32 = 3,
- INT64 = 4,
- UINT8 = 5,
- UINT16 = 6,
- UINT32 = 7,
- UINT64 = 8,
- FLOAT = 9,
- DOUBLE = 10,
- UTF8 = 11,
- BINARY = 12,
- CATEGORY = 13,
- TIMESTAMP = 14,
- DATE = 15,
- TIME = 16,
- LARGE_UTF8 = 17,
- LARGE_BINARY = 18,
- MIN = BOOL,
- MAX = LARGE_BINARY
-};
-
-inline const Type (&EnumValuesType())[19] {
- static const Type values[] = {
- Type::BOOL,
- Type::INT8,
- Type::INT16,
- Type::INT32,
- Type::INT64,
- Type::UINT8,
- Type::UINT16,
- Type::UINT32,
- Type::UINT64,
- Type::FLOAT,
- Type::DOUBLE,
- Type::UTF8,
- Type::BINARY,
- Type::CATEGORY,
- Type::TIMESTAMP,
- Type::DATE,
- Type::TIME,
- Type::LARGE_UTF8,
- Type::LARGE_BINARY
- };
- return values;
-}
-
-inline const char * const *EnumNamesType() {
- static const char * const names[20] = {
- "BOOL",
- "INT8",
- "INT16",
- "INT32",
- "INT64",
- "UINT8",
- "UINT16",
- "UINT32",
- "UINT64",
- "FLOAT",
- "DOUBLE",
- "UTF8",
- "BINARY",
- "CATEGORY",
- "TIMESTAMP",
- "DATE",
- "TIME",
- "LARGE_UTF8",
- "LARGE_BINARY",
- nullptr
- };
- return names;
-}
-
-inline const char *EnumNameType(Type e) {
- if (flatbuffers::IsOutRange(e, Type::BOOL, Type::LARGE_BINARY)) return "";
- const size_t index = static_cast<size_t>(e);
- return EnumNamesType()[index];
-}
-
-enum class Encoding : int8_t {
- PLAIN = 0,
- /// Data is stored dictionary-encoded
- /// dictionary size: <INT32 Dictionary size>
- /// dictionary data: <TYPE primitive array>
- /// dictionary index: <INT32 primitive array>
- ///
- /// TODO: do we care about storing the index values in a smaller typeclass
- DICTIONARY = 1,
- MIN = PLAIN,
- MAX = DICTIONARY
-};
-
-inline const Encoding (&EnumValuesEncoding())[2] {
- static const Encoding values[] = {
- Encoding::PLAIN,
- Encoding::DICTIONARY
- };
- return values;
-}
-
-inline const char * const *EnumNamesEncoding() {
- static const char * const names[3] = {
- "PLAIN",
- "DICTIONARY",
- nullptr
- };
- return names;
-}
-
-inline const char *EnumNameEncoding(Encoding e) {
- if (flatbuffers::IsOutRange(e, Encoding::PLAIN, Encoding::DICTIONARY)) return "";
- const size_t index = static_cast<size_t>(e);
- return EnumNamesEncoding()[index];
-}
-
-enum class TimeUnit : int8_t {
- SECOND = 0,
- MILLISECOND = 1,
- MICROSECOND = 2,
- NANOSECOND = 3,
- MIN = SECOND,
- MAX = NANOSECOND
-};
-
-inline const TimeUnit (&EnumValuesTimeUnit())[4] {
- static const TimeUnit values[] = {
- TimeUnit::SECOND,
- TimeUnit::MILLISECOND,
- TimeUnit::MICROSECOND,
- TimeUnit::NANOSECOND
- };
- return values;
-}
-
-inline const char * const *EnumNamesTimeUnit() {
- static const char * const names[5] = {
- "SECOND",
- "MILLISECOND",
- "MICROSECOND",
- "NANOSECOND",
- nullptr
- };
- return names;
-}
-
-inline const char *EnumNameTimeUnit(TimeUnit e) {
- if (flatbuffers::IsOutRange(e, TimeUnit::SECOND, TimeUnit::NANOSECOND)) return "";
- const size_t index = static_cast<size_t>(e);
- return EnumNamesTimeUnit()[index];
-}
-
-enum class TypeMetadata : uint8_t {
- NONE = 0,
- CategoryMetadata = 1,
- TimestampMetadata = 2,
- DateMetadata = 3,
- TimeMetadata = 4,
- MIN = NONE,
- MAX = TimeMetadata
-};
-
-inline const TypeMetadata (&EnumValuesTypeMetadata())[5] {
- static const TypeMetadata values[] = {
- TypeMetadata::NONE,
- TypeMetadata::CategoryMetadata,
- TypeMetadata::TimestampMetadata,
- TypeMetadata::DateMetadata,
- TypeMetadata::TimeMetadata
- };
- return values;
-}
-
-inline const char * const *EnumNamesTypeMetadata() {
- static const char * const names[6] = {
- "NONE",
- "CategoryMetadata",
- "TimestampMetadata",
- "DateMetadata",
- "TimeMetadata",
- nullptr
- };
- return names;
-}
-
-inline const char *EnumNameTypeMetadata(TypeMetadata e) {
- if (flatbuffers::IsOutRange(e, TypeMetadata::NONE, TypeMetadata::TimeMetadata)) return "";
- const size_t index = static_cast<size_t>(e);
- return EnumNamesTypeMetadata()[index];
-}
-
-template<typename T> struct TypeMetadataTraits {
- static const TypeMetadata enum_value = TypeMetadata::NONE;
-};
-
-template<> struct TypeMetadataTraits<arrow::ipc::feather::fbs::CategoryMetadata> {
- static const TypeMetadata enum_value = TypeMetadata::CategoryMetadata;
-};
-
-template<> struct TypeMetadataTraits<arrow::ipc::feather::fbs::TimestampMetadata> {
- static const TypeMetadata enum_value = TypeMetadata::TimestampMetadata;
-};
-
-template<> struct TypeMetadataTraits<arrow::ipc::feather::fbs::DateMetadata> {
- static const TypeMetadata enum_value = TypeMetadata::DateMetadata;
-};
-
-template<> struct TypeMetadataTraits<arrow::ipc::feather::fbs::TimeMetadata> {
- static const TypeMetadata enum_value = TypeMetadata::TimeMetadata;
-};
-
-bool VerifyTypeMetadata(flatbuffers::Verifier &verifier, const void *obj, TypeMetadata type);
-bool VerifyTypeMetadataVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types);
-
-struct PrimitiveArray FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef PrimitiveArrayBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_TYPE = 4,
- VT_ENCODING = 6,
- VT_OFFSET = 8,
- VT_LENGTH = 10,
- VT_NULL_COUNT = 12,
- VT_TOTAL_BYTES = 14
- };
- arrow::ipc::feather::fbs::Type type() const {
- return static_cast<arrow::ipc::feather::fbs::Type>(GetField<int8_t>(VT_TYPE, 0));
- }
- arrow::ipc::feather::fbs::Encoding encoding() const {
- return static_cast<arrow::ipc::feather::fbs::Encoding>(GetField<int8_t>(VT_ENCODING, 0));
- }
- /// Relative memory offset of the start of the array data excluding the size
- /// of the metadata
- int64_t offset() const {
- return GetField<int64_t>(VT_OFFSET, 0);
- }
- /// The number of logical values in the array
- int64_t length() const {
- return GetField<int64_t>(VT_LENGTH, 0);
- }
- /// The number of observed nulls
- int64_t null_count() const {
- return GetField<int64_t>(VT_NULL_COUNT, 0);
- }
- /// The total size of the actual data in the file
- int64_t total_bytes() const {
- return GetField<int64_t>(VT_TOTAL_BYTES, 0);
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_TYPE) &&
- VerifyField<int8_t>(verifier, VT_ENCODING) &&
- VerifyField<int64_t>(verifier, VT_OFFSET) &&
- VerifyField<int64_t>(verifier, VT_LENGTH) &&
- VerifyField<int64_t>(verifier, VT_NULL_COUNT) &&
- VerifyField<int64_t>(verifier, VT_TOTAL_BYTES) &&
- verifier.EndTable();
- }
-};
-
-struct PrimitiveArrayBuilder {
- typedef PrimitiveArray Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_type(arrow::ipc::feather::fbs::Type type) {
- fbb_.AddElement<int8_t>(PrimitiveArray::VT_TYPE, static_cast<int8_t>(type), 0);
- }
- void add_encoding(arrow::ipc::feather::fbs::Encoding encoding) {
- fbb_.AddElement<int8_t>(PrimitiveArray::VT_ENCODING, static_cast<int8_t>(encoding), 0);
- }
- void add_offset(int64_t offset) {
- fbb_.AddElement<int64_t>(PrimitiveArray::VT_OFFSET, offset, 0);
- }
- void add_length(int64_t length) {
- fbb_.AddElement<int64_t>(PrimitiveArray::VT_LENGTH, length, 0);
- }
- void add_null_count(int64_t null_count) {
- fbb_.AddElement<int64_t>(PrimitiveArray::VT_NULL_COUNT, null_count, 0);
- }
- void add_total_bytes(int64_t total_bytes) {
- fbb_.AddElement<int64_t>(PrimitiveArray::VT_TOTAL_BYTES, total_bytes, 0);
- }
- explicit PrimitiveArrayBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- PrimitiveArrayBuilder &operator=(const PrimitiveArrayBuilder &);
- flatbuffers::Offset<PrimitiveArray> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<PrimitiveArray>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<PrimitiveArray> CreatePrimitiveArray(
- flatbuffers::FlatBufferBuilder &_fbb,
- arrow::ipc::feather::fbs::Type type = arrow::ipc::feather::fbs::Type::BOOL,
- arrow::ipc::feather::fbs::Encoding encoding = arrow::ipc::feather::fbs::Encoding::PLAIN,
- int64_t offset = 0,
- int64_t length = 0,
- int64_t null_count = 0,
- int64_t total_bytes = 0) {
- PrimitiveArrayBuilder builder_(_fbb);
- builder_.add_total_bytes(total_bytes);
- builder_.add_null_count(null_count);
- builder_.add_length(length);
- builder_.add_offset(offset);
- builder_.add_encoding(encoding);
- builder_.add_type(type);
- return builder_.Finish();
-}
-
-struct CategoryMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef CategoryMetadataBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_LEVELS = 4,
- VT_ORDERED = 6
- };
- /// The category codes are presumed to be integers that are valid indexes into
- /// the levels array
- const arrow::ipc::feather::fbs::PrimitiveArray *levels() const {
- return GetPointer<const arrow::ipc::feather::fbs::PrimitiveArray *>(VT_LEVELS);
- }
- bool ordered() const {
- return GetField<uint8_t>(VT_ORDERED, 0) != 0;
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyOffset(verifier, VT_LEVELS) &&
- verifier.VerifyTable(levels()) &&
- VerifyField<uint8_t>(verifier, VT_ORDERED) &&
- verifier.EndTable();
- }
-};
-
-struct CategoryMetadataBuilder {
- typedef CategoryMetadata Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_levels(flatbuffers::Offset<arrow::ipc::feather::fbs::PrimitiveArray> levels) {
- fbb_.AddOffset(CategoryMetadata::VT_LEVELS, levels);
- }
- void add_ordered(bool ordered) {
- fbb_.AddElement<uint8_t>(CategoryMetadata::VT_ORDERED, static_cast<uint8_t>(ordered), 0);
- }
- explicit CategoryMetadataBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- CategoryMetadataBuilder &operator=(const CategoryMetadataBuilder &);
- flatbuffers::Offset<CategoryMetadata> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<CategoryMetadata>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<CategoryMetadata> CreateCategoryMetadata(
- flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<arrow::ipc::feather::fbs::PrimitiveArray> levels = 0,
- bool ordered = false) {
- CategoryMetadataBuilder builder_(_fbb);
- builder_.add_levels(levels);
- builder_.add_ordered(ordered);
- return builder_.Finish();
-}
-
-struct TimestampMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef TimestampMetadataBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_UNIT = 4,
- VT_TIMEZONE = 6
- };
- arrow::ipc::feather::fbs::TimeUnit unit() const {
- return static_cast<arrow::ipc::feather::fbs::TimeUnit>(GetField<int8_t>(VT_UNIT, 0));
- }
- /// Timestamp data is assumed to be UTC, but the time zone is stored here for
- /// presentation as localized
- const flatbuffers::String *timezone() const {
- return GetPointer<const flatbuffers::String *>(VT_TIMEZONE);
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_UNIT) &&
- VerifyOffset(verifier, VT_TIMEZONE) &&
- verifier.VerifyString(timezone()) &&
- verifier.EndTable();
- }
-};
-
-struct TimestampMetadataBuilder {
- typedef TimestampMetadata Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_unit(arrow::ipc::feather::fbs::TimeUnit unit) {
- fbb_.AddElement<int8_t>(TimestampMetadata::VT_UNIT, static_cast<int8_t>(unit), 0);
- }
- void add_timezone(flatbuffers::Offset<flatbuffers::String> timezone) {
- fbb_.AddOffset(TimestampMetadata::VT_TIMEZONE, timezone);
- }
- explicit TimestampMetadataBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- TimestampMetadataBuilder &operator=(const TimestampMetadataBuilder &);
- flatbuffers::Offset<TimestampMetadata> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<TimestampMetadata>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<TimestampMetadata> CreateTimestampMetadata(
- flatbuffers::FlatBufferBuilder &_fbb,
- arrow::ipc::feather::fbs::TimeUnit unit = arrow::ipc::feather::fbs::TimeUnit::SECOND,
- flatbuffers::Offset<flatbuffers::String> timezone = 0) {
- TimestampMetadataBuilder builder_(_fbb);
- builder_.add_timezone(timezone);
- builder_.add_unit(unit);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<TimestampMetadata> CreateTimestampMetadataDirect(
- flatbuffers::FlatBufferBuilder &_fbb,
- arrow::ipc::feather::fbs::TimeUnit unit = arrow::ipc::feather::fbs::TimeUnit::SECOND,
- const char *timezone = nullptr) {
- auto timezone__ = timezone ? _fbb.CreateString(timezone) : 0;
- return arrow::ipc::feather::fbs::CreateTimestampMetadata(
- _fbb,
- unit,
- timezone__);
-}
-
-struct DateMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef DateMetadataBuilder Builder;
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- verifier.EndTable();
- }
-};
-
-struct DateMetadataBuilder {
- typedef DateMetadata Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit DateMetadataBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- DateMetadataBuilder &operator=(const DateMetadataBuilder &);
- flatbuffers::Offset<DateMetadata> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<DateMetadata>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<DateMetadata> CreateDateMetadata(
- flatbuffers::FlatBufferBuilder &_fbb) {
- DateMetadataBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct TimeMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef TimeMetadataBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_UNIT = 4
- };
- arrow::ipc::feather::fbs::TimeUnit unit() const {
- return static_cast<arrow::ipc::feather::fbs::TimeUnit>(GetField<int8_t>(VT_UNIT, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_UNIT) &&
- verifier.EndTable();
- }
-};
-
-struct TimeMetadataBuilder {
- typedef TimeMetadata Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_unit(arrow::ipc::feather::fbs::TimeUnit unit) {
- fbb_.AddElement<int8_t>(TimeMetadata::VT_UNIT, static_cast<int8_t>(unit), 0);
- }
- explicit TimeMetadataBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- TimeMetadataBuilder &operator=(const TimeMetadataBuilder &);
- flatbuffers::Offset<TimeMetadata> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<TimeMetadata>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<TimeMetadata> CreateTimeMetadata(
- flatbuffers::FlatBufferBuilder &_fbb,
- arrow::ipc::feather::fbs::TimeUnit unit = arrow::ipc::feather::fbs::TimeUnit::SECOND) {
- TimeMetadataBuilder builder_(_fbb);
- builder_.add_unit(unit);
- return builder_.Finish();
-}
-
-struct Column FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef ColumnBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_NAME = 4,
- VT_VALUES = 6,
- VT_METADATA_TYPE = 8,
- VT_METADATA = 10,
- VT_USER_METADATA = 12
- };
- const flatbuffers::String *name() const {
- return GetPointer<const flatbuffers::String *>(VT_NAME);
- }
- const arrow::ipc::feather::fbs::PrimitiveArray *values() const {
- return GetPointer<const arrow::ipc::feather::fbs::PrimitiveArray *>(VT_VALUES);
- }
- arrow::ipc::feather::fbs::TypeMetadata metadata_type() const {
- return static_cast<arrow::ipc::feather::fbs::TypeMetadata>(GetField<uint8_t>(VT_METADATA_TYPE, 0));
- }
- const void *metadata() const {
- return GetPointer<const void *>(VT_METADATA);
- }
- template<typename T> const T *metadata_as() const;
- const arrow::ipc::feather::fbs::CategoryMetadata *metadata_as_CategoryMetadata() const {
- return metadata_type() == arrow::ipc::feather::fbs::TypeMetadata::CategoryMetadata ? static_cast<const arrow::ipc::feather::fbs::CategoryMetadata *>(metadata()) : nullptr;
- }
- const arrow::ipc::feather::fbs::TimestampMetadata *metadata_as_TimestampMetadata() const {
- return metadata_type() == arrow::ipc::feather::fbs::TypeMetadata::TimestampMetadata ? static_cast<const arrow::ipc::feather::fbs::TimestampMetadata *>(metadata()) : nullptr;
- }
- const arrow::ipc::feather::fbs::DateMetadata *metadata_as_DateMetadata() const {
- return metadata_type() == arrow::ipc::feather::fbs::TypeMetadata::DateMetadata ? static_cast<const arrow::ipc::feather::fbs::DateMetadata *>(metadata()) : nullptr;
- }
- const arrow::ipc::feather::fbs::TimeMetadata *metadata_as_TimeMetadata() const {
- return metadata_type() == arrow::ipc::feather::fbs::TypeMetadata::TimeMetadata ? static_cast<const arrow::ipc::feather::fbs::TimeMetadata *>(metadata()) : nullptr;
- }
- /// This should (probably) be JSON
- const flatbuffers::String *user_metadata() const {
- return GetPointer<const flatbuffers::String *>(VT_USER_METADATA);
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyOffset(verifier, VT_NAME) &&
- verifier.VerifyString(name()) &&
- VerifyOffset(verifier, VT_VALUES) &&
- verifier.VerifyTable(values()) &&
- VerifyField<uint8_t>(verifier, VT_METADATA_TYPE) &&
- VerifyOffset(verifier, VT_METADATA) &&
- VerifyTypeMetadata(verifier, metadata(), metadata_type()) &&
- VerifyOffset(verifier, VT_USER_METADATA) &&
- verifier.VerifyString(user_metadata()) &&
- verifier.EndTable();
- }
-};
-
-template<> inline const arrow::ipc::feather::fbs::CategoryMetadata *Column::metadata_as<arrow::ipc::feather::fbs::CategoryMetadata>() const {
- return metadata_as_CategoryMetadata();
-}
-
-template<> inline const arrow::ipc::feather::fbs::TimestampMetadata *Column::metadata_as<arrow::ipc::feather::fbs::TimestampMetadata>() const {
- return metadata_as_TimestampMetadata();
-}
-
-template<> inline const arrow::ipc::feather::fbs::DateMetadata *Column::metadata_as<arrow::ipc::feather::fbs::DateMetadata>() const {
- return metadata_as_DateMetadata();
-}
-
-template<> inline const arrow::ipc::feather::fbs::TimeMetadata *Column::metadata_as<arrow::ipc::feather::fbs::TimeMetadata>() const {
- return metadata_as_TimeMetadata();
-}
-
-struct ColumnBuilder {
- typedef Column Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_name(flatbuffers::Offset<flatbuffers::String> name) {
- fbb_.AddOffset(Column::VT_NAME, name);
- }
- void add_values(flatbuffers::Offset<arrow::ipc::feather::fbs::PrimitiveArray> values) {
- fbb_.AddOffset(Column::VT_VALUES, values);
- }
- void add_metadata_type(arrow::ipc::feather::fbs::TypeMetadata metadata_type) {
- fbb_.AddElement<uint8_t>(Column::VT_METADATA_TYPE, static_cast<uint8_t>(metadata_type), 0);
- }
- void add_metadata(flatbuffers::Offset<void> metadata) {
- fbb_.AddOffset(Column::VT_METADATA, metadata);
- }
- void add_user_metadata(flatbuffers::Offset<flatbuffers::String> user_metadata) {
- fbb_.AddOffset(Column::VT_USER_METADATA, user_metadata);
- }
- explicit ColumnBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- ColumnBuilder &operator=(const ColumnBuilder &);
- flatbuffers::Offset<Column> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Column>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Column> CreateColumn(
- flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::String> name = 0,
- flatbuffers::Offset<arrow::ipc::feather::fbs::PrimitiveArray> values = 0,
- arrow::ipc::feather::fbs::TypeMetadata metadata_type = arrow::ipc::feather::fbs::TypeMetadata::NONE,
- flatbuffers::Offset<void> metadata = 0,
- flatbuffers::Offset<flatbuffers::String> user_metadata = 0) {
- ColumnBuilder builder_(_fbb);
- builder_.add_user_metadata(user_metadata);
- builder_.add_metadata(metadata);
- builder_.add_values(values);
- builder_.add_name(name);
- builder_.add_metadata_type(metadata_type);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Column> CreateColumnDirect(
- flatbuffers::FlatBufferBuilder &_fbb,
- const char *name = nullptr,
- flatbuffers::Offset<arrow::ipc::feather::fbs::PrimitiveArray> values = 0,
- arrow::ipc::feather::fbs::TypeMetadata metadata_type = arrow::ipc::feather::fbs::TypeMetadata::NONE,
- flatbuffers::Offset<void> metadata = 0,
- const char *user_metadata = nullptr) {
- auto name__ = name ? _fbb.CreateString(name) : 0;
- auto user_metadata__ = user_metadata ? _fbb.CreateString(user_metadata) : 0;
- return arrow::ipc::feather::fbs::CreateColumn(
- _fbb,
- name__,
- values,
- metadata_type,
- metadata,
- user_metadata__);
-}
-
-struct CTable FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
- typedef CTableBuilder Builder;
- enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_DESCRIPTION = 4,
- VT_NUM_ROWS = 6,
- VT_COLUMNS = 8,
- VT_VERSION = 10,
- VT_METADATA = 12
- };
- /// Some text (or a name) metadata about what the file is, optional
- const flatbuffers::String *description() const {
- return GetPointer<const flatbuffers::String *>(VT_DESCRIPTION);
- }
- int64_t num_rows() const {
- return GetField<int64_t>(VT_NUM_ROWS, 0);
- }
- const flatbuffers::Vector<flatbuffers::Offset<arrow::ipc::feather::fbs::Column>> *columns() const {
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<arrow::ipc::feather::fbs::Column>> *>(VT_COLUMNS);
- }
- /// Version number of the Feather format
- ///
- /// Internal versions 0, 1, and 2: Implemented in Apache Arrow <= 0.16.0 and
- /// wesm/feather. Uses "custom" metadata defined in this file.
- int32_t version() const {
- return GetField<int32_t>(VT_VERSION, 0);
- }
- /// Table metadata (likely JSON), not yet used
- const flatbuffers::String *metadata() const {
- return GetPointer<const flatbuffers::String *>(VT_METADATA);
- }
- bool Verify(flatbuffers::Verifier &verifier) const {
- return VerifyTableStart(verifier) &&
- VerifyOffset(verifier, VT_DESCRIPTION) &&
- verifier.VerifyString(description()) &&
- VerifyField<int64_t>(verifier, VT_NUM_ROWS) &&
- VerifyOffset(verifier, VT_COLUMNS) &&
- verifier.VerifyVector(columns()) &&
- verifier.VerifyVectorOfTables(columns()) &&
- VerifyField<int32_t>(verifier, VT_VERSION) &&
- VerifyOffset(verifier, VT_METADATA) &&
- verifier.VerifyString(metadata()) &&
- verifier.EndTable();
- }
-};
-
-struct CTableBuilder {
- typedef CTable Table;
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_description(flatbuffers::Offset<flatbuffers::String> description) {
- fbb_.AddOffset(CTable::VT_DESCRIPTION, description);
- }
- void add_num_rows(int64_t num_rows) {
- fbb_.AddElement<int64_t>(CTable::VT_NUM_ROWS, num_rows, 0);
- }
- void add_columns(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<arrow::ipc::feather::fbs::Column>>> columns) {
- fbb_.AddOffset(CTable::VT_COLUMNS, columns);
- }
- void add_version(int32_t version) {
- fbb_.AddElement<int32_t>(CTable::VT_VERSION, version, 0);
- }
- void add_metadata(flatbuffers::Offset<flatbuffers::String> metadata) {
- fbb_.AddOffset(CTable::VT_METADATA, metadata);
- }
- explicit CTableBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb) {
- start_ = fbb_.StartTable();
- }
- CTableBuilder &operator=(const CTableBuilder &);
- flatbuffers::Offset<CTable> Finish() {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<CTable>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<CTable> CreateCTable(
- flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::String> description = 0,
- int64_t num_rows = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<arrow::ipc::feather::fbs::Column>>> columns = 0,
- int32_t version = 0,
- flatbuffers::Offset<flatbuffers::String> metadata = 0) {
- CTableBuilder builder_(_fbb);
- builder_.add_num_rows(num_rows);
- builder_.add_metadata(metadata);
- builder_.add_version(version);
- builder_.add_columns(columns);
- builder_.add_description(description);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<CTable> CreateCTableDirect(
- flatbuffers::FlatBufferBuilder &_fbb,
- const char *description = nullptr,
- int64_t num_rows = 0,
- const std::vector<flatbuffers::Offset<arrow::ipc::feather::fbs::Column>> *columns = nullptr,
- int32_t version = 0,
- const char *metadata = nullptr) {
- auto description__ = description ? _fbb.CreateString(description) : 0;
- auto columns__ = columns ? _fbb.CreateVector<flatbuffers::Offset<arrow::ipc::feather::fbs::Column>>(*columns) : 0;
- auto metadata__ = metadata ? _fbb.CreateString(metadata) : 0;
- return arrow::ipc::feather::fbs::CreateCTable(
- _fbb,
- description__,
- num_rows,
- columns__,
- version,
- metadata__);
-}
-
-inline bool VerifyTypeMetadata(flatbuffers::Verifier &verifier, const void *obj, TypeMetadata type) {
- switch (type) {
- case TypeMetadata::NONE: {
- return true;
- }
- case TypeMetadata::CategoryMetadata: {
- auto ptr = reinterpret_cast<const arrow::ipc::feather::fbs::CategoryMetadata *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case TypeMetadata::TimestampMetadata: {
- auto ptr = reinterpret_cast<const arrow::ipc::feather::fbs::TimestampMetadata *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case TypeMetadata::DateMetadata: {
- auto ptr = reinterpret_cast<const arrow::ipc::feather::fbs::DateMetadata *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case TypeMetadata::TimeMetadata: {
- auto ptr = reinterpret_cast<const arrow::ipc::feather::fbs::TimeMetadata *>(obj);
- return verifier.VerifyTable(ptr);
- }
- default: return true;
- }
-}
-
-inline bool VerifyTypeMetadataVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types) {
- if (!values || !types) return !values && !types;
- if (values->size() != types->size()) return false;
- for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) {
- if (!VerifyTypeMetadata(
- verifier, values->Get(i), types->GetEnum<TypeMetadata>(i))) {
- return false;
- }
- }
- return true;
-}
-
-inline const arrow::ipc::feather::fbs::CTable *GetCTable(const void *buf) {
- return flatbuffers::GetRoot<arrow::ipc::feather::fbs::CTable>(buf);
-}
-
-inline const arrow::ipc::feather::fbs::CTable *GetSizePrefixedCTable(const void *buf) {
- return flatbuffers::GetSizePrefixedRoot<arrow::ipc::feather::fbs::CTable>(buf);
-}
-
-inline bool VerifyCTableBuffer(
- flatbuffers::Verifier &verifier) {
- return verifier.VerifyBuffer<arrow::ipc::feather::fbs::CTable>(nullptr);
-}
-
-inline bool VerifySizePrefixedCTableBuffer(
- flatbuffers::Verifier &verifier) {
- return verifier.VerifySizePrefixedBuffer<arrow::ipc::feather::fbs::CTable>(nullptr);
-}
-
-inline void FinishCTableBuffer(
- flatbuffers::FlatBufferBuilder &fbb,
- flatbuffers::Offset<arrow::ipc::feather::fbs::CTable> root) {
- fbb.Finish(root);
-}
-
-inline void FinishSizePrefixedCTableBuffer(
- flatbuffers::FlatBufferBuilder &fbb,
- flatbuffers::Offset<arrow::ipc::feather::fbs::CTable> root) {
- fbb.FinishSizePrefixed(root);
-}
-
-} // namespace fbs
-} // namespace feather
-} // namespace ipc
-} // namespace arrow
-
-#endif // FLATBUFFERS_GENERATED_FEATHER_ARROW_IPC_FEATHER_FBS_H_
+// automatically generated by the FlatBuffers compiler, do not modify
+
+
+#ifndef FLATBUFFERS_GENERATED_FEATHER_ARROW_IPC_FEATHER_FBS_H_
+#define FLATBUFFERS_GENERATED_FEATHER_ARROW_IPC_FEATHER_FBS_H_
+
+#include "flatbuffers/flatbuffers.h"
+
+namespace arrow {
+namespace ipc {
+namespace feather {
+namespace fbs {
+
+struct PrimitiveArray;
+struct PrimitiveArrayBuilder;
+
+struct CategoryMetadata;
+struct CategoryMetadataBuilder;
+
+struct TimestampMetadata;
+struct TimestampMetadataBuilder;
+
+struct DateMetadata;
+struct DateMetadataBuilder;
+
+struct TimeMetadata;
+struct TimeMetadataBuilder;
+
+struct Column;
+struct ColumnBuilder;
+
+struct CTable;
+struct CTableBuilder;
+
+/// Feather is an experimental serialization format implemented using
+/// techniques from Apache Arrow. It was created as a proof-of-concept of an
+/// interoperable file format for storing data frames originating in Python or
+/// R. It enabled the developers to sidestep some of the open design questions
+/// in Arrow from early 2016 and instead create something simple and useful for
+/// the intended use cases.
+enum class Type : int8_t {
+ BOOL = 0,
+ INT8 = 1,
+ INT16 = 2,
+ INT32 = 3,
+ INT64 = 4,
+ UINT8 = 5,
+ UINT16 = 6,
+ UINT32 = 7,
+ UINT64 = 8,
+ FLOAT = 9,
+ DOUBLE = 10,
+ UTF8 = 11,
+ BINARY = 12,
+ CATEGORY = 13,
+ TIMESTAMP = 14,
+ DATE = 15,
+ TIME = 16,
+ LARGE_UTF8 = 17,
+ LARGE_BINARY = 18,
+ MIN = BOOL,
+ MAX = LARGE_BINARY
+};
+
+inline const Type (&EnumValuesType())[19] {
+ static const Type values[] = {
+ Type::BOOL,
+ Type::INT8,
+ Type::INT16,
+ Type::INT32,
+ Type::INT64,
+ Type::UINT8,
+ Type::UINT16,
+ Type::UINT32,
+ Type::UINT64,
+ Type::FLOAT,
+ Type::DOUBLE,
+ Type::UTF8,
+ Type::BINARY,
+ Type::CATEGORY,
+ Type::TIMESTAMP,
+ Type::DATE,
+ Type::TIME,
+ Type::LARGE_UTF8,
+ Type::LARGE_BINARY
+ };
+ return values;
+}
+
+inline const char * const *EnumNamesType() {
+ static const char * const names[20] = {
+ "BOOL",
+ "INT8",
+ "INT16",
+ "INT32",
+ "INT64",
+ "UINT8",
+ "UINT16",
+ "UINT32",
+ "UINT64",
+ "FLOAT",
+ "DOUBLE",
+ "UTF8",
+ "BINARY",
+ "CATEGORY",
+ "TIMESTAMP",
+ "DATE",
+ "TIME",
+ "LARGE_UTF8",
+ "LARGE_BINARY",
+ nullptr
+ };
+ return names;
+}
+
+inline const char *EnumNameType(Type e) {
+ if (flatbuffers::IsOutRange(e, Type::BOOL, Type::LARGE_BINARY)) return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesType()[index];
+}
+
+enum class Encoding : int8_t {
+ PLAIN = 0,
+ /// Data is stored dictionary-encoded
+ /// dictionary size: <INT32 Dictionary size>
+ /// dictionary data: <TYPE primitive array>
+ /// dictionary index: <INT32 primitive array>
+ ///
+ /// TODO: do we care about storing the index values in a smaller typeclass
+ DICTIONARY = 1,
+ MIN = PLAIN,
+ MAX = DICTIONARY
+};
+
+inline const Encoding (&EnumValuesEncoding())[2] {
+ static const Encoding values[] = {
+ Encoding::PLAIN,
+ Encoding::DICTIONARY
+ };
+ return values;
+}
+
+inline const char * const *EnumNamesEncoding() {
+ static const char * const names[3] = {
+ "PLAIN",
+ "DICTIONARY",
+ nullptr
+ };
+ return names;
+}
+
+inline const char *EnumNameEncoding(Encoding e) {
+ if (flatbuffers::IsOutRange(e, Encoding::PLAIN, Encoding::DICTIONARY)) return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesEncoding()[index];
+}
+
+enum class TimeUnit : int8_t {
+ SECOND = 0,
+ MILLISECOND = 1,
+ MICROSECOND = 2,
+ NANOSECOND = 3,
+ MIN = SECOND,
+ MAX = NANOSECOND
+};
+
+inline const TimeUnit (&EnumValuesTimeUnit())[4] {
+ static const TimeUnit values[] = {
+ TimeUnit::SECOND,
+ TimeUnit::MILLISECOND,
+ TimeUnit::MICROSECOND,
+ TimeUnit::NANOSECOND
+ };
+ return values;
+}
+
+inline const char * const *EnumNamesTimeUnit() {
+ static const char * const names[5] = {
+ "SECOND",
+ "MILLISECOND",
+ "MICROSECOND",
+ "NANOSECOND",
+ nullptr
+ };
+ return names;
+}
+
+inline const char *EnumNameTimeUnit(TimeUnit e) {
+ if (flatbuffers::IsOutRange(e, TimeUnit::SECOND, TimeUnit::NANOSECOND)) return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesTimeUnit()[index];
+}
+
+enum class TypeMetadata : uint8_t {
+ NONE = 0,
+ CategoryMetadata = 1,
+ TimestampMetadata = 2,
+ DateMetadata = 3,
+ TimeMetadata = 4,
+ MIN = NONE,
+ MAX = TimeMetadata
+};
+
+inline const TypeMetadata (&EnumValuesTypeMetadata())[5] {
+ static const TypeMetadata values[] = {
+ TypeMetadata::NONE,
+ TypeMetadata::CategoryMetadata,
+ TypeMetadata::TimestampMetadata,
+ TypeMetadata::DateMetadata,
+ TypeMetadata::TimeMetadata
+ };
+ return values;
+}
+
+inline const char * const *EnumNamesTypeMetadata() {
+ static const char * const names[6] = {
+ "NONE",
+ "CategoryMetadata",
+ "TimestampMetadata",
+ "DateMetadata",
+ "TimeMetadata",
+ nullptr
+ };
+ return names;
+}
+
+inline const char *EnumNameTypeMetadata(TypeMetadata e) {
+ if (flatbuffers::IsOutRange(e, TypeMetadata::NONE, TypeMetadata::TimeMetadata)) return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesTypeMetadata()[index];
+}
+
+template<typename T> struct TypeMetadataTraits {
+ static const TypeMetadata enum_value = TypeMetadata::NONE;
+};
+
+template<> struct TypeMetadataTraits<arrow::ipc::feather::fbs::CategoryMetadata> {
+ static const TypeMetadata enum_value = TypeMetadata::CategoryMetadata;
+};
+
+template<> struct TypeMetadataTraits<arrow::ipc::feather::fbs::TimestampMetadata> {
+ static const TypeMetadata enum_value = TypeMetadata::TimestampMetadata;
+};
+
+template<> struct TypeMetadataTraits<arrow::ipc::feather::fbs::DateMetadata> {
+ static const TypeMetadata enum_value = TypeMetadata::DateMetadata;
+};
+
+template<> struct TypeMetadataTraits<arrow::ipc::feather::fbs::TimeMetadata> {
+ static const TypeMetadata enum_value = TypeMetadata::TimeMetadata;
+};
+
+bool VerifyTypeMetadata(flatbuffers::Verifier &verifier, const void *obj, TypeMetadata type);
+bool VerifyTypeMetadataVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types);
+
+struct PrimitiveArray FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef PrimitiveArrayBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+ VT_TYPE = 4,
+ VT_ENCODING = 6,
+ VT_OFFSET = 8,
+ VT_LENGTH = 10,
+ VT_NULL_COUNT = 12,
+ VT_TOTAL_BYTES = 14
+ };
+ arrow::ipc::feather::fbs::Type type() const {
+ return static_cast<arrow::ipc::feather::fbs::Type>(GetField<int8_t>(VT_TYPE, 0));
+ }
+ arrow::ipc::feather::fbs::Encoding encoding() const {
+ return static_cast<arrow::ipc::feather::fbs::Encoding>(GetField<int8_t>(VT_ENCODING, 0));
+ }
+ /// Relative memory offset of the start of the array data excluding the size
+ /// of the metadata
+ int64_t offset() const {
+ return GetField<int64_t>(VT_OFFSET, 0);
+ }
+ /// The number of logical values in the array
+ int64_t length() const {
+ return GetField<int64_t>(VT_LENGTH, 0);
+ }
+ /// The number of observed nulls
+ int64_t null_count() const {
+ return GetField<int64_t>(VT_NULL_COUNT, 0);
+ }
+ /// The total size of the actual data in the file
+ int64_t total_bytes() const {
+ return GetField<int64_t>(VT_TOTAL_BYTES, 0);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int8_t>(verifier, VT_TYPE) &&
+ VerifyField<int8_t>(verifier, VT_ENCODING) &&
+ VerifyField<int64_t>(verifier, VT_OFFSET) &&
+ VerifyField<int64_t>(verifier, VT_LENGTH) &&
+ VerifyField<int64_t>(verifier, VT_NULL_COUNT) &&
+ VerifyField<int64_t>(verifier, VT_TOTAL_BYTES) &&
+ verifier.EndTable();
+ }
+};
+
+struct PrimitiveArrayBuilder {
+ typedef PrimitiveArray Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_type(arrow::ipc::feather::fbs::Type type) {
+ fbb_.AddElement<int8_t>(PrimitiveArray::VT_TYPE, static_cast<int8_t>(type), 0);
+ }
+ void add_encoding(arrow::ipc::feather::fbs::Encoding encoding) {
+ fbb_.AddElement<int8_t>(PrimitiveArray::VT_ENCODING, static_cast<int8_t>(encoding), 0);
+ }
+ void add_offset(int64_t offset) {
+ fbb_.AddElement<int64_t>(PrimitiveArray::VT_OFFSET, offset, 0);
+ }
+ void add_length(int64_t length) {
+ fbb_.AddElement<int64_t>(PrimitiveArray::VT_LENGTH, length, 0);
+ }
+ void add_null_count(int64_t null_count) {
+ fbb_.AddElement<int64_t>(PrimitiveArray::VT_NULL_COUNT, null_count, 0);
+ }
+ void add_total_bytes(int64_t total_bytes) {
+ fbb_.AddElement<int64_t>(PrimitiveArray::VT_TOTAL_BYTES, total_bytes, 0);
+ }
+ explicit PrimitiveArrayBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ PrimitiveArrayBuilder &operator=(const PrimitiveArrayBuilder &);
+ flatbuffers::Offset<PrimitiveArray> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<PrimitiveArray>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<PrimitiveArray> CreatePrimitiveArray(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ arrow::ipc::feather::fbs::Type type = arrow::ipc::feather::fbs::Type::BOOL,
+ arrow::ipc::feather::fbs::Encoding encoding = arrow::ipc::feather::fbs::Encoding::PLAIN,
+ int64_t offset = 0,
+ int64_t length = 0,
+ int64_t null_count = 0,
+ int64_t total_bytes = 0) {
+ PrimitiveArrayBuilder builder_(_fbb);
+ builder_.add_total_bytes(total_bytes);
+ builder_.add_null_count(null_count);
+ builder_.add_length(length);
+ builder_.add_offset(offset);
+ builder_.add_encoding(encoding);
+ builder_.add_type(type);
+ return builder_.Finish();
+}
+
+struct CategoryMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef CategoryMetadataBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+ VT_LEVELS = 4,
+ VT_ORDERED = 6
+ };
+ /// The category codes are presumed to be integers that are valid indexes into
+ /// the levels array
+ const arrow::ipc::feather::fbs::PrimitiveArray *levels() const {
+ return GetPointer<const arrow::ipc::feather::fbs::PrimitiveArray *>(VT_LEVELS);
+ }
+ bool ordered() const {
+ return GetField<uint8_t>(VT_ORDERED, 0) != 0;
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ VerifyOffset(verifier, VT_LEVELS) &&
+ verifier.VerifyTable(levels()) &&
+ VerifyField<uint8_t>(verifier, VT_ORDERED) &&
+ verifier.EndTable();
+ }
+};
+
+struct CategoryMetadataBuilder {
+ typedef CategoryMetadata Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_levels(flatbuffers::Offset<arrow::ipc::feather::fbs::PrimitiveArray> levels) {
+ fbb_.AddOffset(CategoryMetadata::VT_LEVELS, levels);
+ }
+ void add_ordered(bool ordered) {
+ fbb_.AddElement<uint8_t>(CategoryMetadata::VT_ORDERED, static_cast<uint8_t>(ordered), 0);
+ }
+ explicit CategoryMetadataBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ CategoryMetadataBuilder &operator=(const CategoryMetadataBuilder &);
+ flatbuffers::Offset<CategoryMetadata> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<CategoryMetadata>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<CategoryMetadata> CreateCategoryMetadata(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<arrow::ipc::feather::fbs::PrimitiveArray> levels = 0,
+ bool ordered = false) {
+ CategoryMetadataBuilder builder_(_fbb);
+ builder_.add_levels(levels);
+ builder_.add_ordered(ordered);
+ return builder_.Finish();
+}
+
+struct TimestampMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef TimestampMetadataBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+ VT_UNIT = 4,
+ VT_TIMEZONE = 6
+ };
+ arrow::ipc::feather::fbs::TimeUnit unit() const {
+ return static_cast<arrow::ipc::feather::fbs::TimeUnit>(GetField<int8_t>(VT_UNIT, 0));
+ }
+ /// Timestamp data is assumed to be UTC, but the time zone is stored here for
+ /// presentation as localized
+ const flatbuffers::String *timezone() const {
+ return GetPointer<const flatbuffers::String *>(VT_TIMEZONE);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int8_t>(verifier, VT_UNIT) &&
+ VerifyOffset(verifier, VT_TIMEZONE) &&
+ verifier.VerifyString(timezone()) &&
+ verifier.EndTable();
+ }
+};
+
+struct TimestampMetadataBuilder {
+ typedef TimestampMetadata Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_unit(arrow::ipc::feather::fbs::TimeUnit unit) {
+ fbb_.AddElement<int8_t>(TimestampMetadata::VT_UNIT, static_cast<int8_t>(unit), 0);
+ }
+ void add_timezone(flatbuffers::Offset<flatbuffers::String> timezone) {
+ fbb_.AddOffset(TimestampMetadata::VT_TIMEZONE, timezone);
+ }
+ explicit TimestampMetadataBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ TimestampMetadataBuilder &operator=(const TimestampMetadataBuilder &);
+ flatbuffers::Offset<TimestampMetadata> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<TimestampMetadata>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<TimestampMetadata> CreateTimestampMetadata(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ arrow::ipc::feather::fbs::TimeUnit unit = arrow::ipc::feather::fbs::TimeUnit::SECOND,
+ flatbuffers::Offset<flatbuffers::String> timezone = 0) {
+ TimestampMetadataBuilder builder_(_fbb);
+ builder_.add_timezone(timezone);
+ builder_.add_unit(unit);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<TimestampMetadata> CreateTimestampMetadataDirect(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ arrow::ipc::feather::fbs::TimeUnit unit = arrow::ipc::feather::fbs::TimeUnit::SECOND,
+ const char *timezone = nullptr) {
+ auto timezone__ = timezone ? _fbb.CreateString(timezone) : 0;
+ return arrow::ipc::feather::fbs::CreateTimestampMetadata(
+ _fbb,
+ unit,
+ timezone__);
+}
+
+struct DateMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef DateMetadataBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ verifier.EndTable();
+ }
+};
+
+struct DateMetadataBuilder {
+ typedef DateMetadata Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit DateMetadataBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ DateMetadataBuilder &operator=(const DateMetadataBuilder &);
+ flatbuffers::Offset<DateMetadata> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<DateMetadata>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<DateMetadata> CreateDateMetadata(
+ flatbuffers::FlatBufferBuilder &_fbb) {
+ DateMetadataBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct TimeMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef TimeMetadataBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+ VT_UNIT = 4
+ };
+ arrow::ipc::feather::fbs::TimeUnit unit() const {
+ return static_cast<arrow::ipc::feather::fbs::TimeUnit>(GetField<int8_t>(VT_UNIT, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int8_t>(verifier, VT_UNIT) &&
+ verifier.EndTable();
+ }
+};
+
+struct TimeMetadataBuilder {
+ typedef TimeMetadata Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_unit(arrow::ipc::feather::fbs::TimeUnit unit) {
+ fbb_.AddElement<int8_t>(TimeMetadata::VT_UNIT, static_cast<int8_t>(unit), 0);
+ }
+ explicit TimeMetadataBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ TimeMetadataBuilder &operator=(const TimeMetadataBuilder &);
+ flatbuffers::Offset<TimeMetadata> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<TimeMetadata>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<TimeMetadata> CreateTimeMetadata(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ arrow::ipc::feather::fbs::TimeUnit unit = arrow::ipc::feather::fbs::TimeUnit::SECOND) {
+ TimeMetadataBuilder builder_(_fbb);
+ builder_.add_unit(unit);
+ return builder_.Finish();
+}
+
+struct Column FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef ColumnBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+ VT_NAME = 4,
+ VT_VALUES = 6,
+ VT_METADATA_TYPE = 8,
+ VT_METADATA = 10,
+ VT_USER_METADATA = 12
+ };
+ const flatbuffers::String *name() const {
+ return GetPointer<const flatbuffers::String *>(VT_NAME);
+ }
+ const arrow::ipc::feather::fbs::PrimitiveArray *values() const {
+ return GetPointer<const arrow::ipc::feather::fbs::PrimitiveArray *>(VT_VALUES);
+ }
+ arrow::ipc::feather::fbs::TypeMetadata metadata_type() const {
+ return static_cast<arrow::ipc::feather::fbs::TypeMetadata>(GetField<uint8_t>(VT_METADATA_TYPE, 0));
+ }
+ const void *metadata() const {
+ return GetPointer<const void *>(VT_METADATA);
+ }
+ template<typename T> const T *metadata_as() const;
+ const arrow::ipc::feather::fbs::CategoryMetadata *metadata_as_CategoryMetadata() const {
+ return metadata_type() == arrow::ipc::feather::fbs::TypeMetadata::CategoryMetadata ? static_cast<const arrow::ipc::feather::fbs::CategoryMetadata *>(metadata()) : nullptr;
+ }
+ const arrow::ipc::feather::fbs::TimestampMetadata *metadata_as_TimestampMetadata() const {
+ return metadata_type() == arrow::ipc::feather::fbs::TypeMetadata::TimestampMetadata ? static_cast<const arrow::ipc::feather::fbs::TimestampMetadata *>(metadata()) : nullptr;
+ }
+ const arrow::ipc::feather::fbs::DateMetadata *metadata_as_DateMetadata() const {
+ return metadata_type() == arrow::ipc::feather::fbs::TypeMetadata::DateMetadata ? static_cast<const arrow::ipc::feather::fbs::DateMetadata *>(metadata()) : nullptr;
+ }
+ const arrow::ipc::feather::fbs::TimeMetadata *metadata_as_TimeMetadata() const {
+ return metadata_type() == arrow::ipc::feather::fbs::TypeMetadata::TimeMetadata ? static_cast<const arrow::ipc::feather::fbs::TimeMetadata *>(metadata()) : nullptr;
+ }
+ /// This should (probably) be JSON
+ const flatbuffers::String *user_metadata() const {
+ return GetPointer<const flatbuffers::String *>(VT_USER_METADATA);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ VerifyOffset(verifier, VT_NAME) &&
+ verifier.VerifyString(name()) &&
+ VerifyOffset(verifier, VT_VALUES) &&
+ verifier.VerifyTable(values()) &&
+ VerifyField<uint8_t>(verifier, VT_METADATA_TYPE) &&
+ VerifyOffset(verifier, VT_METADATA) &&
+ VerifyTypeMetadata(verifier, metadata(), metadata_type()) &&
+ VerifyOffset(verifier, VT_USER_METADATA) &&
+ verifier.VerifyString(user_metadata()) &&
+ verifier.EndTable();
+ }
+};
+
+template<> inline const arrow::ipc::feather::fbs::CategoryMetadata *Column::metadata_as<arrow::ipc::feather::fbs::CategoryMetadata>() const {
+ return metadata_as_CategoryMetadata();
+}
+
+template<> inline const arrow::ipc::feather::fbs::TimestampMetadata *Column::metadata_as<arrow::ipc::feather::fbs::TimestampMetadata>() const {
+ return metadata_as_TimestampMetadata();
+}
+
+template<> inline const arrow::ipc::feather::fbs::DateMetadata *Column::metadata_as<arrow::ipc::feather::fbs::DateMetadata>() const {
+ return metadata_as_DateMetadata();
+}
+
+template<> inline const arrow::ipc::feather::fbs::TimeMetadata *Column::metadata_as<arrow::ipc::feather::fbs::TimeMetadata>() const {
+ return metadata_as_TimeMetadata();
+}
+
+struct ColumnBuilder {
+ typedef Column Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_name(flatbuffers::Offset<flatbuffers::String> name) {
+ fbb_.AddOffset(Column::VT_NAME, name);
+ }
+ void add_values(flatbuffers::Offset<arrow::ipc::feather::fbs::PrimitiveArray> values) {
+ fbb_.AddOffset(Column::VT_VALUES, values);
+ }
+ void add_metadata_type(arrow::ipc::feather::fbs::TypeMetadata metadata_type) {
+ fbb_.AddElement<uint8_t>(Column::VT_METADATA_TYPE, static_cast<uint8_t>(metadata_type), 0);
+ }
+ void add_metadata(flatbuffers::Offset<void> metadata) {
+ fbb_.AddOffset(Column::VT_METADATA, metadata);
+ }
+ void add_user_metadata(flatbuffers::Offset<flatbuffers::String> user_metadata) {
+ fbb_.AddOffset(Column::VT_USER_METADATA, user_metadata);
+ }
+ explicit ColumnBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ ColumnBuilder &operator=(const ColumnBuilder &);
+ flatbuffers::Offset<Column> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Column>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Column> CreateColumn(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::String> name = 0,
+ flatbuffers::Offset<arrow::ipc::feather::fbs::PrimitiveArray> values = 0,
+ arrow::ipc::feather::fbs::TypeMetadata metadata_type = arrow::ipc::feather::fbs::TypeMetadata::NONE,
+ flatbuffers::Offset<void> metadata = 0,
+ flatbuffers::Offset<flatbuffers::String> user_metadata = 0) {
+ ColumnBuilder builder_(_fbb);
+ builder_.add_user_metadata(user_metadata);
+ builder_.add_metadata(metadata);
+ builder_.add_values(values);
+ builder_.add_name(name);
+ builder_.add_metadata_type(metadata_type);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Column> CreateColumnDirect(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ const char *name = nullptr,
+ flatbuffers::Offset<arrow::ipc::feather::fbs::PrimitiveArray> values = 0,
+ arrow::ipc::feather::fbs::TypeMetadata metadata_type = arrow::ipc::feather::fbs::TypeMetadata::NONE,
+ flatbuffers::Offset<void> metadata = 0,
+ const char *user_metadata = nullptr) {
+ auto name__ = name ? _fbb.CreateString(name) : 0;
+ auto user_metadata__ = user_metadata ? _fbb.CreateString(user_metadata) : 0;
+ return arrow::ipc::feather::fbs::CreateColumn(
+ _fbb,
+ name__,
+ values,
+ metadata_type,
+ metadata,
+ user_metadata__);
+}
+
+struct CTable FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+ typedef CTableBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+ VT_DESCRIPTION = 4,
+ VT_NUM_ROWS = 6,
+ VT_COLUMNS = 8,
+ VT_VERSION = 10,
+ VT_METADATA = 12
+ };
+ /// Some text (or a name) metadata about what the file is, optional
+ const flatbuffers::String *description() const {
+ return GetPointer<const flatbuffers::String *>(VT_DESCRIPTION);
+ }
+ int64_t num_rows() const {
+ return GetField<int64_t>(VT_NUM_ROWS, 0);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<arrow::ipc::feather::fbs::Column>> *columns() const {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<arrow::ipc::feather::fbs::Column>> *>(VT_COLUMNS);
+ }
+ /// Version number of the Feather format
+ ///
+ /// Internal versions 0, 1, and 2: Implemented in Apache Arrow <= 0.16.0 and
+ /// wesm/feather. Uses "custom" metadata defined in this file.
+ int32_t version() const {
+ return GetField<int32_t>(VT_VERSION, 0);
+ }
+ /// Table metadata (likely JSON), not yet used
+ const flatbuffers::String *metadata() const {
+ return GetPointer<const flatbuffers::String *>(VT_METADATA);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const {
+ return VerifyTableStart(verifier) &&
+ VerifyOffset(verifier, VT_DESCRIPTION) &&
+ verifier.VerifyString(description()) &&
+ VerifyField<int64_t>(verifier, VT_NUM_ROWS) &&
+ VerifyOffset(verifier, VT_COLUMNS) &&
+ verifier.VerifyVector(columns()) &&
+ verifier.VerifyVectorOfTables(columns()) &&
+ VerifyField<int32_t>(verifier, VT_VERSION) &&
+ VerifyOffset(verifier, VT_METADATA) &&
+ verifier.VerifyString(metadata()) &&
+ verifier.EndTable();
+ }
+};
+
+struct CTableBuilder {
+ typedef CTable Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_description(flatbuffers::Offset<flatbuffers::String> description) {
+ fbb_.AddOffset(CTable::VT_DESCRIPTION, description);
+ }
+ void add_num_rows(int64_t num_rows) {
+ fbb_.AddElement<int64_t>(CTable::VT_NUM_ROWS, num_rows, 0);
+ }
+ void add_columns(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<arrow::ipc::feather::fbs::Column>>> columns) {
+ fbb_.AddOffset(CTable::VT_COLUMNS, columns);
+ }
+ void add_version(int32_t version) {
+ fbb_.AddElement<int32_t>(CTable::VT_VERSION, version, 0);
+ }
+ void add_metadata(flatbuffers::Offset<flatbuffers::String> metadata) {
+ fbb_.AddOffset(CTable::VT_METADATA, metadata);
+ }
+ explicit CTableBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb) {
+ start_ = fbb_.StartTable();
+ }
+ CTableBuilder &operator=(const CTableBuilder &);
+ flatbuffers::Offset<CTable> Finish() {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<CTable>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<CTable> CreateCTable(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::String> description = 0,
+ int64_t num_rows = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<arrow::ipc::feather::fbs::Column>>> columns = 0,
+ int32_t version = 0,
+ flatbuffers::Offset<flatbuffers::String> metadata = 0) {
+ CTableBuilder builder_(_fbb);
+ builder_.add_num_rows(num_rows);
+ builder_.add_metadata(metadata);
+ builder_.add_version(version);
+ builder_.add_columns(columns);
+ builder_.add_description(description);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<CTable> CreateCTableDirect(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ const char *description = nullptr,
+ int64_t num_rows = 0,
+ const std::vector<flatbuffers::Offset<arrow::ipc::feather::fbs::Column>> *columns = nullptr,
+ int32_t version = 0,
+ const char *metadata = nullptr) {
+ auto description__ = description ? _fbb.CreateString(description) : 0;
+ auto columns__ = columns ? _fbb.CreateVector<flatbuffers::Offset<arrow::ipc::feather::fbs::Column>>(*columns) : 0;
+ auto metadata__ = metadata ? _fbb.CreateString(metadata) : 0;
+ return arrow::ipc::feather::fbs::CreateCTable(
+ _fbb,
+ description__,
+ num_rows,
+ columns__,
+ version,
+ metadata__);
+}
+
+inline bool VerifyTypeMetadata(flatbuffers::Verifier &verifier, const void *obj, TypeMetadata type) {
+ switch (type) {
+ case TypeMetadata::NONE: {
+ return true;
+ }
+ case TypeMetadata::CategoryMetadata: {
+ auto ptr = reinterpret_cast<const arrow::ipc::feather::fbs::CategoryMetadata *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case TypeMetadata::TimestampMetadata: {
+ auto ptr = reinterpret_cast<const arrow::ipc::feather::fbs::TimestampMetadata *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case TypeMetadata::DateMetadata: {
+ auto ptr = reinterpret_cast<const arrow::ipc::feather::fbs::DateMetadata *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case TypeMetadata::TimeMetadata: {
+ auto ptr = reinterpret_cast<const arrow::ipc::feather::fbs::TimeMetadata *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ default: return true;
+ }
+}
+
+inline bool VerifyTypeMetadataVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types) {
+ if (!values || !types) return !values && !types;
+ if (values->size() != types->size()) return false;
+ for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) {
+ if (!VerifyTypeMetadata(
+ verifier, values->Get(i), types->GetEnum<TypeMetadata>(i))) {
+ return false;
+ }
+ }
+ return true;
+}
+
+inline const arrow::ipc::feather::fbs::CTable *GetCTable(const void *buf) {
+ return flatbuffers::GetRoot<arrow::ipc::feather::fbs::CTable>(buf);
+}
+
+inline const arrow::ipc::feather::fbs::CTable *GetSizePrefixedCTable(const void *buf) {
+ return flatbuffers::GetSizePrefixedRoot<arrow::ipc::feather::fbs::CTable>(buf);
+}
+
+inline bool VerifyCTableBuffer(
+ flatbuffers::Verifier &verifier) {
+ return verifier.VerifyBuffer<arrow::ipc::feather::fbs::CTable>(nullptr);
+}
+
+inline bool VerifySizePrefixedCTableBuffer(
+ flatbuffers::Verifier &verifier) {
+ return verifier.VerifySizePrefixedBuffer<arrow::ipc::feather::fbs::CTable>(nullptr);
+}
+
+inline void FinishCTableBuffer(
+ flatbuffers::FlatBufferBuilder &fbb,
+ flatbuffers::Offset<arrow::ipc::feather::fbs::CTable> root) {
+ fbb.Finish(root);
+}
+
+inline void FinishSizePrefixedCTableBuffer(
+ flatbuffers::FlatBufferBuilder &fbb,
+ flatbuffers::Offset<arrow::ipc::feather::fbs::CTable> root) {
+ fbb.FinishSizePrefixed(root);
+}
+
+} // namespace fbs
+} // namespace feather
+} // namespace ipc
+} // namespace arrow
+
+#endif // FLATBUFFERS_GENERATED_FEATHER_ARROW_IPC_FEATHER_FBS_H_
diff --git a/contrib/libs/apache/arrow/src/arrow/util/config.h b/contrib/libs/apache/arrow/src/arrow/util/config.h
index 2d46017e47e..eecc591a8e5 100644
--- a/contrib/libs/apache/arrow/src/arrow/util/config.h
+++ b/contrib/libs/apache/arrow/src/arrow/util/config.h
@@ -1,39 +1,39 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
#define ARROW_VERSION_MAJOR 5
-#define ARROW_VERSION_MINOR 0
-#define ARROW_VERSION_PATCH 0
-#define ARROW_VERSION ((ARROW_VERSION_MAJOR * 1000) + ARROW_VERSION_MINOR) * 1000 + ARROW_VERSION_PATCH
-
+#define ARROW_VERSION_MINOR 0
+#define ARROW_VERSION_PATCH 0
+#define ARROW_VERSION ((ARROW_VERSION_MAJOR * 1000) + ARROW_VERSION_MINOR) * 1000 + ARROW_VERSION_PATCH
+
#define ARROW_VERSION_STRING "5.0.0"
-
+
#define ARROW_SO_VERSION "500"
#define ARROW_FULL_SO_VERSION "500.0.0"
-
-#define ARROW_CXX_COMPILER_ID "GNU"
+
+#define ARROW_CXX_COMPILER_ID "GNU"
#define ARROW_CXX_COMPILER_VERSION "10.2.0"
-#define ARROW_CXX_COMPILER_FLAGS " -fdiagnostics-color=always -O3 -DNDEBUG"
-
-#define ARROW_GIT_ID ""
-#define ARROW_GIT_DESCRIPTION ""
-
-#define ARROW_PACKAGE_KIND ""
-
+#define ARROW_CXX_COMPILER_FLAGS " -fdiagnostics-color=always -O3 -DNDEBUG"
+
+#define ARROW_GIT_ID ""
+#define ARROW_GIT_DESCRIPTION ""
+
+#define ARROW_PACKAGE_KIND ""
+
#define ARROW_COMPUTE
#define ARROW_CSV
/* #undef ARROW_DATASET */
@@ -42,9 +42,9 @@
#define ARROW_IPC
/* #undef ARROW_JSON */
-/* #undef ARROW_S3 */
+/* #undef ARROW_S3 */
#ifdef __GNUC__
#define ARROW_USE_NATIVE_INT128
#endif
-
-/* #undef GRPCPP_PP_INCLUDE */
+
+/* #undef GRPCPP_PP_INCLUDE */
diff --git a/contrib/libs/apache/arrow/ya.make b/contrib/libs/apache/arrow/ya.make
index 27b9235d9e9..b6e4f8f4c67 100644
--- a/contrib/libs/apache/arrow/ya.make
+++ b/contrib/libs/apache/arrow/ya.make
@@ -1,14 +1,14 @@
# Generated by devtools/yamaker from nixpkgs 3322db8e36d0b32700737d8de7315bd9e9c2b21a.
-
-LIBRARY()
-
+
+LIBRARY()
+
OWNER(
primorial
g:cpp-contrib
)
-
+
VERSION(5.0.0)
-
+
ORIGINAL_SOURCE(https://github.com/apache/arrow/archive/apache-arrow-5.0.0.tar.gz)
LICENSE(
@@ -27,65 +27,65 @@ LICENSE(
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
-PEERDIR(
+PEERDIR(
contrib/libs/apache/orc
- contrib/libs/brotli/dec
- contrib/libs/brotli/enc
- contrib/libs/double-conversion
- contrib/libs/lz4
+ contrib/libs/brotli/dec
+ contrib/libs/brotli/enc
+ contrib/libs/double-conversion
+ contrib/libs/lz4
contrib/libs/re2
- contrib/libs/snappy
+ contrib/libs/snappy
contrib/libs/utf8proc
- contrib/libs/xxhash
- contrib/libs/zlib
- contrib/libs/zstd
+ contrib/libs/xxhash
+ contrib/libs/zlib
+ contrib/libs/zstd
contrib/restricted/boost
contrib/restricted/fast_float
contrib/restricted/thrift
contrib/restricted/uriparser
-)
-
-ADDINCL(
+)
+
+ADDINCL(
GLOBAL contrib/libs/apache/arrow/cpp/src
GLOBAL contrib/libs/apache/arrow/src
contrib/libs/apache/arrow/cpp/src/generated
contrib/libs/apache/orc/c++/include
- contrib/libs/flatbuffers/include
- contrib/libs/lz4
+ contrib/libs/flatbuffers/include
+ contrib/libs/lz4
contrib/libs/re2
contrib/libs/utf8proc
contrib/libs/zstd/include
contrib/restricted/boost
-)
-
-NO_COMPILER_WARNINGS()
-
-NO_UTIL()
-
-CFLAGS(
+)
+
+NO_COMPILER_WARNINGS()
+
+NO_UTIL()
+
+CFLAGS(
GLOBAL -DARROW_STATIC
-DARROW_EXPORTING
- -DARROW_WITH_BROTLI
- -DARROW_WITH_LZ4
+ -DARROW_WITH_BROTLI
+ -DARROW_WITH_LZ4
-DARROW_WITH_RE2
- -DARROW_WITH_SNAPPY
- -DARROW_WITH_TIMING_TESTS
+ -DARROW_WITH_SNAPPY
+ -DARROW_WITH_TIMING_TESTS
-DARROW_WITH_UTF8PROC
- -DARROW_WITH_ZLIB
- -DARROW_WITH_ZSTD
+ -DARROW_WITH_ZLIB
+ -DARROW_WITH_ZSTD
-DHAVE_INTTYPES_H
-DHAVE_NETDB_H
-DPARQUET_EXPORTING
- -DURI_STATIC_BUILD
-)
-
+ -DURI_STATIC_BUILD
+)
+
IF (NOT OS_WINDOWS)
CFLAGS(
-DHAVE_NETINET_IN_H
)
ENDIF()
-SRCS(
+SRCS(
cpp/src/arrow/adapters/orc/adapter.cc
cpp/src/arrow/adapters/orc/adapter_util.cc
cpp/src/arrow/array/array_base.cc
@@ -272,6 +272,6 @@ SRCS(
cpp/src/parquet/stream_reader.cc
cpp/src/parquet/stream_writer.cc
cpp/src/parquet/types.cc
-)
-
-END()
+)
+
+END()