diff options
| author | robot-piglet <[email protected]> | 2025-08-14 11:26:15 +0300 |
|---|---|---|
| committer | robot-piglet <[email protected]> | 2025-08-14 12:06:36 +0300 |
| commit | dc2bf727ea4698fa382f0f8623a8854c4900e212 (patch) | |
| tree | a621e92060fd7560066f33a323b4b8aca34f1e36 /contrib/libs/apache/arrow_next/cpp/src/arrow/array/diff.h | |
| parent | 322ee7d149464c6f18d6a330d937227cb022b9f3 (diff) | |
Intermediate changes
commit_hash:746e9b78ab4c78ba4f30511f1fa9330c0d56a406
Diffstat (limited to 'contrib/libs/apache/arrow_next/cpp/src/arrow/array/diff.h')
| -rw-r--r-- | contrib/libs/apache/arrow_next/cpp/src/arrow/array/diff.h | 77 |
1 files changed, 77 insertions, 0 deletions
diff --git a/contrib/libs/apache/arrow_next/cpp/src/arrow/array/diff.h b/contrib/libs/apache/arrow_next/cpp/src/arrow/array/diff.h new file mode 100644 index 00000000000..efba1195683 --- /dev/null +++ b/contrib/libs/apache/arrow_next/cpp/src/arrow/array/diff.h @@ -0,0 +1,77 @@ +#pragma clang system_header +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <cstdint> +#include <functional> +#include <iosfwd> +#include <memory> + +#include "arrow/array/array_base.h" +#include "arrow/array/array_nested.h" +#include "arrow/result.h" +#include "arrow/status.h" +#include "arrow/type.h" +#include "arrow/util/visibility.h" + +namespace arrow20 { + +/// \brief Compare two arrays, returning an edit script which expresses the difference +/// between them +/// +/// An edit script is an array of struct(insert: bool, run_length: int64_t). +/// Each element of "insert" determines whether an element was inserted into (true) +/// or deleted from (false) base. Each insertion or deletion is followed by a run of +/// elements which are unchanged from base to target; the length of this run is stored +/// in "run_length". 
(Note that the edit script begins and ends with a run of shared +/// elements but both fields of the struct must have the same length. To accommodate this, +/// the first element of "insert" should be ignored.) +/// +/// For example, for base "hlloo" and target "hello", the edit script would be +/// [ +/// {"insert": false, "run_length": 1}, // leading run of length 1 ("h") +/// {"insert": true, "run_length": 3}, // insert("e") then a run of length 3 ("llo") +/// {"insert": false, "run_length": 0} // delete("o") then an empty run +/// ] +/// +/// Diffing arrays containing nulls is not currently supported. +/// +/// \param[in] base baseline for comparison +/// \param[in] target an array of identical type to base whose elements differ from base's +/// \param[in] pool memory to store the result will be allocated from this memory pool +/// \return an edit script array which can be applied to base to produce target +ARROW_EXPORT +Result<std::shared_ptr<StructArray>> Diff(const Array& base, const Array& target, + MemoryPool* pool = default_memory_pool()); + +/// \brief visitor interface for easy traversal of an edit script +/// +/// visitor will be called for each hunk of insertions and deletions. +ARROW_EXPORT Status VisitEditScript( + const Array& edits, + const std::function<Status(int64_t delete_begin, int64_t delete_end, + int64_t insert_begin, int64_t insert_end)>& visitor); + +/// \brief return a function which will format an edit script in unified +/// diff format to os, given base and target arrays of the given type +ARROW_EXPORT Result< + std::function<Status(const Array& edits, const Array& base, const Array& target)>> +MakeUnifiedDiffFormatter(const DataType& type, std::ostream* os); + +} // namespace arrow20 |
