aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/clang14-rt/lib/sanitizer_common/sanitizer_lzw.h
diff options
context:
space:
mode:
authornkozlovskiy <nmk@ydb.tech>2023-10-11 19:11:46 +0300
committernkozlovskiy <nmk@ydb.tech>2023-10-11 19:33:28 +0300
commit61b3971447e473726d6cdb23fc298e457b4d973c (patch)
treee2a2a864bb7717f7ae6138f6a3194a254dd2c7bb /contrib/libs/clang14-rt/lib/sanitizer_common/sanitizer_lzw.h
parenta674dc57d88d43c2e8e90a6084d5d2c988e0402c (diff)
downloadydb-61b3971447e473726d6cdb23fc298e457b4d973c.tar.gz
add sanitizers dependencies
Diffstat (limited to 'contrib/libs/clang14-rt/lib/sanitizer_common/sanitizer_lzw.h')
-rw-r--r--contrib/libs/clang14-rt/lib/sanitizer_common/sanitizer_lzw.h159
1 files changed, 159 insertions, 0 deletions
diff --git a/contrib/libs/clang14-rt/lib/sanitizer_common/sanitizer_lzw.h b/contrib/libs/clang14-rt/lib/sanitizer_common/sanitizer_lzw.h
new file mode 100644
index 0000000000..42acfbdcea
--- /dev/null
+++ b/contrib/libs/clang14-rt/lib/sanitizer_common/sanitizer_lzw.h
@@ -0,0 +1,159 @@
+//===-- sanitizer_lzw.h -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Lempel–Ziv–Welch encoding/decoding
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_LZW_H
+#define SANITIZER_LZW_H
+
+#include "sanitizer_dense_map.h"
+
+namespace __sanitizer {
+
+using LzwCodeType = u32;
+
+template <class T, class ItIn, class ItOut>
+ItOut LzwEncode(ItIn begin, ItIn end, ItOut out) {
+ using Substring =
+ detail::DenseMapPair<LzwCodeType /* Prefix */, T /* Next input */>;
+
+ // Sentinel value for substrings of len 1.
+ static constexpr LzwCodeType kNoPrefix =
+ Min(DenseMapInfo<Substring>::getEmptyKey().first,
+ DenseMapInfo<Substring>::getTombstoneKey().first) -
+ 1;
+ DenseMap<Substring, LzwCodeType> prefix_to_code;
+ {
+ // Add all substring of len 1 as initial dictionary.
+ InternalMmapVector<T> dict_len1;
+ for (auto it = begin; it != end; ++it)
+ if (prefix_to_code.try_emplace({kNoPrefix, *it}, 0).second)
+ dict_len1.push_back(*it);
+
+ // Slightly helps with later delta encoding.
+ Sort(dict_len1.data(), dict_len1.size());
+
+ // For large sizeof(T) we have to store dict_len1. Smaller types like u8 can
+ // just generate them.
+ *out = dict_len1.size();
+ ++out;
+
+ for (uptr i = 0; i != dict_len1.size(); ++i) {
+ // Remap after the Sort.
+ prefix_to_code[{kNoPrefix, dict_len1[i]}] = i;
+ *out = dict_len1[i];
+ ++out;
+ }
+ CHECK_EQ(prefix_to_code.size(), dict_len1.size());
+ }
+
+ if (begin == end)
+ return out;
+
+ // Main LZW encoding loop.
+ LzwCodeType match = prefix_to_code.find({kNoPrefix, *begin})->second;
+ ++begin;
+ for (auto it = begin; it != end; ++it) {
+ // Extend match with the new item.
+ auto ins = prefix_to_code.try_emplace({match, *it}, prefix_to_code.size());
+ if (ins.second) {
+ // This is a new substring, but emit the code for the current match
+ // (before extend). This allows LZW decoder to recover the dictionary.
+ *out = match;
+ ++out;
+ // Reset the match to a single item, which must be already in the map.
+ match = prefix_to_code.find({kNoPrefix, *it})->second;
+ } else {
+ // Already known, use as the current match.
+ match = ins.first->second;
+ }
+ }
+
+ *out = match;
+ ++out;
+
+ return out;
+}
+
+template <class T, class ItIn, class ItOut>
+ItOut LzwDecode(ItIn begin, ItIn end, ItOut out) {
+ if (begin == end)
+ return out;
+
+ // Load dictionary of len 1 substrings. Theses correspont to lowest codes.
+ InternalMmapVector<T> dict_len1(*begin);
+ ++begin;
+
+ if (begin == end)
+ return out;
+
+ for (auto& v : dict_len1) {
+ v = *begin;
+ ++begin;
+ }
+
+ // Substrings of len 2 and up. Indexes are shifted because [0,
+ // dict_len1.size()) stored in dict_len1. Substings get here after being
+ // emitted to the output, so we can use output position.
+ InternalMmapVector<detail::DenseMapPair<ItOut /* begin. */, ItOut /* end */>>
+ code_to_substr;
+
+ // Copies already emitted substrings into the output again.
+ auto copy = [&code_to_substr, &dict_len1](LzwCodeType code, ItOut out) {
+ if (code < dict_len1.size()) {
+ *out = dict_len1[code];
+ ++out;
+ return out;
+ }
+ const auto& s = code_to_substr[code - dict_len1.size()];
+
+ for (ItOut it = s.first; it != s.second; ++it, ++out) *out = *it;
+ return out;
+ };
+
+ // Returns lens of the substring with the given code.
+ auto code_to_len = [&code_to_substr, &dict_len1](LzwCodeType code) -> uptr {
+ if (code < dict_len1.size())
+ return 1;
+ const auto& s = code_to_substr[code - dict_len1.size()];
+ return s.second - s.first;
+ };
+
+ // Main LZW decoding loop.
+ LzwCodeType prev_code = *begin;
+ ++begin;
+ out = copy(prev_code, out);
+ for (auto it = begin; it != end; ++it) {
+ LzwCodeType code = *it;
+ auto start = out;
+ if (code == dict_len1.size() + code_to_substr.size()) {
+ // Special LZW case. The code is not in the dictionary yet. This is
+ // possible only when the new substring is the same as previous one plus
+ // the first item of the previous substring. We can emit that in two
+ // steps.
+ out = copy(prev_code, out);
+ *out = *start;
+ ++out;
+ } else {
+ out = copy(code, out);
+ }
+
+ // Every time encoded emits the code, it also creates substing of len + 1
+ // including the first item of the just emmited substring. Do the same here.
+ uptr len = code_to_len(prev_code);
+ code_to_substr.push_back({start - len, start + 1});
+
+ prev_code = code;
+ }
+ return out;
+}
+
+} // namespace __sanitizer
+#endif