aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal
diff options
context:
space:
mode:
authorthegeorg <thegeorg@yandex-team.com>2024-08-06 11:28:07 +0300
committerthegeorg <thegeorg@yandex-team.com>2024-08-06 12:50:21 +0300
commitde4d7efd8871b850e3ea79164d7661e2299836b7 (patch)
tree47d8cf597b3789a807a4b1cec0a9fd66788767c2 /contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal
parente003b4c129e1381591dcb75a96bf9a970b2b47fb (diff)
downloadydb-de4d7efd8871b850e3ea79164d7661e2299836b7.tar.gz
Update contrib/restricted/abseil-cpp-tstring to 20240722.0
83a5727000e16bc5a94523a0cf1cce75fa86a191
Diffstat (limited to 'contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal')
-rw-r--r--contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/bounded_utf8_length_sequence.h126
-rw-r--r--contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/decode_rust_punycode.cc258
-rw-r--r--contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/decode_rust_punycode.h55
-rw-r--r--contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/demangle.cc1143
-rw-r--r--contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/demangle.h3
-rw-r--r--contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/demangle_rust.cc925
-rw-r--r--contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/demangle_rust.h42
-rw-r--r--contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/elf_mem_image.cc59
-rw-r--r--contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/elf_mem_image.h15
-rw-r--r--contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/stacktrace_aarch64-inl.inc17
-rw-r--r--contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/utf8_for_code_point.cc70
-rw-r--r--contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/utf8_for_code_point.h47
12 files changed, 2644 insertions, 116 deletions
diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/bounded_utf8_length_sequence.h b/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/bounded_utf8_length_sequence.h
new file mode 100644
index 0000000000..df2804299b
--- /dev/null
+++ b/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/bounded_utf8_length_sequence.h
@@ -0,0 +1,126 @@
+// Copyright 2024 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef Y_ABSL_DEBUGGING_INTERNAL_BOUNDED_UTF8_LENGTH_SEQUENCE_H_
+#define Y_ABSL_DEBUGGING_INTERNAL_BOUNDED_UTF8_LENGTH_SEQUENCE_H_
+
+#include <cstdint>
+
+#include "y_absl/base/config.h"
+#include "y_absl/numeric/bits.h"
+
+namespace y_absl {
+Y_ABSL_NAMESPACE_BEGIN
+namespace debugging_internal {
+
+// A sequence of up to max_elements integers between 1 and 4 inclusive, whose
+// insertion operation computes the sum of all the elements before the insertion
+// point. This is useful in decoding Punycode, where one needs to know where in
+// a UTF-8 byte stream the n-th code point begins.
+//
+// BoundedUtf8LengthSequence is async-signal-safe and suitable for use in
+// symbolizing stack traces in a signal handler, provided max_elements is not
+// improvidently large. For inputs of lengths accepted by the Rust demangler,
+// up to a couple hundred code points, InsertAndReturnSumOfPredecessors should
+// run in a few dozen clock cycles, on par with the other arithmetic required
+// for Punycode decoding.
+template <uint32_t max_elements>
+class BoundedUtf8LengthSequence {
+ public:
+ // Constructs an empty sequence.
+ BoundedUtf8LengthSequence() = default;
+
+ // Inserts `utf_length` at position `index`, shifting any existing elements at
+ // or beyond `index` one position to the right. If the sequence is already
+ // full, the rightmost element is discarded.
+ //
+ // Returns the sum of the elements at positions 0 to `index - 1` inclusive.
+ // If `index` is greater than the number of elements already inserted, the
+ // excess positions in the range count 1 apiece.
+ //
+ // REQUIRES: index < max_elements and 1 <= utf8_length <= 4.
+ uint32_t InsertAndReturnSumOfPredecessors(
+ uint32_t index, uint32_t utf8_length) {
+ // The caller shouldn't pass out-of-bounds inputs, but if it does happen,
+ // clamp the values and try to continue. If we're being called from a
+ // signal handler, the last thing we want to do is crash. Emitting
+ // malformed UTF-8 is a lesser evil.
+ if (index >= max_elements) index = max_elements - 1;
+ if (utf8_length == 0 || utf8_length > 4) utf8_length = 1;
+
+ const uint32_t word_index = index/32;
+ const uint32_t bit_index = 2 * (index % 32);
+ const uint64_t ones_bit = uint64_t{1} << bit_index;
+
+ // Compute the sum of predecessors.
+ // - Each value from 1 to 4 is represented by a bit field with value from
+ // 0 to 3, so the desired sum is index plus the sum of the
+ // representations actually stored.
+ // - For each bit field, a set low bit should contribute 1 to the sum, and
+ // a set high bit should contribute 2.
+ // - Another way to say the same thing is that each set bit contributes 1,
+ // and each set high bit contributes an additional 1.
+ // - So the sum we want is index + popcount(everything) + popcount(bits in
+ // odd positions).
+ const uint64_t odd_bits_mask = 0xaaaaaaaaaaaaaaaa;
+ const uint64_t lower_seminibbles_mask = ones_bit - 1;
+ const uint64_t higher_seminibbles_mask = ~lower_seminibbles_mask;
+ const uint64_t same_word_bits_below_insertion =
+ rep_[word_index] & lower_seminibbles_mask;
+ int full_popcount = y_absl::popcount(same_word_bits_below_insertion);
+ int odd_popcount =
+ y_absl::popcount(same_word_bits_below_insertion & odd_bits_mask);
+ for (uint32_t j = word_index; j > 0; --j) {
+ const uint64_t word_below_insertion = rep_[j - 1];
+ full_popcount += y_absl::popcount(word_below_insertion);
+ odd_popcount += y_absl::popcount(word_below_insertion & odd_bits_mask);
+ }
+ const uint32_t sum_of_predecessors =
+ index + static_cast<uint32_t>(full_popcount + odd_popcount);
+
+ // Now insert utf8_length's representation, shifting successors up one
+ // place.
+ for (uint32_t j = max_elements/32 - 1; j > word_index; --j) {
+ rep_[j] = (rep_[j] << 2) | (rep_[j - 1] >> 62);
+ }
+ rep_[word_index] =
+ (rep_[word_index] & lower_seminibbles_mask) |
+ (uint64_t{utf8_length - 1} << bit_index) |
+ ((rep_[word_index] & higher_seminibbles_mask) << 2);
+
+ return sum_of_predecessors;
+ }
+
+ private:
+ // If the (32 * i + j)-th element of the represented sequence has the value k
+ // (0 <= j < 32, 1 <= k <= 4), then bits 2 * j and 2 * j + 1 of rep_[i]
+ // contain the seminibble (k - 1).
+ //
+ // In particular, the zero-initialization of rep_ makes positions not holding
+ // any inserted element count as 1 in InsertAndReturnSumOfPredecessors.
+ //
+ // Example: rep_ = {0xb1, ... the rest zeroes ...} represents the sequence
+ // (2, 1, 4, 3, ... the rest 1's ...). Constructing the sequence of Unicode
+ // code points "Àa🂻中" = {U+00C0, U+0061, U+1F0BB, U+4E2D} (among many
+ // other examples) would yield this value of rep_.
+ static_assert(max_elements > 0 && max_elements % 32 == 0,
+ "max_elements must be a positive multiple of 32");
+ uint64_t rep_[max_elements/32] = {};
+};
+
+} // namespace debugging_internal
+Y_ABSL_NAMESPACE_END
+} // namespace y_absl
+
+#endif // Y_ABSL_DEBUGGING_INTERNAL_BOUNDED_UTF8_LENGTH_SEQUENCE_H_
diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/decode_rust_punycode.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/decode_rust_punycode.cc
new file mode 100644
index 0000000000..bb9fe3cb0e
--- /dev/null
+++ b/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/decode_rust_punycode.cc
@@ -0,0 +1,258 @@
+// Copyright 2024 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "y_absl/debugging/internal/decode_rust_punycode.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+
+#include "y_absl/base/config.h"
+#include "y_absl/base/nullability.h"
+#include "y_absl/debugging/internal/bounded_utf8_length_sequence.h"
+#include "y_absl/debugging/internal/utf8_for_code_point.h"
+
+namespace y_absl {
+Y_ABSL_NAMESPACE_BEGIN
+namespace debugging_internal {
+
+namespace {
+
+// Decoding Punycode requires repeated random-access insertion into a stream of
+// variable-length UTF-8 code-point encodings. We need this to be tolerably
+// fast (no N^2 slowdown for unfortunate inputs), and we can't allocate any data
+// structures on the heap (async-signal-safety).
+//
+// It is pragmatic to impose a moderately low limit on the identifier length and
+// bail out if we ever hit it. Then BoundedUtf8LengthSequence efficiently
+// determines where to insert the next code point, and memmove efficiently makes
+// room for it.
+//
+// The chosen limit is a round number several times larger than identifiers
+// expected in practice, yet still small enough that a memmove of this many
+// UTF-8 characters is not much more expensive than the division and modulus
+// operations that Punycode decoding requires.
+constexpr uint32_t kMaxChars = 256;
+
+// Constants from RFC 3492 section 5.
+constexpr uint32_t kBase = 36, kTMin = 1, kTMax = 26, kSkew = 38, kDamp = 700;
+
+constexpr uint32_t kMaxCodePoint = 0x10ffff;
+
+// Overflow threshold in DecodeRustPunycode's inner loop; see comments there.
+constexpr uint32_t kMaxI = 1 << 30;
+
+// If punycode_begin .. punycode_end begins with a prefix matching the regular
+// expression [0-9a-zA-Z_]+_, removes that prefix, copies all but the final
+// underscore into out_begin .. out_end, sets num_ascii_chars to the number of
+// bytes copied, and returns true. (A prefix of this sort represents the
+// nonempty subsequence of ASCII characters in the corresponding plaintext.)
+//
+// If punycode_begin .. punycode_end does not contain an underscore, sets
+// num_ascii_chars to zero and returns true. (The encoding of a plaintext
+// without any ASCII characters does not carry such a prefix.)
+//
+// Returns false and zeroes num_ascii_chars on failure (either parse error or
+// not enough space in the output buffer).
+bool ConsumeOptionalAsciiPrefix(const char*& punycode_begin,
+ const char* const punycode_end,
+ char* const out_begin,
+ char* const out_end,
+ uint32_t& num_ascii_chars) {
+ num_ascii_chars = 0;
+
+ // Remember the last underscore if any. Also use the same string scan to
+ // reject any ASCII bytes that do not belong in an identifier, including NUL,
+ // as well as non-ASCII bytes, which should have been delta-encoded instead.
+ int last_underscore = -1;
+ for (int i = 0; i < punycode_end - punycode_begin; ++i) {
+ const char c = punycode_begin[i];
+ if (c == '_') {
+ last_underscore = i;
+ continue;
+ }
+ // We write out the meaning of y_absl::ascii_isalnum rather than call that
+ // function because its documentation does not promise it will remain
+ // async-signal-safe under future development.
+ if ('a' <= c && c <= 'z') continue;
+ if ('A' <= c && c <= 'Z') continue;
+ if ('0' <= c && c <= '9') continue;
+ return false;
+ }
+
+ // If there was no underscore, that means there were no ASCII characters in
+ // the plaintext, so there is no prefix to consume. Our work is done.
+ if (last_underscore < 0) return true;
+
+ // Otherwise there will be an underscore delimiter somewhere. It can't be
+ // initial because then there would be no ASCII characters to its left, and no
+ // delimiter would have been added in that case.
+ if (last_underscore == 0) return false;
+
+ // Any other position is reasonable. Make sure there's room in the buffer.
+ if (last_underscore + 1 > out_end - out_begin) return false;
+
+ // Consume and write out the ASCII characters.
+ num_ascii_chars = static_cast<uint32_t>(last_underscore);
+ std::memcpy(out_begin, punycode_begin, num_ascii_chars);
+ out_begin[num_ascii_chars] = '\0';
+ punycode_begin += num_ascii_chars + 1;
+ return true;
+}
+
+// Returns the value of `c` as a base-36 digit according to RFC 3492 section 5,
+// or -1 if `c` is not such a digit.
+int DigitValue(char c) {
+ if ('0' <= c && c <= '9') return c - '0' + 26;
+ if ('a' <= c && c <= 'z') return c - 'a';
+ if ('A' <= c && c <= 'Z') return c - 'A';
+ return -1;
+}
+
+// Consumes the next delta encoding from punycode_begin .. punycode_end,
+// updating i accordingly. Returns true on success. Returns false on parse
+// failure or arithmetic overflow.
+bool ScanNextDelta(const char*& punycode_begin, const char* const punycode_end,
+ uint32_t bias, uint32_t& i) {
+ uint64_t w = 1; // 64 bits to prevent overflow in w *= kBase - t
+
+ // "for k = base to infinity in steps of base do begin ... end" in RFC 3492
+ // section 6.2. Each loop iteration scans one digit of the delta.
+ for (uint32_t k = kBase; punycode_begin != punycode_end; k += kBase) {
+ const int digit_value = DigitValue(*punycode_begin++);
+ if (digit_value < 0) return false;
+
+ // Compute this in 64-bit arithmetic so we can check for overflow afterward.
+ const uint64_t new_i = i + static_cast<uint64_t>(digit_value) * w;
+
+ // Valid deltas are bounded by (#chars already emitted) * kMaxCodePoint, but
+ // invalid input could encode an arbitrarily large delta. Nip that in the
+ // bud here.
+ static_assert(
+ kMaxI >= kMaxChars * kMaxCodePoint,
+ "kMaxI is too small to prevent spurious failures on good input");
+ if (new_i > kMaxI) return false;
+
+ static_assert(
+ kMaxI < (uint64_t{1} << 32),
+ "Make kMaxI smaller or i 64 bits wide to prevent silent wraparound");
+ i = static_cast<uint32_t>(new_i);
+
+ // Compute the threshold that determines whether this is the last digit and
+ // (if not) what the next digit's place value will be. This logic from RFC
+ // 3492 section 6.2 is explained in section 3.3.
+ uint32_t t;
+ if (k <= bias + kTMin) {
+ t = kTMin;
+ } else if (k >= bias + kTMax) {
+ t = kTMax;
+ } else {
+ t = k - bias;
+ }
+ if (static_cast<uint32_t>(digit_value) < t) return true;
+
+ // If this gets too large, the range check on new_i in the next iteration
+ // will catch it. We know this multiplication will not overwrap because w
+ // is 64 bits wide.
+ w *= kBase - t;
+ }
+ return false;
+}
+
+} // namespace
+
+y_absl::Nullable<char*> DecodeRustPunycode(DecodeRustPunycodeOptions options) {
+ const char* punycode_begin = options.punycode_begin;
+ const char* const punycode_end = options.punycode_end;
+ char* const out_begin = options.out_begin;
+ char* const out_end = options.out_end;
+
+ // Write a NUL terminator first. Later memcpy calls will keep bumping it
+ // along to its new right place.
+ const size_t out_size = static_cast<size_t>(out_end - out_begin);
+ if (out_size == 0) return nullptr;
+ *out_begin = '\0';
+
+ // RFC 3492 section 6.2 begins here. We retain the names of integer variables
+ // appearing in that text.
+ uint32_t n = 128, i = 0, bias = 72, num_chars = 0;
+
+ // If there are any ASCII characters, consume them and their trailing
+ // underscore delimiter.
+ if (!ConsumeOptionalAsciiPrefix(punycode_begin, punycode_end,
+ out_begin, out_end, num_chars)) {
+ return nullptr;
+ }
+ uint32_t total_utf8_bytes = num_chars;
+
+ BoundedUtf8LengthSequence<kMaxChars> utf8_lengths;
+
+ // "while the input is not exhausted do begin ... end"
+ while (punycode_begin != punycode_end) {
+ if (num_chars >= kMaxChars) return nullptr;
+
+ const uint32_t old_i = i;
+
+ if (!ScanNextDelta(punycode_begin, punycode_end, bias, i)) return nullptr;
+
+ // Update bias as in RFC 3492 section 6.1. (We have inlined adapt.)
+ uint32_t delta = i - old_i;
+ delta /= (old_i == 0 ? kDamp : 2);
+ delta += delta/(num_chars + 1);
+ bias = 0;
+ while (delta > ((kBase - kTMin) * kTMax)/2) {
+ delta /= kBase - kTMin;
+ bias += kBase;
+ }
+ bias += ((kBase - kTMin + 1) * delta)/(delta + kSkew);
+
+ // Back in section 6.2, compute the new code point and insertion index.
+ static_assert(
+ kMaxI + kMaxCodePoint < (uint64_t{1} << 32),
+ "Make kMaxI smaller or n 64 bits wide to prevent silent wraparound");
+ n += i/(num_chars + 1);
+ i %= num_chars + 1;
+
+ // To actually insert, we need to convert the code point n to UTF-8 and the
+ // character index i to an index into the byte stream emitted so far. First
+ // prepare the UTF-8 encoding for n, rejecting surrogates, overlarge values,
+ // and anything that won't fit into the remaining output storage.
+ Utf8ForCodePoint utf8_for_code_point(n);
+ if (!utf8_for_code_point.ok()) return nullptr;
+ if (total_utf8_bytes + utf8_for_code_point.length + 1 > out_size) {
+ return nullptr;
+ }
+
+ // Now insert the new character into both our length map and the output.
+ uint32_t n_index =
+ utf8_lengths.InsertAndReturnSumOfPredecessors(
+ i, utf8_for_code_point.length);
+ std::memmove(
+ out_begin + n_index + utf8_for_code_point.length, out_begin + n_index,
+ total_utf8_bytes + 1 - n_index);
+ std::memcpy(out_begin + n_index, utf8_for_code_point.bytes,
+ utf8_for_code_point.length);
+ total_utf8_bytes += utf8_for_code_point.length;
+ ++num_chars;
+
+ // Finally, advance to the next state before continuing.
+ ++i;
+ }
+
+ return out_begin + total_utf8_bytes;
+}
+
+} // namespace debugging_internal
+Y_ABSL_NAMESPACE_END
+} // namespace y_absl
diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/decode_rust_punycode.h b/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/decode_rust_punycode.h
new file mode 100644
index 0000000000..b232b4c12f
--- /dev/null
+++ b/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/decode_rust_punycode.h
@@ -0,0 +1,55 @@
+// Copyright 2024 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef Y_ABSL_DEBUGGING_INTERNAL_DECODE_RUST_PUNYCODE_H_
+#define Y_ABSL_DEBUGGING_INTERNAL_DECODE_RUST_PUNYCODE_H_
+
+#include "y_absl/base/config.h"
+#include "y_absl/base/nullability.h"
+
+namespace y_absl {
+Y_ABSL_NAMESPACE_BEGIN
+namespace debugging_internal {
+
+struct DecodeRustPunycodeOptions {
+ const char* punycode_begin;
+ const char* punycode_end;
+ char* out_begin;
+ char* out_end;
+};
+
+// Given Rust Punycode in `punycode_begin .. punycode_end`, writes the
+// corresponding UTF-8 plaintext into `out_begin .. out_end`, followed by a NUL
+// character, and returns a pointer to that final NUL on success. On failure
+// returns a null pointer, and the contents of `out_begin .. out_end` are
+// unspecified.
+//
+// Failure occurs in precisely these cases:
+// - Any input byte does not match [0-9a-zA-Z_].
+// - The first input byte is an underscore, but no other underscore appears in
+// the input.
+// - The delta sequence does not represent a valid sequence of code-point
+// insertions.
+// - The plaintext would contain more than 256 code points.
+//
+// DecodeRustPunycode is async-signal-safe with bounded runtime and a small
+// stack footprint, making it suitable for use in demangling Rust symbol names
+// from a signal handler.
+y_absl::Nullable<char*> DecodeRustPunycode(DecodeRustPunycodeOptions options);
+
+} // namespace debugging_internal
+Y_ABSL_NAMESPACE_END
+} // namespace y_absl
+
+#endif // Y_ABSL_DEBUGGING_INTERNAL_DECODE_RUST_PUNYCODE_H_
diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/demangle.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/demangle.cc
index f79c798eef..22848f9861 100644
--- a/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/demangle.cc
+++ b/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/demangle.cc
@@ -14,18 +14,19 @@
// For reference check out:
// https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling
-//
-// Note that we only have partial C++11 support yet.
#include "y_absl/debugging/internal/demangle.h"
+#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
+#include <cstring>
#include <limits>
#include <util/generic/string.h>
#include "y_absl/base/config.h"
+#include "y_absl/debugging/internal/demangle_rust.h"
#if Y_ABSL_INTERNAL_HAS_CXA_DEMANGLE
#include <cxxabi.h>
@@ -44,14 +45,16 @@ typedef struct {
// List of operators from Itanium C++ ABI.
static const AbbrevPair kOperatorList[] = {
- // New has special syntax (not currently supported).
+ // New has special syntax.
{"nw", "new", 0},
{"na", "new[]", 0},
- // Works except that the 'gs' prefix is not supported.
+ // Special-cased elsewhere to support the optional gs prefix.
{"dl", "delete", 1},
{"da", "delete[]", 1},
+ {"aw", "co_await", 1},
+
{"ps", "+", 1}, // "positive"
{"ng", "-", 1}, // "negative"
{"ad", "&", 1}, // "address-of"
@@ -79,6 +82,7 @@ static const AbbrevPair kOperatorList[] = {
{"rs", ">>", 2},
{"lS", "<<=", 2},
{"rS", ">>=", 2},
+ {"ss", "<=>", 2},
{"eq", "==", 2},
{"ne", "!=", 2},
{"lt", "<", 2},
@@ -98,6 +102,7 @@ static const AbbrevPair kOperatorList[] = {
{"qu", "?", 3},
{"st", "sizeof", 0}, // Special syntax
{"sz", "sizeof", 1}, // Not a real operator name, but used in expressions.
+ {"sZ", "sizeof...", 0}, // Special syntax
{nullptr, nullptr, 0},
};
@@ -187,9 +192,50 @@ typedef struct {
int recursion_depth; // For stack exhaustion prevention.
int steps; // Cap how much work we'll do, regardless of depth.
ParseState parse_state; // Backtrackable state copied for most frames.
+
+ // Conditionally compiled support for marking the position of the first
+ // construct Demangle couldn't parse. This preprocessor symbol is intended
+ // for use by Abseil demangler maintainers only; its behavior is not part of
+ // Abseil's public interface.
+#ifdef Y_ABSL_INTERNAL_DEMANGLE_RECORDS_HIGH_WATER_MARK
+ int high_water_mark; // Input position where parsing failed.
+ bool too_complex; // True if any guard.IsTooComplex() call returned true.
+#endif
} State;
namespace {
+
+#ifdef Y_ABSL_INTERNAL_DEMANGLE_RECORDS_HIGH_WATER_MARK
+void UpdateHighWaterMark(State *state) {
+ if (state->high_water_mark < state->parse_state.mangled_idx) {
+ state->high_water_mark = state->parse_state.mangled_idx;
+ }
+}
+
+void ReportHighWaterMark(State *state) {
+ // Write out the mangled name with the trouble point marked, provided that the
+ // output buffer is large enough and the mangled name did not hit a complexity
+ // limit (in which case the high water mark wouldn't point out an unparsable
+ // construct, only the point where a budget ran out).
+ const size_t input_length = std::strlen(state->mangled_begin);
+ if (input_length + 6 > static_cast<size_t>(state->out_end_idx) ||
+ state->too_complex) {
+ if (state->out_end_idx > 0) state->out[0] = '\0';
+ return;
+ }
+ const size_t high_water_mark = static_cast<size_t>(state->high_water_mark);
+ std::memcpy(state->out, state->mangled_begin, high_water_mark);
+ std::memcpy(state->out + high_water_mark, "--!--", 5);
+ std::memcpy(state->out + high_water_mark + 5,
+ state->mangled_begin + high_water_mark,
+ input_length - high_water_mark);
+ state->out[input_length + 5] = '\0';
+}
+#else
+void UpdateHighWaterMark(State *) {}
+void ReportHighWaterMark(State *) {}
+#endif
+
// Prevent deep recursion / stack exhaustion.
// Also prevent unbounded handling of complex inputs.
class ComplexityGuard {
@@ -201,7 +247,7 @@ class ComplexityGuard {
~ComplexityGuard() { --state_->recursion_depth; }
// 256 levels of recursion seems like a reasonable upper limit on depth.
- // 128 is not enough to demagle synthetic tests from demangle_unittest.txt:
+ // 128 is not enough to demangle synthetic tests from demangle_unittest.txt:
// "_ZaaZZZZ..." and "_ZaaZcvZcvZ..."
static constexpr int kRecursionDepthLimit = 256;
@@ -222,8 +268,14 @@ class ComplexityGuard {
static constexpr int kParseStepsLimit = 1 << 17;
bool IsTooComplex() const {
- return state_->recursion_depth > kRecursionDepthLimit ||
- state_->steps > kParseStepsLimit;
+ if (state_->recursion_depth > kRecursionDepthLimit ||
+ state_->steps > kParseStepsLimit) {
+#ifdef Y_ABSL_INTERNAL_DEMANGLE_RECORDS_HIGH_WATER_MARK
+ state_->too_complex = true;
+#endif
+ return true;
+ }
+ return false;
}
private:
@@ -270,6 +322,10 @@ static void InitState(State* state,
state->out_end_idx = static_cast<int>(out_size);
state->recursion_depth = 0;
state->steps = 0;
+#ifdef Y_ABSL_INTERNAL_DEMANGLE_RECORDS_HIGH_WATER_MARK
+ state->high_water_mark = 0;
+ state->too_complex = false;
+#endif
state->parse_state.mangled_idx = 0;
state->parse_state.out_cur_idx = 0;
@@ -291,13 +347,14 @@ static bool ParseOneCharToken(State *state, const char one_char_token) {
if (guard.IsTooComplex()) return false;
if (RemainingInput(state)[0] == one_char_token) {
++state->parse_state.mangled_idx;
+ UpdateHighWaterMark(state);
return true;
}
return false;
}
-// Returns true and advances "mangled_cur" if we find "two_char_token"
-// at "mangled_cur" position. It is assumed that "two_char_token" does
+// Returns true and advances "mangled_idx" if we find "two_char_token"
+// at "mangled_idx" position. It is assumed that "two_char_token" does
// not contain '\0'.
static bool ParseTwoCharToken(State *state, const char *two_char_token) {
ComplexityGuard guard(state);
@@ -305,11 +362,45 @@ static bool ParseTwoCharToken(State *state, const char *two_char_token) {
if (RemainingInput(state)[0] == two_char_token[0] &&
RemainingInput(state)[1] == two_char_token[1]) {
state->parse_state.mangled_idx += 2;
+ UpdateHighWaterMark(state);
return true;
}
return false;
}
+// Returns true and advances "mangled_idx" if we find "three_char_token"
+// at "mangled_idx" position. It is assumed that "three_char_token" does
+// not contain '\0'.
+static bool ParseThreeCharToken(State *state, const char *three_char_token) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ if (RemainingInput(state)[0] == three_char_token[0] &&
+ RemainingInput(state)[1] == three_char_token[1] &&
+ RemainingInput(state)[2] == three_char_token[2]) {
+ state->parse_state.mangled_idx += 3;
+ UpdateHighWaterMark(state);
+ return true;
+ }
+ return false;
+}
+
+// Returns true and advances "mangled_idx" if we find a copy of the
+// NUL-terminated string "long_token" at "mangled_idx" position.
+static bool ParseLongToken(State *state, const char *long_token) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ int i = 0;
+ for (; long_token[i] != '\0'; ++i) {
+ // Note that we cannot run off the end of the NUL-terminated input here.
+ // Inside the loop body, long_token[i] is known to be different from NUL.
+ // So if we read the NUL on the end of the input here, we return at once.
+ if (RemainingInput(state)[i] != long_token[i]) return false;
+ }
+ state->parse_state.mangled_idx += i;
+ UpdateHighWaterMark(state);
+ return true;
+}
+
// Returns true and advances "mangled_cur" if we find any character in
// "char_class" at "mangled_cur" position.
static bool ParseCharClass(State *state, const char *char_class) {
@@ -322,6 +413,7 @@ static bool ParseCharClass(State *state, const char *char_class) {
for (; *p != '\0'; ++p) {
if (RemainingInput(state)[0] == *p) {
++state->parse_state.mangled_idx;
+ UpdateHighWaterMark(state);
return true;
}
}
@@ -554,6 +646,7 @@ static bool ParseFloatNumber(State *state);
static bool ParseSeqId(State *state);
static bool ParseIdentifier(State *state, size_t length);
static bool ParseOperatorName(State *state, int *arity);
+static bool ParseConversionOperatorType(State *state);
static bool ParseSpecialName(State *state);
static bool ParseCallOffset(State *state);
static bool ParseNVOffset(State *state);
@@ -563,21 +656,33 @@ static bool ParseCtorDtorName(State *state);
static bool ParseDecltype(State *state);
static bool ParseType(State *state);
static bool ParseCVQualifiers(State *state);
+static bool ParseExtendedQualifier(State *state);
static bool ParseBuiltinType(State *state);
+static bool ParseVendorExtendedType(State *state);
static bool ParseFunctionType(State *state);
static bool ParseBareFunctionType(State *state);
+static bool ParseOverloadAttribute(State *state);
static bool ParseClassEnumType(State *state);
static bool ParseArrayType(State *state);
static bool ParsePointerToMemberType(State *state);
static bool ParseTemplateParam(State *state);
+static bool ParseTemplateParamDecl(State *state);
static bool ParseTemplateTemplateParam(State *state);
static bool ParseTemplateArgs(State *state);
static bool ParseTemplateArg(State *state);
static bool ParseBaseUnresolvedName(State *state);
static bool ParseUnresolvedName(State *state);
+static bool ParseUnresolvedQualifierLevel(State *state);
+static bool ParseUnionSelector(State* state);
+static bool ParseFunctionParam(State* state);
+static bool ParseBracedExpression(State *state);
static bool ParseExpression(State *state);
+static bool ParseInitializer(State *state);
static bool ParseExprPrimary(State *state);
-static bool ParseExprCastValue(State *state);
+static bool ParseExprCastValueAndTrailingE(State *state);
+static bool ParseQRequiresClauseExpr(State *state);
+static bool ParseRequirement(State *state);
+static bool ParseTypeConstraint(State *state);
static bool ParseLocalName(State *state);
static bool ParseLocalNameSuffix(State *state);
static bool ParseDiscriminator(State *state);
@@ -622,22 +727,34 @@ static bool ParseMangledName(State *state) {
}
// <encoding> ::= <(function) name> <bare-function-type>
+// [`Q` <requires-clause expr>]
// ::= <(data) name>
// ::= <special-name>
+//
+// NOTE: Based on http://shortn/_Hoq9qG83rx
static bool ParseEncoding(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
- // Implementing the first two productions together as <name>
- // [<bare-function-type>] avoids exponential blowup of backtracking.
+ // Since the first two productions both start with <name>, attempt
+ // to parse it only once to avoid exponential blowup of backtracking.
//
- // Since Optional(...) can't fail, there's no need to copy the state for
- // backtracking.
- if (ParseName(state) && Optional(ParseBareFunctionType(state))) {
+ // We're careful about exponential blowup because <encoding> recursively
+ // appears in other productions downstream of its first two productions,
+ // which means that every call to `ParseName` would possibly indirectly
+ // result in two calls to `ParseName` etc.
+ if (ParseName(state)) {
+ if (!ParseBareFunctionType(state)) {
+ return true; // <(data) name>
+ }
+
+ // Parsed: <(function) name> <bare-function-type>
+ // Pending: [`Q` <requires-clause expr>]
+ ParseQRequiresClauseExpr(state); // restores state on failure
return true;
}
if (ParseSpecialName(state)) {
- return true;
+ return true; // <special-name>
}
return false;
}
@@ -723,19 +840,26 @@ static bool ParseNestedName(State *state) {
// <prefix> ::= <prefix> <unqualified-name>
// ::= <template-prefix> <template-args>
// ::= <template-param>
+// ::= <decltype>
// ::= <substitution>
// ::= # empty
// <template-prefix> ::= <prefix> <(template) unqualified-name>
// ::= <template-param>
// ::= <substitution>
+// ::= <vendor-extended-type>
static bool ParsePrefix(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
bool has_something = false;
while (true) {
MaybeAppendSeparator(state);
- if (ParseTemplateParam(state) ||
+ if (ParseTemplateParam(state) || ParseDecltype(state) ||
ParseSubstitution(state, /*accept_std=*/true) ||
+ // Although the official grammar does not mention it, nested-names
+ // shaped like Nu14__some_builtinIiE6memberE occur in practice, and it
+ // is not clear what else a compiler is supposed to do when a
+ // vendor-extended type has named members.
+ ParseVendorExtendedType(state) ||
ParseUnscopedName(state) ||
(ParseOneCharToken(state, 'M') && ParseUnnamedTypeName(state))) {
has_something = true;
@@ -757,8 +881,14 @@ static bool ParsePrefix(State *state) {
// ::= <source-name> [<abi-tags>]
// ::= <local-source-name> [<abi-tags>]
// ::= <unnamed-type-name> [<abi-tags>]
+// ::= DC <source-name>+ E # C++17 structured binding
+// ::= F <source-name> # C++20 constrained friend
+// ::= F <operator-name> # C++20 constrained friend
//
// <local-source-name> is a GCC extension; see below.
+//
+// For the F notation for constrained friends, see
+// https://github.com/itanium-cxx-abi/cxx-abi/issues/24#issuecomment-1491130332.
static bool ParseUnqualifiedName(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
@@ -767,6 +897,23 @@ static bool ParseUnqualifiedName(State *state) {
ParseUnnamedTypeName(state)) {
return ParseAbiTags(state);
}
+
+ // DC <source-name>+ E
+ ParseState copy = state->parse_state;
+ if (ParseTwoCharToken(state, "DC") && OneOrMore(ParseSourceName, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // F <source-name>
+ // F <operator-name>
+ if (ParseOneCharToken(state, 'F') && MaybeAppend(state, "friend ") &&
+ (ParseSourceName(state) || ParseOperatorName(state, nullptr))) {
+ return true;
+ }
+ state->parse_state = copy;
+
return false;
}
@@ -824,7 +971,11 @@ static bool ParseLocalSourceName(State *state) {
// <unnamed-type-name> ::= Ut [<(nonnegative) number>] _
// ::= <closure-type-name>
// <closure-type-name> ::= Ul <lambda-sig> E [<(nonnegative) number>] _
-// <lambda-sig> ::= <(parameter) type>+
+// <lambda-sig> ::= <template-param-decl>* <(parameter) type>+
+//
+// For <template-param-decl>* in <lambda-sig> see:
+//
+// https://github.com/itanium-cxx-abi/cxx-abi/issues/31
static bool ParseUnnamedTypeName(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
@@ -847,6 +998,7 @@ static bool ParseUnnamedTypeName(State *state) {
// Closure type.
which = -1;
if (ParseTwoCharToken(state, "Ul") && DisableAppend(state) &&
+ ZeroOrMore(ParseTemplateParamDecl, state) &&
OneOrMore(ParseType, state) && RestoreAppend(state, copy.append) &&
ParseOneCharToken(state, 'E') && Optional(ParseNumber(state, &which)) &&
which <= std::numeric_limits<int>::max() - 2 && // Don't overflow.
@@ -888,6 +1040,7 @@ static bool ParseNumber(State *state, int *number_out) {
}
if (p != RemainingInput(state)) { // Conversion succeeded.
state->parse_state.mangled_idx += p - RemainingInput(state);
+ UpdateHighWaterMark(state);
if (number_out != nullptr) {
// Note: possibly truncate "number".
*number_out = static_cast<int>(number);
@@ -910,6 +1063,7 @@ static bool ParseFloatNumber(State *state) {
}
if (p != RemainingInput(state)) { // Conversion succeeded.
state->parse_state.mangled_idx += p - RemainingInput(state);
+ UpdateHighWaterMark(state);
return true;
}
return false;
@@ -928,6 +1082,7 @@ static bool ParseSeqId(State *state) {
}
if (p != RemainingInput(state)) { // Conversion succeeded.
state->parse_state.mangled_idx += p - RemainingInput(state);
+ UpdateHighWaterMark(state);
return true;
}
return false;
@@ -946,11 +1101,13 @@ static bool ParseIdentifier(State *state, size_t length) {
MaybeAppendWithLength(state, RemainingInput(state), length);
}
state->parse_state.mangled_idx += length;
+ UpdateHighWaterMark(state);
return true;
}
// <operator-name> ::= nw, and other two letters cases
// ::= cv <type> # (cast)
+// ::= li <source-name> # C++11 user-defined literal
// ::= v <digit> <source-name> # vendor extended operator
static bool ParseOperatorName(State *state, int *arity) {
ComplexityGuard guard(state);
@@ -961,7 +1118,7 @@ static bool ParseOperatorName(State *state, int *arity) {
// First check with "cv" (cast) case.
ParseState copy = state->parse_state;
if (ParseTwoCharToken(state, "cv") && MaybeAppend(state, "operator ") &&
- EnterNestedName(state) && ParseType(state) &&
+ EnterNestedName(state) && ParseConversionOperatorType(state) &&
LeaveNestedName(state, copy.nest_level)) {
if (arity != nullptr) {
*arity = 1;
@@ -970,6 +1127,13 @@ static bool ParseOperatorName(State *state, int *arity) {
}
state->parse_state = copy;
+ // Then user-defined literals.
+ if (ParseTwoCharToken(state, "li") && MaybeAppend(state, "operator\"\" ") &&
+ ParseSourceName(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
// Then vendor extended operators.
if (ParseOneCharToken(state, 'v') && ParseDigit(state, arity) &&
ParseSourceName(state)) {
@@ -997,36 +1161,120 @@ static bool ParseOperatorName(State *state, int *arity) {
}
MaybeAppend(state, p->real_name);
state->parse_state.mangled_idx += 2;
+ UpdateHighWaterMark(state);
return true;
}
}
return false;
}
+// <operator-name> ::= cv <type> # (cast)
+//
+// The name of a conversion operator is the one place where cv-qualifiers, *, &,
+// and other simple type combinators are expected to appear in our stripped-down
+// demangling (elsewhere they appear in function signatures or template
+// arguments, which we omit from the output). We make reasonable efforts to
+// render simple cases accurately.
+static bool ParseConversionOperatorType(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state;
+
+ // Scan pointers, const, and other easy mangling prefixes with postfix
+ // demanglings. Remember the range of input for later rescanning.
+ //
+ // See `ParseType` and the `switch` below for the meaning of each char.
+ const char* begin_simple_prefixes = RemainingInput(state);
+ while (ParseCharClass(state, "OPRCGrVK")) {}
+ const char* end_simple_prefixes = RemainingInput(state);
+
+ // Emit the base type first.
+ if (!ParseType(state)) {
+ state->parse_state = copy;
+ return false;
+ }
+
+ // Then rescan the easy type combinators in reverse order to emit their
+ // demanglings in the expected output order.
+ while (begin_simple_prefixes != end_simple_prefixes) {
+ switch (*--end_simple_prefixes) {
+ case 'P':
+ MaybeAppend(state, "*");
+ break;
+ case 'R':
+ MaybeAppend(state, "&");
+ break;
+ case 'O':
+ MaybeAppend(state, "&&");
+ break;
+ case 'C':
+ MaybeAppend(state, " _Complex");
+ break;
+ case 'G':
+ MaybeAppend(state, " _Imaginary");
+ break;
+ case 'r':
+ MaybeAppend(state, " restrict");
+ break;
+ case 'V':
+ MaybeAppend(state, " volatile");
+ break;
+ case 'K':
+ MaybeAppend(state, " const");
+ break;
+ }
+ }
+ return true;
+}
+
// <special-name> ::= TV <type>
// ::= TT <type>
// ::= TI <type>
// ::= TS <type>
-// ::= TH <type> # thread-local
+// ::= TW <name> # thread-local wrapper
+// ::= TH <name> # thread-local initialization
// ::= Tc <call-offset> <call-offset> <(base) encoding>
// ::= GV <(object) name>
+// ::= GR <(object) name> [<seq-id>] _
// ::= T <call-offset> <(base) encoding>
+// ::= GTt <encoding> # transaction-safe entry point
+// ::= TA <template-arg> # nontype template parameter object
// G++ extensions:
// ::= TC <type> <(offset) number> _ <(base) type>
// ::= TF <type>
// ::= TJ <type>
-// ::= GR <name>
+// ::= GR <name> # without final _, perhaps an earlier form?
// ::= GA <encoding>
// ::= Th <call-offset> <(base) encoding>
// ::= Tv <call-offset> <(base) encoding>
//
-// Note: we don't care much about them since they don't appear in
-// stack traces. The are special data.
+// Note: Most of these are special data, not functions that occur in stack
+// traces. Exceptions are TW and TH, which denote functions supporting the
+// thread_local feature. For these see:
+//
+// https://maskray.me/blog/2021-02-14-all-about-thread-local-storage
+//
+// For TA see https://github.com/itanium-cxx-abi/cxx-abi/issues/63.
static bool ParseSpecialName(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
ParseState copy = state->parse_state;
- if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "VTISH") &&
+
+ if (ParseTwoCharToken(state, "TW")) {
+ MaybeAppend(state, "thread-local wrapper routine for ");
+ if (ParseName(state)) return true;
+ state->parse_state = copy;
+ return false;
+ }
+
+ if (ParseTwoCharToken(state, "TH")) {
+ MaybeAppend(state, "thread-local initialization routine for ");
+ if (ParseName(state)) return true;
+ state->parse_state = copy;
+ return false;
+ }
+
+ if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "VTIS") &&
ParseType(state)) {
return true;
}
@@ -1064,21 +1312,51 @@ static bool ParseSpecialName(State *state) {
}
state->parse_state = copy;
- if (ParseTwoCharToken(state, "GR") && ParseName(state)) {
+ // <special-name> ::= GR <(object) name> [<seq-id>] _ # modern standard
+ // ::= GR <(object) name> # also recognized
+ if (ParseTwoCharToken(state, "GR")) {
+ MaybeAppend(state, "reference temporary for ");
+ if (!ParseName(state)) {
+ state->parse_state = copy;
+ return false;
+ }
+ const bool has_seq_id = ParseSeqId(state);
+ const bool has_underscore = ParseOneCharToken(state, '_');
+ if (has_seq_id && !has_underscore) {
+ state->parse_state = copy;
+ return false;
+ }
return true;
}
- state->parse_state = copy;
if (ParseTwoCharToken(state, "GA") && ParseEncoding(state)) {
return true;
}
state->parse_state = copy;
+ if (ParseThreeCharToken(state, "GTt") &&
+ MaybeAppend(state, "transaction clone for ") && ParseEncoding(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "hv") &&
ParseCallOffset(state) && ParseEncoding(state)) {
return true;
}
state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "TA")) {
+ bool append = state->parse_state.append;
+ DisableAppend(state);
+ if (ParseTemplateArg(state)) {
+ RestoreAppend(state, append);
+ MaybeAppend(state, "template parameter object");
+ return true;
+ }
+ }
+ state->parse_state = copy;
+
return false;
}
@@ -1182,7 +1460,6 @@ static bool ParseDecltype(State *state) {
// ::= O <type> # rvalue reference-to (C++0x)
// ::= C <type> # complex pair (C 2000)
// ::= G <type> # imaginary (C 2000)
-// ::= U <source-name> <type> # vendor extended type qualifier
// ::= <builtin-type>
// ::= <function-type>
// ::= <class-enum-type> # note: just an alias for <name>
@@ -1193,7 +1470,9 @@ static bool ParseDecltype(State *state) {
// ::= <decltype>
// ::= <substitution>
// ::= Dp <type> # pack expansion of (C++0x)
-// ::= Dv <num-elems> _ # GNU vector extension
+// ::= Dv <(elements) number> _ <type> # GNU vector extension
+// ::= Dv <(bytes) expression> _ <type>
+// ::= Dk <type-constraint> # constrained auto
//
static bool ParseType(State *state) {
ComplexityGuard guard(state);
@@ -1236,12 +1515,6 @@ static bool ParseType(State *state) {
}
state->parse_state = copy;
- if (ParseOneCharToken(state, 'U') && ParseSourceName(state) &&
- ParseType(state)) {
- return true;
- }
- state->parse_state = copy;
-
if (ParseBuiltinType(state) || ParseFunctionType(state) ||
ParseClassEnumType(state) || ParseArrayType(state) ||
ParsePointerToMemberType(state) || ParseDecltype(state) ||
@@ -1260,54 +1533,160 @@ static bool ParseType(State *state) {
return true;
}
+ // GNU vector extension Dv <number> _ <type>
if (ParseTwoCharToken(state, "Dv") && ParseNumber(state, nullptr) &&
- ParseOneCharToken(state, '_')) {
+ ParseOneCharToken(state, '_') && ParseType(state)) {
return true;
}
state->parse_state = copy;
- return false;
+ // GNU vector extension Dv <expression> _ <type>
+ if (ParseTwoCharToken(state, "Dv") && ParseExpression(state) &&
+ ParseOneCharToken(state, '_') && ParseType(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "Dk") && ParseTypeConstraint(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // For this notation see CXXNameMangler::mangleType in Clang's source code.
+ // The relevant logic and its comment "not clear how to mangle this!" date
+ // from 2011, so it may be with us awhile.
+ return ParseLongToken(state, "_SUBSTPACK_");
}
+// <qualifiers> ::= <extended-qualifier>* <CV-qualifiers>
// <CV-qualifiers> ::= [r] [V] [K]
+//
// We don't allow empty <CV-qualifiers> to avoid infinite loop in
// ParseType().
static bool ParseCVQualifiers(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
int num_cv_qualifiers = 0;
+ while (ParseExtendedQualifier(state)) ++num_cv_qualifiers;
num_cv_qualifiers += ParseOneCharToken(state, 'r');
num_cv_qualifiers += ParseOneCharToken(state, 'V');
num_cv_qualifiers += ParseOneCharToken(state, 'K');
return num_cv_qualifiers > 0;
}
+// <extended-qualifier> ::= U <source-name> [<template-args>]
+static bool ParseExtendedQualifier(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state;
+
+ if (!ParseOneCharToken(state, 'U')) return false;
+
+ bool append = state->parse_state.append;
+ DisableAppend(state);
+ if (!ParseSourceName(state)) {
+ state->parse_state = copy;
+ return false;
+ }
+ Optional(ParseTemplateArgs(state));
+ RestoreAppend(state, append);
+ return true;
+}
+
// <builtin-type> ::= v, etc. # single-character builtin types
-// ::= u <source-name>
+// ::= <vendor-extended-type>
// ::= Dd, etc. # two-character builtin types
+// ::= DB (<number> | <expression>) _ # _BitInt(N)
+// ::= DU (<number> | <expression>) _ # unsigned _BitInt(N)
+// ::= DF <number> _ # _FloatN (N bits)
+// ::= DF <number> x # _FloatNx
+// ::= DF16b # std::bfloat16_t
//
// Not supported:
-// ::= DF <number> _ # _FloatN (N bits)
-//
+// ::= [DS] DA <fixed-point-size>
+// ::= [DS] DR <fixed-point-size>
+// because real implementations of N1169 fixed-point are scant.
static bool ParseBuiltinType(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
- const AbbrevPair *p;
- for (p = kBuiltinTypeList; p->abbrev != nullptr; ++p) {
+ ParseState copy = state->parse_state;
+
+ // DB (<number> | <expression>) _ # _BitInt(N)
+ // DU (<number> | <expression>) _ # unsigned _BitInt(N)
+ if (ParseTwoCharToken(state, "DB") ||
+ (ParseTwoCharToken(state, "DU") && MaybeAppend(state, "unsigned "))) {
+ bool append = state->parse_state.append;
+ DisableAppend(state);
+ int number = -1;
+ if (!ParseNumber(state, &number) && !ParseExpression(state)) {
+ state->parse_state = copy;
+ return false;
+ }
+ RestoreAppend(state, append);
+
+ if (!ParseOneCharToken(state, '_')) {
+ state->parse_state = copy;
+ return false;
+ }
+
+ MaybeAppend(state, "_BitInt(");
+ if (number >= 0) {
+ MaybeAppendDecimal(state, number);
+ } else {
+ MaybeAppend(state, "?"); // the best we can do for dependent sizes
+ }
+ MaybeAppend(state, ")");
+ return true;
+ }
+
+ // DF <number> _ # _FloatN
+ // DF <number> x # _FloatNx
+ // DF16b # std::bfloat16_t
+ if (ParseTwoCharToken(state, "DF")) {
+ if (ParseThreeCharToken(state, "16b")) {
+ MaybeAppend(state, "std::bfloat16_t");
+ return true;
+ }
+ int number = 0;
+ if (!ParseNumber(state, &number)) {
+ state->parse_state = copy;
+ return false;
+ }
+ MaybeAppend(state, "_Float");
+ MaybeAppendDecimal(state, number);
+ if (ParseOneCharToken(state, 'x')) {
+ MaybeAppend(state, "x");
+ return true;
+ }
+ if (ParseOneCharToken(state, '_')) return true;
+ state->parse_state = copy;
+ return false;
+ }
+
+ for (const AbbrevPair *p = kBuiltinTypeList; p->abbrev != nullptr; ++p) {
// Guaranteed only 1- or 2-character strings in kBuiltinTypeList.
if (p->abbrev[1] == '\0') {
if (ParseOneCharToken(state, p->abbrev[0])) {
MaybeAppend(state, p->real_name);
- return true;
+ return true; // ::= v, etc. # single-character builtin types
}
} else if (p->abbrev[2] == '\0' && ParseTwoCharToken(state, p->abbrev)) {
MaybeAppend(state, p->real_name);
- return true;
+ return true; // ::= Dd, etc. # two-character builtin types
}
}
+ return ParseVendorExtendedType(state);
+}
+
+// <vendor-extended-type> ::= u <source-name> [<template-args>]
+static bool ParseVendorExtendedType(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+
ParseState copy = state->parse_state;
- if (ParseOneCharToken(state, 'u') && ParseSourceName(state)) {
+ if (ParseOneCharToken(state, 'u') && ParseSourceName(state) &&
+ Optional(ParseTemplateArgs(state))) {
return true;
}
state->parse_state = copy;
@@ -1342,28 +1721,44 @@ static bool ParseExceptionSpec(State *state) {
return false;
}
-// <function-type> ::= [exception-spec] F [Y] <bare-function-type> [O] E
+// <function-type> ::=
+// [exception-spec] [Dx] F [Y] <bare-function-type> [<ref-qualifier>] E
+//
+// <ref-qualifier> ::= R | O
static bool ParseFunctionType(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
ParseState copy = state->parse_state;
- if (Optional(ParseExceptionSpec(state)) && ParseOneCharToken(state, 'F') &&
- Optional(ParseOneCharToken(state, 'Y')) && ParseBareFunctionType(state) &&
- Optional(ParseOneCharToken(state, 'O')) &&
- ParseOneCharToken(state, 'E')) {
- return true;
+ Optional(ParseExceptionSpec(state));
+ Optional(ParseTwoCharToken(state, "Dx"));
+ if (!ParseOneCharToken(state, 'F')) {
+ state->parse_state = copy;
+ return false;
}
- state->parse_state = copy;
- return false;
+ Optional(ParseOneCharToken(state, 'Y'));
+ if (!ParseBareFunctionType(state)) {
+ state->parse_state = copy;
+ return false;
+ }
+ Optional(ParseCharClass(state, "RO"));
+ if (!ParseOneCharToken(state, 'E')) {
+ state->parse_state = copy;
+ return false;
+ }
+ return true;
}
-// <bare-function-type> ::= <(signature) type>+
+// <bare-function-type> ::= <overload-attribute>* <(signature) type>+
+//
+// The <overload-attribute>* prefix is nonstandard; see the comment on
+// ParseOverloadAttribute.
static bool ParseBareFunctionType(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
ParseState copy = state->parse_state;
DisableAppend(state);
- if (OneOrMore(ParseType, state)) {
+ if (ZeroOrMore(ParseOverloadAttribute, state) &&
+ OneOrMore(ParseType, state)) {
RestoreAppend(state, copy.append);
MaybeAppend(state, "()");
return true;
@@ -1372,11 +1767,43 @@ static bool ParseBareFunctionType(State *state) {
return false;
}
+// <overload-attribute> ::= Ua <name>
+//
+// The nonstandard <overload-attribute> production is sufficient to accept the
+// current implementation of __attribute__((enable_if(condition, "message")))
+// and future attributes of a similar shape. See
+// https://clang.llvm.org/docs/AttributeReference.html#enable-if and the
+// definition of CXXNameMangler::mangleFunctionEncodingBareType in Clang's
+// source code.
+static bool ParseOverloadAttribute(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state;
+ if (ParseTwoCharToken(state, "Ua") && ParseName(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+ return false;
+}
+
// <class-enum-type> ::= <name>
+// ::= Ts <name> # struct Name or class Name
+// ::= Tu <name> # union Name
+// ::= Te <name> # enum Name
+//
+// See http://shortn/_W3YrltiEd0.
static bool ParseClassEnumType(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
- return ParseName(state);
+ ParseState copy = state->parse_state;
+ if (Optional(ParseTwoCharToken(state, "Ts") ||
+ ParseTwoCharToken(state, "Tu") ||
+ ParseTwoCharToken(state, "Te")) &&
+ ParseName(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+ return false;
}
// <array-type> ::= A <(positive dimension) number> _ <(element) type>
@@ -1413,21 +1840,83 @@ static bool ParsePointerToMemberType(State *state) {
// <template-param> ::= T_
// ::= T <parameter-2 non-negative number> _
+// ::= TL <level-1> __
+// ::= TL <level-1> _ <parameter-2 non-negative number> _
static bool ParseTemplateParam(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
if (ParseTwoCharToken(state, "T_")) {
MaybeAppend(state, "?"); // We don't support template substitutions.
- return true;
+ return true; // ::= T_
}
ParseState copy = state->parse_state;
if (ParseOneCharToken(state, 'T') && ParseNumber(state, nullptr) &&
ParseOneCharToken(state, '_')) {
MaybeAppend(state, "?"); // We don't support template substitutions.
+ return true; // ::= T <parameter-2 non-negative number> _
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "TL") && ParseNumber(state, nullptr)) {
+ if (ParseTwoCharToken(state, "__")) {
+ MaybeAppend(state, "?"); // We don't support template substitutions.
+ return true; // ::= TL <level-1> __
+ }
+
+ if (ParseOneCharToken(state, '_') && ParseNumber(state, nullptr) &&
+ ParseOneCharToken(state, '_')) {
+ MaybeAppend(state, "?"); // We don't support template substitutions.
+ return true; // ::= TL <level-1> _ <parameter-2 non-negative number> _
+ }
+ }
+ state->parse_state = copy;
+ return false;
+}
+
+// <template-param-decl>
+// ::= Ty # template type parameter
+// ::= Tk <concept name> [<template-args>] # constrained type parameter
+// ::= Tn <type> # template non-type parameter
+// ::= Tt <template-param-decl>* E # template template parameter
+// ::= Tp <template-param-decl> # template parameter pack
+//
+// NOTE: <concept name> is just a <name>: http://shortn/_MqJVyr0fc1
+// TODO(b/324066279): Implement optional suffix for `Tt`:
+// [Q <requires-clause expr>]
+static bool ParseTemplateParamDecl(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state;
+
+ if (ParseTwoCharToken(state, "Ty")) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "Tk") && ParseName(state) &&
+ Optional(ParseTemplateArgs(state))) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "Tn") && ParseType(state)) {
return true;
}
state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "Tt") &&
+ ZeroOrMore(ParseTemplateParamDecl, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "Tp") && ParseTemplateParamDecl(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
return false;
}
@@ -1441,13 +1930,14 @@ static bool ParseTemplateTemplateParam(State *state) {
ParseSubstitution(state, /*accept_std=*/false));
}
-// <template-args> ::= I <template-arg>+ E
+// <template-args> ::= I <template-arg>+ [Q <requires-clause expr>] E
static bool ParseTemplateArgs(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
ParseState copy = state->parse_state;
DisableAppend(state);
if (ParseOneCharToken(state, 'I') && OneOrMore(ParseTemplateArg, state) &&
+ Optional(ParseQRequiresClauseExpr(state)) &&
ParseOneCharToken(state, 'E')) {
RestoreAppend(state, copy.append);
MaybeAppend(state, "<>");
@@ -1457,7 +1947,8 @@ static bool ParseTemplateArgs(State *state) {
return false;
}
-// <template-arg> ::= <type>
+// <template-arg> ::= <template-param-decl> <template-arg>
+// ::= <type>
// ::= <expr-primary>
// ::= J <template-arg>* E # argument pack
// ::= X <expression> E
@@ -1541,7 +2032,7 @@ static bool ParseTemplateArg(State *state) {
// ::= L <source-name> [<template-args>] [<expr-cast-value> E]
if (ParseLocalSourceName(state) && Optional(ParseTemplateArgs(state))) {
copy = state->parse_state;
- if (ParseExprCastValue(state) && ParseOneCharToken(state, 'E')) {
+ if (ParseExprCastValueAndTrailingE(state)) {
return true;
}
state->parse_state = copy;
@@ -1560,6 +2051,12 @@ static bool ParseTemplateArg(State *state) {
return true;
}
state->parse_state = copy;
+
+ if (ParseTemplateParamDecl(state) && ParseTemplateArg(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
return false;
}
@@ -1614,6 +2111,13 @@ static bool ParseBaseUnresolvedName(State *state) {
// <base-unresolved-name>
// ::= [gs] sr <unresolved-qualifier-level>+ E
// <base-unresolved-name>
+// ::= sr St <simple-id> <simple-id> # nonstandard
+//
+// The last case is not part of the official grammar but has been observed in
+// real-world examples that the GNU demangler (but not the LLVM demangler) is
+// able to decode; see demangle_test.cc for one such symbol name. The shape
+// sr St <simple-id> <simple-id> was inferred by closed-box testing of the GNU
+// demangler.
static bool ParseUnresolvedName(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
@@ -1633,7 +2137,7 @@ static bool ParseUnresolvedName(State *state) {
if (ParseTwoCharToken(state, "sr") && ParseOneCharToken(state, 'N') &&
ParseUnresolvedType(state) &&
- OneOrMore(/* <unresolved-qualifier-level> ::= */ ParseSimpleId, state) &&
+ OneOrMore(ParseUnresolvedQualifierLevel, state) &&
ParseOneCharToken(state, 'E') && ParseBaseUnresolvedName(state)) {
return true;
}
@@ -1641,35 +2145,160 @@ static bool ParseUnresolvedName(State *state) {
if (Optional(ParseTwoCharToken(state, "gs")) &&
ParseTwoCharToken(state, "sr") &&
- OneOrMore(/* <unresolved-qualifier-level> ::= */ ParseSimpleId, state) &&
+ OneOrMore(ParseUnresolvedQualifierLevel, state) &&
ParseOneCharToken(state, 'E') && ParseBaseUnresolvedName(state)) {
return true;
}
state->parse_state = copy;
+ if (ParseTwoCharToken(state, "sr") && ParseTwoCharToken(state, "St") &&
+ ParseSimpleId(state) && ParseSimpleId(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
return false;
}
+// <unresolved-qualifier-level> ::= <simple-id>
+// ::= <substitution> <template-args>
+//
+// The production <substitution> <template-args> is nonstandard but is observed
+// in practice. An upstream discussion on the best shape of <unresolved-name>
+// has not converged:
+//
+// https://github.com/itanium-cxx-abi/cxx-abi/issues/38
+static bool ParseUnresolvedQualifierLevel(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+
+ if (ParseSimpleId(state)) return true;
+
+ ParseState copy = state->parse_state;
+ if (ParseSubstitution(state, /*accept_std=*/false) &&
+ ParseTemplateArgs(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+ return false;
+}
+
+// <union-selector> ::= _ [<number>]
+//
+// https://github.com/itanium-cxx-abi/cxx-abi/issues/47
+static bool ParseUnionSelector(State *state) {
+ return ParseOneCharToken(state, '_') && Optional(ParseNumber(state, nullptr));
+}
+
+// <function-param> ::= fp <(top-level) CV-qualifiers> _
+// ::= fp <(top-level) CV-qualifiers> <number> _
+// ::= fL <number> p <(top-level) CV-qualifiers> _
+// ::= fL <number> p <(top-level) CV-qualifiers> <number> _
+// ::= fpT # this
+static bool ParseFunctionParam(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+
+ ParseState copy = state->parse_state;
+
+ // Function-param expression (level 0).
+ if (ParseTwoCharToken(state, "fp") && Optional(ParseCVQualifiers(state)) &&
+ Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // Function-param expression (level 1+).
+ if (ParseTwoCharToken(state, "fL") && Optional(ParseNumber(state, nullptr)) &&
+ ParseOneCharToken(state, 'p') && Optional(ParseCVQualifiers(state)) &&
+ Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ return ParseThreeCharToken(state, "fpT");
+}
+
+// <braced-expression> ::= <expression>
+// ::= di <field source-name> <braced-expression>
+// ::= dx <index expression> <braced-expression>
+// ::= dX <expression> <expression> <braced-expression>
+static bool ParseBracedExpression(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+
+ ParseState copy = state->parse_state;
+
+ if (ParseTwoCharToken(state, "di") && ParseSourceName(state) &&
+ ParseBracedExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "dx") && ParseExpression(state) &&
+ ParseBracedExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "dX") &&
+ ParseExpression(state) && ParseExpression(state) &&
+ ParseBracedExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ return ParseExpression(state);
+}
+
// <expression> ::= <1-ary operator-name> <expression>
// ::= <2-ary operator-name> <expression> <expression>
// ::= <3-ary operator-name> <expression> <expression> <expression>
+// ::= pp_ <expression> # ++e; pp <expression> is e++
+// ::= mm_ <expression> # --e; mm <expression> is e--
// ::= cl <expression>+ E
// ::= cp <simple-id> <expression>* E # Clang-specific.
+// ::= so <type> <expression> [<number>] <union-selector>* [p] E
// ::= cv <type> <expression> # type (expression)
// ::= cv <type> _ <expression>* E # type (expr-list)
+// ::= tl <type> <braced-expression>* E
+// ::= il <braced-expression>* E
+// ::= [gs] nw <expression>* _ <type> E
+// ::= [gs] nw <expression>* _ <type> <initializer>
+// ::= [gs] na <expression>* _ <type> E
+// ::= [gs] na <expression>* _ <type> <initializer>
+// ::= [gs] dl <expression>
+// ::= [gs] da <expression>
+// ::= dc <type> <expression>
+// ::= sc <type> <expression>
+// ::= cc <type> <expression>
+// ::= rc <type> <expression>
+// ::= ti <type>
+// ::= te <expression>
// ::= st <type>
+// ::= at <type>
+// ::= az <expression>
+// ::= nx <expression>
// ::= <template-param>
// ::= <function-param>
+// ::= sZ <template-param>
+// ::= sZ <function-param>
+// ::= sP <template-arg>* E
// ::= <expr-primary>
// ::= dt <expression> <unresolved-name> # expr.name
// ::= pt <expression> <unresolved-name> # expr->name
// ::= sp <expression> # argument pack expansion
+// ::= fl <binary operator-name> <expression>
+// ::= fr <binary operator-name> <expression>
+// ::= fL <binary operator-name> <expression> <expression>
+// ::= fR <binary operator-name> <expression> <expression>
+// ::= tw <expression>
+// ::= tr
// ::= sr <type> <unqualified-name> <template-args>
// ::= sr <type> <unqualified-name>
-// <function-param> ::= fp <(top-level) CV-qualifiers> _
-// ::= fp <(top-level) CV-qualifiers> <number> _
-// ::= fL <number> p <(top-level) CV-qualifiers> _
-// ::= fL <number> p <(top-level) CV-qualifiers> <number> _
+// ::= u <source-name> <template-arg>* E # vendor extension
+// ::= rq <requirement>+ E
+// ::= rQ <bare-function-type> _ <requirement>+ E
static bool ParseExpression(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
@@ -1686,6 +2315,15 @@ static bool ParseExpression(State *state) {
}
state->parse_state = copy;
+ // Preincrement and predecrement. Postincrement and postdecrement are handled
+ // by the operator-name logic later on.
+ if ((ParseThreeCharToken(state, "pp_") ||
+ ParseThreeCharToken(state, "mm_")) &&
+ ParseExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
// Clang-specific "cp <simple-id> <expression>* E"
// https://clang.llvm.org/doxygen/ItaniumMangle_8cpp_source.html#l04338
if (ParseTwoCharToken(state, "cp") && ParseSimpleId(state) &&
@@ -1694,17 +2332,65 @@ static bool ParseExpression(State *state) {
}
state->parse_state = copy;
- // Function-param expression (level 0).
- if (ParseTwoCharToken(state, "fp") && Optional(ParseCVQualifiers(state)) &&
- Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) {
+ // <expression> ::= so <type> <expression> [<number>] <union-selector>* [p] E
+ //
+ // https://github.com/itanium-cxx-abi/cxx-abi/issues/47
+ if (ParseTwoCharToken(state, "so") && ParseType(state) &&
+ ParseExpression(state) && Optional(ParseNumber(state, nullptr)) &&
+ ZeroOrMore(ParseUnionSelector, state) &&
+ Optional(ParseOneCharToken(state, 'p')) &&
+ ParseOneCharToken(state, 'E')) {
return true;
}
state->parse_state = copy;
- // Function-param expression (level 1+).
- if (ParseTwoCharToken(state, "fL") && Optional(ParseNumber(state, nullptr)) &&
- ParseOneCharToken(state, 'p') && Optional(ParseCVQualifiers(state)) &&
- Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) {
+ // <expression> ::= <function-param>
+ if (ParseFunctionParam(state)) return true;
+ state->parse_state = copy;
+
+ // <expression> ::= tl <type> <braced-expression>* E
+ if (ParseTwoCharToken(state, "tl") && ParseType(state) &&
+ ZeroOrMore(ParseBracedExpression, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // <expression> ::= il <braced-expression>* E
+ if (ParseTwoCharToken(state, "il") &&
+ ZeroOrMore(ParseBracedExpression, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // <expression> ::= [gs] nw <expression>* _ <type> E
+ // ::= [gs] nw <expression>* _ <type> <initializer>
+ // ::= [gs] na <expression>* _ <type> E
+ // ::= [gs] na <expression>* _ <type> <initializer>
+ if (Optional(ParseTwoCharToken(state, "gs")) &&
+ (ParseTwoCharToken(state, "nw") || ParseTwoCharToken(state, "na")) &&
+ ZeroOrMore(ParseExpression, state) && ParseOneCharToken(state, '_') &&
+ ParseType(state) &&
+ (ParseOneCharToken(state, 'E') || ParseInitializer(state))) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // <expression> ::= [gs] dl <expression>
+ // ::= [gs] da <expression>
+ if (Optional(ParseTwoCharToken(state, "gs")) &&
+ (ParseTwoCharToken(state, "dl") || ParseTwoCharToken(state, "da")) &&
+ ParseExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // dynamic_cast, static_cast, const_cast, reinterpret_cast.
+ //
+ // <expression> ::= (dc | sc | cc | rc) <type> <expression>
+ if (ParseCharClass(state, "dscr") && ParseOneCharToken(state, 'c') &&
+ ParseType(state) && ParseExpression(state)) {
return true;
}
state->parse_state = copy;
@@ -1746,15 +2432,96 @@ static bool ParseExpression(State *state) {
}
state->parse_state = copy;
+ // typeid(type)
+ if (ParseTwoCharToken(state, "ti") && ParseType(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // typeid(expression)
+ if (ParseTwoCharToken(state, "te") && ParseExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
// sizeof type
if (ParseTwoCharToken(state, "st") && ParseType(state)) {
return true;
}
state->parse_state = copy;
+ // alignof(type)
+ if (ParseTwoCharToken(state, "at") && ParseType(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // alignof(expression), a GNU extension
+ if (ParseTwoCharToken(state, "az") && ParseExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // noexcept(expression) appearing as an expression in a dependent signature
+ if (ParseTwoCharToken(state, "nx") && ParseExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // sizeof...(pack)
+ //
+ // <expression> ::= sZ <template-param>
+ // ::= sZ <function-param>
+ if (ParseTwoCharToken(state, "sZ") &&
+ (ParseFunctionParam(state) || ParseTemplateParam(state))) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // sizeof...(pack) captured from an alias template
+ //
+ // <expression> ::= sP <template-arg>* E
+ if (ParseTwoCharToken(state, "sP") && ZeroOrMore(ParseTemplateArg, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // Unary folds (... op pack) and (pack op ...).
+ //
+ // <expression> ::= fl <binary operator-name> <expression>
+ // ::= fr <binary operator-name> <expression>
+ if ((ParseTwoCharToken(state, "fl") || ParseTwoCharToken(state, "fr")) &&
+ ParseOperatorName(state, nullptr) && ParseExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // Binary folds (init op ... op pack) and (pack op ... op init).
+ //
+ // <expression> ::= fL <binary operator-name> <expression> <expression>
+ // ::= fR <binary operator-name> <expression> <expression>
+ if ((ParseTwoCharToken(state, "fL") || ParseTwoCharToken(state, "fR")) &&
+ ParseOperatorName(state, nullptr) && ParseExpression(state) &&
+ ParseExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // tw <expression>: throw e
+ if (ParseTwoCharToken(state, "tw") && ParseExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // tr: throw (rethrows an exception from the handler that caught it)
+ if (ParseTwoCharToken(state, "tr")) return true;
+
// Object and pointer member access expressions.
+ //
+ // <expression> ::= (dt | pt) <expression> <unresolved-name>
if ((ParseTwoCharToken(state, "dt") || ParseTwoCharToken(state, "pt")) &&
- ParseExpression(state) && ParseType(state)) {
+ ParseExpression(state) && ParseUnresolvedName(state)) {
return true;
}
state->parse_state = copy;
@@ -1774,9 +2541,61 @@ static bool ParseExpression(State *state) {
}
state->parse_state = copy;
+ // Vendor extended expressions
+ if (ParseOneCharToken(state, 'u') && ParseSourceName(state) &&
+ ZeroOrMore(ParseTemplateArg, state) && ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // <expression> ::= rq <requirement>+ E
+ //
+ // https://github.com/itanium-cxx-abi/cxx-abi/issues/24
+ if (ParseTwoCharToken(state, "rq") && OneOrMore(ParseRequirement, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // <expression> ::= rQ <bare-function-type> _ <requirement>+ E
+ //
+ // https://github.com/itanium-cxx-abi/cxx-abi/issues/24
+ if (ParseTwoCharToken(state, "rQ") && ParseBareFunctionType(state) &&
+ ParseOneCharToken(state, '_') && OneOrMore(ParseRequirement, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
return ParseUnresolvedName(state);
}
+// <initializer> ::= pi <expression>* E
+// ::= il <braced-expression>* E
+//
+// The il ... E form is not in the ABI spec but is seen in practice for
+// braced-init-lists in new-expressions, which are standard syntax from C++11
+// on.
+static bool ParseInitializer(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state;
+
+ if (ParseTwoCharToken(state, "pi") && ZeroOrMore(ParseExpression, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "il") &&
+ ZeroOrMore(ParseBracedExpression, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+ return false;
+}
+
// <expr-primary> ::= L <type> <(value) number> E
// ::= L <type> <(value) float> E
// ::= L <mangled-name> E
@@ -1819,10 +2638,35 @@ static bool ParseExprPrimary(State *state) {
return false;
}
- // The merged cast production.
- if (ParseOneCharToken(state, 'L') && ParseType(state) &&
- ParseExprCastValue(state)) {
- return true;
+ if (ParseOneCharToken(state, 'L')) {
+ // There are two special cases in which a literal may or must contain a type
+ // without a value. The first is that both LDnE and LDn0E are valid
+ // encodings of nullptr, used in different situations. Recognize LDnE here,
+ // leaving LDn0E to be recognized by the general logic afterward.
+ if (ParseThreeCharToken(state, "DnE")) return true;
+
+ // The second special case is a string literal, currently mangled in C++98
+ // style as LA<length + 1>_KcE. This is inadequate to support C++11 and
+ // later versions, and the discussion of this problem has not converged.
+ //
+ // https://github.com/itanium-cxx-abi/cxx-abi/issues/64
+ //
+ // For now the bare-type mangling is what's used in practice, so we
+ // recognize this form and only this form if an array type appears here.
+ // Someday we'll probably have to accept a new form of value mangling in
+ // LA...E constructs. (Note also that C++20 allows a wide range of
+ // class-type objects as template arguments, so someday their values will be
+ // mangled and we'll have to recognize them here too.)
+ if (RemainingInput(state)[0] == 'A' /* an array type follows */) {
+ if (ParseType(state) && ParseOneCharToken(state, 'E')) return true;
+ state->parse_state = copy;
+ return false;
+ }
+
+ // The merged cast production.
+ if (ParseType(state) && ParseExprCastValueAndTrailingE(state)) {
+ return true;
+ }
}
state->parse_state = copy;
@@ -1836,7 +2680,7 @@ static bool ParseExprPrimary(State *state) {
}
// <number> or <float>, followed by 'E', as described above ParseExprPrimary.
-static bool ParseExprCastValue(State *state) {
+static bool ParseExprCastValueAndTrailingE(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
// We have to be able to backtrack after accepting a number because we could
@@ -1848,39 +2692,148 @@ static bool ParseExprCastValue(State *state) {
}
state->parse_state = copy;
- if (ParseFloatNumber(state) && ParseOneCharToken(state, 'E')) {
+ if (ParseFloatNumber(state)) {
+ // <float> for ordinary floating-point types
+ if (ParseOneCharToken(state, 'E')) return true;
+
+ // <float> _ <float> for complex floating-point types
+ if (ParseOneCharToken(state, '_') && ParseFloatNumber(state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ }
+ state->parse_state = copy;
+
+ return false;
+}
+
+// Parses `Q <requires-clause expr>`.
+// If parsing fails, applies backtracking to `state`.
+//
+// This function covers two symbols instead of one for convenience,
+// because in LLVM's Itanium ABI mangling grammar, <requires-clause expr>
+// always appears after Q.
+//
+// Does not emit the parsed `requires` clause to simplify the implementation.
+// In other words, these two functions' mangled names will demangle identically:
+//
+// template <typename T>
+// int foo(T) requires IsIntegral<T>;
+//
+// vs.
+//
+// template <typename T>
+// int foo(T);
+static bool ParseQRequiresClauseExpr(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state;
+ DisableAppend(state);
+
+ // <requires-clause expr> is just an <expression>: http://shortn/_9E1Ul0rIM8
+ if (ParseOneCharToken(state, 'Q') && ParseExpression(state)) {
+ RestoreAppend(state, copy.append);
+ return true;
+ }
+
+ // also restores append
+ state->parse_state = copy;
+ return false;
+}
+
+// <requirement> ::= X <expression> [N] [R <type-constraint>]
+// <requirement> ::= T <type>
+// <requirement> ::= Q <constraint-expression>
+//
+// <constraint-expression> ::= <expression>
+//
+// https://github.com/itanium-cxx-abi/cxx-abi/issues/24
+static bool ParseRequirement(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+
+ ParseState copy = state->parse_state;
+
+ if (ParseOneCharToken(state, 'X') && ParseExpression(state) &&
+ Optional(ParseOneCharToken(state, 'N')) &&
+ // This logic backtracks cleanly if we eat an R but a valid type doesn't
+ // follow it.
+ (!ParseOneCharToken(state, 'R') || ParseTypeConstraint(state))) {
return true;
}
state->parse_state = copy;
+ if (ParseOneCharToken(state, 'T') && ParseType(state)) return true;
+ state->parse_state = copy;
+
+ if (ParseOneCharToken(state, 'Q') && ParseExpression(state)) return true;
+ state->parse_state = copy;
+
return false;
}
+// <type-constraint> ::= <name>
+static bool ParseTypeConstraint(State *state) {
+ return ParseName(state);
+}
+
// <local-name> ::= Z <(function) encoding> E <(entity) name> [<discriminator>]
// ::= Z <(function) encoding> E s [<discriminator>]
+// ::= Z <(function) encoding> E d [<(parameter) number>] _ <name>
//
// Parsing a common prefix of these two productions together avoids an
// exponential blowup of backtracking. Parse like:
// <local-name> := Z <encoding> E <local-name-suffix>
// <local-name-suffix> ::= s [<discriminator>]
+// ::= d [<(parameter) number>] _ <name>
// ::= <name> [<discriminator>]
static bool ParseLocalNameSuffix(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state;
+
+ // <local-name-suffix> ::= d [<(parameter) number>] _ <name>
+ if (ParseOneCharToken(state, 'd') &&
+ (IsDigit(RemainingInput(state)[0]) || RemainingInput(state)[0] == '_')) {
+ int number = -1;
+ Optional(ParseNumber(state, &number));
+ if (number < -1 || number > 2147483645) {
+ // Work around overflow cases. We do not expect these outside of a fuzzer
+ // or other source of adversarial input. If we do detect overflow here,
+ // we'll print {default arg#1}.
+ number = -1;
+ }
+ number += 2;
+
+ // The ::{default arg#1}:: infix must be rendered before the lambda itself,
+ // so print this before parsing the rest of the <local-name-suffix>.
+ MaybeAppend(state, "::{default arg#");
+ MaybeAppendDecimal(state, number);
+ MaybeAppend(state, "}::");
+ if (ParseOneCharToken(state, '_') && ParseName(state)) return true;
+
+ // On late parse failure, roll back not only the input but also the output,
+ // whose trailing NUL was overwritten.
+ state->parse_state = copy;
+ if (state->parse_state.append) {
+ state->out[state->parse_state.out_cur_idx] = '\0';
+ }
+ return false;
+ }
+ state->parse_state = copy;
+ // <local-name-suffix> ::= <name> [<discriminator>]
if (MaybeAppend(state, "::") && ParseName(state) &&
Optional(ParseDiscriminator(state))) {
return true;
}
-
- // Since we're not going to overwrite the above "::" by re-parsing the
- // <encoding> (whose trailing '\0' byte was in the byte now holding the
- // first ':'), we have to rollback the "::" if the <name> parse failed.
+ state->parse_state = copy;
if (state->parse_state.append) {
- state->out[state->parse_state.out_cur_idx - 2] = '\0';
+ state->out[state->parse_state.out_cur_idx] = '\0';
}
+ // <local-name-suffix> ::= s [<discriminator>]
return ParseOneCharToken(state, 's') && Optional(ParseDiscriminator(state));
}
@@ -1896,12 +2849,22 @@ static bool ParseLocalName(State *state) {
return false;
}
-// <discriminator> := _ <(non-negative) number>
+// <discriminator> := _ <digit>
+// := __ <number (>= 10)> _
static bool ParseDiscriminator(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
ParseState copy = state->parse_state;
- if (ParseOneCharToken(state, '_') && ParseNumber(state, nullptr)) {
+
+ // Both forms start with _ so parse that first.
+ if (!ParseOneCharToken(state, '_')) return false;
+
+ // <digit>
+ if (ParseDigit(state, nullptr)) return true;
+
+ // _ <number> _
+ if (ParseOneCharToken(state, '_') && ParseNumber(state, nullptr) &&
+ ParseOneCharToken(state, '_')) {
return true;
}
state->parse_state = copy;
@@ -1947,6 +2910,7 @@ static bool ParseSubstitution(State *state, bool accept_std) {
MaybeAppend(state, p->real_name);
}
++state->parse_state.mangled_idx;
+ UpdateHighWaterMark(state);
return true;
}
}
@@ -1972,10 +2936,13 @@ static bool ParseTopLevelMangledName(State *state) {
MaybeAppend(state, RemainingInput(state));
return true;
}
+ ReportHighWaterMark(state);
return false; // Unconsumed suffix.
}
return true;
}
+
+ ReportHighWaterMark(state);
return false;
}
@@ -1985,6 +2952,10 @@ static bool Overflowed(const State *state) {
// The demangler entry point.
bool Demangle(const char* mangled, char* out, size_t out_size) {
+ if (mangled[0] == '_' && mangled[1] == 'R') {
+ return DemangleRustSymbolEncoding(mangled, out, out_size);
+ }
+
State state;
InitState(&state, mangled, out, out_size);
return ParseTopLevelMangledName(&state) && !Overflowed(&state) &&
diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/demangle.h b/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/demangle.h
index 6acfee2c50..a1166f9e7c 100644
--- a/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/demangle.h
+++ b/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/demangle.h
@@ -56,6 +56,9 @@ namespace debugging_internal {
//
// See the unit test for more examples.
//
+// Demangle also recognizes Rust mangled names by delegating the parsing of
+// anything that starts with _R to DemangleRustSymbolEncoding (demangle_rust.h).
+//
// Note: we might want to write demanglers for ABIs other than Itanium
// C++ ABI in the future.
bool Demangle(const char* mangled, char* out, size_t out_size);
diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/demangle_rust.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/demangle_rust.cc
new file mode 100644
index 0000000000..086e1457f9
--- /dev/null
+++ b/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/demangle_rust.cc
@@ -0,0 +1,925 @@
+// Copyright 2024 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "y_absl/debugging/internal/demangle_rust.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <limits>
+
+#include "y_absl/base/attributes.h"
+#include "y_absl/base/config.h"
+#include "y_absl/debugging/internal/decode_rust_punycode.h"
+
+namespace y_absl {
+Y_ABSL_NAMESPACE_BEGIN
+namespace debugging_internal {
+
+namespace {
+
+// Same step limit as the C++ demangler in demangle.cc uses.
+constexpr int kMaxReturns = 1 << 17;
+
+bool IsDigit(char c) { return '0' <= c && c <= '9'; }
+bool IsLower(char c) { return 'a' <= c && c <= 'z'; }
+bool IsUpper(char c) { return 'A' <= c && c <= 'Z'; }
+bool IsAlpha(char c) { return IsLower(c) || IsUpper(c); }
+bool IsIdentifierChar(char c) { return IsAlpha(c) || IsDigit(c) || c == '_'; }
+bool IsLowerHexDigit(char c) { return IsDigit(c) || ('a' <= c && c <= 'f'); }
+
+const char* BasicTypeName(char c) {
+ switch (c) {
+ case 'a': return "i8";
+ case 'b': return "bool";
+ case 'c': return "char";
+ case 'd': return "f64";
+ case 'e': return "str";
+ case 'f': return "f32";
+ case 'h': return "u8";
+ case 'i': return "isize";
+ case 'j': return "usize";
+ case 'l': return "i32";
+ case 'm': return "u32";
+ case 'n': return "i128";
+ case 'o': return "u128";
+ case 'p': return "_";
+ case 's': return "i16";
+ case 't': return "u16";
+ case 'u': return "()";
+ case 'v': return "...";
+ case 'x': return "i64";
+ case 'y': return "u64";
+ case 'z': return "!";
+ }
+ return nullptr;
+}
+
+// Parser for Rust symbol mangling v0, whose grammar is defined here:
+//
+// https://doc.rust-lang.org/rustc/symbol-mangling/v0.html#symbol-grammar-summary
+class RustSymbolParser {
+ public:
+ // Prepares to demangle the given encoding, a Rust symbol name starting with
+ // _R, into the output buffer [out, out_end). The caller is expected to
+ // continue by calling the new object's Parse function.
+ RustSymbolParser(const char* encoding, char* out, char* const out_end)
+ : encoding_(encoding), out_(out), out_end_(out_end) {
+ if (out_ != out_end_) *out_ = '\0';
+ }
+
+ // Parses the constructor's encoding argument, writing output into the range
+ // [out, out_end). Returns true on success and false for input whose
+ // structure was not recognized or exceeded implementation limits, such as by
+ // nesting structures too deep. In either case *this should not be used
+ // again.
+ Y_ABSL_MUST_USE_RESULT bool Parse() && {
+ // Recursively parses the grammar production named by callee, then resumes
+ // execution at the next statement.
+ //
+ // Recursive-descent parsing is a beautifully readable translation of a
+ // grammar, but it risks stack overflow if implemented by naive recursion on
+ // the C++ call stack. So we simulate recursion by goto and switch instead,
+ // keeping a bounded stack of "return addresses" in the recursion_stack_
+ // member.
+ //
+ // The callee argument is a statement label. We goto that label after
+ // saving the "return address" on recursion_stack_. The next continue
+ // statement in the for loop below "returns" from this "call".
+ //
+ // The caller argument names the return point. Each value of caller must
+ // appear in only one Y_ABSL_DEMANGLER_RECURSE call and be listed in the
+ // definition of enum ReturnAddress. The switch implements the control
+ // transfer from the end of a "called" subroutine back to the statement
+ // after the "call".
+ //
+ // Note that not all the grammar productions have to be packed into the
+ // switch, but only those which appear in a cycle in the grammar. Anything
+ // acyclic can be written as ordinary functions and function calls, e.g.,
+ // ParseIdentifier.
+#define Y_ABSL_DEMANGLER_RECURSE(callee, caller) \
+ do { \
+ if (recursion_depth_ == kStackSize) return false; \
+ /* The next continue will switch on this saved value ... */ \
+ recursion_stack_[recursion_depth_++] = caller; \
+ goto callee; \
+ /* ... and will land here, resuming the suspended code. */ \
+ case caller: {} \
+ } while (0)
+
+ // Parse the encoding, counting completed recursive calls to guard against
+ // excessively complex input and infinite-loop bugs.
+ int iter = 0;
+ goto whole_encoding;
+ for (; iter < kMaxReturns && recursion_depth_ > 0; ++iter) {
+ // This switch resumes the code path most recently suspended by
+ // Y_ABSL_DEMANGLER_RECURSE.
+ switch (recursion_stack_[--recursion_depth_]) {
+ //
+ // symbol-name ->
+ // _R decimal-number? path instantiating-crate? vendor-specific-suffix?
+ whole_encoding:
+ if (!Eat('_') || !Eat('R')) return false;
+ // decimal-number? is always empty today, so proceed to path, which
+ // can't start with a decimal digit.
+ Y_ABSL_DEMANGLER_RECURSE(path, kInstantiatingCrate);
+ if (IsAlpha(Peek())) {
+ ++silence_depth_; // Print nothing more from here on.
+ Y_ABSL_DEMANGLER_RECURSE(path, kVendorSpecificSuffix);
+ }
+ switch (Take()) {
+ case '.': case '$': case '\0': return true;
+ }
+ return false; // unexpected trailing content
+
+ // path -> crate-root | inherent-impl | trait-impl | trait-definition |
+ // nested-path | generic-args | backref
+ //
+ // Note that Y_ABSL_DEMANGLER_RECURSE does not work inside a nested switch
+ // (which would hide the generated case label). Thus we jump out of the
+ // inner switch with gotos before performing any fake recursion.
+ path:
+ switch (Take()) {
+ case 'C': goto crate_root;
+ case 'M': goto inherent_impl;
+ case 'X': goto trait_impl;
+ case 'Y': goto trait_definition;
+ case 'N': goto nested_path;
+ case 'I': goto generic_args;
+ case 'B': goto path_backref;
+ default: return false;
+ }
+
+ // crate-root -> C identifier (C consumed above)
+ crate_root:
+ if (!ParseIdentifier()) return false;
+ continue;
+
+ // inherent-impl -> M impl-path type (M already consumed)
+ inherent_impl:
+ if (!Emit("<")) return false;
+ Y_ABSL_DEMANGLER_RECURSE(impl_path, kInherentImplType);
+ Y_ABSL_DEMANGLER_RECURSE(type, kInherentImplEnding);
+ if (!Emit(">")) return false;
+ continue;
+
+ // trait-impl -> X impl-path type path (X already consumed)
+ trait_impl:
+ if (!Emit("<")) return false;
+ Y_ABSL_DEMANGLER_RECURSE(impl_path, kTraitImplType);
+ Y_ABSL_DEMANGLER_RECURSE(type, kTraitImplInfix);
+ if (!Emit(" as ")) return false;
+ Y_ABSL_DEMANGLER_RECURSE(path, kTraitImplEnding);
+ if (!Emit(">")) return false;
+ continue;
+
+ // impl-path -> disambiguator? path (but never print it!)
+ impl_path:
+ ++silence_depth_;
+ {
+ int ignored_disambiguator;
+ if (!ParseDisambiguator(ignored_disambiguator)) return false;
+ }
+ Y_ABSL_DEMANGLER_RECURSE(path, kImplPathEnding);
+ --silence_depth_;
+ continue;
+
+ // trait-definition -> Y type path (Y already consumed)
+ trait_definition:
+ if (!Emit("<")) return false;
+ Y_ABSL_DEMANGLER_RECURSE(type, kTraitDefinitionInfix);
+ if (!Emit(" as ")) return false;
+ Y_ABSL_DEMANGLER_RECURSE(path, kTraitDefinitionEnding);
+ if (!Emit(">")) return false;
+ continue;
+
+ // nested-path -> N namespace path identifier (N already consumed)
+ // namespace -> lower | upper
+ nested_path:
+ // Uppercase namespaces must be saved on a stack so we can print
+ // ::{closure#0} or ::{shim:vtable#0} or ::{X:name#0} as needed.
+ if (IsUpper(Peek())) {
+ if (!PushNamespace(Take())) return false;
+ Y_ABSL_DEMANGLER_RECURSE(path, kIdentifierInUppercaseNamespace);
+ if (!Emit("::")) return false;
+ if (!ParseIdentifier(PopNamespace())) return false;
+ continue;
+ }
+
+ // Lowercase namespaces, however, are never represented in the output;
+ // they all emit just ::name.
+ if (IsLower(Take())) {
+ Y_ABSL_DEMANGLER_RECURSE(path, kIdentifierInLowercaseNamespace);
+ if (!Emit("::")) return false;
+ if (!ParseIdentifier()) return false;
+ continue;
+ }
+
+ // Neither upper or lower
+ return false;
+
+ // type -> basic-type | array-type | slice-type | tuple-type |
+ // ref-type | mut-ref-type | const-ptr-type | mut-ptr-type |
+ // fn-type | dyn-trait-type | path | backref
+ //
+ // We use ifs instead of switch (Take()) because the default case jumps
+ // to path, which will need to see the first character not yet Taken
+ // from the input. Because we do not use a nested switch here,
+ // Y_ABSL_DEMANGLER_RECURSE works fine in the 'S' case.
+ type:
+ if (IsLower(Peek())) {
+ const char* type_name = BasicTypeName(Take());
+ if (type_name == nullptr || !Emit(type_name)) return false;
+ continue;
+ }
+ if (Eat('A')) {
+ // array-type = A type const
+ if (!Emit("[")) return false;
+ Y_ABSL_DEMANGLER_RECURSE(type, kArraySize);
+ if (!Emit("; ")) return false;
+ Y_ABSL_DEMANGLER_RECURSE(constant, kFinishArray);
+ if (!Emit("]")) return false;
+ continue;
+ }
+ if (Eat('S')) {
+ if (!Emit("[")) return false;
+ Y_ABSL_DEMANGLER_RECURSE(type, kSliceEnding);
+ if (!Emit("]")) return false;
+ continue;
+ }
+ if (Eat('T')) goto tuple_type;
+ if (Eat('R')) {
+ if (!Emit("&")) return false;
+ if (!ParseOptionalLifetime()) return false;
+ goto type;
+ }
+ if (Eat('Q')) {
+ if (!Emit("&mut ")) return false;
+ if (!ParseOptionalLifetime()) return false;
+ goto type;
+ }
+ if (Eat('P')) {
+ if (!Emit("*const ")) return false;
+ goto type;
+ }
+ if (Eat('O')) {
+ if (!Emit("*mut ")) return false;
+ goto type;
+ }
+ if (Eat('F')) goto fn_type;
+ if (Eat('D')) goto dyn_trait_type;
+ if (Eat('B')) goto type_backref;
+ goto path;
+
+ // tuple-type -> T type* E (T already consumed)
+ tuple_type:
+ if (!Emit("(")) return false;
+
+ // The toolchain should call the unit type u instead of TE, but the
+ // grammar and other demanglers also recognize TE, so we do too.
+ if (Eat('E')) {
+ if (!Emit(")")) return false;
+ continue;
+ }
+
+ // A tuple with one element is rendered (type,) instead of (type).
+ Y_ABSL_DEMANGLER_RECURSE(type, kAfterFirstTupleElement);
+ if (Eat('E')) {
+ if (!Emit(",)")) return false;
+ continue;
+ }
+
+ // A tuple with two elements is of course (x, y).
+ if (!Emit(", ")) return false;
+ Y_ABSL_DEMANGLER_RECURSE(type, kAfterSecondTupleElement);
+ if (Eat('E')) {
+ if (!Emit(")")) return false;
+ continue;
+ }
+
+ // And (x, y, z) for three elements.
+ if (!Emit(", ")) return false;
+ Y_ABSL_DEMANGLER_RECURSE(type, kAfterThirdTupleElement);
+ if (Eat('E')) {
+ if (!Emit(")")) return false;
+ continue;
+ }
+
+ // For longer tuples we write (x, y, z, ...), printing none of the
+ // content of the fourth and later types. Thus we avoid exhausting
+ // output buffers and human readers' patience when some library has a
+ // long tuple as an implementation detail, without having to
+ // completely obfuscate all tuples.
+ if (!Emit(", ...)")) return false;
+ ++silence_depth_;
+ while (!Eat('E')) {
+ Y_ABSL_DEMANGLER_RECURSE(type, kAfterSubsequentTupleElement);
+ }
+ --silence_depth_;
+ continue;
+
+ // fn-type -> F fn-sig (F already consumed)
+ // fn-sig -> binder? U? (K abi)? type* E type
+ // abi -> C | undisambiguated-identifier
+ //
+ // We follow the C++ demangler in suppressing details of function
+ // signatures. Every function type is rendered "fn...".
+ fn_type:
+ if (!Emit("fn...")) return false;
+ ++silence_depth_;
+ if (!ParseOptionalBinder()) return false;
+ (void)Eat('U');
+ if (Eat('K')) {
+ if (!Eat('C') && !ParseUndisambiguatedIdentifier()) return false;
+ }
+ while (!Eat('E')) {
+ Y_ABSL_DEMANGLER_RECURSE(type, kContinueParameterList);
+ }
+ Y_ABSL_DEMANGLER_RECURSE(type, kFinishFn);
+ --silence_depth_;
+ continue;
+
+ // dyn-trait-type -> D dyn-bounds lifetime (D already consumed)
+ // dyn-bounds -> binder? dyn-trait* E
+ //
+ // The grammar strangely allows an empty trait list, even though the
+ // compiler should never output one. We follow existing demanglers in
+ // rendering DEL_ as "dyn ".
+ //
+ // Because auto traits lengthen a type name considerably without
+ // providing much value to a search for related source code, it would be
+ // desirable to abbreviate
+ // dyn main::Trait + std::marker::Copy + std::marker::Send
+ // to
+ // dyn main::Trait + ...,
+ // eliding the auto traits. But it is difficult to do so correctly, in
+ // part because there is no guarantee that the mangling will list the
+ // main trait first. So we just print all the traits in their order of
+ // appearance in the mangled name.
+ dyn_trait_type:
+ if (!Emit("dyn ")) return false;
+ if (!ParseOptionalBinder()) return false;
+ if (!Eat('E')) {
+ Y_ABSL_DEMANGLER_RECURSE(dyn_trait, kBeginAutoTraits);
+ while (!Eat('E')) {
+ if (!Emit(" + ")) return false;
+ Y_ABSL_DEMANGLER_RECURSE(dyn_trait, kContinueAutoTraits);
+ }
+ }
+ if (!ParseRequiredLifetime()) return false;
+ continue;
+
+ // dyn-trait -> path dyn-trait-assoc-binding*
+ // dyn-trait-assoc-binding -> p undisambiguated-identifier type
+ //
+ // We render nonempty binding lists as <>, omitting their contents as
+ // for generic-args.
+ dyn_trait:
+ Y_ABSL_DEMANGLER_RECURSE(path, kContinueDynTrait);
+ if (Peek() == 'p') {
+ if (!Emit("<>")) return false;
+ ++silence_depth_;
+ while (Eat('p')) {
+ if (!ParseUndisambiguatedIdentifier()) return false;
+ Y_ABSL_DEMANGLER_RECURSE(type, kContinueAssocBinding);
+ }
+ --silence_depth_;
+ }
+ continue;
+
+ // const -> type const-data | p | backref
+ //
+ // const is a C++ keyword, so we use the label `constant` instead.
+ constant:
+ if (Eat('B')) goto const_backref;
+ if (Eat('p')) {
+ if (!Emit("_")) return false;
+ continue;
+ }
+
+ // Scan the type without printing it.
+ //
+ // The Rust language restricts the type of a const generic argument
+ // much more than the mangling grammar does. We do not enforce this.
+ //
+ // We also do not bother printing false, true, 'A', and '\u{abcd}' for
+ // the types bool and char. Because we do not print generic-args
+ // contents, we expect to print constants only in array sizes, and
+ // those should not be bool or char.
+ ++silence_depth_;
+ Y_ABSL_DEMANGLER_RECURSE(type, kConstData);
+ --silence_depth_;
+
+ // const-data -> n? hex-digit* _
+ //
+ // Although the grammar doesn't say this, existing demanglers expect
+ // that zero is 0, not an empty digit sequence, and no nonzero value
+ // may have leading zero digits. Also n0_ is accepted and printed as
+ // -0, though a toolchain will probably never write that encoding.
+ if (Eat('n') && !EmitChar('-')) return false;
+ if (!Emit("0x")) return false;
+ if (Eat('0')) {
+ if (!EmitChar('0')) return false;
+ if (!Eat('_')) return false;
+ continue;
+ }
+ while (IsLowerHexDigit(Peek())) {
+ if (!EmitChar(Take())) return false;
+ }
+ if (!Eat('_')) return false;
+ continue;
+
+ // generic-args -> I path generic-arg* E (I already consumed)
+ //
+ // We follow the C++ demangler in omitting all the arguments from the
+ // output, printing only the list opening and closing tokens.
+ generic_args:
+ Y_ABSL_DEMANGLER_RECURSE(path, kBeginGenericArgList);
+ if (!Emit("::<>")) return false;
+ ++silence_depth_;
+ while (!Eat('E')) {
+ Y_ABSL_DEMANGLER_RECURSE(generic_arg, kContinueGenericArgList);
+ }
+ --silence_depth_;
+ continue;
+
+ // generic-arg -> lifetime | type | K const
+ generic_arg:
+ if (Peek() == 'L') {
+ if (!ParseOptionalLifetime()) return false;
+ continue;
+ }
+ if (Eat('K')) goto constant;
+ goto type;
+
+ // backref -> B base-62-number (B already consumed)
+ //
+ // The BeginBackref call parses and range-checks the base-62-number. We
+ // always do that much.
+ //
+ // The recursive call parses and prints what the backref points at. We
+ // save CPU and stack by skipping this work if the output would be
+ // suppressed anyway.
+ path_backref:
+ if (!BeginBackref()) return false;
+ if (silence_depth_ == 0) {
+ Y_ABSL_DEMANGLER_RECURSE(path, kPathBackrefEnding);
+ }
+ EndBackref();
+ continue;
+
+ // This represents the same backref production as in path_backref but
+ // parses the target as a type instead of a path.
+ type_backref:
+ if (!BeginBackref()) return false;
+ if (silence_depth_ == 0) {
+ Y_ABSL_DEMANGLER_RECURSE(type, kTypeBackrefEnding);
+ }
+ EndBackref();
+ continue;
+
+ const_backref:
+ if (!BeginBackref()) return false;
+ if (silence_depth_ == 0) {
+ Y_ABSL_DEMANGLER_RECURSE(constant, kConstantBackrefEnding);
+ }
+ EndBackref();
+ continue;
+ }
+ }
+
+ return false; // hit iteration limit or a bug in our stack handling
+ }
+
+ private:
+ // Enumerates resumption points for Y_ABSL_DEMANGLER_RECURSE calls.
+ enum ReturnAddress : uint8_t {
+ kInstantiatingCrate,
+ kVendorSpecificSuffix,
+ kIdentifierInUppercaseNamespace,
+ kIdentifierInLowercaseNamespace,
+ kInherentImplType,
+ kInherentImplEnding,
+ kTraitImplType,
+ kTraitImplInfix,
+ kTraitImplEnding,
+ kImplPathEnding,
+ kTraitDefinitionInfix,
+ kTraitDefinitionEnding,
+ kArraySize,
+ kFinishArray,
+ kSliceEnding,
+ kAfterFirstTupleElement,
+ kAfterSecondTupleElement,
+ kAfterThirdTupleElement,
+ kAfterSubsequentTupleElement,
+ kContinueParameterList,
+ kFinishFn,
+ kBeginAutoTraits,
+ kContinueAutoTraits,
+ kContinueDynTrait,
+ kContinueAssocBinding,
+ kConstData,
+ kBeginGenericArgList,
+ kContinueGenericArgList,
+ kPathBackrefEnding,
+ kTypeBackrefEnding,
+ kConstantBackrefEnding,
+ };
+
+ // Element counts for the stack arrays. Larger stack sizes accommodate more
+ // deeply nested names at the cost of a larger footprint on the C++ call
+ // stack.
+ enum {
+ // Maximum recursive calls outstanding at one time.
+ kStackSize = 256,
+
+ // Maximum N<uppercase> nested-paths open at once. We do not expect
+ // closures inside closures inside closures as much as functions inside
+ // modules inside other modules, so we can use a smaller array here.
+ kNamespaceStackSize = 64,
+
+ // Maximum number of nested backrefs. We can keep this stack pretty small
+ // because we do not follow backrefs inside generic-args or other contexts
+ // that suppress printing, so deep stacking is unlikely in practice.
+ kPositionStackSize = 16,
+ };
+
+ // Returns the next input character without consuming it.
+ char Peek() const { return encoding_[pos_]; }
+
+ // Consumes and returns the next input character.
+ char Take() { return encoding_[pos_++]; }
+
+ // If the next input character is the given character, consumes it and returns
+ // true; otherwise returns false without consuming a character.
+ Y_ABSL_MUST_USE_RESULT bool Eat(char want) {
+ if (encoding_[pos_] != want) return false;
+ ++pos_;
+ return true;
+ }
+
+ // Provided there is enough remaining output space, appends c to the output,
+ // writing a fresh NUL terminator afterward, and returns true. Returns false
+ // if the output buffer had less than two bytes free.
+ Y_ABSL_MUST_USE_RESULT bool EmitChar(char c) {
+ if (silence_depth_ > 0) return true;
+ if (out_end_ - out_ < 2) return false;
+ *out_++ = c;
+ *out_ = '\0';
+ return true;
+ }
+
+ // Provided there is enough remaining output space, appends the C string token
+ // to the output, followed by a NUL character, and returns true. Returns
+ // false if not everything fit into the output buffer.
+ Y_ABSL_MUST_USE_RESULT bool Emit(const char* token) {
+ if (silence_depth_ > 0) return true;
+ const size_t token_length = std::strlen(token);
+ const size_t bytes_to_copy = token_length + 1; // token and final NUL
+ if (static_cast<size_t>(out_end_ - out_) < bytes_to_copy) return false;
+ std::memcpy(out_, token, bytes_to_copy);
+ out_ += token_length;
+ return true;
+ }
+
+ // Provided there is enough remaining output space, appends the decimal form
+ // of disambiguator (if it's nonnegative) or "?" (if it's negative) to the
+ // output, followed by a NUL character, and returns true. Returns false if
+ // not everything fit into the output buffer.
+ Y_ABSL_MUST_USE_RESULT bool EmitDisambiguator(int disambiguator) {
+ if (disambiguator < 0) return EmitChar('?'); // parsed but too large
+ if (disambiguator == 0) return EmitChar('0');
+ // Convert disambiguator to decimal text. Three digits per byte is enough
+ // because 999 > 256. The bound will remain correct even if future
+ // maintenance changes the type of the disambiguator variable.
+ char digits[3 * sizeof(disambiguator)] = {};
+ size_t leading_digit_index = sizeof(digits) - 1;
+ for (; disambiguator > 0; disambiguator /= 10) {
+ digits[--leading_digit_index] =
+ static_cast<char>('0' + disambiguator % 10);
+ }
+ return Emit(digits + leading_digit_index);
+ }
+
+ // Consumes an optional disambiguator (s123_) from the input.
+ //
+ // On success returns true and fills value with the encoded value if it was
+ // not too big, otherwise with -1. If the optional disambiguator was omitted,
+ // value is 0. On parse failure returns false and sets value to -1.
+ Y_ABSL_MUST_USE_RESULT bool ParseDisambiguator(int& value) {
+ value = -1;
+
+ // disambiguator = s base-62-number
+ //
+ // Disambiguators are optional. An omitted disambiguator is zero.
+ if (!Eat('s')) {
+ value = 0;
+ return true;
+ }
+ int base_62_value = 0;
+ if (!ParseBase62Number(base_62_value)) return false;
+ value = base_62_value < 0 ? -1 : base_62_value + 1;
+ return true;
+ }
+
+ // Consumes a base-62 number like _ or 123_ from the input.
+ //
+ // On success returns true and fills value with the encoded value if it was
+ // not too big, otherwise with -1. On parse failure returns false and sets
+ // value to -1.
+ Y_ABSL_MUST_USE_RESULT bool ParseBase62Number(int& value) {
+ value = -1;
+
+ // base-62-number = (digit | lower | upper)* _
+ //
+ // An empty base-62 digit sequence means 0.
+ if (Eat('_')) {
+ value = 0;
+ return true;
+ }
+
+ // A nonempty digit sequence denotes its base-62 value plus 1.
+ int encoded_number = 0;
+ bool overflowed = false;
+ while (IsAlpha(Peek()) || IsDigit(Peek())) {
+ const char c = Take();
+ if (encoded_number >= std::numeric_limits<int>::max()/62) {
+ // If we are close to overflowing an int, keep parsing but stop updating
+ // encoded_number and remember to return -1 at the end. The point is to
+ // avoid undefined behavior while parsing crate-root disambiguators,
+ // which are large in practice but not shown in demangling, while
+ // successfully computing closure and shim disambiguators, which are
+ // typically small and are printed out.
+ overflowed = true;
+ } else {
+ int digit;
+ if (IsDigit(c)) {
+ digit = c - '0';
+ } else if (IsLower(c)) {
+ digit = c - 'a' + 10;
+ } else {
+ digit = c - 'A' + 36;
+ }
+ encoded_number = 62 * encoded_number + digit;
+ }
+ }
+
+ if (!Eat('_')) return false;
+ if (!overflowed) value = encoded_number + 1;
+ return true;
+ }
+
+ // Consumes an identifier from the input, returning true on success.
+ //
+ // A nonzero uppercase_namespace specifies the character after the N in a
+ // nested-identifier, e.g., 'C' for a closure, allowing ParseIdentifier to
+ // write out the name with the conventional decoration for that namespace.
+ Y_ABSL_MUST_USE_RESULT bool ParseIdentifier(char uppercase_namespace = '\0') {
+ // identifier -> disambiguator? undisambiguated-identifier
+ int disambiguator = 0;
+ if (!ParseDisambiguator(disambiguator)) return false;
+
+ return ParseUndisambiguatedIdentifier(uppercase_namespace, disambiguator);
+ }
+
+ // Consumes from the input an identifier with no preceding disambiguator,
+ // returning true on success.
+ //
+ // When ParseIdentifier calls this, it passes the N<namespace> character and
+ // disambiguator value so that "{closure#42}" and similar forms can be
+ // rendered correctly.
+ //
+ // At other appearances of undisambiguated-identifier in the grammar, this
+ // treatment is not applicable, and the call site omits both arguments.
+ Y_ABSL_MUST_USE_RESULT bool ParseUndisambiguatedIdentifier(
+ char uppercase_namespace = '\0', int disambiguator = 0) {
+ // undisambiguated-identifier -> u? decimal-number _? bytes
+ const bool is_punycoded = Eat('u');
+ if (!IsDigit(Peek())) return false;
+ int num_bytes = 0;
+ if (!ParseDecimalNumber(num_bytes)) return false;
+ (void)Eat('_'); // optional separator, needed if a digit follows
+ if (is_punycoded) {
+ DecodeRustPunycodeOptions options;
+ options.punycode_begin = &encoding_[pos_];
+ options.punycode_end = &encoding_[pos_] + num_bytes;
+ options.out_begin = out_;
+ options.out_end = out_end_;
+ out_ = DecodeRustPunycode(options);
+ if (out_ == nullptr) return false;
+ pos_ += static_cast<size_t>(num_bytes);
+ }
+
+ // Emit the beginnings of braced forms like {shim:vtable#0}.
+ if (uppercase_namespace != '\0') {
+ switch (uppercase_namespace) {
+ case 'C':
+ if (!Emit("{closure")) return false;
+ break;
+ case 'S':
+ if (!Emit("{shim")) return false;
+ break;
+ default:
+ if (!EmitChar('{') || !EmitChar(uppercase_namespace)) return false;
+ break;
+ }
+ if (num_bytes > 0 && !Emit(":")) return false;
+ }
+
+ // Emit the name itself.
+ if (!is_punycoded) {
+ for (int i = 0; i < num_bytes; ++i) {
+ const char c = Take();
+ if (!IsIdentifierChar(c) &&
+ // The spec gives toolchains the choice of Punycode or raw UTF-8 for
+ // identifiers containing code points above 0x7f, so accept bytes
+ // with the high bit set.
+ (c & 0x80) == 0) {
+ return false;
+ }
+ if (!EmitChar(c)) return false;
+ }
+ }
+
+ // Emit the endings of braced forms, e.g., "#42}".
+ if (uppercase_namespace != '\0') {
+ if (!EmitChar('#')) return false;
+ if (!EmitDisambiguator(disambiguator)) return false;
+ if (!EmitChar('}')) return false;
+ }
+
+ return true;
+ }
+
+ // Consumes a decimal number like 0 or 123 from the input. On success returns
+ // true and fills value with the encoded value. If the encoded value is too
+ // large or otherwise unparsable, returns false and sets value to -1.
+ Y_ABSL_MUST_USE_RESULT bool ParseDecimalNumber(int& value) {
+ value = -1;
+ if (!IsDigit(Peek())) return false;
+ int encoded_number = Take() - '0';
+ if (encoded_number == 0) {
+ // Decimal numbers are never encoded with extra leading zeroes.
+ value = 0;
+ return true;
+ }
+ while (IsDigit(Peek()) &&
+ // avoid overflow
+ encoded_number < std::numeric_limits<int>::max()/10) {
+ encoded_number = 10 * encoded_number + (Take() - '0');
+ }
+ if (IsDigit(Peek())) return false; // too big
+ value = encoded_number;
+ return true;
+ }
+
+ // Consumes a binder of higher-ranked lifetimes if one is present. On success
+ // returns true and discards the encoded lifetime count. On parse failure
+ // returns false.
+ Y_ABSL_MUST_USE_RESULT bool ParseOptionalBinder() {
+ // binder -> G base-62-number
+ if (!Eat('G')) return true;
+ int ignored_binding_count;
+ return ParseBase62Number(ignored_binding_count);
+ }
+
+ // Consumes a lifetime if one is present.
+ //
+ // On success returns true and discards the lifetime index. We do not print
+ // or even range-check lifetimes because they are a finer detail than other
+ // things we omit from output, such as the entire contents of generic-args.
+ //
+ // On parse failure returns false.
+ Y_ABSL_MUST_USE_RESULT bool ParseOptionalLifetime() {
+ // lifetime -> L base-62-number
+ if (!Eat('L')) return true;
+ int ignored_de_bruijn_index;
+ return ParseBase62Number(ignored_de_bruijn_index);
+ }
+
+ // Consumes a lifetime just like ParseOptionalLifetime, but returns false if
+ // there is no lifetime here.
+ Y_ABSL_MUST_USE_RESULT bool ParseRequiredLifetime() {
+ if (Peek() != 'L') return false;
+ return ParseOptionalLifetime();
+ }
+
+ // Pushes ns onto the namespace stack and returns true if the stack is not
+ // full, else returns false.
+ Y_ABSL_MUST_USE_RESULT bool PushNamespace(char ns) {
+ if (namespace_depth_ == kNamespaceStackSize) return false;
+ namespace_stack_[namespace_depth_++] = ns;
+ return true;
+ }
+
+ // Pops the last pushed namespace. Requires that the namespace stack is not
+ // empty (namespace_depth_ > 0).
+ char PopNamespace() { return namespace_stack_[--namespace_depth_]; }
+
+ // Pushes position onto the position stack and returns true if the stack is
+ // not full, else returns false.
+ Y_ABSL_MUST_USE_RESULT bool PushPosition(int position) {
+ if (position_depth_ == kPositionStackSize) return false;
+ position_stack_[position_depth_++] = position;
+ return true;
+ }
+
+ // Pops the last pushed input position. Requires that the position stack is
+ // not empty (position_depth_ > 0).
+ int PopPosition() { return position_stack_[--position_depth_]; }
+
+ // Consumes a base-62-number denoting a backref target, pushes the current
+ // input position on the data stack, and sets the input position to the
+ // beginning of the backref target. Returns true on success. Returns false
+ // if parsing failed, the stack is exhausted, or the backref target position
+ // is out of range.
+ Y_ABSL_MUST_USE_RESULT bool BeginBackref() {
+ // backref = B base-62-number (B already consumed)
+ //
+ // Reject backrefs that don't parse, overflow int, or don't point backward.
+ // If the offset looks fine, adjust it to account for the _R prefix.
+ int offset = 0;
+ const int offset_of_this_backref =
+ pos_ - 2 /* _R */ - 1 /* B already consumed */;
+ if (!ParseBase62Number(offset) || offset < 0 ||
+ offset >= offset_of_this_backref) {
+ return false;
+ }
+ offset += 2;
+
+ // Save the old position to restore later.
+ if (!PushPosition(pos_)) return false;
+
+ // Move the input position to the backref target.
+ //
+ // Note that we do not check whether the new position points to the
+ // beginning of a construct matching the context in which the backref
+ // appeared. We just jump to it and see whether nested parsing succeeds.
+ // We therefore accept various wrong manglings, e.g., a type backref
+ // pointing to an 'l' character inside an identifier, which happens to mean
+ // i32 when parsed as a type mangling. This saves the complexity and RAM
+ // footprint of remembering which offsets began which kinds of
+ // substructures. Existing demanglers take similar shortcuts.
+ pos_ = offset;
+ return true;
+ }
+
+ // Cleans up after a backref production by restoring the previous input
+ // position from the data stack.
+ void EndBackref() { pos_ = PopPosition(); }
+
+ // The leftmost recursion_depth_ elements of recursion_stack_ contain the
+ // ReturnAddresses pushed by Y_ABSL_DEMANGLER_RECURSE calls not yet completed.
+ ReturnAddress recursion_stack_[kStackSize] = {};
+ int recursion_depth_ = 0;
+
+ // The leftmost namespace_depth_ elements of namespace_stack_ contain the
+ // uppercase namespace identifiers for open nested-paths, e.g., 'C' for a
+ // closure.
+ char namespace_stack_[kNamespaceStackSize] = {};
+ int namespace_depth_ = 0;
+
+ // The leftmost position_depth_ elements of position_stack_ contain the input
+ // positions to return to after fully printing the targets of backrefs.
+ int position_stack_[kPositionStackSize] = {};
+ int position_depth_ = 0;
+
+ // Anything parsed while silence_depth_ > 0 contributes nothing to the
+ // demangled output. For constructs omitted from the demangling, such as
+ // impl-path and the contents of generic-args, we will increment
+ // silence_depth_ on the way in and decrement silence_depth_ on the way out.
+ int silence_depth_ = 0;
+
+ // Input: encoding_ points to a Rust mangled symbol, and encoding_[pos_] is
+ // the next input character to be scanned.
+ int pos_ = 0;
+ const char* encoding_ = nullptr;
+
+ // Output: *out_ is where the next output character should be written, and
+ // out_end_ points past the last byte of available space.
+ char* out_ = nullptr;
+ char* out_end_ = nullptr;
+};
+
+} // namespace
+
+bool DemangleRustSymbolEncoding(const char* mangled, char* out,
+ size_t out_size) {
+ return RustSymbolParser(mangled, out, out + out_size).Parse();
+}
+
+} // namespace debugging_internal
+Y_ABSL_NAMESPACE_END
+} // namespace y_absl
diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/demangle_rust.h b/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/demangle_rust.h
new file mode 100644
index 0000000000..70ec90d3b9
--- /dev/null
+++ b/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/demangle_rust.h
@@ -0,0 +1,42 @@
+// Copyright 2024 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef Y_ABSL_DEBUGGING_INTERNAL_DEMANGLE_RUST_H_
+#define Y_ABSL_DEBUGGING_INTERNAL_DEMANGLE_RUST_H_
+
+#include <cstddef>
+
+#include "y_absl/base/config.h"
+
+namespace y_absl {
+Y_ABSL_NAMESPACE_BEGIN
+namespace debugging_internal {
+
+// Demangle the Rust encoding `mangled`. On success, return true and write the
+// demangled symbol name to `out`. Otherwise, return false, leaving unspecified
+// contents in `out`. For example, calling DemangleRustSymbolEncoding with
+// `mangled = "_RNvC8my_crate7my_func"` will yield `my_crate::my_func` in `out`,
+// provided `out_size` is large enough for that value and its trailing NUL.
+//
+// DemangleRustSymbolEncoding is async-signal-safe and runs in bounded C++
+// call-stack space. It is suitable for symbolizing stack traces in a signal
+// handler.
+bool DemangleRustSymbolEncoding(const char* mangled, char* out,
+ size_t out_size);
+
+} // namespace debugging_internal
+Y_ABSL_NAMESPACE_END
+} // namespace y_absl
+
+#endif // Y_ABSL_DEBUGGING_INTERNAL_DEMANGLE_RUST_H_
diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/elf_mem_image.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/elf_mem_image.cc
index cead525c93..f9fc638597 100644
--- a/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/elf_mem_image.cc
+++ b/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/elf_mem_image.cc
@@ -20,8 +20,11 @@
#ifdef Y_ABSL_HAVE_ELF_MEM_IMAGE // defined in elf_mem_image.h
#include <string.h>
+
#include <cassert>
#include <cstddef>
+#include <cstdint>
+
#include "y_absl/base/config.h"
#include "y_absl/base/internal/raw_logging.h"
@@ -86,20 +89,14 @@ ElfMemImage::ElfMemImage(const void *base) {
Init(base);
}
-int ElfMemImage::GetNumSymbols() const {
- if (!hash_) {
- return 0;
- }
- // See http://www.caldera.com/developers/gabi/latest/ch5.dynamic.html#hash
- return static_cast<int>(hash_[1]);
-}
+uint32_t ElfMemImage::GetNumSymbols() const { return num_syms_; }
-const ElfW(Sym) *ElfMemImage::GetDynsym(int index) const {
+const ElfW(Sym) * ElfMemImage::GetDynsym(uint32_t index) const {
Y_ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range");
return dynsym_ + index;
}
-const ElfW(Versym) *ElfMemImage::GetVersym(int index) const {
+const ElfW(Versym) *ElfMemImage::GetVersym(uint32_t index) const {
Y_ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range");
return versym_ + index;
}
@@ -154,7 +151,7 @@ void ElfMemImage::Init(const void *base) {
dynstr_ = nullptr;
versym_ = nullptr;
verdef_ = nullptr;
- hash_ = nullptr;
+ num_syms_ = 0;
strsize_ = 0;
verdefnum_ = 0;
// Sentinel: PT_LOAD .p_vaddr can't possibly be this.
@@ -219,12 +216,17 @@ void ElfMemImage::Init(const void *base) {
base_as_char - reinterpret_cast<const char *>(link_base_);
ElfW(Dyn)* dynamic_entry = reinterpret_cast<ElfW(Dyn)*>(
static_cast<intptr_t>(dynamic_program_header->p_vaddr) + relocation);
+ uint32_t *sysv_hash = nullptr;
+ uint32_t *gnu_hash = nullptr;
for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) {
const auto value =
static_cast<intptr_t>(dynamic_entry->d_un.d_val) + relocation;
switch (dynamic_entry->d_tag) {
case DT_HASH:
- hash_ = reinterpret_cast<ElfW(Word) *>(value);
+ sysv_hash = reinterpret_cast<uint32_t *>(value);
+ break;
+ case DT_GNU_HASH:
+ gnu_hash = reinterpret_cast<uint32_t *>(value);
break;
case DT_SYMTAB:
dynsym_ = reinterpret_cast<ElfW(Sym) *>(value);
@@ -249,13 +251,38 @@ void ElfMemImage::Init(const void *base) {
break;
}
}
- if (!hash_ || !dynsym_ || !dynstr_ || !versym_ ||
+ if ((!sysv_hash && !gnu_hash) || !dynsym_ || !dynstr_ || !versym_ ||
!verdef_ || !verdefnum_ || !strsize_) {
assert(false); // invalid VDSO
// Mark this image as not present. Can not recur infinitely.
Init(nullptr);
return;
}
+ if (sysv_hash) {
+ num_syms_ = sysv_hash[1];
+ } else {
+ assert(gnu_hash);
+ // Compute the number of symbols for DT_GNU_HASH, which is specified by
+ // https://sourceware.org/gnu-gabi/program-loading-and-dynamic-linking.txt
+ uint32_t nbuckets = gnu_hash[0];
+ // The buckets array is located after the header (4 uint32) and the bloom
+ // filter (size_t array of gnu_hash[2] elements).
+ uint32_t *buckets = gnu_hash + 4 + sizeof(size_t) / 4 * gnu_hash[2];
+ // Find the chain of the last non-empty bucket.
+ uint32_t idx = 0;
+ for (uint32_t i = nbuckets; i > 0;) {
+ idx = buckets[--i];
+ if (idx != 0) break;
+ }
+ if (idx != 0) {
+ // Find the last element of the chain, which has an odd value.
+ // Add one to get the number of symbols.
+ uint32_t *chain = buckets + nbuckets - gnu_hash[1];
+ while (chain[idx++] % 2 == 0) {
+ }
+ }
+ num_syms_ = idx;
+ }
}
bool ElfMemImage::LookupSymbol(const char *name,
@@ -300,9 +327,9 @@ bool ElfMemImage::LookupSymbolByAddress(const void *address,
return false;
}
-ElfMemImage::SymbolIterator::SymbolIterator(const void *const image, int index)
- : index_(index), image_(image) {
-}
+ElfMemImage::SymbolIterator::SymbolIterator(const void *const image,
+ uint32_t index)
+ : index_(index), image_(image) {}
const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const {
return &info_;
@@ -335,7 +362,7 @@ ElfMemImage::SymbolIterator ElfMemImage::end() const {
return SymbolIterator(this, GetNumSymbols());
}
-void ElfMemImage::SymbolIterator::Update(int increment) {
+void ElfMemImage::SymbolIterator::Update(uint32_t increment) {
const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_);
Y_ABSL_RAW_CHECK(image->IsPresent() || increment == 0, "");
if (!image->IsPresent()) {
diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/elf_mem_image.h b/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/elf_mem_image.h
index 9c5e588fbb..4a3ee56edf 100644
--- a/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/elf_mem_image.h
+++ b/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/elf_mem_image.h
@@ -22,6 +22,7 @@
// Including this will define the __GLIBC__ macro if glibc is being
// used.
#include <climits>
+#include <cstdint>
#include "y_absl/base/config.h"
@@ -82,10 +83,10 @@ class ElfMemImage {
bool operator!=(const SymbolIterator &rhs) const;
bool operator==(const SymbolIterator &rhs) const;
private:
- SymbolIterator(const void *const image, int index);
- void Update(int incr);
+ SymbolIterator(const void *const image, uint32_t index);
+ void Update(uint32_t incr);
SymbolInfo info_;
- int index_;
+ uint32_t index_;
const void *const image_;
};
@@ -94,14 +95,14 @@ class ElfMemImage {
void Init(const void *base);
bool IsPresent() const { return ehdr_ != nullptr; }
const ElfW(Phdr)* GetPhdr(int index) const;
- const ElfW(Sym)* GetDynsym(int index) const;
- const ElfW(Versym)* GetVersym(int index) const;
+ const ElfW(Sym) * GetDynsym(uint32_t index) const;
+ const ElfW(Versym)* GetVersym(uint32_t index) const;
const ElfW(Verdef)* GetVerdef(int index) const;
const ElfW(Verdaux)* GetVerdefAux(const ElfW(Verdef) *verdef) const;
const char* GetDynstr(ElfW(Word) offset) const;
const void* GetSymAddr(const ElfW(Sym) *sym) const;
const char* GetVerstr(ElfW(Word) offset) const;
- int GetNumSymbols() const;
+ uint32_t GetNumSymbols() const;
SymbolIterator begin() const;
SymbolIterator end() const;
@@ -124,8 +125,8 @@ class ElfMemImage {
const ElfW(Sym) *dynsym_;
const ElfW(Versym) *versym_;
const ElfW(Verdef) *verdef_;
- const ElfW(Word) *hash_;
const char *dynstr_;
+ uint32_t num_syms_;
size_t strsize_;
size_t verdefnum_;
ElfW(Addr) link_base_; // Link-time base (p_vaddr of first PT_LOAD).
diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/stacktrace_aarch64-inl.inc b/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/stacktrace_aarch64-inl.inc
index 7c7c0304e3..1bf78f03e1 100644
--- a/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/stacktrace_aarch64-inl.inc
+++ b/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/stacktrace_aarch64-inl.inc
@@ -89,6 +89,8 @@ struct StackInfo {
static bool InsideSignalStack(void** ptr, const StackInfo* stack_info) {
uintptr_t comparable_ptr = reinterpret_cast<uintptr_t>(ptr);
+ if (stack_info->sig_stack_high == kUnknownStackEnd)
+ return false;
return (comparable_ptr >= stack_info->sig_stack_low &&
comparable_ptr < stack_info->sig_stack_high);
}
@@ -122,13 +124,6 @@ static void **NextStackFrame(void **old_frame_pointer, const void *uc,
if (pre_signal_frame_pointer >= old_frame_pointer) {
new_frame_pointer = pre_signal_frame_pointer;
}
- // Check that alleged frame pointer is actually readable. This is to
- // prevent "double fault" in case we hit the first fault due to e.g.
- // stack corruption.
- if (!y_absl::debugging_internal::AddressIsReadable(
- new_frame_pointer))
- return nullptr;
- }
}
#endif
@@ -136,6 +131,14 @@ static void **NextStackFrame(void **old_frame_pointer, const void *uc,
if ((reinterpret_cast<uintptr_t>(new_frame_pointer) & 7) != 0)
return nullptr;
+ // Check that alleged frame pointer is actually readable. This is to
+ // prevent "double fault" in case we hit the first fault due to e.g.
+ // stack corruption.
+ if (!y_absl::debugging_internal::AddressIsReadable(
+ new_frame_pointer))
+ return nullptr;
+ }
+
// Only check the size if both frames are in the same stack.
if (InsideSignalStack(new_frame_pointer, stack_info) ==
InsideSignalStack(old_frame_pointer, stack_info)) {
diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/utf8_for_code_point.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/utf8_for_code_point.cc
new file mode 100644
index 0000000000..ed158c62b3
--- /dev/null
+++ b/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/utf8_for_code_point.cc
@@ -0,0 +1,70 @@
+// Copyright 2024 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "y_absl/debugging/internal/utf8_for_code_point.h"
+
+#include <cstdint>
+
+#include "y_absl/base/config.h"
+
+namespace y_absl {
+Y_ABSL_NAMESPACE_BEGIN
+namespace debugging_internal {
+namespace {
+
+// UTF-8 encoding bounds.
+constexpr uint32_t kMinSurrogate = 0xd800, kMaxSurrogate = 0xdfff;
+constexpr uint32_t kMax1ByteCodePoint = 0x7f;
+constexpr uint32_t kMax2ByteCodePoint = 0x7ff;
+constexpr uint32_t kMax3ByteCodePoint = 0xffff;
+constexpr uint32_t kMaxCodePoint = 0x10ffff;
+
+} // namespace
+
+Utf8ForCodePoint::Utf8ForCodePoint(uint64_t code_point) {
+ if (code_point <= kMax1ByteCodePoint) {
+ length = 1;
+ bytes[0] = static_cast<char>(code_point);
+ return;
+ }
+
+ if (code_point <= kMax2ByteCodePoint) {
+ length = 2;
+ bytes[0] = static_cast<char>(0xc0 | (code_point >> 6));
+ bytes[1] = static_cast<char>(0x80 | (code_point & 0x3f));
+ return;
+ }
+
+ if (kMinSurrogate <= code_point && code_point <= kMaxSurrogate) return;
+
+ if (code_point <= kMax3ByteCodePoint) {
+ length = 3;
+ bytes[0] = static_cast<char>(0xe0 | (code_point >> 12));
+ bytes[1] = static_cast<char>(0x80 | ((code_point >> 6) & 0x3f));
+ bytes[2] = static_cast<char>(0x80 | (code_point & 0x3f));
+ return;
+ }
+
+ if (code_point > kMaxCodePoint) return;
+
+ length = 4;
+ bytes[0] = static_cast<char>(0xf0 | (code_point >> 18));
+ bytes[1] = static_cast<char>(0x80 | ((code_point >> 12) & 0x3f));
+ bytes[2] = static_cast<char>(0x80 | ((code_point >> 6) & 0x3f));
+ bytes[3] = static_cast<char>(0x80 | (code_point & 0x3f));
+}
+
+} // namespace debugging_internal
+Y_ABSL_NAMESPACE_END
+} // namespace y_absl
diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/utf8_for_code_point.h b/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/utf8_for_code_point.h
new file mode 100644
index 0000000000..aa0dc65b82
--- /dev/null
+++ b/contrib/restricted/abseil-cpp-tstring/y_absl/debugging/internal/utf8_for_code_point.h
@@ -0,0 +1,47 @@
+// Copyright 2024 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef Y_ABSL_DEBUGGING_INTERNAL_UTF8_FOR_CODE_POINT_H_
+#define Y_ABSL_DEBUGGING_INTERNAL_UTF8_FOR_CODE_POINT_H_
+
+#include <cstdint>
+
+#include "y_absl/base/config.h"
+
+namespace y_absl {
+Y_ABSL_NAMESPACE_BEGIN
+namespace debugging_internal {
+
+struct Utf8ForCodePoint {
+ // Converts a Unicode code point to the corresponding UTF-8 byte sequence.
+ // Async-signal-safe to support use in symbolizing stack traces from a signal
+ // handler.
+ explicit Utf8ForCodePoint(uint64_t code_point);
+
+ // Returns true if the constructor's code_point argument was valid.
+ bool ok() const { return length != 0; }
+
+ // If code_point was in range, then 1 <= length <= 4, and the UTF-8 encoding
+ // is found in bytes[0 .. (length - 1)]. If code_point was invalid, then
+ // length == 0. In either case, the contents of bytes[length .. 3] are
+ // unspecified.
+ char bytes[4] = {};
+ uint32_t length = 0;
+};
+
+} // namespace debugging_internal
+Y_ABSL_NAMESPACE_END
+} // namespace y_absl
+
+#endif // Y_ABSL_DEBUGGING_INTERNAL_UTF8_FOR_CODE_POINT_H_