Update contrib/restricted/abseil-cpp-tstring to 20240722.0

83a5727000e16bc5a94523a0cf1cce75fa86a191
author: thegeorg <thegeorg@yandex-team.com> 2024-08-06 11:28:07 +0300
committer: thegeorg <thegeorg@yandex-team.com> 2024-08-06 12:50:21 +0300
commit: de4d7efd8871b850e3ea79164d7661e2299836b7 (patch)
tree: 47d8cf597b3789a807a4b1cec0a9fd66788767c2 /contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal
parent: e003b4c129e1381591dcb75a96bf9a970b2b47fb (diff)
download: ydb-de4d7efd8871b850e3ea79164d7661e2299836b7.tar.gz
7 files changed, 67 insertions, 54 deletions
diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/crc32_x86_arm_combined_simd.h b/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/crc32_x86_arm_combined_simd.h
index 0e53b0f573..c1bbdf498f 100644
--- a/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/crc32_x86_arm_combined_simd.h
+++ b/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/crc32_x86_arm_combined_simd.h
@@ -33,14 +33,15 @@
 #include <x86intrin.h>
 #define Y_ABSL_CRC_INTERNAL_HAVE_X86_SIMD
 
-#elif defined(_MSC_VER) && !defined(__clang__) && defined(__AVX__)
+#elif defined(_MSC_VER) && !defined(__clang__) && defined(__AVX__) && \
+    defined(_M_AMD64)
 
 // MSVC AVX (/arch:AVX) implies SSE 4.2 and PCLMULQDQ.
 #include <intrin.h>
 #define Y_ABSL_CRC_INTERNAL_HAVE_X86_SIMD
 
-#elif defined(__aarch64__) && defined(__LITTLE_ENDIAN__) && \
-    defined(__ARM_FEATURE_CRC32) && defined(Y_ABSL_INTERNAL_HAVE_ARM_NEON) &&  \
+#elif defined(__aarch64__) && defined(__LITTLE_ENDIAN__) &&                 \
+    defined(__ARM_FEATURE_CRC32) && defined(Y_ABSL_INTERNAL_HAVE_ARM_NEON) && \
     defined(__ARM_FEATURE_CRYPTO)
 
 #include <arm_acle.h>
@@ -101,10 +102,11 @@ V128 V128_Xor(const V128 l, const V128 r);
 // Produces an AND operation of |l| and |r|.
 V128 V128_And(const V128 l, const V128 r);
 
-// Sets two 64 bit integers to one 128 bit vector. The order is reverse.
+// Sets the lower half of a 128 bit register to the given 64-bit value and
+// zeroes the upper half.
 // dst[63:0] := |r|
-// dst[127:64] := |l|
-V128 V128_From2x64(const uint64_t l, const uint64_t r);
+// dst[127:64] := |0|
+V128 V128_From64WithZeroFill(const uint64_t r);
 
 // Shift |l| right by |imm| bytes while shifting in zeros.
 template <int imm>
@@ -121,8 +123,8 @@ uint64_t V128_Extract64(const V128 l);
 // Extracts the low 64 bits from V128.
 int64_t V128_Low64(const V128 l);
 
-// Left-shifts packed 64-bit integers in l by r.
-V128 V128_ShiftLeft64(const V128 l, const V128 r);
+// Add packed 64-bit integers in |l| and |r|.
+V128 V128_Add64(const V128 l, const V128 r);
 
 #endif
 
@@ -170,8 +172,8 @@ inline V128 V128_Xor(const V128 l, const V128 r) { return _mm_xor_si128(l, r); }
 
 inline V128 V128_And(const V128 l, const V128 r) { return _mm_and_si128(l, r); }
 
-inline V128 V128_From2x64(const uint64_t l, const uint64_t r) {
-  return _mm_set_epi64x(static_cast<int64_t>(l), static_cast<int64_t>(r));
+inline V128 V128_From64WithZeroFill(const uint64_t r) {
+  return _mm_set_epi64x(static_cast<int64_t>(0), static_cast<int64_t>(r));
 }
 
 template <int imm>
@@ -191,8 +193,8 @@ inline uint64_t V128_Extract64(const V128 l) {
 
 inline int64_t V128_Low64(const V128 l) { return _mm_cvtsi128_si64(l); }
 
-inline V128 V128_ShiftLeft64(const V128 l, const V128 r) {
-  return _mm_sll_epi64(l, r);
+inline V128 V128_Add64(const V128 l, const V128 r) {
+  return _mm_add_epi64(l, r);
 }
 
 #elif defined(Y_ABSL_CRC_INTERNAL_HAVE_ARM_SIMD)
@@ -261,10 +263,12 @@ inline V128 V128_Xor(const V128 l, const V128 r) { return veorq_u64(l, r); }
 
 inline V128 V128_And(const V128 l, const V128 r) { return vandq_u64(l, r); }
 
-inline V128 V128_From2x64(const uint64_t l, const uint64_t r) {
-  return vcombine_u64(vcreate_u64(r), vcreate_u64(l));
+inline V128 V128_From64WithZeroFill(const uint64_t r){
+  constexpr uint64x2_t kZero = {0, 0};
+  return vsetq_lane_u64(r, kZero, 0);
 }
 
+
 template <int imm>
 inline V128 V128_ShiftRight(const V128 l) {
   return vreinterpretq_u64_s8(
@@ -285,9 +289,7 @@ inline int64_t V128_Low64(const V128 l) {
   return vgetq_lane_s64(vreinterpretq_s64_u64(l), 0);
 }
 
-inline V128 V128_ShiftLeft64(const V128 l, const V128 r) {
-  return vshlq_u64(l, vreinterpretq_s64_u64(r));
-}
+inline V128 V128_Add64(const V128 l, const V128 r) { return vaddq_u64(l, r); }
 
 #endif
 
diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/crc_cord_state.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/crc_cord_state.cc
index 695f2db14a..71a4088dec 100644
--- a/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/crc_cord_state.cc
+++ b/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/crc_cord_state.cc
@@ -17,6 +17,7 @@
 #include <cassert>
 
 #include "y_absl/base/config.h"
+#include "y_absl/base/no_destructor.h"
 #include "y_absl/numeric/bits.h"
 
 namespace y_absl {
@@ -24,14 +25,14 @@ Y_ABSL_NAMESPACE_BEGIN
 namespace crc_internal {
 
 CrcCordState::RefcountedRep* CrcCordState::RefSharedEmptyRep() {
-  static CrcCordState::RefcountedRep* empty = new CrcCordState::RefcountedRep;
+  static y_absl::NoDestructor<CrcCordState::RefcountedRep> empty;
 
   assert(empty->count.load(std::memory_order_relaxed) >= 1);
   assert(empty->rep.removed_prefix.length == 0);
   assert(empty->rep.prefix_crc.empty());
 
-  Ref(empty);
-  return empty;
+  Ref(empty.get());
+  return empty.get();
 }
 
 CrcCordState::CrcCordState() : refcounted_rep_(new RefcountedRep) {}
diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/crc_memcpy_fallback.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/crc_memcpy_fallback.cc
index cf03a5e10c..5b0cac2542 100644
--- a/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/crc_memcpy_fallback.cc
+++ b/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/crc_memcpy_fallback.cc
@@ -12,12 +12,13 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include <cstdint>
+#include <cstring>
 #include <memory>
 
 #include "y_absl/base/config.h"
 #include "y_absl/crc/crc32c.h"
 #include "y_absl/crc/internal/crc_memcpy.h"
+#include "y_absl/strings/string_view.h"
 
 namespace y_absl {
 Y_ABSL_NAMESPACE_BEGIN
diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/crc_memcpy_x86_arm_combined.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/crc_memcpy_x86_arm_combined.cc
index e88e235cd4..ab65d3f8a0 100644
--- a/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/crc_memcpy_x86_arm_combined.cc
+++ b/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/crc_memcpy_x86_arm_combined.cc
@@ -52,6 +52,7 @@
 #include <cstring>
 #include <memory>
 
+#include "y_absl/base/attributes.h"
 #include "y_absl/base/config.h"
 #include "y_absl/base/optimization.h"
 #include "y_absl/base/prefetch.h"
@@ -88,9 +89,11 @@ inline crc32c_t ShortCrcCopy(char* dst, const char* src, std::size_t length,
 constexpr size_t kIntLoadsPerVec = sizeof(V128) / sizeof(uint64_t);
 
 // Common function for copying the tails of multiple large regions.
+// Disable ubsan for benign unaligned access. See b/254108538.
 template <size_t vec_regions, size_t int_regions>
-inline void LargeTailCopy(crc32c_t* crcs, char** dst, const char** src,
-                          size_t region_size, size_t copy_rounds) {
+Y_ABSL_ATTRIBUTE_NO_SANITIZE_UNDEFINED inline void LargeTailCopy(
+    crc32c_t* crcs, char** dst, const char** src, size_t region_size,
+    size_t copy_rounds) {
   std::array<V128, vec_regions> data;
   std::array<uint64_t, kIntLoadsPerVec * int_regions> int_data;
 
@@ -127,8 +130,8 @@ inline void LargeTailCopy(crc32c_t* crcs, char** dst, const char** src,
         size_t data_index = i * kIntLoadsPerVec + j;
 
         int_data[data_index] = *(usrc + j);
-        crcs[region] = crc32c_t{static_cast<uint32_t>(CRC32_u64(
-            static_cast<uint32_t>(crcs[region]), int_data[data_index]))};
+        crcs[region] = crc32c_t{CRC32_u64(static_cast<uint32_t>(crcs[region]),
+                                          int_data[data_index])};
 
         *(udst + j) = int_data[data_index];
       }
@@ -155,8 +158,10 @@ class AcceleratedCrcMemcpyEngine : public CrcMemcpyEngine {
                    std::size_t length, crc32c_t initial_crc) const override;
 };
 
+// Disable ubsan for benign unaligned access. See b/254108538.
 template <size_t vec_regions, size_t int_regions>
-crc32c_t AcceleratedCrcMemcpyEngine<vec_regions, int_regions>::Compute(
+Y_ABSL_ATTRIBUTE_NO_SANITIZE_UNDEFINED crc32c_t
+AcceleratedCrcMemcpyEngine<vec_regions, int_regions>::Compute(
     void* __restrict dst, const void* __restrict src, std::size_t length,
     crc32c_t initial_crc) const {
   constexpr std::size_t kRegions = vec_regions + int_regions;
@@ -196,7 +201,6 @@ crc32c_t AcceleratedCrcMemcpyEngine<vec_regions, int_regions>::Compute(
 
   // Start work on the CRC: undo the XOR from the previous calculation or set up
   // the initial value of the CRC.
-  // initial_crc ^= kCrcDataXor;
   initial_crc = crc32c_t{static_cast<uint32_t>(initial_crc) ^ kCrcDataXor};
 
   // Do an initial alignment copy, so we can use aligned store instructions to
@@ -295,8 +299,8 @@ crc32c_t AcceleratedCrcMemcpyEngine<vec_regions, int_regions>::Compute(
 
           // Load and CRC the data.
           int_data[data_index] = *(usrc + i * kIntLoadsPerVec + k);
-          crcs[region] = crc32c_t{static_cast<uint32_t>(CRC32_u64(
-              static_cast<uint32_t>(crcs[region]), int_data[data_index]))};
+          crcs[region] = crc32c_t{CRC32_u64(static_cast<uint32_t>(crcs[region]),
+                                            int_data[data_index])};
 
           // Store the data.
           *(udst + i * kIntLoadsPerVec + k) = int_data[data_index];
diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/crc_non_temporal_memcpy.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/crc_non_temporal_memcpy.cc
index e73e6487cf..78dec49f28 100644
--- a/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/crc_non_temporal_memcpy.cc
+++ b/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/crc_non_temporal_memcpy.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include <cstdint>
+#include <cstddef>
 
 #include "y_absl/base/config.h"
 #include "y_absl/crc/crc32c.h"
diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/crc_x86_arm_combined.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/crc_x86_arm_combined.cc
index d72151a5f1..88a953efad 100644
--- a/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/crc_x86_arm_combined.cc
+++ b/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/crc_x86_arm_combined.cc
@@ -101,13 +101,17 @@ constexpr size_t kMediumCutoff = 2048;
 namespace {
 
 uint32_t multiply(uint32_t a, uint32_t b) {
-  V128 shifts = V128_From2x64(0, 1);
-  V128 power = V128_From2x64(0, a);
-  V128 crc = V128_From2x64(0, b);
+  V128 power = V128_From64WithZeroFill(a);
+  V128 crc = V128_From64WithZeroFill(b);
   V128 res = V128_PMulLow(power, crc);
 
-  // Combine crc values
-  res = V128_ShiftLeft64(res, shifts);
+  // Combine crc values.
+  //
+  // Adding res to itself is equivalent to multiplying by 2,
+  // or shifting left by 1. Addition is used as not all compilers
+  // are able to generate optimal code without this hint.
+  // https://godbolt.org/z/rr3fMnf39
+  res = V128_Add64(res, res);
   return static_cast<uint32_t>(V128_Extract32<1>(res)) ^
          CRC32_u32(0, static_cast<uint32_t>(V128_Low64(res)));
 }
@@ -444,11 +448,11 @@ class CRC32AcceleratedX86ARMCombinedMultipleStreams
 
         V128 magic = *(reinterpret_cast<const V128*>(kClmulConstants) + bs - 1);
 
-        V128 tmp = V128_From2x64(0, l64);
+        V128 tmp = V128_From64WithZeroFill(l64);
 
         V128 res1 = V128_PMulLow(tmp, magic);
 
-        tmp = V128_From2x64(0, l641);
+        tmp = V128_From64WithZeroFill(l641);
 
         V128 res2 = V128_PMul10(tmp, magic);
         V128 x = V128_Xor(res1, res2);
diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/non_temporal_memcpy.h b/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/non_temporal_memcpy.h
index fce0007046..80f3671424 100644
--- a/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/non_temporal_memcpy.h
+++ b/contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal/non_temporal_memcpy.h
@@ -19,19 +19,8 @@
 #include <intrin.h>
 #endif
 
-#ifdef __SSE__
-#include <xmmintrin.h>
-#endif
-
-#ifdef __SSE2__
-#include <emmintrin.h>
-#endif
-
-#ifdef __SSE3__
-#include <pmmintrin.h>
-#endif
-
-#ifdef __AVX__
+#if defined(__SSE__) || defined(__AVX__)
+// Pulls in both SSE and AVX intrinsics.
 #include <immintrin.h>
 #endif
 
@@ -44,6 +33,7 @@
 #include <cstdint>
 #include <cstring>
 
+#include "y_absl/base/attributes.h"
 #include "y_absl/base/config.h"
 #include "y_absl/base/optimization.h"
 
@@ -57,7 +47,9 @@ namespace crc_internal {
 // memcpy can save 1 DRAM load of the destination cacheline.
 constexpr size_t kCacheLineSize = Y_ABSL_CACHELINE_SIZE;
 
-// If the objects overlap, the behavior is undefined.
+// If the objects overlap, the behavior is undefined. Uses regular memcpy
+// instead of non-temporal memcpy if the required CPU intrinsics are unavailable
+// at compile time.
 inline void *non_temporal_store_memcpy(void *__restrict dst,
                                        const void *__restrict src, size_t len) {
 #if defined(__SSE3__) || defined(__aarch64__) || \
@@ -119,10 +111,20 @@ inline void *non_temporal_store_memcpy(void *__restrict dst,
 #endif  // __SSE3__ || __aarch64__ || (_MSC_VER && __AVX__)
 }
 
+// If the objects overlap, the behavior is undefined. Uses regular memcpy
+// instead of non-temporal memcpy if the required CPU intrinsics are unavailable
+// at compile time.
+#if Y_ABSL_HAVE_CPP_ATTRIBUTE(gnu::target) && \
+    (defined(__x86_64__) || defined(__i386__))
+[[gnu::target("avx")]]
+#endif
 inline void *non_temporal_store_memcpy_avx(void *__restrict dst,
                                            const void *__restrict src,
                                            size_t len) {
-#ifdef __AVX__
+  // This function requires AVX. For clang and gcc we compile it with AVX even
+  // if the translation unit isn't built with AVX support. This works because we
+  // only select this implementation at runtime if the CPU supports AVX.
+#if defined(__SSE3__) || (defined(_MSC_VER) && defined(__AVX__))
   uint8_t *d = reinterpret_cast<uint8_t *>(dst);
   const uint8_t *s = reinterpret_cast<const uint8_t *>(src);
 
@@ -168,9 +170,8 @@ inline void *non_temporal_store_memcpy_avx(void *__restrict dst,
   }
   return dst;
 #else
-  // Fallback to regular memcpy when AVX is not available.
   return memcpy(dst, src, len);
-#endif  // __AVX__
+#endif  // __SSE3__ || (_MSC_VER && __AVX__)
 }
 
 }  // namespace crc_internal
author	thegeorg <thegeorg@yandex-team.com>	2024-08-06 11:28:07 +0300
committer	thegeorg <thegeorg@yandex-team.com>	2024-08-06 12:50:21 +0300
commit	de4d7efd8871b850e3ea79164d7661e2299836b7 (patch)
tree	47d8cf597b3789a807a4b1cec0a9fd66788767c2 /contrib/restricted/abseil-cpp-tstring/y_absl/crc/internal
parent	e003b4c129e1381591dcb75a96bf9a970b2b47fb (diff)
download	ydb-de4d7efd8871b850e3ea79164d7661e2299836b7.tar.gz