aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/crcutil/crc32c_sse4.h
diff options
context:
space:
mode:
author f0b0s <f0b0s@yandex-team.ru> 2022-02-10 16:46:51 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:46:51 +0300
commit cdae02d225fb5b3afbb28990e79a7ac6c9125327 (patch)
tree 49e222ea1c5804306084bb3ae065bb702625360f /contrib/libs/crcutil/crc32c_sse4.h
parent deabc5260ac2e17b8f5152ee060bec1740613540 (diff)
download ydb-cdae02d225fb5b3afbb28990e79a7ac6c9125327.tar.gz
Restoring authorship annotation for <f0b0s@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/crcutil/crc32c_sse4.h')
-rw-r--r-- contrib/libs/crcutil/crc32c_sse4.h 504
1 files changed, 252 insertions, 252 deletions
diff --git a/contrib/libs/crcutil/crc32c_sse4.h b/contrib/libs/crcutil/crc32c_sse4.h
index 24aa815699..ac3d8425b8 100644
--- a/contrib/libs/crcutil/crc32c_sse4.h
+++ b/contrib/libs/crcutil/crc32c_sse4.h
@@ -1,252 +1,252 @@
-// Copyright 2010 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Implements CRC32C using Intel's SSE4 crc32 instruction.
-// Uses _mm_crc32_u64/32/8 intrinsics if CRCUTIL_USE_MM_CRC32 is not zero,
-// emilates intrinsics via CRC_WORD/CRC_BYTE otherwise.
-
-#ifndef CRCUTIL_CRC32C_SSE4_H_
-#define CRCUTIL_CRC32C_SSE4_H_
-
-#include "gf_util.h" // base types, gf_util class, etc.
-#include "crc32c_sse4_intrin.h" // _mm_crc32_u* intrinsics
-
-#if HAVE_I386 || HAVE_AMD64
-
-#if CRCUTIL_USE_MM_CRC32
-
-#if HAVE_I386
-#define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u32(crc, (value)))
-#else
-#define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u64(crc, (value)))
-#endif // HAVE_I386
-
-#define CRC_UPDATE_BYTE(crc, value) \
- (crc = _mm_crc32_u8(static_cast<uint32>(crc), static_cast<uint8>(value)))
-
-#else
-
-#include "generic_crc.h"
-
-#define CRC_UPDATE_WORD(crc, value) do { \
- size_t buf = (value); \
- CRC_WORD(this, crc, buf); \
-} while (0)
-#define CRC_UPDATE_BYTE(crc, value) do { \
- CRC_BYTE(this, crc, (value)); \
-} while (0)
-
-#endif // CRCUTIL_USE_MM_CRC32
-
-namespace crcutil {
-
-#pragma pack(push, 16)
-
-// Since the same pieces should be parameterized in many different places
-// and we do not want to introduce a mistake which is rather hard to find,
-// use a macro to enumerate all block sizes.
-//
-// Block sizes and number of stripes were tuned for best performance.
-//
-// All constants should be literal constants (too lazy to fix the macro).
-//
-// The use of different "macro_first", "macro", and "macro_last"
-// allows generation of different code for smallest, in between,
-// and largest block sizes.
-//
-// This macro shall be kept in sync with
-// CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING.
-// Failure to do so will cause compile-time error.
-#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING( \
- macro_smallest, macro, macro_largest) \
- macro_smallest(512, 3); \
- macro(1024, 3); \
- macro(4096, 3); \
- macro_largest(32768, 3)
-
-// This macro shall be kept in sync with
-// CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING.
-// Failure to do so will cause compile-time error.
-#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING( \
- macro_smallest, macro, macro_largest) \
- macro_largest(32768, 3); \
- macro(4096, 3); \
- macro(1024, 3); \
- macro_smallest(512, 3)
-
-// Enumerates all block sizes.
-#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(macro) \
- CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING(macro, macro, macro)
-
-#define CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) \
- (((block_size) / (num_stripes)) & ~(sizeof(size_t) - 1))
-
-#define CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes) \
- (CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) * (num_stripes))
-
-#define CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
- mul_table_##block_size##_##num_blocks##_
-
-class RollingCrc32cSSE4;
-
-class Crc32cSSE4 {
- public:
- // Exports Crc, TableEntry, and Word (needed by RollingCrc).
- typedef size_t Crc;
- typedef Crc Word;
- typedef Crc TableEntry;
-
- Crc32cSSE4() {}
-
- // Initializes the tables given generating polynomial of degree (degree).
- // If "canonical" is true, crc value will be XOR'ed with (-1) before and
- // after actual CRC computation.
- explicit Crc32cSSE4(bool canonical) {
- Init(canonical);
- }
- void Init(bool canonical);
-
- // Initializes the tables given generating polynomial of degree.
- // If "canonical" is true, crc value will be XOR'ed with (-1) before and
- // after actual CRC computation.
- // Provided for compatibility with GenericCrc.
- Crc32cSSE4(const Crc &generating_polynomial,
- size_t degree,
- bool canonical) {
- Init(generating_polynomial, degree, canonical);
- }
- void Init(const Crc &generating_polynomial,
- size_t degree,
- bool canonical) {
- if (generating_polynomial == FixedGeneratingPolynomial() &&
- degree == FixedDegree()) {
- Init(canonical);
- }
- }
-
- // Returns fixed generating polymonial the class implements.
- static Crc FixedGeneratingPolynomial() {
- return 0x82f63b78;
- }
-
- // Returns degree of fixed generating polymonial the class implements.
- static Crc FixedDegree() {
- return 32;
- }
-
- // Returns base class.
- const GfUtil<Crc> &Base() const { return base_; }
-
- // Computes CRC32.
- size_t CrcDefault(const void *data, size_t bytes, const Crc &crc) const {
- return Crc32c(data, bytes, crc);
- }
-
- // Returns true iff crc32 instruction is available.
- static bool IsSSE42Available();
-
- protected:
- // Actual implementation.
- size_t Crc32c(const void *data, size_t bytes, Crc crc) const;
-
- enum {
- kTableEntryBits = 8,
- kTableEntries = 1 << kTableEntryBits,
- kNumTables = (32 + kTableEntryBits - 1) / kTableEntryBits,
- kNumTablesHalfLo = kNumTables / 2,
- kNumTablesHalfHi = (kNumTables + 1) / 2,
-
- kUnrolledLoopCount = 8,
- kUnrolledLoopBytes = kUnrolledLoopCount * sizeof(size_t),
- };
-
- // May be set to size_t or uint32, whichever is faster.
- typedef uint32 Entry;
-
-#define DECLARE_MUL_TABLE(block_size, num_stripes) \
- Entry CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
- [kNumTables][kTableEntries]
-
- CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(DECLARE_MUL_TABLE);
-
-#undef DECLARE_MUL_TABLE
-
- GfUtil<Crc> base_;
-
-#if !CRCUTIL_USE_MM_CRC32
- TableEntry crc_word_[sizeof(Word)][256];
- friend class RollingCrc32cSSE4;
-#endif // !CRCUTIL_USE_MM_CRC32
-} GCC_ALIGN_ATTRIBUTE(16);
-
-class RollingCrc32cSSE4 {
- public:
- typedef Crc32cSSE4::Crc Crc;
- typedef Crc32cSSE4::TableEntry TableEntry;
- typedef Crc32cSSE4::Word Word;
-
- RollingCrc32cSSE4() {}
-
- // Initializes internal data structures.
- // Retains reference to "crc" instance -- it is used by Start().
- RollingCrc32cSSE4(const Crc32cSSE4 &crc,
- size_t roll_window_bytes,
- const Crc &start_value) {
- Init(crc, roll_window_bytes, start_value);
- }
- void Init(const Crc32cSSE4 &crc,
- size_t roll_window_bytes,
- const Crc &start_value);
-
- // Computes crc of "roll_window_bytes" using
- // "start_value" of "crc" (see Init()).
- Crc Start(const void *data) const {
- return crc_->CrcDefault(data, roll_window_bytes_, start_value_);
- }
-
- // Computes CRC of "roll_window_bytes" starting in next position.
- Crc Roll(const Crc &old_crc, size_t byte_out, size_t byte_in) const {
- Crc crc = old_crc;
- CRC_UPDATE_BYTE(crc, byte_in);
- crc ^= out_[byte_out];
- return crc;
- }
-
- // Returns start value.
- Crc StartValue() const { return start_value_; }
-
- // Returns length of roll window.
- size_t WindowBytes() const { return roll_window_bytes_; }
-
- protected:
- typedef Crc Entry;
- Entry out_[256];
-
- // Used only by Start().
- Crc start_value_;
- const Crc32cSSE4 *crc_;
- size_t roll_window_bytes_;
-
-#if !CRCUTIL_USE_MM_CRC32
- TableEntry crc_word_[sizeof(Word)][256];
-#endif // !CRCUTIL_USE_MM_CRC32
-} GCC_ALIGN_ATTRIBUTE(16);
-
-#pragma pack(pop)
-
-} // namespace crcutil
-
-#endif // HAVE_I386 || HAVE_AMD64
-
-#endif // CRCUTIL_CRC32C_SSE4_H_
+// Copyright 2010 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Implements CRC32C using Intel's SSE4 crc32 instruction.
+// Uses _mm_crc32_u64/32/8 intrinsics if CRCUTIL_USE_MM_CRC32 is not zero,
+// emulates intrinsics via CRC_WORD/CRC_BYTE otherwise.
+
+#ifndef CRCUTIL_CRC32C_SSE4_H_
+#define CRCUTIL_CRC32C_SSE4_H_
+
+#include "gf_util.h" // base types, gf_util class, etc.
+#include "crc32c_sse4_intrin.h" // _mm_crc32_u* intrinsics
+
+#if HAVE_I386 || HAVE_AMD64
+
+#if CRCUTIL_USE_MM_CRC32  // Non-zero: CRC_UPDATE_* map directly onto the _mm_crc32_* intrinsics.
+
+#if HAVE_I386  // HAVE_I386 selects the 32-bit word intrinsic.
+#define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u32(crc, (value)))
+#else  // Otherwise the 64-bit word intrinsic is used.
+#define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u64(crc, (value)))
+#endif // HAVE_I386
+
+#define CRC_UPDATE_BYTE(crc, value) \
+ (crc = _mm_crc32_u8(static_cast<uint32>(crc), static_cast<uint8>(value)))  // Both operands are narrowed by static_cast before the call; the result is assigned back to crc.
+
+#else  // CRCUTIL_USE_MM_CRC32 is zero: CRC_UPDATE_* expand to CRC_WORD / CRC_BYTE instead.
+
+#include "generic_crc.h"  // presumably supplies CRC_WORD / CRC_BYTE -- TODO confirm
+
+#define CRC_UPDATE_WORD(crc, value) do { \
+ size_t buf = (value); \
+ CRC_WORD(this, crc, buf); \
+} while (0)  // Expands a use of "this", so it can only appear inside a non-static member function.
+#define CRC_UPDATE_BYTE(crc, value) do { \
+ CRC_BYTE(this, crc, (value)); \
+} while (0)  // Same "this" requirement as CRC_UPDATE_WORD above.
+
+#endif // CRCUTIL_USE_MM_CRC32
+
+namespace crcutil {
+
+#pragma pack(push, 16)
+
+// Enumerates every (block_size, num_stripes) pair, smallest block first.
+// The same list is needed in many different places, so it is spelled once
+// as a macro to avoid a copy/paste mismatch that would be hard to find.
+//
+// Block sizes and number of stripes were tuned for best performance.
+//
+// Constants must be literals: CRC32C_SSE4_MUL_TABLE pastes them into a name.
+//
+// The use of different "macro_smallest", "macro", and "macro_largest"
+// allows generation of different code for smallest, in between,
+// and largest block sizes.
+//
+// This macro shall be kept in sync with
+// CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING.
+// Failure to do so will cause compile-time error.
+#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING( \
+ macro_smallest, macro, macro_largest) \
+ macro_smallest(512, 3); \
+ macro(1024, 3); \
+ macro(4096, 3); \
+ macro_largest(32768, 3)
+
+// Enumerates the same (block_size, num_stripes) pairs, largest block first.
+// Shall be kept in sync with CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING.
+// Failure to do so will cause compile-time error.
+#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING( \
+ macro_smallest, macro, macro_largest) \
+ macro_largest(32768, 3); \
+ macro(4096, 3); \
+ macro(1024, 3); \
+ macro_smallest(512, 3)
+
+// Enumerates all block sizes in ascending order, applying the same "macro" to every entry.
+#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(macro) \
+ CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING(macro, macro, macro)
+
+#define CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) \
+ (((block_size) / (num_stripes)) & ~(sizeof(size_t) - 1))  // block_size / num_stripes, rounded down to a multiple of sizeof(size_t).
+
+#define CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes) \
+ (CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) * (num_stripes))  // Bytes in num_stripes whole stripes; can be smaller than block_size because of the rounding.
+
+#define CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
+ mul_table_##block_size##_##num_blocks##_  // NOTE(review): "num_blocks" is not a macro parameter, so it is pasted literally (e.g. mul_table_512_num_blocks_) and num_stripes is ignored -- confirm whether num_stripes was intended.
+
+class RollingCrc32cSSE4;  // Forward declaration, needed by the friend declaration inside Crc32cSSE4.
+
+class Crc32cSSE4 {  // CRC32C implementation for one fixed polynomial (0x82f63b78, degree 32).
+ public:
+ // Exports Crc, TableEntry, and Word (needed by RollingCrc).
+ typedef size_t Crc;
+ typedef Crc Word;
+ typedef Crc TableEntry;
+
+ Crc32cSSE4() {}  // Performs no setup itself; presumably Init() must be called before use -- confirm.
+
+ // Initializes the instance by calling Init(canonical); the polynomial is fixed.
+ // If "canonical" is true, crc value will be XOR'ed with (-1) before and
+ // after actual CRC computation.
+ explicit Crc32cSSE4(bool canonical) {
+ Init(canonical);
+ }
+ void Init(bool canonical);
+
+ // Calls Init(canonical) only when generating_polynomial and degree match
+ // FixedGeneratingPolynomial() and FixedDegree(); otherwise does nothing and
+ // reports no error. "canonical" has the same meaning as for Init(bool).
+ // Provided for compatibility with GenericCrc.
+ Crc32cSSE4(const Crc &generating_polynomial,
+ size_t degree,
+ bool canonical) {
+ Init(generating_polynomial, degree, canonical);
+ }
+ void Init(const Crc &generating_polynomial,
+ size_t degree,
+ bool canonical) {
+ if (generating_polynomial == FixedGeneratingPolynomial() &&
+ degree == FixedDegree()) {
+ Init(canonical);
+ }
+ }
+
+ // Returns the fixed generating polynomial the class implements.
+ static Crc FixedGeneratingPolynomial() {
+ return 0x82f63b78;
+ }
+
+ // Returns the degree of the fixed generating polynomial the class implements.
+ static Crc FixedDegree() {
+ return 32;
+ }
+
+ // Returns the GfUtil<Crc> member base_ (a data member, not a C++ base class).
+ const GfUtil<Crc> &Base() const { return base_; }
+
+ // Computes the CRC by forwarding all three arguments unchanged to Crc32c().
+ size_t CrcDefault(const void *data, size_t bytes, const Crc &crc) const {
+ return Crc32c(data, bytes, crc);
+ }
+
+ // Returns true iff crc32 instruction is available.
+ static bool IsSSE42Available();
+
+ protected:
+ // Actual implementation (only declared here; defined outside this header).
+ size_t Crc32c(const void *data, size_t bytes, Crc crc) const;
+
+ enum {
+ kTableEntryBits = 8,
+ kTableEntries = 1 << kTableEntryBits,  // = 256
+ kNumTables = (32 + kTableEntryBits - 1) / kTableEntryBits,  // = 4
+ kNumTablesHalfLo = kNumTables / 2,  // = 2
+ kNumTablesHalfHi = (kNumTables + 1) / 2,  // = 2
+
+ kUnrolledLoopCount = 8,
+ kUnrolledLoopBytes = kUnrolledLoopCount * sizeof(size_t),  // = 8 * sizeof(size_t)
+ };
+
+ // May be set to size_t or uint32, whichever is faster.
+ typedef uint32 Entry;
+
+#define DECLARE_MUL_TABLE(block_size, num_stripes) \
+ Entry CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
+ [kNumTables][kTableEntries]  // Declares one Entry[kNumTables][kTableEntries] data member.
+
+ CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(DECLARE_MUL_TABLE);  // One table member per enumerated block size.
+
+#undef DECLARE_MUL_TABLE
+
+ GfUtil<Crc> base_;  // Exposed read-only through Base().
+
+#if !CRCUTIL_USE_MM_CRC32
+ TableEntry crc_word_[sizeof(Word)][256];  // presumably the lookup tables read by CRC_WORD / CRC_BYTE(this, ...) -- TODO confirm
+ friend class RollingCrc32cSSE4;  // presumably so RollingCrc32cSSE4 can read crc_word_ -- TODO confirm
+#endif // !CRCUTIL_USE_MM_CRC32
+} GCC_ALIGN_ATTRIBUTE(16);
+
+class RollingCrc32cSSE4 {  // Rolling-window CRC computed on top of a Crc32cSSE4 instance.
+ public:
+ typedef Crc32cSSE4::Crc Crc;
+ typedef Crc32cSSE4::TableEntry TableEntry;
+ typedef Crc32cSSE4::Word Word;
+
+ RollingCrc32cSSE4() {}  // Sets no members (crc_ included); Start() dereferences crc_, so call Init() first.
+
+ // Initializes internal data structures by calling Init().
+ // Retains reference to "crc" instance -- it is used by Start().
+ RollingCrc32cSSE4(const Crc32cSSE4 &crc,
+ size_t roll_window_bytes,
+ const Crc &start_value) {
+ Init(crc, roll_window_bytes, start_value);
+ }
+ void Init(const Crc32cSSE4 &crc,
+ size_t roll_window_bytes,
+ const Crc &start_value);
+
+ // Computes crc of "roll_window_bytes" bytes at "data" by calling
+ // crc_->CrcDefault() with start_value_ (both set up by Init()).
+ Crc Start(const void *data) const {
+ return crc_->CrcDefault(data, roll_window_bytes_, start_value_);
+ }
+
+ // Computes CRC of "roll_window_bytes" starting in next position: folds byte_in into old_crc, then XORs in out_[byte_out].
+ Crc Roll(const Crc &old_crc, size_t byte_out, size_t byte_in) const {
+ Crc crc = old_crc;
+ CRC_UPDATE_BYTE(crc, byte_in);  // The non-intrinsic variant of this macro expands to CRC_BYTE(this, ...).
+ crc ^= out_[byte_out];  // No range check: byte_out must be < 256, the size of out_.
+ return crc;
+ }
+
+ // Returns start value.
+ Crc StartValue() const { return start_value_; }
+
+ // Returns length of roll window.
+ size_t WindowBytes() const { return roll_window_bytes_; }
+
+ protected:
+ typedef Crc Entry;
+ Entry out_[256];  // Indexed by byte_out in Roll(); filled outside this header (presumably by Init()).
+
+ // Read by Start() and by the StartValue() / WindowBytes() accessors.
+ Crc start_value_;
+ const Crc32cSSE4 *crc_;
+ size_t roll_window_bytes_;
+
+#if !CRCUTIL_USE_MM_CRC32
+ TableEntry crc_word_[sizeof(Word)][256];  // presumably the table CRC_BYTE(this, ...) reads inside Roll() -- TODO confirm
+#endif // !CRCUTIL_USE_MM_CRC32
+} GCC_ALIGN_ATTRIBUTE(16);
+
+#pragma pack(pop)
+
+} // namespace crcutil
+
+#endif // HAVE_I386 || HAVE_AMD64
+
+#endif // CRCUTIL_CRC32C_SSE4_H_