diff options
author | f0b0s <f0b0s@yandex-team.ru> | 2022-02-10 16:46:51 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:51 +0300 |
commit | cdae02d225fb5b3afbb28990e79a7ac6c9125327 (patch) | |
tree | 49e222ea1c5804306084bb3ae065bb702625360f /contrib/libs/crcutil/crc32c_sse4.h | |
parent | deabc5260ac2e17b8f5152ee060bec1740613540 (diff) | |
download | ydb-cdae02d225fb5b3afbb28990e79a7ac6c9125327.tar.gz |
Restoring authorship annotation for <f0b0s@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/crcutil/crc32c_sse4.h')
-rw-r--r-- | contrib/libs/crcutil/crc32c_sse4.h | 504 |
1 files changed, 252 insertions, 252 deletions
diff --git a/contrib/libs/crcutil/crc32c_sse4.h b/contrib/libs/crcutil/crc32c_sse4.h index 24aa815699..ac3d8425b8 100644 --- a/contrib/libs/crcutil/crc32c_sse4.h +++ b/contrib/libs/crcutil/crc32c_sse4.h @@ -1,252 +1,252 @@ -// Copyright 2010 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Implements CRC32C using Intel's SSE4 crc32 instruction. -// Uses _mm_crc32_u64/32/8 intrinsics if CRCUTIL_USE_MM_CRC32 is not zero, -// emilates intrinsics via CRC_WORD/CRC_BYTE otherwise. - -#ifndef CRCUTIL_CRC32C_SSE4_H_ -#define CRCUTIL_CRC32C_SSE4_H_ - -#include "gf_util.h" // base types, gf_util class, etc. -#include "crc32c_sse4_intrin.h" // _mm_crc32_u* intrinsics - -#if HAVE_I386 || HAVE_AMD64 - -#if CRCUTIL_USE_MM_CRC32 - -#if HAVE_I386 -#define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u32(crc, (value))) -#else -#define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u64(crc, (value))) -#endif // HAVE_I386 - -#define CRC_UPDATE_BYTE(crc, value) \ - (crc = _mm_crc32_u8(static_cast<uint32>(crc), static_cast<uint8>(value))) - -#else - -#include "generic_crc.h" - -#define CRC_UPDATE_WORD(crc, value) do { \ - size_t buf = (value); \ - CRC_WORD(this, crc, buf); \ -} while (0) -#define CRC_UPDATE_BYTE(crc, value) do { \ - CRC_BYTE(this, crc, (value)); \ -} while (0) - -#endif // CRCUTIL_USE_MM_CRC32 - -namespace crcutil { - -#pragma pack(push, 16) - -// Since the same pieces should be parameterized in many different places -// and we do not want to introduce a mistake which is rather hard to find, -// use a macro to enumerate all block sizes. -// -// Block sizes and number of stripes were tuned for best performance. -// -// All constants should be literal constants (too lazy to fix the macro). -// -// The use of different "macro_first", "macro", and "macro_last" -// allows generation of different code for smallest, in between, -// and largest block sizes. -// -// This macro shall be kept in sync with -// CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING. -// Failure to do so will cause compile-time error. -#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING( \ - macro_smallest, macro, macro_largest) \ - macro_smallest(512, 3); \ - macro(1024, 3); \ - macro(4096, 3); \ - macro_largest(32768, 3) - -// This macro shall be kept in sync with -// CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING. -// Failure to do so will cause compile-time error. -#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING( \ - macro_smallest, macro, macro_largest) \ - macro_largest(32768, 3); \ - macro(4096, 3); \ - macro(1024, 3); \ - macro_smallest(512, 3) - -// Enumerates all block sizes. -#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(macro) \ - CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING(macro, macro, macro) - -#define CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) \ - (((block_size) / (num_stripes)) & ~(sizeof(size_t) - 1)) - -#define CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes) \ - (CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) * (num_stripes)) - -#define CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \ - mul_table_##block_size##_##num_blocks##_ - -class RollingCrc32cSSE4; - -class Crc32cSSE4 { - public: - // Exports Crc, TableEntry, and Word (needed by RollingCrc). - typedef size_t Crc; - typedef Crc Word; - typedef Crc TableEntry; - - Crc32cSSE4() {} - - // Initializes the tables given generating polynomial of degree (degree). - // If "canonical" is true, crc value will be XOR'ed with (-1) before and - // after actual CRC computation. - explicit Crc32cSSE4(bool canonical) { - Init(canonical); - } - void Init(bool canonical); - - // Initializes the tables given generating polynomial of degree. - // If "canonical" is true, crc value will be XOR'ed with (-1) before and - // after actual CRC computation. - // Provided for compatibility with GenericCrc. - Crc32cSSE4(const Crc &generating_polynomial, - size_t degree, - bool canonical) { - Init(generating_polynomial, degree, canonical); - } - void Init(const Crc &generating_polynomial, - size_t degree, - bool canonical) { - if (generating_polynomial == FixedGeneratingPolynomial() && - degree == FixedDegree()) { - Init(canonical); - } - } - - // Returns fixed generating polymonial the class implements. - static Crc FixedGeneratingPolynomial() { - return 0x82f63b78; - } - - // Returns degree of fixed generating polymonial the class implements. - static Crc FixedDegree() { - return 32; - } - - // Returns base class. - const GfUtil<Crc> &Base() const { return base_; } - - // Computes CRC32. - size_t CrcDefault(const void *data, size_t bytes, const Crc &crc) const { - return Crc32c(data, bytes, crc); - } - - // Returns true iff crc32 instruction is available. - static bool IsSSE42Available(); - - protected: - // Actual implementation. - size_t Crc32c(const void *data, size_t bytes, Crc crc) const; - - enum { - kTableEntryBits = 8, - kTableEntries = 1 << kTableEntryBits, - kNumTables = (32 + kTableEntryBits - 1) / kTableEntryBits, - kNumTablesHalfLo = kNumTables / 2, - kNumTablesHalfHi = (kNumTables + 1) / 2, - - kUnrolledLoopCount = 8, - kUnrolledLoopBytes = kUnrolledLoopCount * sizeof(size_t), - }; - - // May be set to size_t or uint32, whichever is faster. - typedef uint32 Entry; - -#define DECLARE_MUL_TABLE(block_size, num_stripes) \ - Entry CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \ - [kNumTables][kTableEntries] - - CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(DECLARE_MUL_TABLE); - -#undef DECLARE_MUL_TABLE - - GfUtil<Crc> base_; - -#if !CRCUTIL_USE_MM_CRC32 - TableEntry crc_word_[sizeof(Word)][256]; - friend class RollingCrc32cSSE4; -#endif // !CRCUTIL_USE_MM_CRC32 -} GCC_ALIGN_ATTRIBUTE(16); - -class RollingCrc32cSSE4 { - public: - typedef Crc32cSSE4::Crc Crc; - typedef Crc32cSSE4::TableEntry TableEntry; - typedef Crc32cSSE4::Word Word; - - RollingCrc32cSSE4() {} - - // Initializes internal data structures. - // Retains reference to "crc" instance -- it is used by Start(). - RollingCrc32cSSE4(const Crc32cSSE4 &crc, - size_t roll_window_bytes, - const Crc &start_value) { - Init(crc, roll_window_bytes, start_value); - } - void Init(const Crc32cSSE4 &crc, - size_t roll_window_bytes, - const Crc &start_value); - - // Computes crc of "roll_window_bytes" using - // "start_value" of "crc" (see Init()). - Crc Start(const void *data) const { - return crc_->CrcDefault(data, roll_window_bytes_, start_value_); - } - - // Computes CRC of "roll_window_bytes" starting in next position. - Crc Roll(const Crc &old_crc, size_t byte_out, size_t byte_in) const { - Crc crc = old_crc; - CRC_UPDATE_BYTE(crc, byte_in); - crc ^= out_[byte_out]; - return crc; - } - - // Returns start value. - Crc StartValue() const { return start_value_; } - - // Returns length of roll window. - size_t WindowBytes() const { return roll_window_bytes_; } - - protected: - typedef Crc Entry; - Entry out_[256]; - - // Used only by Start(). - Crc start_value_; - const Crc32cSSE4 *crc_; - size_t roll_window_bytes_; - -#if !CRCUTIL_USE_MM_CRC32 - TableEntry crc_word_[sizeof(Word)][256]; -#endif // !CRCUTIL_USE_MM_CRC32 -} GCC_ALIGN_ATTRIBUTE(16); - -#pragma pack(pop) - -} // namespace crcutil - -#endif // HAVE_I386 || HAVE_AMD64 - -#endif // CRCUTIL_CRC32C_SSE4_H_ +// Copyright 2010 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Implements CRC32C using Intel's SSE4 crc32 instruction. +// Uses _mm_crc32_u64/32/8 intrinsics if CRCUTIL_USE_MM_CRC32 is not zero, +// emilates intrinsics via CRC_WORD/CRC_BYTE otherwise. + +#ifndef CRCUTIL_CRC32C_SSE4_H_ +#define CRCUTIL_CRC32C_SSE4_H_ + +#include "gf_util.h" // base types, gf_util class, etc. +#include "crc32c_sse4_intrin.h" // _mm_crc32_u* intrinsics + +#if HAVE_I386 || HAVE_AMD64 + +#if CRCUTIL_USE_MM_CRC32 + +#if HAVE_I386 +#define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u32(crc, (value))) +#else +#define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u64(crc, (value))) +#endif // HAVE_I386 + +#define CRC_UPDATE_BYTE(crc, value) \ + (crc = _mm_crc32_u8(static_cast<uint32>(crc), static_cast<uint8>(value))) + +#else + +#include "generic_crc.h" + +#define CRC_UPDATE_WORD(crc, value) do { \ + size_t buf = (value); \ + CRC_WORD(this, crc, buf); \ +} while (0) +#define CRC_UPDATE_BYTE(crc, value) do { \ + CRC_BYTE(this, crc, (value)); \ +} while (0) + +#endif // CRCUTIL_USE_MM_CRC32 + +namespace crcutil { + +#pragma pack(push, 16) + +// Since the same pieces should be parameterized in many different places +// and we do not want to introduce a mistake which is rather hard to find, +// use a macro to enumerate all block sizes. +// +// Block sizes and number of stripes were tuned for best performance. +// +// All constants should be literal constants (too lazy to fix the macro). +// +// The use of different "macro_first", "macro", and "macro_last" +// allows generation of different code for smallest, in between, +// and largest block sizes. +// +// This macro shall be kept in sync with +// CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING. +// Failure to do so will cause compile-time error. +#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING( \ + macro_smallest, macro, macro_largest) \ + macro_smallest(512, 3); \ + macro(1024, 3); \ + macro(4096, 3); \ + macro_largest(32768, 3) + +// This macro shall be kept in sync with +// CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING. +// Failure to do so will cause compile-time error. +#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING( \ + macro_smallest, macro, macro_largest) \ + macro_largest(32768, 3); \ + macro(4096, 3); \ + macro(1024, 3); \ + macro_smallest(512, 3) + +// Enumerates all block sizes. +#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(macro) \ + CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING(macro, macro, macro) + +#define CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) \ + (((block_size) / (num_stripes)) & ~(sizeof(size_t) - 1)) + +#define CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes) \ + (CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) * (num_stripes)) + +#define CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \ + mul_table_##block_size##_##num_blocks##_ + +class RollingCrc32cSSE4; + +class Crc32cSSE4 { + public: + // Exports Crc, TableEntry, and Word (needed by RollingCrc). + typedef size_t Crc; + typedef Crc Word; + typedef Crc TableEntry; + + Crc32cSSE4() {} + + // Initializes the tables given generating polynomial of degree (degree). + // If "canonical" is true, crc value will be XOR'ed with (-1) before and + // after actual CRC computation. + explicit Crc32cSSE4(bool canonical) { + Init(canonical); + } + void Init(bool canonical); + + // Initializes the tables given generating polynomial of degree. + // If "canonical" is true, crc value will be XOR'ed with (-1) before and + // after actual CRC computation. + // Provided for compatibility with GenericCrc. + Crc32cSSE4(const Crc &generating_polynomial, + size_t degree, + bool canonical) { + Init(generating_polynomial, degree, canonical); + } + void Init(const Crc &generating_polynomial, + size_t degree, + bool canonical) { + if (generating_polynomial == FixedGeneratingPolynomial() && + degree == FixedDegree()) { + Init(canonical); + } + } + + // Returns fixed generating polymonial the class implements. + static Crc FixedGeneratingPolynomial() { + return 0x82f63b78; + } + + // Returns degree of fixed generating polymonial the class implements. + static Crc FixedDegree() { + return 32; + } + + // Returns base class. + const GfUtil<Crc> &Base() const { return base_; } + + // Computes CRC32. + size_t CrcDefault(const void *data, size_t bytes, const Crc &crc) const { + return Crc32c(data, bytes, crc); + } + + // Returns true iff crc32 instruction is available. + static bool IsSSE42Available(); + + protected: + // Actual implementation. + size_t Crc32c(const void *data, size_t bytes, Crc crc) const; + + enum { + kTableEntryBits = 8, + kTableEntries = 1 << kTableEntryBits, + kNumTables = (32 + kTableEntryBits - 1) / kTableEntryBits, + kNumTablesHalfLo = kNumTables / 2, + kNumTablesHalfHi = (kNumTables + 1) / 2, + + kUnrolledLoopCount = 8, + kUnrolledLoopBytes = kUnrolledLoopCount * sizeof(size_t), + }; + + // May be set to size_t or uint32, whichever is faster. + typedef uint32 Entry; + +#define DECLARE_MUL_TABLE(block_size, num_stripes) \ + Entry CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \ + [kNumTables][kTableEntries] + + CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(DECLARE_MUL_TABLE); + +#undef DECLARE_MUL_TABLE + + GfUtil<Crc> base_; + +#if !CRCUTIL_USE_MM_CRC32 + TableEntry crc_word_[sizeof(Word)][256]; + friend class RollingCrc32cSSE4; +#endif // !CRCUTIL_USE_MM_CRC32 +} GCC_ALIGN_ATTRIBUTE(16); + +class RollingCrc32cSSE4 { + public: + typedef Crc32cSSE4::Crc Crc; + typedef Crc32cSSE4::TableEntry TableEntry; + typedef Crc32cSSE4::Word Word; + + RollingCrc32cSSE4() {} + + // Initializes internal data structures. + // Retains reference to "crc" instance -- it is used by Start(). + RollingCrc32cSSE4(const Crc32cSSE4 &crc, + size_t roll_window_bytes, + const Crc &start_value) { + Init(crc, roll_window_bytes, start_value); + } + void Init(const Crc32cSSE4 &crc, + size_t roll_window_bytes, + const Crc &start_value); + + // Computes crc of "roll_window_bytes" using + // "start_value" of "crc" (see Init()). + Crc Start(const void *data) const { + return crc_->CrcDefault(data, roll_window_bytes_, start_value_); + } + + // Computes CRC of "roll_window_bytes" starting in next position. + Crc Roll(const Crc &old_crc, size_t byte_out, size_t byte_in) const { + Crc crc = old_crc; + CRC_UPDATE_BYTE(crc, byte_in); + crc ^= out_[byte_out]; + return crc; + } + + // Returns start value. + Crc StartValue() const { return start_value_; } + + // Returns length of roll window. + size_t WindowBytes() const { return roll_window_bytes_; } + + protected: + typedef Crc Entry; + Entry out_[256]; + + // Used only by Start(). + Crc start_value_; + const Crc32cSSE4 *crc_; + size_t roll_window_bytes_; + +#if !CRCUTIL_USE_MM_CRC32 + TableEntry crc_word_[sizeof(Word)][256]; +#endif // !CRCUTIL_USE_MM_CRC32 +} GCC_ALIGN_ATTRIBUTE(16); + +#pragma pack(pop) + +} // namespace crcutil + +#endif // HAVE_I386 || HAVE_AMD64 + +#endif // CRCUTIL_CRC32C_SSE4_H_ |