aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/crcutil
diff options
context:
space:
mode:
authorf0b0s <f0b0s@yandex-team.ru>2022-02-10 16:46:51 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:46:51 +0300
commitdeabc5260ac2e17b8f5152ee060bec1740613540 (patch)
treebc498b2fe3c447d13c2abea85b429fee8dd485ef /contrib/libs/crcutil
parent2e6009493e74f88988b81f219b301f450331648d (diff)
downloadydb-deabc5260ac2e17b8f5152ee060bec1740613540.tar.gz
Restoring authorship annotation for <f0b0s@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/crcutil')
-rw-r--r--contrib/libs/crcutil/aligned_alloc.h132
-rw-r--r--contrib/libs/crcutil/base_types.h146
-rw-r--r--contrib/libs/crcutil/bob_jenkins_rng.h252
-rw-r--r--contrib/libs/crcutil/crc32c_sse4.cc726
-rw-r--r--contrib/libs/crcutil/crc32c_sse4.h504
-rw-r--r--contrib/libs/crcutil/crc32c_sse4_intrin.h198
-rw-r--r--contrib/libs/crcutil/crc_casts.h136
-rw-r--r--contrib/libs/crcutil/generic_crc.h1374
-rw-r--r--contrib/libs/crcutil/gf_util.h608
-rw-r--r--contrib/libs/crcutil/interface.cc612
-rw-r--r--contrib/libs/crcutil/interface.h408
-rw-r--r--contrib/libs/crcutil/multiword_128_64_gcc_amd64_sse2.cc582
-rw-r--r--contrib/libs/crcutil/multiword_64_64_cl_i386_mmx.cc608
-rw-r--r--contrib/libs/crcutil/multiword_64_64_gcc_amd64_asm.cc596
-rw-r--r--contrib/libs/crcutil/multiword_64_64_gcc_i386_mmx.cc516
-rw-r--r--contrib/libs/crcutil/multiword_64_64_intrinsic_i386_mmx.cc486
-rw-r--r--contrib/libs/crcutil/platform.h490
-rw-r--r--contrib/libs/crcutil/protected_crc.h122
-rw-r--r--contrib/libs/crcutil/rdtsc.h118
-rw-r--r--contrib/libs/crcutil/rolling_crc.h212
-rw-r--r--contrib/libs/crcutil/std_headers.h102
-rw-r--r--contrib/libs/crcutil/uint128_sse2.h620
-rw-r--r--contrib/libs/crcutil/ya.make22
23 files changed, 4785 insertions, 4785 deletions
diff --git a/contrib/libs/crcutil/aligned_alloc.h b/contrib/libs/crcutil/aligned_alloc.h
index 37cefbacd9..57bdc91d8a 100644
--- a/contrib/libs/crcutil/aligned_alloc.h
+++ b/contrib/libs/crcutil/aligned_alloc.h
@@ -1,66 +1,66 @@
-// Copyright 2010 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Poor man's platform-independent implementation of aligned memory allocator.
-
-#ifndef CRCUTIL_ALIGNED_ALLOC_H_
-#define CRCUTIL_ALIGNED_ALLOC_H_
-
-#include "std_headers.h" // size_t, ptrdiff_t
-
-namespace crcutil {
-
-// Allocates a block of memory of "size" bytes so that a field
-// at "field_offset" is aligned on "align" boundary.
-//
-// NB #1: "align" shall be exact power of two.
-//
-// NB #2: memory allocated by AlignedAlloc should be release by AlignedFree().
-//
-inline void *AlignedAlloc(size_t size,
- size_t field_offset,
- size_t align,
- const void **allocated_mem) {
- if (align == 0 || (align & (align - 1)) != 0 || align < sizeof(char *)) {
- align = sizeof(*allocated_mem);
- }
- size += align - 1 + sizeof(*allocated_mem);
- char *allocated_memory = new char[size];
- char *aligned_memory = allocated_memory + sizeof(*allocated_mem);
- field_offset &= align - 1;
- size_t actual_alignment =
- reinterpret_cast<size_t>(aligned_memory + field_offset) & (align - 1);
- if (actual_alignment != 0) {
- aligned_memory += align - actual_alignment;
- }
- reinterpret_cast<char **>(aligned_memory)[-1] = allocated_memory;
-
- if (allocated_mem != NULL) {
- *allocated_mem = allocated_memory;
- }
-
- return aligned_memory;
-}
-
-// Frees memory allocated by AlignedAlloc().
-inline void AlignedFree(void *aligned_memory) {
- if (aligned_memory != NULL) {
- char *allocated_memory = reinterpret_cast<char **>(aligned_memory)[-1];
- delete[] allocated_memory;
- }
-}
-
-} // namespace crcutil
-
-#endif // CRCUTIL_ALIGNED_ALLOC_H_
+// Copyright 2010 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Poor man's platform-independent implementation of aligned memory allocator.
+
+#ifndef CRCUTIL_ALIGNED_ALLOC_H_
+#define CRCUTIL_ALIGNED_ALLOC_H_
+
+#include "std_headers.h" // size_t, ptrdiff_t
+
+namespace crcutil {
+
+// Allocates a block of memory of "size" bytes so that a field
+// at "field_offset" is aligned on "align" boundary.
+//
+// NB #1: "align" shall be exact power of two.
+//
+// NB #2: memory allocated by AlignedAlloc should be release by AlignedFree().
+//
+inline void *AlignedAlloc(size_t size,
+ size_t field_offset,
+ size_t align,
+ const void **allocated_mem) {
+ if (align == 0 || (align & (align - 1)) != 0 || align < sizeof(char *)) {
+ align = sizeof(*allocated_mem);
+ }
+ size += align - 1 + sizeof(*allocated_mem);
+ char *allocated_memory = new char[size];
+ char *aligned_memory = allocated_memory + sizeof(*allocated_mem);
+ field_offset &= align - 1;
+ size_t actual_alignment =
+ reinterpret_cast<size_t>(aligned_memory + field_offset) & (align - 1);
+ if (actual_alignment != 0) {
+ aligned_memory += align - actual_alignment;
+ }
+ reinterpret_cast<char **>(aligned_memory)[-1] = allocated_memory;
+
+ if (allocated_mem != NULL) {
+ *allocated_mem = allocated_memory;
+ }
+
+ return aligned_memory;
+}
+
+// Frees memory allocated by AlignedAlloc().
+inline void AlignedFree(void *aligned_memory) {
+ if (aligned_memory != NULL) {
+ char *allocated_memory = reinterpret_cast<char **>(aligned_memory)[-1];
+ delete[] allocated_memory;
+ }
+}
+
+} // namespace crcutil
+
+#endif // CRCUTIL_ALIGNED_ALLOC_H_
diff --git a/contrib/libs/crcutil/base_types.h b/contrib/libs/crcutil/base_types.h
index 5b743640a4..a0d9d9a093 100644
--- a/contrib/libs/crcutil/base_types.h
+++ b/contrib/libs/crcutil/base_types.h
@@ -1,73 +1,73 @@
-// Copyright 2010 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Defines 8/16/32/64-bit integer types.
-//
-// Either uint64 or uint32 will map to size_t.
-// This way, specialized variants of CRC implementation
-// parameterized by "size_t" will be reused when
-// parameterized by "uint64" or "uint32".
-// In their turn, specialized verisons are parameterized
-// by "size_t" so that one version of the code is optimal
-// both on 32-bit and 64-bit platforms.
-
-#ifndef CRCUTIL_BASE_TYPES_H_
-#define CRCUTIL_BASE_TYPES_H_
-
-#include "std_headers.h" // size_t, ptrdiff_t
-
-namespace crcutil {
-
-template<typename A, typename B> class ChooseFirstIfSame {
- public:
- template<bool same_size, typename AA, typename BB> class ChooseFirstIfTrue {
- public:
- typedef AA Type;
- };
- template<typename AA, typename BB> class ChooseFirstIfTrue<false, AA, BB> {
- public:
- typedef BB Type;
- };
-
- typedef typename ChooseFirstIfTrue<sizeof(A) == sizeof(B), A, B>::Type Type;
-};
-
-typedef unsigned char uint8;
-typedef signed char int8;
-
-typedef unsigned short uint16;
-typedef short int16;
-
-typedef ChooseFirstIfSame<size_t, unsigned int>::Type uint32;
-typedef ChooseFirstIfSame<ptrdiff_t, int>::Type int32;
-
-#if defined(_MSC_VER)
-typedef ChooseFirstIfSame<size_t, unsigned __int64>::Type uint64;
-typedef ChooseFirstIfSame<ptrdiff_t, __int64>::Type int64;
-#define HAVE_UINT64 1
-#elif defined(__GNUC__)
-typedef ChooseFirstIfSame<size_t, unsigned long long>::Type uint64;
-typedef ChooseFirstIfSame<ptrdiff_t, long long>::Type int64;
-#define HAVE_UINT64 1
-#else
-// TODO: ensure that everything compiles and works when HAVE_UINT64 is false.
-// TODO: remove HAVE_UINT64 and use sizeof(uint64) instead?
-#define HAVE_UINT64 0
-typedef uint32 uint64;
-typedef int32 int64;
-#endif
-
-} // namespace crcutil
-
-#endif // CRCUTIL_BASE_TYPES_H_
+// Copyright 2010 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Defines 8/16/32/64-bit integer types.
+//
+// Either uint64 or uint32 will map to size_t.
+// This way, specialized variants of CRC implementation
+// parameterized by "size_t" will be reused when
+// parameterized by "uint64" or "uint32".
+// In their turn, specialized verisons are parameterized
+// by "size_t" so that one version of the code is optimal
+// both on 32-bit and 64-bit platforms.
+
+#ifndef CRCUTIL_BASE_TYPES_H_
+#define CRCUTIL_BASE_TYPES_H_
+
+#include "std_headers.h" // size_t, ptrdiff_t
+
+namespace crcutil {
+
+template<typename A, typename B> class ChooseFirstIfSame {
+ public:
+ template<bool same_size, typename AA, typename BB> class ChooseFirstIfTrue {
+ public:
+ typedef AA Type;
+ };
+ template<typename AA, typename BB> class ChooseFirstIfTrue<false, AA, BB> {
+ public:
+ typedef BB Type;
+ };
+
+ typedef typename ChooseFirstIfTrue<sizeof(A) == sizeof(B), A, B>::Type Type;
+};
+
+typedef unsigned char uint8;
+typedef signed char int8;
+
+typedef unsigned short uint16;
+typedef short int16;
+
+typedef ChooseFirstIfSame<size_t, unsigned int>::Type uint32;
+typedef ChooseFirstIfSame<ptrdiff_t, int>::Type int32;
+
+#if defined(_MSC_VER)
+typedef ChooseFirstIfSame<size_t, unsigned __int64>::Type uint64;
+typedef ChooseFirstIfSame<ptrdiff_t, __int64>::Type int64;
+#define HAVE_UINT64 1
+#elif defined(__GNUC__)
+typedef ChooseFirstIfSame<size_t, unsigned long long>::Type uint64;
+typedef ChooseFirstIfSame<ptrdiff_t, long long>::Type int64;
+#define HAVE_UINT64 1
+#else
+// TODO: ensure that everything compiles and works when HAVE_UINT64 is false.
+// TODO: remove HAVE_UINT64 and use sizeof(uint64) instead?
+#define HAVE_UINT64 0
+typedef uint32 uint64;
+typedef int32 int64;
+#endif
+
+} // namespace crcutil
+
+#endif // CRCUTIL_BASE_TYPES_H_
diff --git a/contrib/libs/crcutil/bob_jenkins_rng.h b/contrib/libs/crcutil/bob_jenkins_rng.h
index aed178c2a7..0925af960d 100644
--- a/contrib/libs/crcutil/bob_jenkins_rng.h
+++ b/contrib/libs/crcutil/bob_jenkins_rng.h
@@ -1,126 +1,126 @@
-// Copyright 2010 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Glorified C++ version of Bob Jenkins' random number generator.
-// See http://burtleburtle.net/bob/rand/smallprng.html for more details.
-
-#ifndef CRCUTIL_BOB_JENKINS_RNG_H_
-#define CRCUTIL_BOB_JENKINS_RNG_H_
-
-#include "base_types.h"
-
-#if !defined(_MSC_VER)
-#define _rotl(value, bits) \
- static_cast<uint32>(((value) << (bits)) + ((value) >> (32 - (bits))))
-#define _rotl64(value, bits) \
- static_cast<uint64>(((value) << (bits)) + ((value) >> (64 - (bits))))
-#endif // !defined(_MSC_VER)
-
-namespace crcutil {
-
-#pragma pack(push, 8)
-
-template<typename T> class BobJenkinsRng;
-
-template<> class BobJenkinsRng<uint32> {
- public:
- typedef uint32 value;
-
- value Get() {
- value e = a_ - _rotl(b_, 23);
- a_ = b_ ^ _rotl(c_, 16);
- b_ = c_ + _rotl(d_, 11);
- c_ = d_ + e;
- d_ = e + a_;
- return (d_);
- }
-
- void Init(value seed) {
- a_ = 0xf1ea5eed;
- b_ = seed;
- c_ = seed;
- d_ = seed;
- for (size_t i = 0; i < 20; ++i) {
- (void) Get();
- }
- }
-
- explicit BobJenkinsRng(value seed) {
- Init(seed);
- }
-
- BobJenkinsRng() {
- Init(0x1234567);
- }
-
- private:
- value a_;
- value b_;
- value c_;
- value d_;
-};
-
-
-#if HAVE_UINT64
-
-template<> class BobJenkinsRng<uint64> {
- public:
- typedef uint64 value;
-
- value Get() {
- value e = a_ - _rotl64(b_, 7);
- a_ = b_ ^ _rotl64(c_, 13);
- b_ = c_ + _rotl64(d_, 37);
- c_ = d_ + e;
- d_ = e + a_;
- return d_;
- }
-
- void Init(value seed) {
- a_ = 0xf1ea5eed;
- b_ = seed;
- c_ = seed;
- d_ = seed;
- for (size_t i = 0; i < 20; ++i) {
- (void) Get();
- }
- }
-
- explicit BobJenkinsRng(value seed) {
- Init(seed);
- }
-
- BobJenkinsRng() {
- Init(0x1234567);
- }
-
- private:
- value a_;
- value b_;
- value c_;
- value d_;
-};
-
-#endif // HAVE_UINT64
-
-#if !defined(_MSC_VER)
-#undef _rotl
-#undef _rotl64
-#endif // !defined(_MSC_VER)
-
-#pragma pack(pop)
-
-} // namespace crcutil
-
-#endif // CRCUTIL_BOB_JENKINS_RNG_H_
+// Copyright 2010 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Glorified C++ version of Bob Jenkins' random number generator.
+// See http://burtleburtle.net/bob/rand/smallprng.html for more details.
+
+#ifndef CRCUTIL_BOB_JENKINS_RNG_H_
+#define CRCUTIL_BOB_JENKINS_RNG_H_
+
+#include "base_types.h"
+
+#if !defined(_MSC_VER)
+#define _rotl(value, bits) \
+ static_cast<uint32>(((value) << (bits)) + ((value) >> (32 - (bits))))
+#define _rotl64(value, bits) \
+ static_cast<uint64>(((value) << (bits)) + ((value) >> (64 - (bits))))
+#endif // !defined(_MSC_VER)
+
+namespace crcutil {
+
+#pragma pack(push, 8)
+
+template<typename T> class BobJenkinsRng;
+
+template<> class BobJenkinsRng<uint32> {
+ public:
+ typedef uint32 value;
+
+ value Get() {
+ value e = a_ - _rotl(b_, 23);
+ a_ = b_ ^ _rotl(c_, 16);
+ b_ = c_ + _rotl(d_, 11);
+ c_ = d_ + e;
+ d_ = e + a_;
+ return (d_);
+ }
+
+ void Init(value seed) {
+ a_ = 0xf1ea5eed;
+ b_ = seed;
+ c_ = seed;
+ d_ = seed;
+ for (size_t i = 0; i < 20; ++i) {
+ (void) Get();
+ }
+ }
+
+ explicit BobJenkinsRng(value seed) {
+ Init(seed);
+ }
+
+ BobJenkinsRng() {
+ Init(0x1234567);
+ }
+
+ private:
+ value a_;
+ value b_;
+ value c_;
+ value d_;
+};
+
+
+#if HAVE_UINT64
+
+template<> class BobJenkinsRng<uint64> {
+ public:
+ typedef uint64 value;
+
+ value Get() {
+ value e = a_ - _rotl64(b_, 7);
+ a_ = b_ ^ _rotl64(c_, 13);
+ b_ = c_ + _rotl64(d_, 37);
+ c_ = d_ + e;
+ d_ = e + a_;
+ return d_;
+ }
+
+ void Init(value seed) {
+ a_ = 0xf1ea5eed;
+ b_ = seed;
+ c_ = seed;
+ d_ = seed;
+ for (size_t i = 0; i < 20; ++i) {
+ (void) Get();
+ }
+ }
+
+ explicit BobJenkinsRng(value seed) {
+ Init(seed);
+ }
+
+ BobJenkinsRng() {
+ Init(0x1234567);
+ }
+
+ private:
+ value a_;
+ value b_;
+ value c_;
+ value d_;
+};
+
+#endif // HAVE_UINT64
+
+#if !defined(_MSC_VER)
+#undef _rotl
+#undef _rotl64
+#endif // !defined(_MSC_VER)
+
+#pragma pack(pop)
+
+} // namespace crcutil
+
+#endif // CRCUTIL_BOB_JENKINS_RNG_H_
diff --git a/contrib/libs/crcutil/crc32c_sse4.cc b/contrib/libs/crcutil/crc32c_sse4.cc
index ed0f64410a..9875ab4ff2 100644
--- a/contrib/libs/crcutil/crc32c_sse4.cc
+++ b/contrib/libs/crcutil/crc32c_sse4.cc
@@ -1,369 +1,369 @@
-// Copyright 2010 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Implements CRC32C using Intel's SSE4 crc32 instruction.
-// Uses _mm_crc32_u64/32/8 intrinsics if CRCUTIL_USE_MM_CRC32 is not zero,
-// emilates intrinsics via CRC_WORD/CRC_BYTE otherwise.
-
-#include "crc32c_sse4.h"
-
+// Copyright 2010 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Implements CRC32C using Intel's SSE4 crc32 instruction.
+// Uses _mm_crc32_u64/32/8 intrinsics if CRCUTIL_USE_MM_CRC32 is not zero,
+// emilates intrinsics via CRC_WORD/CRC_BYTE otherwise.
+
+#include "crc32c_sse4.h"
+
#include <util/system/compiler.h>
-#if HAVE_I386 || HAVE_AMD64
-
-namespace crcutil {
-
-#define UPDATE_STRIPE_CRCS(index, block_size, num_stripes) do { \
- CRC_UPDATE_WORD(crc0, \
- reinterpret_cast<const size_t *>(src + \
- 0 * CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes))[index]); \
- CRC_UPDATE_WORD(crc1, \
- reinterpret_cast<const size_t *>(src + \
- 1 * CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes))[index]); \
- CRC_UPDATE_WORD(crc2, \
- reinterpret_cast<const size_t *>(src + \
- 2 * CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes))[index]); \
- if (num_stripes > 3) { \
- CRC_UPDATE_WORD(crc3, \
- reinterpret_cast<const size_t *>(src + \
- 3 * CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes))[index]); \
- } \
-} while (0)
-
-// Multiplies "crc" by "x**(8 * STRIPE_SIZE(block_size)"
-// using appropriate multiplication table(s).
-//
-#if 0
-
-// This variant is for illustration purposes only.
-// Actual implementation below:
-// 1. Splits the computation into 2 data-independent paths
-// by independently multiplying lower and upper halves
-// of "crc0" in interleaved manner, and combining the
-// results in the end.
-// 2. Removing redundant "crc0 = 0" etc. in the beginning.
-// 3. Removing redundant shifts of "tmp0" and "tmp1" in the last round.
-#define MULTIPLY_CRC(crc0, block_size, num_stripes) do { \
- size_t tmp0 = crc0; \
- crc0 = 0; \
- for (size_t i = 0; i < kNumTables; ++i) { \
- crc0 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
- [i][tmp0 & (kTableEntries - 1)]; \
- tmp0 >>= kTableEntryBits; \
- } \
-} while (0)
-
-#else
-
-#define MULTIPLY_CRC(crc0, block_size, num_stripes) do { \
- size_t tmp0 = crc0; \
- size_t tmp1 = crc0 >> (kTableEntryBits * kNumTablesHalfHi); \
- crc0 = CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
- [0][tmp0 & (kTableEntries - 1)]; \
- tmp0 >>= kTableEntryBits; \
- size_t crc1 = CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
- [kNumTablesHalfHi][tmp1 & (kTableEntries - 1)]; \
- tmp1 >>= kTableEntryBits; \
- for (size_t i = 1; i < kNumTablesHalfLo - 1; ++i) { \
- crc0 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
- [i][tmp0 & (kTableEntries - 1)]; \
- tmp0 >>= kTableEntryBits; \
- crc1 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
- [i + kNumTablesHalfHi][tmp1 & (kTableEntries - 1)]; \
- tmp1 >>= kTableEntryBits; \
- } \
- crc0 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
- [kNumTablesHalfLo - 1][tmp0 & (kTableEntries - 1)]; \
- if (kNumTables & 1) { \
- tmp0 >>= kTableEntryBits; \
- } \
- crc1 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
- [kNumTables - 1][tmp1]; \
- if (kNumTables & 1) { \
- crc0 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
- [kNumTablesHalfLo][tmp0 & (kTableEntries - 1)]; \
- } \
- crc0 ^= crc1; \
-} while (0)
-
-#endif
-
-// Given CRCs (crc0, crc1, etc.) of consequitive
-// stripes of STRIPE_SIZE(block_size) bytes each,
-// produces CRC of concatenated stripes.
-#define COMBINE_STRIPE_CRCS(block_size, num_stripes) do { \
- MULTIPLY_CRC(crc0, block_size, num_stripes); \
- crc0 ^= crc1; \
- MULTIPLY_CRC(crc0, block_size, num_stripes); \
- crc0 ^= crc2; \
- if (num_stripes > 3) { \
- MULTIPLY_CRC(crc0, block_size, num_stripes); \
- crc0 ^= crc3; \
- } \
-} while (0)
-
-// Processes input BLOCK_SIZE(block) bytes per iteration
-// by splitting a block of BLOCK_SIZE(block) bytes into N
-// equally-sized stripes of STRIPE_SIZE(block_size) each,
-// computing CRC of each stripe, and concatenating stripe CRCs.
-#define PROCESS_BLOCK(block_size, num_stripes) do { \
- while (bytes >= CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes)) { \
- Crc crc1 = 0; \
- Crc crc2 = 0; \
- Crc crc3; \
- if (num_stripes > 3) crc3 = 0; \
- { \
- const uint8 *stripe_end = src + \
- (CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) / \
- kUnrolledLoopBytes) * kUnrolledLoopBytes; \
- do { \
- UPDATE_STRIPE_CRCS(0, block_size, num_stripes); \
- UPDATE_STRIPE_CRCS(1, block_size, num_stripes); \
- UPDATE_STRIPE_CRCS(2, block_size, num_stripes); \
- UPDATE_STRIPE_CRCS(3, block_size, num_stripes); \
- UPDATE_STRIPE_CRCS(4, block_size, num_stripes); \
- UPDATE_STRIPE_CRCS(5, block_size, num_stripes); \
- UPDATE_STRIPE_CRCS(6, block_size, num_stripes); \
- UPDATE_STRIPE_CRCS(7, block_size, num_stripes); \
- src += kUnrolledLoopBytes; \
- } while (src < stripe_end); \
- if ((CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) % \
- kUnrolledLoopBytes) != 0) { \
- stripe_end += \
- CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) % \
- kUnrolledLoopBytes; \
- do { \
- UPDATE_STRIPE_CRCS(0, block_size, num_stripes); \
- src += sizeof(size_t); \
- } while (src < stripe_end); \
- } \
- } \
- COMBINE_STRIPE_CRCS(block_size, num_stripes); \
- src += CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) * \
- ((num_stripes) - 1); \
- bytes = static_cast<size_t>(end - src); \
- } \
- no_more_##block_size##_##num_stripes:; \
-} while (0)
-
+#if HAVE_I386 || HAVE_AMD64
+
+namespace crcutil {
+
+#define UPDATE_STRIPE_CRCS(index, block_size, num_stripes) do { \
+ CRC_UPDATE_WORD(crc0, \
+ reinterpret_cast<const size_t *>(src + \
+ 0 * CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes))[index]); \
+ CRC_UPDATE_WORD(crc1, \
+ reinterpret_cast<const size_t *>(src + \
+ 1 * CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes))[index]); \
+ CRC_UPDATE_WORD(crc2, \
+ reinterpret_cast<const size_t *>(src + \
+ 2 * CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes))[index]); \
+ if (num_stripes > 3) { \
+ CRC_UPDATE_WORD(crc3, \
+ reinterpret_cast<const size_t *>(src + \
+ 3 * CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes))[index]); \
+ } \
+} while (0)
+
+// Multiplies "crc" by "x**(8 * STRIPE_SIZE(block_size)"
+// using appropriate multiplication table(s).
+//
+#if 0
+
+// This variant is for illustration purposes only.
+// Actual implementation below:
+// 1. Splits the computation into 2 data-independent paths
+// by independently multiplying lower and upper halves
+// of "crc0" in interleaved manner, and combining the
+// results in the end.
+// 2. Removing redundant "crc0 = 0" etc. in the beginning.
+// 3. Removing redundant shifts of "tmp0" and "tmp1" in the last round.
+#define MULTIPLY_CRC(crc0, block_size, num_stripes) do { \
+ size_t tmp0 = crc0; \
+ crc0 = 0; \
+ for (size_t i = 0; i < kNumTables; ++i) { \
+ crc0 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
+ [i][tmp0 & (kTableEntries - 1)]; \
+ tmp0 >>= kTableEntryBits; \
+ } \
+} while (0)
+
+#else
+
+#define MULTIPLY_CRC(crc0, block_size, num_stripes) do { \
+ size_t tmp0 = crc0; \
+ size_t tmp1 = crc0 >> (kTableEntryBits * kNumTablesHalfHi); \
+ crc0 = CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
+ [0][tmp0 & (kTableEntries - 1)]; \
+ tmp0 >>= kTableEntryBits; \
+ size_t crc1 = CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
+ [kNumTablesHalfHi][tmp1 & (kTableEntries - 1)]; \
+ tmp1 >>= kTableEntryBits; \
+ for (size_t i = 1; i < kNumTablesHalfLo - 1; ++i) { \
+ crc0 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
+ [i][tmp0 & (kTableEntries - 1)]; \
+ tmp0 >>= kTableEntryBits; \
+ crc1 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
+ [i + kNumTablesHalfHi][tmp1 & (kTableEntries - 1)]; \
+ tmp1 >>= kTableEntryBits; \
+ } \
+ crc0 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
+ [kNumTablesHalfLo - 1][tmp0 & (kTableEntries - 1)]; \
+ if (kNumTables & 1) { \
+ tmp0 >>= kTableEntryBits; \
+ } \
+ crc1 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
+ [kNumTables - 1][tmp1]; \
+ if (kNumTables & 1) { \
+ crc0 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
+ [kNumTablesHalfLo][tmp0 & (kTableEntries - 1)]; \
+ } \
+ crc0 ^= crc1; \
+} while (0)
+
+#endif
+
+// Given CRCs (crc0, crc1, etc.) of consequitive
+// stripes of STRIPE_SIZE(block_size) bytes each,
+// produces CRC of concatenated stripes.
+#define COMBINE_STRIPE_CRCS(block_size, num_stripes) do { \
+ MULTIPLY_CRC(crc0, block_size, num_stripes); \
+ crc0 ^= crc1; \
+ MULTIPLY_CRC(crc0, block_size, num_stripes); \
+ crc0 ^= crc2; \
+ if (num_stripes > 3) { \
+ MULTIPLY_CRC(crc0, block_size, num_stripes); \
+ crc0 ^= crc3; \
+ } \
+} while (0)
+
+// Processes input BLOCK_SIZE(block) bytes per iteration
+// by splitting a block of BLOCK_SIZE(block) bytes into N
+// equally-sized stripes of STRIPE_SIZE(block_size) each,
+// computing CRC of each stripe, and concatenating stripe CRCs.
+#define PROCESS_BLOCK(block_size, num_stripes) do { \
+ while (bytes >= CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes)) { \
+ Crc crc1 = 0; \
+ Crc crc2 = 0; \
+ Crc crc3; \
+ if (num_stripes > 3) crc3 = 0; \
+ { \
+ const uint8 *stripe_end = src + \
+ (CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) / \
+ kUnrolledLoopBytes) * kUnrolledLoopBytes; \
+ do { \
+ UPDATE_STRIPE_CRCS(0, block_size, num_stripes); \
+ UPDATE_STRIPE_CRCS(1, block_size, num_stripes); \
+ UPDATE_STRIPE_CRCS(2, block_size, num_stripes); \
+ UPDATE_STRIPE_CRCS(3, block_size, num_stripes); \
+ UPDATE_STRIPE_CRCS(4, block_size, num_stripes); \
+ UPDATE_STRIPE_CRCS(5, block_size, num_stripes); \
+ UPDATE_STRIPE_CRCS(6, block_size, num_stripes); \
+ UPDATE_STRIPE_CRCS(7, block_size, num_stripes); \
+ src += kUnrolledLoopBytes; \
+ } while (src < stripe_end); \
+ if ((CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) % \
+ kUnrolledLoopBytes) != 0) { \
+ stripe_end += \
+ CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) % \
+ kUnrolledLoopBytes; \
+ do { \
+ UPDATE_STRIPE_CRCS(0, block_size, num_stripes); \
+ src += sizeof(size_t); \
+ } while (src < stripe_end); \
+ } \
+ } \
+ COMBINE_STRIPE_CRCS(block_size, num_stripes); \
+ src += CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) * \
+ ((num_stripes) - 1); \
+ bytes = static_cast<size_t>(end - src); \
+ } \
+ no_more_##block_size##_##num_stripes:; \
+} while (0)
+
Y_NO_SANITIZE("undefined")
-size_t Crc32cSSE4::Crc32c(const void *data, size_t bytes, Crc crc0) const {
- const uint8 *src = static_cast<const uint8 *>(data);
- const uint8 *end = src + bytes;
- crc0 ^= Base().Canonize();
-
- // If we don't have too much data to process,
- // do not waste time trying to align input etc.
- // Noticeably improves performance on small inputs.
- if (bytes < 4 * sizeof(size_t)) goto less_than_4_size_t;
- if (bytes < 8 * sizeof(size_t)) goto less_than_8_size_t;
- if (bytes < 16 * sizeof(size_t)) goto less_than_16_size_t;
-
-#define PROCESS_TAIL_IF_SMALL(block_size, num_stripes) do { \
- if (bytes < CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes)) { \
- goto no_more_##block_size##_##num_stripes; \
- } \
-} while (0)
-#define NOOP(block_size, num_stripes)
-
- CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING(PROCESS_TAIL_IF_SMALL,
- NOOP,
- NOOP);
-
-#undef PROCESS_TAIL_IF_SMALL
-
-
- // Do not use ALIGN_ON_WORD_BOUNDARY_IF_NEEDED() here because:
- // 1. It uses CRC_BYTE() which won't work.
- // 2. Its threshold may be incorrect becuase Crc32 that uses
- // native CPU crc32 instruction is much faster than
- // generic table-based CRC computation.
- //
- // In case of X5550 CPU, break even point is at 2KB -- exactly.
- if (bytes >= 2 * 1024) {
- while ((reinterpret_cast<size_t>(src) & (sizeof(Word) - 1)) != 0) {
- if (src >= end) {
- return (crc0 ^ Base().Canonize());
- }
- CRC_UPDATE_BYTE(crc0, src[0]);
- src += 1;
- }
- bytes = static_cast<size_t>(end - src);
- }
- if (src >= end) {
- return (crc0 ^ Base().Canonize());
- }
-
- // Quickly skip processing of too large blocks
- // Noticeably improves performance on small inputs.
-#define SKIP_BLOCK_IF_NEEDED(block_size, num_stripes) do { \
- if (bytes < CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes)) { \
- goto no_more_##block_size##_##num_stripes; \
- } \
-} while (0)
-
- CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING(NOOP,
- SKIP_BLOCK_IF_NEEDED,
- SKIP_BLOCK_IF_NEEDED);
-
-#undef SKIP_BLOCK_IF_NEEDED
-
- // Process data in all blocks.
- CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING(PROCESS_BLOCK,
- PROCESS_BLOCK,
- PROCESS_BLOCK);
-
- // Finish the tail word-by-word and then byte-by-byte.
-#define CRC_UPDATE_WORD_4(index) do { \
- CRC_UPDATE_WORD(crc0, reinterpret_cast<const size_t *>(src)[index]); \
- CRC_UPDATE_WORD(crc0, reinterpret_cast<const size_t *>(src)[index + 1]); \
- CRC_UPDATE_WORD(crc0, reinterpret_cast<const size_t *>(src)[index + 2]); \
- CRC_UPDATE_WORD(crc0, reinterpret_cast<const size_t *>(src)[index + 3]); \
-} while (0)
-
- if (bytes >= 4 * 4 * sizeof(size_t)) {
- end -= 4 * 4 * sizeof(size_t);
- do {
- CRC_UPDATE_WORD_4(4 * 0);
- CRC_UPDATE_WORD_4(4 * 1);
- CRC_UPDATE_WORD_4(4 * 2);
- CRC_UPDATE_WORD_4(4 * 3);
- src += 4 * 4 * sizeof(size_t);
- } while (src <= end);
- end += 4 * 4 * sizeof(size_t);
- bytes = static_cast<size_t>(end - src);
- }
- less_than_16_size_t:
-
- if (bytes >= 4 * 2 * sizeof(size_t)) {
- CRC_UPDATE_WORD_4(4 * 0);
- CRC_UPDATE_WORD_4(4 * 1);
- src += 4 * 2 * sizeof(size_t);
- bytes -= 4 * 2 * sizeof(size_t);
- }
- less_than_8_size_t:
-
- if (bytes >= 4 * sizeof(size_t)) {
- CRC_UPDATE_WORD_4(0);
- src += 4 * sizeof(size_t);
- bytes -= 4 * sizeof(size_t);
- }
- less_than_4_size_t:
-
- if (bytes >= 1 * sizeof(size_t)) {
- end -= 1 * sizeof(size_t);
- do {
- CRC_UPDATE_WORD(crc0, reinterpret_cast<const size_t *>(src)[0]);
- src += 1 * sizeof(size_t);
- } while (src <= end);
- end += 1 * sizeof(size_t);
- }
-
- while (src < end) {
- CRC_UPDATE_BYTE(crc0, src[0]);
- src += 1;
- }
-
- return (crc0 ^ Base().Canonize());
-}
-
-
-void Crc32cSSE4::Init(bool constant) {
- base_.Init(FixedGeneratingPolynomial(), FixedDegree(), constant);
-
-#define INIT_MUL_TABLE(block_size, num_stripes) do { \
- size_t multiplier = \
- Base().Xpow8N(CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes)); \
- for (size_t table = 0; table < kNumTables; ++table) { \
- for (size_t entry = 0; entry < kTableEntries; ++entry) { \
- size_t value = static_cast<uint32>(entry << (kTableEntryBits * table)); \
- CRC32C_SSE4_MUL_TABLE(block_size, num_stripes)[table][entry] = \
- static_cast<Entry>(Base().Multiply(value, multiplier)); \
- } \
- } \
-} while (0)
-
- CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(INIT_MUL_TABLE);
-
-#undef INIT_MUL_TABLE
-
-#if !CRCUTIL_USE_MM_CRC32
- for (size_t j = 0; j < sizeof(Word); ++j) {
- Crc k = Base().XpowN((sizeof(Word) - 1 - j) * 8 + 32);
- for (size_t i = 0; i < 256; ++i) {
- crc_word_[j][i] = Base().MultiplyUnnormalized(i, 8, k);
- }
- }
-#endif // !CRCUTIL_USE_MM_CRC32
-}
-
-
-bool Crc32cSSE4::IsSSE42Available() {
-#if defined(_MSC_VER)
- int cpu_info[4];
- __cpuid(cpu_info, 1);
+size_t Crc32cSSE4::Crc32c(const void *data, size_t bytes, Crc crc0) const {
+ const uint8 *src = static_cast<const uint8 *>(data);
+ const uint8 *end = src + bytes;
+ crc0 ^= Base().Canonize();
+
+ // If we don't have too much data to process,
+ // do not waste time trying to align input etc.
+ // Noticeably improves performance on small inputs.
+ if (bytes < 4 * sizeof(size_t)) goto less_than_4_size_t;
+ if (bytes < 8 * sizeof(size_t)) goto less_than_8_size_t;
+ if (bytes < 16 * sizeof(size_t)) goto less_than_16_size_t;
+
+#define PROCESS_TAIL_IF_SMALL(block_size, num_stripes) do { \
+ if (bytes < CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes)) { \
+ goto no_more_##block_size##_##num_stripes; \
+ } \
+} while (0)
+#define NOOP(block_size, num_stripes)
+
+ CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING(PROCESS_TAIL_IF_SMALL,
+ NOOP,
+ NOOP);
+
+#undef PROCESS_TAIL_IF_SMALL
+
+
+ // Do not use ALIGN_ON_WORD_BOUNDARY_IF_NEEDED() here because:
+ // 1. It uses CRC_BYTE() which won't work.
+ // 2. Its threshold may be incorrect becuase Crc32 that uses
+ // native CPU crc32 instruction is much faster than
+ // generic table-based CRC computation.
+ //
+ // In case of X5550 CPU, break even point is at 2KB -- exactly.
+ if (bytes >= 2 * 1024) {
+ while ((reinterpret_cast<size_t>(src) & (sizeof(Word) - 1)) != 0) {
+ if (src >= end) {
+ return (crc0 ^ Base().Canonize());
+ }
+ CRC_UPDATE_BYTE(crc0, src[0]);
+ src += 1;
+ }
+ bytes = static_cast<size_t>(end - src);
+ }
+ if (src >= end) {
+ return (crc0 ^ Base().Canonize());
+ }
+
+ // Quickly skip processing of too large blocks
+ // Noticeably improves performance on small inputs.
+#define SKIP_BLOCK_IF_NEEDED(block_size, num_stripes) do { \
+ if (bytes < CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes)) { \
+ goto no_more_##block_size##_##num_stripes; \
+ } \
+} while (0)
+
+ CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING(NOOP,
+ SKIP_BLOCK_IF_NEEDED,
+ SKIP_BLOCK_IF_NEEDED);
+
+#undef SKIP_BLOCK_IF_NEEDED
+
+ // Process data in all blocks.
+ CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING(PROCESS_BLOCK,
+ PROCESS_BLOCK,
+ PROCESS_BLOCK);
+
+ // Finish the tail word-by-word and then byte-by-byte.
+#define CRC_UPDATE_WORD_4(index) do { \
+ CRC_UPDATE_WORD(crc0, reinterpret_cast<const size_t *>(src)[index]); \
+ CRC_UPDATE_WORD(crc0, reinterpret_cast<const size_t *>(src)[index + 1]); \
+ CRC_UPDATE_WORD(crc0, reinterpret_cast<const size_t *>(src)[index + 2]); \
+ CRC_UPDATE_WORD(crc0, reinterpret_cast<const size_t *>(src)[index + 3]); \
+} while (0)
+
+ if (bytes >= 4 * 4 * sizeof(size_t)) {
+ end -= 4 * 4 * sizeof(size_t);
+ do {
+ CRC_UPDATE_WORD_4(4 * 0);
+ CRC_UPDATE_WORD_4(4 * 1);
+ CRC_UPDATE_WORD_4(4 * 2);
+ CRC_UPDATE_WORD_4(4 * 3);
+ src += 4 * 4 * sizeof(size_t);
+ } while (src <= end);
+ end += 4 * 4 * sizeof(size_t);
+ bytes = static_cast<size_t>(end - src);
+ }
+ less_than_16_size_t:
+
+ if (bytes >= 4 * 2 * sizeof(size_t)) {
+ CRC_UPDATE_WORD_4(4 * 0);
+ CRC_UPDATE_WORD_4(4 * 1);
+ src += 4 * 2 * sizeof(size_t);
+ bytes -= 4 * 2 * sizeof(size_t);
+ }
+ less_than_8_size_t:
+
+ if (bytes >= 4 * sizeof(size_t)) {
+ CRC_UPDATE_WORD_4(0);
+ src += 4 * sizeof(size_t);
+ bytes -= 4 * sizeof(size_t);
+ }
+ less_than_4_size_t:
+
+ if (bytes >= 1 * sizeof(size_t)) {
+ end -= 1 * sizeof(size_t);
+ do {
+ CRC_UPDATE_WORD(crc0, reinterpret_cast<const size_t *>(src)[0]);
+ src += 1 * sizeof(size_t);
+ } while (src <= end);
+ end += 1 * sizeof(size_t);
+ }
+
+ while (src < end) {
+ CRC_UPDATE_BYTE(crc0, src[0]);
+ src += 1;
+ }
+
+ return (crc0 ^ Base().Canonize());
+}
+
+
+void Crc32cSSE4::Init(bool constant) {
+ base_.Init(FixedGeneratingPolynomial(), FixedDegree(), constant);
+
+#define INIT_MUL_TABLE(block_size, num_stripes) do { \
+ size_t multiplier = \
+ Base().Xpow8N(CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes)); \
+ for (size_t table = 0; table < kNumTables; ++table) { \
+ for (size_t entry = 0; entry < kTableEntries; ++entry) { \
+ size_t value = static_cast<uint32>(entry << (kTableEntryBits * table)); \
+ CRC32C_SSE4_MUL_TABLE(block_size, num_stripes)[table][entry] = \
+ static_cast<Entry>(Base().Multiply(value, multiplier)); \
+ } \
+ } \
+} while (0)
+
+ CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(INIT_MUL_TABLE);
+
+#undef INIT_MUL_TABLE
+
+#if !CRCUTIL_USE_MM_CRC32
+ for (size_t j = 0; j < sizeof(Word); ++j) {
+ Crc k = Base().XpowN((sizeof(Word) - 1 - j) * 8 + 32);
+ for (size_t i = 0; i < 256; ++i) {
+ crc_word_[j][i] = Base().MultiplyUnnormalized(i, 8, k);
+ }
+ }
+#endif // !CRCUTIL_USE_MM_CRC32
+}
+
+
+bool Crc32cSSE4::IsSSE42Available() {
+#if defined(_MSC_VER)
+ int cpu_info[4];
+ __cpuid(cpu_info, 1);
return ((cpu_info[2] & (1 << 20)) != 0);
-#elif defined(__GNUC__) && (HAVE_AMD64 || HAVE_I386)
- // Not using "cpuid.h" intentionally: it is missing from
- // too many installations.
- uint32 eax;
- uint32 ecx;
- uint32 edx;
- __asm__ volatile(
-#if HAVE_I386 && defined(__PIC__)
+#elif defined(__GNUC__) && (HAVE_AMD64 || HAVE_I386)
+ // Not using "cpuid.h" intentionally: it is missing from
+ // too many installations.
+ uint32 eax;
+ uint32 ecx;
+ uint32 edx;
+ __asm__ volatile(
+#if HAVE_I386 && defined(__PIC__)
"push %%ebx\n"
- "cpuid\n"
+ "cpuid\n"
"pop %%ebx\n"
-#else
- "cpuid\n"
-#endif // HAVE_I386 && defined(__PIC__)
- : "=a" (eax), "=c" (ecx), "=d" (edx)
- : "a" (1), "2" (0)
- : "%ebx"
- );
- return ((ecx & (1 << 20)) != 0);
-#else
- return false;
-#endif
-}
-
-
-void RollingCrc32cSSE4::Init(const Crc32cSSE4 &crc,
- size_t roll_window_bytes,
- const Crc &start_value) {
- crc_ = &crc;
- roll_window_bytes_ = roll_window_bytes;
- start_value_ = start_value;
-
- Crc add = crc.Base().Canonize() ^ start_value;
- add = crc.Base().Multiply(add, crc.Base().Xpow8N(roll_window_bytes));
- add ^= crc.Base().Canonize();
- Crc mul = crc.Base().One() ^ crc.Base().Xpow8N(1);
- add = crc.Base().Multiply(add, mul);
-
- mul = crc.Base().XpowN(8 * roll_window_bytes + crc.Base().Degree());
- for (size_t i = 0; i < 256; ++i) {
- out_[i] = static_cast<Entry>(
- crc.Base().MultiplyUnnormalized(
- static_cast<Crc>(i), 8, mul) ^ add);
- }
-
-#if !CRCUTIL_USE_MM_CRC32
- memcpy(crc_word_, crc_->crc_word_, sizeof(crc_word_));
-#endif // !CRCUTIL_USE_MM_CRC32
-}
-
-} // namespace crcutil
-
-#endif // HAVE_I386 || HAVE_AMD64
+#else
+ "cpuid\n"
+#endif // HAVE_I386 && defined(__PIC__)
+ : "=a" (eax), "=c" (ecx), "=d" (edx)
+ : "a" (1), "2" (0)
+ : "%ebx"
+ );
+ return ((ecx & (1 << 20)) != 0);
+#else
+ return false;
+#endif
+}
+
+
+void RollingCrc32cSSE4::Init(const Crc32cSSE4 &crc,
+ size_t roll_window_bytes,
+ const Crc &start_value) {
+ crc_ = &crc;
+ roll_window_bytes_ = roll_window_bytes;
+ start_value_ = start_value;
+
+ Crc add = crc.Base().Canonize() ^ start_value;
+ add = crc.Base().Multiply(add, crc.Base().Xpow8N(roll_window_bytes));
+ add ^= crc.Base().Canonize();
+ Crc mul = crc.Base().One() ^ crc.Base().Xpow8N(1);
+ add = crc.Base().Multiply(add, mul);
+
+ mul = crc.Base().XpowN(8 * roll_window_bytes + crc.Base().Degree());
+ for (size_t i = 0; i < 256; ++i) {
+ out_[i] = static_cast<Entry>(
+ crc.Base().MultiplyUnnormalized(
+ static_cast<Crc>(i), 8, mul) ^ add);
+ }
+
+#if !CRCUTIL_USE_MM_CRC32
+ memcpy(crc_word_, crc_->crc_word_, sizeof(crc_word_));
+#endif // !CRCUTIL_USE_MM_CRC32
+}
+
+} // namespace crcutil
+
+#endif // HAVE_I386 || HAVE_AMD64
diff --git a/contrib/libs/crcutil/crc32c_sse4.h b/contrib/libs/crcutil/crc32c_sse4.h
index ac3d8425b8..24aa815699 100644
--- a/contrib/libs/crcutil/crc32c_sse4.h
+++ b/contrib/libs/crcutil/crc32c_sse4.h
@@ -1,252 +1,252 @@
-// Copyright 2010 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Implements CRC32C using Intel's SSE4 crc32 instruction.
-// Uses _mm_crc32_u64/32/8 intrinsics if CRCUTIL_USE_MM_CRC32 is not zero,
-// emilates intrinsics via CRC_WORD/CRC_BYTE otherwise.
-
-#ifndef CRCUTIL_CRC32C_SSE4_H_
-#define CRCUTIL_CRC32C_SSE4_H_
-
-#include "gf_util.h" // base types, gf_util class, etc.
-#include "crc32c_sse4_intrin.h" // _mm_crc32_u* intrinsics
-
-#if HAVE_I386 || HAVE_AMD64
-
-#if CRCUTIL_USE_MM_CRC32
-
-#if HAVE_I386
-#define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u32(crc, (value)))
-#else
-#define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u64(crc, (value)))
-#endif // HAVE_I386
-
-#define CRC_UPDATE_BYTE(crc, value) \
- (crc = _mm_crc32_u8(static_cast<uint32>(crc), static_cast<uint8>(value)))
-
-#else
-
-#include "generic_crc.h"
-
-#define CRC_UPDATE_WORD(crc, value) do { \
- size_t buf = (value); \
- CRC_WORD(this, crc, buf); \
-} while (0)
-#define CRC_UPDATE_BYTE(crc, value) do { \
- CRC_BYTE(this, crc, (value)); \
-} while (0)
-
-#endif // CRCUTIL_USE_MM_CRC32
-
-namespace crcutil {
-
-#pragma pack(push, 16)
-
-// Since the same pieces should be parameterized in many different places
-// and we do not want to introduce a mistake which is rather hard to find,
-// use a macro to enumerate all block sizes.
-//
-// Block sizes and number of stripes were tuned for best performance.
-//
-// All constants should be literal constants (too lazy to fix the macro).
-//
-// The use of different "macro_first", "macro", and "macro_last"
-// allows generation of different code for smallest, in between,
-// and largest block sizes.
-//
-// This macro shall be kept in sync with
-// CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING.
-// Failure to do so will cause compile-time error.
-#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING( \
- macro_smallest, macro, macro_largest) \
- macro_smallest(512, 3); \
- macro(1024, 3); \
- macro(4096, 3); \
- macro_largest(32768, 3)
-
-// This macro shall be kept in sync with
-// CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING.
-// Failure to do so will cause compile-time error.
-#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING( \
- macro_smallest, macro, macro_largest) \
- macro_largest(32768, 3); \
- macro(4096, 3); \
- macro(1024, 3); \
- macro_smallest(512, 3)
-
-// Enumerates all block sizes.
-#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(macro) \
- CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING(macro, macro, macro)
-
-#define CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) \
- (((block_size) / (num_stripes)) & ~(sizeof(size_t) - 1))
-
-#define CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes) \
- (CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) * (num_stripes))
-
-#define CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
- mul_table_##block_size##_##num_blocks##_
-
-class RollingCrc32cSSE4;
-
-class Crc32cSSE4 {
- public:
- // Exports Crc, TableEntry, and Word (needed by RollingCrc).
- typedef size_t Crc;
- typedef Crc Word;
- typedef Crc TableEntry;
-
- Crc32cSSE4() {}
-
- // Initializes the tables given generating polynomial of degree (degree).
- // If "canonical" is true, crc value will be XOR'ed with (-1) before and
- // after actual CRC computation.
- explicit Crc32cSSE4(bool canonical) {
- Init(canonical);
- }
- void Init(bool canonical);
-
- // Initializes the tables given generating polynomial of degree.
- // If "canonical" is true, crc value will be XOR'ed with (-1) before and
- // after actual CRC computation.
- // Provided for compatibility with GenericCrc.
- Crc32cSSE4(const Crc &generating_polynomial,
- size_t degree,
- bool canonical) {
- Init(generating_polynomial, degree, canonical);
- }
- void Init(const Crc &generating_polynomial,
- size_t degree,
- bool canonical) {
- if (generating_polynomial == FixedGeneratingPolynomial() &&
- degree == FixedDegree()) {
- Init(canonical);
- }
- }
-
- // Returns fixed generating polymonial the class implements.
- static Crc FixedGeneratingPolynomial() {
- return 0x82f63b78;
- }
-
- // Returns degree of fixed generating polymonial the class implements.
- static Crc FixedDegree() {
- return 32;
- }
-
- // Returns base class.
- const GfUtil<Crc> &Base() const { return base_; }
-
- // Computes CRC32.
- size_t CrcDefault(const void *data, size_t bytes, const Crc &crc) const {
- return Crc32c(data, bytes, crc);
- }
-
- // Returns true iff crc32 instruction is available.
- static bool IsSSE42Available();
-
- protected:
- // Actual implementation.
- size_t Crc32c(const void *data, size_t bytes, Crc crc) const;
-
- enum {
- kTableEntryBits = 8,
- kTableEntries = 1 << kTableEntryBits,
- kNumTables = (32 + kTableEntryBits - 1) / kTableEntryBits,
- kNumTablesHalfLo = kNumTables / 2,
- kNumTablesHalfHi = (kNumTables + 1) / 2,
-
- kUnrolledLoopCount = 8,
- kUnrolledLoopBytes = kUnrolledLoopCount * sizeof(size_t),
- };
-
- // May be set to size_t or uint32, whichever is faster.
- typedef uint32 Entry;
-
-#define DECLARE_MUL_TABLE(block_size, num_stripes) \
- Entry CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
- [kNumTables][kTableEntries]
-
- CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(DECLARE_MUL_TABLE);
-
-#undef DECLARE_MUL_TABLE
-
- GfUtil<Crc> base_;
-
-#if !CRCUTIL_USE_MM_CRC32
- TableEntry crc_word_[sizeof(Word)][256];
- friend class RollingCrc32cSSE4;
-#endif // !CRCUTIL_USE_MM_CRC32
-} GCC_ALIGN_ATTRIBUTE(16);
-
-class RollingCrc32cSSE4 {
- public:
- typedef Crc32cSSE4::Crc Crc;
- typedef Crc32cSSE4::TableEntry TableEntry;
- typedef Crc32cSSE4::Word Word;
-
- RollingCrc32cSSE4() {}
-
- // Initializes internal data structures.
- // Retains reference to "crc" instance -- it is used by Start().
- RollingCrc32cSSE4(const Crc32cSSE4 &crc,
- size_t roll_window_bytes,
- const Crc &start_value) {
- Init(crc, roll_window_bytes, start_value);
- }
- void Init(const Crc32cSSE4 &crc,
- size_t roll_window_bytes,
- const Crc &start_value);
-
- // Computes crc of "roll_window_bytes" using
- // "start_value" of "crc" (see Init()).
- Crc Start(const void *data) const {
- return crc_->CrcDefault(data, roll_window_bytes_, start_value_);
- }
-
- // Computes CRC of "roll_window_bytes" starting in next position.
- Crc Roll(const Crc &old_crc, size_t byte_out, size_t byte_in) const {
- Crc crc = old_crc;
- CRC_UPDATE_BYTE(crc, byte_in);
- crc ^= out_[byte_out];
- return crc;
- }
-
- // Returns start value.
- Crc StartValue() const { return start_value_; }
-
- // Returns length of roll window.
- size_t WindowBytes() const { return roll_window_bytes_; }
-
- protected:
- typedef Crc Entry;
- Entry out_[256];
-
- // Used only by Start().
- Crc start_value_;
- const Crc32cSSE4 *crc_;
- size_t roll_window_bytes_;
-
-#if !CRCUTIL_USE_MM_CRC32
- TableEntry crc_word_[sizeof(Word)][256];
-#endif // !CRCUTIL_USE_MM_CRC32
-} GCC_ALIGN_ATTRIBUTE(16);
-
-#pragma pack(pop)
-
-} // namespace crcutil
-
-#endif // HAVE_I386 || HAVE_AMD64
-
-#endif // CRCUTIL_CRC32C_SSE4_H_
+// Copyright 2010 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Implements CRC32C using Intel's SSE4 crc32 instruction.
+// Uses _mm_crc32_u64/32/8 intrinsics if CRCUTIL_USE_MM_CRC32 is not zero,
+// emilates intrinsics via CRC_WORD/CRC_BYTE otherwise.
+
+#ifndef CRCUTIL_CRC32C_SSE4_H_
+#define CRCUTIL_CRC32C_SSE4_H_
+
+#include "gf_util.h" // base types, gf_util class, etc.
+#include "crc32c_sse4_intrin.h" // _mm_crc32_u* intrinsics
+
+#if HAVE_I386 || HAVE_AMD64
+
+#if CRCUTIL_USE_MM_CRC32
+
+#if HAVE_I386
+#define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u32(crc, (value)))
+#else
+#define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u64(crc, (value)))
+#endif // HAVE_I386
+
+#define CRC_UPDATE_BYTE(crc, value) \
+ (crc = _mm_crc32_u8(static_cast<uint32>(crc), static_cast<uint8>(value)))
+
+#else
+
+#include "generic_crc.h"
+
+#define CRC_UPDATE_WORD(crc, value) do { \
+ size_t buf = (value); \
+ CRC_WORD(this, crc, buf); \
+} while (0)
+#define CRC_UPDATE_BYTE(crc, value) do { \
+ CRC_BYTE(this, crc, (value)); \
+} while (0)
+
+#endif // CRCUTIL_USE_MM_CRC32
+
+namespace crcutil {
+
+#pragma pack(push, 16)
+
+// Since the same pieces should be parameterized in many different places
+// and we do not want to introduce a mistake which is rather hard to find,
+// use a macro to enumerate all block sizes.
+//
+// Block sizes and number of stripes were tuned for best performance.
+//
+// All constants should be literal constants (too lazy to fix the macro).
+//
+// The use of different "macro_first", "macro", and "macro_last"
+// allows generation of different code for smallest, in between,
+// and largest block sizes.
+//
+// This macro shall be kept in sync with
+// CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING.
+// Failure to do so will cause compile-time error.
+#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING( \
+ macro_smallest, macro, macro_largest) \
+ macro_smallest(512, 3); \
+ macro(1024, 3); \
+ macro(4096, 3); \
+ macro_largest(32768, 3)
+
+// This macro shall be kept in sync with
+// CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING.
+// Failure to do so will cause compile-time error.
+#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING( \
+ macro_smallest, macro, macro_largest) \
+ macro_largest(32768, 3); \
+ macro(4096, 3); \
+ macro(1024, 3); \
+ macro_smallest(512, 3)
+
+// Enumerates all block sizes.
+#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(macro) \
+ CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING(macro, macro, macro)
+
+#define CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) \
+ (((block_size) / (num_stripes)) & ~(sizeof(size_t) - 1))
+
+#define CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes) \
+ (CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) * (num_stripes))
+
+#define CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
+ mul_table_##block_size##_##num_blocks##_
+
+class RollingCrc32cSSE4;
+
+class Crc32cSSE4 {
+ public:
+ // Exports Crc, TableEntry, and Word (needed by RollingCrc).
+ typedef size_t Crc;
+ typedef Crc Word;
+ typedef Crc TableEntry;
+
+ Crc32cSSE4() {}
+
+ // Initializes the tables given generating polynomial of degree (degree).
+ // If "canonical" is true, crc value will be XOR'ed with (-1) before and
+ // after actual CRC computation.
+ explicit Crc32cSSE4(bool canonical) {
+ Init(canonical);
+ }
+ void Init(bool canonical);
+
+ // Initializes the tables given generating polynomial of degree.
+ // If "canonical" is true, crc value will be XOR'ed with (-1) before and
+ // after actual CRC computation.
+ // Provided for compatibility with GenericCrc.
+ Crc32cSSE4(const Crc &generating_polynomial,
+ size_t degree,
+ bool canonical) {
+ Init(generating_polynomial, degree, canonical);
+ }
+ void Init(const Crc &generating_polynomial,
+ size_t degree,
+ bool canonical) {
+ if (generating_polynomial == FixedGeneratingPolynomial() &&
+ degree == FixedDegree()) {
+ Init(canonical);
+ }
+ }
+
+ // Returns fixed generating polymonial the class implements.
+ static Crc FixedGeneratingPolynomial() {
+ return 0x82f63b78;
+ }
+
+ // Returns degree of fixed generating polymonial the class implements.
+ static Crc FixedDegree() {
+ return 32;
+ }
+
+ // Returns base class.
+ const GfUtil<Crc> &Base() const { return base_; }
+
+ // Computes CRC32.
+ size_t CrcDefault(const void *data, size_t bytes, const Crc &crc) const {
+ return Crc32c(data, bytes, crc);
+ }
+
+ // Returns true iff crc32 instruction is available.
+ static bool IsSSE42Available();
+
+ protected:
+ // Actual implementation.
+ size_t Crc32c(const void *data, size_t bytes, Crc crc) const;
+
+ enum {
+ kTableEntryBits = 8,
+ kTableEntries = 1 << kTableEntryBits,
+ kNumTables = (32 + kTableEntryBits - 1) / kTableEntryBits,
+ kNumTablesHalfLo = kNumTables / 2,
+ kNumTablesHalfHi = (kNumTables + 1) / 2,
+
+ kUnrolledLoopCount = 8,
+ kUnrolledLoopBytes = kUnrolledLoopCount * sizeof(size_t),
+ };
+
+ // May be set to size_t or uint32, whichever is faster.
+ typedef uint32 Entry;
+
+#define DECLARE_MUL_TABLE(block_size, num_stripes) \
+ Entry CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
+ [kNumTables][kTableEntries]
+
+ CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(DECLARE_MUL_TABLE);
+
+#undef DECLARE_MUL_TABLE
+
+ GfUtil<Crc> base_;
+
+#if !CRCUTIL_USE_MM_CRC32
+ TableEntry crc_word_[sizeof(Word)][256];
+ friend class RollingCrc32cSSE4;
+#endif // !CRCUTIL_USE_MM_CRC32
+} GCC_ALIGN_ATTRIBUTE(16);
+
+class RollingCrc32cSSE4 {
+ public:
+ typedef Crc32cSSE4::Crc Crc;
+ typedef Crc32cSSE4::TableEntry TableEntry;
+ typedef Crc32cSSE4::Word Word;
+
+ RollingCrc32cSSE4() {}
+
+ // Initializes internal data structures.
+ // Retains reference to "crc" instance -- it is used by Start().
+ RollingCrc32cSSE4(const Crc32cSSE4 &crc,
+ size_t roll_window_bytes,
+ const Crc &start_value) {
+ Init(crc, roll_window_bytes, start_value);
+ }
+ void Init(const Crc32cSSE4 &crc,
+ size_t roll_window_bytes,
+ const Crc &start_value);
+
+ // Computes crc of "roll_window_bytes" using
+ // "start_value" of "crc" (see Init()).
+ Crc Start(const void *data) const {
+ return crc_->CrcDefault(data, roll_window_bytes_, start_value_);
+ }
+
+ // Computes CRC of "roll_window_bytes" starting in next position.
+ Crc Roll(const Crc &old_crc, size_t byte_out, size_t byte_in) const {
+ Crc crc = old_crc;
+ CRC_UPDATE_BYTE(crc, byte_in);
+ crc ^= out_[byte_out];
+ return crc;
+ }
+
+ // Returns start value.
+ Crc StartValue() const { return start_value_; }
+
+ // Returns length of roll window.
+ size_t WindowBytes() const { return roll_window_bytes_; }
+
+ protected:
+ typedef Crc Entry;
+ Entry out_[256];
+
+ // Used only by Start().
+ Crc start_value_;
+ const Crc32cSSE4 *crc_;
+ size_t roll_window_bytes_;
+
+#if !CRCUTIL_USE_MM_CRC32
+ TableEntry crc_word_[sizeof(Word)][256];
+#endif // !CRCUTIL_USE_MM_CRC32
+} GCC_ALIGN_ATTRIBUTE(16);
+
+#pragma pack(pop)
+
+} // namespace crcutil
+
+#endif // HAVE_I386 || HAVE_AMD64
+
+#endif // CRCUTIL_CRC32C_SSE4_H_
diff --git a/contrib/libs/crcutil/crc32c_sse4_intrin.h b/contrib/libs/crcutil/crc32c_sse4_intrin.h
index ab7cc3e03d..5338d21d29 100644
--- a/contrib/libs/crcutil/crc32c_sse4_intrin.h
+++ b/contrib/libs/crcutil/crc32c_sse4_intrin.h
@@ -1,99 +1,99 @@
-// Copyright 2010 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Provides _mm_crc32_u64/32/8 intrinsics.
-
-#ifndef CRCUTIL_CRC32C_SSE4_INTRIN_H_
-#define CRCUTIL_CRC32C_SSE4_INTRIN_H_
-
-#include "platform.h"
-#include "base_types.h"
-
-#if CRCUTIL_USE_MM_CRC32 && (HAVE_I386 || HAVE_AMD64)
-
-#if defined(_MSC_VER) || defined(__SSE4_2__)
-
-#if defined(_MSC_VER)
-#pragma warning(push)
-// '_M_IA64' is not defined as a preprocessor macro
-#pragma warning(disable: 4668)
-#endif // defined(_MSC_VER)
-
-#include <nmmintrin.h>
-
-#if defined(_MSC_VER)
-#pragma warning(pop)
-#endif // defined(_MSC_VER)
-
-#elif GCC_VERSION_AVAILABLE(4, 5) && !defined(CRCUTIL_FORCE_ASM_CRC32C)
-// Allow the use of _mm_crc32_u* intrinsic when CRCUTIL_USE_MM_CRC32
-// is set irrespective of "-msse*" settings. This way, the sources
-// may be compiled with "-msse2 -mcrc32" and work on older CPUs,
-// while taking full advantage of "crc32" instruction on newer
-// CPUs (requires dynamic CPU detection). See "interface.cc".
-//
-// If neither -msse4 or -mcrc32 is provided and CRCUTIL_USE_MM_CRC32 is set
-// and CRCUTIL_FORCE_ASM_CRC32 is not set, compile-time error will happen.
-// Why? Becuase GCC disables __builtin_ia32_crc32* intrinsics when compiled
-// without -msse4 or -mcrc32. -msse4 could be detected at run time by checking
-// whether __SSE4_2__ is defined, but there is no way to tell whether the
-// sources are compiled with -mcrc32.
-
-extern __inline unsigned int __attribute__((
- __gnu_inline__, __always_inline__, __artificial__))
-_mm_crc32_u8(unsigned int __C, unsigned char __V) {
- return __builtin_ia32_crc32qi(__C, __V);
-}
-#ifdef __x86_64__
-extern __inline unsigned long long __attribute__((
- __gnu_inline__, __always_inline__, __artificial__))
-_mm_crc32_u64(unsigned long long __C, unsigned long long __V) {
- return __builtin_ia32_crc32di(__C, __V);
-}
-#else
-extern __inline unsigned int __attribute__((
- __gnu_inline__, __always_inline__, __artificial__))
-_mm_crc32_u32(unsigned int __C, unsigned int __V) {
- return __builtin_ia32_crc32si (__C, __V);
-}
-#endif // __x86_64__
-
-#else
-
-// GCC 4.4.x and earlier: use inline asm.
-
-namespace crcutil {
-
-__forceinline uint64 _mm_crc32_u64(uint64 crc, uint64 value) {
- asm("crc32q %[value], %[crc]\n" : [crc] "+r" (crc) : [value] "rm" (value));
- return crc;
-}
-
-__forceinline uint32 _mm_crc32_u32(uint32 crc, uint64 value) {
- asm("crc32l %[value], %[crc]\n" : [crc] "+r" (crc) : [value] "rm" (value));
- return crc;
-}
-
-__forceinline uint32 _mm_crc32_u8(uint32 crc, uint8 value) {
- asm("crc32b %[value], %[crc]\n" : [crc] "+r" (crc) : [value] "rm" (value));
- return crc;
-}
-
-} // namespace crcutil
-
-#endif
-
-#endif // CRCUTIL_USE_MM_CRC32 && (HAVE_I386 || HAVE_AMD64)
-
-#endif // CRCUTIL_CRC32C_SSE4_INTRIN_H_
+// Copyright 2010 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Provides _mm_crc32_u64/32/8 intrinsics.
+
+#ifndef CRCUTIL_CRC32C_SSE4_INTRIN_H_
+#define CRCUTIL_CRC32C_SSE4_INTRIN_H_
+
+#include "platform.h"
+#include "base_types.h"
+
+#if CRCUTIL_USE_MM_CRC32 && (HAVE_I386 || HAVE_AMD64)
+
+#if defined(_MSC_VER) || defined(__SSE4_2__)
+
+#if defined(_MSC_VER)
+#pragma warning(push)
+// '_M_IA64' is not defined as a preprocessor macro
+#pragma warning(disable: 4668)
+#endif // defined(_MSC_VER)
+
+#include <nmmintrin.h>
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif // defined(_MSC_VER)
+
+#elif GCC_VERSION_AVAILABLE(4, 5) && !defined(CRCUTIL_FORCE_ASM_CRC32C)
+// Allow the use of _mm_crc32_u* intrinsic when CRCUTIL_USE_MM_CRC32
+// is set irrespective of "-msse*" settings. This way, the sources
+// may be compiled with "-msse2 -mcrc32" and work on older CPUs,
+// while taking full advantage of "crc32" instruction on newer
+// CPUs (requires dynamic CPU detection). See "interface.cc".
+//
+// If neither -msse4 or -mcrc32 is provided and CRCUTIL_USE_MM_CRC32 is set
+// and CRCUTIL_FORCE_ASM_CRC32 is not set, compile-time error will happen.
+// Why? Becuase GCC disables __builtin_ia32_crc32* intrinsics when compiled
+// without -msse4 or -mcrc32. -msse4 could be detected at run time by checking
+// whether __SSE4_2__ is defined, but there is no way to tell whether the
+// sources are compiled with -mcrc32.
+
+extern __inline unsigned int __attribute__((
+ __gnu_inline__, __always_inline__, __artificial__))
+_mm_crc32_u8(unsigned int __C, unsigned char __V) {
+ return __builtin_ia32_crc32qi(__C, __V);
+}
+#ifdef __x86_64__
+extern __inline unsigned long long __attribute__((
+ __gnu_inline__, __always_inline__, __artificial__))
+_mm_crc32_u64(unsigned long long __C, unsigned long long __V) {
+ return __builtin_ia32_crc32di(__C, __V);
+}
+#else
+extern __inline unsigned int __attribute__((
+ __gnu_inline__, __always_inline__, __artificial__))
+_mm_crc32_u32(unsigned int __C, unsigned int __V) {
+ return __builtin_ia32_crc32si (__C, __V);
+}
+#endif // __x86_64__
+
+#else
+
+// GCC 4.4.x and earlier: use inline asm.
+
+namespace crcutil {
+
+__forceinline uint64 _mm_crc32_u64(uint64 crc, uint64 value) {
+ asm("crc32q %[value], %[crc]\n" : [crc] "+r" (crc) : [value] "rm" (value));
+ return crc;
+}
+
+__forceinline uint32 _mm_crc32_u32(uint32 crc, uint64 value) {
+ asm("crc32l %[value], %[crc]\n" : [crc] "+r" (crc) : [value] "rm" (value));
+ return crc;
+}
+
+__forceinline uint32 _mm_crc32_u8(uint32 crc, uint8 value) {
+ asm("crc32b %[value], %[crc]\n" : [crc] "+r" (crc) : [value] "rm" (value));
+ return crc;
+}
+
+} // namespace crcutil
+
+#endif
+
+#endif // CRCUTIL_USE_MM_CRC32 && (HAVE_I386 || HAVE_AMD64)
+
+#endif // CRCUTIL_CRC32C_SSE4_INTRIN_H_
diff --git a/contrib/libs/crcutil/crc_casts.h b/contrib/libs/crcutil/crc_casts.h
index a14044fea3..f732329978 100644
--- a/contrib/libs/crcutil/crc_casts.h
+++ b/contrib/libs/crcutil/crc_casts.h
@@ -1,68 +1,68 @@
-// Copyright 2010 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Casting between integers and compound CRC types.
-
-#ifndef CRCUTIL_CRC_CASTS_H_
-#define CRCUTIL_CRC_CASTS_H_
-
-#include "base_types.h" // uint8, uint64
-#include "platform.h" // __forceinline
-
-namespace crcutil {
-
-// Downcasts a value of (oftentimes larger) Crc type to (smaller base integer)
-// Result type, enabling specialized downcasts implemented by "large integer"
-// classes (e.g. uint128_sse2).
-template<typename Crc, typename Result>
-__forceinline Result Downcast(const Crc &x) {
- return static_cast<Result>(x);
-}
-
-// Extracts 8 least significant bits from a value of Crc type.
-#define TO_BYTE(x) Downcast<Crc, uint8>(x)
-
-// Converts a pair of uint64 bit values into single value of CRC type.
-// It is caller's responsibility to ensure that the input is correct.
-template<typename Crc>
-__forceinline Crc CrcFromUint64(uint64 lo, uint64 hi = 0) {
- if (sizeof(Crc) <= sizeof(lo)) {
- return static_cast<Crc>(lo);
- } else {
- // static_cast to keep compiler happy.
- Crc result = static_cast<Crc>(hi);
- result = SHIFT_LEFT_SAFE(result, 8 * sizeof(lo));
- result ^= lo;
- return result;
- }
-}
-
-// Converts Crc value to a pair of uint64 values.
-template<typename Crc>
-__forceinline void Uint64FromCrc(const Crc &crc,
- uint64 *lo, uint64 *hi = NULL) {
- if (sizeof(*lo) >= sizeof(crc)) {
- *lo = Downcast<Crc, uint64>(crc);
- if (hi != NULL) {
- *hi = 0;
- }
- } else {
- *lo = Downcast<Crc, uint64>(crc);
- *hi = Downcast<Crc, uint64>(SHIFT_RIGHT_SAFE(crc, 8 * sizeof(lo)));
- }
-}
-
-} // namespace crcutil
-
-#endif // CRCUTIL_CRC_CASTS_H_
+// Copyright 2010 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Casting between integers and compound CRC types.
+
+#ifndef CRCUTIL_CRC_CASTS_H_
+#define CRCUTIL_CRC_CASTS_H_
+
+#include "base_types.h" // uint8, uint64
+#include "platform.h" // __forceinline
+
+namespace crcutil {
+
+// Downcasts a value of (oftentimes larger) Crc type to (smaller base integer)
+// Result type, enabling specialized downcasts implemented by "large integer"
+// classes (e.g. uint128_sse2).
+template<typename Crc, typename Result>
+__forceinline Result Downcast(const Crc &x) {
+ return static_cast<Result>(x);
+}
+
+// Extracts 8 least significant bits from a value of Crc type.
+#define TO_BYTE(x) Downcast<Crc, uint8>(x)
+
+// Converts a pair of uint64 bit values into single value of CRC type.
+// It is caller's responsibility to ensure that the input is correct.
+template<typename Crc>
+__forceinline Crc CrcFromUint64(uint64 lo, uint64 hi = 0) {
+ if (sizeof(Crc) <= sizeof(lo)) {
+ return static_cast<Crc>(lo);
+ } else {
+ // static_cast to keep compiler happy.
+ Crc result = static_cast<Crc>(hi);
+ result = SHIFT_LEFT_SAFE(result, 8 * sizeof(lo));
+ result ^= lo;
+ return result;
+ }
+}
+
+// Converts Crc value to a pair of uint64 values.
+template<typename Crc>
+__forceinline void Uint64FromCrc(const Crc &crc,
+ uint64 *lo, uint64 *hi = NULL) {
+ if (sizeof(*lo) >= sizeof(crc)) {
+ *lo = Downcast<Crc, uint64>(crc);
+ if (hi != NULL) {
+ *hi = 0;
+ }
+ } else {
+ *lo = Downcast<Crc, uint64>(crc);
+ *hi = Downcast<Crc, uint64>(SHIFT_RIGHT_SAFE(crc, 8 * sizeof(lo)));
+ }
+}
+
+} // namespace crcutil
+
+#endif // CRCUTIL_CRC_CASTS_H_
diff --git a/contrib/libs/crcutil/generic_crc.h b/contrib/libs/crcutil/generic_crc.h
index 06af21c925..4832005776 100644
--- a/contrib/libs/crcutil/generic_crc.h
+++ b/contrib/libs/crcutil/generic_crc.h
@@ -1,687 +1,687 @@
-// Copyright 2010 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Defines GenericCrc class which implements arbitrary CRCs.
-//
-// Please read crc.pdf to understand how it all works.
-
-#ifndef CRCUTIL_GENERIC_CRC_H_
-#define CRCUTIL_GENERIC_CRC_H_
-
-#include "base_types.h" // uint8
-#include "crc_casts.h" // TO_BYTE(), Downcast<>.
-#include "gf_util.h" // GfUtil<Crc> class.
-#include "platform.h" // GCC_ALIGN_ATTRIBUTE(16)
-#include "uint128_sse2.h" // uint128_sse2 type (if necessary)
-
-namespace crcutil {
-
-#pragma pack(push, 16)
-
-// Extends CRC by one byte.
-// Technically, if degree of a polynomial does not exceed 8,
-// right shift by 8 bits is not required, but who cares about CRC-8?
-#define CRC_BYTE(table, crc, byte) do { \
- crc = ((sizeof(crc) > 1) ? SHIFT_RIGHT_SAFE(crc, 8) : 0) ^ \
- table->crc_word_[sizeof(Word) - 1][TO_BYTE(crc) ^ (byte)]; \
-} while (0)
-
-#define TABLE_ENTRY(table, byte, buf) \
- table[byte][Downcast<Word, uint8>(buf)]
-
-#define TABLE_ENTRY_LAST(table, buf) \
- table[sizeof(Word) - 1][buf]
-
-// Extends CRC by one word.
-#define CRC_WORD(table, crc, buf) do { \
- buf ^= Downcast<Crc, Word>(crc); \
- if (sizeof(crc) > sizeof(buf)) { \
- crc = SHIFT_RIGHT_SAFE(crc, sizeof(buf) * 8); \
- crc ^= TABLE_ENTRY(table->crc_word_, 0, buf); \
- } else { \
- crc = TABLE_ENTRY(table->crc_word_, 0, buf); \
- } \
- buf >>= 8; \
- for (size_t byte = 1; byte < sizeof(buf) - 1; ++byte) { \
- crc ^= TABLE_ENTRY(table->crc_word_, byte, buf); \
- buf >>= 8; \
- } \
- crc ^= TABLE_ENTRY_LAST(table->crc_word_, buf); \
-} while (0)
-
-// Process beginning of data block byte by byte until source pointer
-// becomes perfectly aligned on Word boundary.
-#define ALIGN_ON_WORD_BOUNDARY(table, src, end, crc, Word) do { \
- while ((reinterpret_cast<size_t>(src) & (sizeof(Word) - 1)) != 0) { \
- if (src >= end) { \
- return (crc ^ table->Base().Canonize()); \
- } \
- CRC_BYTE(table, crc, *src); \
- src += 1; \
- } \
-} while (0)
-
-
-// On amd64, enforcing alignment is 2-4% slower on small (<= 64 bytes) blocks
-// but 6-10% faster on larger blocks (>= 2KB).
-// Break-even point (+-1%) is around 1KB (Q9650, E6600).
-//
-#define ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, table, src, end, crc, Word) \
-do { \
- if (sizeof(Word) > 8 || (bytes) > CRCUTIL_MIN_ALIGN_SIZE) { \
- ALIGN_ON_WORD_BOUNDARY(table, src, end, crc, Word); \
- } \
-} while (0)
-
-#if defined(_MSC_VER)
-#pragma warning(push)
-#pragma warning(disable: 4127) // conditional expression is constant
-#endif // defined(_MSC_VER)
-
-// Forward declarations.
-template<typename CrcImplementation> class RollingCrc;
-
-// Crc is the type used internally and to return values of N-bit CRC.
-// It should be at least as large as "TableEntry" and "Word" but
-// may be larger (e.g. for 16-bit CRC, TableEntry and Word may be
-// set to uint16 but Crc may be set to uint32).
-//
-// TableEntry is the type of values stored in the tables.
-// To implement N-bit CRC, TableEntry should be large enough
-// to store N bits.
-//
-// Word is the type used to read data sizeof(Word) at a time.
-// Ideally, it shoulde be "most suitable for given architecture"
-// integer type -- typically "size_t".
-//
-// kStride is the number of words processed in interleaved manner by
-// CrcMultiword() and CrcWordblock(). Shall be either 3 or 4.
-// Optimal value depends on hardware architecture (AMD64, ARM, etc).
-//
-template<typename _Crc, typename _TableEntry, typename _Word, int kStride>
- class GenericCrc {
- public:
- // Make Crc, TableEntry, and Word types visible (used by RollingCrc etc.)
- typedef _Crc Crc;
- typedef _TableEntry TableEntry;
- typedef _Word Word;
-
- GenericCrc() {}
-
- // Initializes the tables given generating polynomial of degree.
- // If "canonical" is true, crc value will be XOR'ed with (-1) before and
- // after actual CRC computation.
- GenericCrc(const Crc &generating_polynomial, size_t degree, bool canonical) {
- Init(generating_polynomial, degree, canonical);
- }
- void Init(const Crc &generating_polynomial, size_t degree, bool canonical) {
- base_.Init(generating_polynomial, degree, canonical);
-
- // Instead of computing
- // table[j][i] = MultiplyUnnormalized(i, 8, k),
- // for all i = 0...255, we may notice that
- // if i = 2**n then for all m = 1...(i-1)
- // MultiplyUnnormalized(i + m, 8, k) =
- // MultiplyUnnormalized(i ^ m, 8, k) =
- // MultiplyUnnormalized(i, 8, k) ^ MultiplyUnnormalized(m, 8, k) =
- // MultiplyUnnormalized(i, 8, k) ^ crc_word_interleaved[j][m] =
- // table[i] ^ table[m].
-#if 0
- for (size_t j = 0; j < sizeof(Word); ++j) {
- Crc k = Base().XpowN((sizeof(Word) * kStride - 1 - j) * 8 + degree);
- for (size_t i = 0; i < 256; ++i) {
- Crc temp = Base().MultiplyUnnormalized(static_cast<Crc>(i), 8, k);
- this->crc_word_interleaved_[j][i] = Downcast<Crc, TableEntry>(temp);
- }
- }
-#else
- for (size_t j = 0; j < sizeof(Word); ++j) {
- Crc k = Base().XpowN((sizeof(Word) * kStride - 1 - j) * 8 + degree);
- TableEntry *table = this->crc_word_interleaved_[j];
- table[0] = 0; // Init 0s entry -- multiply 0 by anything yields 0.
- for (size_t i = 1; i < 256; i <<= 1) {
- TableEntry value = Downcast<Crc, TableEntry>(
- Base().MultiplyUnnormalized(static_cast<Crc>(i), 8, k));
- table[i] = value;
- for (size_t m = 1; m < i; ++m) {
- table[i + m] = value ^ table[m];
- }
- }
- }
-#endif
-
-#if 0
- for (size_t j = 0; j < sizeof(Word); ++j) {
- Crc k = Base().XpowN((sizeof(Word) - 1 - j) * 8 + degree);
- for (size_t i = 0; i < 256; ++i) {
- Crc temp = Base().MultiplyUnnormalized(static_cast<Crc>(i), 8, k);
- this->crc_word_[j][i] = Downcast<Crc, TableEntry>(temp);
- }
- }
-#else
- for (size_t j = 0; j < sizeof(Word); ++j) {
- Crc k = Base().XpowN((sizeof(Word) - 1 - j) * 8 + degree);
- TableEntry *table = this->crc_word_[j];
- table[0] = 0; // Init 0s entry -- multiply 0 by anything yields 0.
- for (size_t i = 1; i < 256; i <<= 1) {
- TableEntry value = Downcast<Crc, TableEntry>(
- Base().MultiplyUnnormalized(static_cast<Crc>(i), 8, k));
- table[i] = value;
- for (size_t m = 1; m < i; ++m) {
- table[i + m] = value ^ table[m];
- }
- }
- }
-#endif
- }
-
- // Default CRC implementation
- Crc CrcDefault(const void *data, size_t bytes, const Crc &start) const {
-#if HAVE_AMD64 || HAVE_I386
- return CrcMultiword(data, bytes, start);
-#else
- // Very few CPUs have multiple ALUs and speculative execution
- // (Itanium is an exception) so sophisticated algorithms will
- // not perform better than good old Sarwate algorithm.
- return CrcByteUnrolled(data, bytes, start);
-#endif // HAVE_AMD64 || HAVE_I386
- }
-
- // Returns base class.
- const GfUtil<Crc> &Base() const { return base_; }
-
- protected:
- // Canonical, byte-by-byte CRC computation.
- Crc CrcByte(const void *data, size_t bytes, const Crc &start) const {
- const uint8 *src = static_cast<const uint8 *>(data);
- Crc crc = start ^ Base().Canonize();
- for (const uint8 *end = src + bytes; src < end; ++src) {
- CRC_BYTE(this, crc, *src);
- }
- return (crc ^ Base().Canonize());
- }
-
- // Byte-by-byte CRC with main loop unrolled.
- Crc CrcByteUnrolled(const void *data, size_t bytes, const Crc &start) const {
- if (bytes == 0) {
- return start;
- }
-
- const uint8 *src = static_cast<const uint8 *>(data);
- const uint8 *end = src + bytes;
- Crc crc = start ^ Base().Canonize();
-
- // Unroll loop 4 times.
- end -= 3;
- for (; src < end; src += 4) {
- PREFETCH(src);
- CRC_BYTE(this, crc, src[0]);
- CRC_BYTE(this, crc, src[1]);
- CRC_BYTE(this, crc, src[2]);
- CRC_BYTE(this, crc, src[3]);
- }
- end += 3;
-
- // Compute CRC of remaining bytes.
- for (; src < end; ++src) {
- CRC_BYTE(this, crc, *src);
- }
-
- return (crc ^ Base().Canonize());
- }
-
- // Canonical, byte-by-byte CRC computation.
- Crc CrcByteWord(const void *data, size_t bytes, const Crc &start) const {
- const uint8 *src = static_cast<const uint8 *>(data);
- const uint8 *end = src + bytes;
- Crc crc0 = start ^ Base().Canonize();
-
- ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Crc);
- if (src >= end) {
- return (crc0 ^ Base().Canonize());
- }
-
- // Process 4*sizeof(Crc) bytes at a time.
- end -= 4 * sizeof(Crc) - 1;
- for (; src < end; src += 4 * sizeof(Crc)) {
- for (size_t i = 0; i < 4; ++i) {
- crc0 ^= reinterpret_cast<const Crc *>(src)[i];
- if (i == 0) {
- PREFETCH(src);
- }
- for (size_t byte = 0; byte < sizeof(crc0); ++byte) {
- CRC_BYTE(this, crc0, 0);
- }
- }
- }
- end += 4 * sizeof(Crc) - 1;
-
- // Process sizeof(Crc) bytes at a time.
- end -= sizeof(Crc) - 1;
- for (; src < end; src += sizeof(Crc)) {
- crc0 ^= reinterpret_cast<const Crc *>(src)[0];
- for (size_t byte = 0; byte < sizeof(crc0); ++byte) {
- CRC_BYTE(this, crc0, 0);
- }
- }
- end += sizeof(Crc) - 1;
-
- // Compute CRC of remaining bytes.
- for (;src < end; ++src) {
- CRC_BYTE(this, crc0, *src);
- }
-
- return (crc0 ^ Base().Canonize());
- }
-
- // Faster, word-by-word CRC.
- Crc CrcWord(const void *data, size_t bytes, const Crc &start) const {
- const uint8 *src = static_cast<const uint8 *>(data);
- const uint8 *end = src + bytes;
- Crc crc0 = start ^ Base().Canonize();
-
- ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Word);
- if (src >= end) {
- return (crc0 ^ Base().Canonize());
- }
-
- // Process 4 sizeof(Word) bytes at once.
- end -= 4 * sizeof(Word) - 1;
- for (; src < end; src += 4 * sizeof(Word)) {
- Word buf0 = reinterpret_cast<const Word *>(src)[0];
- PREFETCH(src);
- CRC_WORD(this, crc0, buf0);
- buf0 = reinterpret_cast<const Word *>(src)[1];
- CRC_WORD(this, crc0, buf0);
- buf0 = reinterpret_cast<const Word *>(src)[2];
- CRC_WORD(this, crc0, buf0);
- buf0 = reinterpret_cast<const Word *>(src)[3];
- CRC_WORD(this, crc0, buf0);
- }
- end += 4 * sizeof(Word) - 1;
-
- // Process sizeof(Word) bytes at a time.
- end -= sizeof(Word) - 1;
- for (; src < end; src += sizeof(Word)) {
- Word buf0 = reinterpret_cast<const Word *>(src)[0];
- CRC_WORD(this, crc0, buf0);
- }
- end += sizeof(Word) - 1;
-
- // Compute CRC of remaining bytes.
- for (;src < end; ++src) {
- CRC_BYTE(this, crc0, *src);
- }
-
- return (crc0 ^ Base().Canonize());
- }
-
-#define REPEAT_FROM_1(macro) \
- macro(1); \
- macro(2); \
- macro(3); \
- macro(4); \
- macro(5); \
- macro(6); \
- macro(7);
-
-#define REPEAT_FROM_0(macro) \
- macro(0); \
- REPEAT_FROM_1(macro)
-
- // Faster, process adjusent blocks in parallel and concatenate CRCs.
- Crc CrcBlockword(const void *data, size_t bytes, const Crc &start) const {
- if (kStride < 2 || kStride > 8) {
- // Unsupported configuration;
- // fall back to something sensible.
- return CrcWord(data, bytes, start);
- }
-
- const uint8 *src = static_cast<const uint8 *>(data);
- const uint8 *end = src + bytes;
- Crc crc0 = start ^ Base().Canonize();
- enum {
- // Add 16 to avoid false L1 cache collisions.
- kStripe = (15*1024 + 16) & ~(sizeof(Word) - 1),
- };
-
- ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Word);
- if (src >= end) {
- return (crc0 ^ Base().Canonize());
- }
-
- end -= kStride * kStripe - 1;
- if (src < end) {
- Crc x_pow_8kStripe = Base().Xpow8N(kStripe);
- do {
- const uint8 *stripe_end = src + kStripe;
-
-#define INIT_CRC(reg) \
- Crc crc##reg; \
- if (kStride >= reg) { \
- crc##reg = 0; \
- }
- REPEAT_FROM_1(INIT_CRC);
-#undef INIT_CRC
-
- do {
-#define FIRST(reg) \
- Word buf##reg; \
- if (kStride > reg) { \
- buf##reg = reinterpret_cast<const Word *>(src + reg * kStripe)[0]; \
- buf##reg ^= Downcast<Crc, Word>(crc##reg); \
- if (sizeof(crc##reg) > sizeof(buf##reg)) { \
- crc##reg = SHIFT_RIGHT_SAFE(crc##reg, sizeof(buf##reg) * 8); \
- crc##reg ^= TABLE_ENTRY(this->crc_word_, 0, buf##reg); \
- } else { \
- crc##reg = TABLE_ENTRY(this->crc_word_, 0, buf##reg); \
- } \
- buf##reg >>= 8; \
- }
- REPEAT_FROM_0(FIRST);
-#undef FIRST
-
- for (size_t byte = 1; byte < sizeof(buf0) - 1; ++byte) {
-#define NEXT(reg) do { \
- if (kStride > reg) { \
- crc##reg ^= TABLE_ENTRY(this->crc_word_, byte, buf##reg); \
- buf##reg >>= 8; \
- } \
-} while (0)
- REPEAT_FROM_0(NEXT);
-#undef NEXT
- }
-
-#define LAST(reg) do { \
- if (kStride > reg) { \
- crc##reg ^= TABLE_ENTRY_LAST(this->crc_word_, buf##reg); \
- } \
-} while (0)
- REPEAT_FROM_0(LAST);
-#undef LAST
-
- src += sizeof(Word);
- } while (src < stripe_end);
-
-#if 0
-// The code is left for illustrational purposes only.
-#define COMBINE(reg) do { \
- if (reg > 0 && kStride > reg) { \
- crc0 = Base().ChangeStartValue(crc##reg, kStripe, 0, crc0); \
- } \
-} while (0)
-#else
-#define COMBINE(reg) do { \
- if (reg > 0 && kStride > reg) { \
- crc0 = crc##reg ^ Base().Multiply(crc0, x_pow_8kStripe); \
- } \
-} while (0)
-#endif
- REPEAT_FROM_0(COMBINE);
-#undef COMBINE
-
- src += (kStride - 1) * kStripe;
- }
- while (src < end);
- }
- end += kStride * kStripe - 1;
-
- // Process sizeof(Word) bytes at a time.
- end -= sizeof(Word) - 1;
- for (; src < end; src += sizeof(Word)) {
- Word buf0 = reinterpret_cast<const Word *>(src)[0];
- CRC_WORD(this, crc0, buf0);
- }
- end += sizeof(Word) - 1;
-
- // Compute CRC of remaining bytes.
- for (;src < end; ++src) {
- CRC_BYTE(this, crc0, *src);
- }
-
- return (crc0 ^ Base().Canonize());
- }
-
- // Fastest, interleaved multi-byte CRC.
- Crc CrcMultiword(const void *data, size_t bytes, const Crc &start) const {
- if (kStride < 2 || kStride > 8) {
- // Unsupported configuration;
- // fall back to something sensible.
- return CrcWord(data, bytes, start);
- }
-
- const uint8 *src = static_cast<const uint8 *>(data);
- const uint8 *end = src + bytes;
- Crc crc0 = start ^ Base().Canonize();
-
- ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Word);
- if (src >= end) {
- return (crc0 ^ Base().Canonize());
- }
-
- // Process kStride Word registers at once;
- // should have have at least 2*kInterleaveBytes of data to start.
- end -= 2*kInterleaveBytes - 1;
- if (src < end) {
- Crc crc_carryover;
- if (sizeof(Crc) > sizeof(Word)) {
- // crc_carryover is used if and only if Crc is wider than Word.
- crc_carryover = 0;
- }
-#define INIT_CRC(reg) \
- Crc crc##reg; \
- if (reg > 0 && kStride > reg) { \
- crc##reg = 0; \
- }
- REPEAT_FROM_1(INIT_CRC);
-#undef INIT_CRC
-
-#define INIT_BUF(reg) \
- Word buf##reg; \
- if (kStride > reg) { \
- buf##reg = reinterpret_cast<const Word *>(src)[reg]; \
- }
- REPEAT_FROM_0(INIT_BUF);
-#undef INIT_BUF
-
- do {
- PREFETCH(src);
- src += kInterleaveBytes;
-
- if (sizeof(Crc) > sizeof(Word)) {
- crc0 ^= crc_carryover;
- }
-
-#define FIRST(reg, next_reg) do { \
- if (kStride > reg) { \
- buf##reg ^= Downcast<Crc, Word>(crc##reg); \
- if (sizeof(Crc) > sizeof(Word)) { \
- if (reg < kStride - 1) { \
- crc##next_reg ^= SHIFT_RIGHT_SAFE(crc##reg, 8 * sizeof(buf0)); \
- } else { \
- crc_carryover = SHIFT_RIGHT_SAFE(crc##reg, 8 * sizeof(buf0)); \
- } \
- } \
- crc##reg = TABLE_ENTRY(this->crc_word_interleaved_, 0, buf##reg); \
- buf##reg >>= 8; \
- } \
-} while (0)
- FIRST(0, 1);
- FIRST(1, 2);
- FIRST(2, 3);
- FIRST(3, 4);
- FIRST(4, 5);
- FIRST(5, 6);
- FIRST(6, 7);
- FIRST(7, 0);
-#undef FIRST
-
- for (size_t byte = 1; byte < sizeof(Word) - 1; ++byte) {
-#define NEXT(reg) do { \
- if (kStride > reg) { \
- crc##reg ^= \
- TABLE_ENTRY(this->crc_word_interleaved_, byte, buf##reg); \
- buf##reg >>= 8; \
- } \
-} while(0)
- REPEAT_FROM_0(NEXT);
-#undef NEXT
- }
-
-#define LAST(reg) do { \
- if (kStride > reg) { \
- crc##reg ^= TABLE_ENTRY_LAST(this->crc_word_interleaved_, buf##reg); \
- buf##reg = reinterpret_cast<const Word *>(src)[reg]; \
- } \
-} while(0)
- REPEAT_FROM_0(LAST);
-#undef LAST
- }
- while (src < end);
-
- if (sizeof(Crc) > sizeof(Word)) {
- crc0 ^= crc_carryover;
- }
-
-#define COMBINE(reg) do { \
- if (kStride > reg) { \
- if (reg != 0) { \
- crc0 ^= crc##reg; \
- } \
- CRC_WORD(this, crc0, buf##reg); \
- } \
-} while (0)
- REPEAT_FROM_0(COMBINE);
-#undef COMBINE
-
- src += kInterleaveBytes;
- }
- end += 2*kInterleaveBytes - 1;
-
- // Process sizeof(Word) bytes at once.
- end -= sizeof(Word) - 1;
- for (; src < end; src += sizeof(Word)) {
- Word buf0 = reinterpret_cast<const Word *>(src)[0];
- CRC_WORD(this, crc0, buf0);
- }
- end += sizeof(Word) - 1;
-
- // Compute CRC of remaining bytes.
- for (;src < end; ++src) {
- CRC_BYTE(this, crc0, *src);
- }
-
- return (crc0 ^ Base().Canonize());
- }
-
- protected:
- enum {
- kInterleaveBytes = sizeof(Word) * kStride,
- };
-
- // Multiplication tables used by CRCs.
- TableEntry crc_word_interleaved_[sizeof(Word)][256];
- TableEntry crc_word_[sizeof(Word)][256];
-
- // Base class stored after CRC tables so that the most frequently
- // used table is at offset 0 and may be accessed faster.
- GfUtil<Crc> base_;
-
- friend class RollingCrc< GenericCrc<Crc, TableEntry, Word, kStride> >;
-
- private:
- // CrcMultiword on amd64 may run at 1.2 CPU cycles per byte which is
- // noticeably faster than CrcWord (2.2-2.6 cycles/byte depending on
- // hardware and compiler). However, there are problems with compilers.
- //
- // Test system: P45 chipset, Intel Q9650 CPU, 800MHz 4-4-4-12 memory.
- //
- // 64-bit compiler, <= 64-bit CRC, 64-bit tables, 64-bit reads:
- // CL 15.00.307291.1 C++ >1.2< CPU cycles/byte
- // ICL 11.1.051 -O3 C++ 1.5 CPU cycles/byte
- // GCC 4.5 -O3 C++ 2.0 CPU cycles/byte
- // GCC 4.x -O3 ASM >1.2< CPU cycles/byte
- //
- // 32-bit compiler, MMX used, <= 64-bit CRC, 64-bit tables, 64-bit reads
- // CL 15.00.307291.1 C++ 2.0 CPU cycles/byte
- // GCC 4.5 -O3 C++ 1.9 CPU cycles/byte
- // ICL 11.1.051 -S C++ 1.6 CPU cycles/byte
- // GCC 4.x -O3 ASM >1.3< CPU cycles/byte
- //
- // So, use inline ASM code for GCC for both i386 and amd64.
-
- Crc CrcMultiwordI386Mmx(
- const void *data, size_t bytes, const Crc &start) const;
- Crc CrcMultiwordGccAmd64(
- const void *data, size_t bytes, const Crc &start) const;
- Crc CrcMultiwordGccAmd64Sse2(
- const uint8 *src, const uint8 *end, const Crc &start) const;
-} GCC_ALIGN_ATTRIBUTE(16);
-
-#undef REPEAT_FROM_0
-#undef REPEAT_FROM_1
-
-
-// Specialized variants.
-#if CRCUTIL_USE_ASM
-
-#if (defined(__GNUC__) && (HAVE_AMD64 || (HAVE_I386 && HAVE_MMX)))
-
-// Declare specialized functions.
-template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiword(
- const void *data, size_t bytes, const uint64 &start) const;
-
-#if HAVE_AMD64 && HAVE_SSE2
-template<>
-uint128_sse2
-GenericCrc<uint128_sse2, uint128_sse2, uint64, 4>::CrcMultiword(
- const void *data, size_t bytes, const uint128_sse2 &start) const;
-#endif // HAVE_AMD64 && HAVE_SSE2
-
-#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER <= 150030729 && \
- (HAVE_I386 && HAVE_MMX)
-
-// Work around bug in MSC (present at least in v. 15.00.30729.1)
-template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiwordI386Mmx(
- const void *data,
- size_t bytes,
- const uint64 &start) const;
-template<> __forceinline
-uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiword(
- const void *data,
- size_t bytes,
- const uint64 &start) const {
- typedef uint64 Word;
- typedef uint64 Crc;
- if (bytes <= 12) {
- const uint8 *src = static_cast<const uint8 *>(data);
- uint64 crc = start ^ Base().Canonize();
- for (const uint8 *end = src + bytes; src < end; ++src) {
- CRC_BYTE(this, crc, *src);
- }
- return (crc ^ Base().Canonize());
- }
- return CrcMultiwordI386Mmx(data, bytes, start);
-}
-
-#endif // (defined(__GNUC__) && (HAVE_AMD64 || (HAVE_I386 && HAVE_MMX)))
-
-#endif // CRCUTIL_USE_ASM
-
-
-#pragma pack(pop)
-
-} // namespace crcutil
-
-#endif // CRCUTIL_GENERIC_CRC_H_
+// Copyright 2010 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Defines GenericCrc class which implements arbitrary CRCs.
+//
+// Please read crc.pdf to understand how it all works.
+
+#ifndef CRCUTIL_GENERIC_CRC_H_
+#define CRCUTIL_GENERIC_CRC_H_
+
+#include "base_types.h" // uint8
+#include "crc_casts.h" // TO_BYTE(), Downcast<>.
+#include "gf_util.h" // GfUtil<Crc> class.
+#include "platform.h" // GCC_ALIGN_ATTRIBUTE(16)
+#include "uint128_sse2.h" // uint128_sse2 type (if necessary)
+
+namespace crcutil {
+
+#pragma pack(push, 16)
+
+// Extends CRC by one byte.
+// Technically, if degree of a polynomial does not exceed 8,
+// right shift by 8 bits is not required, but who cares about CRC-8?
+#define CRC_BYTE(table, crc, byte) do { \
+ crc = ((sizeof(crc) > 1) ? SHIFT_RIGHT_SAFE(crc, 8) : 0) ^ \
+ table->crc_word_[sizeof(Word) - 1][TO_BYTE(crc) ^ (byte)]; \
+} while (0)
+
+#define TABLE_ENTRY(table, byte, buf) \
+ table[byte][Downcast<Word, uint8>(buf)]
+
+#define TABLE_ENTRY_LAST(table, buf) \
+ table[sizeof(Word) - 1][buf]
+
+// Extends CRC by one word.
+#define CRC_WORD(table, crc, buf) do { \
+ buf ^= Downcast<Crc, Word>(crc); \
+ if (sizeof(crc) > sizeof(buf)) { \
+ crc = SHIFT_RIGHT_SAFE(crc, sizeof(buf) * 8); \
+ crc ^= TABLE_ENTRY(table->crc_word_, 0, buf); \
+ } else { \
+ crc = TABLE_ENTRY(table->crc_word_, 0, buf); \
+ } \
+ buf >>= 8; \
+ for (size_t byte = 1; byte < sizeof(buf) - 1; ++byte) { \
+ crc ^= TABLE_ENTRY(table->crc_word_, byte, buf); \
+ buf >>= 8; \
+ } \
+ crc ^= TABLE_ENTRY_LAST(table->crc_word_, buf); \
+} while (0)
+
+// Process beginning of data block byte by byte until source pointer
+// becomes perfectly aligned on Word boundary.
+#define ALIGN_ON_WORD_BOUNDARY(table, src, end, crc, Word) do { \
+ while ((reinterpret_cast<size_t>(src) & (sizeof(Word) - 1)) != 0) { \
+ if (src >= end) { \
+ return (crc ^ table->Base().Canonize()); \
+ } \
+ CRC_BYTE(table, crc, *src); \
+ src += 1; \
+ } \
+} while (0)
+
+
+// On amd64, enforcing alignment is 2-4% slower on small (<= 64 bytes) blocks
+// but 6-10% faster on larger blocks (>= 2KB).
+// Break-even point (+-1%) is around 1KB (Q9650, E6600).
+//
+#define ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, table, src, end, crc, Word) \
+do { \
+ if (sizeof(Word) > 8 || (bytes) > CRCUTIL_MIN_ALIGN_SIZE) { \
+ ALIGN_ON_WORD_BOUNDARY(table, src, end, crc, Word); \
+ } \
+} while (0)
+
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable: 4127) // conditional expression is constant
+#endif // defined(_MSC_VER)
+
+// Forward declarations.
+template<typename CrcImplementation> class RollingCrc;
+
+// Crc is the type used internally and to return values of N-bit CRC.
+// It should be at least as large as "TableEntry" and "Word" but
+// may be larger (e.g. for 16-bit CRC, TableEntry and Word may be
+// set to uint16 but Crc may be set to uint32).
+//
+// TableEntry is the type of values stored in the tables.
+// To implement N-bit CRC, TableEntry should be large enough
+// to store N bits.
+//
+// Word is the type used to read data sizeof(Word) at a time.
+// Ideally, it shoulde be "most suitable for given architecture"
+// integer type -- typically "size_t".
+//
+// kStride is the number of words processed in interleaved manner by
+// CrcMultiword() and CrcWordblock(). Shall be either 3 or 4.
+// Optimal value depends on hardware architecture (AMD64, ARM, etc).
+//
+template<typename _Crc, typename _TableEntry, typename _Word, int kStride>
+ class GenericCrc {
+ public:
+ // Make Crc, TableEntry, and Word types visible (used by RollingCrc etc.)
+ typedef _Crc Crc;
+ typedef _TableEntry TableEntry;
+ typedef _Word Word;
+
+ GenericCrc() {}
+
+ // Initializes the tables given generating polynomial of degree.
+ // If "canonical" is true, crc value will be XOR'ed with (-1) before and
+ // after actual CRC computation.
+ GenericCrc(const Crc &generating_polynomial, size_t degree, bool canonical) {
+ Init(generating_polynomial, degree, canonical);
+ }
+ void Init(const Crc &generating_polynomial, size_t degree, bool canonical) {
+ base_.Init(generating_polynomial, degree, canonical);
+
+ // Instead of computing
+ // table[j][i] = MultiplyUnnormalized(i, 8, k),
+ // for all i = 0...255, we may notice that
+ // if i = 2**n then for all m = 1...(i-1)
+ // MultiplyUnnormalized(i + m, 8, k) =
+ // MultiplyUnnormalized(i ^ m, 8, k) =
+ // MultiplyUnnormalized(i, 8, k) ^ MultiplyUnnormalized(m, 8, k) =
+ // MultiplyUnnormalized(i, 8, k) ^ crc_word_interleaved[j][m] =
+ // table[i] ^ table[m].
+#if 0
+ for (size_t j = 0; j < sizeof(Word); ++j) {
+ Crc k = Base().XpowN((sizeof(Word) * kStride - 1 - j) * 8 + degree);
+ for (size_t i = 0; i < 256; ++i) {
+ Crc temp = Base().MultiplyUnnormalized(static_cast<Crc>(i), 8, k);
+ this->crc_word_interleaved_[j][i] = Downcast<Crc, TableEntry>(temp);
+ }
+ }
+#else
+ for (size_t j = 0; j < sizeof(Word); ++j) {
+ Crc k = Base().XpowN((sizeof(Word) * kStride - 1 - j) * 8 + degree);
+ TableEntry *table = this->crc_word_interleaved_[j];
+ table[0] = 0; // Init 0s entry -- multiply 0 by anything yields 0.
+ for (size_t i = 1; i < 256; i <<= 1) {
+ TableEntry value = Downcast<Crc, TableEntry>(
+ Base().MultiplyUnnormalized(static_cast<Crc>(i), 8, k));
+ table[i] = value;
+ for (size_t m = 1; m < i; ++m) {
+ table[i + m] = value ^ table[m];
+ }
+ }
+ }
+#endif
+
+#if 0
+ for (size_t j = 0; j < sizeof(Word); ++j) {
+ Crc k = Base().XpowN((sizeof(Word) - 1 - j) * 8 + degree);
+ for (size_t i = 0; i < 256; ++i) {
+ Crc temp = Base().MultiplyUnnormalized(static_cast<Crc>(i), 8, k);
+ this->crc_word_[j][i] = Downcast<Crc, TableEntry>(temp);
+ }
+ }
+#else
+ for (size_t j = 0; j < sizeof(Word); ++j) {
+ Crc k = Base().XpowN((sizeof(Word) - 1 - j) * 8 + degree);
+ TableEntry *table = this->crc_word_[j];
+ table[0] = 0; // Init 0s entry -- multiply 0 by anything yields 0.
+ for (size_t i = 1; i < 256; i <<= 1) {
+ TableEntry value = Downcast<Crc, TableEntry>(
+ Base().MultiplyUnnormalized(static_cast<Crc>(i), 8, k));
+ table[i] = value;
+ for (size_t m = 1; m < i; ++m) {
+ table[i + m] = value ^ table[m];
+ }
+ }
+ }
+#endif
+ }
+
+ // Default CRC implementation
+ Crc CrcDefault(const void *data, size_t bytes, const Crc &start) const {
+#if HAVE_AMD64 || HAVE_I386
+ return CrcMultiword(data, bytes, start);
+#else
+ // Very few CPUs have multiple ALUs and speculative execution
+ // (Itanium is an exception) so sophisticated algorithms will
+ // not perform better than good old Sarwate algorithm.
+ return CrcByteUnrolled(data, bytes, start);
+#endif // HAVE_AMD64 || HAVE_I386
+ }
+
+ // Returns base class.
+ const GfUtil<Crc> &Base() const { return base_; }
+
+ protected:
+ // Canonical, byte-by-byte CRC computation.
+ Crc CrcByte(const void *data, size_t bytes, const Crc &start) const {
+ const uint8 *src = static_cast<const uint8 *>(data);
+ Crc crc = start ^ Base().Canonize();
+ for (const uint8 *end = src + bytes; src < end; ++src) {
+ CRC_BYTE(this, crc, *src);
+ }
+ return (crc ^ Base().Canonize());
+ }
+
+ // Byte-by-byte CRC with main loop unrolled.
+ Crc CrcByteUnrolled(const void *data, size_t bytes, const Crc &start) const {
+ if (bytes == 0) {
+ return start;
+ }
+
+ const uint8 *src = static_cast<const uint8 *>(data);
+ const uint8 *end = src + bytes;
+ Crc crc = start ^ Base().Canonize();
+
+ // Unroll loop 4 times.
+ end -= 3;
+ for (; src < end; src += 4) {
+ PREFETCH(src);
+ CRC_BYTE(this, crc, src[0]);
+ CRC_BYTE(this, crc, src[1]);
+ CRC_BYTE(this, crc, src[2]);
+ CRC_BYTE(this, crc, src[3]);
+ }
+ end += 3;
+
+ // Compute CRC of remaining bytes.
+ for (; src < end; ++src) {
+ CRC_BYTE(this, crc, *src);
+ }
+
+ return (crc ^ Base().Canonize());
+ }
+
+ // Canonical, byte-by-byte CRC computation.
+ Crc CrcByteWord(const void *data, size_t bytes, const Crc &start) const {
+ const uint8 *src = static_cast<const uint8 *>(data);
+ const uint8 *end = src + bytes;
+ Crc crc0 = start ^ Base().Canonize();
+
+ ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Crc);
+ if (src >= end) {
+ return (crc0 ^ Base().Canonize());
+ }
+
+ // Process 4*sizeof(Crc) bytes at a time.
+ end -= 4 * sizeof(Crc) - 1;
+ for (; src < end; src += 4 * sizeof(Crc)) {
+ for (size_t i = 0; i < 4; ++i) {
+ crc0 ^= reinterpret_cast<const Crc *>(src)[i];
+ if (i == 0) {
+ PREFETCH(src);
+ }
+ for (size_t byte = 0; byte < sizeof(crc0); ++byte) {
+ CRC_BYTE(this, crc0, 0);
+ }
+ }
+ }
+ end += 4 * sizeof(Crc) - 1;
+
+ // Process sizeof(Crc) bytes at a time.
+ end -= sizeof(Crc) - 1;
+ for (; src < end; src += sizeof(Crc)) {
+ crc0 ^= reinterpret_cast<const Crc *>(src)[0];
+ for (size_t byte = 0; byte < sizeof(crc0); ++byte) {
+ CRC_BYTE(this, crc0, 0);
+ }
+ }
+ end += sizeof(Crc) - 1;
+
+ // Compute CRC of remaining bytes.
+ for (;src < end; ++src) {
+ CRC_BYTE(this, crc0, *src);
+ }
+
+ return (crc0 ^ Base().Canonize());
+ }
+
+ // Faster, word-by-word CRC.
+ Crc CrcWord(const void *data, size_t bytes, const Crc &start) const {
+ const uint8 *src = static_cast<const uint8 *>(data);
+ const uint8 *end = src + bytes;
+ Crc crc0 = start ^ Base().Canonize();
+
+ ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Word);
+ if (src >= end) {
+ return (crc0 ^ Base().Canonize());
+ }
+
+ // Process 4 sizeof(Word) bytes at once.
+ end -= 4 * sizeof(Word) - 1;
+ for (; src < end; src += 4 * sizeof(Word)) {
+ Word buf0 = reinterpret_cast<const Word *>(src)[0];
+ PREFETCH(src);
+ CRC_WORD(this, crc0, buf0);
+ buf0 = reinterpret_cast<const Word *>(src)[1];
+ CRC_WORD(this, crc0, buf0);
+ buf0 = reinterpret_cast<const Word *>(src)[2];
+ CRC_WORD(this, crc0, buf0);
+ buf0 = reinterpret_cast<const Word *>(src)[3];
+ CRC_WORD(this, crc0, buf0);
+ }
+ end += 4 * sizeof(Word) - 1;
+
+ // Process sizeof(Word) bytes at a time.
+ end -= sizeof(Word) - 1;
+ for (; src < end; src += sizeof(Word)) {
+ Word buf0 = reinterpret_cast<const Word *>(src)[0];
+ CRC_WORD(this, crc0, buf0);
+ }
+ end += sizeof(Word) - 1;
+
+ // Compute CRC of remaining bytes.
+ for (;src < end; ++src) {
+ CRC_BYTE(this, crc0, *src);
+ }
+
+ return (crc0 ^ Base().Canonize());
+ }
+
+#define REPEAT_FROM_1(macro) \
+ macro(1); \
+ macro(2); \
+ macro(3); \
+ macro(4); \
+ macro(5); \
+ macro(6); \
+ macro(7);
+
+#define REPEAT_FROM_0(macro) \
+ macro(0); \
+ REPEAT_FROM_1(macro)
+
+ // Faster, process adjusent blocks in parallel and concatenate CRCs.
+ Crc CrcBlockword(const void *data, size_t bytes, const Crc &start) const {
+ if (kStride < 2 || kStride > 8) {
+ // Unsupported configuration;
+ // fall back to something sensible.
+ return CrcWord(data, bytes, start);
+ }
+
+ const uint8 *src = static_cast<const uint8 *>(data);
+ const uint8 *end = src + bytes;
+ Crc crc0 = start ^ Base().Canonize();
+ enum {
+ // Add 16 to avoid false L1 cache collisions.
+ kStripe = (15*1024 + 16) & ~(sizeof(Word) - 1),
+ };
+
+ ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Word);
+ if (src >= end) {
+ return (crc0 ^ Base().Canonize());
+ }
+
+ end -= kStride * kStripe - 1;
+ if (src < end) {
+ Crc x_pow_8kStripe = Base().Xpow8N(kStripe);
+ do {
+ const uint8 *stripe_end = src + kStripe;
+
+#define INIT_CRC(reg) \
+ Crc crc##reg; \
+ if (kStride >= reg) { \
+ crc##reg = 0; \
+ }
+ REPEAT_FROM_1(INIT_CRC);
+#undef INIT_CRC
+
+ do {
+#define FIRST(reg) \
+ Word buf##reg; \
+ if (kStride > reg) { \
+ buf##reg = reinterpret_cast<const Word *>(src + reg * kStripe)[0]; \
+ buf##reg ^= Downcast<Crc, Word>(crc##reg); \
+ if (sizeof(crc##reg) > sizeof(buf##reg)) { \
+ crc##reg = SHIFT_RIGHT_SAFE(crc##reg, sizeof(buf##reg) * 8); \
+ crc##reg ^= TABLE_ENTRY(this->crc_word_, 0, buf##reg); \
+ } else { \
+ crc##reg = TABLE_ENTRY(this->crc_word_, 0, buf##reg); \
+ } \
+ buf##reg >>= 8; \
+ }
+ REPEAT_FROM_0(FIRST);
+#undef FIRST
+
+ for (size_t byte = 1; byte < sizeof(buf0) - 1; ++byte) {
+#define NEXT(reg) do { \
+ if (kStride > reg) { \
+ crc##reg ^= TABLE_ENTRY(this->crc_word_, byte, buf##reg); \
+ buf##reg >>= 8; \
+ } \
+} while (0)
+ REPEAT_FROM_0(NEXT);
+#undef NEXT
+ }
+
+#define LAST(reg) do { \
+ if (kStride > reg) { \
+ crc##reg ^= TABLE_ENTRY_LAST(this->crc_word_, buf##reg); \
+ } \
+} while (0)
+ REPEAT_FROM_0(LAST);
+#undef LAST
+
+ src += sizeof(Word);
+ } while (src < stripe_end);
+
+#if 0
+// The code is left for illustrational purposes only.
+#define COMBINE(reg) do { \
+ if (reg > 0 && kStride > reg) { \
+ crc0 = Base().ChangeStartValue(crc##reg, kStripe, 0, crc0); \
+ } \
+} while (0)
+#else
+#define COMBINE(reg) do { \
+ if (reg > 0 && kStride > reg) { \
+ crc0 = crc##reg ^ Base().Multiply(crc0, x_pow_8kStripe); \
+ } \
+} while (0)
+#endif
+ REPEAT_FROM_0(COMBINE);
+#undef COMBINE
+
+ src += (kStride - 1) * kStripe;
+ }
+ while (src < end);
+ }
+ end += kStride * kStripe - 1;
+
+ // Process sizeof(Word) bytes at a time.
+ end -= sizeof(Word) - 1;
+ for (; src < end; src += sizeof(Word)) {
+ Word buf0 = reinterpret_cast<const Word *>(src)[0];
+ CRC_WORD(this, crc0, buf0);
+ }
+ end += sizeof(Word) - 1;
+
+ // Compute CRC of remaining bytes.
+ for (;src < end; ++src) {
+ CRC_BYTE(this, crc0, *src);
+ }
+
+ return (crc0 ^ Base().Canonize());
+ }
+
+ // Fastest, interleaved multi-byte CRC.
+ Crc CrcMultiword(const void *data, size_t bytes, const Crc &start) const {
+ if (kStride < 2 || kStride > 8) {
+ // Unsupported configuration;
+ // fall back to something sensible.
+ return CrcWord(data, bytes, start);
+ }
+
+ const uint8 *src = static_cast<const uint8 *>(data);
+ const uint8 *end = src + bytes;
+ Crc crc0 = start ^ Base().Canonize();
+
+ ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Word);
+ if (src >= end) {
+ return (crc0 ^ Base().Canonize());
+ }
+
+ // Process kStride Word registers at once;
+ // should have have at least 2*kInterleaveBytes of data to start.
+ end -= 2*kInterleaveBytes - 1;
+ if (src < end) {
+ Crc crc_carryover;
+ if (sizeof(Crc) > sizeof(Word)) {
+ // crc_carryover is used if and only if Crc is wider than Word.
+ crc_carryover = 0;
+ }
+#define INIT_CRC(reg) \
+ Crc crc##reg; \
+ if (reg > 0 && kStride > reg) { \
+ crc##reg = 0; \
+ }
+ REPEAT_FROM_1(INIT_CRC);
+#undef INIT_CRC
+
+#define INIT_BUF(reg) \
+ Word buf##reg; \
+ if (kStride > reg) { \
+ buf##reg = reinterpret_cast<const Word *>(src)[reg]; \
+ }
+ REPEAT_FROM_0(INIT_BUF);
+#undef INIT_BUF
+
+ do {
+ PREFETCH(src);
+ src += kInterleaveBytes;
+
+ if (sizeof(Crc) > sizeof(Word)) {
+ crc0 ^= crc_carryover;
+ }
+
+#define FIRST(reg, next_reg) do { \
+ if (kStride > reg) { \
+ buf##reg ^= Downcast<Crc, Word>(crc##reg); \
+ if (sizeof(Crc) > sizeof(Word)) { \
+ if (reg < kStride - 1) { \
+ crc##next_reg ^= SHIFT_RIGHT_SAFE(crc##reg, 8 * sizeof(buf0)); \
+ } else { \
+ crc_carryover = SHIFT_RIGHT_SAFE(crc##reg, 8 * sizeof(buf0)); \
+ } \
+ } \
+ crc##reg = TABLE_ENTRY(this->crc_word_interleaved_, 0, buf##reg); \
+ buf##reg >>= 8; \
+ } \
+} while (0)
+ FIRST(0, 1);
+ FIRST(1, 2);
+ FIRST(2, 3);
+ FIRST(3, 4);
+ FIRST(4, 5);
+ FIRST(5, 6);
+ FIRST(6, 7);
+ FIRST(7, 0);
+#undef FIRST
+
+ for (size_t byte = 1; byte < sizeof(Word) - 1; ++byte) {
+#define NEXT(reg) do { \
+ if (kStride > reg) { \
+ crc##reg ^= \
+ TABLE_ENTRY(this->crc_word_interleaved_, byte, buf##reg); \
+ buf##reg >>= 8; \
+ } \
+} while(0)
+ REPEAT_FROM_0(NEXT);
+#undef NEXT
+ }
+
+#define LAST(reg) do { \
+ if (kStride > reg) { \
+ crc##reg ^= TABLE_ENTRY_LAST(this->crc_word_interleaved_, buf##reg); \
+ buf##reg = reinterpret_cast<const Word *>(src)[reg]; \
+ } \
+} while(0)
+ REPEAT_FROM_0(LAST);
+#undef LAST
+ }
+ while (src < end);
+
+ if (sizeof(Crc) > sizeof(Word)) {
+ crc0 ^= crc_carryover;
+ }
+
+#define COMBINE(reg) do { \
+ if (kStride > reg) { \
+ if (reg != 0) { \
+ crc0 ^= crc##reg; \
+ } \
+ CRC_WORD(this, crc0, buf##reg); \
+ } \
+} while (0)
+ REPEAT_FROM_0(COMBINE);
+#undef COMBINE
+
+ src += kInterleaveBytes;
+ }
+ end += 2*kInterleaveBytes - 1;
+
+ // Process sizeof(Word) bytes at once.
+ end -= sizeof(Word) - 1;
+ for (; src < end; src += sizeof(Word)) {
+ Word buf0 = reinterpret_cast<const Word *>(src)[0];
+ CRC_WORD(this, crc0, buf0);
+ }
+ end += sizeof(Word) - 1;
+
+ // Compute CRC of remaining bytes.
+ for (;src < end; ++src) {
+ CRC_BYTE(this, crc0, *src);
+ }
+
+ return (crc0 ^ Base().Canonize());
+ }
+
+ protected:
+ enum {
+ kInterleaveBytes = sizeof(Word) * kStride,
+ };
+
+ // Multiplication tables used by CRCs.
+ TableEntry crc_word_interleaved_[sizeof(Word)][256];
+ TableEntry crc_word_[sizeof(Word)][256];
+
+ // Base class stored after CRC tables so that the most frequently
+ // used table is at offset 0 and may be accessed faster.
+ GfUtil<Crc> base_;
+
+ friend class RollingCrc< GenericCrc<Crc, TableEntry, Word, kStride> >;
+
+ private:
+ // CrcMultiword on amd64 may run at 1.2 CPU cycles per byte which is
+ // noticeably faster than CrcWord (2.2-2.6 cycles/byte depending on
+ // hardware and compiler). However, there are problems with compilers.
+ //
+ // Test system: P45 chipset, Intel Q9650 CPU, 800MHz 4-4-4-12 memory.
+ //
+ // 64-bit compiler, <= 64-bit CRC, 64-bit tables, 64-bit reads:
+ // CL 15.00.307291.1 C++ >1.2< CPU cycles/byte
+ // ICL 11.1.051 -O3 C++ 1.5 CPU cycles/byte
+ // GCC 4.5 -O3 C++ 2.0 CPU cycles/byte
+ // GCC 4.x -O3 ASM >1.2< CPU cycles/byte
+ //
+ // 32-bit compiler, MMX used, <= 64-bit CRC, 64-bit tables, 64-bit reads
+ // CL 15.00.307291.1 C++ 2.0 CPU cycles/byte
+ // GCC 4.5 -O3 C++ 1.9 CPU cycles/byte
+ // ICL 11.1.051 -S C++ 1.6 CPU cycles/byte
+ // GCC 4.x -O3 ASM >1.3< CPU cycles/byte
+ //
+ // So, use inline ASM code for GCC for both i386 and amd64.
+
+ Crc CrcMultiwordI386Mmx(
+ const void *data, size_t bytes, const Crc &start) const;
+ Crc CrcMultiwordGccAmd64(
+ const void *data, size_t bytes, const Crc &start) const;
+ Crc CrcMultiwordGccAmd64Sse2(
+ const uint8 *src, const uint8 *end, const Crc &start) const;
+} GCC_ALIGN_ATTRIBUTE(16);
+
+#undef REPEAT_FROM_0
+#undef REPEAT_FROM_1
+
+
+// Specialized variants.
+#if CRCUTIL_USE_ASM
+
+#if (defined(__GNUC__) && (HAVE_AMD64 || (HAVE_I386 && HAVE_MMX)))
+
+// Declare specialized functions.
+template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiword(
+ const void *data, size_t bytes, const uint64 &start) const;
+
+#if HAVE_AMD64 && HAVE_SSE2
+template<>
+uint128_sse2
+GenericCrc<uint128_sse2, uint128_sse2, uint64, 4>::CrcMultiword(
+ const void *data, size_t bytes, const uint128_sse2 &start) const;
+#endif // HAVE_AMD64 && HAVE_SSE2
+
+#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER <= 150030729 && \
+ (HAVE_I386 && HAVE_MMX)
+
+// Work around bug in MSC (present at least in v. 15.00.30729.1)
+template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiwordI386Mmx(
+ const void *data,
+ size_t bytes,
+ const uint64 &start) const;
+template<> __forceinline
+uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiword(
+ const void *data,
+ size_t bytes,
+ const uint64 &start) const {
+ typedef uint64 Word;
+ typedef uint64 Crc;
+ if (bytes <= 12) {
+ const uint8 *src = static_cast<const uint8 *>(data);
+ uint64 crc = start ^ Base().Canonize();
+ for (const uint8 *end = src + bytes; src < end; ++src) {
+ CRC_BYTE(this, crc, *src);
+ }
+ return (crc ^ Base().Canonize());
+ }
+ return CrcMultiwordI386Mmx(data, bytes, start);
+}
+
+#endif // (defined(__GNUC__) && (HAVE_AMD64 || (HAVE_I386 && HAVE_MMX)))
+
+#endif // CRCUTIL_USE_ASM
+
+
+#pragma pack(pop)
+
+} // namespace crcutil
+
+#endif // CRCUTIL_GENERIC_CRC_H_
diff --git a/contrib/libs/crcutil/gf_util.h b/contrib/libs/crcutil/gf_util.h
index 43c1d6b216..2a98dfd9e4 100644
--- a/contrib/libs/crcutil/gf_util.h
+++ b/contrib/libs/crcutil/gf_util.h
@@ -1,304 +1,304 @@
-// Copyright 2010 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Defines GfUtil template class which implements
-// 1. some useful operations in GF(2^n),
-// 2. CRC helper function (e.g. concatenation of CRCs) which are
-// not affected by specific implemenation of CRC computation per se.
-//
-// Please read crc.pdf to understand how it all works.
-
-#ifndef CRCUTIL_GF_UTIL_H_
-#define CRCUTIL_GF_UTIL_H_
-
-#include "base_types.h" // uint8, uint64
-#include "crc_casts.h" // TO_BYTE()
-#include "platform.h" // GCC_ALIGN_ATTRIBUTE(16), SHIFT_*_SAFE
-
-namespace crcutil {
-
-#pragma pack(push, 16)
-
-// "Crc" is the type used internally and to return values of N-bit CRC.
-template<typename Crc> class GfUtil {
- public:
- // Initializes the tables given generating polynomial of degree (degree).
- // If "canonical" is true, starting CRC value and computed CRC value will be
- // XOR-ed with 111...111.
- GfUtil() {}
- GfUtil(const Crc &generating_polynomial, size_t degree, bool canonical) {
- Init(generating_polynomial, degree, canonical);
- }
- void Init(const Crc &generating_polynomial, size_t degree, bool canonical) {
- Crc one = 1;
- one <<= degree - 1;
- this->generating_polynomial_ = generating_polynomial;
- this->crc_bytes_ = (degree + 7) >> 3;
- this->degree_ = degree;
- this->one_ = one;
- if (canonical) {
- this->canonize_ = one | (one - 1);
- } else {
- this->canonize_ = 0;
- }
- this->normalize_[0] = 0;
- this->normalize_[1] = generating_polynomial;
-
- Crc k = one >> 1;
- for (size_t i = 0; i < sizeof(uint64) * 8; ++i) {
- this->x_pow_2n_[i] = k;
- k = Multiply(k, k);
- }
-
- this->crc_of_crc_ = Multiply(this->canonize_,
- this->one_ ^ Xpow8N((degree + 7) >> 3));
-
- FindLCD(Xpow8N(this->crc_bytes_), &this->x_pow_minus_W_);
- }
-
- // Returns generating polynomial.
- Crc GeneratingPolynomial() const {
- return this->generating_polynomial_;
- }
-
- // Returns number of bits in CRC (degree of generating polynomial).
- size_t Degree() const {
- return this->degree_;
- }
-
- // Returns start/finish adjustment constant.
- Crc Canonize() const {
- return this->canonize_;
- }
-
- // Returns normalized value of 1.
- Crc One() const {
- return this->one_;
- }
-
- // Returns value of CRC(A, |A|, start_new) given known
- // crc=CRC(A, |A|, start_old) -- without touching the data.
- Crc ChangeStartValue(const Crc &crc, uint64 bytes,
- const Crc &start_old,
- const Crc &start_new) const {
- return (crc ^ Multiply(start_new ^ start_old, Xpow8N(bytes)));
- }
-
- // Returns CRC of concatenation of blocks A and B when CRCs
- // of blocks A and B are known -- without touching the data.
- //
- // To be precise, given CRC(A, |A|, startA) and CRC(B, |B|, 0),
- // returns CRC(AB, |AB|, startA).
- Crc Concatenate(const Crc &crc_A, const Crc &crc_B, uint64 bytes_B) const {
- return ChangeStartValue(crc_B, bytes_B, 0 /* start_B */, crc_A);
- }
-
- // Returns CRC of sequence of zeroes -- without touching the data.
- Crc CrcOfZeroes(uint64 bytes, const Crc &start) const {
- Crc tmp = Multiply(start ^ this->canonize_, Xpow8N(bytes));
- return (tmp ^ this->canonize_);
- }
-
- // Given CRC of a message, stores extra (degree + 7)/8 bytes after
- // the message so that CRC(message+extra, start) = result.
- // Does not change CRC start value (use ChangeStartValue for that).
- // Returns number of stored bytes.
- size_t StoreComplementaryCrc(void *dst,
- const Crc &message_crc,
- const Crc &result) const {
- Crc crc0 = Multiply(result ^ this->canonize_, this->x_pow_minus_W_);
- crc0 ^= message_crc ^ this->canonize_;
- uint8 *d = reinterpret_cast<uint8 *>(dst);
- for (size_t i = 0; i < this->crc_bytes_; ++i) {
- d[i] = TO_BYTE(crc0);
- crc0 >>= 8;
- }
- return this->crc_bytes_;
- }
-
- // Stores given CRC of a message as (degree + 7)/8 bytes filled
- // with 0s to the right. Returns number of stored bytes.
- // CRC of the message and stored CRC is a constant value returned
- // by CrcOfCrc() -- it does not depend on contents of the message.
- size_t StoreCrc(void *dst, const Crc &crc) const {
- uint8 *d = reinterpret_cast<uint8 *>(dst);
- Crc crc0 = crc;
- for (size_t i = 0; i < this->crc_bytes_; ++i) {
- d[i] = TO_BYTE(crc0);
- crc0 >>= 8;
- }
- return this->crc_bytes_;
- }
-
- // Returns expected CRC value of CRC(Message,CRC(Message))
- // when CRC is stored after the message. This value is fixed
- // and does not depend on the message or CRC start value.
- Crc CrcOfCrc() const {
- return this->crc_of_crc_;
- }
-
- // Returns ((a * b) mod P) where "a" and "b" are of degree <= (D-1).
- Crc Multiply(const Crc &aa, const Crc &bb) const {
- Crc a = aa;
- Crc b = bb;
- if ((a ^ (a - 1)) < (b ^ (b - 1))) {
- Crc temp = a;
- a = b;
- b = temp;
- }
-
- if (a == 0) {
- return a;
- }
-
- Crc product = 0;
- Crc one = this->one_;
- for (; a != 0; a <<= 1) {
- if ((a & one) != 0) {
- product ^= b;
- a ^= one;
- }
- b = (b >> 1) ^ this->normalize_[Downcast<Crc, size_t>(b & 1)];
- }
-
- return product;
- }
-
- // Returns ((unnorm * m) mod P) where degree of m is <= (D-1)
- // and degree of value "unnorm" is provided explicitly.
- Crc MultiplyUnnormalized(const Crc &unnorm, size_t degree,
- const Crc &m) const {
- Crc v = unnorm;
- Crc result = 0;
- while (degree > this->degree_) {
- degree -= this->degree_;
- Crc value = v & (this->one_ | (this->one_ - 1));
- result ^= Multiply(value, Multiply(m, XpowN(degree)));
- v >>= this->degree_;
- }
- result ^= Multiply(v << (this->degree_ - degree), m);
- return result;
- }
-
- // returns ((x ** n) mod P).
- Crc XpowN(uint64 n) const {
- Crc one = this->one_;
- Crc result = one;
-
- for (size_t i = 0; n != 0; ++i, n >>= 1) {
- if (n & 1) {
- result = Multiply(result, this->x_pow_2n_[i]);
- }
- }
-
- return result;
- }
-
- // Returns (x ** (8 * n) mod P).
- Crc Xpow8N(uint64 n) const {
- return XpowN(n << 3);
- }
-
- // Returns remainder (A mod B) and sets *q = (A/B) of division
- // of two polynomials:
- // A = dividend + dividend_x_pow_D_coef * x**degree,
- // B = divisor.
- Crc Divide(const Crc &dividend0, int dividend_x_pow_D_coef,
- const Crc &divisor0, Crc *q) const {
- Crc divisor = divisor0;
- Crc dividend = dividend0;
- Crc quotient = 0;
- Crc coef = this->one_;
-
- while ((divisor & 1) == 0) {
- divisor >>= 1;
- coef >>= 1;
- }
-
- if (dividend_x_pow_D_coef) {
- quotient = coef >> 1;
- dividend ^= divisor >> 1;
- }
-
- Crc x_pow_degree_b = 1;
- for (;;) {
- if ((dividend & x_pow_degree_b) != 0) {
- dividend ^= divisor;
- quotient ^= coef;
- }
- if (coef == this->one_) {
- break;
- }
- coef <<= 1;
- x_pow_degree_b <<= 1;
- divisor <<= 1;
- }
-
- *q = quotient;
- return dividend;
- }
-
- // Extended Euclid's algorith -- for given A finds LCD(A, P) and
- // value B such that (A * B) mod P = LCD(A, P).
- Crc FindLCD(const Crc &A, Crc *B) const {
- if (A == 0 || A == this->one_) {
- *B = A;
- return A;
- }
-
- // Actually, generating polynomial is
- // (generating_polynomial_ + x**degree).
- int r0_x_pow_D_coef = 1;
- Crc r0 = this->generating_polynomial_;
- Crc b0 = 0;
- Crc r1 = A;
- Crc b1 = this->one_;
-
- for (;;) {
- Crc q;
- Crc r = Divide(r0, r0_x_pow_D_coef, r1, &q);
- if (r == 0) {
- break;
- }
- r0_x_pow_D_coef = 0;
-
- r0 = r1;
- r1 = r;
-
- Crc b = b0 ^ Multiply(q, b1);
- b0 = b1;
- b1 = b;
- }
-
- *B = b1;
- return r1;
- }
-
- protected:
- Crc canonize_;
- Crc x_pow_2n_[sizeof(uint64) * 8];
- Crc generating_polynomial_;
- Crc one_;
- Crc x_pow_minus_W_;
- Crc crc_of_crc_;
- Crc normalize_[2];
- size_t crc_bytes_;
- size_t degree_;
-} GCC_ALIGN_ATTRIBUTE(16);
-
-#pragma pack(pop)
-
-} // namespace crcutil
-
-#endif // CRCUTIL_GF_UTIL_H_
+// Copyright 2010 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Defines GfUtil template class which implements
+// 1. some useful operations in GF(2^n),
+// 2. CRC helper function (e.g. concatenation of CRCs) which are
+// not affected by specific implemenation of CRC computation per se.
+//
+// Please read crc.pdf to understand how it all works.
+
+#ifndef CRCUTIL_GF_UTIL_H_
+#define CRCUTIL_GF_UTIL_H_
+
+#include "base_types.h" // uint8, uint64
+#include "crc_casts.h" // TO_BYTE()
+#include "platform.h" // GCC_ALIGN_ATTRIBUTE(16), SHIFT_*_SAFE
+
+namespace crcutil {
+
+#pragma pack(push, 16)
+
+// "Crc" is the type used internally and to return values of N-bit CRC.
+template<typename Crc> class GfUtil {
+ public:
+ // Initializes the tables given generating polynomial of degree (degree).
+ // If "canonical" is true, starting CRC value and computed CRC value will be
+ // XOR-ed with 111...111.
+ GfUtil() {}
+ GfUtil(const Crc &generating_polynomial, size_t degree, bool canonical) {
+ Init(generating_polynomial, degree, canonical);
+ }
+ void Init(const Crc &generating_polynomial, size_t degree, bool canonical) {
+ Crc one = 1;
+ one <<= degree - 1;
+ this->generating_polynomial_ = generating_polynomial;
+ this->crc_bytes_ = (degree + 7) >> 3;
+ this->degree_ = degree;
+ this->one_ = one;
+ if (canonical) {
+ this->canonize_ = one | (one - 1);
+ } else {
+ this->canonize_ = 0;
+ }
+ this->normalize_[0] = 0;
+ this->normalize_[1] = generating_polynomial;
+
+ Crc k = one >> 1;
+ for (size_t i = 0; i < sizeof(uint64) * 8; ++i) {
+ this->x_pow_2n_[i] = k;
+ k = Multiply(k, k);
+ }
+
+ this->crc_of_crc_ = Multiply(this->canonize_,
+ this->one_ ^ Xpow8N((degree + 7) >> 3));
+
+ FindLCD(Xpow8N(this->crc_bytes_), &this->x_pow_minus_W_);
+ }
+
+ // Returns generating polynomial.
+ Crc GeneratingPolynomial() const {
+ return this->generating_polynomial_;
+ }
+
+ // Returns number of bits in CRC (degree of generating polynomial).
+ size_t Degree() const {
+ return this->degree_;
+ }
+
+ // Returns start/finish adjustment constant.
+ Crc Canonize() const {
+ return this->canonize_;
+ }
+
+ // Returns normalized value of 1.
+ Crc One() const {
+ return this->one_;
+ }
+
+ // Returns value of CRC(A, |A|, start_new) given known
+ // crc=CRC(A, |A|, start_old) -- without touching the data.
+ Crc ChangeStartValue(const Crc &crc, uint64 bytes,
+ const Crc &start_old,
+ const Crc &start_new) const {
+ return (crc ^ Multiply(start_new ^ start_old, Xpow8N(bytes)));
+ }
+
+ // Returns CRC of concatenation of blocks A and B when CRCs
+ // of blocks A and B are known -- without touching the data.
+ //
+ // To be precise, given CRC(A, |A|, startA) and CRC(B, |B|, 0),
+ // returns CRC(AB, |AB|, startA).
+ Crc Concatenate(const Crc &crc_A, const Crc &crc_B, uint64 bytes_B) const {
+ return ChangeStartValue(crc_B, bytes_B, 0 /* start_B */, crc_A);
+ }
+
+ // Returns CRC of sequence of zeroes -- without touching the data.
+ Crc CrcOfZeroes(uint64 bytes, const Crc &start) const {
+ Crc tmp = Multiply(start ^ this->canonize_, Xpow8N(bytes));
+ return (tmp ^ this->canonize_);
+ }
+
+ // Given CRC of a message, stores extra (degree + 7)/8 bytes after
+ // the message so that CRC(message+extra, start) = result.
+ // Does not change CRC start value (use ChangeStartValue for that).
+ // Returns number of stored bytes.
+ size_t StoreComplementaryCrc(void *dst,
+ const Crc &message_crc,
+ const Crc &result) const {
+ Crc crc0 = Multiply(result ^ this->canonize_, this->x_pow_minus_W_);
+ crc0 ^= message_crc ^ this->canonize_;
+ uint8 *d = reinterpret_cast<uint8 *>(dst);
+ for (size_t i = 0; i < this->crc_bytes_; ++i) {
+ d[i] = TO_BYTE(crc0);
+ crc0 >>= 8;
+ }
+ return this->crc_bytes_;
+ }
+
+ // Stores given CRC of a message as (degree + 7)/8 bytes filled
+ // with 0s to the right. Returns number of stored bytes.
+ // CRC of the message and stored CRC is a constant value returned
+ // by CrcOfCrc() -- it does not depend on contents of the message.
+ size_t StoreCrc(void *dst, const Crc &crc) const {
+ uint8 *d = reinterpret_cast<uint8 *>(dst);
+ Crc crc0 = crc;
+ for (size_t i = 0; i < this->crc_bytes_; ++i) {
+ d[i] = TO_BYTE(crc0);
+ crc0 >>= 8;
+ }
+ return this->crc_bytes_;
+ }
+
+ // Returns expected CRC value of CRC(Message,CRC(Message))
+ // when CRC is stored after the message. This value is fixed
+ // and does not depend on the message or CRC start value.
+ Crc CrcOfCrc() const {
+ return this->crc_of_crc_;
+ }
+
+ // Returns ((a * b) mod P) where "a" and "b" are of degree <= (D-1).
+ Crc Multiply(const Crc &aa, const Crc &bb) const {
+ Crc a = aa;
+ Crc b = bb;
+ if ((a ^ (a - 1)) < (b ^ (b - 1))) {
+ Crc temp = a;
+ a = b;
+ b = temp;
+ }
+
+ if (a == 0) {
+ return a;
+ }
+
+ Crc product = 0;
+ Crc one = this->one_;
+ for (; a != 0; a <<= 1) {
+ if ((a & one) != 0) {
+ product ^= b;
+ a ^= one;
+ }
+ b = (b >> 1) ^ this->normalize_[Downcast<Crc, size_t>(b & 1)];
+ }
+
+ return product;
+ }
+
+ // Returns ((unnorm * m) mod P) where degree of m is <= (D-1)
+ // and degree of value "unnorm" is provided explicitly.
+ Crc MultiplyUnnormalized(const Crc &unnorm, size_t degree,
+ const Crc &m) const {
+ Crc v = unnorm;
+ Crc result = 0;
+ while (degree > this->degree_) {
+ degree -= this->degree_;
+ Crc value = v & (this->one_ | (this->one_ - 1));
+ result ^= Multiply(value, Multiply(m, XpowN(degree)));
+ v >>= this->degree_;
+ }
+ result ^= Multiply(v << (this->degree_ - degree), m);
+ return result;
+ }
+
+ // returns ((x ** n) mod P).
+ Crc XpowN(uint64 n) const {
+ Crc one = this->one_;
+ Crc result = one;
+
+ for (size_t i = 0; n != 0; ++i, n >>= 1) {
+ if (n & 1) {
+ result = Multiply(result, this->x_pow_2n_[i]);
+ }
+ }
+
+ return result;
+ }
+
+ // Returns (x ** (8 * n) mod P).
+ Crc Xpow8N(uint64 n) const {
+ return XpowN(n << 3);
+ }
+
+ // Returns remainder (A mod B) and sets *q = (A/B) of division
+ // of two polynomials:
+ // A = dividend + dividend_x_pow_D_coef * x**degree,
+ // B = divisor.
+ Crc Divide(const Crc &dividend0, int dividend_x_pow_D_coef,
+ const Crc &divisor0, Crc *q) const {
+ Crc divisor = divisor0;
+ Crc dividend = dividend0;
+ Crc quotient = 0;
+ Crc coef = this->one_;
+
+ while ((divisor & 1) == 0) {
+ divisor >>= 1;
+ coef >>= 1;
+ }
+
+ if (dividend_x_pow_D_coef) {
+ quotient = coef >> 1;
+ dividend ^= divisor >> 1;
+ }
+
+ Crc x_pow_degree_b = 1;
+ for (;;) {
+ if ((dividend & x_pow_degree_b) != 0) {
+ dividend ^= divisor;
+ quotient ^= coef;
+ }
+ if (coef == this->one_) {
+ break;
+ }
+ coef <<= 1;
+ x_pow_degree_b <<= 1;
+ divisor <<= 1;
+ }
+
+ *q = quotient;
+ return dividend;
+ }
+
+ // Extended Euclid's algorith -- for given A finds LCD(A, P) and
+ // value B such that (A * B) mod P = LCD(A, P).
+ Crc FindLCD(const Crc &A, Crc *B) const {
+ if (A == 0 || A == this->one_) {
+ *B = A;
+ return A;
+ }
+
+ // Actually, generating polynomial is
+ // (generating_polynomial_ + x**degree).
+ int r0_x_pow_D_coef = 1;
+ Crc r0 = this->generating_polynomial_;
+ Crc b0 = 0;
+ Crc r1 = A;
+ Crc b1 = this->one_;
+
+ for (;;) {
+ Crc q;
+ Crc r = Divide(r0, r0_x_pow_D_coef, r1, &q);
+ if (r == 0) {
+ break;
+ }
+ r0_x_pow_D_coef = 0;
+
+ r0 = r1;
+ r1 = r;
+
+ Crc b = b0 ^ Multiply(q, b1);
+ b0 = b1;
+ b1 = b;
+ }
+
+ *B = b1;
+ return r1;
+ }
+
+ protected:
+ Crc canonize_;
+ Crc x_pow_2n_[sizeof(uint64) * 8];
+ Crc generating_polynomial_;
+ Crc one_;
+ Crc x_pow_minus_W_;
+ Crc crc_of_crc_;
+ Crc normalize_[2];
+ size_t crc_bytes_;
+ size_t degree_;
+} GCC_ALIGN_ATTRIBUTE(16);
+
+#pragma pack(pop)
+
+} // namespace crcutil
+
+#endif // CRCUTIL_GF_UTIL_H_
diff --git a/contrib/libs/crcutil/interface.cc b/contrib/libs/crcutil/interface.cc
index dfefebba06..cecc0e3c95 100644
--- a/contrib/libs/crcutil/interface.cc
+++ b/contrib/libs/crcutil/interface.cc
@@ -1,307 +1,307 @@
-// Copyright 2010 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// This is the only file where all details of CRC implementation are buried.
-
-#include "interface.h"
-
-#include "aligned_alloc.h"
-#include "crc32c_sse4.h"
-#include "generic_crc.h"
-#include "protected_crc.h"
-#include "rolling_crc.h"
-
-// Align all CRC tables on kAlign boundary.
-// Shall be exact power of 2.
-static size_t kAlign = 4 * 1024;
-
-using namespace crcutil;
-
+// Copyright 2010 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This is the only file where all details of CRC implementation are buried.
+
+#include "interface.h"
+
+#include "aligned_alloc.h"
+#include "crc32c_sse4.h"
+#include "generic_crc.h"
+#include "protected_crc.h"
+#include "rolling_crc.h"
+
+// Align all CRC tables on kAlign boundary.
+// Shall be exact power of 2.
+static size_t kAlign = 4 * 1024;
+
+using namespace crcutil;
+
#if (!defined(__clang__) && defined(__GNUC__))
-// Suppress 'invalid access to non-static data member ... of NULL object'
-#undef offsetof
-#define offsetof(TYPE, MEMBER) (reinterpret_cast <size_t> \
- ((&reinterpret_cast <const char &>( \
- reinterpret_cast <const TYPE *>(1)->MEMBER))) - 1)
-#endif // defined(__GNUC__)
-
-namespace crcutil_interface {
-
-template<typename CrcImplementation, typename RollingCrcImplementation>
- class Implementation : public CRC {
- public:
- typedef typename CrcImplementation::Crc Crc;
- typedef Implementation<CrcImplementation, RollingCrcImplementation> Self;
-
- Implementation(const Crc &poly,
- size_t degree,
- bool canonical,
- const Crc &roll_start_value,
- size_t roll_length)
- : crc_(poly, degree, canonical),
- rolling_crc_(crc_, roll_length, roll_start_value) {
- }
-
- static Self *Create(const Crc &poly,
- size_t degree,
- bool canonical,
- const Crc &roll_start_value,
- size_t roll_length,
- const void **allocated_memory) {
- void *memory = AlignedAlloc(sizeof(Self),
- offsetof(Self, crc_),
- kAlign,
- allocated_memory);
- return new(memory) Self(poly,
- degree,
- canonical,
- roll_start_value,
- roll_length);
- }
-
- virtual void Delete() {
- AlignedFree(this);
- }
-
- void *operator new(size_t, void *p) {
- return p;
- }
-
- virtual void GeneratingPolynomial(/* OUT */ UINT64 *lo,
- /* OUT */ UINT64 *hi = NULL) const {
- SetValue(crc_.Base().GeneratingPolynomial(), lo, hi);
- }
-
- virtual size_t Degree() const {
- return crc_.Base().Degree();
- }
-
- virtual void CanonizeValue(/* OUT */ UINT64 *lo,
- /* OUT */ UINT64 *hi = NULL) const {
- SetValue(crc_.Base().Canonize(), lo, hi);
- }
-
- virtual void RollStartValue(/* OUT */ UINT64 *lo,
- /* OUT */ UINT64 *hi = NULL) const {
- SetValue(rolling_crc_.StartValue(), lo, hi);
- }
-
- virtual size_t RollWindowBytes() const {
- return rolling_crc_.WindowBytes();
- }
-
- virtual void SelfCheckValue(/* OUT */ UINT64 *lo,
- /* OUT */ UINT64 *hi = NULL) const {
- Crc crc = crc_.CrcDefault(&crc_, sizeof(crc_), 0);
- crc = crc_.CrcDefault(&rolling_crc_, sizeof(rolling_crc_), crc);
- SetValue(crc, lo, hi);
- }
-
- virtual void Compute(const void *data,
- size_t bytes,
- /* INOUT */ UINT64 *lo,
- /* INOUT */ UINT64 *hi = NULL) const {
- SetValue(crc_.CrcDefault(data, bytes, GetValue(lo, hi)), lo, hi);
- }
-
- virtual void RollStart(const void *data,
- /* INOUT */ UINT64 *lo,
- /* INOUT */ UINT64 *hi = NULL) const {
- SetValue(rolling_crc_.Start(data), lo, hi);
- }
-
- virtual void Roll(size_t byte_out,
- size_t byte_in,
- /* INOUT */ UINT64 *lo,
- /* INOUT */ UINT64 *hi = NULL) const {
- SetValue(rolling_crc_.Roll(GetValue(lo, hi), byte_out, byte_in), lo, hi);
- }
-
- virtual void CrcOfZeroes(UINT64 bytes,
- /* INOUT */ UINT64 *lo,
- /* INOUT */ UINT64 *hi = NULL) const {
- SetValue(crc_.Base().CrcOfZeroes(bytes, GetValue(lo, hi)), lo, hi);
- }
-
- virtual void ChangeStartValue(
- UINT64 start_old_lo, UINT64 start_old_hi,
- UINT64 start_new_lo, UINT64 start_new_hi,
- UINT64 bytes,
- /* INOUT */ UINT64 *lo,
- /* INOUT */ UINT64 *hi = NULL) const {
- SetValue(crc_.Base().ChangeStartValue(
- GetValue(lo, hi),
- bytes,
- GetValue(start_old_lo, start_old_hi),
- GetValue(start_new_lo, start_new_hi)),
- lo,
- hi);
- }
-
- virtual void Concatenate(UINT64 crcB_lo, UINT64 crcB_hi,
- UINT64 bytes_B,
- /* INOUT */ UINT64* crcA_lo,
- /* INOUT */ UINT64* crcA_hi = NULL) const {
- SetValue(crc_.Base().Concatenate(GetValue(crcA_lo, crcA_hi),
- GetValue(crcB_lo, crcB_hi),
- bytes_B),
- crcA_lo,
- crcA_hi);
- }
-
- virtual size_t StoreComplementaryCrc(
- void *dst,
- UINT64 message_crc_lo, UINT64 message_crc_hi,
- UINT64 result_crc_lo, UINT64 result_crc_hi = 0) const {
- return crc_.Base().StoreComplementaryCrc(
- dst,
- GetValue(message_crc_lo, message_crc_hi),
- GetValue(result_crc_lo, result_crc_hi));
- }
-
- virtual size_t StoreCrc(void *dst,
- UINT64 lo,
- UINT64 hi = 0) const {
- return crc_.Base().StoreCrc(dst, GetValue(lo, hi));
- }
-
- virtual void CrcOfCrc(/* OUT */ UINT64 *lo,
- /* OUT */ UINT64 *hi = NULL) const {
- SetValue(crc_.Base().CrcOfCrc(), lo, hi);
- }
-
- private:
- static Crc GetValue(UINT64 *lo, UINT64 *hi) {
- if (sizeof(Crc) <= sizeof(*lo)) {
- return CrcFromUint64<Crc>(*lo);
- } else {
- return CrcFromUint64<Crc>(*lo, *hi);
- }
- }
-
- static Crc GetValue(UINT64 lo, UINT64 hi) {
- return CrcFromUint64<Crc>(lo, hi);
- }
-
- static void SetValue(const Crc &crc, UINT64 *lo, UINT64 *hi) {
- Uint64FromCrc<Crc>(crc,
- reinterpret_cast<crcutil::uint64 *>(lo),
- reinterpret_cast<crcutil::uint64 *>(hi));
- }
-
- const CrcImplementation crc_;
- const RollingCrcImplementation rolling_crc_;
-
- const Self &operator =(const Self &) {}
-};
-
-#if defined(_MSC_VER)
-// 'use_sse4_2' : unreferenced formal parameter
-#pragma warning(disable: 4100)
-#endif // defined(_MSC_VER)
-
-bool CRC::IsSSE42Available() {
-#if HAVE_AMD64 || HAVE_I386
- return Crc32cSSE4::IsSSE42Available();
-#else
- return false;
-#endif // HAVE_AMD64 || HAVE_I386
-}
-
-CRC::~CRC() {}
-CRC::CRC() {}
-
-CRC *CRC::Create(UINT64 poly_lo,
- UINT64 poly_hi,
- size_t degree,
- bool canonical,
- UINT64 roll_start_value_lo,
- UINT64 roll_start_value_hi,
- size_t roll_length,
- bool use_sse4_2,
- const void **allocated_memory) {
- if (degree == 0) {
- return NULL;
- }
-
- if (degree > 64) {
-#if !HAVE_SSE2
- return NULL;
-#else
- if (degree > 128) {
- return NULL;
- }
- uint128_sse2 poly = CrcFromUint64<uint128_sse2>(poly_lo, poly_hi);
- if (degree != 128 && (poly >> degree) != 0) {
- return NULL;
- }
- uint128_sse2 roll_start_value =
- CrcFromUint64<uint128_sse2>(roll_start_value_lo, roll_start_value_hi);
- if (degree != 128 && (roll_start_value >> degree) != 0) {
- return NULL;
- }
-#if HAVE_I386
- typedef GenericCrc<uint128_sse2, uint128_sse2, crcutil::uint32, 3> Crc128;
-#elif defined(__GNUC__) && GCC_VERSION_AVAILABLE(4, 5)
- typedef GenericCrc<uint128_sse2, uint128_sse2, crcutil::uint64, 6> Crc128;
-#else
- typedef GenericCrc<uint128_sse2, uint128_sse2, crcutil::uint64, 4> Crc128;
-#endif // HAVE_I386
- return Implementation<Crc128, RollingCrc<Crc128> >::Create(
- poly,
- degree,
- canonical,
- roll_start_value,
- roll_length,
- allocated_memory);
-#endif // !HAVE_SSE2
- }
-
-#if CRCUTIL_USE_MM_CRC32 && (HAVE_I386 || HAVE_AMD64)
- if (use_sse4_2 &&
- degree == Crc32cSSE4::FixedDegree() &&
- poly_lo == Crc32cSSE4::FixedGeneratingPolynomial() &&
- poly_hi == 0) {
- if (roll_start_value_hi != 0 || (roll_start_value_lo >> 32) != 0) {
- return NULL;
- }
- return Implementation<Crc32cSSE4, RollingCrc32cSSE4>::Create(
- static_cast<size_t>(poly_lo),
- degree,
- canonical,
- static_cast<size_t>(roll_start_value_lo),
- static_cast<size_t>(roll_length),
- allocated_memory);
- }
-#endif // CRCUTIL_USE_MM_CRC32 && (HAVE_I386 || HAVE_AMD64)
-
- if (poly_hi != 0 || (degree != 64 && (poly_lo >> degree) != 0)) {
- return NULL;
- }
- if (roll_start_value_hi != 0 ||
- (degree != 64 && (roll_start_value_lo >> degree) != 0)) {
- return NULL;
- }
- typedef GenericCrc<crcutil::uint64, crcutil::uint64, crcutil::uint64, 4>
- Crc64;
- return Implementation<Crc64, RollingCrc<Crc64> >::Create(
- poly_lo,
- degree,
- canonical,
- roll_start_value_lo,
- roll_length,
- allocated_memory);
-}
-
-} // namespace crcutil_interface
+// Suppress 'invalid access to non-static data member ... of NULL object'
+#undef offsetof
+#define offsetof(TYPE, MEMBER) (reinterpret_cast <size_t> \
+ ((&reinterpret_cast <const char &>( \
+ reinterpret_cast <const TYPE *>(1)->MEMBER))) - 1)
+#endif // defined(__GNUC__)
+
+namespace crcutil_interface {
+
+template<typename CrcImplementation, typename RollingCrcImplementation>
+ class Implementation : public CRC {
+ public:
+ typedef typename CrcImplementation::Crc Crc;
+ typedef Implementation<CrcImplementation, RollingCrcImplementation> Self;
+
+ Implementation(const Crc &poly,
+ size_t degree,
+ bool canonical,
+ const Crc &roll_start_value,
+ size_t roll_length)
+ : crc_(poly, degree, canonical),
+ rolling_crc_(crc_, roll_length, roll_start_value) {
+ }
+
+ static Self *Create(const Crc &poly,
+ size_t degree,
+ bool canonical,
+ const Crc &roll_start_value,
+ size_t roll_length,
+ const void **allocated_memory) {
+ void *memory = AlignedAlloc(sizeof(Self),
+ offsetof(Self, crc_),
+ kAlign,
+ allocated_memory);
+ return new(memory) Self(poly,
+ degree,
+ canonical,
+ roll_start_value,
+ roll_length);
+ }
+
+ virtual void Delete() {
+ AlignedFree(this);
+ }
+
+ void *operator new(size_t, void *p) {
+ return p;
+ }
+
+ virtual void GeneratingPolynomial(/* OUT */ UINT64 *lo,
+ /* OUT */ UINT64 *hi = NULL) const {
+ SetValue(crc_.Base().GeneratingPolynomial(), lo, hi);
+ }
+
+ virtual size_t Degree() const {
+ return crc_.Base().Degree();
+ }
+
+ virtual void CanonizeValue(/* OUT */ UINT64 *lo,
+ /* OUT */ UINT64 *hi = NULL) const {
+ SetValue(crc_.Base().Canonize(), lo, hi);
+ }
+
+ virtual void RollStartValue(/* OUT */ UINT64 *lo,
+ /* OUT */ UINT64 *hi = NULL) const {
+ SetValue(rolling_crc_.StartValue(), lo, hi);
+ }
+
+ virtual size_t RollWindowBytes() const {
+ return rolling_crc_.WindowBytes();
+ }
+
+ virtual void SelfCheckValue(/* OUT */ UINT64 *lo,
+ /* OUT */ UINT64 *hi = NULL) const {
+ Crc crc = crc_.CrcDefault(&crc_, sizeof(crc_), 0);
+ crc = crc_.CrcDefault(&rolling_crc_, sizeof(rolling_crc_), crc);
+ SetValue(crc, lo, hi);
+ }
+
+ virtual void Compute(const void *data,
+ size_t bytes,
+ /* INOUT */ UINT64 *lo,
+ /* INOUT */ UINT64 *hi = NULL) const {
+ SetValue(crc_.CrcDefault(data, bytes, GetValue(lo, hi)), lo, hi);
+ }
+
+ virtual void RollStart(const void *data,
+ /* INOUT */ UINT64 *lo,
+ /* INOUT */ UINT64 *hi = NULL) const {
+ SetValue(rolling_crc_.Start(data), lo, hi);
+ }
+
+ virtual void Roll(size_t byte_out,
+ size_t byte_in,
+ /* INOUT */ UINT64 *lo,
+ /* INOUT */ UINT64 *hi = NULL) const {
+ SetValue(rolling_crc_.Roll(GetValue(lo, hi), byte_out, byte_in), lo, hi);
+ }
+
+ virtual void CrcOfZeroes(UINT64 bytes,
+ /* INOUT */ UINT64 *lo,
+ /* INOUT */ UINT64 *hi = NULL) const {
+ SetValue(crc_.Base().CrcOfZeroes(bytes, GetValue(lo, hi)), lo, hi);
+ }
+
+ virtual void ChangeStartValue(
+ UINT64 start_old_lo, UINT64 start_old_hi,
+ UINT64 start_new_lo, UINT64 start_new_hi,
+ UINT64 bytes,
+ /* INOUT */ UINT64 *lo,
+ /* INOUT */ UINT64 *hi = NULL) const {
+ SetValue(crc_.Base().ChangeStartValue(
+ GetValue(lo, hi),
+ bytes,
+ GetValue(start_old_lo, start_old_hi),
+ GetValue(start_new_lo, start_new_hi)),
+ lo,
+ hi);
+ }
+
+ virtual void Concatenate(UINT64 crcB_lo, UINT64 crcB_hi,
+ UINT64 bytes_B,
+ /* INOUT */ UINT64* crcA_lo,
+ /* INOUT */ UINT64* crcA_hi = NULL) const {
+ SetValue(crc_.Base().Concatenate(GetValue(crcA_lo, crcA_hi),
+ GetValue(crcB_lo, crcB_hi),
+ bytes_B),
+ crcA_lo,
+ crcA_hi);
+ }
+
+ virtual size_t StoreComplementaryCrc(
+ void *dst,
+ UINT64 message_crc_lo, UINT64 message_crc_hi,
+ UINT64 result_crc_lo, UINT64 result_crc_hi = 0) const {
+ return crc_.Base().StoreComplementaryCrc(
+ dst,
+ GetValue(message_crc_lo, message_crc_hi),
+ GetValue(result_crc_lo, result_crc_hi));
+ }
+
+ virtual size_t StoreCrc(void *dst,
+ UINT64 lo,
+ UINT64 hi = 0) const {
+ return crc_.Base().StoreCrc(dst, GetValue(lo, hi));
+ }
+
+ virtual void CrcOfCrc(/* OUT */ UINT64 *lo,
+ /* OUT */ UINT64 *hi = NULL) const {
+ SetValue(crc_.Base().CrcOfCrc(), lo, hi);
+ }
+
+ private:
+ static Crc GetValue(UINT64 *lo, UINT64 *hi) {
+ if (sizeof(Crc) <= sizeof(*lo)) {
+ return CrcFromUint64<Crc>(*lo);
+ } else {
+ return CrcFromUint64<Crc>(*lo, *hi);
+ }
+ }
+
+ static Crc GetValue(UINT64 lo, UINT64 hi) {
+ return CrcFromUint64<Crc>(lo, hi);
+ }
+
+ static void SetValue(const Crc &crc, UINT64 *lo, UINT64 *hi) {
+ Uint64FromCrc<Crc>(crc,
+ reinterpret_cast<crcutil::uint64 *>(lo),
+ reinterpret_cast<crcutil::uint64 *>(hi));
+ }
+
+ const CrcImplementation crc_;
+ const RollingCrcImplementation rolling_crc_;
+
+ const Self &operator =(const Self &) {}
+};
+
+#if defined(_MSC_VER)
+// 'use_sse4_2' : unreferenced formal parameter
+#pragma warning(disable: 4100)
+#endif // defined(_MSC_VER)
+
+bool CRC::IsSSE42Available() {
+#if HAVE_AMD64 || HAVE_I386
+ return Crc32cSSE4::IsSSE42Available();
+#else
+ return false;
+#endif // HAVE_AMD64 || HAVE_I386
+}
+
+CRC::~CRC() {}
+CRC::CRC() {}
+
+CRC *CRC::Create(UINT64 poly_lo,
+ UINT64 poly_hi,
+ size_t degree,
+ bool canonical,
+ UINT64 roll_start_value_lo,
+ UINT64 roll_start_value_hi,
+ size_t roll_length,
+ bool use_sse4_2,
+ const void **allocated_memory) {
+ if (degree == 0) {
+ return NULL;
+ }
+
+ if (degree > 64) {
+#if !HAVE_SSE2
+ return NULL;
+#else
+ if (degree > 128) {
+ return NULL;
+ }
+ uint128_sse2 poly = CrcFromUint64<uint128_sse2>(poly_lo, poly_hi);
+ if (degree != 128 && (poly >> degree) != 0) {
+ return NULL;
+ }
+ uint128_sse2 roll_start_value =
+ CrcFromUint64<uint128_sse2>(roll_start_value_lo, roll_start_value_hi);
+ if (degree != 128 && (roll_start_value >> degree) != 0) {
+ return NULL;
+ }
+#if HAVE_I386
+ typedef GenericCrc<uint128_sse2, uint128_sse2, crcutil::uint32, 3> Crc128;
+#elif defined(__GNUC__) && GCC_VERSION_AVAILABLE(4, 5)
+ typedef GenericCrc<uint128_sse2, uint128_sse2, crcutil::uint64, 6> Crc128;
+#else
+ typedef GenericCrc<uint128_sse2, uint128_sse2, crcutil::uint64, 4> Crc128;
+#endif // HAVE_I386
+ return Implementation<Crc128, RollingCrc<Crc128> >::Create(
+ poly,
+ degree,
+ canonical,
+ roll_start_value,
+ roll_length,
+ allocated_memory);
+#endif // !HAVE_SSE2
+ }
+
+#if CRCUTIL_USE_MM_CRC32 && (HAVE_I386 || HAVE_AMD64)
+ if (use_sse4_2 &&
+ degree == Crc32cSSE4::FixedDegree() &&
+ poly_lo == Crc32cSSE4::FixedGeneratingPolynomial() &&
+ poly_hi == 0) {
+ if (roll_start_value_hi != 0 || (roll_start_value_lo >> 32) != 0) {
+ return NULL;
+ }
+ return Implementation<Crc32cSSE4, RollingCrc32cSSE4>::Create(
+ static_cast<size_t>(poly_lo),
+ degree,
+ canonical,
+ static_cast<size_t>(roll_start_value_lo),
+ static_cast<size_t>(roll_length),
+ allocated_memory);
+ }
+#endif // CRCUTIL_USE_MM_CRC32 && (HAVE_I386 || HAVE_AMD64)
+
+ if (poly_hi != 0 || (degree != 64 && (poly_lo >> degree) != 0)) {
+ return NULL;
+ }
+ if (roll_start_value_hi != 0 ||
+ (degree != 64 && (roll_start_value_lo >> degree) != 0)) {
+ return NULL;
+ }
+ typedef GenericCrc<crcutil::uint64, crcutil::uint64, crcutil::uint64, 4>
+ Crc64;
+ return Implementation<Crc64, RollingCrc<Crc64> >::Create(
+ poly_lo,
+ degree,
+ canonical,
+ roll_start_value_lo,
+ roll_length,
+ allocated_memory);
+}
+
+} // namespace crcutil_interface
diff --git a/contrib/libs/crcutil/interface.h b/contrib/libs/crcutil/interface.h
index 2b3e2ee939..2c43f95ba9 100644
--- a/contrib/libs/crcutil/interface.h
+++ b/contrib/libs/crcutil/interface.h
@@ -1,204 +1,204 @@
-// Copyright 2010 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Example how to use CRC implementation via the interface which
-// hides details of implementation.
-//
-// The raw implementation is not indended to be used in a project
-// directly because:
-// - Implementation lives in the header files because that is the
-// only way to use templates efficiently.
-// - Header files are quite "dirty" -- they define and use a
-// lot of macros. Bringing these macros to all files in
-// a project is not particularly good idea.
-// - The code takes forever to compile with GCC (e.g. GCC
-// 4.4.3 and 4.5.0 compile the unittest for about 30 seconds).
-//
-// Solution:
-// - Create your own, clean interface.
-// - Do not expose interface internals in a header file.
-// - Proxy all calls to your interface to CRC implementation.
-// - Keep only one copy of actual implementation.
-
-#ifndef CRCUTIL_INTERFACE_H_
-#define CRCUTIL_INTERFACE_H_
-
-#include "std_headers.h" // size_t
-
-namespace crcutil_interface {
-
-// Many projects define their own uint64. Do it here.
-typedef unsigned long long UINT64;
-
-class CRC {
- public:
- // Creates new instance of CRC class.
- // If arguments are illegal (e.g. provided generating polynomial
- // has more bits than provided degree), returns NULL.
- //
- // poly_* - generating polynomial (reversed bit format).
- // degree - degree of generating polynomial.
- // canonical - if true, input CRC value will be XOR'ed with
- // (inverted) before and after CRC computation.
- // roll_start_value - starting value of rolling CRC.
- // roll_window_bytes - length of rolling CRC window in bytes.
- // If roll_length is 0, roll_start_value
- // shall be 0.
- // use_sse4_2 - if true, use SSE4.2 crc32 instruction to compute
- // CRC when generating polynomial is CRC32C (Castagnoli)
- // allocated_memory - optional (may be NULL) address of a variable
- // to store the address of actually allocated memory.
- static CRC *Create(UINT64 poly_lo,
- UINT64 poly_hi,
- size_t degree,
- bool canonical,
- UINT64 roll_start_value_lo,
- UINT64 roll_start_value_hi,
- size_t roll_window_bytes,
- bool use_sse4_2,
- const void **allocated_memory);
-
- // Deletes the instance of CRC class.
- virtual void Delete() = 0;
-
- // Returns true if SSE4.2 is available.
- static bool IsSSE42Available();
-
- // Returns generating polynomial.
- virtual void GeneratingPolynomial(/* OUT */ UINT64 *lo,
- /* OUT */ UINT64 *hi = NULL) const = 0;
-
- // Returns degree of generating polynomial.
- virtual size_t Degree() const = 0;
-
- // Returns canonization constant used to XOR crc value
- // before and after CRC computation.
- virtual void CanonizeValue(/* OUT */ UINT64 *lo,
- /* OUT */ UINT64 *hi = NULL) const = 0;
-
- // Returns rolling CRC starting value.
- virtual void RollStartValue(/* OUT */ UINT64 *lo,
- /* OUT */ UINT64 *hi = NULL) const = 0;
-
- // Returns length of rolling CRC window.
- virtual size_t RollWindowBytes() const = 0;
-
- // Returns CRC of CRC tables to enable verification
- // of integrity of CRC function itself by comparing
- // the result with pre-computed value.
- virtual void SelfCheckValue(/* OUT */ UINT64 *lo,
- /* OUT */ UINT64 *hi = NULL) const = 0;
-
- // Given CRC value of previous chunk of data,
- // extends it to new chunk, retuning the result in-place.
- //
- // If degree of CRC polynomial is 64 or less,
- // (*hi) will not be touched.
- virtual void Compute(const void *data,
- size_t bytes,
- /* INOUT */ UINT64 *lo,
- /* INOUT */ UINT64 *hi = NULL) const = 0;
-
- // Starts rolling CRC by computing CRC of first
- // "roll_length" bytes of "data", using "roll_start_value"
- // as starting value (see Create()).
- // Should not be called if the value of "roll_value" was 0.
- virtual void RollStart(const void *data,
- /* OUT */ UINT64 *lo,
- /* OUT */ UINT64 *hi = NULL) const = 0;
-
- // Rolls CRC by 1 byte, given the bytes leaving and
- // entering the window of "roll_length" bytes.
- // RollStart() should be called before "Roll".
- // Should not be called if the value of "roll_value" was 0.
- virtual void Roll(size_t byte_out,
- size_t byte_in,
- /* INOUT */ UINT64 *lo,
- /* INOUT */ UINT64 *hi = NULL) const = 0;
-
- // Computes CRC of sequence of zeroes -- without touching the data.
- virtual void CrcOfZeroes(UINT64 bytes,
- /* INOUT */ UINT64 *lo,
- /* INOUT */ UINT64 *hi = NULL) const = 0;
-
- // Computes value of CRC(A, bytes, start_new) given known
- // crc=CRC(A, bytes, start_old) -- without touching the data.
- virtual void ChangeStartValue(
- UINT64 start_old_lo, UINT64 start_old_hi,
- UINT64 start_new_lo, UINT64 start_new_hi,
- UINT64 bytes,
- /* INOUT */ UINT64 *lo,
- /* INOUT */ UINT64 *hi = NULL) const = 0;
-
- // Returns CRC of concatenation of blocks A and B when CRCs
- // of blocks A and B are known -- without touching the data.
- //
- // To be precise, given CRC(A, |A|, startA) and CRC(B, |B|, 0),
- // returns CRC(AB, |AB|, startA).
- virtual void Concatenate(UINT64 crcB_lo, UINT64 crcB_hi,
- UINT64 bytes_B,
- /* INOUT */ UINT64* crcA_lo,
- /* INOUT */ UINT64* crcA_hi = NULL) const = 0;
-
- // Given CRC of a message, stores extra (degree + 7)/8 bytes after
- // the message so that CRC(message+extra, start) = result.
- // Does not change CRC start value (use ChangeStartValue for that).
- // Returns number of stored bytes.
- virtual size_t StoreComplementaryCrc(
- void *dst,
- UINT64 message_crc_lo, UINT64 message_crc_hi,
- UINT64 result_crc_lo, UINT64 result_crc_hi = 0) const = 0;
-
- // Stores given CRC of a message as (degree + 7)/8 bytes filled
- // with 0s to the right. Returns number of stored bytes.
- // CRC of the message and stored CRC is a constant value returned
- // by CrcOfCrc() -- it does not depend on contents of the message.
- virtual size_t StoreCrc(/* OUT */ void *dst,
- UINT64 lo,
- UINT64 hi = 0) const = 0;
-
- // Computes expected CRC value of CRC(Message,CRC(Message))
- // when CRC is stored after the message. This value is fixed
- // and does not depend on the message or CRC start value.
- virtual void CrcOfCrc(/* OUT */ UINT64 *lo,
- /* OUT */ UINT64 *hi = NULL) const = 0;
-
- protected:
- // CRC instance should be created only once (most of the time):
- // - Creation and initializion is relatively expensive.
- // - CRC is fully defined by its generating polynomials
- // (well, and few more parameters).
- // - CRC instances are pure constants. There is no
- // reason to have 2 instances of the same CRC.
- // - There are not too many generating polynomials that are
- // used on practice. It is hard to imagine a project
- // which uses 50 different generating polynomials.
- // Thus, a handful of CRC instances is sufficient
- // to cover the needs of even very large project.
- // - Finally and most importantly, CRC tables should be
- // aligned properly. No, the instances of CRC class
- // are not created by blind "new" -- they use placement
- // "new" and, in absense of placement "delete",
- // should be deleted by calling explicit Delete() method.
- virtual ~CRC();
-
- // Cannot instantiate the class -- instances may be created
- // by CRC::Create() only.
- CRC();
-};
-
-} // namespace crcutil_interface
-
-
-#endif // CRCUTIL_INTERFACE_H_
+// Copyright 2010 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Example how to use CRC implementation via the interface which
+// hides details of implementation.
+//
+// The raw implementation is not indended to be used in a project
+// directly because:
+// - Implementation lives in the header files because that is the
+// only way to use templates efficiently.
+// - Header files are quite "dirty" -- they define and use a
+// lot of macros. Bringing these macros to all files in
+// a project is not particularly good idea.
+// - The code takes forever to compile with GCC (e.g. GCC
+// 4.4.3 and 4.5.0 compile the unittest for about 30 seconds).
+//
+// Solution:
+// - Create your own, clean interface.
+// - Do not expose interface internals in a header file.
+// - Proxy all calls to your interface to CRC implementation.
+// - Keep only one copy of actual implementation.
+
+#ifndef CRCUTIL_INTERFACE_H_
+#define CRCUTIL_INTERFACE_H_
+
+#include "std_headers.h" // size_t
+
+namespace crcutil_interface {
+
+// Many projects define their own uint64. Do it here.
+typedef unsigned long long UINT64;
+
+class CRC {
+ public:
+ // Creates new instance of CRC class.
+ // If arguments are illegal (e.g. provided generating polynomial
+ // has more bits than provided degree), returns NULL.
+ //
+ // poly_* - generating polynomial (reversed bit format).
+ // degree - degree of generating polynomial.
+ // canonical - if true, input CRC value will be XOR'ed with
+ // (inverted) before and after CRC computation.
+ // roll_start_value - starting value of rolling CRC.
+ // roll_window_bytes - length of rolling CRC window in bytes.
+ // If roll_length is 0, roll_start_value
+ // shall be 0.
+ // use_sse4_2 - if true, use SSE4.2 crc32 instruction to compute
+ // CRC when generating polynomial is CRC32C (Castagnoli)
+ // allocated_memory - optional (may be NULL) address of a variable
+ // to store the address of actually allocated memory.
+ static CRC *Create(UINT64 poly_lo,
+ UINT64 poly_hi,
+ size_t degree,
+ bool canonical,
+ UINT64 roll_start_value_lo,
+ UINT64 roll_start_value_hi,
+ size_t roll_window_bytes,
+ bool use_sse4_2,
+ const void **allocated_memory);
+
+ // Deletes the instance of CRC class.
+ virtual void Delete() = 0;
+
+ // Returns true if SSE4.2 is available.
+ static bool IsSSE42Available();
+
+ // Returns generating polynomial.
+ virtual void GeneratingPolynomial(/* OUT */ UINT64 *lo,
+ /* OUT */ UINT64 *hi = NULL) const = 0;
+
+ // Returns degree of generating polynomial.
+ virtual size_t Degree() const = 0;
+
+ // Returns canonization constant used to XOR crc value
+ // before and after CRC computation.
+ virtual void CanonizeValue(/* OUT */ UINT64 *lo,
+ /* OUT */ UINT64 *hi = NULL) const = 0;
+
+ // Returns rolling CRC starting value.
+ virtual void RollStartValue(/* OUT */ UINT64 *lo,
+ /* OUT */ UINT64 *hi = NULL) const = 0;
+
+ // Returns length of rolling CRC window.
+ virtual size_t RollWindowBytes() const = 0;
+
+ // Returns CRC of CRC tables to enable verification
+ // of integrity of CRC function itself by comparing
+ // the result with pre-computed value.
+ virtual void SelfCheckValue(/* OUT */ UINT64 *lo,
+ /* OUT */ UINT64 *hi = NULL) const = 0;
+
+ // Given CRC value of previous chunk of data,
+ // extends it to new chunk, retuning the result in-place.
+ //
+ // If degree of CRC polynomial is 64 or less,
+ // (*hi) will not be touched.
+ virtual void Compute(const void *data,
+ size_t bytes,
+ /* INOUT */ UINT64 *lo,
+ /* INOUT */ UINT64 *hi = NULL) const = 0;
+
+ // Starts rolling CRC by computing CRC of first
+ // "roll_length" bytes of "data", using "roll_start_value"
+ // as starting value (see Create()).
+ // Should not be called if the value of "roll_value" was 0.
+ virtual void RollStart(const void *data,
+ /* OUT */ UINT64 *lo,
+ /* OUT */ UINT64 *hi = NULL) const = 0;
+
+ // Rolls CRC by 1 byte, given the bytes leaving and
+ // entering the window of "roll_length" bytes.
+ // RollStart() should be called before "Roll".
+ // Should not be called if the value of "roll_value" was 0.
+ virtual void Roll(size_t byte_out,
+ size_t byte_in,
+ /* INOUT */ UINT64 *lo,
+ /* INOUT */ UINT64 *hi = NULL) const = 0;
+
+ // Computes CRC of sequence of zeroes -- without touching the data.
+ virtual void CrcOfZeroes(UINT64 bytes,
+ /* INOUT */ UINT64 *lo,
+ /* INOUT */ UINT64 *hi = NULL) const = 0;
+
+ // Computes value of CRC(A, bytes, start_new) given known
+ // crc=CRC(A, bytes, start_old) -- without touching the data.
+ virtual void ChangeStartValue(
+ UINT64 start_old_lo, UINT64 start_old_hi,
+ UINT64 start_new_lo, UINT64 start_new_hi,
+ UINT64 bytes,
+ /* INOUT */ UINT64 *lo,
+ /* INOUT */ UINT64 *hi = NULL) const = 0;
+
+ // Returns CRC of concatenation of blocks A and B when CRCs
+ // of blocks A and B are known -- without touching the data.
+ //
+ // To be precise, given CRC(A, |A|, startA) and CRC(B, |B|, 0),
+ // returns CRC(AB, |AB|, startA).
+ virtual void Concatenate(UINT64 crcB_lo, UINT64 crcB_hi,
+ UINT64 bytes_B,
+ /* INOUT */ UINT64* crcA_lo,
+ /* INOUT */ UINT64* crcA_hi = NULL) const = 0;
+
+ // Given CRC of a message, stores extra (degree + 7)/8 bytes after
+ // the message so that CRC(message+extra, start) = result.
+ // Does not change CRC start value (use ChangeStartValue for that).
+ // Returns number of stored bytes.
+ virtual size_t StoreComplementaryCrc(
+ void *dst,
+ UINT64 message_crc_lo, UINT64 message_crc_hi,
+ UINT64 result_crc_lo, UINT64 result_crc_hi = 0) const = 0;
+
+ // Stores given CRC of a message as (degree + 7)/8 bytes filled
+ // with 0s to the right. Returns number of stored bytes.
+ // CRC of the message and stored CRC is a constant value returned
+ // by CrcOfCrc() -- it does not depend on contents of the message.
+ virtual size_t StoreCrc(/* OUT */ void *dst,
+ UINT64 lo,
+ UINT64 hi = 0) const = 0;
+
+ // Computes expected CRC value of CRC(Message,CRC(Message))
+ // when CRC is stored after the message. This value is fixed
+ // and does not depend on the message or CRC start value.
+ virtual void CrcOfCrc(/* OUT */ UINT64 *lo,
+ /* OUT */ UINT64 *hi = NULL) const = 0;
+
+ protected:
+ // CRC instance should be created only once (most of the time):
+ // - Creation and initializion is relatively expensive.
+ // - CRC is fully defined by its generating polynomials
+ // (well, and few more parameters).
+ // - CRC instances are pure constants. There is no
+ // reason to have 2 instances of the same CRC.
+ // - There are not too many generating polynomials that are
+ // used on practice. It is hard to imagine a project
+ // which uses 50 different generating polynomials.
+ // Thus, a handful of CRC instances is sufficient
+ // to cover the needs of even very large project.
+ // - Finally and most importantly, CRC tables should be
+ // aligned properly. No, the instances of CRC class
+ // are not created by blind "new" -- they use placement
+ // "new" and, in absense of placement "delete",
+ // should be deleted by calling explicit Delete() method.
+ virtual ~CRC();
+
+ // Cannot instantiate the class -- instances may be created
+ // by CRC::Create() only.
+ CRC();
+};
+
+} // namespace crcutil_interface
+
+
+#endif // CRCUTIL_INTERFACE_H_
diff --git a/contrib/libs/crcutil/multiword_128_64_gcc_amd64_sse2.cc b/contrib/libs/crcutil/multiword_128_64_gcc_amd64_sse2.cc
index f94fd1fa36..92713cabd5 100644
--- a/contrib/libs/crcutil/multiword_128_64_gcc_amd64_sse2.cc
+++ b/contrib/libs/crcutil/multiword_128_64_gcc_amd64_sse2.cc
@@ -1,291 +1,291 @@
-// Copyright 2010 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Implements multiword CRC for GCC on i386.
-//
-// Small comment: the trick described in
-// http://software.intel.com/en-us/articles/fast-simd-integer-move-for-the-intel-pentiumr-4-processor
-// (replace "movdqa dst, src" with "pshufd $0xE4, src, dst")
-// did not work: execution time increased from
-// 1.8 CPU cycles/byte to 2.1 CPU cycles/byte.
-// So it may be good idea on P4 but it's not on newer CPUs.
-//
-// movaps/xorps vs. movdqa/pxor did not make any difference.
-
-#include "generic_crc.h"
-#include "uint128_sse2.h"
-
-#if defined(__GNUC__) && CRCUTIL_USE_ASM && HAVE_AMD64 && HAVE_SSE2
-
-namespace crcutil {
-
-template<> uint128_sse2
-GenericCrc<uint128_sse2, uint128_sse2, uint64, 4>::CrcMultiwordGccAmd64Sse2(
- const uint8 *src, const uint8 *end, const uint128_sse2 &start) const;
-
-template<>
-uint128_sse2 GenericCrc<uint128_sse2, uint128_sse2, uint64, 4>::CrcMultiword(
- const void *data, size_t bytes, const uint128_sse2 &start) const {
- const uint8 *src = static_cast<const uint8 *>(data);
- uint128_sse2 crc = start ^ this->Base().Canonize();
- const uint8 *end = src + bytes;
- if (bytes <= 7) {
- for (; src < end; ++src) {
- CRC_BYTE(this, crc, *src);
- }
- return (crc ^ this->Base().Canonize());
- }
-
- ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc, uint64);
- if (src >= end) {
- return (crc ^ this->Base().Canonize());
- }
-
- return CrcMultiwordGccAmd64Sse2(src, end, crc);
-}
-
-#define CRC_WORD_ASM() \
- SSE2_MOVQ " %[crc0], %[tmp0]\n" \
- "xorq %[tmp0], %[buf0]\n" \
- "psrldq $8, %[crc0]\n" \
- "movzbq %b[buf0], %[tmp0]\n" \
- "shrq $8, %[buf0]\n" \
- "addq %[tmp0], %[tmp0]\n" \
- "pxor (%[table_word], %[tmp0], 8), %[crc0]\n" \
- "movzbq %b[buf0], %[tmp1]\n" \
- "shrq $8, %[buf0]\n" \
- "addq %[tmp1], %[tmp1]\n" \
- "pxor 1*256*16(%[table_word], %[tmp1], 8), %[crc0]\n" \
- "movzbq %b[buf0], %[tmp0]\n" \
- "shrq $8, %[buf0]\n" \
- "addq %[tmp0], %[tmp0]\n" \
- "pxor 2*256*16(%[table_word], %[tmp0], 8), %[crc0]\n" \
- "movzbq %b[buf0], %[tmp1]\n" \
- "shrq $8, %[buf0]\n" \
- "addq %[tmp1], %[tmp1]\n" \
- "pxor 3*256*16(%[table_word], %[tmp1], 8), %[crc0]\n" \
- "movzbq %b[buf0], %[tmp0]\n" \
- "shrq $8, %[buf0]\n" \
- "addq %[tmp0], %[tmp0]\n" \
- "pxor 4*256*16(%[table_word], %[tmp0], 8), %[crc0]\n" \
- "movzbq %b[buf0], %[tmp1]\n" \
- "shrq $8, %[buf0]\n" \
- "addq %[tmp1], %[tmp1]\n" \
- "pxor 5*256*16(%[table_word], %[tmp1], 8), %[crc0]\n" \
- "movzbq %b[buf0], %[tmp0]\n" \
- "shrq $8, %[buf0]\n" \
- "addq %[tmp0], %[tmp0]\n" \
- "pxor 6*256*16(%[table_word], %[tmp0], 8), %[crc0]\n" \
- "addq %[buf0], %[buf0]\n" \
- "pxor 7*256*16(%[table_word], %[buf0], 8), %[crc0]\n"
-
-template<> uint128_sse2
-GenericCrc<uint128_sse2, uint128_sse2, uint64, 4>::CrcMultiwordGccAmd64Sse2(
- const uint8 *src, const uint8 *end, const uint128_sse2 &start) const {
- __m128i crc0 = start;
- __m128i crc1;
- __m128i crc2;
- __m128i crc3;
- __m128i crc_carryover;
-
- uint64 buf0;
- uint64 buf1;
- uint64 buf2;
- uint64 buf3;
-
- uint64 tmp0;
- uint64 tmp1;
-
- asm(
- "sub $2*4*8 - 1, %[end]\n"
- "cmpq %[src], %[end]\n"
- "jbe 2f\n"
-
- "pxor %[crc1], %[crc1]\n"
- "pxor %[crc2], %[crc2]\n"
- "pxor %[crc3], %[crc3]\n"
- "pxor %[crc_carryover], %[crc_carryover]\n"
- "movq (%[src]), %[buf0]\n"
- "movq 1*8(%[src]), %[buf1]\n"
- "movq 2*8(%[src]), %[buf2]\n"
- "movq 3*8(%[src]), %[buf3]\n"
-
- "1:\n"
-#if HAVE_SSE && CRCUTIL_PREFETCH_WIDTH > 0
- "prefetcht0 " TO_STRING(CRCUTIL_PREFETCH_WIDTH) "(%[src])\n"
-#endif
-#if GCC_VERSION_AVAILABLE(4, 5)
- // Bug in GCC 4.2.4?
- "add $4*8, %[src]\n"
-#else
- "lea 4*8(%[src]), %[src]\n"
-#endif
- "pxor %[crc_carryover], %[crc0]\n"
-
- SSE2_MOVQ " %[crc0], %[tmp0]\n"
- "psrldq $8, %[crc0]\n"
- "xorq %[tmp0], %[buf0]\n"
- "movzbq %b[buf0], %[tmp0]\n"
- "pxor %[crc0], %[crc1]\n"
- "addq %[tmp0], %[tmp0]\n"
- "shrq $8, %[buf0]\n"
- "movdqa (%[table], %[tmp0], 8), %[crc0]\n"
-
- SSE2_MOVQ " %[crc1], %[tmp1]\n"
- "psrldq $8, %[crc1]\n"
- "xorq %[tmp1], %[buf1]\n"
- "movzbq %b[buf1], %[tmp1]\n"
- "pxor %[crc1], %[crc2]\n"
- "addq %[tmp1], %[tmp1]\n"
- "shrq $8, %[buf1]\n"
- "movdqa (%[table], %[tmp1], 8), %[crc1]\n"
-
- SSE2_MOVQ " %[crc2], %[tmp0]\n"
- "psrldq $8, %[crc2]\n"
- "xorq %[tmp0], %[buf2]\n"
- "movzbq %b[buf2], %[tmp0]\n"
- "pxor %[crc2], %[crc3]\n"
- "addq %[tmp0], %[tmp0]\n"
- "shrq $8, %[buf2]\n"
- "movdqa (%[table], %[tmp0], 8), %[crc2]\n"
-
- SSE2_MOVQ " %[crc3], %[tmp1]\n"
- "psrldq $8, %[crc3]\n"
- "xorq %[tmp1], %[buf3]\n"
- "movzbq %b[buf3], %[tmp1]\n"
- "movdqa %[crc3], %[crc_carryover]\n"
- "addq %[tmp1], %[tmp1]\n"
- "shrq $8, %[buf3]\n"
- "movdqa (%[table], %[tmp1], 8), %[crc3]\n"
-
-#define XOR(byte) \
- "movzbq %b[buf0], %[tmp0]\n" \
- "shrq $8, %[buf0]\n" \
- "addq %[tmp0], %[tmp0]\n" \
- "pxor " #byte "*256*16(%[table], %[tmp0], 8), %[crc0]\n" \
- "movzbq %b[buf1], %[tmp1]\n" \
- "shrq $8, %[buf1]\n" \
- "addq %[tmp1], %[tmp1]\n" \
- "pxor " #byte "*256*16(%[table], %[tmp1], 8), %[crc1]\n" \
- "movzbq %b[buf2], %[tmp0]\n" \
- "shrq $8, %[buf2]\n" \
- "addq %[tmp0], %[tmp0]\n" \
- "pxor " #byte "*256*16(%[table], %[tmp0], 8), %[crc2]\n" \
- "movzbq %b[buf3], %[tmp1]\n" \
- "shrq $8, %[buf3]\n" \
- "addq %[tmp1], %[tmp1]\n" \
- "pxor " #byte "*256*16(%[table], %[tmp1], 8), %[crc3]\n"
-
- XOR(1)
- XOR(2)
- XOR(3)
- XOR(4)
- XOR(5)
- XOR(6)
-#undef XOR
-
- "addq %[buf0], %[buf0]\n"
- "pxor 7*256*16(%[table], %[buf0], 8), %[crc0]\n"
- "movq (%[src]), %[buf0]\n"
-
- "addq %[buf1], %[buf1]\n"
- "pxor 7*256*16(%[table], %[buf1], 8), %[crc1]\n"
- "movq 1*8(%[src]), %[buf1]\n"
-
- "addq %[buf2], %[buf2]\n"
- "pxor 7*256*16(%[table], %[buf2], 8), %[crc2]\n"
- "movq 2*8(%[src]), %[buf2]\n"
-
- "addq %[buf3], %[buf3]\n"
- "pxor 7*256*16(%[table], %[buf3], 8), %[crc3]\n"
- "movq 3*8(%[src]), %[buf3]\n"
-
- "cmpq %[src], %[end]\n"
- "ja 1b\n"
-
- "pxor %[crc_carryover], %[crc0]\n"
- CRC_WORD_ASM()
-
- "pxor %[crc1], %[crc0]\n"
- "movq %[buf1], %[buf0]\n"
- CRC_WORD_ASM()
-
- "pxor %[crc2], %[crc0]\n"
- "movq %[buf2], %[buf0]\n"
- CRC_WORD_ASM()
-
- "pxor %[crc3], %[crc0]\n"
- "movq %[buf3], %[buf0]\n"
- CRC_WORD_ASM()
-
- "add $4*8, %[src]\n"
- "2:\n"
-
- "add $2*4*8 - 8, %[end]\n"
-
- "cmpq %[src], %[end]\n"
- "jbe 4f\n"
- "3:\n"
- "movq (%[src]), %[buf0]\n"
- "addq $8, %[src]\n"
- CRC_WORD_ASM()
- "cmpq %[src], %[end]\n"
- "ja 3b\n"
-
- "4:\n"
- "add $7, %[end]\n"
-
- "cmpq %[src], %[end]\n"
- "jbe 6f\n"
-
- "5:\n"
- "movzbq (%[src]), %[buf0]\n"
- "add $1, %[src]\n"
- SSE2_MOVQ " %[crc0], %[tmp0]\n"
- "movzx %b[tmp0], %[tmp0]\n"
- "psrldq $1, %[crc0]\n"
- "xor %[buf0], %[tmp0]\n"
- "addq %[tmp0], %[tmp0]\n"
- "pxor 7*256*16(%[table_word], %[tmp0], 8), %[crc0]\n"
-
- "cmpq %[src], %[end]\n"
- "ja 5b\n"
-
- "6:\n"
-
- : // outputs
- [src] "+r" (src),
- [end] "+r" (end),
- [crc0] "+x" (crc0),
- [crc1] "=&x" (crc1),
- [crc2] "=&x" (crc2),
- [crc3] "=&x" (crc3),
- [crc_carryover] "=&x" (crc_carryover),
- [buf0] "=&r" (buf0),
- [buf1] "=&r" (buf1),
- [buf2] "=&r" (buf2),
- [buf3] "=&r" (buf3),
- [tmp0] "=&r" (tmp0),
- [tmp1] "=&r" (tmp1)
-
- : // inputs
- [table_word] "r" (this->crc_word_),
- [table] "r" (this->crc_word_interleaved_));
-
- return (this->Base().Canonize() ^ crc0);
-}
-
-} // namespace crcutil
-
-#endif // defined(__GNUC__) && CRCUTIL_USE_ASM && HAVE_AMD64 && HAVE_SSE2
+// Copyright 2010 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Implements multiword CRC for GCC on i386.
+//
+// Small comment: the trick described in
+// http://software.intel.com/en-us/articles/fast-simd-integer-move-for-the-intel-pentiumr-4-processor
+// (replace "movdqa dst, src" with "pshufd $0xE4, src, dst")
+// did not work: execution time increased from
+// 1.8 CPU cycles/byte to 2.1 CPU cycles/byte.
+// So it may be good idea on P4 but it's not on newer CPUs.
+//
+// movaps/xorps vs. movdqa/pxor did not make any difference.
+
+#include "generic_crc.h"
+#include "uint128_sse2.h"
+
+#if defined(__GNUC__) && CRCUTIL_USE_ASM && HAVE_AMD64 && HAVE_SSE2
+
+namespace crcutil {
+
+template<> uint128_sse2
+GenericCrc<uint128_sse2, uint128_sse2, uint64, 4>::CrcMultiwordGccAmd64Sse2(
+ const uint8 *src, const uint8 *end, const uint128_sse2 &start) const;
+
+template<>
+uint128_sse2 GenericCrc<uint128_sse2, uint128_sse2, uint64, 4>::CrcMultiword(
+ const void *data, size_t bytes, const uint128_sse2 &start) const {
+ const uint8 *src = static_cast<const uint8 *>(data);
+ uint128_sse2 crc = start ^ this->Base().Canonize();
+ const uint8 *end = src + bytes;
+ if (bytes <= 7) {
+ for (; src < end; ++src) {
+ CRC_BYTE(this, crc, *src);
+ }
+ return (crc ^ this->Base().Canonize());
+ }
+
+ ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc, uint64);
+ if (src >= end) {
+ return (crc ^ this->Base().Canonize());
+ }
+
+ return CrcMultiwordGccAmd64Sse2(src, end, crc);
+}
+
+#define CRC_WORD_ASM() \
+ SSE2_MOVQ " %[crc0], %[tmp0]\n" \
+ "xorq %[tmp0], %[buf0]\n" \
+ "psrldq $8, %[crc0]\n" \
+ "movzbq %b[buf0], %[tmp0]\n" \
+ "shrq $8, %[buf0]\n" \
+ "addq %[tmp0], %[tmp0]\n" \
+ "pxor (%[table_word], %[tmp0], 8), %[crc0]\n" \
+ "movzbq %b[buf0], %[tmp1]\n" \
+ "shrq $8, %[buf0]\n" \
+ "addq %[tmp1], %[tmp1]\n" \
+ "pxor 1*256*16(%[table_word], %[tmp1], 8), %[crc0]\n" \
+ "movzbq %b[buf0], %[tmp0]\n" \
+ "shrq $8, %[buf0]\n" \
+ "addq %[tmp0], %[tmp0]\n" \
+ "pxor 2*256*16(%[table_word], %[tmp0], 8), %[crc0]\n" \
+ "movzbq %b[buf0], %[tmp1]\n" \
+ "shrq $8, %[buf0]\n" \
+ "addq %[tmp1], %[tmp1]\n" \
+ "pxor 3*256*16(%[table_word], %[tmp1], 8), %[crc0]\n" \
+ "movzbq %b[buf0], %[tmp0]\n" \
+ "shrq $8, %[buf0]\n" \
+ "addq %[tmp0], %[tmp0]\n" \
+ "pxor 4*256*16(%[table_word], %[tmp0], 8), %[crc0]\n" \
+ "movzbq %b[buf0], %[tmp1]\n" \
+ "shrq $8, %[buf0]\n" \
+ "addq %[tmp1], %[tmp1]\n" \
+ "pxor 5*256*16(%[table_word], %[tmp1], 8), %[crc0]\n" \
+ "movzbq %b[buf0], %[tmp0]\n" \
+ "shrq $8, %[buf0]\n" \
+ "addq %[tmp0], %[tmp0]\n" \
+ "pxor 6*256*16(%[table_word], %[tmp0], 8), %[crc0]\n" \
+ "addq %[buf0], %[buf0]\n" \
+ "pxor 7*256*16(%[table_word], %[buf0], 8), %[crc0]\n"
+
+template<> uint128_sse2
+GenericCrc<uint128_sse2, uint128_sse2, uint64, 4>::CrcMultiwordGccAmd64Sse2(
+ const uint8 *src, const uint8 *end, const uint128_sse2 &start) const {
+ __m128i crc0 = start;
+ __m128i crc1;
+ __m128i crc2;
+ __m128i crc3;
+ __m128i crc_carryover;
+
+ uint64 buf0;
+ uint64 buf1;
+ uint64 buf2;
+ uint64 buf3;
+
+ uint64 tmp0;
+ uint64 tmp1;
+
+ asm(
+ "sub $2*4*8 - 1, %[end]\n"
+ "cmpq %[src], %[end]\n"
+ "jbe 2f\n"
+
+ "pxor %[crc1], %[crc1]\n"
+ "pxor %[crc2], %[crc2]\n"
+ "pxor %[crc3], %[crc3]\n"
+ "pxor %[crc_carryover], %[crc_carryover]\n"
+ "movq (%[src]), %[buf0]\n"
+ "movq 1*8(%[src]), %[buf1]\n"
+ "movq 2*8(%[src]), %[buf2]\n"
+ "movq 3*8(%[src]), %[buf3]\n"
+
+ "1:\n"
+#if HAVE_SSE && CRCUTIL_PREFETCH_WIDTH > 0
+ "prefetcht0 " TO_STRING(CRCUTIL_PREFETCH_WIDTH) "(%[src])\n"
+#endif
+#if GCC_VERSION_AVAILABLE(4, 5)
+ // Bug in GCC 4.2.4?
+ "add $4*8, %[src]\n"
+#else
+ "lea 4*8(%[src]), %[src]\n"
+#endif
+ "pxor %[crc_carryover], %[crc0]\n"
+
+ SSE2_MOVQ " %[crc0], %[tmp0]\n"
+ "psrldq $8, %[crc0]\n"
+ "xorq %[tmp0], %[buf0]\n"
+ "movzbq %b[buf0], %[tmp0]\n"
+ "pxor %[crc0], %[crc1]\n"
+ "addq %[tmp0], %[tmp0]\n"
+ "shrq $8, %[buf0]\n"
+ "movdqa (%[table], %[tmp0], 8), %[crc0]\n"
+
+ SSE2_MOVQ " %[crc1], %[tmp1]\n"
+ "psrldq $8, %[crc1]\n"
+ "xorq %[tmp1], %[buf1]\n"
+ "movzbq %b[buf1], %[tmp1]\n"
+ "pxor %[crc1], %[crc2]\n"
+ "addq %[tmp1], %[tmp1]\n"
+ "shrq $8, %[buf1]\n"
+ "movdqa (%[table], %[tmp1], 8), %[crc1]\n"
+
+ SSE2_MOVQ " %[crc2], %[tmp0]\n"
+ "psrldq $8, %[crc2]\n"
+ "xorq %[tmp0], %[buf2]\n"
+ "movzbq %b[buf2], %[tmp0]\n"
+ "pxor %[crc2], %[crc3]\n"
+ "addq %[tmp0], %[tmp0]\n"
+ "shrq $8, %[buf2]\n"
+ "movdqa (%[table], %[tmp0], 8), %[crc2]\n"
+
+ SSE2_MOVQ " %[crc3], %[tmp1]\n"
+ "psrldq $8, %[crc3]\n"
+ "xorq %[tmp1], %[buf3]\n"
+ "movzbq %b[buf3], %[tmp1]\n"
+ "movdqa %[crc3], %[crc_carryover]\n"
+ "addq %[tmp1], %[tmp1]\n"
+ "shrq $8, %[buf3]\n"
+ "movdqa (%[table], %[tmp1], 8), %[crc3]\n"
+
+#define XOR(byte) \
+ "movzbq %b[buf0], %[tmp0]\n" \
+ "shrq $8, %[buf0]\n" \
+ "addq %[tmp0], %[tmp0]\n" \
+ "pxor " #byte "*256*16(%[table], %[tmp0], 8), %[crc0]\n" \
+ "movzbq %b[buf1], %[tmp1]\n" \
+ "shrq $8, %[buf1]\n" \
+ "addq %[tmp1], %[tmp1]\n" \
+ "pxor " #byte "*256*16(%[table], %[tmp1], 8), %[crc1]\n" \
+ "movzbq %b[buf2], %[tmp0]\n" \
+ "shrq $8, %[buf2]\n" \
+ "addq %[tmp0], %[tmp0]\n" \
+ "pxor " #byte "*256*16(%[table], %[tmp0], 8), %[crc2]\n" \
+ "movzbq %b[buf3], %[tmp1]\n" \
+ "shrq $8, %[buf3]\n" \
+ "addq %[tmp1], %[tmp1]\n" \
+ "pxor " #byte "*256*16(%[table], %[tmp1], 8), %[crc3]\n"
+
+ XOR(1)
+ XOR(2)
+ XOR(3)
+ XOR(4)
+ XOR(5)
+ XOR(6)
+#undef XOR
+
+ "addq %[buf0], %[buf0]\n"
+ "pxor 7*256*16(%[table], %[buf0], 8), %[crc0]\n"
+ "movq (%[src]), %[buf0]\n"
+
+ "addq %[buf1], %[buf1]\n"
+ "pxor 7*256*16(%[table], %[buf1], 8), %[crc1]\n"
+ "movq 1*8(%[src]), %[buf1]\n"
+
+ "addq %[buf2], %[buf2]\n"
+ "pxor 7*256*16(%[table], %[buf2], 8), %[crc2]\n"
+ "movq 2*8(%[src]), %[buf2]\n"
+
+ "addq %[buf3], %[buf3]\n"
+ "pxor 7*256*16(%[table], %[buf3], 8), %[crc3]\n"
+ "movq 3*8(%[src]), %[buf3]\n"
+
+ "cmpq %[src], %[end]\n"
+ "ja 1b\n"
+
+ "pxor %[crc_carryover], %[crc0]\n"
+ CRC_WORD_ASM()
+
+ "pxor %[crc1], %[crc0]\n"
+ "movq %[buf1], %[buf0]\n"
+ CRC_WORD_ASM()
+
+ "pxor %[crc2], %[crc0]\n"
+ "movq %[buf2], %[buf0]\n"
+ CRC_WORD_ASM()
+
+ "pxor %[crc3], %[crc0]\n"
+ "movq %[buf3], %[buf0]\n"
+ CRC_WORD_ASM()
+
+ "add $4*8, %[src]\n"
+ "2:\n"
+
+ "add $2*4*8 - 8, %[end]\n"
+
+ "cmpq %[src], %[end]\n"
+ "jbe 4f\n"
+ "3:\n"
+ "movq (%[src]), %[buf0]\n"
+ "addq $8, %[src]\n"
+ CRC_WORD_ASM()
+ "cmpq %[src], %[end]\n"
+ "ja 3b\n"
+
+ "4:\n"
+ "add $7, %[end]\n"
+
+ "cmpq %[src], %[end]\n"
+ "jbe 6f\n"
+
+ "5:\n"
+ "movzbq (%[src]), %[buf0]\n"
+ "add $1, %[src]\n"
+ SSE2_MOVQ " %[crc0], %[tmp0]\n"
+ "movzx %b[tmp0], %[tmp0]\n"
+ "psrldq $1, %[crc0]\n"
+ "xor %[buf0], %[tmp0]\n"
+ "addq %[tmp0], %[tmp0]\n"
+ "pxor 7*256*16(%[table_word], %[tmp0], 8), %[crc0]\n"
+
+ "cmpq %[src], %[end]\n"
+ "ja 5b\n"
+
+ "6:\n"
+
+ : // outputs
+ [src] "+r" (src),
+ [end] "+r" (end),
+ [crc0] "+x" (crc0),
+ [crc1] "=&x" (crc1),
+ [crc2] "=&x" (crc2),
+ [crc3] "=&x" (crc3),
+ [crc_carryover] "=&x" (crc_carryover),
+ [buf0] "=&r" (buf0),
+ [buf1] "=&r" (buf1),
+ [buf2] "=&r" (buf2),
+ [buf3] "=&r" (buf3),
+ [tmp0] "=&r" (tmp0),
+ [tmp1] "=&r" (tmp1)
+
+ : // inputs
+ [table_word] "r" (this->crc_word_),
+ [table] "r" (this->crc_word_interleaved_));
+
+ return (this->Base().Canonize() ^ crc0);
+}
+
+} // namespace crcutil
+
+#endif // defined(__GNUC__) && CRCUTIL_USE_ASM && HAVE_AMD64 && HAVE_SSE2
diff --git a/contrib/libs/crcutil/multiword_64_64_cl_i386_mmx.cc b/contrib/libs/crcutil/multiword_64_64_cl_i386_mmx.cc
index af7352aa46..4706d9c005 100644
--- a/contrib/libs/crcutil/multiword_64_64_cl_i386_mmx.cc
+++ b/contrib/libs/crcutil/multiword_64_64_cl_i386_mmx.cc
@@ -1,304 +1,304 @@
-// Copyright 2010 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Implements 64-bit multiword CRC for Microsoft and Intel compilers
-// using MMX instructions (i386).
-
-#include "generic_crc.h"
-
-#if CRCUTIL_USE_ASM && HAVE_I386 && HAVE_MMX && defined(_MSC_VER)
-
-namespace crcutil {
-
-#define CRC_WORD_MMX() \
- __asm pxor BUF0, CRC0 \
- __asm movd TMP0, BUF0 \
- __asm psrlq BUF0, 32 \
- __asm movzx TEMP, TMP0L \
- __asm shr TMP0, 8 \
- __asm movq CRC0, [TABLE + TEMP * 8] \
- __asm movzx TEMP, TMP0L \
- __asm shr TMP0, 8 \
- __asm pxor CRC0, [TABLE + TEMP * 8 + 1 * 256 * 8] \
- __asm movzx TEMP, TMP0L \
- __asm shr TMP0, 8 \
- __asm pxor CRC0, [TABLE + TEMP * 8 + 2 * 256 * 8] \
- __asm pxor CRC0, [TABLE + TMP0 * 8 + 3 * 256 * 8] \
- __asm movd TMP0, BUF0 \
- __asm movzx TEMP, TMP0L \
- __asm shr TMP0, 8 \
- __asm pxor CRC0, [TABLE + TEMP * 8 + 4 * 256 * 8] \
- __asm movzx TEMP, TMP0L \
- __asm shr TMP0, 8 \
- __asm pxor CRC0, [TABLE + TEMP * 8 + 5 * 256 * 8] \
- __asm movzx TEMP, TMP0L \
- __asm shr TMP0, 8 \
- __asm pxor CRC0, [TABLE + TEMP * 8 + 6 * 256 * 8] \
- __asm pxor CRC0, [TABLE + TMP0 * 8 + 7 * 256 * 8]
-
-// frame pointer register 'ebp' modified by inline assembly code
-#pragma warning(disable: 4731)
-
-template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiwordI386Mmx(
- const void *data,
- size_t bytes,
- const uint64 &start) const {
- const uint8 *src = static_cast<const uint8 *>(data);
- const uint8 *end = src + bytes;
- uint64 crc0 = start ^ this->Base().Canonize();
-
- ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, uint64);
- if (src >= end) {
- return (crc0 ^ this->Base().Canonize());
- }
-
-#define CRC0 mm0
-#define CRC1 mm1
-#define CRC2 mm2
-#define CRC3 mm3
-#define BUF0 mm4
-#define BUF1 mm5
-#define BUF2 mm6
-#define BUF3 mm7
-#define TMP0 eax
-#define TMP0L al
-#define TMP0H ah
-#define TMP1 ebx
-#define TMP1L bl
-#define TMP1H bh
-#define TMP2 ecx
-#define TMP2L cl
-#define TMP2H ch
-#define TMP3 edx
-#define TMP3L dl
-#define TMP3H dh
-#define TEMP edi
-#define SRC esi
-#define END [esp]
-#define TABLE ebp
-
-
- const uint64 *interleaved_table_address =
- &this->crc_word_interleaved_[0][0];
- const uint64 *word_table_address = &this->crc_word_[0][0];
-
- __asm {
- push ebp
-
- mov TMP0, interleaved_table_address
-
- movq CRC0, crc0
- mov SRC, src
- mov TMP1, end
- sub TMP1, 2*4*8 - 1
- cmp SRC, TMP1
- mov TABLE, word_table_address
- jae end_main_loop
-
- push TABLE
- mov TABLE, TMP0
- push TMP1
-
- pxor CRC1, CRC1
- pxor CRC2, CRC2
- pxor CRC3, CRC3
-
- movq BUF0, [SRC]
- movq BUF1, [SRC + 1 * 8]
- movq BUF2, [SRC + 2 * 8]
- movq BUF3, [SRC + 3 * 8]
-
- main_loop:
-#if HAVE_SSE && CRCUTIL_PREFETCH_WIDTH > 0
- prefetcht0 [SRC + CRCUTIL_PREFETCH_WIDTH]
-#endif
- add SRC, 32
- pxor BUF0, CRC0
- pxor BUF1, CRC1
- pxor BUF2, CRC2
- pxor BUF3, CRC3
-
- movd TMP0, BUF0
- psrlq BUF0, 32
- movd TMP1, BUF1
- psrlq BUF1, 32
- movd TMP2, BUF2
- psrlq BUF2, 32
- movd TMP3, BUF3
- psrlq BUF3, 32
-
- movzx TEMP, TMP0L
- movq CRC0, [TABLE + TEMP * 8]
- movzx TEMP, TMP1L
- movq CRC1, [TABLE + TEMP * 8]
- movzx TEMP, TMP2L
- movq CRC2, [TABLE + TEMP * 8]
- movzx TEMP, TMP3L
- movq CRC3, [TABLE + TEMP * 8]
-
- movzx TEMP, TMP0H
- shr TMP0, 16
- pxor CRC0, [TABLE + TEMP * 8 + 1 * 256 * 8]
- movzx TEMP, TMP1H
- shr TMP1, 16
- pxor CRC1, [TABLE + TEMP * 8 + 1 * 256 * 8]
- movzx TEMP, TMP2H
- shr TMP2, 16
- pxor CRC2, [TABLE + TEMP * 8 + 1 * 256 * 8]
- movzx TEMP, TMP3H
- shr TMP3, 16
- pxor CRC3, [TABLE + TEMP * 8 + 1 * 256 * 8]
-
- movzx TEMP, TMP0L
- shr TMP0, 8
- pxor CRC0, [TABLE + TEMP * 8 + 2 * 256 * 8]
- movzx TEMP, TMP1L
- shr TMP1, 8
- pxor CRC1, [TABLE + TEMP * 8 + 2 * 256 * 8]
- movzx TEMP, TMP2L
- shr TMP2, 8
- pxor CRC2, [TABLE + TEMP * 8 + 2 * 256 * 8]
- movzx TEMP, TMP3L
- shr TMP3, 8
- pxor CRC3, [TABLE + TEMP * 8 + 2 * 256 * 8]
-
- pxor CRC0, [TABLE + TMP0 * 8 + 3 * 256 * 8]
- movd TMP0, BUF0
- pxor CRC1, [TABLE + TMP1 * 8 + 3 * 256 * 8]
- movd TMP1, BUF1
- pxor CRC2, [TABLE + TMP2 * 8 + 3 * 256 * 8]
- movd TMP2, BUF2
- pxor CRC3, [TABLE + TMP3 * 8 + 3 * 256 * 8]
- movd TMP3, BUF3
-
- movzx TEMP, TMP0L
- pxor CRC0, [TABLE + TEMP * 8 + 4 * 256 * 8]
- movzx TEMP, TMP1L
- pxor CRC1, [TABLE + TEMP * 8 + 4 * 256 * 8]
- movzx TEMP, TMP2L
- pxor CRC2, [TABLE + TEMP * 8 + 4 * 256 * 8]
- movzx TEMP, TMP3L
- pxor CRC3, [TABLE + TEMP * 8 + 4 * 256 * 8]
-
- movzx TEMP, TMP0H
- shr TMP0, 16
- pxor CRC0, [TABLE + TEMP * 8 + 5 * 256 * 8]
- movzx TEMP, TMP1H
- shr TMP1, 16
- pxor CRC1, [TABLE + TEMP * 8 + 5 * 256 * 8]
- movzx TEMP, TMP2H
- shr TMP2, 16
- pxor CRC2, [TABLE + TEMP * 8 + 5 * 256 * 8]
- movzx TEMP, TMP3H
- shr TMP3, 16
- pxor CRC3, [TABLE + TEMP * 8 + 5 * 256 * 8]
-
- movzx TEMP, TMP0L
- shr TMP0, 8
- pxor CRC0, [TABLE + TEMP * 8 + 6 * 256 * 8]
- movzx TEMP, TMP1L
- shr TMP1, 8
- pxor CRC1, [TABLE + TEMP * 8 + 6 * 256 * 8]
- movzx TEMP, TMP2L
- shr TMP2, 8
- pxor CRC2, [TABLE + TEMP * 8 + 6 * 256 * 8]
- movzx TEMP, TMP3L
- shr TMP3, 8
- pxor CRC3, [TABLE + TEMP * 8 + 6 * 256 * 8]
-
- pxor CRC0, [TABLE + TMP0 * 8 + 7 * 256 * 8]
- movq BUF0, [SRC]
- pxor CRC1, [TABLE + TMP1 * 8 + 7 * 256 * 8]
- movq BUF1, [SRC + 1 * 8]
- pxor CRC2, [TABLE + TMP2 * 8 + 7 * 256 * 8]
- movq BUF2, [SRC + 2 * 8]
- pxor CRC3, [TABLE + TMP3 * 8 + 7 * 256 * 8]
- movq BUF3, [SRC + 3 * 8]
-
- cmp END, SRC
- ja main_loop
-
-#undef END
-#define END TMP1
- pop END
- pop TABLE
- add SRC, 32
-
- CRC_WORD_MMX()
-
- pxor BUF1, CRC1
- movq BUF0, BUF1
- CRC_WORD_MMX()
-
- pxor BUF2, CRC2
- movq BUF0, BUF2
- CRC_WORD_MMX()
-
- pxor BUF3, CRC3
- movq BUF0, BUF3
- CRC_WORD_MMX()
-
- end_main_loop:
- add END, 2*4*8 - 8
- cmp SRC, END
- jae end_word_loop
-
- word_loop:
- movq BUF0, [SRC]
- add SRC, 8
- CRC_WORD_MMX()
- cmp END, SRC
- ja word_loop
- end_word_loop:
-
-#if 0 // Plain C version is faster?
- add END, 7
- cmp SRC, END
- jae end_byte_loop
-
- byte_loop:
- movd TMP0, CRC0
- movzx TEMP, byte ptr [SRC]
- movzx TMP0, TMP0L
- psrlq CRC0, 8
- xor TEMP, TMP0
- add SRC, 1
- pxor CRC0, [TABLE + TEMP*8 + 7*256*8]
- cmp END, SRC
- ja byte_loop
- end_byte_loop:
-#endif
-
- pop ebp
-
- mov src, SRC
- movq crc0, CRC0
-
- emms
- }
-
-#if 1
- // Compute CRC of remaining bytes.
- for (;src < end; ++src) {
- CRC_BYTE(this, crc0, *src);
- }
-#endif
-
- return (crc0 ^ this->Base().Canonize());
-}
-
-
-} // namespace crcutil
-
-#endif // CRCUTIL_USE_ASM && HAVE_I386 && HAVE_MMX && defined(_MSC_VER)
+// Copyright 2010 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Implements 64-bit multiword CRC for Microsoft and Intel compilers
+// using MMX instructions (i386).
+
+#include "generic_crc.h"
+
+#if CRCUTIL_USE_ASM && HAVE_I386 && HAVE_MMX && defined(_MSC_VER)
+
+namespace crcutil {
+
+#define CRC_WORD_MMX() \
+ __asm pxor BUF0, CRC0 \
+ __asm movd TMP0, BUF0 \
+ __asm psrlq BUF0, 32 \
+ __asm movzx TEMP, TMP0L \
+ __asm shr TMP0, 8 \
+ __asm movq CRC0, [TABLE + TEMP * 8] \
+ __asm movzx TEMP, TMP0L \
+ __asm shr TMP0, 8 \
+ __asm pxor CRC0, [TABLE + TEMP * 8 + 1 * 256 * 8] \
+ __asm movzx TEMP, TMP0L \
+ __asm shr TMP0, 8 \
+ __asm pxor CRC0, [TABLE + TEMP * 8 + 2 * 256 * 8] \
+ __asm pxor CRC0, [TABLE + TMP0 * 8 + 3 * 256 * 8] \
+ __asm movd TMP0, BUF0 \
+ __asm movzx TEMP, TMP0L \
+ __asm shr TMP0, 8 \
+ __asm pxor CRC0, [TABLE + TEMP * 8 + 4 * 256 * 8] \
+ __asm movzx TEMP, TMP0L \
+ __asm shr TMP0, 8 \
+ __asm pxor CRC0, [TABLE + TEMP * 8 + 5 * 256 * 8] \
+ __asm movzx TEMP, TMP0L \
+ __asm shr TMP0, 8 \
+ __asm pxor CRC0, [TABLE + TEMP * 8 + 6 * 256 * 8] \
+ __asm pxor CRC0, [TABLE + TMP0 * 8 + 7 * 256 * 8]
+
+// frame pointer register 'ebp' modified by inline assembly code
+#pragma warning(disable: 4731)
+
+template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiwordI386Mmx(
+ const void *data,
+ size_t bytes,
+ const uint64 &start) const {
+ const uint8 *src = static_cast<const uint8 *>(data);
+ const uint8 *end = src + bytes;
+ uint64 crc0 = start ^ this->Base().Canonize();
+
+ ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, uint64);
+ if (src >= end) {
+ return (crc0 ^ this->Base().Canonize());
+ }
+
+#define CRC0 mm0
+#define CRC1 mm1
+#define CRC2 mm2
+#define CRC3 mm3
+#define BUF0 mm4
+#define BUF1 mm5
+#define BUF2 mm6
+#define BUF3 mm7
+#define TMP0 eax
+#define TMP0L al
+#define TMP0H ah
+#define TMP1 ebx
+#define TMP1L bl
+#define TMP1H bh
+#define TMP2 ecx
+#define TMP2L cl
+#define TMP2H ch
+#define TMP3 edx
+#define TMP3L dl
+#define TMP3H dh
+#define TEMP edi
+#define SRC esi
+#define END [esp]
+#define TABLE ebp
+
+
+ const uint64 *interleaved_table_address =
+ &this->crc_word_interleaved_[0][0];
+ const uint64 *word_table_address = &this->crc_word_[0][0];
+
+ __asm {
+ push ebp
+
+ mov TMP0, interleaved_table_address
+
+ movq CRC0, crc0
+ mov SRC, src
+ mov TMP1, end
+ sub TMP1, 2*4*8 - 1
+ cmp SRC, TMP1
+ mov TABLE, word_table_address
+ jae end_main_loop
+
+ push TABLE
+ mov TABLE, TMP0
+ push TMP1
+
+ pxor CRC1, CRC1
+ pxor CRC2, CRC2
+ pxor CRC3, CRC3
+
+ movq BUF0, [SRC]
+ movq BUF1, [SRC + 1 * 8]
+ movq BUF2, [SRC + 2 * 8]
+ movq BUF3, [SRC + 3 * 8]
+
+ main_loop:
+#if HAVE_SSE && CRCUTIL_PREFETCH_WIDTH > 0
+ prefetcht0 [SRC + CRCUTIL_PREFETCH_WIDTH]
+#endif
+ add SRC, 32
+ pxor BUF0, CRC0
+ pxor BUF1, CRC1
+ pxor BUF2, CRC2
+ pxor BUF3, CRC3
+
+ movd TMP0, BUF0
+ psrlq BUF0, 32
+ movd TMP1, BUF1
+ psrlq BUF1, 32
+ movd TMP2, BUF2
+ psrlq BUF2, 32
+ movd TMP3, BUF3
+ psrlq BUF3, 32
+
+ movzx TEMP, TMP0L
+ movq CRC0, [TABLE + TEMP * 8]
+ movzx TEMP, TMP1L
+ movq CRC1, [TABLE + TEMP * 8]
+ movzx TEMP, TMP2L
+ movq CRC2, [TABLE + TEMP * 8]
+ movzx TEMP, TMP3L
+ movq CRC3, [TABLE + TEMP * 8]
+
+ movzx TEMP, TMP0H
+ shr TMP0, 16
+ pxor CRC0, [TABLE + TEMP * 8 + 1 * 256 * 8]
+ movzx TEMP, TMP1H
+ shr TMP1, 16
+ pxor CRC1, [TABLE + TEMP * 8 + 1 * 256 * 8]
+ movzx TEMP, TMP2H
+ shr TMP2, 16
+ pxor CRC2, [TABLE + TEMP * 8 + 1 * 256 * 8]
+ movzx TEMP, TMP3H
+ shr TMP3, 16
+ pxor CRC3, [TABLE + TEMP * 8 + 1 * 256 * 8]
+
+ movzx TEMP, TMP0L
+ shr TMP0, 8
+ pxor CRC0, [TABLE + TEMP * 8 + 2 * 256 * 8]
+ movzx TEMP, TMP1L
+ shr TMP1, 8
+ pxor CRC1, [TABLE + TEMP * 8 + 2 * 256 * 8]
+ movzx TEMP, TMP2L
+ shr TMP2, 8
+ pxor CRC2, [TABLE + TEMP * 8 + 2 * 256 * 8]
+ movzx TEMP, TMP3L
+ shr TMP3, 8
+ pxor CRC3, [TABLE + TEMP * 8 + 2 * 256 * 8]
+
+ pxor CRC0, [TABLE + TMP0 * 8 + 3 * 256 * 8]
+ movd TMP0, BUF0
+ pxor CRC1, [TABLE + TMP1 * 8 + 3 * 256 * 8]
+ movd TMP1, BUF1
+ pxor CRC2, [TABLE + TMP2 * 8 + 3 * 256 * 8]
+ movd TMP2, BUF2
+ pxor CRC3, [TABLE + TMP3 * 8 + 3 * 256 * 8]
+ movd TMP3, BUF3
+
+ movzx TEMP, TMP0L
+ pxor CRC0, [TABLE + TEMP * 8 + 4 * 256 * 8]
+ movzx TEMP, TMP1L
+ pxor CRC1, [TABLE + TEMP * 8 + 4 * 256 * 8]
+ movzx TEMP, TMP2L
+ pxor CRC2, [TABLE + TEMP * 8 + 4 * 256 * 8]
+ movzx TEMP, TMP3L
+ pxor CRC3, [TABLE + TEMP * 8 + 4 * 256 * 8]
+
+ movzx TEMP, TMP0H
+ shr TMP0, 16
+ pxor CRC0, [TABLE + TEMP * 8 + 5 * 256 * 8]
+ movzx TEMP, TMP1H
+ shr TMP1, 16
+ pxor CRC1, [TABLE + TEMP * 8 + 5 * 256 * 8]
+ movzx TEMP, TMP2H
+ shr TMP2, 16
+ pxor CRC2, [TABLE + TEMP * 8 + 5 * 256 * 8]
+ movzx TEMP, TMP3H
+ shr TMP3, 16
+ pxor CRC3, [TABLE + TEMP * 8 + 5 * 256 * 8]
+
+ movzx TEMP, TMP0L
+ shr TMP0, 8
+ pxor CRC0, [TABLE + TEMP * 8 + 6 * 256 * 8]
+ movzx TEMP, TMP1L
+ shr TMP1, 8
+ pxor CRC1, [TABLE + TEMP * 8 + 6 * 256 * 8]
+ movzx TEMP, TMP2L
+ shr TMP2, 8
+ pxor CRC2, [TABLE + TEMP * 8 + 6 * 256 * 8]
+ movzx TEMP, TMP3L
+ shr TMP3, 8
+ pxor CRC3, [TABLE + TEMP * 8 + 6 * 256 * 8]
+
+ pxor CRC0, [TABLE + TMP0 * 8 + 7 * 256 * 8]
+ movq BUF0, [SRC]
+ pxor CRC1, [TABLE + TMP1 * 8 + 7 * 256 * 8]
+ movq BUF1, [SRC + 1 * 8]
+ pxor CRC2, [TABLE + TMP2 * 8 + 7 * 256 * 8]
+ movq BUF2, [SRC + 2 * 8]
+ pxor CRC3, [TABLE + TMP3 * 8 + 7 * 256 * 8]
+ movq BUF3, [SRC + 3 * 8]
+
+ cmp END, SRC
+ ja main_loop
+
+#undef END
+#define END TMP1
+ pop END
+ pop TABLE
+ add SRC, 32
+
+ CRC_WORD_MMX()
+
+ pxor BUF1, CRC1
+ movq BUF0, BUF1
+ CRC_WORD_MMX()
+
+ pxor BUF2, CRC2
+ movq BUF0, BUF2
+ CRC_WORD_MMX()
+
+ pxor BUF3, CRC3
+ movq BUF0, BUF3
+ CRC_WORD_MMX()
+
+ end_main_loop:
+ add END, 2*4*8 - 8
+ cmp SRC, END
+ jae end_word_loop
+
+ word_loop:
+ movq BUF0, [SRC]
+ add SRC, 8
+ CRC_WORD_MMX()
+ cmp END, SRC
+ ja word_loop
+ end_word_loop:
+
+#if 0 // Plain C version is faster?
+ add END, 7
+ cmp SRC, END
+ jae end_byte_loop
+
+ byte_loop:
+ movd TMP0, CRC0
+ movzx TEMP, byte ptr [SRC]
+ movzx TMP0, TMP0L
+ psrlq CRC0, 8
+ xor TEMP, TMP0
+ add SRC, 1
+ pxor CRC0, [TABLE + TEMP*8 + 7*256*8]
+ cmp END, SRC
+ ja byte_loop
+ end_byte_loop:
+#endif
+
+ pop ebp
+
+ mov src, SRC
+ movq crc0, CRC0
+
+ emms
+ }
+
+#if 1
+ // Compute CRC of remaining bytes.
+ for (;src < end; ++src) {
+ CRC_BYTE(this, crc0, *src);
+ }
+#endif
+
+ return (crc0 ^ this->Base().Canonize());
+}
+
+
+} // namespace crcutil
+
+#endif // CRCUTIL_USE_ASM && HAVE_I386 && HAVE_MMX && defined(_MSC_VER)
diff --git a/contrib/libs/crcutil/multiword_64_64_gcc_amd64_asm.cc b/contrib/libs/crcutil/multiword_64_64_gcc_amd64_asm.cc
index 36630fd1ac..1a64b85c5e 100644
--- a/contrib/libs/crcutil/multiword_64_64_gcc_amd64_asm.cc
+++ b/contrib/libs/crcutil/multiword_64_64_gcc_amd64_asm.cc
@@ -1,298 +1,298 @@
-// Copyright 2010 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Implements multiword CRC for GCC on AMD64.
-//
-// Accoding to "Software Optimization Guide for AMD Family 10h Processors"
-// http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/40546.pdf
-// instead of
-// movzbq %al, %rsi
-// shrq $8, %rax
-// [use %rsi]
-// movzbq %al, %rsi
-// shrq $8, %rax
-// [use %rsi]
-// it is better to use 32-bit registers
-// (high 32 bits will be cleared on assignment), i.e.
-// movzbl %al, %esi
-// [use %rsi]
-// movzbl %ah, %esi
-// shrq $16, %rax
-// [use %rsi]
-// Makes instructions shorter and removes one shift
-// (the latter is not such a big deal as it's execution time
-// is nicely masked by [use %rsi] instruction).
-//
-// Performance difference:
-// About 10% degradation on bytes = 8 .. 16
-// (clobbering registers that should be saved)
-// Break even at 32 bytes.
-// 3% improvement starting from 64 bytes.
-
-#include "generic_crc.h"
-
-#if defined(__GNUC__) && CRCUTIL_USE_ASM && HAVE_AMD64
-
-namespace crcutil {
-
-template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiwordGccAmd64(
- const void *data, size_t bytes, const uint64 &start) const;
-
-template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiword(
- const void *data,
- size_t bytes,
- const uint64 &start) const {
- if (bytes <= 6 * sizeof(Word) - 1) {
- const uint8 *src = static_cast<const uint8 *>(data);
- uint64 crc = start ^ this->Base().Canonize();
- const uint8 *end = src + bytes;
-#define PROCESS_ONE_WORD() do { \
- Word buf = reinterpret_cast<const Word *>(src)[0]; \
- CRC_WORD(this, crc, buf); \
- src += sizeof(Word); \
-} while (0)
- if (bytes >= 1 * sizeof(Word)) {
- PROCESS_ONE_WORD();
- if (bytes >= 2 * sizeof(Word)) {
- PROCESS_ONE_WORD();
- if (bytes >= 3 * sizeof(Word)) {
- PROCESS_ONE_WORD();
- if (bytes >= 4 * sizeof(Word)) {
- PROCESS_ONE_WORD();
- if (bytes >= 5 * sizeof(Word)) {
- PROCESS_ONE_WORD();
- }
- }
- }
- }
- }
- for (; src < end; ++src) {
- CRC_BYTE(this, crc, *src);
- }
- return (crc ^ this->Base().Canonize());
- }
- return this->CrcMultiwordGccAmd64(data, bytes, start);
-}
-
-#define TMP0 "%%rsi"
-#define TMP0W "%%esi"
-
-#define BUF0 "%%rax"
-#define BUF0L "%%al"
-#define BUF0H "%%ah"
-
-#define BUF1 "%%rbx"
-#define BUF1L "%%bl"
-#define BUF1H "%%bh"
-
-#define BUF2 "%%rcx"
-#define BUF2L "%%cl"
-#define BUF2H "%%ch"
-
-#define BUF3 "%%rdx"
-#define BUF3L "%%dl"
-#define BUF3H "%%dh"
-
-#define CRC_WORD_ASM() \
- "xorq %[crc0], " BUF0 "\n" \
- "movzbq " BUF0L ", " TMP0 "\n" \
- "movq (%[table_word], " TMP0 ", 8), %[crc0]\n" \
- "movzbl " BUF0H ", " TMP0W "\n" \
- "shrq $16, " BUF0 "\n" \
- "xorq 1*256*8(%[table_word], " TMP0 ", 8), %[crc0]\n" \
- "movzbq " BUF0L ", " TMP0 "\n" \
- "xorq 2*256*8(%[table_word], " TMP0 ", 8), %[crc0]\n" \
- "movzbl " BUF0H ", " TMP0W "\n" \
- "shrq $16, " BUF0 "\n" \
- "xorq 3*256*8(%[table_word], " TMP0 ", 8), %[crc0]\n" \
- "movzbq " BUF0L ", " TMP0 "\n" \
- "xorq 4*256*8(%[table_word], " TMP0 ", 8), %[crc0]\n" \
- "movzbl " BUF0H ", " TMP0W "\n" \
- "shrq $16, " BUF0 "\n" \
- "xorq 5*256*8(%[table_word], " TMP0 ", 8), %[crc0]\n" \
- "movzbq " BUF0L ", " TMP0 "\n" \
- "xorq 6*256*8(%[table_word], " TMP0 ", 8), %[crc0]\n" \
- "movzbl " BUF0H ", " TMP0W "\n" \
- "xorq 7*256*8(%[table_word], " TMP0 ", 8), %[crc0]\n"
-
-template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiwordGccAmd64(
- const void *data, size_t bytes, const uint64 &start) const {
- const uint8 *src = static_cast<const uint8 *>(data);
- const uint8 *end = src + bytes;
- uint64 crc0 = start ^ this->Base().Canonize();
-
- ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, uint64);
- if (src >= end) {
- return (crc0 ^ this->Base().Canonize());
- }
-
- uint64 crc1;
- uint64 crc2;
- uint64 crc3;
-
- asm(
- "sub $2*4*8 - 1, %[end]\n"
- "cmpq %[src], %[end]\n"
- "jbe 2f\n"
- "xorq %[crc1], %[crc1]\n"
- "movq (%[src]), " BUF0 "\n"
- "movq 1*8(%[src]), " BUF1 "\n"
- "movq 2*8(%[src]), " BUF2 "\n"
- "movq 3*8(%[src]), " BUF3 "\n"
- "movq %[crc1], %[crc2]\n"
- "movq %[crc1], %[crc3]\n"
-
- "1:\n"
-#if HAVE_SSE && CRCUTIL_PREFETCH_WIDTH > 0
- "prefetcht0 " TO_STRING(CRCUTIL_PREFETCH_WIDTH) "(%[src])\n"
-#endif // HAVE_SSE
- "add $4*8, %[src]\n"
-
- // Set buffer data.
- "xorq %[crc0], " BUF0 "\n"
- "xorq %[crc1], " BUF1 "\n"
- "xorq %[crc2], " BUF2 "\n"
- "xorq %[crc3], " BUF3 "\n"
-
- // LOAD crc of byte 0 and shift buffers.
- "movzbl " BUF0L ", " TMP0W "\n"
- "movq (%[table], " TMP0 ", 8), %[crc0]\n"
- "movzbl " BUF1L ", " TMP0W "\n"
- "movq (%[table], " TMP0 ", 8), %[crc1]\n"
- "movzbl " BUF2L ", " TMP0W "\n"
- "movq (%[table], " TMP0 ", 8), %[crc2]\n"
- "movzbl " BUF3L ", " TMP0W "\n"
- "movq (%[table], " TMP0 ", 8), %[crc3]\n"
-
-#define XOR1(byte1) \
- "movzbl " BUF0L ", " TMP0W "\n" \
- "xorq " #byte1 "*256*8(%[table], " TMP0 ", 8), %[crc0]\n" \
- "movzbl " BUF1L ", " TMP0W "\n" \
- "xorq " #byte1 "*256*8(%[table], " TMP0 ", 8), %[crc1]\n" \
- "movzbl " BUF2L ", " TMP0W "\n" \
- "xorq " #byte1 "*256*8(%[table], " TMP0 ", 8), %[crc2]\n" \
- "movzbl " BUF3L ", " TMP0W "\n" \
- "xorq " #byte1 "*256*8(%[table], " TMP0 ", 8), %[crc3]\n"
-
-#define XOR2(byte2) \
- "movzbl " BUF0H ", " TMP0W "\n" \
- "shrq $16, " BUF0 "\n" \
- "xorq " #byte2 "*256*8(%[table], " TMP0 ", 8), %[crc0]\n" \
- "movzbl " BUF1H ", " TMP0W "\n" \
- "shrq $16, " BUF1 "\n" \
- "xorq " #byte2 "*256*8(%[table], " TMP0 ", 8), %[crc1]\n" \
- "movzbl " BUF2H ", " TMP0W "\n" \
- "shrq $16, " BUF2 "\n" \
- "xorq " #byte2 "*256*8(%[table], " TMP0 ", 8), %[crc2]\n" \
- "movzbl " BUF3H ", " TMP0W "\n" \
- "shrq $16, " BUF3 "\n" \
- "xorq " #byte2 "*256*8(%[table], " TMP0 ", 8), %[crc3]\n"
-
- XOR2(1)
- XOR1(2)
- XOR2(3)
- XOR1(4)
- XOR2(5)
- XOR1(6)
-
- // Update CRC registers and load buffers.
- "movzbl " BUF0H ", " TMP0W "\n"
- "xorq 7*256*8(%[table], " TMP0 ", 8), %[crc0]\n"
- "movq (%[src]), " BUF0 "\n"
- "movzbl " BUF1H ", " TMP0W "\n"
- "xorq 7*256*8(%[table], " TMP0 ", 8), %[crc1]\n"
- "movq 1*8(%[src]), " BUF1 "\n"
- "movzbl " BUF2H ", " TMP0W "\n"
- "xorq 7*256*8(%[table], " TMP0 ", 8), %[crc2]\n"
- "movq 2*8(%[src]), " BUF2 "\n"
- "movzbl " BUF3H ", " TMP0W "\n"
- "xorq 7*256*8(%[table], " TMP0 ", 8), %[crc3]\n"
- "movq 3*8(%[src]), " BUF3 "\n"
-
- "cmpq %[src], %[end]\n"
- "ja 1b\n"
-
- CRC_WORD_ASM()
-
- "xorq %[crc1], " BUF1 "\n"
- "movq " BUF1 ", " BUF0 "\n"
- CRC_WORD_ASM()
-
- "xorq %[crc2], " BUF2 "\n"
- "movq " BUF2 ", " BUF0 "\n"
- CRC_WORD_ASM()
-
- "xorq %[crc3], " BUF3 "\n"
- "movq " BUF3 ", " BUF0 "\n"
- CRC_WORD_ASM()
-
- "add $4*8, %[src]\n"
-
- "2:\n"
- "add $2*4*8 - 8, %[end]\n"
- "cmpq %[src], %[end]\n"
- "jbe 4f\n"
-
- "3:\n"
- "movq (%[src]), " BUF0 "\n"
- "add $8, %[src]\n"
- CRC_WORD_ASM()
- "cmpq %[src], %[end]\n"
- "ja 3b\n"
-
- "4:\n"
- "add $7, %[end]\n"
-
- "cmpq %[src], %[end]\n"
- "jbe 6f\n"
-
- "5:\n"
- "movzbq (%[src]), " BUF0 "\n"
- "movzbq %b[crc0], " TMP0 "\n"
- "shrq $8, %[crc0]\n"
- "xorq " BUF0 ", " TMP0 "\n"
- "add $1, %[src]\n"
- "xorq 7*256*8(%[table_word], " TMP0 ", 8), %[crc0]\n"
- "cmpq %[src], %[end]\n"
- "ja 5b\n"
-
- "6:\n"
-
-
- : // outputs
- [src] "+r" (src),
- [end] "+r" (end),
- [crc0] "+r" (crc0),
- [crc1] "=&r" (crc1),
- [crc2] "=&r" (crc2),
- [crc3] "=&r" (crc3)
-
- : // inputs
- [table] "r" (&this->crc_word_interleaved_[0][0]),
- [table_word] "r" (&this->crc_word_[0][0])
-
- : // clobbers
- "%rax", // BUF0
- "%rbx", // BUF1
- "%rcx", // BUF2
- "%rdx", // BUF3
- "%rsi" // TMP0
- );
-
- return (crc0 ^ this->Base().Canonize());
-}
-
-} // namespace crcutil
-
-#endif // defined(__GNUC__) && HAVE_AMD64 && CRCUTIL_USE_ASM
+// Copyright 2010 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Implements multiword CRC for GCC on AMD64.
+//
+// Accoding to "Software Optimization Guide for AMD Family 10h Processors"
+// http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/40546.pdf
+// instead of
+// movzbq %al, %rsi
+// shrq $8, %rax
+// [use %rsi]
+// movzbq %al, %rsi
+// shrq $8, %rax
+// [use %rsi]
+// it is better to use 32-bit registers
+// (high 32 bits will be cleared on assignment), i.e.
+// movzbl %al, %esi
+// [use %rsi]
+// movzbl %ah, %esi
+// shrq $16, %rax
+// [use %rsi]
+// Makes instructions shorter and removes one shift
+// (the latter is not such a big deal as it's execution time
+// is nicely masked by [use %rsi] instruction).
+//
+// Performance difference:
+// About 10% degradation on bytes = 8 .. 16
+// (clobbering registers that should be saved)
+// Break even at 32 bytes.
+// 3% improvement starting from 64 bytes.
+
+#include "generic_crc.h"
+
+#if defined(__GNUC__) && CRCUTIL_USE_ASM && HAVE_AMD64
+
+namespace crcutil {
+
+template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiwordGccAmd64(
+ const void *data, size_t bytes, const uint64 &start) const;
+
+template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiword(
+ const void *data,
+ size_t bytes,
+ const uint64 &start) const {
+ if (bytes <= 6 * sizeof(Word) - 1) {
+ const uint8 *src = static_cast<const uint8 *>(data);
+ uint64 crc = start ^ this->Base().Canonize();
+ const uint8 *end = src + bytes;
+#define PROCESS_ONE_WORD() do { \
+ Word buf = reinterpret_cast<const Word *>(src)[0]; \
+ CRC_WORD(this, crc, buf); \
+ src += sizeof(Word); \
+} while (0)
+ if (bytes >= 1 * sizeof(Word)) {
+ PROCESS_ONE_WORD();
+ if (bytes >= 2 * sizeof(Word)) {
+ PROCESS_ONE_WORD();
+ if (bytes >= 3 * sizeof(Word)) {
+ PROCESS_ONE_WORD();
+ if (bytes >= 4 * sizeof(Word)) {
+ PROCESS_ONE_WORD();
+ if (bytes >= 5 * sizeof(Word)) {
+ PROCESS_ONE_WORD();
+ }
+ }
+ }
+ }
+ }
+ for (; src < end; ++src) {
+ CRC_BYTE(this, crc, *src);
+ }
+ return (crc ^ this->Base().Canonize());
+ }
+ return this->CrcMultiwordGccAmd64(data, bytes, start);
+}
+
+#define TMP0 "%%rsi"
+#define TMP0W "%%esi"
+
+#define BUF0 "%%rax"
+#define BUF0L "%%al"
+#define BUF0H "%%ah"
+
+#define BUF1 "%%rbx"
+#define BUF1L "%%bl"
+#define BUF1H "%%bh"
+
+#define BUF2 "%%rcx"
+#define BUF2L "%%cl"
+#define BUF2H "%%ch"
+
+#define BUF3 "%%rdx"
+#define BUF3L "%%dl"
+#define BUF3H "%%dh"
+
+#define CRC_WORD_ASM() \
+ "xorq %[crc0], " BUF0 "\n" \
+ "movzbq " BUF0L ", " TMP0 "\n" \
+ "movq (%[table_word], " TMP0 ", 8), %[crc0]\n" \
+ "movzbl " BUF0H ", " TMP0W "\n" \
+ "shrq $16, " BUF0 "\n" \
+ "xorq 1*256*8(%[table_word], " TMP0 ", 8), %[crc0]\n" \
+ "movzbq " BUF0L ", " TMP0 "\n" \
+ "xorq 2*256*8(%[table_word], " TMP0 ", 8), %[crc0]\n" \
+ "movzbl " BUF0H ", " TMP0W "\n" \
+ "shrq $16, " BUF0 "\n" \
+ "xorq 3*256*8(%[table_word], " TMP0 ", 8), %[crc0]\n" \
+ "movzbq " BUF0L ", " TMP0 "\n" \
+ "xorq 4*256*8(%[table_word], " TMP0 ", 8), %[crc0]\n" \
+ "movzbl " BUF0H ", " TMP0W "\n" \
+ "shrq $16, " BUF0 "\n" \
+ "xorq 5*256*8(%[table_word], " TMP0 ", 8), %[crc0]\n" \
+ "movzbq " BUF0L ", " TMP0 "\n" \
+ "xorq 6*256*8(%[table_word], " TMP0 ", 8), %[crc0]\n" \
+ "movzbl " BUF0H ", " TMP0W "\n" \
+ "xorq 7*256*8(%[table_word], " TMP0 ", 8), %[crc0]\n"
+
+template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiwordGccAmd64(
+ const void *data, size_t bytes, const uint64 &start) const {
+ const uint8 *src = static_cast<const uint8 *>(data);
+ const uint8 *end = src + bytes;
+ uint64 crc0 = start ^ this->Base().Canonize();
+
+ ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, uint64);
+ if (src >= end) {
+ return (crc0 ^ this->Base().Canonize());
+ }
+
+ uint64 crc1;
+ uint64 crc2;
+ uint64 crc3;
+
+ asm(
+ "sub $2*4*8 - 1, %[end]\n"
+ "cmpq %[src], %[end]\n"
+ "jbe 2f\n"
+ "xorq %[crc1], %[crc1]\n"
+ "movq (%[src]), " BUF0 "\n"
+ "movq 1*8(%[src]), " BUF1 "\n"
+ "movq 2*8(%[src]), " BUF2 "\n"
+ "movq 3*8(%[src]), " BUF3 "\n"
+ "movq %[crc1], %[crc2]\n"
+ "movq %[crc1], %[crc3]\n"
+
+ "1:\n"
+#if HAVE_SSE && CRCUTIL_PREFETCH_WIDTH > 0
+ "prefetcht0 " TO_STRING(CRCUTIL_PREFETCH_WIDTH) "(%[src])\n"
+#endif // HAVE_SSE
+ "add $4*8, %[src]\n"
+
+ // Set buffer data.
+ "xorq %[crc0], " BUF0 "\n"
+ "xorq %[crc1], " BUF1 "\n"
+ "xorq %[crc2], " BUF2 "\n"
+ "xorq %[crc3], " BUF3 "\n"
+
+ // LOAD crc of byte 0 and shift buffers.
+ "movzbl " BUF0L ", " TMP0W "\n"
+ "movq (%[table], " TMP0 ", 8), %[crc0]\n"
+ "movzbl " BUF1L ", " TMP0W "\n"
+ "movq (%[table], " TMP0 ", 8), %[crc1]\n"
+ "movzbl " BUF2L ", " TMP0W "\n"
+ "movq (%[table], " TMP0 ", 8), %[crc2]\n"
+ "movzbl " BUF3L ", " TMP0W "\n"
+ "movq (%[table], " TMP0 ", 8), %[crc3]\n"
+
+#define XOR1(byte1) \
+ "movzbl " BUF0L ", " TMP0W "\n" \
+ "xorq " #byte1 "*256*8(%[table], " TMP0 ", 8), %[crc0]\n" \
+ "movzbl " BUF1L ", " TMP0W "\n" \
+ "xorq " #byte1 "*256*8(%[table], " TMP0 ", 8), %[crc1]\n" \
+ "movzbl " BUF2L ", " TMP0W "\n" \
+ "xorq " #byte1 "*256*8(%[table], " TMP0 ", 8), %[crc2]\n" \
+ "movzbl " BUF3L ", " TMP0W "\n" \
+ "xorq " #byte1 "*256*8(%[table], " TMP0 ", 8), %[crc3]\n"
+
+#define XOR2(byte2) \
+ "movzbl " BUF0H ", " TMP0W "\n" \
+ "shrq $16, " BUF0 "\n" \
+ "xorq " #byte2 "*256*8(%[table], " TMP0 ", 8), %[crc0]\n" \
+ "movzbl " BUF1H ", " TMP0W "\n" \
+ "shrq $16, " BUF1 "\n" \
+ "xorq " #byte2 "*256*8(%[table], " TMP0 ", 8), %[crc1]\n" \
+ "movzbl " BUF2H ", " TMP0W "\n" \
+ "shrq $16, " BUF2 "\n" \
+ "xorq " #byte2 "*256*8(%[table], " TMP0 ", 8), %[crc2]\n" \
+ "movzbl " BUF3H ", " TMP0W "\n" \
+ "shrq $16, " BUF3 "\n" \
+ "xorq " #byte2 "*256*8(%[table], " TMP0 ", 8), %[crc3]\n"
+
+ XOR2(1)
+ XOR1(2)
+ XOR2(3)
+ XOR1(4)
+ XOR2(5)
+ XOR1(6)
+
+ // Update CRC registers and load buffers.
+ "movzbl " BUF0H ", " TMP0W "\n"
+ "xorq 7*256*8(%[table], " TMP0 ", 8), %[crc0]\n"
+ "movq (%[src]), " BUF0 "\n"
+ "movzbl " BUF1H ", " TMP0W "\n"
+ "xorq 7*256*8(%[table], " TMP0 ", 8), %[crc1]\n"
+ "movq 1*8(%[src]), " BUF1 "\n"
+ "movzbl " BUF2H ", " TMP0W "\n"
+ "xorq 7*256*8(%[table], " TMP0 ", 8), %[crc2]\n"
+ "movq 2*8(%[src]), " BUF2 "\n"
+ "movzbl " BUF3H ", " TMP0W "\n"
+ "xorq 7*256*8(%[table], " TMP0 ", 8), %[crc3]\n"
+ "movq 3*8(%[src]), " BUF3 "\n"
+
+ "cmpq %[src], %[end]\n"
+ "ja 1b\n"
+
+ CRC_WORD_ASM()
+
+ "xorq %[crc1], " BUF1 "\n"
+ "movq " BUF1 ", " BUF0 "\n"
+ CRC_WORD_ASM()
+
+ "xorq %[crc2], " BUF2 "\n"
+ "movq " BUF2 ", " BUF0 "\n"
+ CRC_WORD_ASM()
+
+ "xorq %[crc3], " BUF3 "\n"
+ "movq " BUF3 ", " BUF0 "\n"
+ CRC_WORD_ASM()
+
+ "add $4*8, %[src]\n"
+
+ "2:\n"
+ "add $2*4*8 - 8, %[end]\n"
+ "cmpq %[src], %[end]\n"
+ "jbe 4f\n"
+
+ "3:\n"
+ "movq (%[src]), " BUF0 "\n"
+ "add $8, %[src]\n"
+ CRC_WORD_ASM()
+ "cmpq %[src], %[end]\n"
+ "ja 3b\n"
+
+ "4:\n"
+ "add $7, %[end]\n"
+
+ "cmpq %[src], %[end]\n"
+ "jbe 6f\n"
+
+ "5:\n"
+ "movzbq (%[src]), " BUF0 "\n"
+ "movzbq %b[crc0], " TMP0 "\n"
+ "shrq $8, %[crc0]\n"
+ "xorq " BUF0 ", " TMP0 "\n"
+ "add $1, %[src]\n"
+ "xorq 7*256*8(%[table_word], " TMP0 ", 8), %[crc0]\n"
+ "cmpq %[src], %[end]\n"
+ "ja 5b\n"
+
+ "6:\n"
+
+
+ : // outputs
+ [src] "+r" (src),
+ [end] "+r" (end),
+ [crc0] "+r" (crc0),
+ [crc1] "=&r" (crc1),
+ [crc2] "=&r" (crc2),
+ [crc3] "=&r" (crc3)
+
+ : // inputs
+ [table] "r" (&this->crc_word_interleaved_[0][0]),
+ [table_word] "r" (&this->crc_word_[0][0])
+
+ : // clobbers
+ "%rax", // BUF0
+ "%rbx", // BUF1
+ "%rcx", // BUF2
+ "%rdx", // BUF3
+ "%rsi" // TMP0
+ );
+
+ return (crc0 ^ this->Base().Canonize());
+}
+
+} // namespace crcutil
+
+#endif // defined(__GNUC__) && HAVE_AMD64 && CRCUTIL_USE_ASM
diff --git a/contrib/libs/crcutil/multiword_64_64_gcc_i386_mmx.cc b/contrib/libs/crcutil/multiword_64_64_gcc_i386_mmx.cc
index 6bd4ead5bd..2bb6e59f3b 100644
--- a/contrib/libs/crcutil/multiword_64_64_gcc_i386_mmx.cc
+++ b/contrib/libs/crcutil/multiword_64_64_gcc_i386_mmx.cc
@@ -1,261 +1,261 @@
-// Copyright 2010 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Implements multiword CRC for GCC on i386.
-
-#include "generic_crc.h"
-
-#if defined(__GNUC__) && CRCUTIL_USE_ASM && HAVE_I386 && HAVE_MMX
-
-namespace crcutil {
-
-template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiwordI386Mmx(
- const void *data, size_t bytes, const uint64 &start)
- const GCC_OMIT_FRAME_POINTER;
-
-template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiword(
- const void *data, size_t bytes, const uint64 &start) const {
- if (bytes <= 7) {
- const uint8 *src = static_cast<const uint8 *>(data);
- uint64 crc = start ^ this->Base().Canonize();
- for (const uint8 *end = src + bytes; src < end; ++src) {
- CRC_BYTE(this, crc, *src);
- }
- return (crc ^ this->Base().Canonize());
- }
- return CrcMultiwordI386Mmx(data, bytes, start);
-}
-
-#define CRC_WORD_MMX() \
- "pxor %[crc0], %[buf0]\n" \
- "movd %[buf0], %[tmp0]\n" \
- "psrlq $32, %[buf0]\n" \
- "movzbl %b[tmp0], %[temp]\n" \
- "shrl $8, %[tmp0]\n" \
- "movq (%[table], %[temp], 8), %[crc0]\n" \
- "movzbl %b[tmp0], %[temp]\n" \
- "shrl $8, %[tmp0]\n" \
- "pxor 1*256*8(%[table], %[temp], 8), %[crc0]\n" \
- "movzbl %b[tmp0], %[temp]\n" \
- "shrl $8, %[tmp0]\n" \
- "pxor 2*256*8(%[table], %[temp], 8), %[crc0]\n" \
- "pxor 3*256*8(%[table], %[tmp0], 8), %[crc0]\n" \
- "movd %[buf0], %[tmp0]\n" \
- "movzbl %b[tmp0], %[temp]\n" \
- "shrl $8, %[tmp0]\n" \
- "pxor 4*256*8(%[table], %[temp], 8), %[crc0]\n" \
- "movzbl %b[tmp0], %[temp]\n" \
- "shrl $8, %[tmp0]\n" \
- "pxor 5*256*8(%[table], %[temp], 8), %[crc0]\n" \
- "movzbl %b[tmp0], %[temp]\n" \
- "shrl $8, %[tmp0]\n" \
- "pxor 6*256*8(%[table], %[temp], 8), %[crc0]\n" \
- "pxor 7*256*8(%[table], %[tmp0], 8), %[crc0]\n"
-
-template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiwordI386Mmx(
- const void *data, size_t bytes, const uint64 &start) const {
- const uint8 *src = static_cast<const uint8 *>(data);
- const uint8 *end = src + bytes;
- uint64 crc0 = start ^ this->Base().Canonize();
-
- ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, uint64);
- if (src >= end) {
- return (crc0 ^ this->Base().Canonize());
- }
-
- uint64 crc1;
- uint64 crc2;
- uint64 crc3;
-
- uint64 buf0;
- uint64 buf1;
- uint64 buf2;
- uint64 buf3;
-
- uint32 tmp0;
- uint32 tmp1;
- uint32 tmp2;
- uint32 tmp3;
-
- uint32 temp;
-
- void *table_ptr;
- const uint64 *table_interleaved = &this->crc_word_interleaved_[0][0];
- const uint64 *table_word = &this->crc_word_[0][0];
-
- asm(
+// Copyright 2010 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Implements multiword CRC for GCC on i386.
+
+#include "generic_crc.h"
+
+#if defined(__GNUC__) && CRCUTIL_USE_ASM && HAVE_I386 && HAVE_MMX
+
+namespace crcutil {
+
+template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiwordI386Mmx(
+ const void *data, size_t bytes, const uint64 &start)
+ const GCC_OMIT_FRAME_POINTER;
+
+template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiword(
+ const void *data, size_t bytes, const uint64 &start) const {
+ if (bytes <= 7) {
+ const uint8 *src = static_cast<const uint8 *>(data);
+ uint64 crc = start ^ this->Base().Canonize();
+ for (const uint8 *end = src + bytes; src < end; ++src) {
+ CRC_BYTE(this, crc, *src);
+ }
+ return (crc ^ this->Base().Canonize());
+ }
+ return CrcMultiwordI386Mmx(data, bytes, start);
+}
+
+#define CRC_WORD_MMX() \
+ "pxor %[crc0], %[buf0]\n" \
+ "movd %[buf0], %[tmp0]\n" \
+ "psrlq $32, %[buf0]\n" \
+ "movzbl %b[tmp0], %[temp]\n" \
+ "shrl $8, %[tmp0]\n" \
+ "movq (%[table], %[temp], 8), %[crc0]\n" \
+ "movzbl %b[tmp0], %[temp]\n" \
+ "shrl $8, %[tmp0]\n" \
+ "pxor 1*256*8(%[table], %[temp], 8), %[crc0]\n" \
+ "movzbl %b[tmp0], %[temp]\n" \
+ "shrl $8, %[tmp0]\n" \
+ "pxor 2*256*8(%[table], %[temp], 8), %[crc0]\n" \
+ "pxor 3*256*8(%[table], %[tmp0], 8), %[crc0]\n" \
+ "movd %[buf0], %[tmp0]\n" \
+ "movzbl %b[tmp0], %[temp]\n" \
+ "shrl $8, %[tmp0]\n" \
+ "pxor 4*256*8(%[table], %[temp], 8), %[crc0]\n" \
+ "movzbl %b[tmp0], %[temp]\n" \
+ "shrl $8, %[tmp0]\n" \
+ "pxor 5*256*8(%[table], %[temp], 8), %[crc0]\n" \
+ "movzbl %b[tmp0], %[temp]\n" \
+ "shrl $8, %[tmp0]\n" \
+ "pxor 6*256*8(%[table], %[temp], 8), %[crc0]\n" \
+ "pxor 7*256*8(%[table], %[tmp0], 8), %[crc0]\n"
+
+template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiwordI386Mmx(
+ const void *data, size_t bytes, const uint64 &start) const {
+ const uint8 *src = static_cast<const uint8 *>(data);
+ const uint8 *end = src + bytes;
+ uint64 crc0 = start ^ this->Base().Canonize();
+
+ ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, uint64);
+ if (src >= end) {
+ return (crc0 ^ this->Base().Canonize());
+ }
+
+ uint64 crc1;
+ uint64 crc2;
+ uint64 crc3;
+
+ uint64 buf0;
+ uint64 buf1;
+ uint64 buf2;
+ uint64 buf3;
+
+ uint32 tmp0;
+ uint32 tmp1;
+ uint32 tmp2;
+ uint32 tmp3;
+
+ uint32 temp;
+
+ void *table_ptr;
+ const uint64 *table_interleaved = &this->crc_word_interleaved_[0][0];
+ const uint64 *table_word = &this->crc_word_[0][0];
+
+ asm(
"subl $2*4*8 - 1, %[end]\n"
- "cmpl %[src], %[end]\n"
- "jbe 2f\n"
-
- "pxor %[crc1], %[crc1]\n"
- "pxor %[crc2], %[crc2]\n"
- "pxor %[crc3], %[crc3]\n"
- "movq (%[src]), %[buf0]\n"
- "movq 1*8(%[src]), %[buf1]\n"
- "movq 2*8(%[src]), %[buf2]\n"
- "movq 3*8(%[src]), %[buf3]\n"
-
- "movl %[table_interleaved], %[table]\n"
- "1:\n"
-#if HAVE_SSE && CRCUTIL_PREFETCH_WIDTH > 0
- "prefetcht0 " TO_STRING(CRCUTIL_PREFETCH_WIDTH) "(%[src])\n"
-#endif
- "addl $0x20, %[src]\n"
- "pxor %[crc0], %[buf0]\n"
- "pxor %[crc1], %[buf1]\n"
- "pxor %[crc2], %[buf2]\n"
- "pxor %[crc3], %[buf3]\n"
-
- "movd %[buf0], %[tmp0]\n"
- "psrlq $32, %[buf0]\n"
- "movd %[buf1], %[tmp1]\n"
- "psrlq $32, %[buf1]\n"
- "movd %[buf2], %[tmp2]\n"
- "psrlq $32, %[buf2]\n"
- "movd %[buf3], %[tmp3]\n"
- "psrlq $32, %[buf3]\n"
-
- "movzbl %b[tmp0], %[temp]\n"
- "shrl $8, %[tmp0]\n"
- "movq (%[table], %[temp], 8), %[crc0]\n"
- "movzbl %b[tmp1], %[temp]\n"
- "shrl $8, %[tmp1]\n"
- "movq (%[table], %[temp], 8), %[crc1]\n"
- "movzbl %b[tmp2], %[temp]\n"
- "shrl $8, %[tmp2]\n"
- "movq (%[table], %[temp], 8), %[crc2]\n"
- "movzbl %b[tmp3], %[temp]\n"
- "shrl $8, %[tmp3]\n"
- "movq (%[table], %[temp], 8), %[crc3]\n"
-
-#define XOR(byte) \
- "movzbl %b[tmp0], %[temp]\n" \
- "shrl $8, %[tmp0]\n" \
- "pxor " #byte "*256*8(%[table], %[temp], 8), %[crc0]\n" \
- "movzbl %b[tmp1], %[temp]\n" \
- "shrl $8, %[tmp1]\n" \
- "pxor " #byte "*256*8(%[table], %[temp], 8), %[crc1]\n" \
- "movzbl %b[tmp2], %[temp]\n" \
- "shrl $8, %[tmp2]\n" \
- "pxor " #byte "*256*8(%[table], %[temp], 8), %[crc2]\n" \
- "movzbl %b[tmp3], %[temp]\n" \
- "shrl $8, %[tmp3]\n" \
- "pxor " #byte "*256*8(%[table], %[temp], 8), %[crc3]\n"
-
- XOR(1)
- XOR(2)
-
- "pxor 3*256*8(%[table], %[tmp0], 8), %[crc0]\n"
- "movd %[buf0], %[tmp0]\n"
- "pxor 3*256*8(%[table], %[tmp1], 8), %[crc1]\n"
- "movd %[buf1], %[tmp1]\n"
- "pxor 3*256*8(%[table], %[tmp2], 8), %[crc2]\n"
- "movd %[buf2], %[tmp2]\n"
- "pxor 3*256*8(%[table], %[tmp3], 8), %[crc3]\n"
- "movd %[buf3], %[tmp3]\n"
-
- XOR(4)
- XOR(5)
- XOR(6)
-
- "pxor 7*256*8(%[table], %[tmp0], 8), %[crc0]\n"
- "movq (%[src]), %[buf0]\n"
- "pxor 7*256*8(%[table], %[tmp1], 8), %[crc1]\n"
- "movq 1*8(%[src]), %[buf1]\n"
- "pxor 7*256*8(%[table], %[tmp2], 8), %[crc2]\n"
- "movq 2*8(%[src]), %[buf2]\n"
- "pxor 7*256*8(%[table], %[tmp3], 8), %[crc3]\n"
- "movq 3*8(%[src]), %[buf3]\n"
- "cmpl %[src], %[end]\n"
- "ja 1b\n"
-#undef XOR
-
- "movl %[table_word], %[table]\n"
- CRC_WORD_MMX()
-
- "pxor %[crc1], %[buf1]\n"
- "movq %[buf1], %[buf0]\n"
- CRC_WORD_MMX()
-
- "pxor %[crc2], %[buf2]\n"
- "movq %[buf2], %[buf0]\n"
- CRC_WORD_MMX()
-
- "pxor %[crc3], %[buf3]\n"
- "movq %[buf3], %[buf0]\n"
- CRC_WORD_MMX()
-
- "add $4*8, %[src]\n"
- "2:\n"
- "movl %[table_word], %[table]\n"
-
+ "cmpl %[src], %[end]\n"
+ "jbe 2f\n"
+
+ "pxor %[crc1], %[crc1]\n"
+ "pxor %[crc2], %[crc2]\n"
+ "pxor %[crc3], %[crc3]\n"
+ "movq (%[src]), %[buf0]\n"
+ "movq 1*8(%[src]), %[buf1]\n"
+ "movq 2*8(%[src]), %[buf2]\n"
+ "movq 3*8(%[src]), %[buf3]\n"
+
+ "movl %[table_interleaved], %[table]\n"
+ "1:\n"
+#if HAVE_SSE && CRCUTIL_PREFETCH_WIDTH > 0
+ "prefetcht0 " TO_STRING(CRCUTIL_PREFETCH_WIDTH) "(%[src])\n"
+#endif
+ "addl $0x20, %[src]\n"
+ "pxor %[crc0], %[buf0]\n"
+ "pxor %[crc1], %[buf1]\n"
+ "pxor %[crc2], %[buf2]\n"
+ "pxor %[crc3], %[buf3]\n"
+
+ "movd %[buf0], %[tmp0]\n"
+ "psrlq $32, %[buf0]\n"
+ "movd %[buf1], %[tmp1]\n"
+ "psrlq $32, %[buf1]\n"
+ "movd %[buf2], %[tmp2]\n"
+ "psrlq $32, %[buf2]\n"
+ "movd %[buf3], %[tmp3]\n"
+ "psrlq $32, %[buf3]\n"
+
+ "movzbl %b[tmp0], %[temp]\n"
+ "shrl $8, %[tmp0]\n"
+ "movq (%[table], %[temp], 8), %[crc0]\n"
+ "movzbl %b[tmp1], %[temp]\n"
+ "shrl $8, %[tmp1]\n"
+ "movq (%[table], %[temp], 8), %[crc1]\n"
+ "movzbl %b[tmp2], %[temp]\n"
+ "shrl $8, %[tmp2]\n"
+ "movq (%[table], %[temp], 8), %[crc2]\n"
+ "movzbl %b[tmp3], %[temp]\n"
+ "shrl $8, %[tmp3]\n"
+ "movq (%[table], %[temp], 8), %[crc3]\n"
+
+#define XOR(byte) \
+ "movzbl %b[tmp0], %[temp]\n" \
+ "shrl $8, %[tmp0]\n" \
+ "pxor " #byte "*256*8(%[table], %[temp], 8), %[crc0]\n" \
+ "movzbl %b[tmp1], %[temp]\n" \
+ "shrl $8, %[tmp1]\n" \
+ "pxor " #byte "*256*8(%[table], %[temp], 8), %[crc1]\n" \
+ "movzbl %b[tmp2], %[temp]\n" \
+ "shrl $8, %[tmp2]\n" \
+ "pxor " #byte "*256*8(%[table], %[temp], 8), %[crc2]\n" \
+ "movzbl %b[tmp3], %[temp]\n" \
+ "shrl $8, %[tmp3]\n" \
+ "pxor " #byte "*256*8(%[table], %[temp], 8), %[crc3]\n"
+
+ XOR(1)
+ XOR(2)
+
+ "pxor 3*256*8(%[table], %[tmp0], 8), %[crc0]\n"
+ "movd %[buf0], %[tmp0]\n"
+ "pxor 3*256*8(%[table], %[tmp1], 8), %[crc1]\n"
+ "movd %[buf1], %[tmp1]\n"
+ "pxor 3*256*8(%[table], %[tmp2], 8), %[crc2]\n"
+ "movd %[buf2], %[tmp2]\n"
+ "pxor 3*256*8(%[table], %[tmp3], 8), %[crc3]\n"
+ "movd %[buf3], %[tmp3]\n"
+
+ XOR(4)
+ XOR(5)
+ XOR(6)
+
+ "pxor 7*256*8(%[table], %[tmp0], 8), %[crc0]\n"
+ "movq (%[src]), %[buf0]\n"
+ "pxor 7*256*8(%[table], %[tmp1], 8), %[crc1]\n"
+ "movq 1*8(%[src]), %[buf1]\n"
+ "pxor 7*256*8(%[table], %[tmp2], 8), %[crc2]\n"
+ "movq 2*8(%[src]), %[buf2]\n"
+ "pxor 7*256*8(%[table], %[tmp3], 8), %[crc3]\n"
+ "movq 3*8(%[src]), %[buf3]\n"
+ "cmpl %[src], %[end]\n"
+ "ja 1b\n"
+#undef XOR
+
+ "movl %[table_word], %[table]\n"
+ CRC_WORD_MMX()
+
+ "pxor %[crc1], %[buf1]\n"
+ "movq %[buf1], %[buf0]\n"
+ CRC_WORD_MMX()
+
+ "pxor %[crc2], %[buf2]\n"
+ "movq %[buf2], %[buf0]\n"
+ CRC_WORD_MMX()
+
+ "pxor %[crc3], %[buf3]\n"
+ "movq %[buf3], %[buf0]\n"
+ CRC_WORD_MMX()
+
+ "add $4*8, %[src]\n"
+ "2:\n"
+ "movl %[table_word], %[table]\n"
+
"addl $2*4*8 - 8, %[end]\n"
- "cmpl %[src], %[end]\n"
- "jbe 4f\n"
- "3:\n"
- "movq (%[src]), %[buf0]\n"
- "addl $0x8, %[src]\n"
- CRC_WORD_MMX()
- "cmpl %[src], %[end]\n"
- "ja 3b\n"
- "4:\n"
+ "cmpl %[src], %[end]\n"
+ "jbe 4f\n"
+ "3:\n"
+ "movq (%[src]), %[buf0]\n"
+ "addl $0x8, %[src]\n"
+ CRC_WORD_MMX()
+ "cmpl %[src], %[end]\n"
+ "ja 3b\n"
+ "4:\n"
"addl $7, %[end]\n"
-
- "cmpl %[src], %[end]\n"
- "jbe 6f\n"
-
- "5:\n"
- "movd %[crc0], %[tmp0]\n"
- "movzbl (%[src]), %[temp]\n"
- "movzbl %b[tmp0], %[tmp0]\n"
- "psrlq $8, %[crc0]\n"
- "xorl %[tmp0], %[temp]\n"
- "add $1, %[src]\n"
- "pxor 7*256*8(%[table], %[temp], 8), %[crc0]\n"
- "cmpl %[src], %[end]\n"
- "ja 5b\n"
-
- "6:\n"
-
- : // outputs
- [src] "+r" (src),
- [end] "+m" (end),
- [crc0] "+y" (crc0),
- [crc1] "=&y" (crc1),
- [crc2] "=&y" (crc2),
- [crc3] "=&y" (crc3),
- [buf0] "=&y" (buf0),
- [buf1] "=&y" (buf1),
- [buf2] "=&y" (buf2),
- [buf3] "=&y" (buf3),
- [tmp0] "=&q" (tmp0),
- [tmp1] "=&q" (tmp1),
- [tmp2] "=&q" (tmp2),
- [tmp3] "=&q" (tmp3),
- [temp] "=&r" (temp),
- [table] "=&r" (table_ptr)
-
- : // inputs
- [table_interleaved] "m" (table_interleaved),
- [table_word] "m" (table_word));
-
- asm volatile("emms");
-
- return (crc0 ^ this->Base().Canonize());
-}
-
-} // namespace crcutil
-
-#endif // defined(__GNUC__) && HAVE_AMD64 && CRCUTIL_USE_ASM
+
+ "cmpl %[src], %[end]\n"
+ "jbe 6f\n"
+
+ "5:\n"
+ "movd %[crc0], %[tmp0]\n"
+ "movzbl (%[src]), %[temp]\n"
+ "movzbl %b[tmp0], %[tmp0]\n"
+ "psrlq $8, %[crc0]\n"
+ "xorl %[tmp0], %[temp]\n"
+ "add $1, %[src]\n"
+ "pxor 7*256*8(%[table], %[temp], 8), %[crc0]\n"
+ "cmpl %[src], %[end]\n"
+ "ja 5b\n"
+
+ "6:\n"
+
+ : // outputs
+ [src] "+r" (src),
+ [end] "+m" (end),
+ [crc0] "+y" (crc0),
+ [crc1] "=&y" (crc1),
+ [crc2] "=&y" (crc2),
+ [crc3] "=&y" (crc3),
+ [buf0] "=&y" (buf0),
+ [buf1] "=&y" (buf1),
+ [buf2] "=&y" (buf2),
+ [buf3] "=&y" (buf3),
+ [tmp0] "=&q" (tmp0),
+ [tmp1] "=&q" (tmp1),
+ [tmp2] "=&q" (tmp2),
+ [tmp3] "=&q" (tmp3),
+ [temp] "=&r" (temp),
+ [table] "=&r" (table_ptr)
+
+ : // inputs
+ [table_interleaved] "m" (table_interleaved),
+ [table_word] "m" (table_word));
+
+ asm volatile("emms");
+
+ return (crc0 ^ this->Base().Canonize());
+}
+
+} // namespace crcutil
+
+#endif // defined(__GNUC__) && HAVE_AMD64 && CRCUTIL_USE_ASM
diff --git a/contrib/libs/crcutil/multiword_64_64_intrinsic_i386_mmx.cc b/contrib/libs/crcutil/multiword_64_64_intrinsic_i386_mmx.cc
index 302af5a5a4..a37d4d251b 100644
--- a/contrib/libs/crcutil/multiword_64_64_intrinsic_i386_mmx.cc
+++ b/contrib/libs/crcutil/multiword_64_64_intrinsic_i386_mmx.cc
@@ -1,243 +1,243 @@
-// Copyright 2010 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Implements 64-bit multiword CRC using MMX built-in functions.
-
-#include "generic_crc.h"
-
-#if CRCUTIL_USE_ASM && HAVE_I386 && HAVE_MMX
-
-namespace crcutil {
-
-template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiwordI386Mmx(
- const void *data, size_t bytes, const uint64 &start)
- const GCC_OMIT_FRAME_POINTER;
-
-#if !defined(_MSC_VER)
-template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiword(
- const void *data,
- size_t bytes,
- const uint64 &start) const {
- if (bytes <= 7) {
- const uint8 *src = static_cast<const uint8 *>(data);
- uint64 crc = start ^ Base().Canonize();
- for (const uint8 *end = src + bytes; src < end; ++src) {
- CRC_BYTE(this, crc, *src);
- }
- return (crc ^ Base().Canonize());
- }
- return CrcMultiwordI386Mmx(data, bytes, start);
-}
-#else
-#pragma warning(push)
-// CL: uninitialized local variable 'crc1' used
-// Wrong: crc1 = XOR(crc1, crc1) sets it to 0.
-#pragma warning(disable: 4700)
-
-#pragma warning(disable: 4619) // there is no warning number '592'
-
-// ICL: variable "crc1" is used before its value is set
-// Wrong: crc1 = XOR(crc1, crc1) sets it to 0.
-#pragma warning(disable: 592)
-#endif // !defined(_MSC_VER)
-
-#define MM64(adr) reinterpret_cast<const __m64 *>(adr)
-#define MM64_TABLE(byte) MM64(crc_word_interleaved_[byte])
-
-#define CRC_WORD_MMX(this, crc, buf) do { \
- buf = _mm_xor_si64(buf, crc); \
- uint32 tmp = static_cast<uint32>(_mm_cvtsi64_si32(buf)); \
- buf = _mm_srli_si64(buf, 32); \
- crc = MM64(crc_word_[0])[TO_BYTE(tmp)]; \
- tmp >>= 8; \
- crc = _mm_xor_si64(crc, MM64(crc_word_[1])[TO_BYTE(tmp)]); \
- tmp >>= 8; \
- crc = _mm_xor_si64(crc, MM64(crc_word_[2])[TO_BYTE(tmp)]); \
- tmp >>= 8; \
- crc = _mm_xor_si64(crc, MM64(crc_word_[3])[tmp]); \
- tmp = static_cast<uint32>(_mm_cvtsi64_si32(buf)); \
- crc = _mm_xor_si64(crc, MM64(crc_word_[4])[TO_BYTE(tmp)]); \
- tmp >>= 8; \
- crc = _mm_xor_si64(crc, MM64(crc_word_[5])[TO_BYTE(tmp)]); \
- tmp >>= 8; \
- crc = _mm_xor_si64(crc, MM64(crc_word_[6])[TO_BYTE(tmp)]); \
- tmp >>= 8; \
- crc = _mm_xor_si64(crc, MM64(crc_word_[7])[tmp]); \
-} while (0)
-
-template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiwordI386Mmx(
- const void *data, size_t bytes, const uint64 &start) const {
- const uint8 *src = static_cast<const uint8 *>(data);
- const uint8 *end = src + bytes;
- uint64 crc = start ^ Base().Canonize();
-
- ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc, uint64);
- if (src >= end) {
- return (crc ^ Base().Canonize());
- }
-
- // Process 4 registers of sizeof(uint64) bytes at once.
- bytes = static_cast<size_t>(end - src) & ~(4*8 - 1);
- if (bytes > 4*8) {
- const uint8 *stop = src + bytes - 4*8;
- union {
- __m64 m64;
- uint64 u64;
- } temp;
- __m64 crc0;
- __m64 crc1;
- __m64 crc2;
- __m64 crc3;
- __m64 buf0 = MM64(src)[0];
- __m64 buf1 = MM64(src)[1];
- __m64 buf2 = MM64(src)[2];
- __m64 buf3 = MM64(src)[3];
-
- temp.u64 = crc;
- crc0 = temp.m64;
-#if defined(__GNUC__) && !GCC_VERSION_AVAILABLE(4, 4)
- // There is no way to suppress a warning in GCC;
- // generate extra assignments.
- temp.u64 = 0;
- crc1 = temp.m64;
- crc2 = temp.m64;
- crc3 = temp.m64;
-#else
- crc1 = _mm_xor_si64(crc1, crc1);
- crc2 = _mm_xor_si64(crc2, crc2);
- crc3 = _mm_xor_si64(crc3, crc3);
-#endif // defined(__GNUC__) && !GCC_VERSION_AVAILABLE(4, 4)
-
- do {
- PREFETCH(src);
- src += 4*8;
-
- buf0 = _mm_xor_si64(buf0, crc0);
- buf1 = _mm_xor_si64(buf1, crc1);
- buf2 = _mm_xor_si64(buf2, crc2);
- buf3 = _mm_xor_si64(buf3, crc3);
-
- uint32 tmp0 = static_cast<uint32>(_mm_cvtsi64_si32(buf0));
- uint32 tmp1 = static_cast<uint32>(_mm_cvtsi64_si32(buf1));
- uint32 tmp2 = static_cast<uint32>(_mm_cvtsi64_si32(buf2));
- uint32 tmp3 = static_cast<uint32>(_mm_cvtsi64_si32(buf3));
-
- buf0 = _mm_srli_si64(buf0, 32);
- buf1 = _mm_srli_si64(buf1, 32);
- buf2 = _mm_srli_si64(buf2, 32);
- buf3 = _mm_srli_si64(buf3, 32);
-
- crc0 = MM64_TABLE(0)[TO_BYTE(tmp0)];
- tmp0 >>= 8;
- crc1 = MM64_TABLE(0)[TO_BYTE(tmp1)];
- tmp1 >>= 8;
- crc2 = MM64_TABLE(0)[TO_BYTE(tmp2)];
- tmp2 >>= 8;
- crc3 = MM64_TABLE(0)[TO_BYTE(tmp3)];
- tmp3 >>= 8;
-
-#define XOR(byte) do { \
- crc0 = _mm_xor_si64(crc0, MM64_TABLE(byte)[TO_BYTE(tmp0)]); \
- tmp0 >>= 8; \
- crc1 = _mm_xor_si64(crc1, MM64_TABLE(byte)[TO_BYTE(tmp1)]); \
- tmp1 >>= 8; \
- crc2 = _mm_xor_si64(crc2, MM64_TABLE(byte)[TO_BYTE(tmp2)]); \
- tmp2 >>= 8; \
- crc3 = _mm_xor_si64(crc3, MM64_TABLE(byte)[TO_BYTE(tmp3)]); \
- tmp3 >>= 8; \
-} while (0)
-
- XOR(1);
- XOR(2);
-
- crc0 = _mm_xor_si64(crc0, MM64_TABLE(3)[tmp0]);
- tmp0 = static_cast<uint32>(_mm_cvtsi64_si32(buf0));
- crc1 = _mm_xor_si64(crc1, MM64_TABLE(3)[tmp1]);
- tmp1 = static_cast<uint32>(_mm_cvtsi64_si32(buf1));
- crc2 = _mm_xor_si64(crc2, MM64_TABLE(3)[tmp2]);
- tmp2 = static_cast<uint32>(_mm_cvtsi64_si32(buf2));
- crc3 = _mm_xor_si64(crc3, MM64_TABLE(3)[tmp3]);
- tmp3 = static_cast<uint32>(_mm_cvtsi64_si32(buf3));
-
- XOR(4);
- XOR(5);
- XOR(6);
-
-#undef XOR
-
- crc0 = _mm_xor_si64(crc0, MM64_TABLE(sizeof(uint64) - 1)[tmp0]);
- buf0 = MM64(src)[0];
- crc1 = _mm_xor_si64(crc1, MM64_TABLE(sizeof(uint64) - 1)[tmp1]);
- buf1 = MM64(src)[1];
- crc2 = _mm_xor_si64(crc2, MM64_TABLE(sizeof(uint64) - 1)[tmp2]);
- buf2 = MM64(src)[2];
- crc3 = _mm_xor_si64(crc3, MM64_TABLE(sizeof(uint64) - 1)[tmp3]);
- buf3 = MM64(src)[3];
- }
- while (src < stop);
-
- CRC_WORD_MMX(this, crc0, buf0);
- buf1 = _mm_xor_si64(buf1, crc1);
- CRC_WORD_MMX(this, crc0, buf1);
- buf2 = _mm_xor_si64(buf2, crc2);
- CRC_WORD_MMX(this, crc0, buf2);
- buf3 = _mm_xor_si64(buf3, crc3);
- CRC_WORD_MMX(this, crc0, buf3);
-
- temp.m64 = crc0;
- crc = temp.u64;
-
- _mm_empty();
-
- src += 4*8;
- }
-
- // Process sizeof(uint64) bytes at once.
- bytes = static_cast<size_t>(end - src) & ~(sizeof(uint64) - 1);
- if (bytes > 0) {
- union {
- __m64 m64;
- uint64 u64;
- } temp;
- __m64 crc0;
-
- temp.u64 = crc;
- crc0 = temp.m64;
-
- for (const uint8 *stop = src + bytes; src < stop; src += sizeof(uint64)) {
- __m64 buf0 = MM64(src)[0];
- CRC_WORD_MMX(this, crc0, buf0);
- }
-
- temp.m64 = crc0;
- crc = temp.u64;
-
- _mm_empty();
- }
-
- // Compute CRC of remaining bytes.
- for (;src < end; ++src) {
- CRC_BYTE(this, crc, *src);
- }
-
- return (crc ^ Base().Canonize());
-}
-
-#if defined(_MSC_VER)
-#pragma warning(pop)
-#endif // defined(_MSC_VER)
-
-} // namespace crcutil
-
-#endif // CRCUTIL_USE_ASM && HAVE_I386 && HAVE_MMX
+// Copyright 2010 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Implements 64-bit multiword CRC using MMX built-in functions.
+
+#include "generic_crc.h"
+
+#if CRCUTIL_USE_ASM && HAVE_I386 && HAVE_MMX
+
+namespace crcutil {
+
+template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiwordI386Mmx(
+ const void *data, size_t bytes, const uint64 &start)
+ const GCC_OMIT_FRAME_POINTER;
+
+#if !defined(_MSC_VER)
+template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiword(
+ const void *data,
+ size_t bytes,
+ const uint64 &start) const {
+ if (bytes <= 7) {
+ const uint8 *src = static_cast<const uint8 *>(data);
+ uint64 crc = start ^ Base().Canonize();
+ for (const uint8 *end = src + bytes; src < end; ++src) {
+ CRC_BYTE(this, crc, *src);
+ }
+ return (crc ^ Base().Canonize());
+ }
+ return CrcMultiwordI386Mmx(data, bytes, start);
+}
+#else
+#pragma warning(push)
+// CL: uninitialized local variable 'crc1' used
+// Wrong: crc1 = XOR(crc1, crc1) sets it to 0.
+#pragma warning(disable: 4700)
+
+#pragma warning(disable: 4619) // there is no warning number '592'
+
+// ICL: variable "crc1" is used before its value is set
+// Wrong: crc1 = XOR(crc1, crc1) sets it to 0.
+#pragma warning(disable: 592)
+#endif // !defined(_MSC_VER)
+
+#define MM64(adr) reinterpret_cast<const __m64 *>(adr)
+#define MM64_TABLE(byte) MM64(crc_word_interleaved_[byte])
+
+#define CRC_WORD_MMX(this, crc, buf) do { \
+ buf = _mm_xor_si64(buf, crc); \
+ uint32 tmp = static_cast<uint32>(_mm_cvtsi64_si32(buf)); \
+ buf = _mm_srli_si64(buf, 32); \
+ crc = MM64(crc_word_[0])[TO_BYTE(tmp)]; \
+ tmp >>= 8; \
+ crc = _mm_xor_si64(crc, MM64(crc_word_[1])[TO_BYTE(tmp)]); \
+ tmp >>= 8; \
+ crc = _mm_xor_si64(crc, MM64(crc_word_[2])[TO_BYTE(tmp)]); \
+ tmp >>= 8; \
+ crc = _mm_xor_si64(crc, MM64(crc_word_[3])[tmp]); \
+ tmp = static_cast<uint32>(_mm_cvtsi64_si32(buf)); \
+ crc = _mm_xor_si64(crc, MM64(crc_word_[4])[TO_BYTE(tmp)]); \
+ tmp >>= 8; \
+ crc = _mm_xor_si64(crc, MM64(crc_word_[5])[TO_BYTE(tmp)]); \
+ tmp >>= 8; \
+ crc = _mm_xor_si64(crc, MM64(crc_word_[6])[TO_BYTE(tmp)]); \
+ tmp >>= 8; \
+ crc = _mm_xor_si64(crc, MM64(crc_word_[7])[tmp]); \
+} while (0)
+
+template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiwordI386Mmx(
+ const void *data, size_t bytes, const uint64 &start) const {
+ const uint8 *src = static_cast<const uint8 *>(data);
+ const uint8 *end = src + bytes;
+ uint64 crc = start ^ Base().Canonize();
+
+ ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc, uint64);
+ if (src >= end) {
+ return (crc ^ Base().Canonize());
+ }
+
+ // Process 4 registers of sizeof(uint64) bytes at once.
+ bytes = static_cast<size_t>(end - src) & ~(4*8 - 1);
+ if (bytes > 4*8) {
+ const uint8 *stop = src + bytes - 4*8;
+ union {
+ __m64 m64;
+ uint64 u64;
+ } temp;
+ __m64 crc0;
+ __m64 crc1;
+ __m64 crc2;
+ __m64 crc3;
+ __m64 buf0 = MM64(src)[0];
+ __m64 buf1 = MM64(src)[1];
+ __m64 buf2 = MM64(src)[2];
+ __m64 buf3 = MM64(src)[3];
+
+ temp.u64 = crc;
+ crc0 = temp.m64;
+#if defined(__GNUC__) && !GCC_VERSION_AVAILABLE(4, 4)
+ // There is no way to suppress a warning in GCC;
+ // generate extra assignments.
+ temp.u64 = 0;
+ crc1 = temp.m64;
+ crc2 = temp.m64;
+ crc3 = temp.m64;
+#else
+ crc1 = _mm_xor_si64(crc1, crc1);
+ crc2 = _mm_xor_si64(crc2, crc2);
+ crc3 = _mm_xor_si64(crc3, crc3);
+#endif // defined(__GNUC__) && !GCC_VERSION_AVAILABLE(4, 4)
+
+ do {
+ PREFETCH(src);
+ src += 4*8;
+
+ buf0 = _mm_xor_si64(buf0, crc0);
+ buf1 = _mm_xor_si64(buf1, crc1);
+ buf2 = _mm_xor_si64(buf2, crc2);
+ buf3 = _mm_xor_si64(buf3, crc3);
+
+ uint32 tmp0 = static_cast<uint32>(_mm_cvtsi64_si32(buf0));
+ uint32 tmp1 = static_cast<uint32>(_mm_cvtsi64_si32(buf1));
+ uint32 tmp2 = static_cast<uint32>(_mm_cvtsi64_si32(buf2));
+ uint32 tmp3 = static_cast<uint32>(_mm_cvtsi64_si32(buf3));
+
+ buf0 = _mm_srli_si64(buf0, 32);
+ buf1 = _mm_srli_si64(buf1, 32);
+ buf2 = _mm_srli_si64(buf2, 32);
+ buf3 = _mm_srli_si64(buf3, 32);
+
+ crc0 = MM64_TABLE(0)[TO_BYTE(tmp0)];
+ tmp0 >>= 8;
+ crc1 = MM64_TABLE(0)[TO_BYTE(tmp1)];
+ tmp1 >>= 8;
+ crc2 = MM64_TABLE(0)[TO_BYTE(tmp2)];
+ tmp2 >>= 8;
+ crc3 = MM64_TABLE(0)[TO_BYTE(tmp3)];
+ tmp3 >>= 8;
+
+#define XOR(byte) do { \
+ crc0 = _mm_xor_si64(crc0, MM64_TABLE(byte)[TO_BYTE(tmp0)]); \
+ tmp0 >>= 8; \
+ crc1 = _mm_xor_si64(crc1, MM64_TABLE(byte)[TO_BYTE(tmp1)]); \
+ tmp1 >>= 8; \
+ crc2 = _mm_xor_si64(crc2, MM64_TABLE(byte)[TO_BYTE(tmp2)]); \
+ tmp2 >>= 8; \
+ crc3 = _mm_xor_si64(crc3, MM64_TABLE(byte)[TO_BYTE(tmp3)]); \
+ tmp3 >>= 8; \
+} while (0)
+
+ XOR(1);
+ XOR(2);
+
+ crc0 = _mm_xor_si64(crc0, MM64_TABLE(3)[tmp0]);
+ tmp0 = static_cast<uint32>(_mm_cvtsi64_si32(buf0));
+ crc1 = _mm_xor_si64(crc1, MM64_TABLE(3)[tmp1]);
+ tmp1 = static_cast<uint32>(_mm_cvtsi64_si32(buf1));
+ crc2 = _mm_xor_si64(crc2, MM64_TABLE(3)[tmp2]);
+ tmp2 = static_cast<uint32>(_mm_cvtsi64_si32(buf2));
+ crc3 = _mm_xor_si64(crc3, MM64_TABLE(3)[tmp3]);
+ tmp3 = static_cast<uint32>(_mm_cvtsi64_si32(buf3));
+
+ XOR(4);
+ XOR(5);
+ XOR(6);
+
+#undef XOR
+
+ crc0 = _mm_xor_si64(crc0, MM64_TABLE(sizeof(uint64) - 1)[tmp0]);
+ buf0 = MM64(src)[0];
+ crc1 = _mm_xor_si64(crc1, MM64_TABLE(sizeof(uint64) - 1)[tmp1]);
+ buf1 = MM64(src)[1];
+ crc2 = _mm_xor_si64(crc2, MM64_TABLE(sizeof(uint64) - 1)[tmp2]);
+ buf2 = MM64(src)[2];
+ crc3 = _mm_xor_si64(crc3, MM64_TABLE(sizeof(uint64) - 1)[tmp3]);
+ buf3 = MM64(src)[3];
+ }
+ while (src < stop);
+
+ CRC_WORD_MMX(this, crc0, buf0);
+ buf1 = _mm_xor_si64(buf1, crc1);
+ CRC_WORD_MMX(this, crc0, buf1);
+ buf2 = _mm_xor_si64(buf2, crc2);
+ CRC_WORD_MMX(this, crc0, buf2);
+ buf3 = _mm_xor_si64(buf3, crc3);
+ CRC_WORD_MMX(this, crc0, buf3);
+
+ temp.m64 = crc0;
+ crc = temp.u64;
+
+ _mm_empty();
+
+ src += 4*8;
+ }
+
+ // Process sizeof(uint64) bytes at once.
+ bytes = static_cast<size_t>(end - src) & ~(sizeof(uint64) - 1);
+ if (bytes > 0) {
+ union {
+ __m64 m64;
+ uint64 u64;
+ } temp;
+ __m64 crc0;
+
+ temp.u64 = crc;
+ crc0 = temp.m64;
+
+ for (const uint8 *stop = src + bytes; src < stop; src += sizeof(uint64)) {
+ __m64 buf0 = MM64(src)[0];
+ CRC_WORD_MMX(this, crc0, buf0);
+ }
+
+ temp.m64 = crc0;
+ crc = temp.u64;
+
+ _mm_empty();
+ }
+
+ // Compute CRC of remaining bytes.
+ for (;src < end; ++src) {
+ CRC_BYTE(this, crc, *src);
+ }
+
+ return (crc ^ Base().Canonize());
+}
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif // defined(_MSC_VER)
+
+} // namespace crcutil
+
+#endif // CRCUTIL_USE_ASM && HAVE_I386 && HAVE_MMX
diff --git a/contrib/libs/crcutil/platform.h b/contrib/libs/crcutil/platform.h
index 936cf7d331..7a19ff2679 100644
--- a/contrib/libs/crcutil/platform.h
+++ b/contrib/libs/crcutil/platform.h
@@ -1,245 +1,245 @@
-// Copyright 2010 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Detects configuration and defines compiler-specific macros.
-// Also, sets user-defined CRUTIL_USE_* macros to default values.
-
-#ifndef CRCUTIL_PLATFORM_H_
-#define CRCUTIL_PLATFORM_H_
-
-// Permanently disable some annoying warnings generated
-// by Microsoft CL when compiling Microsoft's headers.
-#include "std_headers.h"
-
-// Use inline asm version of the code?
-#if !defined(CRCUTIL_USE_ASM)
-#define CRCUTIL_USE_ASM 1
-#endif // !defined(CRCUTIL_USE_ASM)
-
-
-#if !defined(HAVE_I386)
-#if defined(__i386__) || defined(_M_IX86)
-#define HAVE_I386 1
-#else
-#define HAVE_I386 0
-#endif // defined(__i386__) || defined(_M_IX86)
-#endif // defined(HAVE_I386)
-
-
-#if !defined(HAVE_AMD64)
-#if defined(__amd64__) || defined(_M_AMD64)
-#define HAVE_AMD64 1
-#else
-#define HAVE_AMD64 0
-#endif // defined(__amd64__) || defined(_M_AMD64)
-#endif // defined(HAVE_AMD64)
-
-
-#if HAVE_AMD64 || HAVE_I386
-#if defined(_MSC_VER)
-#pragma warning(push)
-// '_M_IX86' is not defined as a preprocessor macro
-#pragma warning(disable: 4668)
-#include <intrin.h>
-#pragma warning(pop)
-#endif // defined(_MSC_VER)
-
-
-#if !defined(HAVE_MMX)
-#if defined(_MSC_VER) || (defined(__GNUC__) && defined(__MMX__))
-#define HAVE_MMX 1
-#else
-#define HAVE_MMX 0
-#endif // defined(_MSC_VER) || (defined(__GNUC__) && defined(__MMX__))
-#endif // !defined(HAVE_MMX)
-
-
-#if !defined(HAVE_SSE)
-#if defined(_MSC_VER) || (defined(__GNUC__) && defined(__SSE__))
-#include <xmmintrin.h>
-#define HAVE_SSE 1
-#else
-#define HAVE_SSE 0
-#endif // defined(_MSC_VER) || (defined(__GNUC__) && defined(__SSE__))
-#endif // !defined(HAVE_SSE)
-
-
-#if !defined(HAVE_SSE2)
-#if defined(_MSC_VER) || (defined(__GNUC__) && defined(__SSE2__))
-#include <emmintrin.h>
-#define HAVE_SSE2 1
-#else
-#define HAVE_SSE2 0
-#endif // defined(_MSC_VER) || (defined(__GNUC__) && defined(__SSE2__))
-#endif // !defined(HAVE_SSE2)
-
-#else
-
-#if !defined(HAVE_MMX)
-#define HAVE_MMX 0
-#endif // !defined(HAVE_MMX)
-
-#if !defined(HAVE_SSE)
-#define HAVE_SSE 0
-#endif // !defined(HAVE_SSE)
-
-#if !defined(HAVE_SSE2)
-#define HAVE_SSE2 0
-#endif // !defined(HAVE_SSE2)
-
-#endif // HAVE_AMD64 || HAVE_I386
-
-// Error checking
-#if HAVE_SSE && !HAVE_MMX
-#error SSE is available but not MMX?
-#endif // HAVE_SSE && !HAVE_MMX
-
-#if HAVE_SSE2 && (!HAVE_SSE || !HAVE_MMX)
-#error SSE2 is available but not SSE or MMX?
-#endif // HAVE_SSE2 && (!HAVE_SSE || !HAVE_MMX)
-
-
-#if !defined(CRCUTIL_PREFETCH_WIDTH)
-// On newer X5550 CPU, heavily optimized CrcMultiword is 3% faster without
-// prefetch for inputs smaller than 8MB and less than 1% slower for 8MB and
-// larger blocks. On older Q9650 CPU, the code is 2-3% faster for inputs
-// smaller than 8MB, 4-5% slower when length >= 8MB.
-// Tested with prefetch length 256, 512, and 4096.
-//
-// At this moment there is no compelling reason to use prefetching.
-//
-#define CRCUTIL_PREFETCH_WIDTH 0
-#endif // !defined(CRCUTIL_PREFETCH_WIDTH)
-
-
-#if HAVE_SSE && CRCUTIL_PREFETCH_WIDTH > 0
-#define PREFETCH(src) \
- _mm_prefetch(reinterpret_cast<const char *>(src) + CRCUTIL_PREFETCH_WIDTH, \
- _MM_HINT_T0)
-#else
-#define PREFETCH(src)
-#endif // HAVE_SSE && CRCUTIL_PREFETCH_WIDTH > 0
-
-
-// If block size exceeds CRCUTIL_MIN_ALIGN_SIZE, align the data
-// before accessing it at word boundary. See generic_crc.cc,
-// ALIGN_ON_WORD_BOUNDARY_IF_NEEDED() macro.
-#if !defined(CRCUTIL_MIN_ALIGN_SIZE)
-#if HAVE_AMD64 || HAVE_I386
-#define CRCUTIL_MIN_ALIGN_SIZE (1024)
-#else
-#define CRCUTIL_MIN_ALIGN_SIZE 0
-#endif // HAVE_AMD64 || HAVE_I386
-#endif // !defined(CRCUTIL_MIN_ALIGN_SIZE)
-
-
-// Use _mm_crc32_u64/32/8 intrinics?
-// If not, they will be implemented in software.
-#if !HAVE_I386 && !HAVE_AMD64
-
-#undef CRCUTIL_USE_MM_CRC32
-#define CRCUTIL_USE_MM_CRC32 0
-
-#else
-
-#if !defined(CRCUTIL_USE_MM_CRC32)
-#if defined(_MSC_VER) || defined(__GNUC__)
-#define CRCUTIL_USE_MM_CRC32 1
-#else
-#define CRCUTIL_USE_MM_CRC32 0
-#endif // defined(_MSC_VER) || defined(__GNUC__)
-#endif // !defined(CRCUTIL_USE_MM_CRC32)
-
-#endif // !HAVE_I386 && !HAVE_AMD64
-
-
-// Stringize -- always handy.
-#define TO_STRING_VALUE(arg) #arg
-#define TO_STRING(arg) TO_STRING_VALUE(arg)
-
-
-// Compilers give "right shift count >= width of type" warning even
-// though the shift happens only under appropriate "if".
-#define SHIFT_RIGHT_NO_WARNING(value, bits) \
- ((value) >> (((bits) < (8 * sizeof(value))) ? (bits) : 0))
-#define SHIFT_RIGHT_SAFE(value, bits) \
- ((bits) < (8 * sizeof(value)) ? SHIFT_RIGHT_NO_WARNING(value, bits) : 0)
-
-// The same for left shifts.
-#define SHIFT_LEFT_NO_WARNING(value, bits) \
- ((value) << (((bits) < (8 * sizeof(value))) ? (bits) : 0))
-#define SHIFT_LEFT_SAFE(value, bits) \
- ((bits) < (8 * sizeof(value)) ? SHIFT_LEFT_NO_WARNING(value, bits) : 0)
-
-// GCC-specific macros.
-//
-#define GCC_VERSION_AVAILABLE(major, minor) \
- (defined(__GNUC__) && \
- (__GNUC__ > (major) || \
- (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor))))
-
-
-#if defined(__GNUC__)
-
-// The GenericCrc tables must be properly aligned.
-// Penalty for misalignment? 50% performance degradation.
-// For 128-bit SSE2, the penalty is access violation.
-#define GCC_ALIGN_ATTRIBUTE(n) __attribute__((aligned(n)))
-
-#if GCC_VERSION_AVAILABLE(4, 4)
-// If not marked as "omit frame pointer",
-// GCC won't be able to find enough registers.
-#define GCC_OMIT_FRAME_POINTER \
- __attribute__((__optimize__(2, "omit-frame-pointer")))
-#endif // GCC_VERSION_AVAILABLE(4, 4)
-
-#if !defined(__forceinline)
-#define __forceinline __attribute__((__always_inline__)) inline
-#endif // !defined(__forceinline)
-
-#if defined(__APPLE_CC__)
-// The version of GCC used by Max OS X xCode v 5664 does not understand
-// "movq xmm, r64" instruction and requires the use of "movd" (probably
-// because of the bug in GCC which treats "movq/movd xmm,r64 or r64,xmm"
-// the same).
-//
-// Leaving common sense aside, let's peek into Intel's instruction
-// reference manual. That's what description of MOVD command says:
-// MOVD xmm, r/m32 (opcode 66 0F 6E /r)
-// MOVD r/m32, xmm (opcode 66 0F 7E /r)
-// MOVQ xmm, r/m64 (opcode 66 REX.W 0F 6E /r)
-// MOVQ r/m64, xmm (opcode 66 REX.W 0F 7E /r)
-#define SSE2_MOVQ "movd"
-#else
-#define SSE2_MOVQ "movq"
-#endif // defined(__APPLE_CC__)
-
-#endif // defined(__GNUC__)
-
-
-// Define compiler-specific macros that were not set yet.
-#if !defined(_MSC_VER) && !defined(__forceinline)
-#define __forceinline inline
-#endif // !defined(_MSC_VER) && !defined(__forceinline)
-
-#if !defined(GCC_OMIT_FRAME_POINTER)
-#define GCC_OMIT_FRAME_POINTER
-#endif // !defined(GCC_OMIT_FRAME_POINTER)
-
-#if !defined(GCC_ALIGN_ATTRIBUTE)
-#define GCC_ALIGN_ATTRIBUTE(n)
-#endif // !defined(GCC_ALIGN_ATTRIBUTE)
-
-
-#endif // CRCUTIL_PLATFORM_H_
+// Copyright 2010 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Detects configuration and defines compiler-specific macros.
+// Also, sets user-defined CRUTIL_USE_* macros to default values.
+
+#ifndef CRCUTIL_PLATFORM_H_
+#define CRCUTIL_PLATFORM_H_
+
+// Permanently disable some annoying warnings generated
+// by Microsoft CL when compiling Microsoft's headers.
+#include "std_headers.h"
+
+// Use inline asm version of the code?
+#if !defined(CRCUTIL_USE_ASM)
+#define CRCUTIL_USE_ASM 1
+#endif // !defined(CRCUTIL_USE_ASM)
+
+
+#if !defined(HAVE_I386)
+#if defined(__i386__) || defined(_M_IX86)
+#define HAVE_I386 1
+#else
+#define HAVE_I386 0
+#endif // defined(__i386__) || defined(_M_IX86)
+#endif // defined(HAVE_I386)
+
+
+#if !defined(HAVE_AMD64)
+#if defined(__amd64__) || defined(_M_AMD64)
+#define HAVE_AMD64 1
+#else
+#define HAVE_AMD64 0
+#endif // defined(__amd64__) || defined(_M_AMD64)
+#endif // defined(HAVE_AMD64)
+
+
+#if HAVE_AMD64 || HAVE_I386
+#if defined(_MSC_VER)
+#pragma warning(push)
+// '_M_IX86' is not defined as a preprocessor macro
+#pragma warning(disable: 4668)
+#include <intrin.h>
+#pragma warning(pop)
+#endif // defined(_MSC_VER)
+
+
+#if !defined(HAVE_MMX)
+#if defined(_MSC_VER) || (defined(__GNUC__) && defined(__MMX__))
+#define HAVE_MMX 1
+#else
+#define HAVE_MMX 0
+#endif // defined(_MSC_VER) || (defined(__GNUC__) && defined(__MMX__))
+#endif // !defined(HAVE_MMX)
+
+
+#if !defined(HAVE_SSE)
+#if defined(_MSC_VER) || (defined(__GNUC__) && defined(__SSE__))
+#include <xmmintrin.h>
+#define HAVE_SSE 1
+#else
+#define HAVE_SSE 0
+#endif // defined(_MSC_VER) || (defined(__GNUC__) && defined(__SSE__))
+#endif // !defined(HAVE_SSE)
+
+
+#if !defined(HAVE_SSE2)
+#if defined(_MSC_VER) || (defined(__GNUC__) && defined(__SSE2__))
+#include <emmintrin.h>
+#define HAVE_SSE2 1
+#else
+#define HAVE_SSE2 0
+#endif // defined(_MSC_VER) || (defined(__GNUC__) && defined(__SSE2__))
+#endif // !defined(HAVE_SSE2)
+
+#else
+
+#if !defined(HAVE_MMX)
+#define HAVE_MMX 0
+#endif // !defined(HAVE_MMX)
+
+#if !defined(HAVE_SSE)
+#define HAVE_SSE 0
+#endif // !defined(HAVE_SSE)
+
+#if !defined(HAVE_SSE2)
+#define HAVE_SSE2 0
+#endif // !defined(HAVE_SSE2)
+
+#endif // HAVE_AMD64 || HAVE_I386
+
+// Error checking
+#if HAVE_SSE && !HAVE_MMX
+#error SSE is available but not MMX?
+#endif // HAVE_SSE && !HAVE_MMX
+
+#if HAVE_SSE2 && (!HAVE_SSE || !HAVE_MMX)
+#error SSE2 is available but not SSE or MMX?
+#endif // HAVE_SSE2 && (!HAVE_SSE || !HAVE_MMX)
+
+
+#if !defined(CRCUTIL_PREFETCH_WIDTH)
+// On newer X5550 CPU, heavily optimized CrcMultiword is 3% faster without
+// prefetch for inputs smaller than 8MB and less than 1% slower for 8MB and
+// larger blocks. On older Q9650 CPU, the code is 2-3% faster for inputs
+// smaller than 8MB, 4-5% slower when length >= 8MB.
+// Tested with prefetch length 256, 512, and 4096.
+//
+// At this moment there is no compelling reason to use prefetching.
+//
+#define CRCUTIL_PREFETCH_WIDTH 0
+#endif // !defined(CRCUTIL_PREFETCH_WIDTH)
+
+
+#if HAVE_SSE && CRCUTIL_PREFETCH_WIDTH > 0
+#define PREFETCH(src) \
+ _mm_prefetch(reinterpret_cast<const char *>(src) + CRCUTIL_PREFETCH_WIDTH, \
+ _MM_HINT_T0)
+#else
+#define PREFETCH(src)
+#endif // HAVE_SSE && CRCUTIL_PREFETCH_WIDTH > 0
+
+
+// If block size exceeds CRCUTIL_MIN_ALIGN_SIZE, align the data
+// before accessing it at word boundary. See generic_crc.cc,
+// ALIGN_ON_WORD_BOUNDARY_IF_NEEDED() macro.
+#if !defined(CRCUTIL_MIN_ALIGN_SIZE)
+#if HAVE_AMD64 || HAVE_I386
+#define CRCUTIL_MIN_ALIGN_SIZE (1024)
+#else
+#define CRCUTIL_MIN_ALIGN_SIZE 0
+#endif // HAVE_AMD64 || HAVE_I386
+#endif // !defined(CRCUTIL_MIN_ALIGN_SIZE)
+
+
+// Use _mm_crc32_u64/32/8 intrinics?
+// If not, they will be implemented in software.
+#if !HAVE_I386 && !HAVE_AMD64
+
+#undef CRCUTIL_USE_MM_CRC32
+#define CRCUTIL_USE_MM_CRC32 0
+
+#else
+
+#if !defined(CRCUTIL_USE_MM_CRC32)
+#if defined(_MSC_VER) || defined(__GNUC__)
+#define CRCUTIL_USE_MM_CRC32 1
+#else
+#define CRCUTIL_USE_MM_CRC32 0
+#endif // defined(_MSC_VER) || defined(__GNUC__)
+#endif // !defined(CRCUTIL_USE_MM_CRC32)
+
+#endif // !HAVE_I386 && !HAVE_AMD64
+
+
+// Stringize -- always handy.
+#define TO_STRING_VALUE(arg) #arg
+#define TO_STRING(arg) TO_STRING_VALUE(arg)
+
+
+// Compilers give "right shift count >= width of type" warning even
+// though the shift happens only under appropriate "if".
+#define SHIFT_RIGHT_NO_WARNING(value, bits) \
+ ((value) >> (((bits) < (8 * sizeof(value))) ? (bits) : 0))
+#define SHIFT_RIGHT_SAFE(value, bits) \
+ ((bits) < (8 * sizeof(value)) ? SHIFT_RIGHT_NO_WARNING(value, bits) : 0)
+
+// The same for left shifts.
+#define SHIFT_LEFT_NO_WARNING(value, bits) \
+ ((value) << (((bits) < (8 * sizeof(value))) ? (bits) : 0))
+#define SHIFT_LEFT_SAFE(value, bits) \
+ ((bits) < (8 * sizeof(value)) ? SHIFT_LEFT_NO_WARNING(value, bits) : 0)
+
+// GCC-specific macros.
+//
+#define GCC_VERSION_AVAILABLE(major, minor) \
+ (defined(__GNUC__) && \
+ (__GNUC__ > (major) || \
+ (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor))))
+
+
+#if defined(__GNUC__)
+
+// The GenericCrc tables must be properly aligned.
+// Penalty for misalignment? 50% performance degradation.
+// For 128-bit SSE2, the penalty is access violation.
+#define GCC_ALIGN_ATTRIBUTE(n) __attribute__((aligned(n)))
+
+#if GCC_VERSION_AVAILABLE(4, 4)
+// If not marked as "omit frame pointer",
+// GCC won't be able to find enough registers.
+#define GCC_OMIT_FRAME_POINTER \
+ __attribute__((__optimize__(2, "omit-frame-pointer")))
+#endif // GCC_VERSION_AVAILABLE(4, 4)
+
+#if !defined(__forceinline)
+#define __forceinline __attribute__((__always_inline__)) inline
+#endif // !defined(__forceinline)
+
+#if defined(__APPLE_CC__)
+// The version of GCC used by Max OS X xCode v 5664 does not understand
+// "movq xmm, r64" instruction and requires the use of "movd" (probably
+// because of the bug in GCC which treats "movq/movd xmm,r64 or r64,xmm"
+// the same).
+//
+// Leaving common sense aside, let's peek into Intel's instruction
+// reference manual. That's what description of MOVD command says:
+// MOVD xmm, r/m32 (opcode 66 0F 6E /r)
+// MOVD r/m32, xmm (opcode 66 0F 7E /r)
+// MOVQ xmm, r/m64 (opcode 66 REX.W 0F 6E /r)
+// MOVQ r/m64, xmm (opcode 66 REX.W 0F 7E /r)
+#define SSE2_MOVQ "movd"
+#else
+#define SSE2_MOVQ "movq"
+#endif // defined(__APPLE_CC__)
+
+#endif // defined(__GNUC__)
+
+
+// Define compiler-specific macros that were not set yet.
+#if !defined(_MSC_VER) && !defined(__forceinline)
+#define __forceinline inline
+#endif // !defined(_MSC_VER) && !defined(__forceinline)
+
+#if !defined(GCC_OMIT_FRAME_POINTER)
+#define GCC_OMIT_FRAME_POINTER
+#endif // !defined(GCC_OMIT_FRAME_POINTER)
+
+#if !defined(GCC_ALIGN_ATTRIBUTE)
+#define GCC_ALIGN_ATTRIBUTE(n)
+#endif // !defined(GCC_ALIGN_ATTRIBUTE)
+
+
+#endif // CRCUTIL_PLATFORM_H_
diff --git a/contrib/libs/crcutil/protected_crc.h b/contrib/libs/crcutil/protected_crc.h
index 762fceda9b..81a7051bc8 100644
--- a/contrib/libs/crcutil/protected_crc.h
+++ b/contrib/libs/crcutil/protected_crc.h
@@ -1,61 +1,61 @@
-// Copyright 2010 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Protects CRC tables with its own CRC.
-// CRC tables get corrupted too, and if corruption is
-// not caught, data poisoning becomes a reality.
-
-#ifndef CRCUTIL_PROTECTED_CRC_H_
-#define CRCUTIL_PROTECTED_CRC_H_
-
-namespace crcutil {
-
-#pragma pack(push, 16)
-
-// Class CrcImplementation should not have virtual functions:
-// vptr is stored as the very first field, vptr value is defined
-// at runtime, so it is impossible to CRC(*this) once and
-// guarantee that this value will not change from run to run.
-//
-template<typename CrcImplementation> class ProtectedCrc
- : public CrcImplementation {
- public:
- typedef typename CrcImplementation::Crc Crc;
-
- // Returns check value that the caller should compare
- // against pre-computed, trusted constant.
- //
- // Computing SelfCheckValue() after CRC initialization,
- // storing it in memory, and periodically checking against
- // stored value may not work: if CRC tables were initialized
- // incorrectly and/or had been corrupted during initialization,
- // CheckValue() will return garbage. Garbage in, garbage out.
- // Consequitive checks will not detect a problem, the application
- // will happily produce and save the data with corrupt CRC.
- //
- // The application should call SelfCheckValue() regularly:
- // 1. First and foremost, on every CRC mismatch.
- // 2. After CRC'ing the data but before sending it out or writing it.
- // 3. Worst case, every Nth CRC'ed byte or every Nth call to CRC.
- //
- Crc SelfCheckValue() const {
- return CrcDefault(this, sizeof(*this), 0);
- }
-} GCC_ALIGN_ATTRIBUTE(16);
-
-#pragma pack(pop)
-
-} // namespace crcutil
-
-#endif // CRCUTIL_PROTECTED_CRC_H_
+// Copyright 2010 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Protects CRC tables with its own CRC.
+// CRC tables get corrupted too, and if corruption is
+// not caught, data poisoning becomes a reality.
+
+#ifndef CRCUTIL_PROTECTED_CRC_H_
+#define CRCUTIL_PROTECTED_CRC_H_
+
+namespace crcutil {
+
+#pragma pack(push, 16)
+
+// Class CrcImplementation should not have virtual functions:
+// vptr is stored as the very first field, vptr value is defined
+// at runtime, so it is impossible to CRC(*this) once and
+// guarantee that this value will not change from run to run.
+//
+template<typename CrcImplementation> class ProtectedCrc
+ : public CrcImplementation {
+ public:
+ typedef typename CrcImplementation::Crc Crc;
+
+ // Returns check value that the caller should compare
+ // against pre-computed, trusted constant.
+ //
+ // Computing SelfCheckValue() after CRC initialization,
+ // storing it in memory, and periodically checking against
+ // stored value may not work: if CRC tables were initialized
+ // incorrectly and/or had been corrupted during initialization,
+ // CheckValue() will return garbage. Garbage in, garbage out.
+ // Consequitive checks will not detect a problem, the application
+ // will happily produce and save the data with corrupt CRC.
+ //
+ // The application should call SelfCheckValue() regularly:
+ // 1. First and foremost, on every CRC mismatch.
+ // 2. After CRC'ing the data but before sending it out or writing it.
+ // 3. Worst case, every Nth CRC'ed byte or every Nth call to CRC.
+ //
+ Crc SelfCheckValue() const {
+ return CrcDefault(this, sizeof(*this), 0);
+ }
+} GCC_ALIGN_ATTRIBUTE(16);
+
+#pragma pack(pop)
+
+} // namespace crcutil
+
+#endif // CRCUTIL_PROTECTED_CRC_H_
diff --git a/contrib/libs/crcutil/rdtsc.h b/contrib/libs/crcutil/rdtsc.h
index 57aea1f1fb..ead586f842 100644
--- a/contrib/libs/crcutil/rdtsc.h
+++ b/contrib/libs/crcutil/rdtsc.h
@@ -1,59 +1,59 @@
-// Copyright 2010 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Reads CPU cycle counter on AMD64 and I386 (for performance measurements).
-// Thanks to __rdtsc() intrinsic, it's easy with Microsoft and Intel
-// compilers, but real pain with GCC.
-
-#ifndef CRCUTIL_RDTSC_H_
-#define CRCUTIL_RDTSC_H_
-
-#include "platform.h"
-
-namespace crcutil {
-
-struct Rdtsc {
- static inline uint64 Get() {
-#if defined(_MSC_VER) && (HAVE_AMD64 || HAVE_I386)
- return __rdtsc();
-#elif defined(__GNUC__) && HAVE_AMD64
- int64 result;
- __asm__ volatile(
- "rdtsc\n"
- : "=a" (result));
- return result;
-#elif defined(__GNUC__) && HAVE_I386
- // If "low" and "high" are defined as "uint64" to
- // avoid explicit cast to uint64, GCC 4.5.0 in "-m32" mode
- // fails with "impossible register constraint" error
- // (no, it is not because one cannot use 64-bit value as argument
- // for 32-bit register, but because its register allocator
- // could not resolve a conflict under high register pressure).
- uint32 low;
- uint32 high;
- __asm__ volatile(
- "rdtsc\n"
- : "=a" (low), "=d" (high));
- return ((static_cast<uint64>(high) << 32) | low);
-#else
- // It is hard to find low overhead timer with
- // sub-millisecond resolution and granularity.
- return 0;
-#endif
- }
-};
-
-} // namespace crcutil
-
-#endif // CRCUTIL_RDTSC_H_
+// Copyright 2010 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Reads CPU cycle counter on AMD64 and I386 (for performance measurements).
+// Thanks to __rdtsc() intrinsic, it's easy with Microsoft and Intel
+// compilers, but real pain with GCC.
+
+#ifndef CRCUTIL_RDTSC_H_
+#define CRCUTIL_RDTSC_H_
+
+#include "platform.h"
+
+namespace crcutil {
+
+struct Rdtsc {
+ static inline uint64 Get() {
+#if defined(_MSC_VER) && (HAVE_AMD64 || HAVE_I386)
+ return __rdtsc();
+#elif defined(__GNUC__) && HAVE_AMD64
+ int64 result;
+ __asm__ volatile(
+ "rdtsc\n"
+ : "=a" (result));
+ return result;
+#elif defined(__GNUC__) && HAVE_I386
+ // If "low" and "high" are defined as "uint64" to
+ // avoid explicit cast to uint64, GCC 4.5.0 in "-m32" mode
+ // fails with "impossible register constraint" error
+ // (no, it is not because one cannot use 64-bit value as argument
+ // for 32-bit register, but because its register allocator
+ // could not resolve a conflict under high register pressure).
+ uint32 low;
+ uint32 high;
+ __asm__ volatile(
+ "rdtsc\n"
+ : "=a" (low), "=d" (high));
+ return ((static_cast<uint64>(high) << 32) | low);
+#else
+ // It is hard to find low overhead timer with
+ // sub-millisecond resolution and granularity.
+ return 0;
+#endif
+ }
+};
+
+} // namespace crcutil
+
+#endif // CRCUTIL_RDTSC_H_
diff --git a/contrib/libs/crcutil/rolling_crc.h b/contrib/libs/crcutil/rolling_crc.h
index ad4a947e49..0de69d2bd8 100644
--- a/contrib/libs/crcutil/rolling_crc.h
+++ b/contrib/libs/crcutil/rolling_crc.h
@@ -1,106 +1,106 @@
-// Copyright 2010 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Implements rolling CRC (e.g. for Rabin fingerprinting).
-
-#ifndef CRCUTIL_ROLLING_CRC_H_
-#define CRCUTIL_ROLLING_CRC_H_
-
-#include "base_types.h" // size_t, uint8
-#include "crc_casts.h" // TO_BYTE
-
-namespace crcutil {
-
-#pragma pack(push, 16)
-
-// CrcImplementation should provide:
-// - typename Crc
-// - typename TableEntry
-// - typename Word
-// - Crc CrcDefault(const void *data, size_t bytes, const Crc &start)
-// - const GfUtil<Crc> &Base() const
-template<typename CrcImplementation> class RollingCrc {
- public:
- typedef typename CrcImplementation::Crc Crc;
- typedef typename CrcImplementation::TableEntry TableEntry;
- typedef typename CrcImplementation::Word Word;
-
- RollingCrc() {}
-
- // Initializes internal data structures.
- // Retains reference to "crc" instance -- it is used by Start().
- RollingCrc(const CrcImplementation &crc,
- size_t roll_window_bytes,
- const Crc &start_value) {
- Init(crc, roll_window_bytes, start_value);
- }
-
- // Computes crc of "roll_window_bytes" using
- // "start_value" of "crc" (see Init()).
- Crc Start(const void *data) const {
- return crc_->CrcDefault(data, roll_window_bytes_, start_value_);
- }
-
- // Computes CRC of "roll_window_bytes" starting in next position.
- Crc Roll(const Crc &old_crc, size_t byte_out, size_t byte_in) const {
- return (old_crc >> 8) ^ in_[TO_BYTE(old_crc) ^ byte_in] ^ out_[byte_out];
- }
-
- // Initializes internal data structures.
- // Retains reference to "crc" instance -- it is used by Start().
- void Init(const CrcImplementation &crc,
- size_t roll_window_bytes,
- const Crc &start_value) {
- crc_ = &crc;
- roll_window_bytes_ = roll_window_bytes;
- start_value_ = start_value;
-
- Crc add = crc.Base().Canonize() ^ start_value;
- add = crc.Base().Multiply(add, crc.Base().Xpow8N(roll_window_bytes));
- add ^= crc.Base().Canonize();
- Crc mul = crc.Base().One() ^ crc.Base().Xpow8N(1);
- add = crc.Base().Multiply(add, mul);
-
- mul = crc.Base().XpowN(8 * roll_window_bytes + crc.Base().Degree());
- for (size_t i = 0; i < 256; ++i) {
- out_[i] = static_cast<TableEntry>(
- crc.Base().MultiplyUnnormalized(
- static_cast<Crc>(i), 8, mul) ^ add);
- }
- for (size_t i = 0; i < 256; ++i) {
- in_[i] = crc.crc_word_[sizeof(Word) - 1][i];
- }
- }
-
- // Returns start value.
- Crc StartValue() const { return start_value_; }
-
- // Returns length of roll window.
- size_t WindowBytes() const { return roll_window_bytes_; }
-
- protected:
- TableEntry in_[256];
- TableEntry out_[256];
-
- // Used only by Start().
- Crc start_value_;
- const CrcImplementation *crc_;
- size_t roll_window_bytes_;
-} GCC_ALIGN_ATTRIBUTE(16);
-
-#pragma pack(pop)
-
-} // namespace crcutil
-
-#endif // CRCUTIL_ROLLING_CRC_H_
+// Copyright 2010 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Implements rolling CRC (e.g. for Rabin fingerprinting).
+
+#ifndef CRCUTIL_ROLLING_CRC_H_
+#define CRCUTIL_ROLLING_CRC_H_
+
+#include "base_types.h" // size_t, uint8
+#include "crc_casts.h" // TO_BYTE
+
+namespace crcutil {
+
+#pragma pack(push, 16)
+
+// CrcImplementation should provide:
+// - typename Crc
+// - typename TableEntry
+// - typename Word
+// - Crc CrcDefault(const void *data, size_t bytes, const Crc &start)
+// - const GfUtil<Crc> &Base() const
+template<typename CrcImplementation> class RollingCrc {
+ public:
+ typedef typename CrcImplementation::Crc Crc;
+ typedef typename CrcImplementation::TableEntry TableEntry;
+ typedef typename CrcImplementation::Word Word;
+
+ RollingCrc() {}
+
+ // Initializes internal data structures.
+ // Retains reference to "crc" instance -- it is used by Start().
+ RollingCrc(const CrcImplementation &crc,
+ size_t roll_window_bytes,
+ const Crc &start_value) {
+ Init(crc, roll_window_bytes, start_value);
+ }
+
+ // Computes crc of "roll_window_bytes" using
+ // "start_value" of "crc" (see Init()).
+ Crc Start(const void *data) const {
+ return crc_->CrcDefault(data, roll_window_bytes_, start_value_);
+ }
+
+ // Computes CRC of "roll_window_bytes" starting in next position.
+ Crc Roll(const Crc &old_crc, size_t byte_out, size_t byte_in) const {
+ return (old_crc >> 8) ^ in_[TO_BYTE(old_crc) ^ byte_in] ^ out_[byte_out];
+ }
+
+ // Initializes internal data structures.
+ // Retains reference to "crc" instance -- it is used by Start().
+ void Init(const CrcImplementation &crc,
+ size_t roll_window_bytes,
+ const Crc &start_value) {
+ crc_ = &crc;
+ roll_window_bytes_ = roll_window_bytes;
+ start_value_ = start_value;
+
+ Crc add = crc.Base().Canonize() ^ start_value;
+ add = crc.Base().Multiply(add, crc.Base().Xpow8N(roll_window_bytes));
+ add ^= crc.Base().Canonize();
+ Crc mul = crc.Base().One() ^ crc.Base().Xpow8N(1);
+ add = crc.Base().Multiply(add, mul);
+
+ mul = crc.Base().XpowN(8 * roll_window_bytes + crc.Base().Degree());
+ for (size_t i = 0; i < 256; ++i) {
+ out_[i] = static_cast<TableEntry>(
+ crc.Base().MultiplyUnnormalized(
+ static_cast<Crc>(i), 8, mul) ^ add);
+ }
+ for (size_t i = 0; i < 256; ++i) {
+ in_[i] = crc.crc_word_[sizeof(Word) - 1][i];
+ }
+ }
+
+ // Returns start value.
+ Crc StartValue() const { return start_value_; }
+
+ // Returns length of roll window.
+ size_t WindowBytes() const { return roll_window_bytes_; }
+
+ protected:
+ TableEntry in_[256];
+ TableEntry out_[256];
+
+ // Used only by Start().
+ Crc start_value_;
+ const CrcImplementation *crc_;
+ size_t roll_window_bytes_;
+} GCC_ALIGN_ATTRIBUTE(16);
+
+#pragma pack(pop)
+
+} // namespace crcutil
+
+#endif // CRCUTIL_ROLLING_CRC_H_
diff --git a/contrib/libs/crcutil/std_headers.h b/contrib/libs/crcutil/std_headers.h
index 0d7e4ecb56..55746fb8be 100644
--- a/contrib/libs/crcutil/std_headers.h
+++ b/contrib/libs/crcutil/std_headers.h
@@ -1,53 +1,53 @@
-// Copyright 2010 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Includes some standard C headers for size_t, memset, etc.
-//
-// Also, permanently disables a number of warnings produced
-// by Microsoft's compiler when it includes standard headers
-// (surprisingly, also by Microsoft).
-
-#ifndef CRCUTIL_STD_HEADERS_H_
-#define CRCUTIL_STD_HEADERS_H_
-
-#if defined(_MSC_VER)
-// '4' bytes padding added after data member ...
-#pragma warning(disable:4820)
-
-// unreferenced inline function has been removed ...
-#pragma warning(disable:4514)
-
-// conditional expression is constant
-#pragma warning(disable: 4127)
-
-// function ... not inlined
-#pragma warning(disable: 4710)
-
-// function ... selected for automatic inline expansion
-#pragma warning(disable: 4711)
-
+// Copyright 2010 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Includes some standard C headers for size_t, memset, etc.
+//
+// Also, permanently disables a number of warnings produced
+// by Microsoft's compiler when it includes standard headers
+// (surprisingly, also by Microsoft).
+
+#ifndef CRCUTIL_STD_HEADERS_H_
+#define CRCUTIL_STD_HEADERS_H_
+
+#if defined(_MSC_VER)
+// '4' bytes padding added after data member ...
+#pragma warning(disable:4820)
+
+// unreferenced inline function has been removed ...
+#pragma warning(disable:4514)
+
+// conditional expression is constant
+#pragma warning(disable: 4127)
+
+// function ... not inlined
+#pragma warning(disable: 4710)
+
+// function ... selected for automatic inline expansion
+#pragma warning(disable: 4711)
+
#ifndef _CRT_SECURE_NO_WARNINGS
-#define _CRT_SECURE_NO_WARNINGS
+#define _CRT_SECURE_NO_WARNINGS
#endif
-
-#endif // defined(_MSC_VER)
-
-// #define _CSTDLIB_
-#include <stdio.h> // always handy
-#include <string.h> // memset
-#include <stdlib.h> // size_t, _rotl/_rotl64(MSC)
-#include <stddef.h> // ptrdiff_t (GNUC)
-#include <stdarg.h> // va_list
-
-#endif // CRCUTIL_STD_HEADERS_H_
+
+#endif // defined(_MSC_VER)
+
+// #define _CSTDLIB_
+#include <stdio.h> // always handy
+#include <string.h> // memset
+#include <stdlib.h> // size_t, _rotl/_rotl64(MSC)
+#include <stddef.h> // ptrdiff_t (GNUC)
+#include <stdarg.h> // va_list
+
+#endif // CRCUTIL_STD_HEADERS_H_
diff --git a/contrib/libs/crcutil/uint128_sse2.h b/contrib/libs/crcutil/uint128_sse2.h
index 24b4072658..0fcc7255ed 100644
--- a/contrib/libs/crcutil/uint128_sse2.h
+++ b/contrib/libs/crcutil/uint128_sse2.h
@@ -1,310 +1,310 @@
-// Copyright 2010 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Implements a limited set of 128-bit arithmetic operations
-// (the ones that are used by CRC) using SSE2 intrinsics.
-
-#ifndef CRCUTIL_UINT128_SSE2_H_
-#define CRCUTIL_UINT128_SSE2_H_
-
-#include "base_types.h"
-#include "crc_casts.h" // Downcast, CrcFromUint64, Uint64FromCrc
-#include "platform.h"
-
-#if HAVE_SSE2
-
-namespace crcutil {
-
-// Specialized functions handling __m128i.
-template<> __forceinline uint64 Downcast(const __m128i &value) {
-#if HAVE_AMD64 && defined(__GNUC__)
- // GCC 4.4.x is too smart and, instead of MOVQ, generates SSE4 PEXTRQ
- // instruction when the code is compiled with -mmsse4.
- // Fixed in 4.5 which generates conversion through memory (why?).
- // And -- yes, it makes quite measurable difference.
- uint64 temp;
- asm(SSE2_MOVQ " %[i128], %[u64]\n" : [u64] "=r" (temp) : [i128] "x" (value));
- return temp;
-#elif HAVE_AMD64 && (!defined(_MSC_FULL_VER) || _MSC_FULL_VER > 150030729)
- return static_cast<uint64>(_mm_cvtsi128_si64(value));
-#else
- // 64-bit CL 15.00.30729.1 -O2 generates incorrect code (tests fail).
- // _mm_cvtsi128_si64() is not available on i386.
- uint64 temp;
- _mm_storel_epi64(reinterpret_cast<__m128i *>(&temp), value);
- return temp;
-#endif
-}
-
-
-class uint128_sse2 {
- public:
- uint128_sse2() {}
- ~uint128_sse2() {}
-
- // Default casts to uint128_sse2 and assignment operator.
- __forceinline void operator =(uint64 value) {
-#if HAVE_AMD64 && defined(__GNUC__) && !GCC_VERSION_AVAILABLE(4, 5)
- // Prevent generation of SSE4 pinsrq insruction when
- // compiling with GCC 4.4.x with -msse4 flag.
- asm(SSE2_MOVQ " %[u64], %[i128]\n" : [i128] "=x" (x_) : [u64] "r" (value));
-#elif HAVE_AMD64
- x_ = _mm_cvtsi64_si128(static_cast<int64>(value));
-#else
- x_ = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&value));
-#endif
- }
- __forceinline uint128_sse2(uint64 x) {
- *this = x;
- }
- __forceinline uint128_sse2(const __m128i x) : x_(x) {
- }
- __forceinline operator __m128i() const {
- return x_;
- }
- __forceinline void operator =(const uint128_sse2 &x) {
- x_ = x.x_;
- }
-
- // Extracts 64 less significant bits.
- __forceinline uint64 to_uint64() const {
- return Downcast<__m128i, uint64>(x_);
- }
-
- // Comparisons.
- __forceinline bool operator ==(const uint128_sse2 &y) const {
- union {
- __m128i i128;
- uint64 u64[2];
- } t;
- t.i128 = _mm_xor_si128(x_, y.x_);
- return (t.u64[0] | t.u64[1]) == 0;
- }
- __forceinline bool operator ==(uint64 value) const {
- union {
- __m128i i128;
- uint64 u64[2];
- } t;
- t.i128 = x_;
- return (t.u64[0] == value && t.u64[1] == 0);
- }
- __forceinline bool operator !=(const uint128_sse2 &y) const {
- union {
- __m128i i128;
- uint64 u64[2];
- } t;
- t.i128 = _mm_xor_si128(x_, y.x_);
- return (t.u64[0] | t.u64[1]) != 0;
- }
- __forceinline bool operator !=(uint64 value) const {
- union {
- __m128i i128;
- uint64 u64[2];
- } t;
- t.i128 = x_;
- return (t.u64[0] != value || t.u64[1] != 0);
- }
-
- __forceinline bool operator <(const uint128_sse2 &y) const {
- union {
- __m128i i128;
- uint64 u64[2];
- } xx, yy;
- xx.i128 = x_;
- yy.i128 = y.x_;
- return (xx.u64[0] < yy.u64[0] ||
- (xx.u64[0] == yy.u64[0] && xx.u64[1] < yy.u64[1]));
- }
-
- // Bitwise logic operators.
- __forceinline uint128_sse2 operator ^(const uint128_sse2 &y) const {
- return _mm_xor_si128(x_, y.x_);
- }
- __forceinline uint128_sse2 operator &(const uint128_sse2 &y) const {
- return _mm_and_si128(x_, y.x_);
- }
- __forceinline uint128_sse2 operator |(const uint128_sse2 &y) const {
- return _mm_or_si128(x_, y.x_);
- }
-
- __forceinline void operator ^=(const uint128_sse2 &y) {
- *this = *this ^ y.x_;
- }
- __forceinline void operator &=(const uint128_sse2 &y) {
- *this = *this & y.x_;
- }
- __forceinline void operator |=(const uint128_sse2 &y) {
- *this = *this | y.x_;
- }
-
- // Arithmetic operators.
- __forceinline uint128_sse2 operator +(uint64 y) const {
- union {
- __m128i i128;
- uint64 u64[2];
- } temp;
- temp.i128 = x_;
- // a + b >= 2**64 iff
- // a + b > (2**64 - 1) iff
- // a > (2**64 - 1) - b iff
- // a > ~b
- if (temp.u64[0] > ~y) {
- temp.u64[1] += 1;
- }
- temp.u64[0] += y;
- return temp.i128;
- }
- __forceinline void operator +=(uint64 x) {
- *this = *this + x;
- }
- __forceinline uint128_sse2 operator -(uint64 y) const {
- union {
- __m128i i128;
- uint64 u64[2];
- } temp;
- temp.i128 = x_;
- if (temp.u64[0] < y) {
- temp.u64[1] -= 1;
- }
- temp.u64[0] -= y;
- return temp.i128;
- }
- __forceinline void operator -=(uint64 x) {
- *this = *this - x;
- }
-
- // Bitwise logical shifts.
- __forceinline uint128_sse2 operator >>(const int bits) const {
- if (bits == 8) {
- return _mm_srli_si128(x_, 1);
- } else if (bits == 16) {
- return _mm_srli_si128(x_, 2);
- } else if (bits == 32) {
- return _mm_srli_si128(x_, 4);
- } else if (bits == 64) {
- return _mm_srli_si128(x_, 8);
- } else {
- return long_shift_right(bits);
- }
- }
- __forceinline uint128_sse2 operator >>(const size_t bits) const {
- return *this >> static_cast<int>(bits);
- }
- __forceinline void operator >>=(const int bits) {
- *this = *this >> bits;
- }
- __forceinline void operator >>=(const size_t bits) {
- *this = *this >> static_cast<int>(bits);
- }
-
- __forceinline uint128_sse2 operator <<(int bits) const {
- if (bits == 8) {
- return _mm_slli_si128(x_, 1);
- } else if (bits == 16) {
- return _mm_slli_si128(x_, 2);
- } else if (bits == 32) {
- return _mm_slli_si128(x_, 4);
- } else if (bits == 64) {
- return _mm_slli_si128(x_, 8);
- } else {
- return long_shift_left(bits);
- }
- }
- __forceinline uint128_sse2 operator <<(size_t bits) const {
- return *this << static_cast<int>(bits);
- }
- __forceinline void operator <<=(int bits) {
- *this = *this << bits;
- }
- __forceinline void operator <<=(size_t bits) {
- *this = *this << static_cast<int>(bits);
- }
-
- protected:
- __forceinline uint128_sse2 long_shift_right(int bits) const {
- union {
- __m128i i128;
- uint64 u64[2];
- } x;
- x.i128 = x_;
- for (; bits > 0; --bits) {
- x.u64[0] >>= 1;
- if (x.u64[1] & 1) {
- x.u64[0] |= static_cast<uint64>(1) << 63;
- }
- x.u64[1] >>= 1;
- }
- return x.i128;
- }
-
- __forceinline uint128_sse2 long_shift_left(int bits) const {
- union {
- __m128i i128;
- int64 i64[2];
- } x;
- x.i128 = x_;
- for (; bits > 0; --bits) {
- x.i64[1] <<= 1;
- if (x.i64[0] < 0) {
- x.i64[1] |= 1;
- }
- x.i64[0] <<= 1;
- }
- return x.i128;
- }
-
- __m128i x_;
-} GCC_ALIGN_ATTRIBUTE(16);
-
-
-// Specialized versions.
-template<> __forceinline uint64 Downcast(const uint128_sse2 &x) {
- return x.to_uint64();
-}
-template<> __forceinline uint32 Downcast(const uint128_sse2 &x) {
- return static_cast<uint32>(x.to_uint64());
-}
-template<> __forceinline uint16 Downcast(const uint128_sse2 &x) {
- return static_cast<uint16>(x.to_uint64());
-}
-template<> __forceinline uint8 Downcast(const uint128_sse2 &x) {
- return static_cast<uint8>(x.to_uint64());
-}
-
-template<> __forceinline uint128_sse2 CrcFromUint64(uint64 lo, uint64 hi) {
- union {
- __m128i i128;
- uint64 u64[2];
- } temp;
- temp.u64[0] = lo;
- temp.u64[1] = hi;
- return temp.i128;
-}
-
-template<> __forceinline void Uint64FromCrc(const uint128_sse2 &crc,
- uint64 *lo, uint64 *hi) {
- union {
- __m128i i128;
- uint64 u64[2];
- } temp;
- temp.i128 = crc;
- *lo = temp.u64[0];
- *hi = temp.u64[1];
-}
-
-} // namespace crcutil
-
-#endif // HAVE_SSE2
-
-#endif // CRCUTIL_UINT128_SSE2_H_
+// Copyright 2010 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Implements a limited set of 128-bit arithmetic operations
+// (the ones that are used by CRC) using SSE2 intrinsics.
+
+#ifndef CRCUTIL_UINT128_SSE2_H_
+#define CRCUTIL_UINT128_SSE2_H_
+
+#include "base_types.h"
+#include "crc_casts.h" // Downcast, CrcFromUint64, Uint64FromCrc
+#include "platform.h"
+
+#if HAVE_SSE2
+
+namespace crcutil {
+
+// Specialized functions handling __m128i.
+template<> __forceinline uint64 Downcast(const __m128i &value) {
+#if HAVE_AMD64 && defined(__GNUC__)
+ // GCC 4.4.x is too smart and, instead of MOVQ, generates SSE4 PEXTRQ
+ // instruction when the code is compiled with -mmsse4.
+ // Fixed in 4.5 which generates conversion through memory (why?).
+ // And -- yes, it makes quite measurable difference.
+ uint64 temp;
+ asm(SSE2_MOVQ " %[i128], %[u64]\n" : [u64] "=r" (temp) : [i128] "x" (value));
+ return temp;
+#elif HAVE_AMD64 && (!defined(_MSC_FULL_VER) || _MSC_FULL_VER > 150030729)
+ return static_cast<uint64>(_mm_cvtsi128_si64(value));
+#else
+ // 64-bit CL 15.00.30729.1 -O2 generates incorrect code (tests fail).
+ // _mm_cvtsi128_si64() is not available on i386.
+ uint64 temp;
+ _mm_storel_epi64(reinterpret_cast<__m128i *>(&temp), value);
+ return temp;
+#endif
+}
+
+
+class uint128_sse2 {
+ public:
+ uint128_sse2() {}
+ ~uint128_sse2() {}
+
+ // Default casts to uint128_sse2 and assignment operator.
+ __forceinline void operator =(uint64 value) {
+#if HAVE_AMD64 && defined(__GNUC__) && !GCC_VERSION_AVAILABLE(4, 5)
+ // Prevent generation of SSE4 pinsrq insruction when
+ // compiling with GCC 4.4.x with -msse4 flag.
+ asm(SSE2_MOVQ " %[u64], %[i128]\n" : [i128] "=x" (x_) : [u64] "r" (value));
+#elif HAVE_AMD64
+ x_ = _mm_cvtsi64_si128(static_cast<int64>(value));
+#else
+ x_ = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&value));
+#endif
+ }
+ __forceinline uint128_sse2(uint64 x) {
+ *this = x;
+ }
+ __forceinline uint128_sse2(const __m128i x) : x_(x) {
+ }
+ __forceinline operator __m128i() const {
+ return x_;
+ }
+ __forceinline void operator =(const uint128_sse2 &x) {
+ x_ = x.x_;
+ }
+
+ // Extracts 64 less significant bits.
+ __forceinline uint64 to_uint64() const {
+ return Downcast<__m128i, uint64>(x_);
+ }
+
+ // Comparisons.
+ __forceinline bool operator ==(const uint128_sse2 &y) const {
+ union {
+ __m128i i128;
+ uint64 u64[2];
+ } t;
+ t.i128 = _mm_xor_si128(x_, y.x_);
+ return (t.u64[0] | t.u64[1]) == 0;
+ }
+ __forceinline bool operator ==(uint64 value) const {
+ union {
+ __m128i i128;
+ uint64 u64[2];
+ } t;
+ t.i128 = x_;
+ return (t.u64[0] == value && t.u64[1] == 0);
+ }
+ __forceinline bool operator !=(const uint128_sse2 &y) const {
+ union {
+ __m128i i128;
+ uint64 u64[2];
+ } t;
+ t.i128 = _mm_xor_si128(x_, y.x_);
+ return (t.u64[0] | t.u64[1]) != 0;
+ }
+ __forceinline bool operator !=(uint64 value) const {
+ union {
+ __m128i i128;
+ uint64 u64[2];
+ } t;
+ t.i128 = x_;
+ return (t.u64[0] != value || t.u64[1] != 0);
+ }
+
+ __forceinline bool operator <(const uint128_sse2 &y) const {
+ union {
+ __m128i i128;
+ uint64 u64[2];
+ } xx, yy;
+ xx.i128 = x_;
+ yy.i128 = y.x_;
+ return (xx.u64[0] < yy.u64[0] ||
+ (xx.u64[0] == yy.u64[0] && xx.u64[1] < yy.u64[1]));
+ }
+
+ // Bitwise logic operators.
+ __forceinline uint128_sse2 operator ^(const uint128_sse2 &y) const {
+ return _mm_xor_si128(x_, y.x_);
+ }
+ __forceinline uint128_sse2 operator &(const uint128_sse2 &y) const {
+ return _mm_and_si128(x_, y.x_);
+ }
+ __forceinline uint128_sse2 operator |(const uint128_sse2 &y) const {
+ return _mm_or_si128(x_, y.x_);
+ }
+
+ __forceinline void operator ^=(const uint128_sse2 &y) {
+ *this = *this ^ y.x_;
+ }
+ __forceinline void operator &=(const uint128_sse2 &y) {
+ *this = *this & y.x_;
+ }
+ __forceinline void operator |=(const uint128_sse2 &y) {
+ *this = *this | y.x_;
+ }
+
+ // Arithmetic operators.
+ __forceinline uint128_sse2 operator +(uint64 y) const {
+ union {
+ __m128i i128;
+ uint64 u64[2];
+ } temp;
+ temp.i128 = x_;
+ // a + b >= 2**64 iff
+ // a + b > (2**64 - 1) iff
+ // a > (2**64 - 1) - b iff
+ // a > ~b
+ if (temp.u64[0] > ~y) {
+ temp.u64[1] += 1;
+ }
+ temp.u64[0] += y;
+ return temp.i128;
+ }
+ __forceinline void operator +=(uint64 x) {
+ *this = *this + x;
+ }
+ __forceinline uint128_sse2 operator -(uint64 y) const {
+ union {
+ __m128i i128;
+ uint64 u64[2];
+ } temp;
+ temp.i128 = x_;
+ if (temp.u64[0] < y) {
+ temp.u64[1] -= 1;
+ }
+ temp.u64[0] -= y;
+ return temp.i128;
+ }
+ __forceinline void operator -=(uint64 x) {
+ *this = *this - x;
+ }
+
+ // Bitwise logical shifts.
+ __forceinline uint128_sse2 operator >>(const int bits) const {
+ if (bits == 8) {
+ return _mm_srli_si128(x_, 1);
+ } else if (bits == 16) {
+ return _mm_srli_si128(x_, 2);
+ } else if (bits == 32) {
+ return _mm_srli_si128(x_, 4);
+ } else if (bits == 64) {
+ return _mm_srli_si128(x_, 8);
+ } else {
+ return long_shift_right(bits);
+ }
+ }
+ __forceinline uint128_sse2 operator >>(const size_t bits) const {
+ return *this >> static_cast<int>(bits);
+ }
+ __forceinline void operator >>=(const int bits) {
+ *this = *this >> bits;
+ }
+ __forceinline void operator >>=(const size_t bits) {
+ *this = *this >> static_cast<int>(bits);
+ }
+
+ __forceinline uint128_sse2 operator <<(int bits) const {
+ if (bits == 8) {
+ return _mm_slli_si128(x_, 1);
+ } else if (bits == 16) {
+ return _mm_slli_si128(x_, 2);
+ } else if (bits == 32) {
+ return _mm_slli_si128(x_, 4);
+ } else if (bits == 64) {
+ return _mm_slli_si128(x_, 8);
+ } else {
+ return long_shift_left(bits);
+ }
+ }
+ __forceinline uint128_sse2 operator <<(size_t bits) const {
+ return *this << static_cast<int>(bits);
+ }
+ __forceinline void operator <<=(int bits) {
+ *this = *this << bits;
+ }
+ __forceinline void operator <<=(size_t bits) {
+ *this = *this << static_cast<int>(bits);
+ }
+
+ protected:
+ __forceinline uint128_sse2 long_shift_right(int bits) const {
+ union {
+ __m128i i128;
+ uint64 u64[2];
+ } x;
+ x.i128 = x_;
+ for (; bits > 0; --bits) {
+ x.u64[0] >>= 1;
+ if (x.u64[1] & 1) {
+ x.u64[0] |= static_cast<uint64>(1) << 63;
+ }
+ x.u64[1] >>= 1;
+ }
+ return x.i128;
+ }
+
+ __forceinline uint128_sse2 long_shift_left(int bits) const {
+ union {
+ __m128i i128;
+ int64 i64[2];
+ } x;
+ x.i128 = x_;
+ for (; bits > 0; --bits) {
+ x.i64[1] <<= 1;
+ if (x.i64[0] < 0) {
+ x.i64[1] |= 1;
+ }
+ x.i64[0] <<= 1;
+ }
+ return x.i128;
+ }
+
+ __m128i x_;
+} GCC_ALIGN_ATTRIBUTE(16);
+
+
+// Specialized versions.
+template<> __forceinline uint64 Downcast(const uint128_sse2 &x) {
+ return x.to_uint64();
+}
+template<> __forceinline uint32 Downcast(const uint128_sse2 &x) {
+ return static_cast<uint32>(x.to_uint64());
+}
+template<> __forceinline uint16 Downcast(const uint128_sse2 &x) {
+ return static_cast<uint16>(x.to_uint64());
+}
+template<> __forceinline uint8 Downcast(const uint128_sse2 &x) {
+ return static_cast<uint8>(x.to_uint64());
+}
+
+template<> __forceinline uint128_sse2 CrcFromUint64(uint64 lo, uint64 hi) {
+ union {
+ __m128i i128;
+ uint64 u64[2];
+ } temp;
+ temp.u64[0] = lo;
+ temp.u64[1] = hi;
+ return temp.i128;
+}
+
+template<> __forceinline void Uint64FromCrc(const uint128_sse2 &crc,
+ uint64 *lo, uint64 *hi) {
+ union {
+ __m128i i128;
+ uint64 u64[2];
+ } temp;
+ temp.i128 = crc;
+ *lo = temp.u64[0];
+ *hi = temp.u64[1];
+}
+
+} // namespace crcutil
+
+#endif // HAVE_SSE2
+
+#endif // CRCUTIL_UINT128_SSE2_H_
diff --git a/contrib/libs/crcutil/ya.make b/contrib/libs/crcutil/ya.make
index 2da8ef940f..90340a33be 100644
--- a/contrib/libs/crcutil/ya.make
+++ b/contrib/libs/crcutil/ya.make
@@ -1,5 +1,5 @@
-LIBRARY()
-
+LIBRARY()
+
LICENSE(Apache-2.0)
VERSION(1.0)
@@ -13,10 +13,10 @@ OWNER(
NO_UTIL()
-NO_COMPILER_WARNINGS()
+NO_COMPILER_WARNINGS()
NO_JOIN_SRC()
-
+
IF (GCC AND USE_LTO)
CFLAGS(-DCRCUTIL_FORCE_ASM_CRC32C=1)
ENDIF()
@@ -61,10 +61,10 @@ IF (ARCH_I386 OR ARCH_X86_64)
SRC_CPP_SSE4(crc32c_sse4.cc)
ENDIF()
ENDIF()
-
-SRCS(
- interface.cc
- multiword_64_64_intrinsic_i386_mmx.cc
-)
-
-END()
+
+SRCS(
+ interface.cc
+ multiword_64_64_intrinsic_i386_mmx.cc
+)
+
+END()