diff options
author | maxim-yurchuk <maxim-yurchuk@yandex-team.com> | 2024-10-09 12:29:46 +0300 |
---|---|---|
committer | maxim-yurchuk <maxim-yurchuk@yandex-team.com> | 2024-10-09 13:14:22 +0300 |
commit | 9731d8a4bb7ee2cc8554eaf133bb85498a4c7d80 (patch) | |
tree | a8fb3181d5947c0d78cf402aa56e686130179049 /contrib/go/_std_1.22/src/hash/maphash | |
parent | a44b779cd359f06c3ebbef4ec98c6b38609d9d85 (diff) | |
download | ydb-9731d8a4bb7ee2cc8554eaf133bb85498a4c7d80.tar.gz |
publishFullContrib: true for ydb
<HIDDEN_URL>
commit_hash:c82a80ac4594723cebf2c7387dec9c60217f603e
Diffstat (limited to 'contrib/go/_std_1.22/src/hash/maphash')
-rw-r--r-- | contrib/go/_std_1.22/src/hash/maphash/maphash.go | 277 | ||||
-rw-r--r-- | contrib/go/_std_1.22/src/hash/maphash/maphash_purego.go | 104 | ||||
-rw-r--r-- | contrib/go/_std_1.22/src/hash/maphash/maphash_runtime.go | 43 | ||||
-rw-r--r-- | contrib/go/_std_1.22/src/hash/maphash/ya.make | 10 |
4 files changed, 434 insertions, 0 deletions
diff --git a/contrib/go/_std_1.22/src/hash/maphash/maphash.go b/contrib/go/_std_1.22/src/hash/maphash/maphash.go new file mode 100644 index 0000000000..1e70a279f5 --- /dev/null +++ b/contrib/go/_std_1.22/src/hash/maphash/maphash.go @@ -0,0 +1,277 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package maphash provides hash functions on byte sequences. +// These hash functions are intended to be used to implement hash tables or +// other data structures that need to map arbitrary strings or byte +// sequences to a uniform distribution on unsigned 64-bit integers. +// Each different instance of a hash table or data structure should use its own [Seed]. +// +// The hash functions are not cryptographically secure. +// (See crypto/sha256 and crypto/sha512 for cryptographic use.) +package maphash + +// A Seed is a random value that selects the specific hash function +// computed by a [Hash]. If two Hashes use the same Seeds, they +// will compute the same hash values for any given input. +// If two Hashes use different Seeds, they are very likely to compute +// distinct hash values for any given input. +// +// A Seed must be initialized by calling [MakeSeed]. +// The zero seed is uninitialized and not valid for use with [Hash]'s SetSeed method. +// +// Each Seed value is local to a single process and cannot be serialized +// or otherwise recreated in a different process. +type Seed struct { + s uint64 +} + +// Bytes returns the hash of b with the given seed. +// +// Bytes is equivalent to, but more convenient and efficient than: +// +// var h Hash +// h.SetSeed(seed) +// h.Write(b) +// return h.Sum64() +func Bytes(seed Seed, b []byte) uint64 { + state := seed.s + if state == 0 { + panic("maphash: use of uninitialized Seed") + } + + if len(b) > bufSize { + b = b[:len(b):len(b)] // merge len and cap calculations when reslicing + for len(b) > bufSize { + state = rthash(b[:bufSize], state) + b = b[bufSize:] + } + } + return rthash(b, state) +} + +// String returns the hash of s with the given seed. +// +// String is equivalent to, but more convenient and efficient than: +// +// var h Hash +// h.SetSeed(seed) +// h.WriteString(s) +// return h.Sum64() +func String(seed Seed, s string) uint64 { + state := seed.s + if state == 0 { + panic("maphash: use of uninitialized Seed") + } + for len(s) > bufSize { + state = rthashString(s[:bufSize], state) + s = s[bufSize:] + } + return rthashString(s, state) +} + +// A Hash computes a seeded hash of a byte sequence. +// +// The zero Hash is a valid Hash ready to use. +// A zero Hash chooses a random seed for itself during +// the first call to a Reset, Write, Seed, or Sum64 method. +// For control over the seed, use SetSeed. +// +// The computed hash values depend only on the initial seed and +// the sequence of bytes provided to the Hash object, not on the way +// in which the bytes are provided. For example, the three sequences +// +// h.Write([]byte{'f','o','o'}) +// h.WriteByte('f'); h.WriteByte('o'); h.WriteByte('o') +// h.WriteString("foo") +// +// all have the same effect. +// +// Hashes are intended to be collision-resistant, even for situations +// where an adversary controls the byte sequences being hashed. +// +// A Hash is not safe for concurrent use by multiple goroutines, but a Seed is. +// If multiple goroutines must compute the same seeded hash, +// each can declare its own Hash and call SetSeed with a common Seed. +type Hash struct { + _ [0]func() // not comparable + seed Seed // initial seed used for this hash + state Seed // current hash of all flushed bytes + buf [bufSize]byte // unflushed byte buffer + n int // number of unflushed bytes +} + +// bufSize is the size of the Hash write buffer. +// The buffer ensures that writes depend only on the sequence of bytes, +// not the sequence of WriteByte/Write/WriteString calls, +// by always calling rthash with a full buffer (except for the tail). +const bufSize = 128 + +// initSeed seeds the hash if necessary. +// initSeed is called lazily before any operation that actually uses h.seed/h.state. +// Note that this does not include Write/WriteByte/WriteString in the case +// where they only add to h.buf. (If they write too much, they call h.flush, +// which does call h.initSeed.) +func (h *Hash) initSeed() { + if h.seed.s == 0 { + seed := MakeSeed() + h.seed = seed + h.state = seed + } +} + +// WriteByte adds b to the sequence of bytes hashed by h. +// It never fails; the error result is for implementing [io.ByteWriter]. +func (h *Hash) WriteByte(b byte) error { + if h.n == len(h.buf) { + h.flush() + } + h.buf[h.n] = b + h.n++ + return nil +} + +// Write adds b to the sequence of bytes hashed by h. +// It always writes all of b and never fails; the count and error result are for implementing [io.Writer]. +func (h *Hash) Write(b []byte) (int, error) { + size := len(b) + // Deal with bytes left over in h.buf. + // h.n <= bufSize is always true. + // Checking it is ~free and it lets the compiler eliminate a bounds check. + if h.n > 0 && h.n <= bufSize { + k := copy(h.buf[h.n:], b) + h.n += k + if h.n < bufSize { + // Copied the entirety of b to h.buf. + return size, nil + } + b = b[k:] + h.flush() + // No need to set h.n = 0 here; it happens just before exit. + } + // Process as many full buffers as possible, without copying, and calling initSeed only once. + if len(b) > bufSize { + h.initSeed() + for len(b) > bufSize { + h.state.s = rthash(b[:bufSize], h.state.s) + b = b[bufSize:] + } + } + // Copy the tail. + copy(h.buf[:], b) + h.n = len(b) + return size, nil +} + +// WriteString adds the bytes of s to the sequence of bytes hashed by h. +// It always writes all of s and never fails; the count and error result are for implementing [io.StringWriter]. +func (h *Hash) WriteString(s string) (int, error) { + // WriteString mirrors Write. See Write for comments. + size := len(s) + if h.n > 0 && h.n <= bufSize { + k := copy(h.buf[h.n:], s) + h.n += k + if h.n < bufSize { + return size, nil + } + s = s[k:] + h.flush() + } + if len(s) > bufSize { + h.initSeed() + for len(s) > bufSize { + h.state.s = rthashString(s[:bufSize], h.state.s) + s = s[bufSize:] + } + } + copy(h.buf[:], s) + h.n = len(s) + return size, nil +} + +// Seed returns h's seed value. +func (h *Hash) Seed() Seed { + h.initSeed() + return h.seed +} + +// SetSeed sets h to use seed, which must have been returned by [MakeSeed] +// or by another [Hash.Seed] method. +// Two [Hash] objects with the same seed behave identically. +// Two [Hash] objects with different seeds will very likely behave differently. +// Any bytes added to h before this call will be discarded. +func (h *Hash) SetSeed(seed Seed) { + if seed.s == 0 { + panic("maphash: use of uninitialized Seed") + } + h.seed = seed + h.state = seed + h.n = 0 +} + +// Reset discards all bytes added to h. +// (The seed remains the same.) +func (h *Hash) Reset() { + h.initSeed() + h.state = h.seed + h.n = 0 +} + +// precondition: buffer is full. +func (h *Hash) flush() { + if h.n != len(h.buf) { + panic("maphash: flush of partially full buffer") + } + h.initSeed() + h.state.s = rthash(h.buf[:h.n], h.state.s) + h.n = 0 +} + +// Sum64 returns h's current 64-bit value, which depends on +// h's seed and the sequence of bytes added to h since the +// last call to [Hash.Reset] or [Hash.SetSeed]. +// +// All bits of the Sum64 result are close to uniformly and +// independently distributed, so it can be safely reduced +// by using bit masking, shifting, or modular arithmetic. +func (h *Hash) Sum64() uint64 { + h.initSeed() + return rthash(h.buf[:h.n], h.state.s) +} + +// MakeSeed returns a new random seed. +func MakeSeed() Seed { + var s uint64 + for { + s = randUint64() + // We use seed 0 to indicate an uninitialized seed/hash, + // so keep trying until we get a non-zero seed. + if s != 0 { + break + } + } + return Seed{s: s} +} + +// Sum appends the hash's current 64-bit value to b. +// It exists for implementing [hash.Hash]. +// For direct calls, it is more efficient to use [Hash.Sum64]. +func (h *Hash) Sum(b []byte) []byte { + x := h.Sum64() + return append(b, + byte(x>>0), + byte(x>>8), + byte(x>>16), + byte(x>>24), + byte(x>>32), + byte(x>>40), + byte(x>>48), + byte(x>>56)) +} + +// Size returns h's hash value size, 8 bytes. +func (h *Hash) Size() int { return 8 } + +// BlockSize returns h's block size. +func (h *Hash) BlockSize() int { return len(h.buf) } diff --git a/contrib/go/_std_1.22/src/hash/maphash/maphash_purego.go b/contrib/go/_std_1.22/src/hash/maphash/maphash_purego.go new file mode 100644 index 0000000000..d49a44ae64 --- /dev/null +++ b/contrib/go/_std_1.22/src/hash/maphash/maphash_purego.go @@ -0,0 +1,104 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build purego + +package maphash + +import ( + "crypto/rand" + "math/bits" +) + +func rthash(buf []byte, seed uint64) uint64 { + if len(buf) == 0 { + return seed + } + return wyhash(buf, seed, uint64(len(buf))) +} + +func rthashString(s string, state uint64) uint64 { + return rthash([]byte(s), state) +} + +func randUint64() uint64 { + buf := make([]byte, 8) + _, _ = rand.Read(buf) + return leUint64(buf) +} + +// This is a port of wyhash implementation in runtime/hash64.go, +// without using unsafe for purego. + +const ( + m1 = 0xa0761d6478bd642f + m2 = 0xe7037ed1a0b428db + m3 = 0x8ebc6af09c88c6e3 + m4 = 0x589965cc75374cc3 + m5 = 0x1d8e4e27c47d124f +) + +func wyhash(key []byte, seed, len uint64) uint64 { + p := key + i := len + var a, b uint64 + seed ^= m1 + + if i > 16 { + if i > 48 { + seed1 := seed + seed2 := seed + for ; i > 48; i -= 48 { + seed = mix(r8(p)^m2, r8(p[8:])^seed) + seed1 = mix(r8(p[16:])^m3, r8(p[24:])^seed1) + seed2 = mix(r8(p[32:])^m4, r8(p[40:])^seed2) + p = p[48:] + } + seed ^= seed1 ^ seed2 + } + for ; i > 16; i -= 16 { + seed = mix(r8(p)^m2, r8(p[8:])^seed) + p = p[16:] + } + } + switch { + case i == 0: + return seed + case i < 4: + a = r3(p, i) + default: + n := (i >> 3) << 2 + a = r4(p)<<32 | r4(p[n:]) + b = r4(p[i-4:])<<32 | r4(p[i-4-n:]) + } + return mix(m5^len, mix(a^m2, b^seed)) +} + +func r3(p []byte, k uint64) uint64 { + return (uint64(p[0]) << 16) | (uint64(p[k>>1]) << 8) | uint64(p[k-1]) +} + +func r4(p []byte) uint64 { + return uint64(leUint32(p)) +} + +func r8(p []byte) uint64 { + return leUint64(p) +} + +func mix(a, b uint64) uint64 { + hi, lo := bits.Mul64(a, b) + return hi ^ lo +} + +func leUint32(b []byte) uint32 { + _ = b[3] // bounds check hint to compiler; see golang.org/issue/14808 + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} + +func leUint64(b []byte) uint64 { + _ = b[7] // bounds check hint to compiler; see golang.org/issue/14808 + return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | + uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 +} diff --git a/contrib/go/_std_1.22/src/hash/maphash/maphash_runtime.go b/contrib/go/_std_1.22/src/hash/maphash/maphash_runtime.go new file mode 100644 index 0000000000..b831df2cf4 --- /dev/null +++ b/contrib/go/_std_1.22/src/hash/maphash/maphash_runtime.go @@ -0,0 +1,43 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !purego + +package maphash + +import ( + "unsafe" +) + +//go:linkname runtime_rand runtime.rand +func runtime_rand() uint64 + +//go:linkname runtime_memhash runtime.memhash +//go:noescape +func runtime_memhash(p unsafe.Pointer, seed, s uintptr) uintptr + +func rthash(buf []byte, seed uint64) uint64 { + if len(buf) == 0 { + return seed + } + len := len(buf) + // The runtime hasher only works on uintptr. For 64-bit + // architectures, we use the hasher directly. Otherwise, + // we use two parallel hashers on the lower and upper 32 bits. + if unsafe.Sizeof(uintptr(0)) == 8 { + return uint64(runtime_memhash(unsafe.Pointer(&buf[0]), uintptr(seed), uintptr(len))) + } + lo := runtime_memhash(unsafe.Pointer(&buf[0]), uintptr(seed), uintptr(len)) + hi := runtime_memhash(unsafe.Pointer(&buf[0]), uintptr(seed>>32), uintptr(len)) + return uint64(hi)<<32 | uint64(lo) +} + +func rthashString(s string, state uint64) uint64 { + buf := unsafe.Slice(unsafe.StringData(s), len(s)) + return rthash(buf, state) +} + +func randUint64() uint64 { + return runtime_rand() +} diff --git a/contrib/go/_std_1.22/src/hash/maphash/ya.make b/contrib/go/_std_1.22/src/hash/maphash/ya.make new file mode 100644 index 0000000000..e923b35d40 --- /dev/null +++ b/contrib/go/_std_1.22/src/hash/maphash/ya.make @@ -0,0 +1,10 @@ +SUBSCRIBER(g:contrib) + +GO_LIBRARY() +IF (TRUE) + SRCS( + maphash.go + maphash_runtime.go + ) +ENDIF() +END() |