aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/github.com/go-faster/city/128.go
blob: bccf81bf6063454bf54f7a7501bff1dd110e9c5f (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
package city

import "encoding/binary"

// much faster than uint64[2]

// U128 is uint128.
type U128 struct {
	Low  uint64 // first 64 bits
	High uint64 // last 64 bits
}

// A subroutine for Hash128(). Returns a decent 128-bit hash for strings
// of any length representable in signed long. Based on City and Mumur.
func cityMurmur(s []byte, seed U128) U128 {
	length := len(s)
	a := seed.Low
	b := seed.High
	c := uint64(0)
	d := uint64(0)
	l := length - 16
	if l <= 0 { // length <= 16
		a = shiftMix(a*k1) * k1
		c = b*k1 + hash0to16(s, length)

		tmp := c
		if length >= 8 {
			tmp = binary.LittleEndian.Uint64(s)
		}
		d = shiftMix(a + tmp)
	} else { // length > 16
		c = hash16(binary.LittleEndian.Uint64(s[length-8:])+k1, a)
		d = hash16(b+uint64(length), c+binary.LittleEndian.Uint64(s[length-16:]))
		a += d
		for {
			a ^= shiftMix(binary.LittleEndian.Uint64(s)*k1) * k1
			a *= k1
			b ^= a
			c ^= shiftMix(binary.LittleEndian.Uint64(s[8:])*k1) * k1
			c *= k1
			d ^= c
			s = s[16:]
			l -= 16
			if l <= 0 {
				break
			}
		}
	}
	a = hash16(a, c)
	b = hash16(d, b)
	return U128{a ^ b, hash16(b, a)}
}

// Hash128Seed return a 128-bit hash with a seed.
func Hash128Seed(s []byte, seed U128) U128 {
	if len(s) < 128 {
		return cityMurmur(s, seed)
	}

	// Saving initial input for tail hashing.
	t := s

	// We expect len >= 128 to be the common case. Keep 56 bytes of state:
	// v, w, x, y and z.
	var v, w U128
	x := seed.Low
	y := seed.High
	z := uint64(len(s)) * k1

	v.Low = rot64(y^k1, 49)*k1 + binary.LittleEndian.Uint64(s)
	v.High = rot64(v.Low, 42)*k1 + binary.LittleEndian.Uint64(s[8:])
	w.Low = rot64(y+z, 35)*k1 + x
	w.High = rot64(x+binary.LittleEndian.Uint64(s[88:]), 53) * k1

	// This is the same inner loop as Hash64(), manually unrolled.
	for len(s) >= 128 {
		// Roll 1.
		x = rot64(x+y+v.Low+binary.LittleEndian.Uint64(s[8:]), 37) * k1
		y = rot64(y+v.High+binary.LittleEndian.Uint64(s[48:]), 42) * k1
		x ^= w.High
		y += v.Low + binary.LittleEndian.Uint64(s[40:])
		z = rot64(z+w.Low, 33) * k1
		v = weakHash32SeedsByte(s, v.High*k1, x+w.Low)
		w = weakHash32SeedsByte(s[32:], z+w.High, y+binary.LittleEndian.Uint64(s[16:]))
		z, x = x, z
		s = s[64:]

		// Roll 2.
		x = rot64(x+y+v.Low+binary.LittleEndian.Uint64(s[8:]), 37) * k1
		y = rot64(y+v.High+binary.LittleEndian.Uint64(s[48:]), 42) * k1
		x ^= w.High
		y += v.Low + binary.LittleEndian.Uint64(s[40:])
		z = rot64(z+w.Low, 33) * k1
		v = weakHash32SeedsByte(s, v.High*k1, x+w.Low)
		w = weakHash32SeedsByte(s[32:], z+w.High, y+binary.LittleEndian.Uint64(s[16:]))
		z, x = x, z
		s = s[64:]
	}

	x += rot64(v.Low+z, 49) * k0
	y = y*k0 + rot64(w.High, 37)
	z = z*k0 + rot64(w.Low, 27)
	w.Low *= 9
	v.Low *= k0

	// If 0 < length < 128, hash up to 4 chunks of 32 bytes each from the end of s.
	for i := 0; i < len(s); {
		i += 32
		y = rot64(x+y, 42)*k0 + v.High
		w.Low += binary.LittleEndian.Uint64(t[len(t)-i+16:])
		x = x*k0 + w.Low
		z += w.High + binary.LittleEndian.Uint64(t[len(t)-i:])
		w.High += v.Low
		v = weakHash32SeedsByte(t[len(t)-i:], v.Low+z, v.High)
		v.Low *= k0
	}

	// At this point our 56 bytes of state should contain more than
	// enough information for a strong 128-bit hash. We use two different
	// 56-byte-to-8-byte hashes to get a 16-byte final result.
	x = hash16(x, v.Low)
	y = hash16(y+z, w.Low)

	return U128{
		Low:  hash16(x+v.High, w.High) + y,
		High: hash16(x+w.High, y+v.High),
	}
}

// Hash128 returns a 128-bit hash and are tuned for strings of at least
// a few hundred bytes.  Depending on your compiler and hardware,
// it's likely faster than Hash64() on sufficiently long strings.
// It's slower than necessary on shorter strings, but we expect
// that case to be relatively unimportant.
func Hash128(s []byte) U128 {
	if len(s) >= 16 {
		return Hash128Seed(s[16:], U128{
			Low:  binary.LittleEndian.Uint64(s),
			High: binary.LittleEndian.Uint64(s[8:]) + k0},
		)
	}
	return Hash128Seed(s, U128{Low: k0, High: k1})
}