aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/github.com/go-faster/city/ch_128.go
blob: a69c015887b99812467013e4d3594d018905c08b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
package city

import "encoding/binary"

// A subroutine for CH128(). Returns a decent 128-bit hash for strings
// of any length representable in signed long. Based on City and Mumur.
func chMurmur(s []byte, seed U128) U128 {
	length := len(s)
	a := seed.Low
	b := seed.High
	c := uint64(0)
	d := uint64(0)
	l := length - 16
	if len(s) <= 16 { // length <= 16
		a = shiftMix(a*k1) * k1
		c = b*k1 + ch0to16(s, length)

		if length >= 8 {
			d = shiftMix(a + binary.LittleEndian.Uint64(s))
		} else {
			d = shiftMix(a + c)
		}
	} else { // length > 16
		c = ch16(binary.LittleEndian.Uint64(s[length-8:])+k1, a)
		d = ch16(b+uint64(length), c+binary.LittleEndian.Uint64(s[length-16:]))
		a += d

		{
			a ^= shiftMix(binary.LittleEndian.Uint64(s[0:8:8])*k1) * k1
			a *= k1
			b ^= a
			c ^= shiftMix(binary.LittleEndian.Uint64(s[8:8+8:8+8])*k1) * k1
			c *= k1
			d ^= c
			s = s[16:]
			l -= 16
		}

		if l > 0 {
			for len(s) >= 16 {
				a ^= shiftMix(binary.LittleEndian.Uint64(s[0:8:8])*k1) * k1
				a *= k1
				b ^= a
				c ^= shiftMix(binary.LittleEndian.Uint64(s[8:8+8:8+8])*k1) * k1
				c *= k1
				d ^= c
				s = s[16:]
				l -= 16

				if l <= 0 {
					break
				}
			}
		}
	}
	a = ch16(a, c)
	b = ch16(d, b)
	return U128{a ^ b, ch16(b, a)}
}

// CH128 returns 128-bit ClickHouse CityHash.
func CH128(s []byte) U128 {
	if len(s) >= 16 {
		return CH128Seed(s[16:], U128{
			Low:  binary.LittleEndian.Uint64(s[0:8:8]) ^ k3,
			High: binary.LittleEndian.Uint64(s[8 : 8+8 : 8+8]),
		})
	}
	if len(s) >= 8 {
		l := uint64(len(s))
		return CH128Seed(nil, U128{
			Low:  binary.LittleEndian.Uint64(s) ^ (l * k0),
			High: binary.LittleEndian.Uint64(s[l-8:]) ^ k1,
		})
	}
	return CH128Seed(s, U128{Low: k0, High: k1})
}

// CH128Seed returns 128-bit seeded ClickHouse CityHash.
func CH128Seed(s []byte, seed U128) U128 {
	if len(s) < 128 {
		return chMurmur(s, seed)
	}

	// Saving initial input for tail hashing.
	t := s

	// We expect len >= 128 to be the common case. Keep 56 bytes of state:
	// v, w, x, y and z.
	var v, w U128
	x := seed.Low
	y := seed.High
	z := uint64(len(s)) * k1

	{
		subSlice := (*[96]byte)(s[0:])
		v.Low = rot64(y^k1, 49)*k1 + binary.LittleEndian.Uint64(subSlice[0:])
		v.High = rot64(v.Low, 42)*k1 + binary.LittleEndian.Uint64(subSlice[8:])
		w.Low = rot64(y+z, 35)*k1 + x
		w.High = rot64(x+binary.LittleEndian.Uint64(subSlice[88:]), 53) * k1
	}

	// This is the same inner loop as CH64(), manually unrolled.
	for len(s) >= 128 {
		// Roll 1.
		{
			x = rot64(x+y+v.Low+binary.LittleEndian.Uint64(s[16:16+8:16+8]), 37) * k1
			y = rot64(y+v.High+binary.LittleEndian.Uint64(s[48:48+8:48+8]), 42) * k1

			x ^= w.High
			y ^= v.Low

			z = rot64(z^w.Low, 33)
			v = weakHash32SeedsByte(s, v.High*k1, x+w.Low)
			w = weakHash32SeedsByte(s[32:], z+w.High, y)
			z, x = x, z
		}

		// Roll 2.
		{
			const offset = 64
			x = rot64(x+y+v.Low+binary.LittleEndian.Uint64(s[offset+16:offset+16+8:offset+16+8]), 37) * k1
			y = rot64(y+v.High+binary.LittleEndian.Uint64(s[offset+48:offset+48+8:offset+48+8]), 42) * k1
			x ^= w.High
			y ^= v.Low

			z = rot64(z^w.Low, 33)
			v = weakHash32SeedsByte(s[offset:], v.High*k1, x+w.Low)
			w = weakHash32SeedsByte(s[offset+32:], z+w.High, y)
			z, x = x, z
		}
		s = s[128:]
	}

	y += rot64(w.Low, 37)*k0 + z
	x += rot64(v.Low+z, 49) * k0

	// If 0 < length < 128, hash up to 4 chunks of 32 bytes each from the end of s.
	for i := 0; i < len(s); {
		i += 32
		y = rot64(y-x, 42)*k0 + v.High
		w.Low += binary.LittleEndian.Uint64(t[len(t)-i+16:])
		x = rot64(x, 49)*k0 + w.Low
		w.Low += v.Low
		v = weakHash32SeedsByte(t[len(t)-i:], v.Low, v.High)
	}

	// At this point our 48 bytes of state should contain more than
	// enough information for a strong 128-bit hash.  We use two
	// different 48-byte-to-8-byte hashes to get a 16-byte final result.
	x = ch16(x, v.Low)
	y = ch16(y, w.Low)

	return U128{
		Low:  ch16(x+v.High, w.High) + y,
		High: ch16(x+w.High, y+v.High),
	}
}