aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp
diff options
context:
space:
mode:
authorvitalyisaev <vitalyisaev@ydb.tech>2023-11-14 09:58:56 +0300
committervitalyisaev <vitalyisaev@ydb.tech>2023-11-14 10:20:20 +0300
commitc2b2dfd9827a400a8495e172a56343462e3ceb82 (patch)
treecd4e4f597d01bede4c82dffeb2d780d0a9046bd0 /library/cpp
parentd4ae8f119e67808cb0cf776ba6e0cf95296f2df7 (diff)
downloadydb-c2b2dfd9827a400a8495e172a56343462e3ceb82.tar.gz
YQ Connector: move tests from yql to ydb (OSS)
Перенос папки с тестами на Коннектор из папки yql в папку ydb (синхронизируется с github).
Diffstat (limited to 'library/cpp')
-rw-r--r--library/cpp/clickhouse_deps/h3_compat/README.md3
-rw-r--r--library/cpp/clickhouse_deps/h3_compat/constants.h1
-rw-r--r--library/cpp/clickhouse_deps/h3_compat/h3api.h262
-rw-r--r--library/cpp/clickhouse_deps/h3_compat/ya.make7
-rw-r--r--library/cpp/clickhouse_deps/incbin_stub/incbin.h5
-rw-r--r--library/cpp/clickhouse_deps/incbin_stub/ya.make3
-rw-r--r--library/cpp/clickhouse_deps/re2_st_stub/README.md8
-rw-r--r--library/cpp/clickhouse_deps/re2_st_stub/include/re2_st/re2.h3
-rw-r--r--library/cpp/clickhouse_deps/re2_st_stub/ya.make11
-rw-r--r--library/cpp/consistent_hashing/consistent_hashing.cpp119
-rw-r--r--library/cpp/consistent_hashing/consistent_hashing.h16
-rw-r--r--library/cpp/consistent_hashing/ya.make11
12 files changed, 449 insertions, 0 deletions
diff --git a/library/cpp/clickhouse_deps/h3_compat/README.md b/library/cpp/clickhouse_deps/h3_compat/README.md
new file mode 100644
index 0000000000..56e2fce66f
--- /dev/null
+++ b/library/cpp/clickhouse_deps/h3_compat/README.md
@@ -0,0 +1,3 @@
+ClickHouse uses a not stable version of H3 library, which interface is incompatible with both 3.x and 4.x H3 versions.
+
+To avoid having several versions of H3 library in our monorepository, we implement H3 API used in ClickHouse via H3 3.x version.
diff --git a/library/cpp/clickhouse_deps/h3_compat/constants.h b/library/cpp/clickhouse_deps/h3_compat/constants.h
new file mode 100644
index 0000000000..680e7d8323
--- /dev/null
+++ b/library/cpp/clickhouse_deps/h3_compat/constants.h
@@ -0,0 +1 @@
+#include <contrib/libs/h3/h3lib/include/constants.h>
diff --git a/library/cpp/clickhouse_deps/h3_compat/h3api.h b/library/cpp/clickhouse_deps/h3_compat/h3api.h
new file mode 100644
index 0000000000..96201508fa
--- /dev/null
+++ b/library/cpp/clickhouse_deps/h3_compat/h3api.h
@@ -0,0 +1,262 @@
+#pragma once
+
+#include <contrib/libs/h3/h3lib/include/h3api.h>
+
+// TODO(dakovalkov): eliminate it.
+#define lng lon
+
+using LatLng = GeoCoord;
+using CellBoundary = GeoBoundary;
+using H3Error = uint32_t;
+
+enum H3ErrorCodes
+{
+ E_SUCCESS = 0,
+ E_FAILED = 1,
+};
+
+#define H3_NULL 0
+
+inline H3Error latLngToCell(const LatLng *g, int res, H3Index *out)
+{
+ *out = geoToH3(g, res);
+ return *out == H3_NULL ? E_FAILED : E_SUCCESS;
+}
+
+// NOTE(dakovalkov): The real function signature in H3 4.x is
+// H3Error getHexagonEdgeLengthAvgM(int res, double *out)
+// But CH uses a random commit from H3 master branch which is incompatible with both 3.x and 4.x.
+inline double getHexagonEdgeLengthAvgM(int res)
+{
+ return edgeLengthM(res);
+}
+
+inline int getBaseCellNumber(H3Index h)
+{
+ return h3GetBaseCell(h);
+}
+
+inline int getResolution(H3Index h)
+{
+ return h3GetResolution(h);
+}
+
+// NOTE(dakovalkov): The real function signature in H3 4.x is
+// H3Error getHexagonAreaAvgM2(int res, double *out)
+inline double getHexagonAreaAvgM2(int res)
+{
+ return hexAreaM2(res);
+}
+
+// NOTE(dakovalkov): The real function signature in H3 4.x is
+// H3Error areNeighborCells(H3Index origin, H3Index destination, int *out)
+inline int areNeighborCells(H3Index origin, H3Index destination)
+{
+ return h3IndexesAreNeighbors(origin, destination);
+}
+
+inline int isValidCell(H3Index h)
+{
+ return h3IsValid(h);
+}
+
+// NOTE(dakovalkov): The real function signature in H3 4.x is
+// H3Error cellToChildrenSize(H3Index cell, int childRes, int64_t *out)
+inline int64_t cellToChildrenSize(H3Index cell, int childRes)
+{
+ return maxH3ToChildrenSize(cell, childRes);
+}
+
+// NOTE(dakovalkov): The real function signature in H3 4.x is
+// H3Error cellToChildren(H3Index cell, int childRes, H3Index *children)
+inline H3Error cellToChildren(H3Index cell, int childRes, H3Index *children)
+{
+ h3ToChildren(cell, childRes, children);
+ return *children == H3_NULL ? E_FAILED : E_SUCCESS;
+}
+
+// NOTE(dakovalkov): The real function signature in H3 4.x is
+// H3Error cellToParent(H3Index cell, int parentRes, H3Index *parent)
+inline H3Index cellToParent(H3Index cell, int parentRes)
+{
+ return h3ToParent(cell, parentRes);
+}
+
+// NOTE(dakovalkov): The real function signature in H3 4.x is
+// H3Error maxGridDiskSize(int k, int64_t *out)
+inline int64_t maxGridDiskSize(int k) {
+ return maxKringSize(k);
+}
+
+
+inline H3Error gridDisk(H3Index origin, int k, H3Index* out)
+{
+ kRing(origin, k, out);
+ return *out == H3_NULL ? E_FAILED : E_SUCCESS;
+}
+
+inline H3Error cellToLatLng(H3Index cell, LatLng *g)
+{
+ h3ToGeo(cell, g);
+ // NOTE(dakovalkov): There is no way to get the error in 3.x (is it even possible?).
+ return E_SUCCESS;
+}
+
+// NOTE(dakovalkov): The real function signature in H3 4.x is
+// H3Error cellsToDirectedEdge(H3Index origin, H3Index destination, H3Index *out)
+inline H3Index cellsToDirectedEdge(H3Index origin, H3Index destination)
+{
+ return getH3UnidirectionalEdge(origin, destination);
+}
+
+// NOTE(dakovalkov): The real function signature in H3 4.x is
+// H3Error gridPathCellsSize(H3Index start, H3Index end, int64_t* size);
+inline int64_t gridPathCellsSize(H3Index start, H3Index end)
+{
+ return h3LineSize(start, end);
+}
+
+inline H3Error cellToBoundary(H3Index cell, CellBoundary *bndry)
+{
+ h3ToGeoBoundary(cell, bndry);
+ // NOTE(dakovalkov): There is no way to get the error in 3.x (is it even possible?).
+ return E_SUCCESS;
+}
+
+inline int isResClassIII(H3Index h)
+{
+ return h3IsResClassIII(h);
+}
+
+inline int isPentagon(H3Index h)
+{
+ return h3IsPentagon(h);
+}
+
+// NOTE(dakovalkov): The real function signature in H3 4.x is
+// H3Error getHexagonEdgeLengthAvgKm(int res, double *out)
+inline double getHexagonEdgeLengthAvgKm(int res)
+{
+ return edgeLengthKm(res);
+}
+
+inline H3Error getIcosahedronFaces(H3Index h, int* out)
+{
+ h3GetFaces(h, out);
+ // NOTE(dakovalkov): There is no way to get the error in 3.x (is the error even possible?).
+ return E_SUCCESS;
+}
+
+// NOTE(dakovalkov): The real function signature in H3 4.x is
+// H3Error getHexagonAreaAvgKm2(int res, double *out)
+inline double getHexagonAreaAvgKm2(int res)
+{
+ return hexAreaKm2(res);
+}
+
+inline H3Error gridPathCells(H3Index start, H3Index end, H3Index* out)
+{
+ return h3Line(start, end, out);
+}
+
+inline int isValidDirectedEdge(H3Index edge)
+{
+ return h3UnidirectionalEdgeIsValid(edge);
+}
+
+inline H3Error originToDirectedEdges(H3Index origin, H3Index* edges)
+{
+ // TODO(dakovalkov): Find out how to implement it via 3.x version.
+ throw "Not implemented";
+}
+
+// NOTE(dakovalkov): The real function signature in H3 4.x is
+// H3Error getDirectedEdgeDestination(H3Index edge, H3Index *out);
+inline H3Index getDirectedEdgeDestination(H3Index edge)
+{
+ return getDestinationH3IndexFromUnidirectionalEdge(edge);
+}
+
+inline int res0CellCount()
+{
+ return res0IndexCount();
+}
+
+inline H3Error getRes0Cells(H3Index *out)
+{
+ getRes0Indexes(out);
+ // NOTE(dakovalkov): There is no way to get the error in 3.x.
+ return E_SUCCESS;
+}
+
+// NOTE(dakovalkov): The real function signature in H3 4.x is
+// H3Error getNumCells(int res, int64_t *out);
+inline int64_t getNumCells(int res)
+{
+ return numHexagons(res);
+}
+
+inline H3Error directedEdgeToBoundary(H3Index edge, CellBoundary* gb)
+{
+ getH3UnidirectionalEdgeBoundary(edge, gb);
+ // NOTE(dakovalkov): There is no way to get the error in 3.x.
+ return E_SUCCESS;
+}
+
+// NOTE(dakovalkov): The real function signature in H3 4.x is
+// H3Error cellToCenterChild(H3Index cell, int childRes, H3Index *child);
+inline H3Index cellToCenterChild(H3Index cell, int childRes)
+{
+ return h3ToCenterChild(cell, childRes);
+}
+
+// NOTE(dakovalkov): The real function signature in H3 4.x is
+// double greatCircleDistanceKm(const LatLng *a, const LatLng *b)
+inline double distanceKm(const LatLng *a, const LatLng *b)
+{
+ return pointDistKm(a, b);
+}
+
+// NOTE(dakovalkov): The real function signature in H3 4.x is
+// double greatCircleDistanceM(const LatLng *a, const LatLng *b)
+inline double distanceM(const LatLng *a, const LatLng *b)
+{
+ return pointDistM(a, b);
+}
+
+// NOTE(dakovalkov): The real function signature in H3 4.x is
+// double greatCircleDistanceRads(const LatLng *a, const LatLng *b)
+inline double distanceRads(const LatLng *a, const LatLng *b)
+{
+ return pointDistRads(a, b);
+}
+
+inline H3Error directedEdgeToCells(H3Index edge, H3Index* originDestination)
+{
+ getH3IndexesFromUnidirectionalEdge(edge, originDestination);
+ return *originDestination == H3_NULL ? E_FAILED : E_SUCCESS;
+}
+
+// NOTE(dakovalkov): The real function signature in H3 4.x is
+// H3Error getDirectedEdgeOrigin(H3Index edge, H3Index *out);
+inline H3Index getDirectedEdgeOrigin(H3Index edge)
+{
+ return getOriginH3IndexFromUnidirectionalEdge(edge);
+}
+
+inline int pentagonCount()
+{
+ return pentagonIndexCount();
+}
+
+inline H3Error getPentagons(int res, H3Index *out)
+{
+ getPentagonIndexes(res, out);
+ // NOTE(dakovalkov): There is no way to get the error in 3.x.
+ return E_SUCCESS;
+}
+
+inline H3Error gridRingUnsafe(H3Index origin, int k, H3Index* out)
+{
+ return hexRing(origin, k, out);
+}
diff --git a/library/cpp/clickhouse_deps/h3_compat/ya.make b/library/cpp/clickhouse_deps/h3_compat/ya.make
new file mode 100644
index 0000000000..3a63f92143
--- /dev/null
+++ b/library/cpp/clickhouse_deps/h3_compat/ya.make
@@ -0,0 +1,7 @@
+LIBRARY()
+
+PEERDIR(
+ contrib/libs/h3
+)
+
+END()
diff --git a/library/cpp/clickhouse_deps/incbin_stub/incbin.h b/library/cpp/clickhouse_deps/incbin_stub/incbin.h
new file mode 100644
index 0000000000..f5fc17ea14
--- /dev/null
+++ b/library/cpp/clickhouse_deps/incbin_stub/incbin.h
@@ -0,0 +1,5 @@
+#pragma once
+
+#define INCBIN(name, file) \
+const unsigned char * g ## name ## Data = nullptr; \
+unsigned int g ## name ## Size = 0;
diff --git a/library/cpp/clickhouse_deps/incbin_stub/ya.make b/library/cpp/clickhouse_deps/incbin_stub/ya.make
new file mode 100644
index 0000000000..9865d255c8
--- /dev/null
+++ b/library/cpp/clickhouse_deps/incbin_stub/ya.make
@@ -0,0 +1,3 @@
+LIBRARY()
+
+END()
diff --git a/library/cpp/clickhouse_deps/re2_st_stub/README.md b/library/cpp/clickhouse_deps/re2_st_stub/README.md
new file mode 100644
index 0000000000..fc8660564d
--- /dev/null
+++ b/library/cpp/clickhouse_deps/re2_st_stub/README.md
@@ -0,0 +1,8 @@
+re2_st is a modification of re2 library, which is generated during ClickHouse build:
+
+https://github.com/ClickHouse/ClickHouse/blob/master/contrib/re2-cmake/CMakeLists.txt#L52
+https://github.com/ClickHouse/ClickHouse/blob/master/contrib/re2-cmake/re2_transform.cmake
+
+This library mimics re2_st interface via original re2 library, so we can build ClickHouse properly in Arcadia.
+
+This library should not be used outside of ClickHouse build.
diff --git a/library/cpp/clickhouse_deps/re2_st_stub/include/re2_st/re2.h b/library/cpp/clickhouse_deps/re2_st_stub/include/re2_st/re2.h
new file mode 100644
index 0000000000..48c2b6915b
--- /dev/null
+++ b/library/cpp/clickhouse_deps/re2_st_stub/include/re2_st/re2.h
@@ -0,0 +1,3 @@
+#include <contrib/libs/re2/re2/re2.h>
+
+namespace re2_st = re2;
diff --git a/library/cpp/clickhouse_deps/re2_st_stub/ya.make b/library/cpp/clickhouse_deps/re2_st_stub/ya.make
new file mode 100644
index 0000000000..6cfb279469
--- /dev/null
+++ b/library/cpp/clickhouse_deps/re2_st_stub/ya.make
@@ -0,0 +1,11 @@
+LIBRARY()
+
+PEERDIR(
+ contrib/libs/re2
+)
+
+ADDINCL(
+ GLOBAL library/cpp/clickhouse_deps/re2_st_stub/include
+)
+
+END()
diff --git a/library/cpp/consistent_hashing/consistent_hashing.cpp b/library/cpp/consistent_hashing/consistent_hashing.cpp
new file mode 100644
index 0000000000..77ef3898ff
--- /dev/null
+++ b/library/cpp/consistent_hashing/consistent_hashing.cpp
@@ -0,0 +1,119 @@
+#include "consistent_hashing.h"
+
+#include <library/cpp/pop_count/popcount.h>
+
+#include <util/generic/bitops.h>
+
+/*
+ * (all numbers are written in big-endian manner: the least significant digit on the right)
+ * (only bit representations are used - no hex or octal, leading zeroes are ommited)
+ *
+ * Consistent hashing scheme:
+ *
+ * (sizeof(TValue) * 8, y] (y, 0]
+ * a = * ablock
+ * b = * cblock
+ *
+ * (sizeof(TValue) * 8, k] (k, 0]
+ * c = * cblock
+ *
+ * d = *
+ *
+ * k - is determined by 2^(k-1) < n <= 2^k inequality
+ * z - is number of ones in cblock
+ * y - number of digits after first one in cblock
+ *
+ * The cblock determines logic of using a- and b- blocks:
+ *
+ * bits of cblock | result of a function
+ * 0 : 0
+ * 1 : 1 (optimization, the next case includes this one)
+ * 1?..? : 1ablock (z is even) or 1bblock (z is odd) if possible (<n)
+ *
+ * If last case is not possible (>=n), than smooth moving from n=2^(k-1) to n=2^k is applied.
+ * Using "*" bits of a-,b-,c-,d- blocks ui64 value is combined, modulo of which determines
+ * if the value should be greather than 2^(k-1) or ConsistentHashing(x, 2^(k-1)) should be used.
+ * The last case is optimized according to previous checks.
+ */
+
+namespace {
+ ui64 PowerOf2(size_t k) {
+ return (ui64)0x1 << k;
+ }
+
+ template <class TValue>
+ TValue SelectAOrBBlock(TValue a, TValue b, TValue cBlock) {
+ size_t z = PopCount<unsigned long long>(cBlock);
+ bool useABlock = z % 2 == 0;
+ return useABlock ? a : b;
+ }
+
+ // Gets the exact result for n = k2 = 2 ^ k
+ template <class TValue>
+ size_t ConsistentHashingForPowersOf2(TValue a, TValue b, TValue c, ui64 k2) {
+ TValue cBlock = c & (k2 - 1); // (k, 0] bits of c
+ // Zero and one cases
+ if (cBlock < 2) {
+ // First two cases of result function table: 0 if cblock is 0, 1 if cblock is 1.
+ return cBlock;
+ }
+ size_t y = GetValueBitCount<unsigned long long>(cBlock) - 1; // cblock = 0..01?..? (y = number of digits after 1), y > 0
+ ui64 y2 = PowerOf2(y); // y2 = 2^y
+ TValue abBlock = SelectAOrBBlock(a, b, cBlock) & (y2 - 1);
+ return y2 + abBlock;
+ }
+
+ template <class TValue>
+ ui64 GetAsteriskBits(TValue a, TValue b, TValue c, TValue d, size_t k) {
+ size_t shift = sizeof(TValue) * 8 - k;
+ ui64 res = (d << shift) | (c >> k);
+ ++shift;
+ res <<= shift;
+ res |= b >> (k - 1);
+ res <<= shift;
+ res |= a >> (k - 1);
+
+ return res;
+ }
+
+ template <class TValue>
+ size_t ConsistentHashingImpl(TValue a, TValue b, TValue c, TValue d, size_t n) {
+ Y_ABORT_UNLESS(n > 0, "Can't map consistently to a zero values.");
+ // Uninteresting case
+ if (n == 1) {
+ return 0;
+ }
+ size_t k = GetValueBitCount(n - 1); // 2^(k-1) < n <= 2^k, k >= 1
+ ui64 k2 = PowerOf2(k); // k2 = 2^k
+ size_t largeValue;
+ {
+ // Bit determined variant. Large scheme.
+ largeValue = ConsistentHashingForPowersOf2(a, b, c, k2);
+ if (largeValue < n) {
+ return largeValue;
+ }
+ }
+ // Since largeValue is not assigned yet
+ // Smooth moving from one bit scheme to another
+ ui64 k21 = PowerOf2(k - 1);
+ {
+ size_t s = GetAsteriskBits(a, b, c, d, k) % (largeValue * (largeValue + 1));
+ size_t largeValue2 = s / k2 + k21;
+ if (largeValue2 < n) {
+ return largeValue2;
+ }
+ }
+ // Bit determined variant. Short scheme.
+ return ConsistentHashingForPowersOf2(a, b, c, k21); // Do not apply checks. It is always less than k21 = 2^(k-1)
+ }
+
+}
+
+size_t ConsistentHashing(ui64 x, size_t n) {
+ ui32 lo = Lo32(x);
+ ui32 hi = Hi32(x);
+ return ConsistentHashingImpl<ui16>(Lo16(lo), Hi16(lo), Lo16(hi), Hi16(hi), n);
+}
+size_t ConsistentHashing(ui64 lo, ui64 hi, size_t n) {
+ return ConsistentHashingImpl<ui32>(Lo32(lo), Hi32(lo), Lo32(hi), Hi32(hi), n);
+}
diff --git a/library/cpp/consistent_hashing/consistent_hashing.h b/library/cpp/consistent_hashing/consistent_hashing.h
new file mode 100644
index 0000000000..8e4d299150
--- /dev/null
+++ b/library/cpp/consistent_hashing/consistent_hashing.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <util/system/defaults.h>
+
+/*
+ * Maps random ui64 x (in fact hash of some string) to n baskets/shards.
+ * Output value is id of a basket. 0 <= ConsistentHashing(x, n) < n.
+ * Probability of all baskets must be equal. Also, it should be consistent
+ * in terms, that with different n_1 < n_2 probability of
+ * ConsistentHashing(x, n_1) != ConsistentHashing(x, n_2) must be equal to
+ * (n_2 - n_1) / n_2 - the least possible with previous conditions.
+ * It requires O(1) memory and cpu to calculate. So, it is faster than classic
+ * consistent hashing algos with points on circle.
+ */
+size_t ConsistentHashing(ui64 x, size_t n); // Works good for n < 65536
+size_t ConsistentHashing(ui64 lo, ui64 hi, size_t n); // Works good for n < 4294967296
diff --git a/library/cpp/consistent_hashing/ya.make b/library/cpp/consistent_hashing/ya.make
new file mode 100644
index 0000000000..b7e9e25426
--- /dev/null
+++ b/library/cpp/consistent_hashing/ya.make
@@ -0,0 +1,11 @@
+LIBRARY(consistent-hashing)
+
+PEERDIR(
+ library/cpp/pop_count
+)
+
+SRCS(
+ consistent_hashing.cpp
+)
+
+END()