diff options
author | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-14 09:58:56 +0300 |
---|---|---|
committer | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-14 10:20:20 +0300 |
commit | c2b2dfd9827a400a8495e172a56343462e3ceb82 (patch) | |
tree | cd4e4f597d01bede4c82dffeb2d780d0a9046bd0 /library/cpp | |
parent | d4ae8f119e67808cb0cf776ba6e0cf95296f2df7 (diff) | |
download | ydb-c2b2dfd9827a400a8495e172a56343462e3ceb82.tar.gz |
YQ Connector: move tests from yql to ydb (OSS)
Перенос папки с тестами на Коннектор из папки yql в папку ydb (синхронизируется с github).
Diffstat (limited to 'library/cpp')
-rw-r--r-- | library/cpp/clickhouse_deps/h3_compat/README.md | 3 | ||||
-rw-r--r-- | library/cpp/clickhouse_deps/h3_compat/constants.h | 1 | ||||
-rw-r--r-- | library/cpp/clickhouse_deps/h3_compat/h3api.h | 262 | ||||
-rw-r--r-- | library/cpp/clickhouse_deps/h3_compat/ya.make | 7 | ||||
-rw-r--r-- | library/cpp/clickhouse_deps/incbin_stub/incbin.h | 5 | ||||
-rw-r--r-- | library/cpp/clickhouse_deps/incbin_stub/ya.make | 3 | ||||
-rw-r--r-- | library/cpp/clickhouse_deps/re2_st_stub/README.md | 8 | ||||
-rw-r--r-- | library/cpp/clickhouse_deps/re2_st_stub/include/re2_st/re2.h | 3 | ||||
-rw-r--r-- | library/cpp/clickhouse_deps/re2_st_stub/ya.make | 11 | ||||
-rw-r--r-- | library/cpp/consistent_hashing/consistent_hashing.cpp | 119 | ||||
-rw-r--r-- | library/cpp/consistent_hashing/consistent_hashing.h | 16 | ||||
-rw-r--r-- | library/cpp/consistent_hashing/ya.make | 11 |
12 files changed, 449 insertions, 0 deletions
diff --git a/library/cpp/clickhouse_deps/h3_compat/README.md b/library/cpp/clickhouse_deps/h3_compat/README.md new file mode 100644 index 0000000000..56e2fce66f --- /dev/null +++ b/library/cpp/clickhouse_deps/h3_compat/README.md @@ -0,0 +1,3 @@ +ClickHouse uses a not stable version of H3 library, which interface is incompatible with both 3.x and 4.x H3 versions. + +To avoid having several versions of H3 library in our monorepository, we implement H3 API used in ClickHouse via H3 3.x version. diff --git a/library/cpp/clickhouse_deps/h3_compat/constants.h b/library/cpp/clickhouse_deps/h3_compat/constants.h new file mode 100644 index 0000000000..680e7d8323 --- /dev/null +++ b/library/cpp/clickhouse_deps/h3_compat/constants.h @@ -0,0 +1 @@ +#include <contrib/libs/h3/h3lib/include/constants.h> diff --git a/library/cpp/clickhouse_deps/h3_compat/h3api.h b/library/cpp/clickhouse_deps/h3_compat/h3api.h new file mode 100644 index 0000000000..96201508fa --- /dev/null +++ b/library/cpp/clickhouse_deps/h3_compat/h3api.h @@ -0,0 +1,262 @@ +#pragma once + +#include <contrib/libs/h3/h3lib/include/h3api.h> + +// TODO(dakovalkov): eliminate it. +#define lng lon + +using LatLng = GeoCoord; +using CellBoundary = GeoBoundary; +using H3Error = uint32_t; + +enum H3ErrorCodes +{ + E_SUCCESS = 0, + E_FAILED = 1, +}; + +#define H3_NULL 0 + +inline H3Error latLngToCell(const LatLng *g, int res, H3Index *out) +{ + *out = geoToH3(g, res); + return *out == H3_NULL ? E_FAILED : E_SUCCESS; +} + +// NOTE(dakovalkov): The real function signature in H3 4.x is +// H3Error getHexagonEdgeLengthAvgM(int res, double *out) +// But CH uses a random commit from H3 master branch which is incompatible with both 3.x and 4.x. +inline double getHexagonEdgeLengthAvgM(int res) +{ + return edgeLengthM(res); +} + +inline int getBaseCellNumber(H3Index h) +{ + return h3GetBaseCell(h); +} + +inline int getResolution(H3Index h) +{ + return h3GetResolution(h); +} + +// NOTE(dakovalkov): The real function signature in H3 4.x is +// H3Error getHexagonAreaAvgM2(int res, double *out) +inline double getHexagonAreaAvgM2(int res) +{ + return hexAreaM2(res); +} + +// NOTE(dakovalkov): The real function signature in H3 4.x is +// H3Error areNeighborCells(H3Index origin, H3Index destination, int *out) +inline int areNeighborCells(H3Index origin, H3Index destination) +{ + return h3IndexesAreNeighbors(origin, destination); +} + +inline int isValidCell(H3Index h) +{ + return h3IsValid(h); +} + +// NOTE(dakovalkov): The real function signature in H3 4.x is +// H3Error cellToChildrenSize(H3Index cell, int childRes, int64_t *out) +inline int64_t cellToChildrenSize(H3Index cell, int childRes) +{ + return maxH3ToChildrenSize(cell, childRes); +} + +// NOTE(dakovalkov): The real function signature in H3 4.x is +// H3Error cellToChildren(H3Index cell, int childRes, H3Index *children) +inline H3Error cellToChildren(H3Index cell, int childRes, H3Index *children) +{ + h3ToChildren(cell, childRes, children); + return *children == H3_NULL ? E_FAILED : E_SUCCESS; +} + +// NOTE(dakovalkov): The real function signature in H3 4.x is +// H3Error cellToParent(H3Index cell, int parentRes, H3Index *parent) +inline H3Index cellToParent(H3Index cell, int parentRes) +{ + return h3ToParent(cell, parentRes); +} + +// NOTE(dakovalkov): The real function signature in H3 4.x is +// H3Error maxGridDiskSize(int k, int64_t *out) +inline int64_t maxGridDiskSize(int k) { + return maxKringSize(k); +} + + +inline H3Error gridDisk(H3Index origin, int k, H3Index* out) +{ + kRing(origin, k, out); + return *out == H3_NULL ? E_FAILED : E_SUCCESS; +} + +inline H3Error cellToLatLng(H3Index cell, LatLng *g) +{ + h3ToGeo(cell, g); + // NOTE(dakovalkov): There is no way to get the error in 3.x (is it even possible?). + return E_SUCCESS; +} + +// NOTE(dakovalkov): The real function signature in H3 4.x is +// H3Error cellsToDirectedEdge(H3Index origin, H3Index destination, H3Index *out) +inline H3Index cellsToDirectedEdge(H3Index origin, H3Index destination) +{ + return getH3UnidirectionalEdge(origin, destination); +} + +// NOTE(dakovalkov): The real function signature in H3 4.x is +// H3Error gridPathCellsSize(H3Index start, H3Index end, int64_t* size); +inline int64_t gridPathCellsSize(H3Index start, H3Index end) +{ + return h3LineSize(start, end); +} + +inline H3Error cellToBoundary(H3Index cell, CellBoundary *bndry) +{ + h3ToGeoBoundary(cell, bndry); + // NOTE(dakovalkov): There is no way to get the error in 3.x (is it even possible?). + return E_SUCCESS; +} + +inline int isResClassIII(H3Index h) +{ + return h3IsResClassIII(h); +} + +inline int isPentagon(H3Index h) +{ + return h3IsPentagon(h); +} + +// NOTE(dakovalkov): The real function signature in H3 4.x is +// H3Error getHexagonEdgeLengthAvgKm(int res, double *out) +inline double getHexagonEdgeLengthAvgKm(int res) +{ + return edgeLengthKm(res); +} + +inline H3Error getIcosahedronFaces(H3Index h, int* out) +{ + h3GetFaces(h, out); + // NOTE(dakovalkov): There is no way to get the error in 3.x (is the error even possible?). + return E_SUCCESS; +} + +// NOTE(dakovalkov): The real function signature in H3 4.x is +// H3Error getHexagonAreaAvgKm2(int res, double *out) +inline double getHexagonAreaAvgKm2(int res) +{ + return hexAreaKm2(res); +} + +inline H3Error gridPathCells(H3Index start, H3Index end, H3Index* out) +{ + return h3Line(start, end, out); +} + +inline int isValidDirectedEdge(H3Index edge) +{ + return h3UnidirectionalEdgeIsValid(edge); +} + +inline H3Error originToDirectedEdges(H3Index origin, H3Index* edges) +{ + // TODO(dakovalkov): Find out how to implement it via 3.x version. + throw "Not implemented"; +} + +// NOTE(dakovalkov): The real function signature in H3 4.x is +// H3Error getDirectedEdgeDestination(H3Index edge, H3Index *out); +inline H3Index getDirectedEdgeDestination(H3Index edge) +{ + return getDestinationH3IndexFromUnidirectionalEdge(edge); +} + +inline int res0CellCount() +{ + return res0IndexCount(); +} + +inline H3Error getRes0Cells(H3Index *out) +{ + getRes0Indexes(out); + // NOTE(dakovalkov): There is no way to get the error in 3.x. + return E_SUCCESS; +} + +// NOTE(dakovalkov): The real function signature in H3 4.x is +// H3Error getNumCells(int res, int64_t *out); +inline int64_t getNumCells(int res) +{ + return numHexagons(res); +} + +inline H3Error directedEdgeToBoundary(H3Index edge, CellBoundary* gb) +{ + getH3UnidirectionalEdgeBoundary(edge, gb); + // NOTE(dakovalkov): There is no way to get the error in 3.x. + return E_SUCCESS; +} + +// NOTE(dakovalkov): The real function signature in H3 4.x is +// H3Error cellToCenterChild(H3Index cell, int childRes, H3Index *child); +inline H3Index cellToCenterChild(H3Index cell, int childRes) +{ + return h3ToCenterChild(cell, childRes); +} + +// NOTE(dakovalkov): The real function signature in H3 4.x is +// double greatCircleDistanceKm(const LatLng *a, const LatLng *b) +inline double distanceKm(const LatLng *a, const LatLng *b) +{ + return pointDistKm(a, b); +} + +// NOTE(dakovalkov): The real function signature in H3 4.x is +// double greatCircleDistanceM(const LatLng *a, const LatLng *b) +inline double distanceM(const LatLng *a, const LatLng *b) +{ + return pointDistM(a, b); +} + +// NOTE(dakovalkov): The real function signature in H3 4.x is +// double greatCircleDistanceRads(const LatLng *a, const LatLng *b) +inline double distanceRads(const LatLng *a, const LatLng *b) +{ + return pointDistRads(a, b); +} + +inline H3Error directedEdgeToCells(H3Index edge, H3Index* originDestination) +{ + getH3IndexesFromUnidirectionalEdge(edge, originDestination); + return *originDestination == H3_NULL ? E_FAILED : E_SUCCESS; +} + +// NOTE(dakovalkov): The real function signature in H3 4.x is +// H3Error getDirectedEdgeOrigin(H3Index edge, H3Index *out); +inline H3Index getDirectedEdgeOrigin(H3Index edge) +{ + return getOriginH3IndexFromUnidirectionalEdge(edge); +} + +inline int pentagonCount() +{ + return pentagonIndexCount(); +} + +inline H3Error getPentagons(int res, H3Index *out) +{ + getPentagonIndexes(res, out); + // NOTE(dakovalkov): There is no way to get the error in 3.x. + return E_SUCCESS; +} + +inline H3Error gridRingUnsafe(H3Index origin, int k, H3Index* out) +{ + return hexRing(origin, k, out); +} diff --git a/library/cpp/clickhouse_deps/h3_compat/ya.make b/library/cpp/clickhouse_deps/h3_compat/ya.make new file mode 100644 index 0000000000..3a63f92143 --- /dev/null +++ b/library/cpp/clickhouse_deps/h3_compat/ya.make @@ -0,0 +1,7 @@ +LIBRARY() + +PEERDIR( + contrib/libs/h3 +) + +END() diff --git a/library/cpp/clickhouse_deps/incbin_stub/incbin.h b/library/cpp/clickhouse_deps/incbin_stub/incbin.h new file mode 100644 index 0000000000..f5fc17ea14 --- /dev/null +++ b/library/cpp/clickhouse_deps/incbin_stub/incbin.h @@ -0,0 +1,5 @@ +#pragma once + +#define INCBIN(name, file) \ +const unsigned char * g ## name ## Data = nullptr; \ +unsigned int g ## name ## Size = 0; diff --git a/library/cpp/clickhouse_deps/incbin_stub/ya.make b/library/cpp/clickhouse_deps/incbin_stub/ya.make new file mode 100644 index 0000000000..9865d255c8 --- /dev/null +++ b/library/cpp/clickhouse_deps/incbin_stub/ya.make @@ -0,0 +1,3 @@ +LIBRARY() + +END() diff --git a/library/cpp/clickhouse_deps/re2_st_stub/README.md b/library/cpp/clickhouse_deps/re2_st_stub/README.md new file mode 100644 index 0000000000..fc8660564d --- /dev/null +++ b/library/cpp/clickhouse_deps/re2_st_stub/README.md @@ -0,0 +1,8 @@ +re2_st is a modification of re2 library, which is generated during ClickHouse build: + +https://github.com/ClickHouse/ClickHouse/blob/master/contrib/re2-cmake/CMakeLists.txt#L52 +https://github.com/ClickHouse/ClickHouse/blob/master/contrib/re2-cmake/re2_transform.cmake + +This library mimics re2_st interface via original re2 library, so we can build ClickHouse properly in Arcadia. + +This library should not be used outside of ClickHouse build. diff --git a/library/cpp/clickhouse_deps/re2_st_stub/include/re2_st/re2.h b/library/cpp/clickhouse_deps/re2_st_stub/include/re2_st/re2.h new file mode 100644 index 0000000000..48c2b6915b --- /dev/null +++ b/library/cpp/clickhouse_deps/re2_st_stub/include/re2_st/re2.h @@ -0,0 +1,3 @@ +#include <contrib/libs/re2/re2/re2.h> + +namespace re2_st = re2; diff --git a/library/cpp/clickhouse_deps/re2_st_stub/ya.make b/library/cpp/clickhouse_deps/re2_st_stub/ya.make new file mode 100644 index 0000000000..6cfb279469 --- /dev/null +++ b/library/cpp/clickhouse_deps/re2_st_stub/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +PEERDIR( + contrib/libs/re2 +) + +ADDINCL( + GLOBAL library/cpp/clickhouse_deps/re2_st_stub/include +) + +END() diff --git a/library/cpp/consistent_hashing/consistent_hashing.cpp b/library/cpp/consistent_hashing/consistent_hashing.cpp new file mode 100644 index 0000000000..77ef3898ff --- /dev/null +++ b/library/cpp/consistent_hashing/consistent_hashing.cpp @@ -0,0 +1,119 @@ +#include "consistent_hashing.h" + +#include <library/cpp/pop_count/popcount.h> + +#include <util/generic/bitops.h> + +/* + * (all numbers are written in big-endian manner: the least significant digit on the right) + * (only bit representations are used - no hex or octal, leading zeroes are ommited) + * + * Consistent hashing scheme: + * + * (sizeof(TValue) * 8, y] (y, 0] + * a = * ablock + * b = * cblock + * + * (sizeof(TValue) * 8, k] (k, 0] + * c = * cblock + * + * d = * + * + * k - is determined by 2^(k-1) < n <= 2^k inequality + * z - is number of ones in cblock + * y - number of digits after first one in cblock + * + * The cblock determines logic of using a- and b- blocks: + * + * bits of cblock | result of a function + * 0 : 0 + * 1 : 1 (optimization, the next case includes this one) + * 1?..? : 1ablock (z is even) or 1bblock (z is odd) if possible (<n) + * + * If last case is not possible (>=n), than smooth moving from n=2^(k-1) to n=2^k is applied. + * Using "*" bits of a-,b-,c-,d- blocks ui64 value is combined, modulo of which determines + * if the value should be greather than 2^(k-1) or ConsistentHashing(x, 2^(k-1)) should be used. + * The last case is optimized according to previous checks. + */ + +namespace { + ui64 PowerOf2(size_t k) { + return (ui64)0x1 << k; + } + + template <class TValue> + TValue SelectAOrBBlock(TValue a, TValue b, TValue cBlock) { + size_t z = PopCount<unsigned long long>(cBlock); + bool useABlock = z % 2 == 0; + return useABlock ? a : b; + } + + // Gets the exact result for n = k2 = 2 ^ k + template <class TValue> + size_t ConsistentHashingForPowersOf2(TValue a, TValue b, TValue c, ui64 k2) { + TValue cBlock = c & (k2 - 1); // (k, 0] bits of c + // Zero and one cases + if (cBlock < 2) { + // First two cases of result function table: 0 if cblock is 0, 1 if cblock is 1. + return cBlock; + } + size_t y = GetValueBitCount<unsigned long long>(cBlock) - 1; // cblock = 0..01?..? (y = number of digits after 1), y > 0 + ui64 y2 = PowerOf2(y); // y2 = 2^y + TValue abBlock = SelectAOrBBlock(a, b, cBlock) & (y2 - 1); + return y2 + abBlock; + } + + template <class TValue> + ui64 GetAsteriskBits(TValue a, TValue b, TValue c, TValue d, size_t k) { + size_t shift = sizeof(TValue) * 8 - k; + ui64 res = (d << shift) | (c >> k); + ++shift; + res <<= shift; + res |= b >> (k - 1); + res <<= shift; + res |= a >> (k - 1); + + return res; + } + + template <class TValue> + size_t ConsistentHashingImpl(TValue a, TValue b, TValue c, TValue d, size_t n) { + Y_ABORT_UNLESS(n > 0, "Can't map consistently to a zero values."); + // Uninteresting case + if (n == 1) { + return 0; + } + size_t k = GetValueBitCount(n - 1); // 2^(k-1) < n <= 2^k, k >= 1 + ui64 k2 = PowerOf2(k); // k2 = 2^k + size_t largeValue; + { + // Bit determined variant. Large scheme. + largeValue = ConsistentHashingForPowersOf2(a, b, c, k2); + if (largeValue < n) { + return largeValue; + } + } + // Since largeValue is not assigned yet + // Smooth moving from one bit scheme to another + ui64 k21 = PowerOf2(k - 1); + { + size_t s = GetAsteriskBits(a, b, c, d, k) % (largeValue * (largeValue + 1)); + size_t largeValue2 = s / k2 + k21; + if (largeValue2 < n) { + return largeValue2; + } + } + // Bit determined variant. Short scheme. + return ConsistentHashingForPowersOf2(a, b, c, k21); // Do not apply checks. It is always less than k21 = 2^(k-1) + } + +} + +size_t ConsistentHashing(ui64 x, size_t n) { + ui32 lo = Lo32(x); + ui32 hi = Hi32(x); + return ConsistentHashingImpl<ui16>(Lo16(lo), Hi16(lo), Lo16(hi), Hi16(hi), n); +} +size_t ConsistentHashing(ui64 lo, ui64 hi, size_t n) { + return ConsistentHashingImpl<ui32>(Lo32(lo), Hi32(lo), Lo32(hi), Hi32(hi), n); +} diff --git a/library/cpp/consistent_hashing/consistent_hashing.h b/library/cpp/consistent_hashing/consistent_hashing.h new file mode 100644 index 0000000000..8e4d299150 --- /dev/null +++ b/library/cpp/consistent_hashing/consistent_hashing.h @@ -0,0 +1,16 @@ +#pragma once + +#include <util/system/defaults.h> + +/* + * Maps random ui64 x (in fact hash of some string) to n baskets/shards. + * Output value is id of a basket. 0 <= ConsistentHashing(x, n) < n. + * Probability of all baskets must be equal. Also, it should be consistent + * in terms, that with different n_1 < n_2 probability of + * ConsistentHashing(x, n_1) != ConsistentHashing(x, n_2) must be equal to + * (n_2 - n_1) / n_2 - the least possible with previous conditions. + * It requires O(1) memory and cpu to calculate. So, it is faster than classic + * consistent hashing algos with points on circle. + */ +size_t ConsistentHashing(ui64 x, size_t n); // Works good for n < 65536 +size_t ConsistentHashing(ui64 lo, ui64 hi, size_t n); // Works good for n < 4294967296 diff --git a/library/cpp/consistent_hashing/ya.make b/library/cpp/consistent_hashing/ya.make new file mode 100644 index 0000000000..b7e9e25426 --- /dev/null +++ b/library/cpp/consistent_hashing/ya.make @@ -0,0 +1,11 @@ +LIBRARY(consistent-hashing) + +PEERDIR( + library/cpp/pop_count +) + +SRCS( + consistent_hashing.cpp +) + +END() |