summaryrefslogtreecommitdiffstats
path: root/yql/essentials/udfs/common/digest/digest_udf.cpp
diff options
context:
space:
mode:
authorvvvv <[email protected]>2025-10-06 13:26:25 +0300
committervvvv <[email protected]>2025-10-06 14:06:25 +0300
commiteca8ce9cb1613d5c983185c4e43c20651a9638aa (patch)
tree61ee5ae779948e61af9a7691d19eaa2c09869121 /yql/essentials/udfs/common/digest/digest_udf.cpp
parent4adf7eecae16a9b228b28cc5f64c27ef69ad5ec2 (diff)
YQL-20086 udfs
init commit_hash:f9684778bf1ea956965f2360b80b91edb7d4ffbe
Diffstat (limited to 'yql/essentials/udfs/common/digest/digest_udf.cpp')
-rw-r--r--yql/essentials/udfs/common/digest/digest_udf.cpp799
1 files changed, 404 insertions, 395 deletions
diff --git a/yql/essentials/udfs/common/digest/digest_udf.cpp b/yql/essentials/udfs/common/digest/digest_udf.cpp
index 834d38aeaf6..63aa748e56c 100644
--- a/yql/essentials/udfs/common/digest/digest_udf.cpp
+++ b/yql/essentials/udfs/common/digest/digest_udf.cpp
@@ -27,456 +27,465 @@ using namespace NKikimr;
using namespace NUdf;
namespace {
- enum EDigestType {
- CRC32C, CRC64, FNV32, FNV64, MURMUR, MURMUR32, MURMUR2A, MURMUR2A32, CITY
- };
- const char* DigestNames[] = {
- "Crc32c", "Crc64", "Fnv32", "Fnv64", "MurMurHash", "MurMurHash32", "MurMurHash2A", "MurMurHash2A32", "CityHash"
- };
-
- template<typename TResult>
- using TDigestGenerator = TResult(const TStringRef&, TMaybe<TResult> init);
-
- template<EDigestType DigestType, typename TResult, TDigestGenerator<TResult>* Generator>
- class TDigestFunctionUdf: public TBoxedValue {
- public:
- TDigestFunctionUdf(TSourcePosition pos) : Pos_(pos) {}
-
- static TStringRef Name() {
- static TString name = DigestNames[DigestType];
- return TStringRef(name);
- }
-
- static bool DeclareSignature(
- const TStringRef& name,
- TType*,
- IFunctionTypeInfoBuilder& builder,
- bool typesOnly)
- {
- if (Name() != name) {
- return false;
- }
-
- auto args = builder.Args();
- args->Add(builder.SimpleType<char *>()).Flags(ICallablePayload::TArgumentFlags::AutoMap);
- args->Add(builder.Optional()->Item(builder.SimpleType<TResult>()).Build()).Name("Init");
- args->Done();
- builder.OptionalArgs(1);
- builder.Returns(builder.SimpleType<TResult>());
- builder.IsStrict();
-
- if (!typesOnly) {
- builder.Implementation(new TDigestFunctionUdf<DigestType, TResult, Generator>(GetSourcePosition(builder)));
- }
-
- return true;
- }
-
- private:
- TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try {
- TMaybe<TResult> init = Nothing();
- if (auto val = args[1]) {
- init = val.Get<TResult>();
- }
- return TUnboxedValuePod(Generator(args[0].AsStringRef(), init));
- } catch (const std ::exception&) {
- TStringBuilder sb;
- sb << Pos_ << " ";
- sb << CurrentExceptionMessage();
- sb << Endl << "[" << TStringBuf(Name()) << "]";
- UdfTerminate(sb.c_str());
- }
-
- TSourcePosition Pos_;
- };
-
- SIMPLE_STRICT_UDF(TCrc32c, ui32(TAutoMap<char*>)) {
- Y_UNUSED(valueBuilder);
- const auto& inputRef = args[0].AsStringRef();
- ui32 hash = Crc32c(inputRef.Data(), inputRef.Size());
- return TUnboxedValuePod(hash);
+enum EDigestType {
+ CRC32C,
+ CRC64,
+ FNV32,
+ FNV64,
+ MURMUR,
+ MURMUR32,
+ MURMUR2A,
+ MURMUR2A32,
+ CITY
+};
+const char* DigestNames[] = {
+ "Crc32c", "Crc64", "Fnv32", "Fnv64", "MurMurHash", "MurMurHash32", "MurMurHash2A", "MurMurHash2A32", "CityHash"};
+
+template <typename TResult>
+using TDigestGenerator = TResult(const TStringRef&, TMaybe<TResult> init);
+
+template <EDigestType DigestType, typename TResult, TDigestGenerator<TResult>* Generator>
+class TDigestFunctionUdf: public TBoxedValue {
+public:
+ TDigestFunctionUdf(TSourcePosition pos)
+ : Pos_(pos)
+ {
}
- using TCrc64 = TDigestFunctionUdf<CRC64, ui64, [](auto& inputRef, auto init) {
- return crc64(inputRef.Data(), inputRef.Size(), init.GetOrElse(CRC64INIT));
- }>;
+ static TStringRef Name() {
+ static TString name = DigestNames[DigestType];
+ return TStringRef(name);
+ }
- using TFnv32 = TDigestFunctionUdf<FNV32, ui32, [](auto& inputRef, auto init) {
- if (init) {
- return FnvHash<ui32>(inputRef.Data(), inputRef.Size(), *init);
- } else {
- return FnvHash<ui32>(inputRef.Data(), inputRef.Size());
+ static bool DeclareSignature(
+ const TStringRef& name,
+ TType*,
+ IFunctionTypeInfoBuilder& builder,
+ bool typesOnly)
+ {
+ if (Name() != name) {
+ return false;
}
- }>;
- using TFnv64 = TDigestFunctionUdf<FNV64, ui64, [](auto& inputRef, auto init) {
- if (init) {
- return FnvHash<ui64>(inputRef.Data(), inputRef.Size(), *init);
- } else {
- return FnvHash<ui64>(inputRef.Data(), inputRef.Size());
- }
- }>;
+ auto args = builder.Args();
+ args->Add(builder.SimpleType<char*>()).Flags(ICallablePayload::TArgumentFlags::AutoMap);
+ args->Add(builder.Optional()->Item(builder.SimpleType<TResult>()).Build()).Name("Init");
+ args->Done();
+ builder.OptionalArgs(1);
+ builder.Returns(builder.SimpleType<TResult>());
+ builder.IsStrict();
- using TMurMurHash = TDigestFunctionUdf<MURMUR, ui64, [](auto& inputRef, auto init) {
- if (init) {
- return MurmurHash<ui64>(inputRef.Data(), inputRef.Size(), *init);
- } else {
- return MurmurHash<ui64>(inputRef.Data(), inputRef.Size());
+ if (!typesOnly) {
+ builder.Implementation(new TDigestFunctionUdf<DigestType, TResult, Generator>(GetSourcePosition(builder)));
}
- }>;
- using TMurMurHash32 = TDigestFunctionUdf<MURMUR32, ui32, [] (auto& inputRef, auto init) {
- if (init) {
- return MurmurHash<ui32>(inputRef.Data(), inputRef.Size(), *init);
- } else {
- return MurmurHash<ui32>(inputRef.Data(), inputRef.Size());
- }
- }>;
+ return true;
+ }
- using TMurMurHash2A = TDigestFunctionUdf<MURMUR2A, ui64, [] (auto& inputRef, auto init) {
- if (init) {
- return TMurmurHash2A<ui64>{*init}.Update(inputRef.Data(), inputRef.Size()).Value();
- } else {
- return TMurmurHash2A<ui64>{}.Update(inputRef.Data(), inputRef.Size()).Value();
+private:
+ TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try {
+ TMaybe<TResult> init = Nothing();
+ if (auto val = args[1]) {
+ init = val.Get<TResult>();
}
- }>;
+ return TUnboxedValuePod(Generator(args[0].AsStringRef(), init));
+ } catch (const std ::exception&) {
+ TStringBuilder sb;
+ sb << Pos_ << " ";
+ sb << CurrentExceptionMessage();
+ sb << Endl << "[" << TStringBuf(Name()) << "]";
+ UdfTerminate(sb.c_str());
+ }
- using TMurMurHash2A32 = TDigestFunctionUdf<MURMUR2A32, ui32, [] (auto& inputRef, auto init) {
- if (init) {
- return TMurmurHash2A<ui32>{*init}.Update(inputRef.Data(), inputRef.Size()).Value();
- } else {
- return TMurmurHash2A<ui32>{}.Update(inputRef.Data(), inputRef.Size()).Value();
- }
- }>;
+ TSourcePosition Pos_;
+};
- using TCityHash = TDigestFunctionUdf<CITY, ui64, [] (auto& inputRef, auto init) {
- if (init) {
- return CityHash64WithSeed(inputRef.Data(), inputRef.Size(), *init);
- } else {
- return CityHash64(inputRef.Data(), inputRef.Size());
- }
- }>;
+SIMPLE_STRICT_UDF(TCrc32c, ui32(TAutoMap<char*>)) {
+ Y_UNUSED(valueBuilder);
+ const auto& inputRef = args[0].AsStringRef();
+ ui32 hash = Crc32c(inputRef.Data(), inputRef.Size());
+ return TUnboxedValuePod(hash);
+}
- class TCityHash128: public TBoxedValue {
- public:
- static TStringRef Name() {
- static auto name = TStringRef::Of("CityHash128");
- return name;
- }
+using TCrc64 = TDigestFunctionUdf<CRC64, ui64, [](auto& inputRef, auto init) {
+ return crc64(inputRef.Data(), inputRef.Size(), init.GetOrElse(CRC64INIT));
+}>;
- static bool DeclareSignature(
- const TStringRef& name,
- TType* userType,
- IFunctionTypeInfoBuilder& builder,
- bool typesOnly) {
- Y_UNUSED(userType);
- if (Name() == name) {
- auto type = builder.Tuple(2)->Add<ui64>().Add<ui64>().Build();
- builder.Args(1)->Add<TAutoMap<char*>>();
- builder.Returns(type);
- if (!typesOnly) {
- builder.Implementation(new TCityHash128);
- }
- builder.IsStrict();
- return true;
- } else {
- return false;
- }
- }
+using TFnv32 = TDigestFunctionUdf<FNV32, ui32, [](auto& inputRef, auto init) {
+ if (init) {
+ return FnvHash<ui32>(inputRef.Data(), inputRef.Size(), *init);
+ } else {
+ return FnvHash<ui32>(inputRef.Data(), inputRef.Size());
+ }
+}>;
- private:
- TUnboxedValue Run(
- const IValueBuilder* valueBuilder,
- const TUnboxedValuePod* args) const override {
- TUnboxedValue* items = nullptr;
- auto val = valueBuilder->NewArray(2U, items);
- const auto& inputRef = args[0].AsStringRef();
- uint128 hash = CityHash128(inputRef.Data(), inputRef.Size());
- items[0] = TUnboxedValuePod(hash.first);
- items[1] = TUnboxedValuePod(hash.second);
- return val;
- }
- };
+using TFnv64 = TDigestFunctionUdf<FNV64, ui64, [](auto& inputRef, auto init) {
+ if (init) {
+ return FnvHash<ui64>(inputRef.Data(), inputRef.Size(), *init);
+ } else {
+ return FnvHash<ui64>(inputRef.Data(), inputRef.Size());
+ }
+}>;
- SIMPLE_STRICT_UDF(TNumericHash, ui64(TAutoMap<ui64>)) {
- Y_UNUSED(valueBuilder);
- ui64 input = args[0].Get<ui64>();
- ui64 hash = (ui64)NumericHash(input);
- return TUnboxedValuePod(hash);
+using TMurMurHash = TDigestFunctionUdf<MURMUR, ui64, [](auto& inputRef, auto init) {
+ if (init) {
+ return MurmurHash<ui64>(inputRef.Data(), inputRef.Size(), *init);
+ } else {
+ return MurmurHash<ui64>(inputRef.Data(), inputRef.Size());
}
+}>;
- SIMPLE_STRICT_UDF(TMd5Hex, char*(TAutoMap<char*>)) {
- const auto& inputRef = args[0].AsStringRef();
- MD5 md5;
- const TString& hash = md5.Calc(inputRef);
- return valueBuilder->NewString(hash);
+using TMurMurHash32 = TDigestFunctionUdf<MURMUR32, ui32, [](auto& inputRef, auto init) {
+ if (init) {
+ return MurmurHash<ui32>(inputRef.Data(), inputRef.Size(), *init);
+ } else {
+ return MurmurHash<ui32>(inputRef.Data(), inputRef.Size());
}
+}>;
- SIMPLE_STRICT_UDF(TMd5Raw, char*(TAutoMap<char*>)) {
- const auto& inputRef = args[0].AsStringRef();
- MD5 md5;
- const TString& hash = md5.CalcRaw(inputRef);
- return valueBuilder->NewString(hash);
+using TMurMurHash2A = TDigestFunctionUdf<MURMUR2A, ui64, [](auto& inputRef, auto init) {
+ if (init) {
+ return TMurmurHash2A<ui64>{*init}.Update(inputRef.Data(), inputRef.Size()).Value();
+ } else {
+ return TMurmurHash2A<ui64>{}.Update(inputRef.Data(), inputRef.Size()).Value();
}
+}>;
- SIMPLE_STRICT_UDF(TMd5HalfMix, ui64(TAutoMap<char*>)) {
- Y_UNUSED(valueBuilder);
- return TUnboxedValuePod(MD5::CalcHalfMix(args[0].AsStringRef()));
+using TMurMurHash2A32 = TDigestFunctionUdf<MURMUR2A32, ui32, [](auto& inputRef, auto init) {
+ if (init) {
+ return TMurmurHash2A<ui32>{*init}.Update(inputRef.Data(), inputRef.Size()).Value();
+ } else {
+ return TMurmurHash2A<ui32>{}.Update(inputRef.Data(), inputRef.Size()).Value();
}
+}>;
- SIMPLE_STRICT_UDF(TArgon2, char*(TAutoMap<char*>, TAutoMap<char*>)) {
- const static ui32 outSize = 32;
- const static NArgonish::TArgon2Factory afactory;
- const static THolder<NArgonish::IArgon2Base> argon2 = afactory.Create(
- NArgonish::EArgon2Type::Argon2d, 1, 32, 1);
-
- const TStringRef inputRef = args[0].AsStringRef();
- const TStringRef saltRef = args[1].AsStringRef();
- ui8 out[outSize];
- argon2->Hash(reinterpret_cast<const ui8*>(inputRef.Data()), inputRef.Size(),
- reinterpret_cast<const ui8*>(saltRef.Data()), saltRef.Size(),
- out, outSize);
- return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(&out[0]), outSize));
+using TCityHash = TDigestFunctionUdf<CITY, ui64, [](auto& inputRef, auto init) {
+ if (init) {
+ return CityHash64WithSeed(inputRef.Data(), inputRef.Size(), *init);
+ } else {
+ return CityHash64(inputRef.Data(), inputRef.Size());
}
+}>;
- SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TBlake2B, char*(TAutoMap<char*>, TOptional<char*>), 1) {
- const static ui32 outSize = 32;
- const static NArgonish::TBlake2BFactory bfactory;
- const TStringRef inputRef = args[0].AsStringRef();
-
- THolder<NArgonish::IBlake2Base> blake2b;
- if (args[1]) {
- const TStringRef keyRef = args[1].AsStringRef();
- if (keyRef.Size() == 0) {
- blake2b = bfactory.Create(outSize);
- } else {
- blake2b = bfactory.Create(outSize, reinterpret_cast<const ui8*>(keyRef.Data()), keyRef.Size());
+class TCityHash128: public TBoxedValue {
+public:
+ static TStringRef Name() {
+ static auto name = TStringRef::Of("CityHash128");
+ return name;
+ }
+
+ static bool DeclareSignature(
+ const TStringRef& name,
+ TType* userType,
+ IFunctionTypeInfoBuilder& builder,
+ bool typesOnly) {
+ Y_UNUSED(userType);
+ if (Name() == name) {
+ auto type = builder.Tuple(2)->Add<ui64>().Add<ui64>().Build();
+ builder.Args(1)->Add<TAutoMap<char*>>();
+ builder.Returns(type);
+ if (!typesOnly) {
+ builder.Implementation(new TCityHash128);
}
+ builder.IsStrict();
+ return true;
} else {
- blake2b = bfactory.Create(outSize);
+ return false;
}
-
- ui8 out[outSize];
- blake2b->Update(inputRef.Data(), inputRef.Size());
- blake2b->Final(out, outSize);
- return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(&out[0]), outSize));
}
- SIMPLE_STRICT_UDF(TSipHash, ui64(ui64, ui64, TAutoMap<char*>)) {
- using namespace highwayhash;
- Y_UNUSED(valueBuilder);
- const TStringRef inputRef = args[2].AsStringRef();
- const HH_U64 state[2] = {args[0].Get<ui64>(), args[1].Get<ui64>()};
- ui64 hash = SipHash(state, inputRef.Data(), inputRef.Size());
- return TUnboxedValuePod(hash);
+private:
+ TUnboxedValue Run(
+ const IValueBuilder* valueBuilder,
+ const TUnboxedValuePod* args) const override {
+ TUnboxedValue* items = nullptr;
+ auto val = valueBuilder->NewArray(2U, items);
+ const auto& inputRef = args[0].AsStringRef();
+ uint128 hash = CityHash128(inputRef.Data(), inputRef.Size());
+ items[0] = TUnboxedValuePod(hash.first);
+ items[1] = TUnboxedValuePod(hash.second);
+ return val;
}
+};
- SIMPLE_STRICT_UDF(THighwayHash, ui64(ui64, ui64, ui64, ui64, TAutoMap<char*>)) {
- using namespace highwayhash;
- Y_UNUSED(valueBuilder);
- const TStringRef inputRef = args[4].AsStringRef();
- const uint64_t key[4] = {
- args[0].Get<ui64>(),
- args[1].Get<ui64>(),
- args[2].Get<ui64>(),
- args[3].Get<ui64>()};
- ui64 hash = HighwayHash64(key, inputRef.Data(), inputRef.Size());
- return TUnboxedValuePod(hash);
- }
+SIMPLE_STRICT_UDF(TNumericHash, ui64(TAutoMap<ui64>)) {
+ Y_UNUSED(valueBuilder);
+ ui64 input = args[0].Get<ui64>();
+ ui64 hash = (ui64)NumericHash(input);
+ return TUnboxedValuePod(hash);
+}
- SIMPLE_STRICT_UDF(TFarmHashFingerprint, ui64(TAutoMap<ui64>)) {
- Y_UNUSED(valueBuilder);
- ui64 input = args[0].Get<ui64>();
- ui64 hash = util::Fingerprint(input);
- return TUnboxedValuePod(hash);
- }
+SIMPLE_STRICT_UDF(TMd5Hex, char*(TAutoMap<char*>)) {
+ const auto& inputRef = args[0].AsStringRef();
+ MD5 md5;
+ const TString& hash = md5.Calc(inputRef);
+ return valueBuilder->NewString(hash);
+}
- SIMPLE_STRICT_UDF(TFarmHashFingerprint2, ui64(TAutoMap<ui64>, TAutoMap<ui64>)) {
- Y_UNUSED(valueBuilder);
- ui64 low = args[0].Get<ui64>();
- ui64 high = args[1].Get<ui64>();
- ui64 hash = util::Fingerprint(util::Uint128(low, high));
- return TUnboxedValuePod(hash);
- }
+SIMPLE_STRICT_UDF(TMd5Raw, char*(TAutoMap<char*>)) {
+ const auto& inputRef = args[0].AsStringRef();
+ MD5 md5;
+ const TString& hash = md5.CalcRaw(inputRef);
+ return valueBuilder->NewString(hash);
+}
- SIMPLE_STRICT_UDF(TFarmHashFingerprint32, ui32(TAutoMap<char*>)) {
- Y_UNUSED(valueBuilder);
- const auto& inputRef = args[0].AsStringRef();
- auto hash = util::Fingerprint32(inputRef.Data(), inputRef.Size());
- return TUnboxedValuePod(ui32(hash));
- }
+SIMPLE_STRICT_UDF(TMd5HalfMix, ui64(TAutoMap<char*>)) {
+ Y_UNUSED(valueBuilder);
+ return TUnboxedValuePod(MD5::CalcHalfMix(args[0].AsStringRef()));
+}
- SIMPLE_STRICT_UDF(TFarmHashFingerprint64, ui64(TAutoMap<char*>)) {
- Y_UNUSED(valueBuilder);
- const auto& inputRef = args[0].AsStringRef();
- auto hash = util::Fingerprint64(inputRef.Data(), inputRef.Size());
- return TUnboxedValuePod(ui64(hash));
- }
+SIMPLE_STRICT_UDF(TArgon2, char*(TAutoMap<char*>, TAutoMap<char*>)) {
+ const static ui32 outSize = 32;
+ const static NArgonish::TArgon2Factory afactory;
+ const static THolder<NArgonish::IArgon2Base> argon2 = afactory.Create(
+ NArgonish::EArgon2Type::Argon2d, 1, 32, 1);
+
+ const TStringRef inputRef = args[0].AsStringRef();
+ const TStringRef saltRef = args[1].AsStringRef();
+ ui8 out[outSize];
+ argon2->Hash(reinterpret_cast<const ui8*>(inputRef.Data()), inputRef.Size(),
+ reinterpret_cast<const ui8*>(saltRef.Data()), saltRef.Size(),
+ out, outSize);
+ return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(&out[0]), outSize));
+}
- class TFarmHashFingerprint128: public TBoxedValue {
- public:
- static TStringRef Name() {
- static auto name = TStringRef::Of("FarmHashFingerprint128");
- return name;
- }
+SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TBlake2B, char*(TAutoMap<char*>, TOptional<char*>), 1) {
+ const static ui32 outSize = 32;
+ const static NArgonish::TBlake2BFactory bfactory;
+ const TStringRef inputRef = args[0].AsStringRef();
- static bool DeclareSignature(
- const TStringRef& name,
- TType* userType,
- IFunctionTypeInfoBuilder& builder,
- bool typesOnly) {
- Y_UNUSED(userType);
- if (Name() == name) {
- auto type = builder.Tuple(2)->Add<ui64>().Add<ui64>().Build();
- builder.Args(1)->Add<TAutoMap<char*>>();
- builder.Returns(type);
- if (!typesOnly) {
- builder.Implementation(new TFarmHashFingerprint128);
- }
- builder.IsStrict();
- return true;
- } else {
- return false;
- }
+ THolder<NArgonish::IBlake2Base> blake2b;
+ if (args[1]) {
+ const TStringRef keyRef = args[1].AsStringRef();
+ if (keyRef.Size() == 0) {
+ blake2b = bfactory.Create(outSize);
+ } else {
+ blake2b = bfactory.Create(outSize, reinterpret_cast<const ui8*>(keyRef.Data()), keyRef.Size());
}
+ } else {
+ blake2b = bfactory.Create(outSize);
+ }
- private:
- TUnboxedValue Run(
- const IValueBuilder* valueBuilder,
- const TUnboxedValuePod* args) const override {
- TUnboxedValue* items = nullptr;
- auto val = valueBuilder->NewArray(2U, items);
- const auto& inputRef = args[0].AsStringRef();
- auto hash = util::Fingerprint128(inputRef.Data(), inputRef.Size());
- items[0] = TUnboxedValuePod(static_cast<ui64>(hash.first));
- items[1] = TUnboxedValuePod(static_cast<ui64>(hash.second));
- return val;
- }
- };
+ ui8 out[outSize];
+ blake2b->Update(inputRef.Data(), inputRef.Size());
+ blake2b->Final(out, outSize);
+ return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(&out[0]), outSize));
+}
- SIMPLE_STRICT_UDF(TSuperFastHash, ui32(TAutoMap<char*>)) {
- Y_UNUSED(valueBuilder);
- const auto& inputRef = args[0].AsStringRef();
- ui32 hash = SuperFastHash(inputRef.Data(), inputRef.Size());
- return TUnboxedValuePod(hash);
- }
+SIMPLE_STRICT_UDF(TSipHash, ui64(ui64, ui64, TAutoMap<char*>)) {
+ using namespace highwayhash;
+ Y_UNUSED(valueBuilder);
+ const TStringRef inputRef = args[2].AsStringRef();
+ const HH_U64 state[2] = {args[0].Get<ui64>(), args[1].Get<ui64>()};
+ ui64 hash = SipHash(state, inputRef.Data(), inputRef.Size());
+ return TUnboxedValuePod(hash);
+}
- SIMPLE_STRICT_UDF(TSha1, char*(TAutoMap<char*>)) {
- const auto& inputRef = args[0].AsStringRef();
- SHA_CTX sha;
- SHA1_Init(&sha);
- SHA1_Update(&sha, inputRef.Data(), inputRef.Size());
- unsigned char hash[SHA_DIGEST_LENGTH];
- SHA1_Final(hash, &sha);
- return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(hash), sizeof(hash)));
- }
+SIMPLE_STRICT_UDF(THighwayHash, ui64(ui64, ui64, ui64, ui64, TAutoMap<char*>)) {
+ using namespace highwayhash;
+ Y_UNUSED(valueBuilder);
+ const TStringRef inputRef = args[4].AsStringRef();
+ const uint64_t key[4] = {
+ args[0].Get<ui64>(),
+ args[1].Get<ui64>(),
+ args[2].Get<ui64>(),
+ args[3].Get<ui64>()};
+ ui64 hash = HighwayHash64(key, inputRef.Data(), inputRef.Size());
+ return TUnboxedValuePod(hash);
+}
- SIMPLE_STRICT_UDF(TSha256, char*(TAutoMap<char*>)) {
- const auto& inputRef = args[0].AsStringRef();
- SHA256_CTX sha;
- SHA256_Init(&sha);
- SHA256_Update(&sha, inputRef.Data(), inputRef.Size());
- unsigned char hash[SHA256_DIGEST_LENGTH];
- SHA256_Final(hash, &sha);
- return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(hash), sizeof(hash)));
- }
+SIMPLE_STRICT_UDF(TFarmHashFingerprint, ui64(TAutoMap<ui64>)) {
+ Y_UNUSED(valueBuilder);
+ ui64 input = args[0].Get<ui64>();
+ ui64 hash = util::Fingerprint(input);
+ return TUnboxedValuePod(hash);
+}
- SIMPLE_STRICT_UDF_OPTIONS(TSha512, char*(TAutoMap<char*>), builder.SetMinLangVer(NYql::MakeLangVersion(2025, 3));) {
- const auto& inputRef = args[0].AsStringRef();
- SHA512_CTX sha;
- SHA512_Init(&sha);
- SHA512_Update(&sha, inputRef.Data(), inputRef.Size());
- unsigned char hash[SHA512_DIGEST_LENGTH];
- SHA512_Final(hash, &sha);
- return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(hash), sizeof(hash)));
+SIMPLE_STRICT_UDF(TFarmHashFingerprint2, ui64(TAutoMap<ui64>, TAutoMap<ui64>)) {
+ Y_UNUSED(valueBuilder);
+ ui64 low = args[0].Get<ui64>();
+ ui64 high = args[1].Get<ui64>();
+ ui64 hash = util::Fingerprint(util::Uint128(low, high));
+ return TUnboxedValuePod(hash);
+}
+
+SIMPLE_STRICT_UDF(TFarmHashFingerprint32, ui32(TAutoMap<char*>)) {
+ Y_UNUSED(valueBuilder);
+ const auto& inputRef = args[0].AsStringRef();
+ auto hash = util::Fingerprint32(inputRef.Data(), inputRef.Size());
+ return TUnboxedValuePod(ui32(hash));
+}
+
+SIMPLE_STRICT_UDF(TFarmHashFingerprint64, ui64(TAutoMap<char*>)) {
+ Y_UNUSED(valueBuilder);
+ const auto& inputRef = args[0].AsStringRef();
+ auto hash = util::Fingerprint64(inputRef.Data(), inputRef.Size());
+ return TUnboxedValuePod(ui64(hash));
+}
+
+class TFarmHashFingerprint128: public TBoxedValue {
+public:
+ static TStringRef Name() {
+ static auto name = TStringRef::Of("FarmHashFingerprint128");
+ return name;
}
- SIMPLE_STRICT_UDF(TIntHash64, ui64(TAutoMap<ui64>)) {
- Y_UNUSED(valueBuilder);
- ui64 x = args[0].Get<ui64>();
- x ^= 0x4CF2D2BAAE6DA887ULL;
- x ^= x >> 33;
- x *= 0xff51afd7ed558ccdULL;
- x ^= x >> 33;
- x *= 0xc4ceb9fe1a85ec53ULL;
- x ^= x >> 33;
- return TUnboxedValuePod(x);
+ static bool DeclareSignature(
+ const TStringRef& name,
+ TType* userType,
+ IFunctionTypeInfoBuilder& builder,
+ bool typesOnly) {
+ Y_UNUSED(userType);
+ if (Name() == name) {
+ auto type = builder.Tuple(2)->Add<ui64>().Add<ui64>().Build();
+ builder.Args(1)->Add<TAutoMap<char*>>();
+ builder.Returns(type);
+ if (!typesOnly) {
+ builder.Implementation(new TFarmHashFingerprint128);
+ }
+ builder.IsStrict();
+ return true;
+ } else {
+ return false;
+ }
}
- SIMPLE_STRICT_UDF(TXXH3, ui64(TAutoMap<char*>)) {
- Y_UNUSED(valueBuilder);
+private:
+ TUnboxedValue Run(
+ const IValueBuilder* valueBuilder,
+ const TUnboxedValuePod* args) const override {
+ TUnboxedValue* items = nullptr;
+ auto val = valueBuilder->NewArray(2U, items);
const auto& inputRef = args[0].AsStringRef();
- const ui64 hash = XXH3_64bits(inputRef.Data(), inputRef.Size());
- return TUnboxedValuePod(hash);
+ auto hash = util::Fingerprint128(inputRef.Data(), inputRef.Size());
+ items[0] = TUnboxedValuePod(static_cast<ui64>(hash.first));
+ items[1] = TUnboxedValuePod(static_cast<ui64>(hash.second));
+ return val;
}
+};
- class TXXH3_128: public TBoxedValue { // NOLINT(readability-identifier-naming)
- public:
- static TStringRef Name() {
- static auto name = TStringRef::Of("XXH3_128");
- return name;
- }
+SIMPLE_STRICT_UDF(TSuperFastHash, ui32(TAutoMap<char*>)) {
+ Y_UNUSED(valueBuilder);
+ const auto& inputRef = args[0].AsStringRef();
+ ui32 hash = SuperFastHash(inputRef.Data(), inputRef.Size());
+ return TUnboxedValuePod(hash);
+}
- static bool DeclareSignature(const TStringRef& name, TType*, IFunctionTypeInfoBuilder& builder, bool typesOnly) {
- if (Name() == name) {
- const auto type = builder.Tuple(2)->Add<ui64>().Add<ui64>().Build();
- builder.Args(1)->Add<TAutoMap<char*>>();
- builder.Returns(type);
- if (!typesOnly) {
- builder.Implementation(new TXXH3_128);
- }
- builder.IsStrict();
- return true;
- } else {
- return false;
- }
- }
+SIMPLE_STRICT_UDF(TSha1, char*(TAutoMap<char*>)) {
+ const auto& inputRef = args[0].AsStringRef();
+ SHA_CTX sha;
+ SHA1_Init(&sha);
+ SHA1_Update(&sha, inputRef.Data(), inputRef.Size());
+ unsigned char hash[SHA_DIGEST_LENGTH];
+ SHA1_Final(hash, &sha);
+ return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(hash), sizeof(hash)));
+}
- private:
- TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final {
- TUnboxedValue* items = nullptr;
- auto val = valueBuilder->NewArray(2U, items);
- const auto& inputRef = args[0].AsStringRef();
- const auto hash = XXH3_128bits(inputRef.Data(), inputRef.Size());
- items[0] = TUnboxedValuePod(ui64(hash.low64));
- items[1] = TUnboxedValuePod(ui64(hash.high64));
- return val;
- }
- };
-
- SIMPLE_MODULE(TDigestModule,
- TCrc32c,
- TCrc64,
- TFnv32,
- TFnv64,
- TMurMurHash,
- TMurMurHash32,
- TMurMurHash2A,
- TMurMurHash2A32,
- TCityHash,
- TCityHash128,
- TNumericHash,
- TMd5Hex,
- TMd5Raw,
- TMd5HalfMix,
- TArgon2,
- TBlake2B,
- TSipHash,
- THighwayHash,
- TFarmHashFingerprint,
- TFarmHashFingerprint2,
- TFarmHashFingerprint32,
- TFarmHashFingerprint64,
- TFarmHashFingerprint128,
- TSuperFastHash,
- TSha1,
- TSha256,
- TSha512,
- TIntHash64,
- TXXH3,
- TXXH3_128
- )
+SIMPLE_STRICT_UDF(TSha256, char*(TAutoMap<char*>)) {
+ const auto& inputRef = args[0].AsStringRef();
+ SHA256_CTX sha;
+ SHA256_Init(&sha);
+ SHA256_Update(&sha, inputRef.Data(), inputRef.Size());
+ unsigned char hash[SHA256_DIGEST_LENGTH];
+ SHA256_Final(hash, &sha);
+ return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(hash), sizeof(hash)));
+}
+
+SIMPLE_STRICT_UDF_OPTIONS(TSha512, char*(TAutoMap<char*>), builder.SetMinLangVer(NYql::MakeLangVersion(2025, 3));) {
+ const auto& inputRef = args[0].AsStringRef();
+ SHA512_CTX sha;
+ SHA512_Init(&sha);
+ SHA512_Update(&sha, inputRef.Data(), inputRef.Size());
+ unsigned char hash[SHA512_DIGEST_LENGTH];
+ SHA512_Final(hash, &sha);
+ return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(hash), sizeof(hash)));
+}
+
+SIMPLE_STRICT_UDF(TIntHash64, ui64(TAutoMap<ui64>)) {
+ Y_UNUSED(valueBuilder);
+ ui64 x = args[0].Get<ui64>();
+ x ^= 0x4CF2D2BAAE6DA887ULL;
+ x ^= x >> 33;
+ x *= 0xff51afd7ed558ccdULL;
+ x ^= x >> 33;
+ x *= 0xc4ceb9fe1a85ec53ULL;
+ x ^= x >> 33;
+ return TUnboxedValuePod(x);
+}
+SIMPLE_STRICT_UDF(TXXH3, ui64(TAutoMap<char*>)) {
+ Y_UNUSED(valueBuilder);
+ const auto& inputRef = args[0].AsStringRef();
+ const ui64 hash = XXH3_64bits(inputRef.Data(), inputRef.Size());
+ return TUnboxedValuePod(hash);
}
+class TXXH3_128: public TBoxedValue { // NOLINT(readability-identifier-naming)
+public:
+ static TStringRef Name() {
+ static auto name = TStringRef::Of("XXH3_128");
+ return name;
+ }
+
+ static bool DeclareSignature(const TStringRef& name, TType*, IFunctionTypeInfoBuilder& builder, bool typesOnly) {
+ if (Name() == name) {
+ const auto type = builder.Tuple(2)->Add<ui64>().Add<ui64>().Build();
+ builder.Args(1)->Add<TAutoMap<char*>>();
+ builder.Returns(type);
+ if (!typesOnly) {
+ builder.Implementation(new TXXH3_128);
+ }
+ builder.IsStrict();
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+private:
+ TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final {
+ TUnboxedValue* items = nullptr;
+ auto val = valueBuilder->NewArray(2U, items);
+ const auto& inputRef = args[0].AsStringRef();
+ const auto hash = XXH3_128bits(inputRef.Data(), inputRef.Size());
+ items[0] = TUnboxedValuePod(ui64(hash.low64));
+ items[1] = TUnboxedValuePod(ui64(hash.high64));
+ return val;
+ }
+};
+
+SIMPLE_MODULE(TDigestModule,
+ TCrc32c,
+ TCrc64,
+ TFnv32,
+ TFnv64,
+ TMurMurHash,
+ TMurMurHash32,
+ TMurMurHash2A,
+ TMurMurHash2A32,
+ TCityHash,
+ TCityHash128,
+ TNumericHash,
+ TMd5Hex,
+ TMd5Raw,
+ TMd5HalfMix,
+ TArgon2,
+ TBlake2B,
+ TSipHash,
+ THighwayHash,
+ TFarmHashFingerprint,
+ TFarmHashFingerprint2,
+ TFarmHashFingerprint32,
+ TFarmHashFingerprint64,
+ TFarmHashFingerprint128,
+ TSuperFastHash,
+ TSha1,
+ TSha256,
+ TSha512,
+ TIntHash64,
+ TXXH3,
+ TXXH3_128)
+
+} // namespace
+
REGISTER_MODULES(TDigestModule)