diff options
author | imunkin <imunkin@yandex-team.com> | 2025-02-25 00:04:30 +0300 |
---|---|---|
committer | imunkin <imunkin@yandex-team.com> | 2025-02-25 00:43:55 +0300 |
commit | afb5748729cbda07af01d989a854831978d1e0f8 (patch) | |
tree | 91da78f90ad9ffe5142f52caefe06ca3bebd19ea | |
parent | 2d6dbd67adbbe1bbda8a45355b186a2c490d3508 (diff) | |
download | ydb-afb5748729cbda07af01d989a854831978d1e0f8.tar.gz |
YQL-18303: Introduce Parse64 function
commit_hash:f2068081a3ddfb16f531239f3e0dcc923084d3b0
10 files changed, 355 insertions, 46 deletions
diff --git a/yql/essentials/sql/v1/builtin.cpp b/yql/essentials/sql/v1/builtin.cpp index 2cd2aeaee0..e5fe6f9410 100644 --- a/yql/essentials/sql/v1/builtin.cpp +++ b/yql/essentials/sql/v1/builtin.cpp @@ -3469,8 +3469,6 @@ TNodePtr BuildBuiltinFunc(TContext& ctx, TPosition pos, TString name, const TVec return BuildUdf(ctx, pos, moduleName, name, newArgs); } - } else if (ns == "datetime2" && (name == "Parse")) { - return BuildUdf(ctx, pos, nameSpace, name, args); } else if (ns == "pg" || ns == "pgagg" || ns == "pgproc") { bool isAggregateFunc = NYql::NPg::HasAggregation(name, NYql::NPg::EAggKind::Normal); bool isNormalFunc = NYql::NPg::HasProc(name, NYql::NPg::EProcKind::Function); diff --git a/yql/essentials/tests/sql/minirun/part0/canondata/result.json b/yql/essentials/tests/sql/minirun/part0/canondata/result.json index 4b2f711ac2..03af9ee6ce 100644 --- a/yql/essentials/tests/sql/minirun/part0/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part0/canondata/result.json @@ -630,9 +630,9 @@ ], "test.test[expr-tzdate_result-default.txt-Debug]": [ { - "checksum": "e686ef209841ef2196efb49ff5948533", - "size": 1672, - "uri": "https://{canondata_backend}/1942525/ede9d81525f3cde3c09402fe9435fdbba85f47bc/resource.tar.gz#test.test_expr-tzdate_result-default.txt-Debug_/opt.yql" + "checksum": "c798e2dcfc53e7d8738597618f525530", + "size": 1738, + "uri": "https://{canondata_backend}/1937424/d5801c5f9b3fed693a453918eecd6867c4180227/resource.tar.gz#test.test_expr-tzdate_result-default.txt-Debug_/opt.yql" } ], "test.test[expr-tzdate_result-default.txt-Results]": [ diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json index 1bdda7b12b..6f7f1cb82c 100644 --- a/yql/essentials/tests/sql/sql2yql/canondata/result.json +++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json @@ -3312,9 +3312,9 @@ ], "test_sql2yql.test[expr-tzdate_result]": [ { - "checksum": "d9e70a97bb5dcd9ea59c1689b0c83669", - "size": 4337, - "uri": "https://{canondata_backend}/1942173/99e88108149e222741552e7e6cddef041d6a2846/resource.tar.gz#test_sql2yql.test_expr-tzdate_result_/sql.yql" + "checksum": "25714e036675294420faf7d4747a19b5", + "size": 4385, + "uri": "https://{canondata_backend}/1937424/0282bf99f985d9dd5cf648994fc10170e7f998c8/resource.tar.gz#test_sql2yql.test_expr-tzdate_result_/sql.yql" } ], "test_sql2yql.test[expr-udaf_with_list_zip]": [ diff --git a/yql/essentials/udfs/common/datetime2/datetime_udf.cpp b/yql/essentials/udfs/common/datetime2/datetime_udf.cpp index 61ee555e56..5044d0d9b4 100644 --- a/yql/essentials/udfs/common/datetime2/datetime_udf.cpp +++ b/yql/essentials/udfs/common/datetime2/datetime_udf.cpp @@ -47,6 +47,8 @@ extern const char EndOfUDF[] = "EndOf"; extern const char ShiftYearsUDF[] = "ShiftYears"; extern const char ShiftQuartersUDF[] = "ShiftQuarters"; extern const char ShiftMonthsUDF[] = "ShiftMonths"; +extern const char ParseUDF[] = "Parse"; +extern const char Parse64UDF[] = "Parse64"; extern const char TMResourceName[] = "DateTime2.TM"; extern const char TM64ResourceName[] = "DateTime2.TM64"; @@ -2762,33 +2764,42 @@ private: const TSourcePosition Pos_; }; - template<size_t Digits> - struct ParseExaclyNDigits; + template<size_t Digits, bool Variable = false> + struct ParseNDigits; - template<> - struct ParseExaclyNDigits<0U> { + template<bool Variable> + struct ParseNDigits<0U, Variable> { template <typename T> static constexpr bool Do(std::string_view::const_iterator&, T&) { return true; } }; - template<size_t Digits> - struct ParseExaclyNDigits { + template<size_t Digits, bool Variable> + struct ParseNDigits { template <typename T> static constexpr bool Do(std::string_view::const_iterator& it, T& out) { const auto d = *it; if (!std::isdigit(d)) { + // XXX: If the current char is not a digit, the + // parsing succeeds iff there are no more digits + // to be parsed (see the class specialization + // above) or there are given less than N digits + // to be parsed. + if constexpr (Variable) { + return true; + } return false; } out *= 10U; out += d - '0'; - return ParseExaclyNDigits<Digits - 1U>::Do(++it, out); + return ParseNDigits<Digits - 1U, Variable>::Do(++it, out); } }; // Parse + template<const char* TUdfName, const char* TResourceName> class TParse : public TBoxedValue { public: class TFactory : public TBoxedValue { @@ -2808,7 +2819,7 @@ private: }; static const TStringRef& Name() { - static auto name = TStringRef::Of("Parse"); + static auto name = TStringRef(TUdfName, std::strlen(TUdfName)); return name; } @@ -2822,15 +2833,10 @@ private: return false; } - auto resourceType = builder.Resource(TMResourceName); - auto optionalResourceType = builder.Optional()->Item(resourceType).Build(); - - builder.Args()->Add<char*>().Flags(ICallablePayload::TArgumentFlags::AutoMap) - .Add(builder.Optional()->Item<ui16>()) - .Done() - .OptionalArgs(1); - builder.RunConfig<char*>().Returns(optionalResourceType); - + builder.OptionalArgs(1).Args()->Add<char*>() + .template Add<TOptional<ui16>>(); + builder.Returns( + builder.SimpleSignatureType<TOptional<TResource<TResourceName>>(TAutoMap<char*>)>()); if (!typesOnly) { builder.Implementation(new TParse::TFactory(builder.GetSourcePosition())); } @@ -2866,7 +2872,7 @@ private: const std::string_view buffer = args[0].AsStringRef(); TUnboxedValuePod result(0); - auto& storage = Reference(result); + auto& storage = Reference<TResourceName>(result); storage.MakeDefault(); auto& builder = valueBuilder->GetDateBuilder(); @@ -2918,13 +2924,27 @@ private: break; case 'Y': { - static constexpr size_t size = 4; Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) { - ui32 year = 0U; - if (limit < size || !ParseExaclyNDigits<size>::Do(it, year) || !ValidateYear(year)) { - return false; + if constexpr (TResourceName == TMResourceName) { + static constexpr size_t size = 4; + ui32 year = 0U; + if (limit < size || !ParseNDigits<size>::Do(it, year) || !ValidateYear(year)) { + return false; + } + SetYear<TMResourceName>(result, year); + } else { + static constexpr size_t size = 6; + i64 year = 0LL; + i64 negative = 1LL; + if (*it == '-') { + negative = -1LL; + it++; + } + if (!ParseNDigits<size, true>::Do(it, year) || !ValidateYear(negative * year)) { + return false; + } + SetYear<TM64ResourceName>(result, negative * year); } - SetYear(result, year); return true; }); break; @@ -2933,10 +2953,10 @@ private: static constexpr size_t size = 2; Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) { ui32 month = 0U; - if (limit < size || !ParseExaclyNDigits<size>::Do(it, month) || !ValidateMonth(month)) { + if (limit < size || !ParseNDigits<size>::Do(it, month) || !ValidateMonth(month)) { return false; } - SetMonth(result, month); + SetMonth<TResourceName>(result, month); return true; }); break; @@ -2945,10 +2965,10 @@ private: static constexpr size_t size = 2; Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) { ui32 day = 0U; - if (limit < size || !ParseExaclyNDigits<size>::Do(it, day) || !ValidateDay(day)) { + if (limit < size || !ParseNDigits<size>::Do(it, day) || !ValidateDay(day)) { return false; } - SetDay(result, day); + SetDay<TResourceName>(result, day); return true; }); break; @@ -2957,10 +2977,10 @@ private: static constexpr size_t size = 2; Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) { ui32 hour = 0U; - if (limit < size || !ParseExaclyNDigits<size>::Do(it, hour) || !ValidateHour(hour)) { + if (limit < size || !ParseNDigits<size>::Do(it, hour) || !ValidateHour(hour)) { return false; } - SetHour(result, hour); + SetHour<TResourceName>(result, hour); return true; }); break; @@ -2969,10 +2989,10 @@ private: static constexpr size_t size = 2; Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) { ui32 minute = 0U; - if (limit < size || !ParseExaclyNDigits<size>::Do(it, minute) || !ValidateMinute(minute)) { + if (limit < size || !ParseNDigits<size>::Do(it, minute) || !ValidateMinute(minute)) { return false; } - SetMinute(result, minute); + SetMinute<TResourceName>(result, minute); return true; }); break; @@ -2981,10 +3001,10 @@ private: static constexpr size_t size = 2; Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) { ui32 second = 0U; - if (limit < size || !ParseExaclyNDigits<size>::Do(it, second) || !ValidateSecond(second)) { + if (limit < size || !ParseNDigits<size>::Do(it, second) || !ValidateSecond(second)) { return false; } - SetSecond(result, second); + SetSecond<TResourceName>(result, second); limit -= size; if (!limit || *it != '.') { @@ -3010,7 +3030,7 @@ private: while (digits--) { usec *= 10U; } - SetMicrosecond(result, usec); + SetMicrosecond<TResourceName>(result, usec); return true; }); break; @@ -3028,7 +3048,7 @@ private: if (!builder.FindTimezoneId(TStringRef(&*start, size), timezoneId)) { return false; } - SetTimezoneId(result, timezoneId); + SetTimezoneId<TResourceName>(result, timezoneId); return true; }); break; @@ -3047,7 +3067,7 @@ private: if (cnt < size || !ValidateMonthShortName(monthName, month)) { return false; } - SetMonth(result, month); + SetMonth<TResourceName>(result, month); return true; }); break; @@ -3067,7 +3087,7 @@ private: if (!ValidateMonthFullName(monthName, month)) { return false; } - SetMonth(result, month); + SetMonth<TResourceName>(result, month); return true; }); break; @@ -3210,7 +3230,8 @@ private: TToUnits<ToMicrosecondsUDF, ui64, 1000000>, TFormat, - TParse, + TParse<ParseUDF, TMResourceName>, + TParse<Parse64UDF, TM64ResourceName>, TParseRfc822, TParseIso8601, diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/result.json b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/result.json index 3b6fd32464..d4bd5e1d4d 100644 --- a/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/result.json +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/result.json @@ -19,6 +19,11 @@ "uri": "file://test.test_Get_/results.txt" } ], + "test.test[Parse64]": [ + { + "uri": "file://test.test_Parse64_/results.txt" + } + ], "test.test[Shift]": [ { "uri": "file://test.test_Shift_/results.txt" diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_Parse64_/results.txt b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_Parse64_/results.txt new file mode 100644 index 0000000000..93521e1f28 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_Parse64_/results.txt @@ -0,0 +1,174 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + #; + #; + #; + # + ]; + [ + [ + "-144169-01-01T00:00:00Z" + ]; + [ + "-144169-01-01T00:00:00Z" + ]; + [ + "-144169-01-01T00:00:00Z" + ]; + [ + "-144169-01-01T00:00:00Z" + ] + ]; + [ + [ + "-1-01-01T23:59:59.990000Z" + ]; + [ + "-1-01-01T23:59:59.999999Z" + ]; + [ + "-1-01-01T00:00:00Z" + ]; + [ + "-1-01-01T00:00:00Z" + ] + ]; + [ + [ + "1-01-01T00:00:00Z" + ]; + [ + "1-01-01T00:00:00Z" + ]; + [ + "1-01-01T00:00:00Z" + ]; + [ + "1-01-01T00:00:00Z" + ] + ]; + [ + [ + "1969-12-31T23:59:59.999999Z" + ]; + [ + "1969-12-31T23:59:59.999999Z" + ]; + [ + "1969-12-31T00:00:00Z" + ]; + [ + "1969-12-31T00:00:00Z" + ] + ]; + [ + [ + "1970-01-01T00:00:00Z" + ]; + [ + "1970-01-01T00:00:00Z" + ]; + [ + "1970-01-01T00:00:00Z" + ]; + [ + "1970-01-01T00:00:00Z" + ] + ]; + [ + #; + #; + #; + # + ]; + [ + [ + "2106-01-01T00:00:00Z" + ]; + [ + "2106-01-01T00:00:00Z" + ]; + [ + "2106-01-01T00:00:00Z" + ]; + [ + "2106-01-01T00:00:00Z" + ] + ]; + [ + [ + "148107-12-31T23:59:59.999999Z" + ]; + [ + "148107-12-31T23:59:59.999999Z" + ]; + [ + "148107-12-31T00:00:00Z" + ]; + [ + "148107-12-31T00:00:00Z" + ] + ]; + [ + #; + #; + #; + # + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.cfg b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.cfg new file mode 100644 index 0000000000..c16ed518f4 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.cfg @@ -0,0 +1 @@ +in plato.Input Parse64.in diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in new file mode 100644 index 0000000000..ee33bd3d13 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in @@ -0,0 +1,60 @@ +{ + "fdatetime1"="-144170 12 31 235959 GMT text"; + "fdatetime2"="%text% 12/31/-144170 23:59:59."; + "fdatetime3"="december/31/-144170"; + "fdatetime4"="dec/31/-144170"; +}; +{ + "fdatetime1"="-144169 01 01 000000 GMT text"; + "fdatetime2"="%text% 01/01/-144169 00:00:00."; + "fdatetime3"="january/01/-144169"; + "fdatetime4"="jan/01/-144169"; +}; +{ + "fdatetime1"="-1 01 01 235959.99 GMT text"; + "fdatetime2"="%text% 01/01/-1 23:59:59.999999"; + "fdatetime3"="January/01/-1"; + "fdatetime4"="Jan/01/-1"; +}; +{ + "fdatetime1"="1 01 01 000000 GMT text"; + "fdatetime2"="%text% 01/01/1 00:00:00.0000000"; + "fdatetime3"="JANUARY/01/1"; + "fdatetime4"="JAN/01/1"; +}; +{ + "fdatetime1"="1969 12 31 235959.999999 GMT text"; + "fdatetime2"="%text% 12/31/1969 23:59:59.999999"; + "fdatetime3"="DeCeMbEr/31/1969"; + "fdatetime4"="DeC/31/1969"; +}; +{ + "fdatetime1"="1970 01 01 000000.00 GMT text"; + "fdatetime2"="%text% 01/01/1970 00:00:00.00"; + "fdatetime3"="JaNuArY/01/1970"; + "fdatetime4"="JaN/01/1970"; +}; +{ + "fdatetime1"="2025 01 01 000000 GMT bar"; + "fdatetime2"="%text% 01/01/2025 00:00:00.1234567890abcdef1234567890abcdef"; + "fdatetime3"="jEnuary/01/2025"; + "fdatetime4"="jEn/01/2025"; +}; +{ + "fdatetime1"="2106 01 01 000000 GMT text"; + "fdatetime2"="%text% 01/01/2106 00:00:00."; + "fdatetime3"="jANuARy/01/2106"; + "fdatetime4"="jAN/01/2106"; +}; +{ + "fdatetime1"="148107 12 31 235959.999999 GMT text"; + "fdatetime2"="%text% 12/31/148107 23:59:59.999999"; + "fdatetime3"="DECEMBER/31/148107"; + "fdatetime4"="DEC/31/148107"; +}; +{ + "fdatetime1"="148108 01 01 000000.000000 GMT text"; + "fdatetime2"="%text% 01/01/148108 00:00:00.000000"; + "fdatetime3"="jaNUARy/01/148108"; + "fdatetime4"="jaN/01/148108"; +}; diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in.attr b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in.attr new file mode 100644 index 0000000000..a5b735bdca --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in.attr @@ -0,0 +1,38 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "fdatetime1"; + [ + "DataType"; + "String" + ] + ]; + [ + "fdatetime2"; + [ + "DataType"; + "String" + ] + ]; + [ + "fdatetime3"; + [ + "DataType"; + "String" + ] + ]; + [ + "fdatetime4"; + [ + "DataType"; + "String" + ] + ]; + ] + ] + } +} + diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.sql b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.sql new file mode 100644 index 0000000000..cc57f1aa49 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.sql @@ -0,0 +1,12 @@ +/* syntax version 1 */ +$parse1 = DateTime::Parse64("%Y %m %d %H%M%S %Z text"); +$parse2 = DateTime::Parse64("%%text%% %m/%d/%Y %H:%M:%S"); +$parse3 = DateTime::Parse64("%B/%d/%Y"); +$parse4 = DateTime::Parse64("%b/%d/%Y"); + +select + CAST(DateTime::MakeTimestamp64($parse1(fdatetime1)) AS String), + CAST(DateTime::MakeTimestamp64($parse2(fdatetime2)) AS String), + CAST(DateTime::MakeTimestamp64($parse3(fdatetime3)) AS String), + CAST(DateTime::MakeTimestamp64($parse4(fdatetime4)) AS String), +from Input |