aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author imunkin <imunkin@yandex-team.com> 2025-02-25 00:04:30 +0300
committer imunkin <imunkin@yandex-team.com> 2025-02-25 00:43:55 +0300
commit afb5748729cbda07af01d989a854831978d1e0f8 (patch)
tree 91da78f90ad9ffe5142f52caefe06ca3bebd19ea
parent 2d6dbd67adbbe1bbda8a45355b186a2c490d3508 (diff)
download ydb-afb5748729cbda07af01d989a854831978d1e0f8.tar.gz
YQL-18303: Introduce Parse64 function
commit_hash:f2068081a3ddfb16f531239f3e0dcc923084d3b0
-rw-r--r-- yql/essentials/sql/v1/builtin.cpp | 2
-rw-r--r-- yql/essentials/tests/sql/minirun/part0/canondata/result.json | 6
-rw-r--r-- yql/essentials/tests/sql/sql2yql/canondata/result.json | 6
-rw-r--r-- yql/essentials/udfs/common/datetime2/datetime_udf.cpp | 97
-rw-r--r-- yql/essentials/udfs/common/datetime2/test_bigdates/canondata/result.json | 5
-rw-r--r-- yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_Parse64_/results.txt | 174
-rw-r--r-- yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.cfg | 1
-rw-r--r-- yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in | 60
-rw-r--r-- yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in.attr | 38
-rw-r--r-- yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.sql | 12
10 files changed, 355 insertions, 46 deletions
diff --git a/yql/essentials/sql/v1/builtin.cpp b/yql/essentials/sql/v1/builtin.cpp
index 2cd2aeaee0..e5fe6f9410 100644
--- a/yql/essentials/sql/v1/builtin.cpp
+++ b/yql/essentials/sql/v1/builtin.cpp
@@ -3469,8 +3469,6 @@ TNodePtr BuildBuiltinFunc(TContext& ctx, TPosition pos, TString name, const TVec
return BuildUdf(ctx, pos, moduleName, name, newArgs);
}
- } else if (ns == "datetime2" && (name == "Parse")) {
- return BuildUdf(ctx, pos, nameSpace, name, args);
} else if (ns == "pg" || ns == "pgagg" || ns == "pgproc") {
bool isAggregateFunc = NYql::NPg::HasAggregation(name, NYql::NPg::EAggKind::Normal);
bool isNormalFunc = NYql::NPg::HasProc(name, NYql::NPg::EProcKind::Function);
diff --git a/yql/essentials/tests/sql/minirun/part0/canondata/result.json b/yql/essentials/tests/sql/minirun/part0/canondata/result.json
index 4b2f711ac2..03af9ee6ce 100644
--- a/yql/essentials/tests/sql/minirun/part0/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part0/canondata/result.json
@@ -630,9 +630,9 @@
],
"test.test[expr-tzdate_result-default.txt-Debug]": [
{
- "checksum": "e686ef209841ef2196efb49ff5948533",
- "size": 1672,
- "uri": "https://{canondata_backend}/1942525/ede9d81525f3cde3c09402fe9435fdbba85f47bc/resource.tar.gz#test.test_expr-tzdate_result-default.txt-Debug_/opt.yql"
+ "checksum": "c798e2dcfc53e7d8738597618f525530",
+ "size": 1738,
+ "uri": "https://{canondata_backend}/1937424/d5801c5f9b3fed693a453918eecd6867c4180227/resource.tar.gz#test.test_expr-tzdate_result-default.txt-Debug_/opt.yql"
}
],
"test.test[expr-tzdate_result-default.txt-Results]": [
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json
index 1bdda7b12b..6f7f1cb82c 100644
--- a/yql/essentials/tests/sql/sql2yql/canondata/result.json
+++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json
@@ -3312,9 +3312,9 @@
],
"test_sql2yql.test[expr-tzdate_result]": [
{
- "checksum": "d9e70a97bb5dcd9ea59c1689b0c83669",
- "size": 4337,
- "uri": "https://{canondata_backend}/1942173/99e88108149e222741552e7e6cddef041d6a2846/resource.tar.gz#test_sql2yql.test_expr-tzdate_result_/sql.yql"
+ "checksum": "25714e036675294420faf7d4747a19b5",
+ "size": 4385,
+ "uri": "https://{canondata_backend}/1937424/0282bf99f985d9dd5cf648994fc10170e7f998c8/resource.tar.gz#test_sql2yql.test_expr-tzdate_result_/sql.yql"
}
],
"test_sql2yql.test[expr-udaf_with_list_zip]": [
diff --git a/yql/essentials/udfs/common/datetime2/datetime_udf.cpp b/yql/essentials/udfs/common/datetime2/datetime_udf.cpp
index 61ee555e56..5044d0d9b4 100644
--- a/yql/essentials/udfs/common/datetime2/datetime_udf.cpp
+++ b/yql/essentials/udfs/common/datetime2/datetime_udf.cpp
@@ -47,6 +47,8 @@ extern const char EndOfUDF[] = "EndOf";
extern const char ShiftYearsUDF[] = "ShiftYears";
extern const char ShiftQuartersUDF[] = "ShiftQuarters";
extern const char ShiftMonthsUDF[] = "ShiftMonths";
+extern const char ParseUDF[] = "Parse";
+extern const char Parse64UDF[] = "Parse64";
extern const char TMResourceName[] = "DateTime2.TM";
extern const char TM64ResourceName[] = "DateTime2.TM64";
@@ -2762,33 +2764,42 @@ private:
const TSourcePosition Pos_;
};
- template<size_t Digits>
- struct ParseExaclyNDigits;
+ template<size_t Digits, bool Variable = false>
+ struct ParseNDigits;
- template<>
- struct ParseExaclyNDigits<0U> {
+ template<bool Variable>
+ struct ParseNDigits<0U, Variable> {
template <typename T>
static constexpr bool Do(std::string_view::const_iterator&, T&) {
return true;
}
};
- template<size_t Digits>
- struct ParseExaclyNDigits {
+ template<size_t Digits, bool Variable>
+ struct ParseNDigits {
template <typename T>
static constexpr bool Do(std::string_view::const_iterator& it, T& out) {
const auto d = *it;
if (!std::isdigit(d)) {
+ // XXX: If the current char is not a digit, parsing
+ // succeeds only if no digits remain to be parsed
+ // (see the class specialization above) or if the
+ // variable-width mode is enabled, i.e. fewer than
+ // N digits are acceptable.
+ if constexpr (Variable) {
+ return true;
+ }
return false;
}
out *= 10U;
out += d - '0';
- return ParseExaclyNDigits<Digits - 1U>::Do(++it, out);
+ return ParseNDigits<Digits - 1U, Variable>::Do(++it, out);
}
};
// Parse
+ template<const char* TUdfName, const char* TResourceName>
class TParse : public TBoxedValue {
public:
class TFactory : public TBoxedValue {
@@ -2808,7 +2819,7 @@ private:
};
static const TStringRef& Name() {
- static auto name = TStringRef::Of("Parse");
+ static auto name = TStringRef(TUdfName, std::strlen(TUdfName));
return name;
}
@@ -2822,15 +2833,10 @@ private:
return false;
}
- auto resourceType = builder.Resource(TMResourceName);
- auto optionalResourceType = builder.Optional()->Item(resourceType).Build();
-
- builder.Args()->Add<char*>().Flags(ICallablePayload::TArgumentFlags::AutoMap)
- .Add(builder.Optional()->Item<ui16>())
- .Done()
- .OptionalArgs(1);
- builder.RunConfig<char*>().Returns(optionalResourceType);
-
+ builder.OptionalArgs(1).Args()->Add<char*>()
+ .template Add<TOptional<ui16>>();
+ builder.Returns(
+ builder.SimpleSignatureType<TOptional<TResource<TResourceName>>(TAutoMap<char*>)>());
if (!typesOnly) {
builder.Implementation(new TParse::TFactory(builder.GetSourcePosition()));
}
@@ -2866,7 +2872,7 @@ private:
const std::string_view buffer = args[0].AsStringRef();
TUnboxedValuePod result(0);
- auto& storage = Reference(result);
+ auto& storage = Reference<TResourceName>(result);
storage.MakeDefault();
auto& builder = valueBuilder->GetDateBuilder();
@@ -2918,13 +2924,27 @@ private:
break;
case 'Y': {
- static constexpr size_t size = 4;
Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
- ui32 year = 0U;
- if (limit < size || !ParseExaclyNDigits<size>::Do(it, year) || !ValidateYear(year)) {
- return false;
+ if constexpr (TResourceName == TMResourceName) {
+ static constexpr size_t size = 4;
+ ui32 year = 0U;
+ if (limit < size || !ParseNDigits<size>::Do(it, year) || !ValidateYear(year)) {
+ return false;
+ }
+ SetYear<TMResourceName>(result, year);
+ } else {
+ static constexpr size_t size = 6;
+ i64 year = 0LL;
+ i64 negative = 1LL;
+ if (*it == '-') {
+ negative = -1LL;
+ it++;
+ }
+ if (!ParseNDigits<size, true>::Do(it, year) || !ValidateYear(negative * year)) {
+ return false;
+ }
+ SetYear<TM64ResourceName>(result, negative * year);
}
- SetYear(result, year);
return true;
});
break;
@@ -2933,10 +2953,10 @@ private:
static constexpr size_t size = 2;
Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
ui32 month = 0U;
- if (limit < size || !ParseExaclyNDigits<size>::Do(it, month) || !ValidateMonth(month)) {
+ if (limit < size || !ParseNDigits<size>::Do(it, month) || !ValidateMonth(month)) {
return false;
}
- SetMonth(result, month);
+ SetMonth<TResourceName>(result, month);
return true;
});
break;
@@ -2945,10 +2965,10 @@ private:
static constexpr size_t size = 2;
Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
ui32 day = 0U;
- if (limit < size || !ParseExaclyNDigits<size>::Do(it, day) || !ValidateDay(day)) {
+ if (limit < size || !ParseNDigits<size>::Do(it, day) || !ValidateDay(day)) {
return false;
}
- SetDay(result, day);
+ SetDay<TResourceName>(result, day);
return true;
});
break;
@@ -2957,10 +2977,10 @@ private:
static constexpr size_t size = 2;
Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
ui32 hour = 0U;
- if (limit < size || !ParseExaclyNDigits<size>::Do(it, hour) || !ValidateHour(hour)) {
+ if (limit < size || !ParseNDigits<size>::Do(it, hour) || !ValidateHour(hour)) {
return false;
}
- SetHour(result, hour);
+ SetHour<TResourceName>(result, hour);
return true;
});
break;
@@ -2969,10 +2989,10 @@ private:
static constexpr size_t size = 2;
Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
ui32 minute = 0U;
- if (limit < size || !ParseExaclyNDigits<size>::Do(it, minute) || !ValidateMinute(minute)) {
+ if (limit < size || !ParseNDigits<size>::Do(it, minute) || !ValidateMinute(minute)) {
return false;
}
- SetMinute(result, minute);
+ SetMinute<TResourceName>(result, minute);
return true;
});
break;
@@ -2981,10 +3001,10 @@ private:
static constexpr size_t size = 2;
Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
ui32 second = 0U;
- if (limit < size || !ParseExaclyNDigits<size>::Do(it, second) || !ValidateSecond(second)) {
+ if (limit < size || !ParseNDigits<size>::Do(it, second) || !ValidateSecond(second)) {
return false;
}
- SetSecond(result, second);
+ SetSecond<TResourceName>(result, second);
limit -= size;
if (!limit || *it != '.') {
@@ -3010,7 +3030,7 @@ private:
while (digits--) {
usec *= 10U;
}
- SetMicrosecond(result, usec);
+ SetMicrosecond<TResourceName>(result, usec);
return true;
});
break;
@@ -3028,7 +3048,7 @@ private:
if (!builder.FindTimezoneId(TStringRef(&*start, size), timezoneId)) {
return false;
}
- SetTimezoneId(result, timezoneId);
+ SetTimezoneId<TResourceName>(result, timezoneId);
return true;
});
break;
@@ -3047,7 +3067,7 @@ private:
if (cnt < size || !ValidateMonthShortName(monthName, month)) {
return false;
}
- SetMonth(result, month);
+ SetMonth<TResourceName>(result, month);
return true;
});
break;
@@ -3067,7 +3087,7 @@ private:
if (!ValidateMonthFullName(monthName, month)) {
return false;
}
- SetMonth(result, month);
+ SetMonth<TResourceName>(result, month);
return true;
});
break;
@@ -3210,7 +3230,8 @@ private:
TToUnits<ToMicrosecondsUDF, ui64, 1000000>,
TFormat,
- TParse,
+ TParse<ParseUDF, TMResourceName>,
+ TParse<Parse64UDF, TM64ResourceName>,
TParseRfc822,
TParseIso8601,
diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/result.json b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/result.json
index 3b6fd32464..d4bd5e1d4d 100644
--- a/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/result.json
+++ b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/result.json
@@ -19,6 +19,11 @@
"uri": "file://test.test_Get_/results.txt"
}
],
+ "test.test[Parse64]": [
+ {
+ "uri": "file://test.test_Parse64_/results.txt"
+ }
+ ],
"test.test[Shift]": [
{
"uri": "file://test.test_Shift_/results.txt"
diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_Parse64_/results.txt b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_Parse64_/results.txt
new file mode 100644
index 0000000000..93521e1f28
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_Parse64_/results.txt
@@ -0,0 +1,174 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ #;
+ #;
+ #;
+ #
+ ];
+ [
+ [
+ "-144169-01-01T00:00:00Z"
+ ];
+ [
+ "-144169-01-01T00:00:00Z"
+ ];
+ [
+ "-144169-01-01T00:00:00Z"
+ ];
+ [
+ "-144169-01-01T00:00:00Z"
+ ]
+ ];
+ [
+ [
+ "-1-01-01T23:59:59.990000Z"
+ ];
+ [
+ "-1-01-01T23:59:59.999999Z"
+ ];
+ [
+ "-1-01-01T00:00:00Z"
+ ];
+ [
+ "-1-01-01T00:00:00Z"
+ ]
+ ];
+ [
+ [
+ "1-01-01T00:00:00Z"
+ ];
+ [
+ "1-01-01T00:00:00Z"
+ ];
+ [
+ "1-01-01T00:00:00Z"
+ ];
+ [
+ "1-01-01T00:00:00Z"
+ ]
+ ];
+ [
+ [
+ "1969-12-31T23:59:59.999999Z"
+ ];
+ [
+ "1969-12-31T23:59:59.999999Z"
+ ];
+ [
+ "1969-12-31T00:00:00Z"
+ ];
+ [
+ "1969-12-31T00:00:00Z"
+ ]
+ ];
+ [
+ [
+ "1970-01-01T00:00:00Z"
+ ];
+ [
+ "1970-01-01T00:00:00Z"
+ ];
+ [
+ "1970-01-01T00:00:00Z"
+ ];
+ [
+ "1970-01-01T00:00:00Z"
+ ]
+ ];
+ [
+ #;
+ #;
+ #;
+ #
+ ];
+ [
+ [
+ "2106-01-01T00:00:00Z"
+ ];
+ [
+ "2106-01-01T00:00:00Z"
+ ];
+ [
+ "2106-01-01T00:00:00Z"
+ ];
+ [
+ "2106-01-01T00:00:00Z"
+ ]
+ ];
+ [
+ [
+ "148107-12-31T23:59:59.999999Z"
+ ];
+ [
+ "148107-12-31T23:59:59.999999Z"
+ ];
+ [
+ "148107-12-31T00:00:00Z"
+ ];
+ [
+ "148107-12-31T00:00:00Z"
+ ]
+ ];
+ [
+ #;
+ #;
+ #;
+ #
+ ]
+ ]
+ }
+ ]
+ }
+]
\ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.cfg b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.cfg
new file mode 100644
index 0000000000..c16ed518f4
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.cfg
@@ -0,0 +1 @@
+in plato.Input Parse64.in
diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in
new file mode 100644
index 0000000000..ee33bd3d13
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in
@@ -0,0 +1,60 @@
+{
+ "fdatetime1"="-144170 12 31 235959 GMT text";
+ "fdatetime2"="%text% 12/31/-144170 23:59:59.";
+ "fdatetime3"="december/31/-144170";
+ "fdatetime4"="dec/31/-144170";
+};
+{
+ "fdatetime1"="-144169 01 01 000000 GMT text";
+ "fdatetime2"="%text% 01/01/-144169 00:00:00.";
+ "fdatetime3"="january/01/-144169";
+ "fdatetime4"="jan/01/-144169";
+};
+{
+ "fdatetime1"="-1 01 01 235959.99 GMT text";
+ "fdatetime2"="%text% 01/01/-1 23:59:59.999999";
+ "fdatetime3"="January/01/-1";
+ "fdatetime4"="Jan/01/-1";
+};
+{
+ "fdatetime1"="1 01 01 000000 GMT text";
+ "fdatetime2"="%text% 01/01/1 00:00:00.0000000";
+ "fdatetime3"="JANUARY/01/1";
+ "fdatetime4"="JAN/01/1";
+};
+{
+ "fdatetime1"="1969 12 31 235959.999999 GMT text";
+ "fdatetime2"="%text% 12/31/1969 23:59:59.999999";
+ "fdatetime3"="DeCeMbEr/31/1969";
+ "fdatetime4"="DeC/31/1969";
+};
+{
+ "fdatetime1"="1970 01 01 000000.00 GMT text";
+ "fdatetime2"="%text% 01/01/1970 00:00:00.00";
+ "fdatetime3"="JaNuArY/01/1970";
+ "fdatetime4"="JaN/01/1970";
+};
+{
+ "fdatetime1"="2025 01 01 000000 GMT bar";
+ "fdatetime2"="%text% 01/01/2025 00:00:00.1234567890abcdef1234567890abcdef";
+ "fdatetime3"="jEnuary/01/2025";
+ "fdatetime4"="jEn/01/2025";
+};
+{
+ "fdatetime1"="2106 01 01 000000 GMT text";
+ "fdatetime2"="%text% 01/01/2106 00:00:00.";
+ "fdatetime3"="jANuARy/01/2106";
+ "fdatetime4"="jAN/01/2106";
+};
+{
+ "fdatetime1"="148107 12 31 235959.999999 GMT text";
+ "fdatetime2"="%text% 12/31/148107 23:59:59.999999";
+ "fdatetime3"="DECEMBER/31/148107";
+ "fdatetime4"="DEC/31/148107";
+};
+{
+ "fdatetime1"="148108 01 01 000000.000000 GMT text";
+ "fdatetime2"="%text% 01/01/148108 00:00:00.000000";
+ "fdatetime3"="jaNUARy/01/148108";
+ "fdatetime4"="jaN/01/148108";
+};
diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in.attr b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in.attr
new file mode 100644
index 0000000000..a5b735bdca
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in.attr
@@ -0,0 +1,38 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "fdatetime1";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "fdatetime2";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "fdatetime3";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "fdatetime4";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ ]
+ ]
+ }
+}
+
diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.sql b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.sql
new file mode 100644
index 0000000000..cc57f1aa49
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.sql
@@ -0,0 +1,12 @@
+/* syntax version 1 */
+$parse1 = DateTime::Parse64("%Y %m %d %H%M%S %Z text");
+$parse2 = DateTime::Parse64("%%text%% %m/%d/%Y %H:%M:%S");
+$parse3 = DateTime::Parse64("%B/%d/%Y");
+$parse4 = DateTime::Parse64("%b/%d/%Y");
+
+select
+ CAST(DateTime::MakeTimestamp64($parse1(fdatetime1)) AS String),
+ CAST(DateTime::MakeTimestamp64($parse2(fdatetime2)) AS String),
+ CAST(DateTime::MakeTimestamp64($parse3(fdatetime3)) AS String),
+ CAST(DateTime::MakeTimestamp64($parse4(fdatetime4)) AS String),
+from Input