summary refs log tree commit diff stats
path: root/yql/essentials/udfs/common
diff options
context:
space:
mode:
author imunkin <[email protected]> 2025-02-25 00:04:30 +0300
committer imunkin <[email protected]> 2025-02-25 00:43:55 +0300
commit afb5748729cbda07af01d989a854831978d1e0f8 (patch)
tree 91da78f90ad9ffe5142f52caefe06ca3bebd19ea /yql/essentials/udfs/common
parent 2d6dbd67adbbe1bbda8a45355b186a2c490d3508 (diff)
YQL-18303: Introduce Parse64 function
commit_hash:f2068081a3ddfb16f531239f3e0dcc923084d3b0
Diffstat (limited to 'yql/essentials/udfs/common')
-rw-r--r-- yql/essentials/udfs/common/datetime2/datetime_udf.cpp 97
-rw-r--r-- yql/essentials/udfs/common/datetime2/test_bigdates/canondata/result.json 5
-rw-r--r-- yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_Parse64_/results.txt 174
-rw-r--r-- yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.cfg 1
-rw-r--r-- yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in 60
-rw-r--r-- yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in.attr 38
-rw-r--r-- yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.sql 12
7 files changed, 349 insertions, 38 deletions
diff --git a/yql/essentials/udfs/common/datetime2/datetime_udf.cpp b/yql/essentials/udfs/common/datetime2/datetime_udf.cpp
index 61ee555e567..5044d0d9b40 100644
--- a/yql/essentials/udfs/common/datetime2/datetime_udf.cpp
+++ b/yql/essentials/udfs/common/datetime2/datetime_udf.cpp
@@ -47,6 +47,8 @@ extern const char EndOfUDF[] = "EndOf";
extern const char ShiftYearsUDF[] = "ShiftYears";
extern const char ShiftQuartersUDF[] = "ShiftQuarters";
extern const char ShiftMonthsUDF[] = "ShiftMonths";
+extern const char ParseUDF[] = "Parse";
+extern const char Parse64UDF[] = "Parse64";
extern const char TMResourceName[] = "DateTime2.TM";
extern const char TM64ResourceName[] = "DateTime2.TM64";
@@ -2762,33 +2764,42 @@ private:
const TSourcePosition Pos_;
};
- template<size_t Digits>
- struct ParseExaclyNDigits;
+ template<size_t Digits, bool Variable = false> // Variable: accept fewer than Digits digits
+ struct ParseNDigits;
- template<>
- struct ParseExaclyNDigits<0U> {
+ template<bool Variable>
+ struct ParseNDigits<0U, Variable> { // recursion base: no digits left to parse
template <typename T>
static constexpr bool Do(std::string_view::const_iterator&, T&) {
return true;
}
};
- template<size_t Digits>
- struct ParseExaclyNDigits {
+ template<size_t Digits, bool Variable>
+ struct ParseNDigits {
template <typename T>
static constexpr bool Do(std::string_view::const_iterator& it, T& out) {
const auto d = *it;
if (!std::isdigit(d)) {
+ // XXX: A non-digit char stops the parse. With Variable set this is a
+ // success: fewer than N digits were given (possibly zero, leaving `out`
+ // untouched); otherwise all N digits are required and it is a failure.
+ // NOTE(review): `*it` is read with no end-of-buffer check, and passing a
+ // plain char to std::isdigit is UB for negative values - confirm/cast.
+ if constexpr (Variable) {
+ return true;
+ }
return false;
}
out *= 10U;
out += d - '0';
- return ParseExaclyNDigits<Digits - 1U>::Do(++it, out);
+ return ParseNDigits<Digits - 1U, Variable>::Do(++it, out); // digit consumed; parse the remaining ones
}
};
// Parse
+ template<const char* TUdfName, const char* TResourceName>
class TParse : public TBoxedValue {
public:
class TFactory : public TBoxedValue {
@@ -2808,7 +2819,7 @@ private:
};
static const TStringRef& Name() {
- static auto name = TStringRef::Of("Parse");
+ static auto name = TStringRef(TUdfName, std::strlen(TUdfName));
return name;
}
@@ -2822,15 +2833,10 @@ private:
return false;
}
- auto resourceType = builder.Resource(TMResourceName);
- auto optionalResourceType = builder.Optional()->Item(resourceType).Build();
-
- builder.Args()->Add<char*>().Flags(ICallablePayload::TArgumentFlags::AutoMap)
- .Add(builder.Optional()->Item<ui16>())
- .Done()
- .OptionalArgs(1);
- builder.RunConfig<char*>().Returns(optionalResourceType);
-
+ builder.OptionalArgs(1).Args()->Add<char*>()
+ .template Add<TOptional<ui16>>();
+ builder.Returns(
+ builder.SimpleSignatureType<TOptional<TResource<TResourceName>>(TAutoMap<char*>)>());
if (!typesOnly) {
builder.Implementation(new TParse::TFactory(builder.GetSourcePosition()));
}
@@ -2866,7 +2872,7 @@ private:
const std::string_view buffer = args[0].AsStringRef();
TUnboxedValuePod result(0);
- auto& storage = Reference(result);
+ auto& storage = Reference<TResourceName>(result);
storage.MakeDefault();
auto& builder = valueBuilder->GetDateBuilder();
@@ -2918,13 +2924,27 @@ private:
break;
case 'Y': {
- static constexpr size_t size = 4;
Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
- ui32 year = 0U;
- if (limit < size || !ParseExaclyNDigits<size>::Do(it, year) || !ValidateYear(year)) {
- return false;
+ if constexpr (TResourceName == TMResourceName) { // TM resource: fixed-width 4-digit year
+ static constexpr size_t size = 4;
+ ui32 year = 0U;
+ if (limit < size || !ParseNDigits<size>::Do(it, year) || !ValidateYear(year)) {
+ return false;
+ }
+ SetYear<TMResourceName>(result, year);
+ } else { // any other resource (TM64 is the one instantiated): signed year of up to 6 digits
+ static constexpr size_t size = 6;
+ i64 year = 0LL;
+ i64 negative = 1LL; // sign multiplier: 1, or -1 after a leading '-'
+ if (*it == '-') { // NOTE(review): reads *it without consulting limit - confirm the input cannot be exhausted here
+ negative = -1LL;
+ it++;
+ }
+ if (!ParseNDigits<size, true>::Do(it, year) || !ValidateYear(negative * year)) { // NOTE(review): no limit check, unlike the 4-digit branch; with zero digits year stays 0 and rejection is left to ValidateYear - confirm
+ return false;
+ }
+ SetYear<TM64ResourceName>(result, negative * year);
}
- SetYear(result, year);
return true;
});
break;
@@ -2933,10 +2953,10 @@ private:
static constexpr size_t size = 2;
Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
ui32 month = 0U;
- if (limit < size || !ParseExaclyNDigits<size>::Do(it, month) || !ValidateMonth(month)) {
+ if (limit < size || !ParseNDigits<size>::Do(it, month) || !ValidateMonth(month)) {
return false;
}
- SetMonth(result, month);
+ SetMonth<TResourceName>(result, month);
return true;
});
break;
@@ -2945,10 +2965,10 @@ private:
static constexpr size_t size = 2;
Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
ui32 day = 0U;
- if (limit < size || !ParseExaclyNDigits<size>::Do(it, day) || !ValidateDay(day)) {
+ if (limit < size || !ParseNDigits<size>::Do(it, day) || !ValidateDay(day)) {
return false;
}
- SetDay(result, day);
+ SetDay<TResourceName>(result, day);
return true;
});
break;
@@ -2957,10 +2977,10 @@ private:
static constexpr size_t size = 2;
Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
ui32 hour = 0U;
- if (limit < size || !ParseExaclyNDigits<size>::Do(it, hour) || !ValidateHour(hour)) {
+ if (limit < size || !ParseNDigits<size>::Do(it, hour) || !ValidateHour(hour)) {
return false;
}
- SetHour(result, hour);
+ SetHour<TResourceName>(result, hour);
return true;
});
break;
@@ -2969,10 +2989,10 @@ private:
static constexpr size_t size = 2;
Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
ui32 minute = 0U;
- if (limit < size || !ParseExaclyNDigits<size>::Do(it, minute) || !ValidateMinute(minute)) {
+ if (limit < size || !ParseNDigits<size>::Do(it, minute) || !ValidateMinute(minute)) {
return false;
}
- SetMinute(result, minute);
+ SetMinute<TResourceName>(result, minute);
return true;
});
break;
@@ -2981,10 +3001,10 @@ private:
static constexpr size_t size = 2;
Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
ui32 second = 0U;
- if (limit < size || !ParseExaclyNDigits<size>::Do(it, second) || !ValidateSecond(second)) {
+ if (limit < size || !ParseNDigits<size>::Do(it, second) || !ValidateSecond(second)) {
return false;
}
- SetSecond(result, second);
+ SetSecond<TResourceName>(result, second);
limit -= size;
if (!limit || *it != '.') {
@@ -3010,7 +3030,7 @@ private:
while (digits--) {
usec *= 10U;
}
- SetMicrosecond(result, usec);
+ SetMicrosecond<TResourceName>(result, usec);
return true;
});
break;
@@ -3028,7 +3048,7 @@ private:
if (!builder.FindTimezoneId(TStringRef(&*start, size), timezoneId)) {
return false;
}
- SetTimezoneId(result, timezoneId);
+ SetTimezoneId<TResourceName>(result, timezoneId);
return true;
});
break;
@@ -3047,7 +3067,7 @@ private:
if (cnt < size || !ValidateMonthShortName(monthName, month)) {
return false;
}
- SetMonth(result, month);
+ SetMonth<TResourceName>(result, month);
return true;
});
break;
@@ -3067,7 +3087,7 @@ private:
if (!ValidateMonthFullName(monthName, month)) {
return false;
}
- SetMonth(result, month);
+ SetMonth<TResourceName>(result, month);
return true;
});
break;
@@ -3210,7 +3230,8 @@ private:
TToUnits<ToMicrosecondsUDF, ui64, 1000000>,
TFormat,
- TParse,
+ TParse<ParseUDF, TMResourceName>,
+ TParse<Parse64UDF, TM64ResourceName>,
TParseRfc822,
TParseIso8601,
diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/result.json b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/result.json
index 3b6fd324643..d4bd5e1d4d8 100644
--- a/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/result.json
+++ b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/result.json
@@ -19,6 +19,11 @@
"uri": "file://test.test_Get_/results.txt"
}
],
+ "test.test[Parse64]": [
+ {
+ "uri": "file://test.test_Parse64_/results.txt"
+ }
+ ],
"test.test[Shift]": [
{
"uri": "file://test.test_Shift_/results.txt"
diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_Parse64_/results.txt b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_Parse64_/results.txt
new file mode 100644
index 00000000000..93521e1f28c
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_Parse64_/results.txt
@@ -0,0 +1,174 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ #;
+ #;
+ #;
+ #
+ ];
+ [
+ [
+ "-144169-01-01T00:00:00Z"
+ ];
+ [
+ "-144169-01-01T00:00:00Z"
+ ];
+ [
+ "-144169-01-01T00:00:00Z"
+ ];
+ [
+ "-144169-01-01T00:00:00Z"
+ ]
+ ];
+ [
+ [
+ "-1-01-01T23:59:59.990000Z"
+ ];
+ [
+ "-1-01-01T23:59:59.999999Z"
+ ];
+ [
+ "-1-01-01T00:00:00Z"
+ ];
+ [
+ "-1-01-01T00:00:00Z"
+ ]
+ ];
+ [
+ [
+ "1-01-01T00:00:00Z"
+ ];
+ [
+ "1-01-01T00:00:00Z"
+ ];
+ [
+ "1-01-01T00:00:00Z"
+ ];
+ [
+ "1-01-01T00:00:00Z"
+ ]
+ ];
+ [
+ [
+ "1969-12-31T23:59:59.999999Z"
+ ];
+ [
+ "1969-12-31T23:59:59.999999Z"
+ ];
+ [
+ "1969-12-31T00:00:00Z"
+ ];
+ [
+ "1969-12-31T00:00:00Z"
+ ]
+ ];
+ [
+ [
+ "1970-01-01T00:00:00Z"
+ ];
+ [
+ "1970-01-01T00:00:00Z"
+ ];
+ [
+ "1970-01-01T00:00:00Z"
+ ];
+ [
+ "1970-01-01T00:00:00Z"
+ ]
+ ];
+ [
+ #;
+ #;
+ #;
+ #
+ ];
+ [
+ [
+ "2106-01-01T00:00:00Z"
+ ];
+ [
+ "2106-01-01T00:00:00Z"
+ ];
+ [
+ "2106-01-01T00:00:00Z"
+ ];
+ [
+ "2106-01-01T00:00:00Z"
+ ]
+ ];
+ [
+ [
+ "148107-12-31T23:59:59.999999Z"
+ ];
+ [
+ "148107-12-31T23:59:59.999999Z"
+ ];
+ [
+ "148107-12-31T00:00:00Z"
+ ];
+ [
+ "148107-12-31T00:00:00Z"
+ ]
+ ];
+ [
+ #;
+ #;
+ #;
+ #
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.cfg b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.cfg
new file mode 100644
index 00000000000..c16ed518f45
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.cfg
@@ -0,0 +1 @@
+in plato.Input Parse64.in
diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in
new file mode 100644
index 00000000000..ee33bd3d13f
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in
@@ -0,0 +1,60 @@
+{
+ "fdatetime1"="-144170 12 31 235959 GMT text";
+ "fdatetime2"="%text% 12/31/-144170 23:59:59.";
+ "fdatetime3"="december/31/-144170";
+ "fdatetime4"="dec/31/-144170";
+};
+{
+ "fdatetime1"="-144169 01 01 000000 GMT text";
+ "fdatetime2"="%text% 01/01/-144169 00:00:00.";
+ "fdatetime3"="january/01/-144169";
+ "fdatetime4"="jan/01/-144169";
+};
+{
+ "fdatetime1"="-1 01 01 235959.99 GMT text";
+ "fdatetime2"="%text% 01/01/-1 23:59:59.999999";
+ "fdatetime3"="January/01/-1";
+ "fdatetime4"="Jan/01/-1";
+};
+{
+ "fdatetime1"="1 01 01 000000 GMT text";
+ "fdatetime2"="%text% 01/01/1 00:00:00.0000000";
+ "fdatetime3"="JANUARY/01/1";
+ "fdatetime4"="JAN/01/1";
+};
+{
+ "fdatetime1"="1969 12 31 235959.999999 GMT text";
+ "fdatetime2"="%text% 12/31/1969 23:59:59.999999";
+ "fdatetime3"="DeCeMbEr/31/1969";
+ "fdatetime4"="DeC/31/1969";
+};
+{
+ "fdatetime1"="1970 01 01 000000.00 GMT text";
+ "fdatetime2"="%text% 01/01/1970 00:00:00.00";
+ "fdatetime3"="JaNuArY/01/1970";
+ "fdatetime4"="JaN/01/1970";
+};
+{
+ "fdatetime1"="2025 01 01 000000 GMT bar";
+ "fdatetime2"="%text% 01/01/2025 00:00:00.1234567890abcdef1234567890abcdef";
+ "fdatetime3"="jEnuary/01/2025";
+ "fdatetime4"="jEn/01/2025";
+};
+{
+ "fdatetime1"="2106 01 01 000000 GMT text";
+ "fdatetime2"="%text% 01/01/2106 00:00:00.";
+ "fdatetime3"="jANuARy/01/2106";
+ "fdatetime4"="jAN/01/2106";
+};
+{
+ "fdatetime1"="148107 12 31 235959.999999 GMT text";
+ "fdatetime2"="%text% 12/31/148107 23:59:59.999999";
+ "fdatetime3"="DECEMBER/31/148107";
+ "fdatetime4"="DEC/31/148107";
+};
+{
+ "fdatetime1"="148108 01 01 000000.000000 GMT text";
+ "fdatetime2"="%text% 01/01/148108 00:00:00.000000";
+ "fdatetime3"="jaNUARy/01/148108";
+ "fdatetime4"="jaN/01/148108";
+};
diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in.attr b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in.attr
new file mode 100644
index 00000000000..a5b735bdca4
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in.attr
@@ -0,0 +1,38 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "fdatetime1";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "fdatetime2";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "fdatetime3";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "fdatetime4";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ ]
+ ]
+ }
+}
+
diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.sql b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.sql
new file mode 100644
index 00000000000..cc57f1aa491
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.sql
@@ -0,0 +1,12 @@
+/* syntax version 1 */
+$parse1 = DateTime::Parse64("%Y %m %d %H%M%S %Z text"); -- year first, space separated, timezone name, then a literal " text" tail
+$parse2 = DateTime::Parse64("%%text%% %m/%d/%Y %H:%M:%S"); -- "%%" stands for a literal percent sign in the input; year comes last in the date
+$parse3 = DateTime::Parse64("%B/%d/%Y"); -- full month name (inputs mix letter case)
+$parse4 = DateTime::Parse64("%b/%d/%Y"); -- three-letter month name (inputs mix letter case)
+
+select
+ CAST(DateTime::MakeTimestamp64($parse1(fdatetime1)) AS String), -- each column pairs one parser with its matching input field
+ CAST(DateTime::MakeTimestamp64($parse2(fdatetime2)) AS String),
+ CAST(DateTime::MakeTimestamp64($parse3(fdatetime3)) AS String),
+ CAST(DateTime::MakeTimestamp64($parse4(fdatetime4)) AS String),
+from Input