diff options
| author | imunkin <[email protected]> | 2025-02-25 00:04:30 +0300 | 
|---|---|---|
| committer | imunkin <[email protected]> | 2025-02-25 00:43:55 +0300 | 
| commit | afb5748729cbda07af01d989a854831978d1e0f8 (patch) | |
| tree | 91da78f90ad9ffe5142f52caefe06ca3bebd19ea /yql/essentials/udfs/common | |
| parent | 2d6dbd67adbbe1bbda8a45355b186a2c490d3508 (diff) | |
YQL-18303: Introduce Parse64 function
commit_hash:f2068081a3ddfb16f531239f3e0dcc923084d3b0
Diffstat (limited to 'yql/essentials/udfs/common')
7 files changed, 349 insertions, 38 deletions
diff --git a/yql/essentials/udfs/common/datetime2/datetime_udf.cpp b/yql/essentials/udfs/common/datetime2/datetime_udf.cpp index 61ee555e567..5044d0d9b40 100644 --- a/yql/essentials/udfs/common/datetime2/datetime_udf.cpp +++ b/yql/essentials/udfs/common/datetime2/datetime_udf.cpp @@ -47,6 +47,8 @@ extern const char EndOfUDF[] = "EndOf";  extern const char ShiftYearsUDF[] = "ShiftYears";  extern const char ShiftQuartersUDF[] = "ShiftQuarters";  extern const char ShiftMonthsUDF[] = "ShiftMonths"; +extern const char ParseUDF[] = "Parse"; +extern const char Parse64UDF[] = "Parse64";  extern const char TMResourceName[] = "DateTime2.TM";  extern const char TM64ResourceName[] = "DateTime2.TM64"; @@ -2762,33 +2764,42 @@ private:          const TSourcePosition Pos_;      }; -    template<size_t Digits> -    struct ParseExaclyNDigits; +    template<size_t Digits, bool Variable = false> +    struct ParseNDigits; -    template<> -    struct ParseExaclyNDigits<0U> { +    template<bool Variable> +    struct ParseNDigits<0U, Variable> {          template <typename T>          static constexpr bool Do(std::string_view::const_iterator&, T&) {              return true;          }      }; -    template<size_t Digits> -    struct ParseExaclyNDigits { +    template<size_t Digits, bool Variable> +    struct ParseNDigits {          template <typename T>          static constexpr bool Do(std::string_view::const_iterator& it, T& out) {              const auto d = *it;              if (!std::isdigit(d)) { +                // XXX: If the current char is not a digit, the +                // parsing succeeds iff there are no more digits +                // to be parsed (see the class specialization +                // above) or there are given less than N digits +                // to be parsed. +                if constexpr (Variable) { +                    return true; +                }                  return false;              }              out *= 10U;              out += d - '0'; -            return ParseExaclyNDigits<Digits - 1U>::Do(++it, out); +            return ParseNDigits<Digits - 1U, Variable>::Do(++it, out);          }      };      // Parse +    template<const char* TUdfName, const char* TResourceName>      class TParse : public TBoxedValue {      public:          class TFactory : public TBoxedValue { @@ -2808,7 +2819,7 @@ private:          };          static const TStringRef& Name() { -            static auto name = TStringRef::Of("Parse"); +            static auto name = TStringRef(TUdfName, std::strlen(TUdfName));              return name;          } @@ -2822,15 +2833,10 @@ private:                  return false;              } -            auto resourceType = builder.Resource(TMResourceName); -            auto optionalResourceType = builder.Optional()->Item(resourceType).Build(); - -            builder.Args()->Add<char*>().Flags(ICallablePayload::TArgumentFlags::AutoMap) -                .Add(builder.Optional()->Item<ui16>()) -                .Done() -                .OptionalArgs(1); -            builder.RunConfig<char*>().Returns(optionalResourceType); - +            builder.OptionalArgs(1).Args()->Add<char*>() +                .template Add<TOptional<ui16>>(); +            builder.Returns( +                builder.SimpleSignatureType<TOptional<TResource<TResourceName>>(TAutoMap<char*>)>());              if (!typesOnly) {                  builder.Implementation(new TParse::TFactory(builder.GetSourcePosition()));              } @@ -2866,7 +2872,7 @@ private:                  const std::string_view buffer = args[0].AsStringRef();                  TUnboxedValuePod result(0); -                auto& storage = Reference(result); +                auto& storage = Reference<TResourceName>(result);                  storage.MakeDefault();                  auto& builder = valueBuilder->GetDateBuilder(); @@ -2918,13 +2924,27 @@ private:                      break;                  case 'Y': { -                    static constexpr size_t size = 4;                      Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) { -                        ui32 year = 0U; -                        if (limit < size || !ParseExaclyNDigits<size>::Do(it, year) || !ValidateYear(year)) { -                            return false; +                        if constexpr (TResourceName == TMResourceName) { +                            static constexpr size_t size = 4; +                            ui32 year = 0U; +                            if (limit < size || !ParseNDigits<size>::Do(it, year) || !ValidateYear(year)) { +                                return false; +                            } +                            SetYear<TMResourceName>(result, year); +                        } else { +                            static constexpr size_t size = 6; +                            i64 year = 0LL; +                            i64 negative = 1LL; +                            if (*it == '-') { +                                negative = -1LL; +                                it++; +                            } +                            if (!ParseNDigits<size, true>::Do(it, year) || !ValidateYear(negative * year)) { +                                return false; +                            } +                            SetYear<TM64ResourceName>(result, negative * year);                          } -                        SetYear(result, year);                          return true;                      });                      break; @@ -2933,10 +2953,10 @@ private:                      static constexpr size_t size = 2;                      Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {                          ui32 month = 0U; -                        if (limit < size || !ParseExaclyNDigits<size>::Do(it, month) || !ValidateMonth(month)) { +                        if (limit < size || !ParseNDigits<size>::Do(it, month) || !ValidateMonth(month)) {                              return false;                          } -                        SetMonth(result, month); +                        SetMonth<TResourceName>(result, month);                          return true;                      });                      break; @@ -2945,10 +2965,10 @@ private:                      static constexpr size_t size = 2;                      Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {                          ui32 day = 0U; -                        if (limit < size || !ParseExaclyNDigits<size>::Do(it, day) || !ValidateDay(day)) { +                        if (limit < size || !ParseNDigits<size>::Do(it, day) || !ValidateDay(day)) {                              return false;                          } -                        SetDay(result, day); +                        SetDay<TResourceName>(result, day);                          return true;                      });                      break; @@ -2957,10 +2977,10 @@ private:                      static constexpr size_t size = 2;                      Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {                          ui32 hour = 0U; -                        if (limit < size || !ParseExaclyNDigits<size>::Do(it, hour) || !ValidateHour(hour)) { +                        if (limit < size || !ParseNDigits<size>::Do(it, hour) || !ValidateHour(hour)) {                              return false;                          } -                        SetHour(result, hour); +                        SetHour<TResourceName>(result, hour);                          return true;                      });                      break; @@ -2969,10 +2989,10 @@ private:                      static constexpr size_t size = 2;                      Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {                          ui32 minute = 0U; -                        if (limit < size || !ParseExaclyNDigits<size>::Do(it, minute) || !ValidateMinute(minute)) { +                        if (limit < size || !ParseNDigits<size>::Do(it, minute) || !ValidateMinute(minute)) {                              return false;                          } -                        SetMinute(result, minute); +                        SetMinute<TResourceName>(result, minute);                          return true;                      });                      break; @@ -2981,10 +3001,10 @@ private:                      static constexpr size_t size = 2;                      Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {                          ui32 second = 0U; -                        if (limit < size || !ParseExaclyNDigits<size>::Do(it, second) || !ValidateSecond(second)) { +                        if (limit < size || !ParseNDigits<size>::Do(it, second) || !ValidateSecond(second)) {                              return false;                          } -                        SetSecond(result, second); +                        SetSecond<TResourceName>(result, second);                          limit -= size;                          if (!limit || *it != '.') { @@ -3010,7 +3030,7 @@ private:                          while (digits--) {                              usec *= 10U;                          } -                        SetMicrosecond(result, usec); +                        SetMicrosecond<TResourceName>(result, usec);                          return true;                      });                      break; @@ -3028,7 +3048,7 @@ private:                          if (!builder.FindTimezoneId(TStringRef(&*start, size), timezoneId)) {                              return false;                          } -                        SetTimezoneId(result, timezoneId); +                        SetTimezoneId<TResourceName>(result, timezoneId);                          return true;                      });                      break; @@ -3047,7 +3067,7 @@ private:                          if (cnt < size || !ValidateMonthShortName(monthName, month)) {                              return false;                          } -                        SetMonth(result, month); +                        SetMonth<TResourceName>(result, month);                          return true;                      });                      break; @@ -3067,7 +3087,7 @@ private:                          if (!ValidateMonthFullName(monthName, month)) {                              return false;                          } -                        SetMonth(result, month); +                        SetMonth<TResourceName>(result, month);                          return true;                      });                      break; @@ -3210,7 +3230,8 @@ private:          TToUnits<ToMicrosecondsUDF, ui64, 1000000>,          TFormat, -        TParse, +        TParse<ParseUDF, TMResourceName>, +        TParse<Parse64UDF, TM64ResourceName>,          TParseRfc822,          TParseIso8601, diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/result.json b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/result.json index 3b6fd324643..d4bd5e1d4d8 100644 --- a/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/result.json +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/result.json @@ -19,6 +19,11 @@              "uri": "file://test.test_Get_/results.txt"          }      ], +    "test.test[Parse64]": [ +        { +            "uri": "file://test.test_Parse64_/results.txt" +        } +    ],      "test.test[Shift]": [          {              "uri": "file://test.test_Shift_/results.txt" diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_Parse64_/results.txt b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_Parse64_/results.txt new file mode 100644 index 00000000000..93521e1f28c --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_Parse64_/results.txt @@ -0,0 +1,174 @@ +[ +    { +        "Write" = [ +            { +                "Type" = [ +                    "ListType"; +                    [ +                        "StructType"; +                        [ +                            [ +                                "column0"; +                                [ +                                    "OptionalType"; +                                    [ +                                        "DataType"; +                                        "String" +                                    ] +                                ] +                            ]; +                            [ +                                "column1"; +                                [ +                                    "OptionalType"; +                                    [ +                                        "DataType"; +                                        "String" +                                    ] +                                ] +                            ]; +                            [ +                                "column2"; +                                [ +                                    "OptionalType"; +                                    [ +                                        "DataType"; +                                        "String" +                                    ] +                                ] +                            ]; +                            [ +                                "column3"; +                                [ +                                    "OptionalType"; +                                    [ +                                        "DataType"; +                                        "String" +                                    ] +                                ] +                            ] +                        ] +                    ] +                ]; +                "Data" = [ +                    [ +                        #; +                        #; +                        #; +                        # +                    ]; +                    [ +                        [ +                            "-144169-01-01T00:00:00Z" +                        ]; +                        [ +                            "-144169-01-01T00:00:00Z" +                        ]; +                        [ +                            "-144169-01-01T00:00:00Z" +                        ]; +                        [ +                            "-144169-01-01T00:00:00Z" +                        ] +                    ]; +                    [ +                        [ +                            "-1-01-01T23:59:59.990000Z" +                        ]; +                        [ +                            "-1-01-01T23:59:59.999999Z" +                        ]; +                        [ +                            "-1-01-01T00:00:00Z" +                        ]; +                        [ +                            "-1-01-01T00:00:00Z" +                        ] +                    ]; +                    [ +                        [ +                            "1-01-01T00:00:00Z" +                        ]; +                        [ +                            "1-01-01T00:00:00Z" +                        ]; +                        [ +                            "1-01-01T00:00:00Z" +                        ]; +                        [ +                            "1-01-01T00:00:00Z" +                        ] +                    ]; +                    [ +                        [ +                            "1969-12-31T23:59:59.999999Z" +                        ]; +                        [ +                            "1969-12-31T23:59:59.999999Z" +                        ]; +                        [ +                            "1969-12-31T00:00:00Z" +                        ]; +                        [ +                            "1969-12-31T00:00:00Z" +                        ] +                    ]; +                    [ +                        [ +                            "1970-01-01T00:00:00Z" +                        ]; +                        [ +                            "1970-01-01T00:00:00Z" +                        ]; +                        [ +                            "1970-01-01T00:00:00Z" +                        ]; +                        [ +                            "1970-01-01T00:00:00Z" +                        ] +                    ]; +                    [ +                        #; +                        #; +                        #; +                        # +                    ]; +                    [ +                        [ +                            "2106-01-01T00:00:00Z" +                        ]; +                        [ +                            "2106-01-01T00:00:00Z" +                        ]; +                        [ +                            "2106-01-01T00:00:00Z" +                        ]; +                        [ +                            "2106-01-01T00:00:00Z" +                        ] +                    ]; +                    [ +                        [ +                            "148107-12-31T23:59:59.999999Z" +                        ]; +                        [ +                            "148107-12-31T23:59:59.999999Z" +                        ]; +                        [ +                            "148107-12-31T00:00:00Z" +                        ]; +                        [ +                            "148107-12-31T00:00:00Z" +                        ] +                    ]; +                    [ +                        #; +                        #; +                        #; +                        # +                    ] +                ] +            } +        ] +    } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.cfg b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.cfg new file mode 100644 index 00000000000..c16ed518f45 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.cfg @@ -0,0 +1 @@ +in plato.Input Parse64.in diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in new file mode 100644 index 00000000000..ee33bd3d13f --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in @@ -0,0 +1,60 @@ +{ +    "fdatetime1"="-144170 12 31 235959 GMT text"; +    "fdatetime2"="%text% 12/31/-144170 23:59:59."; +    "fdatetime3"="december/31/-144170"; +    "fdatetime4"="dec/31/-144170"; +}; +{ +    "fdatetime1"="-144169 01 01 000000 GMT text"; +    "fdatetime2"="%text% 01/01/-144169 00:00:00."; +    "fdatetime3"="january/01/-144169"; +    "fdatetime4"="jan/01/-144169"; +}; +{ +    "fdatetime1"="-1 01 01 235959.99 GMT text"; +    "fdatetime2"="%text% 01/01/-1 23:59:59.999999"; +    "fdatetime3"="January/01/-1"; +    "fdatetime4"="Jan/01/-1"; +}; +{ +    "fdatetime1"="1 01 01 000000 GMT text"; +    "fdatetime2"="%text% 01/01/1 00:00:00.0000000"; +    "fdatetime3"="JANUARY/01/1"; +    "fdatetime4"="JAN/01/1"; +}; +{ +    "fdatetime1"="1969 12 31 235959.999999 GMT text"; +    "fdatetime2"="%text% 12/31/1969 23:59:59.999999"; +    "fdatetime3"="DeCeMbEr/31/1969"; +    "fdatetime4"="DeC/31/1969"; +}; +{ +    "fdatetime1"="1970 01 01 000000.00 GMT text"; +    "fdatetime2"="%text% 01/01/1970 00:00:00.00"; +    "fdatetime3"="JaNuArY/01/1970"; +    "fdatetime4"="JaN/01/1970"; +}; +{ +    "fdatetime1"="2025 01 01 000000 GMT bar"; +    "fdatetime2"="%text% 01/01/2025 00:00:00.1234567890abcdef1234567890abcdef"; +    "fdatetime3"="jEnuary/01/2025"; +    "fdatetime4"="jEn/01/2025"; +}; +{ +    "fdatetime1"="2106 01 01 000000 GMT text"; +    "fdatetime2"="%text% 01/01/2106 00:00:00."; +    "fdatetime3"="jANuARy/01/2106"; +    "fdatetime4"="jAN/01/2106"; +}; +{ +    "fdatetime1"="148107 12 31 235959.999999 GMT text"; +    "fdatetime2"="%text% 12/31/148107 23:59:59.999999"; +    "fdatetime3"="DECEMBER/31/148107"; +    "fdatetime4"="DEC/31/148107"; +}; +{ +    "fdatetime1"="148108 01 01 000000.000000 GMT text"; +    "fdatetime2"="%text% 01/01/148108 00:00:00.000000"; +    "fdatetime3"="jaNUARy/01/148108"; +    "fdatetime4"="jaN/01/148108"; +}; diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in.attr b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in.attr new file mode 100644 index 00000000000..a5b735bdca4 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.in.attr @@ -0,0 +1,38 @@ +{ +    "_yql_row_spec" = { +        "Type" = [ +            "StructType"; +            [ +                [ +                    "fdatetime1"; +                    [ +                        "DataType"; +                        "String" +                    ] +                ]; +                [ +                    "fdatetime2"; +                    [ +                        "DataType"; +                        "String" +                    ] +                ]; +                [ +                    "fdatetime3"; +                    [ +                        "DataType"; +                        "String" +                    ] +                ]; +                [ +                    "fdatetime4"; +                    [ +                        "DataType"; +                        "String" +                    ] +                ]; +            ] +        ] +    } +} + diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.sql b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.sql new file mode 100644 index 00000000000..cc57f1aa491 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Parse64.sql @@ -0,0 +1,12 @@ +/* syntax version 1 */ +$parse1 = DateTime::Parse64("%Y %m %d %H%M%S %Z text"); +$parse2 = DateTime::Parse64("%%text%% %m/%d/%Y %H:%M:%S"); +$parse3 = DateTime::Parse64("%B/%d/%Y"); +$parse4 = DateTime::Parse64("%b/%d/%Y"); + +select +    CAST(DateTime::MakeTimestamp64($parse1(fdatetime1)) AS String), +    CAST(DateTime::MakeTimestamp64($parse2(fdatetime2)) AS String), +    CAST(DateTime::MakeTimestamp64($parse3(fdatetime3)) AS String), +    CAST(DateTime::MakeTimestamp64($parse4(fdatetime4)) AS String), +from Input  | 
