diff options
| author | vitalyisaev <[email protected]> | 2023-11-14 09:58:56 +0300 |
|---|---|---|
| committer | vitalyisaev <[email protected]> | 2023-11-14 10:20:20 +0300 |
| commit | c2b2dfd9827a400a8495e172a56343462e3ceb82 (patch) | |
| tree | cd4e4f597d01bede4c82dffeb2d780d0a9046bd0 /contrib/clickhouse/src/Functions/parseTimeDelta.cpp | |
| parent | d4ae8f119e67808cb0cf776ba6e0cf95296f2df7 (diff) | |
YQ Connector: move tests from yql to ydb (OSS)
Перенос папки с тестами на Коннектор из папки yql в папку ydb (синхронизируется с github).
Diffstat (limited to 'contrib/clickhouse/src/Functions/parseTimeDelta.cpp')
| -rw-r--r-- | contrib/clickhouse/src/Functions/parseTimeDelta.cpp | 312 |
1 files changed, 312 insertions, 0 deletions
diff --git a/contrib/clickhouse/src/Functions/parseTimeDelta.cpp b/contrib/clickhouse/src/Functions/parseTimeDelta.cpp new file mode 100644 index 00000000000..7743a0cb664 --- /dev/null +++ b/contrib/clickhouse/src/Functions/parseTimeDelta.cpp @@ -0,0 +1,312 @@ +#include <boost/convert.hpp> +#include <boost/convert/strtol.hpp> + +#include <Columns/ColumnsNumber.h> +#include <DataTypes/DataTypesNumber.h> +#include <Functions/FunctionFactory.h> +#include <Functions/IFunction.h> + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + const std::unordered_map<std::string_view, Float64> time_unit_to_float = + { + {"years", 365 * 24 * 3600}, + {"year", 365 * 24 * 3600}, + {"yr", 365 * 24 * 3600}, + {"y", 365 * 24 * 3600}, + + {"months", 30.5 * 24 * 3600}, + {"month", 30.5 * 24 * 3600}, + {"mo", 30.5 * 24 * 3600}, + + {"weeks", 7 * 24 * 3600}, + {"week", 7 * 24 * 3600}, + {"w", 7 * 24 * 3600}, + + {"days", 24 * 3600}, + {"day", 24 * 3600}, + {"d", 24 * 3600}, + + {"hours", 3600}, + {"hour", 3600}, + {"hr", 3600}, + {"h", 3600}, + + {"minutes", 60}, + {"minute", 60}, + {"min", 60}, + {"m", 60}, + + {"seconds", 1}, + {"second", 1}, + {"sec", 1}, + {"s", 1}, + + {"milliseconds", 1e-3}, + {"millisecond", 1e-3}, + {"millisec", 1e-3}, + {"ms", 1e-3}, + + {"microseconds", 1e-6}, + {"microsecond", 1e-6}, + {"microsec", 1e-6}, + {"μs", 1e-6}, + {"us", 1e-6}, + + {"nanoseconds", 1e-9}, + {"nanosecond", 1e-9}, + {"nanosec", 1e-9}, + {"ns", 1e-9}, + }; + + /** Prints amount of seconds in form of: + * "1 year 2 months 4 weeks 12 days 3 hours 1 minute 33 seconds". + * ' ', ';', '-', '+', ',', ':' can be used as separator, eg. "1yr-2mo", "2m:6s" + * + * valid expressions: + * SELECT parseTimeDelta('1 min 35 sec'); + * SELECT parseTimeDelta('0m;11.23s.'); + * SELECT parseTimeDelta('11hr 25min 3.1s'); + * SELECT parseTimeDelta('0.00123 seconds'); + * SELECT parseTimeDelta('1yr2mo'); + * SELECT parseTimeDelta('11s+22min'); + * SELECT parseTimeDelta('1yr-2mo-4w + 12 days, 3 hours : 1 minute ; 33 seconds'); + * + * invalid expressions: + * SELECT parseTimeDelta(); + * SELECT parseTimeDelta('1yr', 1); + * SELECT parseTimeDelta(1); + * SELECT parseTimeDelta(' '); + * SELECT parseTimeDelta('-1yr'); + * SELECT parseTimeDelta('1yr-'); + * SELECT parseTimeDelta('yr2mo'); + * SELECT parseTimeDelta('1.yr2mo'); + * SELECT parseTimeDelta('1-yr'); + * SELECT parseTimeDelta('1 1yr'); + * SELECT parseTimeDelta('1yyr'); + * SELECT parseTimeDelta('1yr-2mo-4w + 12 days, 3 hours : 1 minute ;. 33 seconds'); + * + * The length of years and months (and even days in presence of time adjustments) are rough: + * year is just 365 days, month is 30.5 days, day is 86400 seconds, similarly to what formatReadableTimeDelta is doing. + */ + class FunctionParseTimeDelta : public IFunction + { + public: + static constexpr auto name = "parseTimeDelta"; + static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionParseTimeDelta>(); } + + String getName() const override { return name; } + + bool isVariadic() const override { return true; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + size_t getNumberOfArguments() const override { return 0; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.empty()) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 1.", + getName(), + arguments.size()); + + if (arguments.size() > 1) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 1.", + getName(), + arguments.size()); + + const IDataType & type = *arguments[0]; + + if (!isString(type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot format {} as time string.", type.getName()); + + return std::make_shared<DataTypeFloat64>(); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + auto col_to = ColumnFloat64::create(); + auto & res_data = col_to->getData(); + + for (size_t i = 0; i < input_rows_count; ++i) + { + std::string_view str{arguments[0].column->getDataAt(i)}; + Int64 token_tail = 0; + Int64 token_front = 0; + Int64 last_pos = str.length() - 1; + Float64 result = 0; + + /// ignore '.' and ' ' at the end of string + while (last_pos >= 0 && (str[last_pos] == ' ' || str[last_pos] == '.')) + --last_pos; + + /// no valid characters + if (last_pos < 0) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Invalid expression for function {}, don't find valid characters, str: \"{}\".", + getName(), + String(str)); + } + + /// last pos character must be character and not be separator or number after ignoring '.' and ' ' + if (!isalpha(str[last_pos])) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid expression for function {}, str: \"{}\".", getName(), String(str)); + } + + /// scan spaces at the beginning + scanSpaces(str, token_tail, last_pos); + token_front = token_tail; + + while (token_tail <= last_pos) + { + /// scan unsigned integer + if (!scanUnsignedInteger(str, token_tail, last_pos)) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Invalid expression for function {}, find number failed, str: \"{}\".", + getName(), + String(str)); + } + + /// if there is a '.', then scan another integer to get a float number + if (token_tail <= last_pos && str[token_tail] == '.') + { + token_tail++; + if (!scanUnsignedInteger(str, token_tail, last_pos)) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Invalid expression for function {}, find number after '.' failed, str: \"{}\".", + getName(), + String(str)); + } + } + + /// convert float/integer string to float + Float64 base = 0; + std::string_view base_str = str.substr(token_front, token_tail - token_front); + auto value = boost::convert<Float64>(base_str, boost::cnv::strtol()); + if (!value.has_value()) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Invalid expression for function {}, convert string to float64 failed: \"{}\".", + getName(), + String(base_str)); + } + base = value.get(); + + scanSpaces(str, token_tail, last_pos); + token_front = token_tail; + + /// scan a unit + if (!scanUnit(str, token_tail, last_pos)) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Invalid expression for function {}, find unit failed, str: \"{}\".", + getName(), + String(str)); + } + + /// get unit number + std::string_view unit = str.substr(token_front, token_tail - token_front); + auto iter = time_unit_to_float.find(unit); + if (iter == time_unit_to_float.end()) /// not find unit + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Invalid expression for function {}, parse unit failed: \"{}\".", getName(), unit); + } + result += base * iter->second; + + /// scan separator between two tokens + scanSeparator(str, token_tail, last_pos); + token_front = token_tail; + } + + res_data.emplace_back(result); + } + + return col_to; + } + + /// scan an unsigned integer number + static bool scanUnsignedInteger(std::string_view & str, Int64 & index, Int64 last_pos) + { + int64_t begin_index = index; + while (index <= last_pos && isdigit(str[index])) + { + index++; + } + return index != begin_index; + } + + /// scan a unit + static bool scanUnit(std::string_view & str, Int64 & index, Int64 last_pos) + { + int64_t begin_index = index; + while (index <= last_pos && !isdigit(str[index]) && !isSeparator(str[index])) + { + index++; + } + return index != begin_index; + } + + /// scan spaces + static void scanSpaces(std::string_view & str, Int64 & index, Int64 last_pos) + { + while (index <= last_pos && (str[index] == ' ')) + { + index++; + } + } + + /// scan for characters to ignore + static void scanSeparator(std::string_view & str, Int64 & index, Int64 last_pos) + { + /// ignore spaces + scanSpaces(str, index, last_pos); + + /// ignore separator + if (index <= last_pos && isSeparator(str[index])) + { + index++; + } + + scanSpaces(str, index, last_pos); + } + + static bool isSeparator(char symbol) + { + return symbol == ';' || symbol == '-' || symbol == '+' || symbol == ',' || symbol == ':' || symbol == ' '; + } + }; + +} + +REGISTER_FUNCTION(ParseTimeDelta) +{ + factory.registerFunction<FunctionParseTimeDelta>(); +} + +} |
