aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author mrlolthe1st <mrlolthe1st@yandex-team.com> 2022-09-15 20:53:20 +0300
committer mrlolthe1st <mrlolthe1st@yandex-team.com> 2022-09-15 20:53:20 +0300
commit 9b4c67e69eece7ab0dfa95628bc0a2849f2c6883 (patch)
tree c53c0338c2fd3637fd37f4f4f5b6a1e8077b421a
parent f5683eb1a7572efd69c18d03f560983e14917a4b (diff)
download ydb-9b4c67e69eece7ab0dfa95628bc0a2849f2c6883.tar.gz
Fix
fix overflow + parse time offset
-rw-r--r-- ydb/library/yql/minikql/mkql_type_ops.cpp 206
-rw-r--r-- ydb/library/yql/minikql/mkql_type_ops_ut.cpp 56
2 files changed, 207 insertions, 55 deletions
diff --git a/ydb/library/yql/minikql/mkql_type_ops.cpp b/ydb/library/yql/minikql/mkql_type_ops.cpp
index 0c453160dce..8b6fe3caa4e 100644
--- a/ydb/library/yql/minikql/mkql_type_ops.cpp
+++ b/ydb/library/yql/minikql/mkql_type_ops.cpp
@@ -952,10 +952,10 @@ bool FromLocalTimeValidated(ui16 tzId, ui32 year, ui32 month, ui32 day, ui32 hou
} // namespace
-ui32 ParseNumber(ui32& pos, NUdf::TStringRef buf, ui32& value) {
+ui32 ParseNumber(ui32& pos, NUdf::TStringRef buf, ui32& value, i8 dig_cnt) {
value = 0;
ui32 count = 0;
- for (; pos < buf.Size(); ++pos) {
+ for (; dig_cnt && pos < buf.Size(); --dig_cnt, ++pos) {
char c = buf.Data()[pos];
if (c >= '0' && c <= '9') {
value = value * 10 + (c - '0');
@@ -1074,19 +1074,19 @@ bool ParseUuid(NUdf::TStringRef buf, void* out, bool shortForm) {
NUdf::TUnboxedValuePod ParseDate(NUdf::TStringRef buf) {
ui32 year, month, day;
ui32 pos = 0;
- if (!ParseNumber(pos, buf, year) || pos == buf.Size() || buf.Data()[pos] != '-') {
+ if (!ParseNumber(pos, buf, year, 4) || pos == buf.Size() || buf.Data()[pos] != '-') {
return NUdf::TUnboxedValuePod();
}
// skip '-'
++pos;
- if (!ParseNumber(pos, buf, month) || pos == buf.Size() || buf.Data()[pos] != '-') {
+ if (!ParseNumber(pos, buf, month, 2) || pos == buf.Size() || buf.Data()[pos] != '-') {
return NUdf::TUnboxedValuePod();
}
// skip '-'
++pos;
- if (!ParseNumber(pos, buf, day) || pos != buf.Size()) {
+ if (!ParseNumber(pos, buf, day, 2) || pos != buf.Size()) {
return NUdf::TUnboxedValuePod();
}
@@ -1113,19 +1113,19 @@ NUdf::TUnboxedValuePod ParseTzDate(NUdf::TStringRef str) {
ui32 year, month, day;
ui32 pos = 0;
- if (!ParseNumber(pos, buf, year) || pos == buf.size() || buf.data()[pos] != '-') {
+ if (!ParseNumber(pos, buf, year, 4) || pos == buf.size() || buf.data()[pos] != '-') {
return NUdf::TUnboxedValuePod();
}
// skip '-'
++pos;
- if (!ParseNumber(pos, buf, month) || pos == buf.size() || buf.data()[pos] != '-') {
+ if (!ParseNumber(pos, buf, month, 2) || pos == buf.size() || buf.data()[pos] != '-') {
return NUdf::TUnboxedValuePod();
}
// skip '-'
++pos;
- if (!ParseNumber(pos, buf, day) || pos != buf.size()) {
+ if (!ParseNumber(pos, buf, day, 2) || pos != buf.size()) {
return NUdf::TUnboxedValuePod();
}
@@ -1143,19 +1143,19 @@ NUdf::TUnboxedValuePod ParseTzDate(NUdf::TStringRef str) {
NUdf::TUnboxedValuePod ParseDatetime(NUdf::TStringRef buf) {
ui32 year, month, day;
ui32 pos = 0;
- if (!ParseNumber(pos, buf, year) || pos == buf.Size() || buf.Data()[pos] != '-') {
+ if (!ParseNumber(pos, buf, year, 4) || pos == buf.Size() || buf.Data()[pos] != '-') {
return NUdf::TUnboxedValuePod();
}
// skip '-'
++pos;
- if (!ParseNumber(pos, buf, month) || pos == buf.Size() || buf.Data()[pos] != '-') {
+ if (!ParseNumber(pos, buf, month, 2) || pos == buf.Size() || buf.Data()[pos] != '-') {
return NUdf::TUnboxedValuePod();
}
// skip '-'
++pos;
- if (!ParseNumber(pos, buf, day) || pos == buf.Size() || buf.Data()[pos] != 'T') {
+ if (!ParseNumber(pos, buf, day, 2) || pos == buf.Size() || buf.Data()[pos] != 'T') {
return NUdf::TUnboxedValuePod();
}
@@ -1167,26 +1167,61 @@ NUdf::TUnboxedValuePod ParseDatetime(NUdf::TStringRef buf) {
ui32 hour, minute, second;
// skip 'T'
++pos;
- if (!ParseNumber(pos, buf, hour) || pos == buf.Size() || buf.Data()[pos] != ':') {
+ if (!ParseNumber(pos, buf, hour, 2) || pos == buf.Size() || buf.Data()[pos] != ':') {
return NUdf::TUnboxedValuePod();
}
// skip ':'
++pos;
- if (!ParseNumber(pos, buf, minute) || pos == buf.Size() || buf.Data()[pos] != ':') {
+ if (!ParseNumber(pos, buf, minute, 2) || pos == buf.Size() || buf.Data()[pos] != ':') {
return NUdf::TUnboxedValuePod();
}
// skip ':'
++pos;
- if (!ParseNumber(pos, buf, second) || pos == buf.Size() || buf.Data()[pos] != 'Z') {
+ if (!ParseNumber(pos, buf, second, 2) || pos == buf.Size()) {
return NUdf::TUnboxedValuePod();
}
- // skip 'Z'
- ++pos;
- if (pos != buf.Size()) {
- return NUdf::TUnboxedValuePod();
+ bool waiting_for_z = true;
+
+ ui32 offset_hours = 0;
+ ui32 offset_minutes = 0;
+ bool is_offset_negative = false;
+ if (buf.Data()[pos] == '+' || buf.Data()[pos] == '-') {
+ is_offset_negative = buf.Data()[pos] == '-';
+
+ // Skip sign
+ ++pos;
+
+ if (!ParseNumber(pos, buf, offset_hours, 2) ||
+ pos == buf.Size() || buf.Data()[pos] != ':')
+ {
+ return NUdf::TUnboxedValuePod();
+ }
+
+ // Skip ':'
+ ++pos;
+
+ if (!ParseNumber(pos, buf, offset_minutes, 2) || pos != buf.Size()) {
+ return NUdf::TUnboxedValuePod();
+ }
+
+ waiting_for_z = false;
+ }
+
+ ui32 offset_value = ((offset_hours) * 60 + offset_minutes) * 60;
+
+ if (waiting_for_z) {
+ if (pos == buf.Size() || buf.Data()[pos] != 'Z') {
+ return NUdf::TUnboxedValuePod();
+ }
+
+ // skip 'Z'
+ ++pos;
+ if (pos != buf.Size()) {
+ return NUdf::TUnboxedValuePod();
+ }
}
ui32 timeValue;
@@ -1195,6 +1230,19 @@ NUdf::TUnboxedValuePod ParseDatetime(NUdf::TStringRef buf) {
}
ui32 value = dateValue * 86400u + timeValue;
+
+ if (is_offset_negative) {
+ if (UINT32_MAX - value < offset_value) {
+ return NUdf::TUnboxedValuePod();
+ }
+ value += offset_value;
+ } else {
+ if (value < offset_value) {
+ return NUdf::TUnboxedValuePod();
+ }
+ value -= offset_value;
+ }
+
if (value >= NUdf::MAX_DATETIME) {
return NUdf::TUnboxedValuePod();
}
@@ -1213,38 +1261,38 @@ NUdf::TUnboxedValuePod ParseTzDatetime(NUdf::TStringRef str) {
ui32 year, month, day;
ui32 pos = 0;
- if (!ParseNumber(pos, buf, year) || pos == buf.size() || buf.data()[pos] != '-') {
+ if (!ParseNumber(pos, buf, year, 4) || pos == buf.size() || buf.data()[pos] != '-') {
return NUdf::TUnboxedValuePod();
}
// skip '-'
++pos;
- if (!ParseNumber(pos, buf, month) || pos == buf.size() || buf.data()[pos] != '-') {
+ if (!ParseNumber(pos, buf, month, 2) || pos == buf.size() || buf.data()[pos] != '-') {
return NUdf::TUnboxedValuePod();
}
// skip '-'
++pos;
- if (!ParseNumber(pos, buf, day) || pos == buf.size() || buf.data()[pos] != 'T') {
+ if (!ParseNumber(pos, buf, day, 2) || pos == buf.size() || buf.data()[pos] != 'T') {
return NUdf::TUnboxedValuePod();
}
ui32 hour, minute, second;
// skip 'T'
++pos;
- if (!ParseNumber(pos, buf, hour) || pos == buf.size() || buf.data()[pos] != ':') {
+ if (!ParseNumber(pos, buf, hour, 2) || pos == buf.size() || buf.data()[pos] != ':') {
return NUdf::TUnboxedValuePod();
}
// skip ':'
++pos;
- if (!ParseNumber(pos, buf, minute) || pos == buf.size() || buf.data()[pos] != ':') {
+ if (!ParseNumber(pos, buf, minute, 2) || pos == buf.size() || buf.data()[pos] != ':') {
return NUdf::TUnboxedValuePod();
}
// skip ':'
++pos;
- if (!ParseNumber(pos, buf, second) || pos != buf.size()) {
+ if (!ParseNumber(pos, buf, second, 2) || pos != buf.size()) {
return NUdf::TUnboxedValuePod();
}
@@ -1262,19 +1310,19 @@ NUdf::TUnboxedValuePod ParseTzDatetime(NUdf::TStringRef str) {
NUdf::TUnboxedValuePod ParseTimestamp(NUdf::TStringRef buf) {
ui32 year, month, day;
ui32 pos = 0;
- if (!ParseNumber(pos, buf, year) || pos == buf.Size() || buf.Data()[pos] != '-') {
+ if (!ParseNumber(pos, buf, year, 4) || pos == buf.Size() || buf.Data()[pos] != '-') {
return NUdf::TUnboxedValuePod();
}
// skip '-'
++pos;
- if (!ParseNumber(pos, buf, month) || pos == buf.Size() || buf.Data()[pos] != '-') {
+ if (!ParseNumber(pos, buf, month, 2) || pos == buf.Size() || buf.Data()[pos] != '-') {
return NUdf::TUnboxedValuePod();
}
// skip '-'
++pos;
- if (!ParseNumber(pos, buf, day) || pos == buf.Size() || buf.Data()[pos] != 'T') {
+ if (!ParseNumber(pos, buf, day, 2) || pos == buf.Size() || buf.Data()[pos] != 'T') {
return NUdf::TUnboxedValuePod();
}
@@ -1286,53 +1334,88 @@ NUdf::TUnboxedValuePod ParseTimestamp(NUdf::TStringRef buf) {
ui32 hour, minute, second;
// skip 'T'
++pos;
- if (!ParseNumber(pos, buf, hour) || pos == buf.Size() || buf.Data()[pos] != ':') {
+ if (!ParseNumber(pos, buf, hour, 2) || pos == buf.Size() || buf.Data()[pos] != ':') {
return NUdf::TUnboxedValuePod();
}
// skip ':'
++pos;
- if (!ParseNumber(pos, buf, minute) || pos == buf.Size() || buf.Data()[pos] != ':') {
+ if (!ParseNumber(pos, buf, minute, 2) || pos == buf.Size() || buf.Data()[pos] != ':') {
return NUdf::TUnboxedValuePod();
}
// skip ':'
++pos;
- if (!ParseNumber(pos, buf, second) || pos == buf.Size()) {
+ if (!ParseNumber(pos, buf, second, 2) || pos == buf.Size()) {
return NUdf::TUnboxedValuePod();
}
- ui32 microseconds = 0;
- if (buf.Data()[pos] != 'Z') {
- if (buf.Data()[pos] != '.') {
- return NUdf::TUnboxedValuePod();
- }
+ bool waiting_for_z = true;
+ ui32 microseconds = 0;
+ if (buf.Data()[pos] == '.') {
+ // Skip dot
++pos;
ui32 prevPos = pos;
- if (!ParseNumber(pos, buf, microseconds)) {
+ if (!ParseNumber(pos, buf, microseconds, 6)) {
return NUdf::TUnboxedValuePod();
}
prevPos = pos - prevPos;
- if (prevPos > 6) {
- return NUdf::TUnboxedValuePod();
- }
while (prevPos < 6) {
microseconds *= 10;
++prevPos;
}
- if (pos == buf.Size() || buf.Data()[pos] != 'Z') {
+ // Skip unused digits
+ while (pos < buf.Size() && '0' <= buf.Data()[pos] && buf.Data()[pos] <= '9') {
+ ++pos;
+ }
+
+ if (pos == buf.Size()) {
return NUdf::TUnboxedValuePod();
}
+
}
- // skip 'Z'
- ++pos;
- if (pos != buf.Size()) {
- return NUdf::TUnboxedValuePod();
+ ui32 offset_hours = 0;
+ ui32 offset_minutes = 0;
+ bool is_offset_negative = false;
+ if (buf.Data()[pos] == '+' || buf.Data()[pos] == '-') {
+ is_offset_negative = buf.Data()[pos] == '-';
+
+ // Skip sign
+ ++pos;
+
+ if (!ParseNumber(pos, buf, offset_hours, 2) ||
+ pos == buf.Size() || buf.Data()[pos] != ':')
+ {
+ return NUdf::TUnboxedValuePod();
+ }
+
+ // Skip ':'
+ ++pos;
+
+ if (!ParseNumber(pos, buf, offset_minutes, 2) || pos != buf.Size()) {
+ return NUdf::TUnboxedValuePod();
+ }
+
+ waiting_for_z = false;
+ }
+
+ ui64 offset_value = ((offset_hours) * 60 + offset_minutes) * 60 * 1000000ull;
+
+ if (waiting_for_z) {
+ if (pos == buf.Size() || buf.Data()[pos] != 'Z') {
+ return NUdf::TUnboxedValuePod();
+ }
+
+ // skip 'Z'
+ ++pos;
+ if (pos != buf.Size()) {
+ return NUdf::TUnboxedValuePod();
+ }
}
ui32 timeValue;
@@ -1341,6 +1424,19 @@ NUdf::TUnboxedValuePod ParseTimestamp(NUdf::TStringRef buf) {
}
ui64 value = dateValue * 86400000000ull + timeValue * 1000000ull + microseconds;
+
+ if (is_offset_negative) {
+ if (UINT64_MAX - value < offset_value) {
+ return NUdf::TUnboxedValuePod();
+ }
+ value += offset_value;
+ } else {
+ if (value < offset_value) {
+ return NUdf::TUnboxedValuePod();
+ }
+ value -= offset_value;
+ }
+
if (value >= NUdf::MAX_TIMESTAMP) {
return NUdf::TUnboxedValuePod();
}
@@ -1359,38 +1455,38 @@ NUdf::TUnboxedValuePod ParseTzTimestamp(NUdf::TStringRef str) {
ui32 year, month, day;
ui32 pos = 0;
- if (!ParseNumber(pos, buf, year) || pos == buf.size() || buf.data()[pos] != '-') {
+ if (!ParseNumber(pos, buf, year, 4) || pos == buf.size() || buf.data()[pos] != '-') {
return NUdf::TUnboxedValuePod();
}
// skip '-'
++pos;
- if (!ParseNumber(pos, buf, month) || pos == buf.size() || buf.data()[pos] != '-') {
+ if (!ParseNumber(pos, buf, month, 2) || pos == buf.size() || buf.data()[pos] != '-') {
return NUdf::TUnboxedValuePod();
}
// skip '-'
++pos;
- if (!ParseNumber(pos, buf, day) || pos == buf.size() || buf.data()[pos] != 'T') {
+ if (!ParseNumber(pos, buf, day, 2) || pos == buf.size() || buf.data()[pos] != 'T') {
return NUdf::TUnboxedValuePod();
}
ui32 hour, minute, second;
// skip 'T'
++pos;
- if (!ParseNumber(pos, buf, hour) || pos == buf.size() || buf.data()[pos] != ':') {
+ if (!ParseNumber(pos, buf, hour, 2) || pos == buf.size() || buf.data()[pos] != ':') {
return NUdf::TUnboxedValuePod();
}
// skip ':'
++pos;
- if (!ParseNumber(pos, buf, minute) || pos == buf.size() || buf.data()[pos] != ':') {
+ if (!ParseNumber(pos, buf, minute, 2) || pos == buf.size() || buf.data()[pos] != ':') {
return NUdf::TUnboxedValuePod();
}
// skip ':'
++pos;
- if (!ParseNumber(pos, buf, second)) {
+ if (!ParseNumber(pos, buf, second, 2)) {
return NUdf::TUnboxedValuePod();
}
@@ -1402,20 +1498,22 @@ NUdf::TUnboxedValuePod ParseTzTimestamp(NUdf::TStringRef str) {
++pos;
ui32 prevPos = pos;
- if (!ParseNumber(pos, buf, microseconds)) {
+ if (!ParseNumber(pos, buf, microseconds, 6)) {
return NUdf::TUnboxedValuePod();
}
prevPos = pos - prevPos;
- if (prevPos > 6) {
- return NUdf::TUnboxedValuePod();
- }
while (prevPos < 6) {
microseconds *= 10;
++prevPos;
}
+ // Skip unused digits
+ while (pos < buf.size() && '0' <= buf.data()[pos] && buf.data()[pos] <= '9') {
+ ++pos;
+ }
+
if (pos != buf.size()) {
return NUdf::TUnboxedValuePod();
}
diff --git a/ydb/library/yql/minikql/mkql_type_ops_ut.cpp b/ydb/library/yql/minikql/mkql_type_ops_ut.cpp
index 01828a9a525..f81942d5d9c 100644
--- a/ydb/library/yql/minikql/mkql_type_ops_ut.cpp
+++ b/ydb/library/yql/minikql/mkql_type_ops_ut.cpp
@@ -5,7 +5,6 @@
#include <util/stream/format.h>
#include <util/stream/str.h>
-
using namespace NYql;
using namespace NKikimr;
using namespace NKikimr::NMiniKQL;
@@ -103,4 +102,59 @@ Y_UNIT_TEST_SUITE(TMiniKQLTypeOps) {
UNIT_ASSERT_VALUES_EQUAL(out.Str(), TStringBuilder() << "\x00\x00\x00\x00\x00\x00\x03\x7a"sv << "\x00\x01"sv);
}
}
+
+    // Test-local shorthand: forwards `buf` to ValueFromString with the Timestamp data slot
+    // and returns whatever that call produces.
+    NUdf::TUnboxedValuePod ParseTimestamp(NUdf::TStringRef buf) {
+        const auto slot = NUdf::EDataSlot::Timestamp;
+        return ValueFromString(slot, buf);
+    }
+
+    // Checks Timestamp parsing from text: malformed inputs must yield an empty value,
+    // extra fractional digits must not change the result, and an explicit UTC offset
+    // must denote the same instant as the equivalent 'Z' form.
+    Y_UNIT_TEST(TimestampSeriailization) {
+        // Inputs that must be rejected: trailing garbage, no 'Z'/offset terminator,
+        // a dot with no fractional digits, and a three-digit month field.
+        UNIT_ASSERT(!ParseTimestamp("2020-07-28T21:46:05.55045#"));
+        UNIT_ASSERT(!ParseTimestamp("2020-07-28T21:46:05.55045"));
+        UNIT_ASSERT(!ParseTimestamp("2020-07-28T21:46:05."));
+        UNIT_ASSERT(!ParseTimestamp("2020-07-28T21:46:05.Z"));
+        UNIT_ASSERT(!ParseTimestamp("2020-071-28T21:46:05.1Z"));
+
+        // Well-formed inputs, terminated either by 'Z' or by a "+hh:mm" offset.
+        UNIT_ASSERT(!!ParseTimestamp("2020-07-28T21:46:05.1Z"));
+        UNIT_ASSERT(!!ParseTimestamp("2020-07-28T21:46:05.1+01:00"));
+
+        // 4294969318 and 4294967318 equal 2022 and 22 modulo 2^32; such fields must be
+        // rejected instead of being accepted after wrapping around.
+        UNIT_ASSERT(!ParseTimestamp("4294969318-09-4294967318T14:28:17Z"));
+
+        // Fractional digits past the sixth (microsecond) position are ignored.
+        const auto& val1 = ParseTimestamp("2022-09-15T16:42:01.123456Z");
+        const auto& val2 = ParseTimestamp("2022-09-15T16:42:01.123456131231223Z");
+
+        UNIT_ASSERT(!!val1);
+        UNIT_ASSERT(!!val2);
+        UNIT_ASSERT_VALUES_EQUAL(val1.Get<ui64>(), val2.Get<ui64>());
+
+        // A "-12:34" offset means local time is 12h34m behind UTC, so
+        // 2022-09-15T16:42:01 local corresponds to 2022-09-16T05:16:01 UTC.
+        const auto& val3 = ParseTimestamp("2022-09-16T05:16:01.123456Z");
+        const auto& val4 = ParseTimestamp("2022-09-15T16:42:01.123456131231223-12:34");
+
+        UNIT_ASSERT(!!val3);
+        UNIT_ASSERT(!!val4);
+        // Compare the offset form with its UTC equivalent (previously val1/val2 were
+        // compared a second time and val3/val4 were never checked against each other).
+        UNIT_ASSERT_VALUES_EQUAL(val3.Get<ui64>(), val4.Get<ui64>());
+    }
+
+    // Test-local shorthand: forwards `buf` to ValueFromString with the Datetime data slot
+    // and returns whatever that call produces.
+    NUdf::TUnboxedValuePod ParseDatetime(NUdf::TStringRef buf) {
+        const auto slot = NUdf::EDataSlot::Datetime;
+        return ValueFromString(slot, buf);
+    }
+
+    // Checks Datetime parsing from text: malformed inputs must yield an empty value and
+    // an explicit UTC offset must denote the same instant as the equivalent 'Z' form.
+    Y_UNIT_TEST(DatetimeSeriailization) {
+        // Inputs that must be rejected.
+        UNIT_ASSERT(!ParseDatetime("2020-07-28T21:46:05.55045#"));
+        UNIT_ASSERT(!ParseDatetime("2020-07-28T21:46:05.55045"));
+        UNIT_ASSERT(!ParseDatetime("2020-07-28T21:46:05"));
+        UNIT_ASSERT(!ParseDatetime("2020-07-28T21:46:05."));
+        UNIT_ASSERT(!ParseDatetime("2020-071-28T21:46:05Z"));
+
+        // Well-formed inputs, terminated either by 'Z' or by a "+hh:mm" offset.
+        UNIT_ASSERT(!!ParseDatetime("2020-07-28T21:46:05Z"));
+        UNIT_ASSERT(!!ParseDatetime("2020-07-28T21:46:05+01:00"));
+
+        // Year and day fields far too large for a valid date must be rejected.
+        UNIT_ASSERT(!ParseDatetime("4294969318-09-4294967318T14:28:17Z"));
+
+        // 16:42:01 at +12:34 is 04:08:01 UTC, so both spellings must parse to one value.
+        const NUdf::TUnboxedValuePod utcForm = ParseDatetime("2022-09-15T04:08:01Z");
+        const NUdf::TUnboxedValuePod offsetForm = ParseDatetime("2022-09-15T16:42:01+12:34");
+
+        UNIT_ASSERT(!!utcForm);
+        UNIT_ASSERT(!!offsetForm);
+        UNIT_ASSERT_VALUES_EQUAL(utcForm.Get<ui32>(), offsetForm.Get<ui32>());
+
+    }
}