diff options
author | Vitaly Stoyan <vvvv@ydb.tech> | 2024-09-13 20:50:36 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-09-13 20:50:36 +0300 |
commit | f405ee4e9988846158a7efde1ee0b49aefe2d5c8 (patch) | |
tree | e4d769053087754a120448afb78891e3a08c7d62 | |
parent | fc767cd402a44ab11375cd8ec7f0b235d3e90f36 (diff) | |
download | ydb-f405ee4e9988846158a7efde1ee0b49aefe2d5c8.tar.gz |
Fixed PG error positions (#9229)
-rw-r--r-- | ydb/library/yql/parser/pg_wrapper/parser.cpp | 21 | ||||
-rw-r--r-- | ydb/library/yql/parser/pg_wrapper/ut/parser_ut.cpp | 27 | ||||
-rw-r--r-- | ydb/library/yql/public/issue/yql_issue.cpp | 9 | ||||
-rw-r--r-- | ydb/library/yql/public/issue/yql_issue.h | 8 |
4 files changed, 55 insertions, 10 deletions
diff --git a/ydb/library/yql/parser/pg_wrapper/parser.cpp b/ydb/library/yql/parser/pg_wrapper/parser.cpp index c5f1a61953..d0159fcdd1 100644 --- a/ydb/library/yql/parser/pg_wrapper/parser.cpp +++ b/ydb/library/yql/parser/pg_wrapper/parser.cpp @@ -208,11 +208,22 @@ void PGParse(const TString& input, IPGParseEvents& events) { }; if (parsetree_and_error.error) { - TPosition position(1, 1); - TTextWalker walker(position); - size_t distance = Min(size_t(parsetree_and_error.error->cursorpos), input.Size()); - for (size_t i = 0; i < distance; ++i) { - walker.Advance(input[i]); + TPosition position(0, 1); + // cursorpos is about codepoints, not bytes + TTextWalker walker(position, true); + auto cursorpos = parsetree_and_error.error->cursorpos; + size_t codepoints = 0; + if (cursorpos >= 0) { + for (size_t i = 0; i < input.Size(); ++i) { + if (codepoints == cursorpos) { + break; + } + + if (!TTextWalker::IsUtf8Intermediate(input[i])) { + ++codepoints; + } + walker.Advance(input[i]); + } } events.OnError(TIssue(position, "ERROR: " + TString(parsetree_and_error.error->message) + "\n")); diff --git a/ydb/library/yql/parser/pg_wrapper/ut/parser_ut.cpp b/ydb/library/yql/parser/pg_wrapper/ut/parser_ut.cpp index 1e394933dc..df79077ccf 100644 --- a/ydb/library/yql/parser/pg_wrapper/ut/parser_ut.cpp +++ b/ydb/library/yql/parser/pg_wrapper/ut/parser_ut.cpp @@ -28,7 +28,7 @@ const TStringBuf ExpectedSelect1 = "({RAWSTMT :stmt {SELECTSTMT :distinctClause const TString Error1 = "ERROR: syntax error at or near \"SELECT1\"\n"; -Y_UNIT_TEST_SUITE(TWrapperTests) { +Y_UNIT_TEST_SUITE(ParseTests) { Y_UNIT_TEST(TestOk) { TEvents events; PGParse(TString("SELECT 1"), events); @@ -47,11 +47,34 @@ Y_UNIT_TEST_SUITE(TWrapperTests) { UNIT_ASSERT_VALUES_EQUAL(events.Issue->Position.Row, 2); UNIT_ASSERT_VALUES_EQUAL(events.Issue->Position.Column, 3); } + + Y_UNIT_TEST(TestErrorPosUtf8) { + { + TEvents events; + PGParse(TString("/* привет */SELECT1"), events); + UNIT_ASSERT(!events.Result); + UNIT_ASSERT(events.Issue); + auto msg = events.Issue->GetMessage(); + UNIT_ASSERT_VALUES_EQUAL(events.Issue->Position.Row, 1); + UNIT_ASSERT_VALUES_EQUAL(events.Issue->Position.Column, 13); + } + + { + TEvents events; + PGParse(TString("/* привет */\n\nSELECT1"), events); + UNIT_ASSERT(!events.Result); + UNIT_ASSERT(events.Issue); + auto msg = events.Issue->GetMessage(); + UNIT_ASSERT_NO_DIFF(msg, Error1); + UNIT_ASSERT_VALUES_EQUAL(events.Issue->Position.Row, 3); + UNIT_ASSERT_VALUES_EQUAL(events.Issue->Position.Column, 1); + } + } } const ui32 threadsCount = 10; -Y_UNIT_TEST_SUITE(TMTWrapperTests) { +Y_UNIT_TEST_SUITE(MTParseTests) { Y_UNIT_TEST(TestOk) { TVector<THolder<TThread>> threads; for (ui32 i = 0; i < threadsCount; ++i) { diff --git a/ydb/library/yql/public/issue/yql_issue.cpp b/ydb/library/yql/public/issue/yql_issue.cpp index 4a176cc29b..ee77d51db3 100644 --- a/ydb/library/yql/public/issue/yql_issue.cpp +++ b/ydb/library/yql/public/issue/yql_issue.cpp @@ -53,13 +53,18 @@ TTextWalker& TTextWalker::Advance(char c) { return *this; } + ui32 charDistance = 1; + if (Utf8Aware && IsUtf8Intermediate(c)) { + charDistance = 0; + } + // either not '\r' or second '\r' if (LfCount) { Position.Row += LfCount; - Position.Column = 1; + Position.Column = charDistance; LfCount = 0; } else { - Position.Column += 1 + (HaveCr && c != '\r'); + Position.Column += charDistance + (HaveCr && c != '\r'); } HaveCr = (c == '\r'); return *this; diff --git a/ydb/library/yql/public/issue/yql_issue.h b/ydb/library/yql/public/issue/yql_issue.h index 4db21ead9e..8717a36a27 100644 --- a/ydb/library/yql/public/issue/yql_issue.h +++ b/ydb/library/yql/public/issue/yql_issue.h @@ -55,13 +55,18 @@ struct TPosition { class TTextWalker { public: - TTextWalker(TPosition& position) + TTextWalker(TPosition& position, bool utf8Aware = false) : Position(position) + , Utf8Aware(utf8Aware) , HaveCr(false) , LfCount(0) { } + static inline bool IsUtf8Intermediate(char c) { + return (c & 0xC0) == 0x80; + } + template<typename T> TTextWalker& Advance(const T& buf) { for (char c : buf) { @@ -74,6 +79,7 @@ public: private: TPosition& Position; + const bool Utf8Aware; bool HaveCr; ui32 LfCount; }; |