about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Vitaly Stoyan <vvvv@ydb.tech> 2024-09-13 20:50:36 +0300
committer GitHub <noreply@github.com> 2024-09-13 20:50:36 +0300
commit f405ee4e9988846158a7efde1ee0b49aefe2d5c8 (patch)
tree e4d769053087754a120448afb78891e3a08c7d62
parent fc767cd402a44ab11375cd8ec7f0b235d3e90f36 (diff)
download ydb-f405ee4e9988846158a7efde1ee0b49aefe2d5c8.tar.gz
Fixed PG error positions (#9229)
-rw-r--r-- ydb/library/yql/parser/pg_wrapper/parser.cpp 21
-rw-r--r-- ydb/library/yql/parser/pg_wrapper/ut/parser_ut.cpp 27
-rw-r--r-- ydb/library/yql/public/issue/yql_issue.cpp 9
-rw-r--r-- ydb/library/yql/public/issue/yql_issue.h 8
4 files changed, 55 insertions, 10 deletions
diff --git a/ydb/library/yql/parser/pg_wrapper/parser.cpp b/ydb/library/yql/parser/pg_wrapper/parser.cpp
index c5f1a61953..d0159fcdd1 100644
--- a/ydb/library/yql/parser/pg_wrapper/parser.cpp
+++ b/ydb/library/yql/parser/pg_wrapper/parser.cpp
@@ -208,11 +208,22 @@ void PGParse(const TString& input, IPGParseEvents& events) {
};
if (parsetree_and_error.error) {
- TPosition position(1, 1);
- TTextWalker walker(position);
- size_t distance = Min(size_t(parsetree_and_error.error->cursorpos), input.Size());
- for (size_t i = 0; i < distance; ++i) {
- walker.Advance(input[i]);
+ TPosition position(0, 1);
+ // cursorpos is about codepoints, not bytes
+ TTextWalker walker(position, true);
+ auto cursorpos = parsetree_and_error.error->cursorpos;
+ size_t codepoints = 0;
+ if (cursorpos >= 0) {
+ for (size_t i = 0; i < input.Size(); ++i) {
+ if (codepoints == cursorpos) {
+ break;
+ }
+
+ if (!TTextWalker::IsUtf8Intermediate(input[i])) {
+ ++codepoints;
+ }
+ walker.Advance(input[i]);
+ }
}
events.OnError(TIssue(position, "ERROR: " + TString(parsetree_and_error.error->message) + "\n"));
diff --git a/ydb/library/yql/parser/pg_wrapper/ut/parser_ut.cpp b/ydb/library/yql/parser/pg_wrapper/ut/parser_ut.cpp
index 1e394933dc..df79077ccf 100644
--- a/ydb/library/yql/parser/pg_wrapper/ut/parser_ut.cpp
+++ b/ydb/library/yql/parser/pg_wrapper/ut/parser_ut.cpp
@@ -28,7 +28,7 @@ const TStringBuf ExpectedSelect1 = "({RAWSTMT :stmt {SELECTSTMT :distinctClause
const TString Error1 = "ERROR: syntax error at or near \"SELECT1\"\n";
-Y_UNIT_TEST_SUITE(TWrapperTests) {
+Y_UNIT_TEST_SUITE(ParseTests) {
Y_UNIT_TEST(TestOk) {
TEvents events;
PGParse(TString("SELECT 1"), events);
@@ -47,11 +47,34 @@ Y_UNIT_TEST_SUITE(TWrapperTests) {
UNIT_ASSERT_VALUES_EQUAL(events.Issue->Position.Row, 2);
UNIT_ASSERT_VALUES_EQUAL(events.Issue->Position.Column, 3);
}
+
+ Y_UNIT_TEST(TestErrorPosUtf8) {
+ {
+ TEvents events;
+ PGParse(TString("/* привет */SELECT1"), events);
+ UNIT_ASSERT(!events.Result);
+ UNIT_ASSERT(events.Issue);
+ auto msg = events.Issue->GetMessage();
+ UNIT_ASSERT_VALUES_EQUAL(events.Issue->Position.Row, 1);
+ UNIT_ASSERT_VALUES_EQUAL(events.Issue->Position.Column, 13);
+ }
+
+ {
+ TEvents events;
+ PGParse(TString("/* привет */\n\nSELECT1"), events);
+ UNIT_ASSERT(!events.Result);
+ UNIT_ASSERT(events.Issue);
+ auto msg = events.Issue->GetMessage();
+ UNIT_ASSERT_NO_DIFF(msg, Error1);
+ UNIT_ASSERT_VALUES_EQUAL(events.Issue->Position.Row, 3);
+ UNIT_ASSERT_VALUES_EQUAL(events.Issue->Position.Column, 1);
+ }
+ }
}
const ui32 threadsCount = 10;
-Y_UNIT_TEST_SUITE(TMTWrapperTests) {
+Y_UNIT_TEST_SUITE(MTParseTests) {
Y_UNIT_TEST(TestOk) {
TVector<THolder<TThread>> threads;
for (ui32 i = 0; i < threadsCount; ++i) {
diff --git a/ydb/library/yql/public/issue/yql_issue.cpp b/ydb/library/yql/public/issue/yql_issue.cpp
index 4a176cc29b..ee77d51db3 100644
--- a/ydb/library/yql/public/issue/yql_issue.cpp
+++ b/ydb/library/yql/public/issue/yql_issue.cpp
@@ -53,13 +53,18 @@ TTextWalker& TTextWalker::Advance(char c) {
return *this;
}
+ ui32 charDistance = 1;
+ if (Utf8Aware && IsUtf8Intermediate(c)) {
+ charDistance = 0;
+ }
+
// either not '\r' or second '\r'
if (LfCount) {
Position.Row += LfCount;
- Position.Column = 1;
+ Position.Column = charDistance;
LfCount = 0;
} else {
- Position.Column += 1 + (HaveCr && c != '\r');
+ Position.Column += charDistance + (HaveCr && c != '\r');
}
HaveCr = (c == '\r');
return *this;
diff --git a/ydb/library/yql/public/issue/yql_issue.h b/ydb/library/yql/public/issue/yql_issue.h
index 4db21ead9e..8717a36a27 100644
--- a/ydb/library/yql/public/issue/yql_issue.h
+++ b/ydb/library/yql/public/issue/yql_issue.h
@@ -55,13 +55,18 @@ struct TPosition {
class TTextWalker {
public:
- TTextWalker(TPosition& position)
+ TTextWalker(TPosition& position, bool utf8Aware = false)
: Position(position)
+ , Utf8Aware(utf8Aware)
, HaveCr(false)
, LfCount(0)
{
}
+ static inline bool IsUtf8Intermediate(char c) {
+ return (c & 0xC0) == 0x80;
+ }
+
template<typename T>
TTextWalker& Advance(const T& buf) {
for (char c : buf) {
@@ -74,6 +79,7 @@ public:
private:
TPosition& Position;
+ const bool Utf8Aware;
bool HaveCr;
ui32 LfCount;
};