aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/string_utils/csv/csv.cpp
diff options
context:
space:
mode:
authorivanmorozov <ivanmorozov@yandex-team.com>2022-12-23 17:22:10 +0300
committerivanmorozov <ivanmorozov@yandex-team.com>2022-12-23 17:22:10 +0300
commitf6fd83a9bc9c2d6bb9502d3dd77adb4f5c31ceae (patch)
treecca463108bcf236b61344602b47f474ce42b1ff4 /library/cpp/string_utils/csv/csv.cpp
parentaf6d4963aeec3543ef895865a0a24fa2e039efa3 (diff)
downloadydb-f6fd83a9bc9c2d6bb9502d3dd77adb4f5c31ceae.tar.gz
parse line on deploy by option
Diffstat (limited to 'library/cpp/string_utils/csv/csv.cpp')
-rw-r--r--library/cpp/string_utils/csv/csv.cpp82
1 files changed, 82 insertions, 0 deletions
diff --git a/library/cpp/string_utils/csv/csv.cpp b/library/cpp/string_utils/csv/csv.cpp
new file mode 100644
index 0000000000..218473c62c
--- /dev/null
+++ b/library/cpp/string_utils/csv/csv.cpp
@@ -0,0 +1,82 @@
+#include "csv.h"
+
+TStringBuf NCsvFormat::CsvSplitter::Consume() {
+ if (Begin == End) {
+ return nullptr;
+ }
+ TString::iterator TokenStart = Begin;
+ TString::iterator TokenEnd = Begin;
+ if (Quote == '\0') {
+ while (1) {
+ if (TokenEnd == End || *TokenEnd == Delimeter) {
+ Begin = TokenEnd;
+ return TStringBuf(TokenStart, TokenEnd);
+ }
+ ++TokenEnd;
+ }
+ } else {
+ bool Escape = false;
+ if (*Begin == Quote) {
+ Escape = true;
+ ++TokenStart;
+ ++TokenEnd;
+ Y_ENSURE(TokenStart != End, TStringBuf("RFC4180 violation: quotation mark must be followed by something"));
+ }
+ while (1) {
+ if (TokenEnd == End || (!Escape && *TokenEnd == Delimeter)) {
+ Begin = TokenEnd;
+ return TStringBuf(TokenStart, TokenEnd);
+ } else if (*TokenEnd == Quote) {
+ Y_ENSURE(Escape, TStringBuf("RFC4180 violation: quotation mark must be in the escaped string only"));
+ if (TokenEnd + 1 == End) {
+ Begin = TokenEnd + 1;
+ } else if (*(TokenEnd + 1) == Delimeter) {
+ Begin = TokenEnd + 1;
+ } else if (*(TokenEnd + 1) == Quote) {
+ CustomStringBufs.push_back(TStringBuf(TokenStart, (TokenEnd + 1)));
+ TokenEnd += 2;
+ TokenStart = TokenEnd;
+ continue;
+ } else {
+ Y_ENSURE(false, TStringBuf("RFC4180 violation: in escaped string quotation mark must be followed by a delimiter, EOL or another quotation mark"));
+ }
+ if (CustomStringBufs.size()) {
+ CustomString.clear();
+ for (auto CustomStringBuf : CustomStringBufs) {
+ CustomString += TString{ CustomStringBuf };
+ }
+ CustomString += TString{ TStringBuf(TokenStart, TokenEnd) };
+ CustomStringBufs.clear();
+ return TStringBuf(CustomString);
+ } else {
+ return TStringBuf(TokenStart, TokenEnd);
+ }
+ }
+ ++TokenEnd;
+ }
+ }
+};
+
+TString NCsvFormat::TLinesSplitter::ConsumeLine() {
+ bool Escape = false;
+ TString result;
+ TString line;
+ while (Input.ReadLine(line)) {
+ for (auto it = line.begin(); it != line.end(); ++it) {
+ if (*it == Quote) {
+ Escape = !Escape;
+ }
+ }
+ if (!result) {
+ result = line;
+ } else {
+ result += line;
+ }
+ if (!Escape) {
+ break;
+ } else {
+ result += "\n";
+ }
+ }
+ return result;
+};