aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/string_utils/csv
diff options
context:
space:
mode:
author ivanmorozov <ivanmorozov@yandex-team.com> 2022-12-23 17:22:10 +0300
committer ivanmorozov <ivanmorozov@yandex-team.com> 2022-12-23 17:22:10 +0300
commit f6fd83a9bc9c2d6bb9502d3dd77adb4f5c31ceae (patch)
tree cca463108bcf236b61344602b47f474ce42b1ff4 /library/cpp/string_utils/csv
parent af6d4963aeec3543ef895865a0a24fa2e039efa3 (diff)
download ydb-f6fd83a9bc9c2d6bb9502d3dd77adb4f5c31ceae.tar.gz
parse line on deploy by option
Diffstat (limited to 'library/cpp/string_utils/csv')
-rw-r--r-- library/cpp/string_utils/csv/CMakeLists.darwin.txt 17
-rw-r--r-- library/cpp/string_utils/csv/CMakeLists.linux-aarch64.txt 18
-rw-r--r-- library/cpp/string_utils/csv/CMakeLists.linux.txt 18
-rw-r--r-- library/cpp/string_utils/csv/CMakeLists.txt 15
-rw-r--r-- library/cpp/string_utils/csv/csv.cpp 82
-rw-r--r-- library/cpp/string_utils/csv/csv.h 64
6 files changed, 214 insertions, 0 deletions
diff --git a/library/cpp/string_utils/csv/CMakeLists.darwin.txt b/library/cpp/string_utils/csv/CMakeLists.darwin.txt
new file mode 100644
index 0000000000..7dffad3566
--- /dev/null
+++ b/library/cpp/string_utils/csv/CMakeLists.darwin.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# Declare the CSV string-utils library target; its sources are attached below.
+add_library(cpp-string_utils-csv)
+# PUBLIC: these dependencies are also propagated to targets that link this library.
+target_link_libraries(cpp-string_utils-csv PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+)
+# The library is built from a single source file.
+target_sources(cpp-string_utils-csv PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/string_utils/csv/csv.cpp
+)
diff --git a/library/cpp/string_utils/csv/CMakeLists.linux-aarch64.txt b/library/cpp/string_utils/csv/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..1941d5ba17
--- /dev/null
+++ b/library/cpp/string_utils/csv/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,18 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# Declare the CSV string-utils library target; its sources are attached below.
+add_library(cpp-string_utils-csv)
+# PUBLIC: these dependencies are also propagated to targets that link this library.
+target_link_libraries(cpp-string_utils-csv PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+)
+# The library is built from a single source file.
+target_sources(cpp-string_utils-csv PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/string_utils/csv/csv.cpp
+)
diff --git a/library/cpp/string_utils/csv/CMakeLists.linux.txt b/library/cpp/string_utils/csv/CMakeLists.linux.txt
new file mode 100644
index 0000000000..1941d5ba17
--- /dev/null
+++ b/library/cpp/string_utils/csv/CMakeLists.linux.txt
@@ -0,0 +1,18 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# Declare the CSV string-utils library target; its sources are attached below.
+add_library(cpp-string_utils-csv)
+# PUBLIC: these dependencies are also propagated to targets that link this library.
+target_link_libraries(cpp-string_utils-csv PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+)
+# The library is built from a single source file.
+target_sources(cpp-string_utils-csv PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/string_utils/csv/csv.cpp
+)
diff --git a/library/cpp/string_utils/csv/CMakeLists.txt b/library/cpp/string_utils/csv/CMakeLists.txt
new file mode 100644
index 0000000000..3e0811fb22
--- /dev/null
+++ b/library/cpp/string_utils/csv/CMakeLists.txt
@@ -0,0 +1,15 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Pick the platform-specific list file for this directory:
+#   aarch64 Unix (not Apple, not Android) -> CMakeLists.linux-aarch64.txt
+#   Apple                                 -> CMakeLists.darwin.txt
+#   x86_64 Unix (not Apple, not Android)  -> CMakeLists.linux.txt
+# No file is included (and no target is defined here) on any other platform.
+if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND UNIX AND NOT APPLE AND NOT ANDROID)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (APPLE)
+ include(CMakeLists.darwin.txt)
+elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND UNIX AND NOT APPLE AND NOT ANDROID)
+ include(CMakeLists.linux.txt)
+endif()
diff --git a/library/cpp/string_utils/csv/csv.cpp b/library/cpp/string_utils/csv/csv.cpp
new file mode 100644
index 0000000000..218473c62c
--- /dev/null
+++ b/library/cpp/string_utils/csv/csv.cpp
@@ -0,0 +1,82 @@
+#include "csv.h"
+
+// Extracts the next field of the line and returns a view of its value.
+//
+// After the call Begin is left on the character that terminated the field
+// (a delimiter) or on End; the caller is expected to call Step() to move past
+// the delimiter before consuming the next field.
+//
+// Modes:
+//  - Quote == '\0': quoting is ignored and the field is everything up to the
+//    next Delimeter (or End).
+//  - otherwise: a field that starts with Quote is treated as escaped. Inside it
+//    delimiters are ordinary data and a doubled quote stands for one literal
+//    quote character; the surrounding quotes are not part of the result.
+//
+// Return value:
+//  - when Begin == End, a TStringBuf constructed from nullptr;
+//  - otherwise a view into the source string, or, for escaped fields that
+//    contained doubled quotes, a view into the internal CustomString buffer.
+//    That buffer is rewritten by a later Consume() call that needs unescaping,
+//    so copy the value if it must outlive the next call.
+//
+// Y_ENSURE fails on the RFC 4180 violations spelled out in the messages below.
+TStringBuf NCsvFormat::CsvSplitter::Consume() {
+    if (Begin == End) {
+        return nullptr;
+    }
+    TString::iterator TokenStart = Begin;
+    TString::iterator TokenEnd = Begin;
+
+    if (Quote == '\0') {
+        // Plain split: no quote handling at all.
+        while (TokenEnd != End && *TokenEnd != Delimeter) {
+            ++TokenEnd;
+        }
+        Begin = TokenEnd;
+        return TStringBuf(TokenStart, TokenEnd);
+    }
+
+    bool Escape = false;
+    if (*Begin == Quote) {
+        // Opening quote: the value starts right after it.
+        Escape = true;
+        ++TokenStart;
+        ++TokenEnd;
+        Y_ENSURE(TokenStart != End, TStringBuf("RFC4180 violation: quotation mark must be followed by something"));
+    }
+
+    // Segments may be left over if a previous call failed a Y_ENSURE in the
+    // middle of an escaped field; they must not be glued to this field.
+    CustomStringBufs.clear();
+
+    while (true) {
+        if (TokenEnd == End || (!Escape && *TokenEnd == Delimeter)) {
+            // End of input, or an unquoted field terminated by a delimiter.
+            Begin = TokenEnd;
+            break;
+        }
+        if (*TokenEnd == Quote) {
+            Y_ENSURE(Escape, TStringBuf("RFC4180 violation: quotation mark must be in the escaped string only"));
+            const TString::iterator Next = TokenEnd + 1;
+            if (Next == End || *Next == Delimeter) {
+                // Closing quote: the value ends before it, parsing resumes after it.
+                Begin = Next;
+                break;
+            }
+            if (*Next == Quote) {
+                // Doubled quote: keep the text so far plus ONE quote character,
+                // then continue after the pair.
+                CustomStringBufs.push_back(TStringBuf(TokenStart, Next));
+                TokenEnd = Next + 1;
+                TokenStart = TokenEnd;
+                continue;
+            }
+            Y_ENSURE(false, TStringBuf("RFC4180 violation: in escaped string quotation mark must be followed by a delimiter, EOL or another quotation mark"));
+        }
+        ++TokenEnd;
+    }
+
+    if (CustomStringBufs.empty()) {
+        // No unescaping was needed: return a view straight into the source.
+        return TStringBuf(TokenStart, TokenEnd);
+    }
+
+    // Assemble the unescaped value. This is done on every exit path, including
+    // input that ends inside the quoted field, so that collected segments are
+    // neither dropped from this value nor carried over into the next one.
+    CustomString.clear();
+    for (const TStringBuf Segment : CustomStringBufs) {
+        CustomString += Segment;
+    }
+    CustomString += TStringBuf(TokenStart, TokenEnd);
+    CustomStringBufs.clear();
+    return TStringBuf(CustomString);
+}
+
+// Reads one logical CSV record from Input.
+//
+// Lines are read with Input.ReadLine() and appended to the record. Every Quote
+// character seen toggles the "inside quotes" state; while that state is on
+// after a line has been read, a "\n" is appended and the next line is joined
+// to the same record. Returns what has been accumulated when the state is off
+// after a line, or when ReadLine() stops yielding lines (an empty string if the
+// very first ReadLine() yields nothing).
+TString NCsvFormat::TLinesSplitter::ConsumeLine() {
+    TString record;
+    TString chunk;
+    bool insideQuotes = false;
+
+    while (Input.ReadLine(chunk)) {
+        for (const char ch : chunk) {
+            if (ch == Quote) {
+                insideQuotes = !insideQuotes;
+            }
+        }
+
+        if (record.empty()) {
+            record = chunk;
+        } else {
+            record += chunk;
+        }
+
+        if (!insideQuotes) {
+            break;
+        }
+        // The line break belongs to a quoted value: keep it and read on.
+        record += "\n";
+    }
+    return record;
+}
diff --git a/library/cpp/string_utils/csv/csv.h b/library/cpp/string_utils/csv/csv.h
new file mode 100644
index 0000000000..8cb96e6bb9
--- /dev/null
+++ b/library/cpp/string_utils/csv/csv.h
@@ -0,0 +1,64 @@
+#pragma once
+
+#include <util/generic/yexception.h>
+#include <util/generic/strbuf.h>
+#include <util/generic/vector.h>
+#include <util/stream/input.h>
+
+/*
+ Helpers for splitting CSV text into records and fields according to RFC 4180.
+*/
+
+namespace NCsvFormat {
+ // Reads logical CSV records from an input stream; a record may span several
+ // physical lines when a line break occurs between quote characters.
+ class TLinesSplitter {
+ public:
+     // Only a reference to `input` is stored, so the stream has to outlive
+     // the splitter. `quote` is the character that toggles quoted state.
+     TLinesSplitter(IInputStream& input, const char quote = '"')
+         : Input(input)
+         , Quote(quote)
+     {
+     }
+
+     // Returns the next record read from the stream.
+     TString ConsumeLine();
+
+ private:
+     IInputStream& Input;
+     const char Quote;
+ };
+
+ // Splits a single CSV record into fields.
+ class CsvSplitter {
+ public:
+     // Prepares to split `data` into fields separated by `delimeter`.
+     // Passing quote = '\0' switches quote handling off, so the record is
+     // split on the delimiter only, like a simple split.
+     // Only iterators into `data` are stored: the string has to stay alive
+     // and unmodified for as long as the splitter is used.
+     CsvSplitter(TString& data, const char delimeter = ',', const char quote = '"')
+         : Delimeter(delimeter)
+         , Quote(quote)
+         , Begin(data.begin())
+         , End(data.end())
+     {
+     }
+
+     // Advances Begin by one character (after Consume() that character is the
+     // delimiter that ended the field). Returns false, without moving, when
+     // Begin is already at End.
+     bool Step() {
+         const bool canAdvance = Begin != End;
+         if (canAdvance) {
+             ++Begin;
+         }
+         return canAdvance;
+     }
+
+     // Returns the value of the field starting at Begin.
+     TStringBuf Consume();
+
+     // Consumes the rest of the record and returns every field as an owned
+     // string. At least one element is always produced.
+     explicit operator TVector<TString>() {
+         TVector<TString> fields;
+
+         for (;;) {
+             fields.push_back(TString{Consume()});
+             if (!Step()) {
+                 break;
+             }
+         }
+
+         return fields;
+     }
+
+ private:
+     const char Delimeter;
+     const char Quote;
+     // Current read position and the end of the record being split.
+     TString::iterator Begin;
+     const TString::const_iterator End;
+     // Scratch storage used by Consume() for values with doubled quotes.
+     TString CustomString;
+     TVector<TStringBuf> CustomStringBufs;
+ };
+}