about summary refs log tree commit diff stats
path: root/library
diff options
context:
space:
mode:
author Alexander Smirnov <alex@ydb.tech> 2024-12-10 18:03:37 +0000
committer Alexander Smirnov <alex@ydb.tech> 2024-12-10 18:03:37 +0000
commit d2954d4f1174129e3daa53998ff9f0507063d15d (patch)
tree 26768147deb7964ec9008cfe507e89ec884114cd /library
parent 15933e4b4c25961cecb34190ed5108907efaa4d5 (diff)
parent 0ca47b802c1f53773f1b14b88f851e959d227e77 (diff)
download ydb-d2954d4f1174129e3daa53998ff9f0507063d15d.tar.gz
Merge branch 'rightlib' into mergelibs-241210-1802
Diffstat (limited to 'library')
-rw-r--r-- library/cpp/string_utils/csv/csv.cpp 28
-rw-r--r-- library/cpp/string_utils/csv/csv.h 10
2 files changed, 24 insertions, 14 deletions
diff --git a/library/cpp/string_utils/csv/csv.cpp b/library/cpp/string_utils/csv/csv.cpp
index bca9a5d7f1..fd3d932fcd 100644
--- a/library/cpp/string_utils/csv/csv.cpp
+++ b/library/cpp/string_utils/csv/csv.cpp
@@ -4,8 +4,8 @@ TStringBuf NCsvFormat::CsvSplitter::Consume() {
if (Begin == End) {
return nullptr;
}
- TString::iterator TokenStart = Begin;
- TString::iterator TokenEnd = Begin;
+ TString::const_iterator TokenStart = Begin;
+ TString::const_iterator TokenEnd = Begin;
if (Quote == '\0') {
while (1) {
if (TokenEnd == End || *TokenEnd == Delimeter) {
@@ -33,21 +33,29 @@ TStringBuf NCsvFormat::CsvSplitter::Consume() {
} else if (*(TokenEnd + 1) == Delimeter) {
Begin = TokenEnd + 1;
} else if (*(TokenEnd + 1) == Quote) {
- CustomStringBufs.push_back(TStringBuf(TokenStart, (TokenEnd + 1)));
+ TempResultParts.push_back(TStringBuf(TokenStart, (TokenEnd + 1)));
TokenEnd += 2;
TokenStart = TokenEnd;
continue;
} else {
Y_ENSURE(false, TStringBuf("RFC4180 violation: in escaped string quotation mark must be followed by a delimiter, EOL or another quotation mark"));
}
- if (CustomStringBufs.size()) {
- CustomString.clear();
- for (auto CustomStringBuf : CustomStringBufs) {
- CustomString += TString{ CustomStringBuf };
+ if (TempResultParts.size()) {
+ auto newEscapedStringPtr = std::make_unique<TString>();
+ size_t newStringSize = 0;
+ for (auto tempResultPart : TempResultParts) {
+ newStringSize += tempResultPart.size();
}
- CustomString += TString{ TStringBuf(TokenStart, TokenEnd) };
- CustomStringBufs.clear();
- return TStringBuf(CustomString);
+ newStringSize += TokenEnd - TokenStart;
+ newEscapedStringPtr->reserve(newStringSize);
+ for (auto tempResultPart : TempResultParts) {
+ *newEscapedStringPtr += TString{ tempResultPart };
+ }
+ *newEscapedStringPtr += TString{ TStringBuf(TokenStart, TokenEnd) };
+ TempResultParts.clear();
+ // Storing built string so that returned TStringBuf won't change until this splitter is destroyed
+ TempResults.push_back(std::move(newEscapedStringPtr));
+ return TStringBuf(*TempResults.back());
} else {
return TStringBuf(TokenStart, TokenEnd);
}
diff --git a/library/cpp/string_utils/csv/csv.h b/library/cpp/string_utils/csv/csv.h
index 8cb96e6bb9..53fdafab1c 100644
--- a/library/cpp/string_utils/csv/csv.h
+++ b/library/cpp/string_utils/csv/csv.h
@@ -5,6 +5,8 @@
#include <util/generic/vector.h>
#include <util/stream/input.h>
+#include <vector>
+
/*
Split string by rfc4180
*/
@@ -24,7 +26,7 @@ namespace NCsvFormat {
class CsvSplitter {
public:
- CsvSplitter(TString& data, const char delimeter = ',', const char quote = '"')
+ CsvSplitter(const TString& data, const char delimeter = ',', const char quote = '"')
// quote = '\0' ignores quoting in values and words like simple split
: Delimeter(delimeter)
, Quote(quote)
@@ -56,9 +58,9 @@ namespace NCsvFormat {
private:
const char Delimeter;
const char Quote;
- TString::iterator Begin;
+ TString::const_iterator Begin;
const TString::const_iterator End;
- TString CustomString;
- TVector<TStringBuf> CustomStringBufs;
+ std::vector<std::unique_ptr<TString>> TempResults; // CsvSplitter lifetime
+ std::vector<TStringBuf> TempResultParts; // Single Consume() method call lifetime
};
}