diff options
author | Alexander Smirnov <alex@ydb.tech> | 2024-12-10 18:03:37 +0000 |
---|---|---|
committer | Alexander Smirnov <alex@ydb.tech> | 2024-12-10 18:03:37 +0000 |
commit | d2954d4f1174129e3daa53998ff9f0507063d15d (patch) | |
tree | 26768147deb7964ec9008cfe507e89ec884114cd /library | |
parent | 15933e4b4c25961cecb34190ed5108907efaa4d5 (diff) | |
parent | 0ca47b802c1f53773f1b14b88f851e959d227e77 (diff) | |
download | ydb-d2954d4f1174129e3daa53998ff9f0507063d15d.tar.gz |
Merge branch 'rightlib' into mergelibs-241210-1802
Diffstat (limited to 'library')
-rw-r--r-- | library/cpp/string_utils/csv/csv.cpp | 28 | 18 insertions, 10 deletions |
-rw-r--r-- | library/cpp/string_utils/csv/csv.h | 10 | 6 insertions, 4 deletions |
2 files changed, 24 insertions, 14 deletions
diff --git a/library/cpp/string_utils/csv/csv.cpp b/library/cpp/string_utils/csv/csv.cpp index bca9a5d7f1..fd3d932fcd 100644 --- a/library/cpp/string_utils/csv/csv.cpp +++ b/library/cpp/string_utils/csv/csv.cpp @@ -4,8 +4,8 @@ TStringBuf NCsvFormat::CsvSplitter::Consume() { if (Begin == End) { return nullptr; } - TString::iterator TokenStart = Begin; - TString::iterator TokenEnd = Begin; + TString::const_iterator TokenStart = Begin; + TString::const_iterator TokenEnd = Begin; if (Quote == '\0') { while (1) { if (TokenEnd == End || *TokenEnd == Delimeter) { @@ -33,21 +33,29 @@ TStringBuf NCsvFormat::CsvSplitter::Consume() { } else if (*(TokenEnd + 1) == Delimeter) { Begin = TokenEnd + 1; } else if (*(TokenEnd + 1) == Quote) { - CustomStringBufs.push_back(TStringBuf(TokenStart, (TokenEnd + 1))); + TempResultParts.push_back(TStringBuf(TokenStart, (TokenEnd + 1))); TokenEnd += 2; TokenStart = TokenEnd; continue; } else { Y_ENSURE(false, TStringBuf("RFC4180 violation: in escaped string quotation mark must be followed by a delimiter, EOL or another quotation mark")); } - if (CustomStringBufs.size()) { - CustomString.clear(); - for (auto CustomStringBuf : CustomStringBufs) { - CustomString += TString{ CustomStringBuf }; + if (TempResultParts.size()) { + auto newEscapedStringPtr = std::make_unique<TString>(); + size_t newStringSize = 0; + for (auto tempResultPart : TempResultParts) { + newStringSize += tempResultPart.size(); } - CustomString += TString{ TStringBuf(TokenStart, TokenEnd) }; - CustomStringBufs.clear(); - return TStringBuf(CustomString); + newStringSize += TokenEnd - TokenStart; + newEscapedStringPtr->reserve(newStringSize); + for (auto tempResultPart : TempResultParts) { + *newEscapedStringPtr += TString{ tempResultPart }; + } + *newEscapedStringPtr += TString{ TStringBuf(TokenStart, TokenEnd) }; + TempResultParts.clear(); + // Storing built string so that returned TStringBuf won't change until this splitter is destroyed + 
TempResults.push_back(std::move(newEscapedStringPtr)); + return TStringBuf(*TempResults.back()); } else { return TStringBuf(TokenStart, TokenEnd); } diff --git a/library/cpp/string_utils/csv/csv.h b/library/cpp/string_utils/csv/csv.h index 8cb96e6bb9..53fdafab1c 100644 --- a/library/cpp/string_utils/csv/csv.h +++ b/library/cpp/string_utils/csv/csv.h @@ -5,6 +5,8 @@ #include <util/generic/vector.h> #include <util/stream/input.h> +#include <vector> + /* Split string by rfc4180 */ @@ -24,7 +26,7 @@ namespace NCsvFormat { class CsvSplitter { public: - CsvSplitter(TString& data, const char delimeter = ',', const char quote = '"') + CsvSplitter(const TString& data, const char delimeter = ',', const char quote = '"') // quote = '\0' ignores quoting in values and words like simple split : Delimeter(delimeter) , Quote(quote) @@ -56,9 +58,9 @@ namespace NCsvFormat { private: const char Delimeter; const char Quote; - TString::iterator Begin; + TString::const_iterator Begin; const TString::const_iterator End; - TString CustomString; - TVector<TStringBuf> CustomStringBufs; + std::vector<std::unique_ptr<TString>> TempResults; // CsvSplitter lifetime + std::vector<TStringBuf> TempResultParts; // Single Consume() method call lifetime }; } |