diff options
| author | ivanmorozov <[email protected]> | 2022-12-23 17:22:10 +0300 | 
|---|---|---|
| committer | ivanmorozov <[email protected]> | 2022-12-23 17:22:10 +0300 | 
| commit | f6fd83a9bc9c2d6bb9502d3dd77adb4f5c31ceae (patch) | |
| tree | cca463108bcf236b61344602b47f474ce42b1ff4 /library/cpp/string_utils/csv | |
| parent | af6d4963aeec3543ef895865a0a24fa2e039efa3 (diff) | |
parse line on deploy by option
Diffstat (limited to 'library/cpp/string_utils/csv')
| -rw-r--r-- | library/cpp/string_utils/csv/CMakeLists.darwin.txt | 17 | ||||
| -rw-r--r-- | library/cpp/string_utils/csv/CMakeLists.linux-aarch64.txt | 18 | ||||
| -rw-r--r-- | library/cpp/string_utils/csv/CMakeLists.linux.txt | 18 | ||||
| -rw-r--r-- | library/cpp/string_utils/csv/CMakeLists.txt | 15 | ||||
| -rw-r--r-- | library/cpp/string_utils/csv/csv.cpp | 82 | ||||
| -rw-r--r-- | library/cpp/string_utils/csv/csv.h | 64 | 
6 files changed, 214 insertions, 0 deletions
diff --git a/library/cpp/string_utils/csv/CMakeLists.darwin.txt b/library/cpp/string_utils/csv/CMakeLists.darwin.txt new file mode 100644 index 00000000000..7dffad35661 --- /dev/null +++ b/library/cpp/string_utils/csv/CMakeLists.darwin.txt @@ -0,0 +1,17 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-string_utils-csv) +target_link_libraries(cpp-string_utils-csv PUBLIC +  contrib-libs-cxxsupp +  yutil +) +target_sources(cpp-string_utils-csv PRIVATE +  ${CMAKE_SOURCE_DIR}/library/cpp/string_utils/csv/csv.cpp +) diff --git a/library/cpp/string_utils/csv/CMakeLists.linux-aarch64.txt b/library/cpp/string_utils/csv/CMakeLists.linux-aarch64.txt new file mode 100644 index 00000000000..1941d5ba178 --- /dev/null +++ b/library/cpp/string_utils/csv/CMakeLists.linux-aarch64.txt @@ -0,0 +1,18 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-string_utils-csv) +target_link_libraries(cpp-string_utils-csv PUBLIC +  contrib-libs-linux-headers +  contrib-libs-cxxsupp +  yutil +) +target_sources(cpp-string_utils-csv PRIVATE +  ${CMAKE_SOURCE_DIR}/library/cpp/string_utils/csv/csv.cpp +) diff --git a/library/cpp/string_utils/csv/CMakeLists.linux.txt b/library/cpp/string_utils/csv/CMakeLists.linux.txt new file mode 100644 index 00000000000..1941d5ba178 --- /dev/null +++ b/library/cpp/string_utils/csv/CMakeLists.linux.txt @@ -0,0 +1,18 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-string_utils-csv) +target_link_libraries(cpp-string_utils-csv PUBLIC +  contrib-libs-linux-headers +  contrib-libs-cxxsupp +  yutil +) +target_sources(cpp-string_utils-csv PRIVATE +  ${CMAKE_SOURCE_DIR}/library/cpp/string_utils/csv/csv.cpp +) diff --git a/library/cpp/string_utils/csv/CMakeLists.txt b/library/cpp/string_utils/csv/CMakeLists.txt new file mode 100644 index 00000000000..3e0811fb22e --- /dev/null +++ b/library/cpp/string_utils/csv/CMakeLists.txt @@ -0,0 +1,15 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND UNIX AND NOT APPLE AND NOT ANDROID) +  include(CMakeLists.linux-aarch64.txt) +elseif (APPLE) +  include(CMakeLists.darwin.txt) +elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND UNIX AND NOT APPLE AND NOT ANDROID) +  include(CMakeLists.linux.txt) +endif() diff --git a/library/cpp/string_utils/csv/csv.cpp b/library/cpp/string_utils/csv/csv.cpp new file mode 100644 index 00000000000..218473c62c4 --- /dev/null +++ b/library/cpp/string_utils/csv/csv.cpp @@ -0,0 +1,82 @@ +#include "csv.h" + +TStringBuf NCsvFormat::CsvSplitter::Consume() { +    if (Begin == End) { +        return nullptr; +    } +    TString::iterator TokenStart = Begin; +    TString::iterator TokenEnd = Begin; +    if (Quote == '\0') { +        while (1) { +            if (TokenEnd == End || *TokenEnd == Delimeter) { +                Begin = TokenEnd; +                return TStringBuf(TokenStart, TokenEnd); +            } +            ++TokenEnd; +        } +    } else { +        bool Escape = false; +        if (*Begin == Quote) { +            Escape = true; +            ++TokenStart; +            ++TokenEnd; +            Y_ENSURE(TokenStart != End, TStringBuf("RFC4180 violation: quotation mark must be followed by something")); +        } +        while (1) { +            if (TokenEnd == End || (!Escape && *TokenEnd == Delimeter)) { +                Begin = TokenEnd; +                return TStringBuf(TokenStart, TokenEnd); +            } else if (*TokenEnd == Quote) { +                Y_ENSURE(Escape, TStringBuf("RFC4180 violation: quotation mark must be in the escaped string only")); +                if (TokenEnd + 1 == End) { +                    Begin = TokenEnd + 1; +                } else if (*(TokenEnd + 1) == Delimeter) { +                    Begin = TokenEnd + 1; +                } else if (*(TokenEnd + 1) == Quote) { +                    CustomStringBufs.push_back(TStringBuf(TokenStart, (TokenEnd + 1))); +                    TokenEnd += 2; +                    TokenStart = TokenEnd; +                    continue; +                } else { +                    Y_ENSURE(false, TStringBuf("RFC4180 violation: in escaped string quotation mark must be followed by a delimiter, EOL or another quotation mark")); +                } +                if (CustomStringBufs.size()) { +                    CustomString.clear(); +                    for (auto CustomStringBuf : CustomStringBufs) { +                        CustomString += TString{ CustomStringBuf }; +                    } +                    CustomString += TString{ TStringBuf(TokenStart, TokenEnd) }; +                    CustomStringBufs.clear(); +                    return TStringBuf(CustomString); +                } else { +                    return TStringBuf(TokenStart, TokenEnd); +                } +            } +            ++TokenEnd; +        } +    } +}; + +TString NCsvFormat::TLinesSplitter::ConsumeLine() { +    bool Escape = false; +    TString result; +    TString line; +    while (Input.ReadLine(line)) { +        for (auto it = line.begin(); it != line.end(); ++it) { +            if (*it == Quote) { +                Escape = !Escape; +            } +        } +        if (!result) { +            result = line; +        } else { +            result += line; +        } +        if (!Escape) { +            break; +        } else { +            result += "\n"; +        } +    } +    return result; +}; diff --git a/library/cpp/string_utils/csv/csv.h b/library/cpp/string_utils/csv/csv.h new file mode 100644 index 00000000000..8cb96e6bb92 --- /dev/null +++ b/library/cpp/string_utils/csv/csv.h @@ -0,0 +1,64 @@ +#pragma once + +#include <util/generic/yexception.h> +#include <util/generic/strbuf.h> +#include <util/generic/vector.h> +#include <util/stream/input.h> + +/* +    Split string by rfc4180 +*/ + +namespace NCsvFormat { +    class TLinesSplitter { +    private: +        IInputStream& Input; +        const char Quote; +    public: +        TLinesSplitter(IInputStream& input, const char quote = '"') +            : Input(input) +            , Quote(quote) { +        } +        TString ConsumeLine(); +    }; + +    class CsvSplitter { +    public: +        CsvSplitter(TString& data, const char delimeter = ',', const char quote = '"') +        // quote = '\0' ignores quoting in values and words like simple split +            : Delimeter(delimeter) +            , Quote(quote) +            , Begin(data.begin()) +            , End(data.end()) +        { +        } + +        bool Step() { +            if (Begin == End) { +                return false; +            } +            ++Begin; +            return true; +        } + +        TStringBuf Consume(); +        explicit operator TVector<TString>() { +            TVector<TString> ret; + +            do { +                TStringBuf buf = Consume(); +                ret.push_back(TString{buf}); +            } while (Step()); + +            return ret; +        } + +    private: +        const char Delimeter; +        const char Quote; +        TString::iterator Begin; +        const TString::const_iterator End; +        TString CustomString; +        TVector<TStringBuf> CustomStringBufs; +    }; +}  | 
