diff options
author | qrort <qrort@yandex-team.com> | 2022-11-30 23:47:12 +0300 |
---|---|---|
committer | qrort <qrort@yandex-team.com> | 2022-11-30 23:47:12 +0300 |
commit | 22f8ae0e3f5d68b92aecccdf96c1d841a0334311 (patch) | |
tree | bffa27765faf54126ad44bcafa89fadecb7a73d7 /library/cpp/wordlistreader/wordlistreader.h | |
parent | 332b99e2173f0425444abb759eebcb2fafaa9209 (diff) | |
download | ydb-22f8ae0e3f5d68b92aecccdf96c1d841a0334311.tar.gz |
validate canons without yatest_common
Diffstat (limited to 'library/cpp/wordlistreader/wordlistreader.h')
-rw-r--r-- | library/cpp/wordlistreader/wordlistreader.h | 43 |
1 files changed, 43 insertions, 0 deletions
diff --git a/library/cpp/wordlistreader/wordlistreader.h b/library/cpp/wordlistreader/wordlistreader.h new file mode 100644 index 0000000000..03abe78fe7 --- /dev/null +++ b/library/cpp/wordlistreader/wordlistreader.h @@ -0,0 +1,43 @@ +#pragma once + +#include <util/generic/string.h> +#include <library/cpp/charset/codepage.h> +#include <util/stream/output.h> +#include <util/stream/file.h> + +#include <library/cpp/langmask/langmask.h> + +// Mix-in class for loading configuration files built of language sections. Handles version, encoding, +// comments, and language section switching; delegates actual processing to derived classes +// via ParseLine() function (pure virtual). + +class TWordListReader { +private: + ELanguage LangCode; + ECharset Encoding; + int Version; + bool SkippingByError; + +public: + TWordListReader() + : LangCode(LANG_UNK) + , Encoding(CODES_YANDEX) + , Version(0) + , SkippingByError(false) + { + } + virtual ~TWordListReader() { + } + +protected: + virtual void ParseLine(const TUtf16String& line, ELanguage langcode, int version) = 0; + + void ReadDataFile(const char* filename) { + TBuffered<TUnbufferedFileInput> src(4096, filename); + ReadDataFile(src); + } + void ReadDataFile(IInputStream& src); + +private: + void ProcessLine(const TString& line); +}; |