aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/wordlistreader/wordlistreader.h
diff options
context:
space:
mode:
authorqrort <qrort@yandex-team.com>2022-11-30 23:47:12 +0300
committerqrort <qrort@yandex-team.com>2022-11-30 23:47:12 +0300
commit22f8ae0e3f5d68b92aecccdf96c1d841a0334311 (patch)
treebffa27765faf54126ad44bcafa89fadecb7a73d7 /library/cpp/wordlistreader/wordlistreader.h
parent332b99e2173f0425444abb759eebcb2fafaa9209 (diff)
downloadydb-22f8ae0e3f5d68b92aecccdf96c1d841a0334311.tar.gz
validate canons without yatest_common
Diffstat (limited to 'library/cpp/wordlistreader/wordlistreader.h')
-rw-r--r--library/cpp/wordlistreader/wordlistreader.h43
1 files changed, 43 insertions, 0 deletions
diff --git a/library/cpp/wordlistreader/wordlistreader.h b/library/cpp/wordlistreader/wordlistreader.h
new file mode 100644
index 0000000000..03abe78fe7
--- /dev/null
+++ b/library/cpp/wordlistreader/wordlistreader.h
@@ -0,0 +1,43 @@
+#pragma once
+
+#include <util/generic/string.h>
+#include <library/cpp/charset/codepage.h>
+#include <util/stream/output.h>
+#include <util/stream/file.h>
+
+#include <library/cpp/langmask/langmask.h>
+
+// Mix-in class for loading configuration files built of language sections. Handles version, encoding,
+// comments, and language section switching; delegates actual processing to derived classes
+// via ParseLine() function (pure virtual).
+
+class TWordListReader { // Base for word-list loaders: derived classes implement ParseLine() and may call the protected ReadDataFile() overloads.
+private: // Reader state; all four fields are initialized by the default constructor below.
+ ELanguage LangCode; // Starts as LANG_UNK; presumably the language of the section being read -- confirm in the .cpp.
+ ECharset Encoding; // Starts as CODES_YANDEX; presumably the charset used to decode input lines -- confirm in the .cpp.
+ int Version; // Starts at 0; presumably the format version announced by the data file -- confirm in the .cpp.
+ bool SkippingByError; // Starts false; the name suggests lines are skipped after an error -- confirm in the .cpp.
+
+public:
+ TWordListReader()
+ : LangCode(LANG_UNK)
+ , Encoding(CODES_YANDEX)
+ , Version(0)
+ , SkippingByError(false)
+ {
+ }
+ virtual ~TWordListReader() { // Virtual: the class has a pure virtual member and is meant to be derived from.
+ }
+
+protected: // Accessible to derived classes only.
+ virtual void ParseLine(const TUtf16String& line, ELanguage langcode, int version) = 0; // Hook that derived classes must implement to handle a line.
+
+ void ReadDataFile(const char* filename) { // Convenience overload: opens `filename` and delegates to the stream overload.
+ TBuffered<TUnbufferedFileInput> src(4096, filename); // 4096 is forwarded to TBuffered, presumably as the buffer size in bytes.
+ ReadDataFile(src);
+ }
+ void ReadDataFile(IInputStream& src); // Declared only; the definition is not in this header.
+
+private:
+ void ProcessLine(const TString& line); // Declared only; takes a TString line, unlike ParseLine's TUtf16String.
+};