aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/regex/pire/extraencodings.cpp
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/regex/pire/extraencodings.cpp
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/regex/pire/extraencodings.cpp')
-rw-r--r--library/cpp/regex/pire/extraencodings.cpp81
1 files changed, 81 insertions, 0 deletions
diff --git a/library/cpp/regex/pire/extraencodings.cpp b/library/cpp/regex/pire/extraencodings.cpp
new file mode 100644
index 0000000000..2e507e4b67
--- /dev/null
+++ b/library/cpp/regex/pire/extraencodings.cpp
@@ -0,0 +1,81 @@
+#include <util/system/defaults.h>
+#include <util/system/yassert.h>
+#include <library/cpp/charset/codepage.h>
+#include <util/generic/singleton.h>
+#include <util/generic/yexception.h>
+#include <library/cpp/charset/doccodes.h>
+
+#include "pire.h"
+
+namespace NPire {
+ namespace {
+ // A one-byte encoding which is capable of transforming upper half of the character
+ // table to/from Unicode chars.
+ class TOneByte: public TEncoding {
+ public:
+ TOneByte(ECharset doccode) {
+ Table_ = CodePageByCharset(doccode)->unicode;
+ for (size_t i = 0; i < 256; ++i)
+ Reverse_.insert(std::make_pair(Table_[i], static_cast<char>(i)));
+ }
+
+ wchar32 FromLocal(const char*& begin, const char* end) const override {
+ if (begin != end)
+ return Table_[static_cast<unsigned char>(*begin++)];
+ else
+ ythrow yexception() << "EOF reached in Pire::OneByte::fromLocal()";
+ }
+
+ TString ToLocal(wchar32 c) const override {
+ THashMap<wchar32, char>::const_iterator i = Reverse_.find(c);
+ if (i != Reverse_.end())
+ return TString(1, i->second);
+ else
+ return TString();
+ }
+
+ void AppendDot(TFsm& fsm) const override {
+ fsm.AppendDot();
+ }
+
+ private:
+ const wchar32* Table_;
+ THashMap<wchar32, char> Reverse_;
+ };
+
+ template <unsigned N>
+ struct TOneByteHelper: public TOneByte {
+ inline TOneByteHelper()
+ : TOneByte((ECharset)N)
+ {
+ }
+ };
+ }
+
+ namespace NEncodings {
+ const NPire::TEncoding& Koi8r() {
+ return *Singleton<TOneByteHelper<CODES_KOI8>>();
+ }
+
+ const NPire::TEncoding& Cp1251() {
+ return *Singleton<TOneByteHelper<CODES_WIN>>();
+ }
+
+ const NPire::TEncoding& Get(ECharset encoding) {
+ switch (encoding) {
+ case CODES_WIN:
+ return Cp1251();
+ case CODES_KOI8:
+ return Koi8r();
+ case CODES_ASCII:
+ return NPire::NEncodings::Latin1();
+ case CODES_UTF8:
+ return NPire::NEncodings::Utf8();
+ default:
+ ythrow yexception() << "Pire::Encodings::get(ECharset): unknown encoding " << (int)encoding;
+ }
+ }
+
+ }
+
+}