diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/regex/pire/extraencodings.cpp | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/regex/pire/extraencodings.cpp')
-rw-r--r-- | library/cpp/regex/pire/extraencodings.cpp | 81 |
1 files changed, 81 insertions, 0 deletions
diff --git a/library/cpp/regex/pire/extraencodings.cpp b/library/cpp/regex/pire/extraencodings.cpp new file mode 100644 index 0000000000..2e507e4b67 --- /dev/null +++ b/library/cpp/regex/pire/extraencodings.cpp @@ -0,0 +1,81 @@ +#include <util/system/defaults.h> +#include <util/system/yassert.h> +#include <library/cpp/charset/codepage.h> +#include <util/generic/singleton.h> +#include <util/generic/yexception.h> +#include <library/cpp/charset/doccodes.h> + +#include "pire.h" + +namespace NPire { + namespace { + // A one-byte encoding which is capable of transforming upper half of the character + // table to/from Unicode chars. + class TOneByte: public TEncoding { + public: + TOneByte(ECharset doccode) { + Table_ = CodePageByCharset(doccode)->unicode; + for (size_t i = 0; i < 256; ++i) + Reverse_.insert(std::make_pair(Table_[i], static_cast<char>(i))); + } + + wchar32 FromLocal(const char*& begin, const char* end) const override { + if (begin != end) + return Table_[static_cast<unsigned char>(*begin++)]; + else + ythrow yexception() << "EOF reached in Pire::OneByte::fromLocal()"; + } + + TString ToLocal(wchar32 c) const override { + THashMap<wchar32, char>::const_iterator i = Reverse_.find(c); + if (i != Reverse_.end()) + return TString(1, i->second); + else + return TString(); + } + + void AppendDot(TFsm& fsm) const override { + fsm.AppendDot(); + } + + private: + const wchar32* Table_; + THashMap<wchar32, char> Reverse_; + }; + + template <unsigned N> + struct TOneByteHelper: public TOneByte { + inline TOneByteHelper() + : TOneByte((ECharset)N) + { + } + }; + } + + namespace NEncodings { + const NPire::TEncoding& Koi8r() { + return *Singleton<TOneByteHelper<CODES_KOI8>>(); + } + + const NPire::TEncoding& Cp1251() { + return *Singleton<TOneByteHelper<CODES_WIN>>(); + } + + const NPire::TEncoding& Get(ECharset encoding) { + switch (encoding) { + case CODES_WIN: + return Cp1251(); + case CODES_KOI8: + return Koi8r(); + case CODES_ASCII: + return NPire::NEncodings::Latin1(); + case CODES_UTF8: + return NPire::NEncodings::Utf8(); + default: + ythrow yexception() << "Pire::Encodings::get(ECharset): unknown encoding " << (int)encoding; + } + } + + } + +} |