aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/regex/pire/extraencodings.cpp
blob: 2e507e4b67f3b1297510767071c011787642ce69 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#include <util/system/defaults.h>
#include <util/system/yassert.h>
#include <library/cpp/charset/codepage.h>
#include <util/generic/singleton.h>
#include <util/generic/yexception.h>
#include <library/cpp/charset/doccodes.h>

#include "pire.h"

namespace NPire {
    namespace {
        // A single-byte encoding driven by a code page table.
        //
        // Decoding looks the input byte up in the code page's `unicode` table; encoding
        // goes through a reverse (Unicode char -> byte) map that is filled once, in the
        // constructor, from all 256 entries of that table.
        class TOneByte: public TEncoding {
        public:
            // Binds the encoding to the code page selected by `doccode` and builds the
            // reverse map.
            //
            // The constructor is explicit so that an ECharset value cannot be converted
            // into an encoding object by accident.
            //
            // NOTE(review): only the pointer to the table is stored, so the object returned
            // by CodePageByCharset() is assumed to outlive this encoding - confirm.
            explicit TOneByte(ECharset doccode)
                : Table_(CodePageByCharset(doccode)->unicode)
            {
                // NOTE(review): insert() is expected to keep an already present key (as the
                // standard associative containers do), so when several bytes share one
                // Unicode char the lowest byte would be the one used by ToLocal() - confirm
                // against THashMap.
                for (size_t byte = 0; byte < 256; ++byte) {
                    Reverse_.insert(std::make_pair(Table_[byte], static_cast<char>(byte)));
                }
            }

            // Decodes one character: returns the table entry for the byte at `begin`
            // and advances `begin` by one.
            // Throws yexception when `begin == end`, i.e. there is nothing left to read.
            wchar32 FromLocal(const char*& begin, const char* end) const override {
                if (begin == end) {
                    ythrow yexception() << "EOF reached in Pire::OneByte::fromLocal()";
                }
                // Go through unsigned char so that bytes >= 0x80 do not become negative indices.
                return Table_[static_cast<unsigned char>(*begin++)];
            }

            // Encodes one character: returns a one-byte string holding the byte recorded
            // for `c` in the reverse map, or an empty string when `c` has no entry there.
            TString ToLocal(wchar32 c) const override {
                const auto found = Reverse_.find(c);
                if (found == Reverse_.end()) {
                    return TString();
                }
                return TString(1, found->second);
            }

            // Appends an "any character" element by delegating to TFsm::AppendDot().
            void AppendDot(TFsm& fsm) const override {
                fsm.AppendDot();
            }

        private:
            // Byte -> Unicode table of the code page (non-owning).
            const wchar32* Table_;
            // Unicode -> byte map derived from Table_.
            THashMap<wchar32, char> Reverse_;
        };

        template <unsigned N>
        struct TOneByteHelper: public TOneByte {
            inline TOneByteHelper()
                : TOneByte((ECharset)N)
            {
            }
        };
    }

    namespace NEncodings {
        // Returns the encoding object for CODES_KOI8, obtained through Singleton<>.
        const NPire::TEncoding& Koi8r() {
            using TKoi8rEncoding = TOneByteHelper<CODES_KOI8>;
            return *Singleton<TKoi8rEncoding>();
        }

        // Returns the encoding object for CODES_WIN, obtained through Singleton<>.
        const NPire::TEncoding& Cp1251() {
            using TCp1251Encoding = TOneByteHelper<CODES_WIN>;
            return *Singleton<TCp1251Encoding>();
        }

        // Maps a charset identifier to the matching Pire encoding object.
        //
        // Supported values: CODES_UTF8 -> Utf8(), CODES_ASCII -> Latin1(),
        // CODES_KOI8 -> Koi8r(), CODES_WIN -> Cp1251().
        // Any other value results in a yexception carrying the numeric charset code.
        const NPire::TEncoding& Get(ECharset encoding) {
            switch (encoding) {
                case CODES_UTF8:
                    return NPire::NEncodings::Utf8();
                case CODES_ASCII:
                    return NPire::NEncodings::Latin1();
                case CODES_KOI8:
                    return Koi8r();
                case CODES_WIN:
                    return Cp1251();
                default:
                    break;
            }
            // Reached only for charsets that have no Pire encoding.
            ythrow yexception() << "Pire::Encodings::get(ECharset): unknown encoding " << static_cast<int>(encoding);
        }

    }

}