// path: library/cpp/regex/pire/extraencodings.cpp
// blob: 8645d6cd4f24a71fadc206ad566a3bd984bdc09b
#include <util/system/defaults.h>
#include <util/system/yassert.h>
#include <library/cpp/charset/codepage.h>
#include <util/generic/singleton.h>
#include <util/generic/yexception.h>
#include <library/cpp/charset/doccodes.h>

#include "pire.h"

namespace NPire {
    namespace { 
        // Single-byte encoding backed by a charset code page.
        //
        // Decoding looks a byte up in the code page's Unicode table; encoding goes
        // through a reverse map (code point -> byte) built once in the constructor.
        // Code points that no byte maps to encode to an empty string.
        class TOneByte: public TEncoding {
        public:
            // Builds the forward table pointer and the reverse map for `doccode`.
            // The constructor is explicit so that an ECharset value never converts
            // into an encoding object implicitly.
            explicit TOneByte(ECharset doccode)
                : Table_(CodePageByCharset(doccode)->unicode)
            {
                // insert() does not overwrite an existing key, so if several bytes
                // share one code point the lowest byte value is the one kept.
                for (size_t byte = 0; byte < 256; ++byte) {
                    Reverse_.insert(std::make_pair(Table_[byte], static_cast<char>(byte)));
                }
            }

            // Decodes one character from [begin, end) and advances `begin` past it.
            // Throws yexception when the range is empty.
            wchar32 FromLocal(const char*& begin, const char* end) const override {
                if (begin == end) {
                    ythrow yexception() << "EOF reached in Pire::OneByte::fromLocal()";
                }
                // Index through unsigned char so that bytes >= 0x80 address the upper
                // half of the table instead of becoming negative where char is signed.
                return Table_[static_cast<unsigned char>(*begin++)];
            }

            // Encodes `c` as a one-byte string, or returns an empty string when the
            // code point is absent from the reverse map.
            TString ToLocal(wchar32 c) const override {
                const auto found = Reverse_.find(c);
                if (found == Reverse_.end()) {
                    return TString();
                }
                return TString(1, found->second);
            }

            // Delegates to the FSM's own AppendDot().
            void AppendDot(TFsm& fsm) const override {
                fsm.AppendDot();
            }

        private:
            const wchar32* Table_;            // byte -> code point; not owned by this object
            THashMap<wchar32, char> Reverse_; // code point -> byte
        };

        template <unsigned N> 
        struct TOneByteHelper: public TOneByte { 
            inline TOneByteHelper() 
                : TOneByte((ECharset)N) 
            { 
            } 
        }; 
    } 

    namespace NEncodings { 
        // Returns the shared one-byte encoding built for CODES_KOI8.
        const NPire::TEncoding& Koi8r() {
            using TKoi8rEncoding = TOneByteHelper<CODES_KOI8>;
            return *Singleton<TKoi8rEncoding>();
        }

        // Returns the shared one-byte encoding built for CODES_WIN.
        const NPire::TEncoding& Cp1251() {
            using TCp1251Encoding = TOneByteHelper<CODES_WIN>;
            return *Singleton<TCp1251Encoding>();
        }

        // Maps an ECharset value to the matching Pire encoding object.
        // Only CODES_UTF8, CODES_ASCII (served by Latin1), CODES_KOI8 and CODES_WIN
        // are handled; any other value throws yexception carrying the numeric code.
        const NPire::TEncoding& Get(ECharset encoding) {
            switch (encoding) {
                case CODES_UTF8:
                    return NPire::NEncodings::Utf8();
                case CODES_ASCII:
                    return NPire::NEncodings::Latin1();
                case CODES_KOI8:
                    return Koi8r();
                case CODES_WIN:
                    return Cp1251();
                default:
                    break;
            }
            ythrow yexception() << "Pire::Encodings::get(ECharset): unknown encoding " << static_cast<int>(encoding);
        }

    }

}