blob: 5bc198651d18cec16679d1ebafa5859f942cb3c9 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
|
#include <util/system/defaults.h>
#include <util/system/yassert.h>
#include <library/cpp/charset/codepage.h>
#include <util/generic/singleton.h>
#include <util/generic/yexception.h>
#include <library/cpp/charset/doccodes.h>
#include "pire.h"
namespace NPire {
namespace {
// Single-byte encoding driven by a byte -> Unicode lookup table.
// The table is the `unicode` member of the code page returned by
// CodePageByCharset() for the requested charset; a reverse
// (Unicode -> byte) map is built from it once, at construction time.
class TOneByte: public TEncoding {
public:
    // Fetches the forward table for `doccode` and indexes all 256 byte
    // values by the code point they decode to.
    TOneByte(ECharset doccode)
        : Table_(CodePageByCharset(doccode)->unicode)
    {
        // NOTE(review): if two bytes decode to the same code point, which
        // one ToLocal() yields depends on THashMap::insert's handling of
        // an already-present key -- presumably the first one is kept,
        // as with the standard maps; confirm.
        for (size_t code = 0; code < 256; ++code) {
            Reverse_.insert(std::make_pair(Table_[code], static_cast<char>(code)));
        }
    }

    // Decodes the byte at `begin` and advances `begin` past it.
    // Throws yexception when the input range is empty.
    wchar32 FromLocal(const char*& begin, const char* end) const override {
        if (begin == end) {
            ythrow yexception() << "EOF reached in Pire::OneByte::fromLocal()";
        }
        const unsigned char byte = static_cast<unsigned char>(*begin);
        ++begin;
        return Table_[byte];
    }

    // Encodes `c` as a one-character string; yields an empty string when
    // the code point is absent from the reverse map.
    TString ToLocal(wchar32 c) const override {
        const auto found = Reverse_.find(c);
        return found == Reverse_.end() ? TString() : TString(1, found->second);
    }

    // Delegates to TFsm::AppendDot() with no encoding-specific handling.
    void AppendDot(TFsm& fsm) const override {
        fsm.AppendDot();
    }

private:
    // Forward table, indexed by byte value (0..255).
    const wchar32* Table_;
    // Reverse lookup: code point -> byte.
    THashMap<wchar32, char> Reverse_;
};
template <unsigned N>
struct TOneByteHelper: public TOneByte {
inline TOneByteHelper()
: TOneByte((ECharset)N)
{
}
};
}
namespace NEncodings {
// Returns the one-byte encoding for CODES_KOI8, obtained through Singleton<>().
const NPire::TEncoding& Koi8r() {
    using TKoi8rEncoding = TOneByteHelper<CODES_KOI8>;
    return *Singleton<TKoi8rEncoding>();
}
// Returns the one-byte encoding for CODES_WIN, obtained through Singleton<>().
const NPire::TEncoding& Cp1251() {
    using TCp1251Encoding = TOneByteHelper<CODES_WIN>;
    return *Singleton<TCp1251Encoding>();
}
// Maps a charset identifier to the corresponding Pire encoding:
//   CODES_UTF8  -> Utf8()
//   CODES_ASCII -> Latin1()
//   CODES_KOI8  -> Koi8r()
//   CODES_WIN   -> Cp1251()
// Any other value results in a yexception whose message carries the
// numeric value of the charset.
const NPire::TEncoding& Get(ECharset encoding) {
    switch (encoding) {
        case CODES_UTF8:
            return NPire::NEncodings::Utf8();
        case CODES_ASCII:
            return NPire::NEncodings::Latin1();
        case CODES_KOI8:
            return Koi8r();
        case CODES_WIN:
            return Cp1251();
        default:
            break;
    }
    // Reached only for charsets without a dedicated encoding above.
    ythrow yexception() << "Pire::Encodings::get(ECharset): unknown encoding " << static_cast<int>(encoding);
}
}
}
|