about summary refs log tree commit diff stats
path: root/library/cpp/charset/codepage_ut.cpp
diff options
context:
space:
mode:
author grig <grig@yandex-team.ru> 2022-02-10 16:50:24 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:50:24 +0300
commit beb63ece3a6872dfbe113104f524ab6fdbec0adc (patch)
tree 5d5cb817648f650d76cf1076100726fd9b8448e8 /library/cpp/charset/codepage_ut.cpp
parent da383a4f674027527827ad076134241fc5da0cbf (diff)
download ydb-beb63ece3a6872dfbe113104f524ab6fdbec0adc.tar.gz
Restoring authorship annotation for <grig@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/charset/codepage_ut.cpp')
-rw-r--r-- library/cpp/charset/codepage_ut.cpp 204
1 files changed, 102 insertions, 102 deletions
diff --git a/library/cpp/charset/codepage_ut.cpp b/library/cpp/charset/codepage_ut.cpp
index f854d338ea..c3ac3ac478 100644
--- a/library/cpp/charset/codepage_ut.cpp
+++ b/library/cpp/charset/codepage_ut.cpp
@@ -6,7 +6,7 @@
#include <util/charset/utf8.h>
#include <util/system/yassert.h>
-
+
#if defined(_MSC_VER)
#pragma warning(disable : 4309) /*truncation of constant value*/
#endif
@@ -30,8 +30,8 @@ namespace {
}
class TCodepageTest: public TTestBase {
-private:
- UNIT_TEST_SUITE(TCodepageTest);
+private:
+ UNIT_TEST_SUITE(TCodepageTest);
UNIT_TEST(TestUTF);
UNIT_TEST(TestUTFFromUnknownPlane);
UNIT_TEST(TestBrokenMultibyte);
@@ -42,14 +42,14 @@ private:
UNIT_TEST(TestUpperLower);
UNIT_TEST(TestBrokenRune);
UNIT_TEST(TestCanEncode);
- UNIT_TEST_SUITE_END();
-
-public:
- void TestUTF();
- void TestUTFFromUnknownPlane();
+ UNIT_TEST_SUITE_END();
+
+public:
+ void TestUTF();
+ void TestUTFFromUnknownPlane();
void TestBrokenMultibyte();
void TestSurrogatePairs();
- void TestEncodingHints();
+ void TestEncodingHints();
void TestToLower();
void TestToUpper();
@@ -71,55 +71,55 @@ public:
void TestBrokenRune() {
UNIT_ASSERT_VALUES_EQUAL(BROKEN_RUNE, 0xFFFDu);
}
-};
-
-UNIT_TEST_SUITE_REGISTRATION(TCodepageTest);
-
-void TCodepageTest::TestUTF() {
+};
+
+UNIT_TEST_SUITE_REGISTRATION(TCodepageTest);
+
+void TCodepageTest::TestUTF() {
for (wchar32 i = 0; i <= 0x10FFFF; i++) {
- unsigned char buffer[32];
+ unsigned char buffer[32];
Zero(buffer);
- size_t rune_len;
- size_t ref_len = 0;
-
- if (i < 0x80)
- ref_len = 1;
- else if (i < 0x800)
- ref_len = 2;
- else if (i < 0x10000)
- ref_len = 3;
- else
- ref_len = 4;
-
+ size_t rune_len;
+ size_t ref_len = 0;
+
+ if (i < 0x80)
+ ref_len = 1;
+ else if (i < 0x800)
+ ref_len = 2;
+ else if (i < 0x10000)
+ ref_len = 3;
+ else
+ ref_len = 4;
+
RECODE_RESULT res = SafeWriteUTF8Char(i, rune_len, buffer, buffer + 32);
- UNIT_ASSERT(res == RECODE_OK);
- UNIT_ASSERT(rune_len == ref_len);
-
+ UNIT_ASSERT(res == RECODE_OK);
+ UNIT_ASSERT(rune_len == ref_len);
+
res = SafeWriteUTF8Char(i, rune_len, buffer, buffer + ref_len - 1);
- UNIT_ASSERT(res == RECODE_EOOUTPUT);
-
- wchar32 rune;
+ UNIT_ASSERT(res == RECODE_EOOUTPUT);
+
+ wchar32 rune;
res = SafeReadUTF8Char(rune, rune_len, buffer, buffer + 32);
- UNIT_ASSERT(res == RECODE_OK);
- UNIT_ASSERT(rune == i);
- UNIT_ASSERT(rune_len == ref_len);
-
+ UNIT_ASSERT(res == RECODE_OK);
+ UNIT_ASSERT(rune == i);
+ UNIT_ASSERT(rune_len == ref_len);
+
res = SafeReadUTF8Char(rune, rune_len, buffer, buffer + ref_len - 1);
- UNIT_ASSERT(res == RECODE_EOINPUT);
-
- if (ref_len > 1) {
+ UNIT_ASSERT(res == RECODE_EOINPUT);
+
+ if (ref_len > 1) {
res = SafeReadUTF8Char(rune, rune_len, buffer + 1, buffer + ref_len);
- UNIT_ASSERT(res == RECODE_BROKENSYMBOL);
-
- buffer[1] |= 0xC0;
+ UNIT_ASSERT(res == RECODE_BROKENSYMBOL);
+
+ buffer[1] |= 0xC0;
res = SafeReadUTF8Char(rune, rune_len, buffer, buffer + ref_len);
- UNIT_ASSERT(res == RECODE_BROKENSYMBOL);
-
- buffer[1] &= 0x3F;
+ UNIT_ASSERT(res == RECODE_BROKENSYMBOL);
+
+ buffer[1] &= 0x3F;
res = SafeReadUTF8Char(rune, rune_len, buffer, buffer + ref_len);
- UNIT_ASSERT(res == RECODE_BROKENSYMBOL);
- }
- }
+ UNIT_ASSERT(res == RECODE_BROKENSYMBOL);
+ }
+ }
const char* badStrings[] = {
"\xfe",
"\xff",
@@ -158,8 +158,8 @@ void TCodepageTest::TestUTF() {
RECODE_RESULT res = SafeReadUTF8Char(rune, len, p, p + strlen(badStrings[i]));
UNIT_ASSERT(res == RECODE_BROKENSYMBOL);
}
-}
-
+}
+
void TCodepageTest::TestBrokenMultibyte() {
const ECharset cp = CODES_EUC_JP;
@@ -180,24 +180,24 @@ void TCodepageTest::TestBrokenMultibyte() {
UNIT_ASSERT(nread == Y_ARRAY_SIZE(bigSample));
}
-void TCodepageTest::TestUTFFromUnknownPlane() {
+void TCodepageTest::TestUTFFromUnknownPlane() {
static const wchar32 sampletext[] = {0x61, 0x62, 0x63, 0x20,
0x430, 0x431, 0x432, 0x20,
0x1001, 0x1002, 0x1003, 0x20,
0x10001, 0x10002, 0x10003};
-
- static const size_t BUFFER_SIZE = 1024;
- char bytebuffer[BUFFER_SIZE];
-
- size_t readchars = 0;
- size_t writtenbytes = 0;
+
+ static const size_t BUFFER_SIZE = 1024;
+ char bytebuffer[BUFFER_SIZE];
+
+ size_t readchars = 0;
+ size_t writtenbytes = 0;
size_t samplelen = Y_ARRAY_SIZE(sampletext);
RECODE_RESULT res = RecodeFromUnicode(CODES_UTF8, sampletext, bytebuffer, samplelen, BUFFER_SIZE, readchars, writtenbytes);
-
+
UNIT_ASSERT(res == RECODE_OK);
UNIT_ASSERT(samplelen == readchars);
-
+
size_t writtenbytes2 = 0;
char bytebuffer2[BUFFER_SIZE];
for (size_t i = 0; i != samplelen; ++i) {
@@ -209,45 +209,45 @@ void TCodepageTest::TestUTFFromUnknownPlane() {
}
UNIT_ASSERT_VALUES_EQUAL(TStringBuf(bytebuffer, writtenbytes), TStringBuf(bytebuffer2, writtenbytes2));
- wchar32 charbuffer[BUFFER_SIZE];
- size_t readbytes = 0;
- size_t writtenchars = 0;
-
+ wchar32 charbuffer[BUFFER_SIZE];
+ size_t readbytes = 0;
+ size_t writtenchars = 0;
+
res = RecodeToUnicode(CODES_UNKNOWNPLANE, bytebuffer, charbuffer, writtenbytes, BUFFER_SIZE, readbytes, writtenchars);
-
+
UNIT_ASSERT(res == RECODE_OK);
UNIT_ASSERT(readbytes == writtenbytes);
-
- wchar32* charbufferend = charbuffer + writtenchars;
+
+ wchar32* charbufferend = charbuffer + writtenchars;
DecodeUnknownPlane(charbuffer, charbufferend, CODES_UTF8);
-
+
UNIT_ASSERT(charbufferend == charbuffer + samplelen);
for (size_t i = 0; i < samplelen; ++i)
UNIT_ASSERT(sampletext[i] == charbuffer[i]);
-
- // Now, concatenate the thing with an explicit character and retest
+
+ // Now, concatenate the thing with an explicit character and retest
res = RecodeToUnicode(CODES_UNKNOWNPLANE, bytebuffer, charbuffer, writtenbytes, BUFFER_SIZE, readbytes, writtenchars);
UNIT_ASSERT(res == RECODE_OK);
UNIT_ASSERT(readbytes == writtenbytes);
-
- charbuffer[writtenchars] = 0x1234;
-
- size_t morewrittenchars = 0;
+
+ charbuffer[writtenchars] = 0x1234;
+
+ size_t morewrittenchars = 0;
res = RecodeToUnicode(CODES_UNKNOWNPLANE, bytebuffer, charbuffer + writtenchars + 1, writtenbytes, BUFFER_SIZE, readbytes, morewrittenchars);
UNIT_ASSERT(res == RECODE_OK);
UNIT_ASSERT(readbytes == writtenbytes);
UNIT_ASSERT(writtenchars == morewrittenchars);
-
- charbuffer[2 * writtenchars + 1] = 0x5678;
-
- charbufferend = charbuffer + 2 * writtenchars + 2;
+
+ charbuffer[2 * writtenchars + 1] = 0x5678;
+
+ charbufferend = charbuffer + 2 * writtenchars + 2;
DecodeUnknownPlane(charbuffer, charbufferend, CODES_UTF8);
-
+
UNIT_ASSERT(charbufferend == charbuffer + 2 * samplelen + 2);
- for (size_t i = 0; i < samplelen; ++i) {
+ for (size_t i = 0; i < samplelen; ++i) {
UNIT_ASSERT(sampletext[i] == charbuffer[i]);
UNIT_ASSERT(sampletext[i] == charbuffer[samplelen + 1 + i]);
- }
+ }
UNIT_ASSERT(0x1234 == charbuffer[samplelen]);
UNIT_ASSERT(0x5678 == charbuffer[2 * samplelen + 1]);
@@ -265,8 +265,8 @@ void TCodepageTest::TestUTFFromUnknownPlane() {
UNIT_ASSERT_VALUES_EQUAL(wtr[i], sampletext[i]);
}
}
-}
-
+}
+
static void TestSurrogates(const char* str, const wchar16* wide, size_t wideSize) {
size_t sSize = strlen(str);
size_t wSize = sSize * 2;
@@ -298,24 +298,24 @@ void TCodepageTest::TestSurrogatePairs() {
TestSurrogates(utf8NonBMP2, wNonBMPDummy2, Y_ARRAY_SIZE(wNonBMPDummy2));
}
-void TCodepageTest::TestEncodingHints() {
- UNIT_ASSERT(CODES_WIN == EncodingHintByName("windows-1251"));
- UNIT_ASSERT(CODES_WIN == EncodingHintByName("Windows1251"));
- UNIT_ASSERT(CODES_WIN == EncodingHintByName("WIN1251"));
- UNIT_ASSERT(CODES_WIN == EncodingHintByName("window-cp1251"));
- UNIT_ASSERT(CODES_WIN == EncodingHintByName("!!!CP1251???"));
- UNIT_ASSERT(CODES_WIN == EncodingHintByName("'ansi-cp1251;'"));
- UNIT_ASSERT(CODES_WIN == EncodingHintByName("charset=Microsoft-CP1251;"));
-
- UNIT_ASSERT(CODES_ISO_EAST == EncodingHintByName("iso-8859-2"));
- UNIT_ASSERT(CODES_ISO_EAST == EncodingHintByName("iso-2"));
- UNIT_ASSERT(CODES_ISO_EAST == EncodingHintByName("iso-latin-2"));
- UNIT_ASSERT(CODES_ISO_EAST == EncodingHintByName("charset=\"Latin2\";"));
-
- UNIT_ASSERT(CODES_UNKNOWN == EncodingHintByName("widow1251"));
- UNIT_ASSERT(CODES_UNKNOWN == EncodingHintByName("default"));
- UNIT_ASSERT(CODES_UNKNOWN == EncodingHintByName("$phpcharset"));
-
+void TCodepageTest::TestEncodingHints() {
+ UNIT_ASSERT(CODES_WIN == EncodingHintByName("windows-1251"));
+ UNIT_ASSERT(CODES_WIN == EncodingHintByName("Windows1251"));
+ UNIT_ASSERT(CODES_WIN == EncodingHintByName("WIN1251"));
+ UNIT_ASSERT(CODES_WIN == EncodingHintByName("window-cp1251"));
+ UNIT_ASSERT(CODES_WIN == EncodingHintByName("!!!CP1251???"));
+ UNIT_ASSERT(CODES_WIN == EncodingHintByName("'ansi-cp1251;'"));
+ UNIT_ASSERT(CODES_WIN == EncodingHintByName("charset=Microsoft-CP1251;"));
+
+ UNIT_ASSERT(CODES_ISO_EAST == EncodingHintByName("iso-8859-2"));
+ UNIT_ASSERT(CODES_ISO_EAST == EncodingHintByName("iso-2"));
+ UNIT_ASSERT(CODES_ISO_EAST == EncodingHintByName("iso-latin-2"));
+ UNIT_ASSERT(CODES_ISO_EAST == EncodingHintByName("charset=\"Latin2\";"));
+
+ UNIT_ASSERT(CODES_UNKNOWN == EncodingHintByName("widow1251"));
+ UNIT_ASSERT(CODES_UNKNOWN == EncodingHintByName("default"));
+ UNIT_ASSERT(CODES_UNKNOWN == EncodingHintByName("$phpcharset"));
+
UNIT_ASSERT(CODES_UNSUPPORTED != EncodingHintByName("ShiftJIS"));
UNIT_ASSERT(CODES_UNSUPPORTED != EncodingHintByName("Shift_JIS"));
UNIT_ASSERT(CODES_UNSUPPORTED != EncodingHintByName("Big5"));
@@ -324,7 +324,7 @@ void TCodepageTest::TestEncodingHints() {
UNIT_ASSERT(CODES_UNSUPPORTED != EncodingHintByName("charset='Shift_JIS';;"));
UNIT_ASSERT(CODES_UNSUPPORTED != EncodingHintByName("ISO-2022-KR"));
UNIT_ASSERT(CODES_UNSUPPORTED != EncodingHintByName("ISO-2022-jp"));
-}
+}
void TCodepageTest::TestToLower() {
TTempBuf buf;