diff options
| author | grig <[email protected]> | 2022-02-10 16:50:24 +0300 | 
|---|---|---|
| committer | Daniil Cherednik <[email protected]> | 2022-02-10 16:50:24 +0300 | 
| commit | da383a4f674027527827ad076134241fc5da0cbf (patch) | |
| tree | abffc62e3ae11fc9f73a623ad95ff2c035124fbb /library/cpp/charset/codepage_ut.cpp | |
| parent | 763020232922c49e25864d2457dbbd772dc4ce55 (diff) | |
Restoring authorship annotation for <[email protected]>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/charset/codepage_ut.cpp')
| -rw-r--r-- | library/cpp/charset/codepage_ut.cpp | 204 | 
1 files changed, 102 insertions, 102 deletions
| diff --git a/library/cpp/charset/codepage_ut.cpp b/library/cpp/charset/codepage_ut.cpp index c3ac3ac478e..f854d338ea1 100644 --- a/library/cpp/charset/codepage_ut.cpp +++ b/library/cpp/charset/codepage_ut.cpp @@ -6,7 +6,7 @@  #include <util/charset/utf8.h>  #include <util/system/yassert.h> - +   #if defined(_MSC_VER)  #pragma warning(disable : 4309) /*truncation of constant value*/  #endif @@ -30,8 +30,8 @@ namespace {  }  class TCodepageTest: public TTestBase { -private: -    UNIT_TEST_SUITE(TCodepageTest); +private:  +    UNIT_TEST_SUITE(TCodepageTest);       UNIT_TEST(TestUTF);      UNIT_TEST(TestUTFFromUnknownPlane);      UNIT_TEST(TestBrokenMultibyte); @@ -42,14 +42,14 @@ private:      UNIT_TEST(TestUpperLower);      UNIT_TEST(TestBrokenRune);      UNIT_TEST(TestCanEncode); -    UNIT_TEST_SUITE_END(); - -public: -    void TestUTF(); -    void TestUTFFromUnknownPlane(); +    UNIT_TEST_SUITE_END();  +  +public:  +    void TestUTF();  +    void TestUTFFromUnknownPlane();       void TestBrokenMultibyte();      void TestSurrogatePairs(); -    void TestEncodingHints(); +    void TestEncodingHints();       void TestToLower();      void TestToUpper(); @@ -71,55 +71,55 @@ public:      void TestBrokenRune() {          UNIT_ASSERT_VALUES_EQUAL(BROKEN_RUNE, 0xFFFDu);      } -}; - -UNIT_TEST_SUITE_REGISTRATION(TCodepageTest); - -void TCodepageTest::TestUTF() { +};  +  +UNIT_TEST_SUITE_REGISTRATION(TCodepageTest);  +  +void TCodepageTest::TestUTF() {       for (wchar32 i = 0; i <= 0x10FFFF; i++) { -        unsigned char buffer[32]; +        unsigned char buffer[32];           Zero(buffer); -        size_t rune_len; -        size_t ref_len = 0; - -        if (i < 0x80) -            ref_len = 1; -        else if (i < 0x800) -            ref_len = 2; -        else if (i < 0x10000) -            ref_len = 3; -        else -            ref_len = 4; - +        size_t rune_len;  +        size_t ref_len = 0;  +  +        if (i < 0x80)  +            ref_len = 1;  +        else if (i < 0x800)  +            ref_len = 2;  +        else if (i < 0x10000)  +            ref_len = 3;  +        else  +            ref_len = 4;  +           RECODE_RESULT res = SafeWriteUTF8Char(i, rune_len, buffer, buffer + 32); -        UNIT_ASSERT(res == RECODE_OK); -        UNIT_ASSERT(rune_len == ref_len); - +        UNIT_ASSERT(res == RECODE_OK);  +        UNIT_ASSERT(rune_len == ref_len);  +           res = SafeWriteUTF8Char(i, rune_len, buffer, buffer + ref_len - 1); -        UNIT_ASSERT(res == RECODE_EOOUTPUT); - -        wchar32 rune; +        UNIT_ASSERT(res == RECODE_EOOUTPUT);  +  +        wchar32 rune;           res = SafeReadUTF8Char(rune, rune_len, buffer, buffer + 32); -        UNIT_ASSERT(res == RECODE_OK); -        UNIT_ASSERT(rune == i); -        UNIT_ASSERT(rune_len == ref_len); - +        UNIT_ASSERT(res == RECODE_OK);  +        UNIT_ASSERT(rune == i);  +        UNIT_ASSERT(rune_len == ref_len);  +           res = SafeReadUTF8Char(rune, rune_len, buffer, buffer + ref_len - 1); -        UNIT_ASSERT(res == RECODE_EOINPUT); - -        if (ref_len > 1) { +        UNIT_ASSERT(res == RECODE_EOINPUT);  +  +        if (ref_len > 1) {               res = SafeReadUTF8Char(rune, rune_len, buffer + 1, buffer + ref_len); -            UNIT_ASSERT(res == RECODE_BROKENSYMBOL); - -            buffer[1] |= 0xC0; +            UNIT_ASSERT(res == RECODE_BROKENSYMBOL);  +  +            buffer[1] |= 0xC0;               res = SafeReadUTF8Char(rune, rune_len, buffer, buffer + ref_len); -            UNIT_ASSERT(res == RECODE_BROKENSYMBOL); - -            buffer[1] &= 0x3F; +            UNIT_ASSERT(res == RECODE_BROKENSYMBOL);  +  +            buffer[1] &= 0x3F;               res = SafeReadUTF8Char(rune, rune_len, buffer, buffer + ref_len); -            UNIT_ASSERT(res == RECODE_BROKENSYMBOL); -        } -    } +            UNIT_ASSERT(res == RECODE_BROKENSYMBOL);  +        }  +    }       const char* badStrings[] = {          "\xfe",          "\xff", @@ -158,8 +158,8 @@ void TCodepageTest::TestUTF() {          RECODE_RESULT res = SafeReadUTF8Char(rune, len, p, p + strlen(badStrings[i]));          UNIT_ASSERT(res == RECODE_BROKENSYMBOL);      } -} - +}  +   void TCodepageTest::TestBrokenMultibyte() {      const ECharset cp = CODES_EUC_JP; @@ -180,24 +180,24 @@ void TCodepageTest::TestBrokenMultibyte() {      UNIT_ASSERT(nread == Y_ARRAY_SIZE(bigSample));  } -void TCodepageTest::TestUTFFromUnknownPlane() { +void TCodepageTest::TestUTFFromUnknownPlane() {       static const wchar32 sampletext[] = {0x61, 0x62, 0x63, 0x20,                                           0x430, 0x431, 0x432, 0x20,                                           0x1001, 0x1002, 0x1003, 0x20,                                           0x10001, 0x10002, 0x10003}; - -    static const size_t BUFFER_SIZE = 1024; -    char bytebuffer[BUFFER_SIZE]; - -    size_t readchars = 0; -    size_t writtenbytes = 0; +  +    static const size_t BUFFER_SIZE = 1024;  +    char bytebuffer[BUFFER_SIZE];  +  +    size_t readchars = 0;  +    size_t writtenbytes = 0;       size_t samplelen = Y_ARRAY_SIZE(sampletext);      RECODE_RESULT res = RecodeFromUnicode(CODES_UTF8, sampletext, bytebuffer, samplelen, BUFFER_SIZE, readchars, writtenbytes); - +       UNIT_ASSERT(res == RECODE_OK);      UNIT_ASSERT(samplelen == readchars); - +       size_t writtenbytes2 = 0;      char bytebuffer2[BUFFER_SIZE];      for (size_t i = 0; i != samplelen; ++i) { @@ -209,45 +209,45 @@ void TCodepageTest::TestUTFFromUnknownPlane() {      }      UNIT_ASSERT_VALUES_EQUAL(TStringBuf(bytebuffer, writtenbytes), TStringBuf(bytebuffer2, writtenbytes2)); -    wchar32 charbuffer[BUFFER_SIZE]; -    size_t readbytes = 0; -    size_t writtenchars = 0; - +    wchar32 charbuffer[BUFFER_SIZE];  +    size_t readbytes = 0;  +    size_t writtenchars = 0;  +       res = RecodeToUnicode(CODES_UNKNOWNPLANE, bytebuffer, charbuffer, writtenbytes, BUFFER_SIZE, readbytes, writtenchars); - +       UNIT_ASSERT(res == RECODE_OK);      UNIT_ASSERT(readbytes == writtenbytes); - -    wchar32* charbufferend = charbuffer + writtenchars; +  +    wchar32* charbufferend = charbuffer + writtenchars;       DecodeUnknownPlane(charbuffer, charbufferend, CODES_UTF8); - +       UNIT_ASSERT(charbufferend == charbuffer + samplelen);      for (size_t i = 0; i < samplelen; ++i)          UNIT_ASSERT(sampletext[i] == charbuffer[i]); - -    // Now, concatenate the thing with an explicit character and retest +  +    // Now, concatenate the thing with an explicit character and retest       res = RecodeToUnicode(CODES_UNKNOWNPLANE, bytebuffer, charbuffer, writtenbytes, BUFFER_SIZE, readbytes, writtenchars);      UNIT_ASSERT(res == RECODE_OK);      UNIT_ASSERT(readbytes == writtenbytes); - -    charbuffer[writtenchars] = 0x1234; - -    size_t morewrittenchars = 0; +  +    charbuffer[writtenchars] = 0x1234;  +  +    size_t morewrittenchars = 0;       res = RecodeToUnicode(CODES_UNKNOWNPLANE, bytebuffer, charbuffer + writtenchars + 1, writtenbytes, BUFFER_SIZE, readbytes, morewrittenchars);      UNIT_ASSERT(res == RECODE_OK);      UNIT_ASSERT(readbytes == writtenbytes);      UNIT_ASSERT(writtenchars == morewrittenchars); - -    charbuffer[2 * writtenchars + 1] = 0x5678; - -    charbufferend = charbuffer + 2 * writtenchars + 2; +  +    charbuffer[2 * writtenchars + 1] = 0x5678;  +  +    charbufferend = charbuffer + 2 * writtenchars + 2;       DecodeUnknownPlane(charbuffer, charbufferend, CODES_UTF8); - +       UNIT_ASSERT(charbufferend == charbuffer + 2 * samplelen + 2); -    for (size_t i = 0; i < samplelen; ++i) { +    for (size_t i = 0; i < samplelen; ++i) {           UNIT_ASSERT(sampletext[i] == charbuffer[i]);          UNIT_ASSERT(sampletext[i] == charbuffer[samplelen + 1 + i]); -    } +    }       UNIT_ASSERT(0x1234 == charbuffer[samplelen]);      UNIT_ASSERT(0x5678 == charbuffer[2 * samplelen + 1]); @@ -265,8 +265,8 @@ void TCodepageTest::TestUTFFromUnknownPlane() {              UNIT_ASSERT_VALUES_EQUAL(wtr[i], sampletext[i]);          }      } -} - +}  +   static void TestSurrogates(const char* str, const wchar16* wide, size_t wideSize) {      size_t sSize = strlen(str);      size_t wSize = sSize * 2; @@ -298,24 +298,24 @@ void TCodepageTest::TestSurrogatePairs() {      TestSurrogates(utf8NonBMP2, wNonBMPDummy2, Y_ARRAY_SIZE(wNonBMPDummy2));  } -void TCodepageTest::TestEncodingHints() { -    UNIT_ASSERT(CODES_WIN == EncodingHintByName("windows-1251")); -    UNIT_ASSERT(CODES_WIN == EncodingHintByName("Windows1251")); -    UNIT_ASSERT(CODES_WIN == EncodingHintByName("WIN1251")); -    UNIT_ASSERT(CODES_WIN == EncodingHintByName("window-cp1251")); -    UNIT_ASSERT(CODES_WIN == EncodingHintByName("!!!CP1251???")); -    UNIT_ASSERT(CODES_WIN == EncodingHintByName("'ansi-cp1251;'")); -    UNIT_ASSERT(CODES_WIN == EncodingHintByName("charset=Microsoft-CP1251;")); - -    UNIT_ASSERT(CODES_ISO_EAST == EncodingHintByName("iso-8859-2")); -    UNIT_ASSERT(CODES_ISO_EAST == EncodingHintByName("iso-2")); -    UNIT_ASSERT(CODES_ISO_EAST == EncodingHintByName("iso-latin-2")); -    UNIT_ASSERT(CODES_ISO_EAST == EncodingHintByName("charset=\"Latin2\";")); - -    UNIT_ASSERT(CODES_UNKNOWN == EncodingHintByName("widow1251")); -    UNIT_ASSERT(CODES_UNKNOWN == EncodingHintByName("default")); -    UNIT_ASSERT(CODES_UNKNOWN == EncodingHintByName("$phpcharset")); - +void TCodepageTest::TestEncodingHints() {  +    UNIT_ASSERT(CODES_WIN == EncodingHintByName("windows-1251"));  +    UNIT_ASSERT(CODES_WIN == EncodingHintByName("Windows1251"));  +    UNIT_ASSERT(CODES_WIN == EncodingHintByName("WIN1251"));  +    UNIT_ASSERT(CODES_WIN == EncodingHintByName("window-cp1251"));  +    UNIT_ASSERT(CODES_WIN == EncodingHintByName("!!!CP1251???"));  +    UNIT_ASSERT(CODES_WIN == EncodingHintByName("'ansi-cp1251;'"));  +    UNIT_ASSERT(CODES_WIN == EncodingHintByName("charset=Microsoft-CP1251;"));  +  +    UNIT_ASSERT(CODES_ISO_EAST == EncodingHintByName("iso-8859-2"));  +    UNIT_ASSERT(CODES_ISO_EAST == EncodingHintByName("iso-2"));  +    UNIT_ASSERT(CODES_ISO_EAST == EncodingHintByName("iso-latin-2"));  +    UNIT_ASSERT(CODES_ISO_EAST == EncodingHintByName("charset=\"Latin2\";"));  +  +    UNIT_ASSERT(CODES_UNKNOWN == EncodingHintByName("widow1251"));  +    UNIT_ASSERT(CODES_UNKNOWN == EncodingHintByName("default"));  +    UNIT_ASSERT(CODES_UNKNOWN == EncodingHintByName("$phpcharset"));  +       UNIT_ASSERT(CODES_UNSUPPORTED != EncodingHintByName("ShiftJIS"));      UNIT_ASSERT(CODES_UNSUPPORTED != EncodingHintByName("Shift_JIS"));      UNIT_ASSERT(CODES_UNSUPPORTED != EncodingHintByName("Big5")); @@ -324,7 +324,7 @@ void TCodepageTest::TestEncodingHints() {      UNIT_ASSERT(CODES_UNSUPPORTED != EncodingHintByName("charset='Shift_JIS';;"));      UNIT_ASSERT(CODES_UNSUPPORTED != EncodingHintByName("ISO-2022-KR"));      UNIT_ASSERT(CODES_UNSUPPORTED != EncodingHintByName("ISO-2022-jp")); -} +}   void TCodepageTest::TestToLower() {      TTempBuf buf; | 
