Set SpacesInLineCommentPrefix to 1 in /util

Set SpacesInLineCommentPrefix to 1 in /util 3853f9ec5143722c1bebd8dc0ffc9b61a6c17657
author: dmasloff <dmasloff@yandex-team.com> 2024-08-17 23:33:42 +0300
committer: dmasloff <dmasloff@yandex-team.com> 2024-08-17 23:43:45 +0300
commit: 69340f4614e853b9319df4b454ab7497711ee3cd (patch)
tree: 9902a3e2f58fe0bd9a157e7b51ad1cc52efa5744 /util/charset
parent: a905b53ec410defd5d2c40031ef8b34bb50a29f8 (diff)
download: ydb-69340f4614e853b9319df4b454ab7497711ee3cd.tar.gz
5 files changed, 57 insertions, 57 deletions
diff --git a/util/charset/utf8.h b/util/charset/utf8.h
index d0c45e9d06..b105d8db9d 100644
--- a/util/charset/utf8.h
+++ b/util/charset/utf8.h
@@ -16,23 +16,23 @@ inline unsigned char UTF8LeadByteMask(size_t utf8_rune_len) {
 }
 
 inline size_t UTF8RuneLen(const unsigned char lead_byte) {
-    //b0XXXXXXX
+    // b0XXXXXXX
     if ((lead_byte & 0x80) == 0x00) {
         return 1;
     }
-    //b110XXXXX
+    // b110XXXXX
     if ((lead_byte & 0xe0) == 0xc0) {
         return 2;
     }
-    //b1110XXXX
+    // b1110XXXX
     if ((lead_byte & 0xf0) == 0xe0) {
         return 3;
     }
-    //b11110XXX
+    // b11110XXX
     if ((lead_byte & 0xf8) == 0xf0) {
         return 4;
     }
-    //b10XXXXXX
+    // b10XXXXXX
     return 0;
 }
 
@@ -73,7 +73,7 @@ inline RECODE_RESULT GetUTF8CharLen(size_t& n, const unsigned char* p, const uns
     Y_ASSERT(p < e); // since p < e then we will check RECODE_EOINPUT only for n > 1 (see calls of this functions)
     switch (UTF8RuneLen(*p)) {
         case 0:
-            return RECODE_BROKENSYMBOL; //[BROKENSYMBOL] in first byte
+            return RECODE_BROKENSYMBOL; // [BROKENSYMBOL] in first byte
 
         case 1:
             n = 1;
@@ -194,27 +194,27 @@ inline RECODE_RESULT SafeReadUTF8Char(wchar32& rune, size_t& rune_len, const uns
 
     size_t _len = UTF8RuneLen(*s);
     if (s + _len > end)
-        return RECODE_EOINPUT; //[EOINPUT]
+        return RECODE_EOINPUT; // [EOINPUT]
     if (_len == 0)
-        return RECODE_BROKENSYMBOL; //[BROKENSYMBOL] in first byte
-    _rune = *s++;                   //[00000000 0XXXXXXX]
+        return RECODE_BROKENSYMBOL; // [BROKENSYMBOL] in first byte
+    _rune = *s++;                   // [00000000 0XXXXXXX]
 
     if (_len > 1) {
         _rune &= UTF8LeadByteMask(_len);
         unsigned char ch = *s++;
         if (!IsUTF8ContinuationByte(ch))
-            return RECODE_BROKENSYMBOL; //[BROKENSYMBOL] in second byte
-        PutUTF8SixBits(_rune, ch);      //[00000XXX XXYYYYYY]
+            return RECODE_BROKENSYMBOL; // [BROKENSYMBOL] in second byte
+        PutUTF8SixBits(_rune, ch);      // [00000XXX XXYYYYYY]
         if (_len > 2) {
             ch = *s++;
             if (!IsUTF8ContinuationByte(ch))
-                return RECODE_BROKENSYMBOL; //[BROKENSYMBOL] in third byte
-            PutUTF8SixBits(_rune, ch);      //[XXXXYYYY YYZZZZZZ]
+                return RECODE_BROKENSYMBOL; // [BROKENSYMBOL] in third byte
+            PutUTF8SixBits(_rune, ch);      // [XXXXYYYY YYZZZZZZ]
             if (_len > 3) {
                 ch = *s;
                 if (!IsUTF8ContinuationByte(ch))
-                    return RECODE_BROKENSYMBOL; //[BROKENSYMBOL] in fourth byte
-                PutUTF8SixBits(_rune, ch);      //[XXXYY YYYYZZZZ ZZQQQQQQ]
+                    return RECODE_BROKENSYMBOL; // [BROKENSYMBOL] in fourth byte
+                PutUTF8SixBits(_rune, ch);      // [XXXYY YYYYZZZZ ZZQQQQQQ]
                 if (!IsValidUTF8Rune<4, strictMode>(_rune))
                     return RECODE_BROKENSYMBOL;
             } else {
@@ -241,10 +241,10 @@ Y_FORCE_INLINE RECODE_RESULT ReadUTF8CharAndAdvance(wchar32& rune, const unsigne
     switch (UTF8RuneLen(*p)) {
         case 0:
             rune = BROKEN_RUNE;
-            return RECODE_BROKENSYMBOL; //[BROKENSYMBOL] in first byte
+            return RECODE_BROKENSYMBOL; // [BROKENSYMBOL] in first byte
 
         case 1:
-            rune = *p; //[00000000 0XXXXXXX]
+            rune = *p; // [00000000 0XXXXXXX]
             ++p;
             return RECODE_OK;
 
@@ -255,8 +255,8 @@ Y_FORCE_INLINE RECODE_RESULT ReadUTF8CharAndAdvance(wchar32& rune, const unsigne
                 rune = BROKEN_RUNE;
                 return RECODE_BROKENSYMBOL;
             } else {
-                PutUTF8LeadBits(rune, *p++, 2); //[00000000 000XXXXX]
-                PutUTF8SixBits(rune, *p++);     //[00000XXX XXYYYYYY]
+                PutUTF8LeadBits(rune, *p++, 2); // [00000000 000XXXXX]
+                PutUTF8SixBits(rune, *p++);     // [00000XXX XXYYYYYY]
                 if (!IsValidUTF8Rune<2, strictMode>(rune)) {
                     p -= 2;
                     rune = BROKEN_RUNE;
@@ -271,9 +271,9 @@ Y_FORCE_INLINE RECODE_RESULT ReadUTF8CharAndAdvance(wchar32& rune, const unsigne
                 rune = BROKEN_RUNE;
                 return RECODE_BROKENSYMBOL;
             } else {
-                PutUTF8LeadBits(rune, *p++, 3); //[00000000 0000XXXX]
-                PutUTF8SixBits(rune, *p++);     //[000000XX XXYYYYYY]
-                PutUTF8SixBits(rune, *p++);     //[XXXXYYYY YYZZZZZZ]
+                PutUTF8LeadBits(rune, *p++, 3); // [00000000 0000XXXX]
+                PutUTF8SixBits(rune, *p++);     // [000000XX XXYYYYYY]
+                PutUTF8SixBits(rune, *p++);     // [XXXXYYYY YYZZZZZZ]
                 // check for overlong encoding and surrogates
                 if (!IsValidUTF8Rune<3, strictMode>(rune)) {
                     p -= 3;
@@ -289,10 +289,10 @@ Y_FORCE_INLINE RECODE_RESULT ReadUTF8CharAndAdvance(wchar32& rune, const unsigne
                 rune = BROKEN_RUNE;
                 return RECODE_BROKENSYMBOL;
             } else {
-                PutUTF8LeadBits(rune, *p++, 4); //[00000000 00000000 00000XXX]
-                PutUTF8SixBits(rune, *p++);     //[00000000 0000000X XXYYYYYY]
-                PutUTF8SixBits(rune, *p++);     //[00000000 0XXXYYYY YYZZZZZZ]
-                PutUTF8SixBits(rune, *p++);     //[000XXXYY YYYYZZZZ ZZQQQQQQ]
+                PutUTF8LeadBits(rune, *p++, 4); // [00000000 00000000 00000XXX]
+                PutUTF8SixBits(rune, *p++);     // [00000000 0000000X XXYYYYYY]
+                PutUTF8SixBits(rune, *p++);     // [00000000 0XXXYYYY YYZZZZZZ]
+                PutUTF8SixBits(rune, *p++);     // [000XXXYY YYYYZZZZ ZZQQQQQQ]
                 if (!IsValidUTF8Rune<4, strictMode>(rune)) {
                     p -= 4;
                     rune = BROKEN_RUNE;
diff --git a/util/charset/utf8_ut.cpp b/util/charset/utf8_ut.cpp
index 9e68881cca..00981f8060 100644
--- a/util/charset/utf8_ut.cpp
+++ b/util/charset/utf8_ut.cpp
@@ -27,17 +27,17 @@ Y_UNIT_TEST_SUITE(TUtfUtilTest) {
 
         {
             const char* weird = "\xC8\xBE"; // 'Ⱦ', U+023E. strlen(weird)==2, strlen(tolower_utf8(weird)) is 3
-            const char* turkI = "İ";        //strlen("İ") == 2, strlen(tolower_utf8("İ") == 1
+            const char* turkI = "İ";        // strlen("İ") == 2, strlen(tolower_utf8("İ") == 1
             TStringBuf chars[] = {"f", "F", "Б", "б", weird, turkI};
             const int N = Y_ARRAY_SIZE(chars);
-            //try all combinations of these letters.
+            // try all combinations of these letters.
             int numberOfVariants = 1;
             for (int len = 0; len <= 4; ++len) {
                 for (int i = 0; i < numberOfVariants; ++i) {
                     TString s;
                     int k = i;
                     for (int j = 0; j < len; ++j) {
-                        //Treat 'i' like number in base-N system with digits from 'chars'-array
+                        // Treat 'i' like number in base-N system with digits from 'chars'-array
                         s += chars[k % N];
                         k /= N;
                     }
@@ -67,17 +67,17 @@ Y_UNIT_TEST_SUITE(TUtfUtilTest) {
 
         {
             const char* weird = "\xC8\xBE"; // 'Ⱦ', U+023E. strlen(weird)==2, strlen(ToUpper_utf8(weird)) is 3
-            const char* turkI = "İ";        //strlen("İ") == 2, strlen(ToUpper_utf8("İ") == 1
+            const char* turkI = "İ";        // strlen("İ") == 2, strlen(ToUpper_utf8("İ") == 1
             TStringBuf chars[] = {"F", "f", "б", "Б", turkI, weird};
             const int N = Y_ARRAY_SIZE(chars);
-            //try all combinations of these letters.
+            // try all combinations of these letters.
             int numberOfVariants = 1;
             for (int len = 0; len <= 4; ++len) {
                 for (int i = 0; i < numberOfVariants; ++i) {
                     TString s;
                     int k = i;
                     for (int j = 0; j < len; ++j) {
-                        //Treat 'i' like number in base-N system with digits from 'chars'-array
+                        // Treat 'i' like number in base-N system with digits from 'chars'-array
                         s += chars[k % N];
                         k /= N;
                     }
diff --git a/util/charset/wide.h b/util/charset/wide.h
index 5a81f8aa47..06f48d60da 100644
--- a/util/charset/wide.h
+++ b/util/charset/wide.h
@@ -341,7 +341,7 @@ inline size_t UTF8ToWideImpl(const char* text, size_t len, TCharType* dest, size
     const unsigned char* cur = reinterpret_cast<const unsigned char*>(text);
     const unsigned char* last = cur + len;
     TCharType* p = dest;
-#ifdef _sse_ //can't check for sse4, as we build most of arcadia without sse4 support even on platforms that support it
+#ifdef _sse_ // can't check for sse4, as we build most of arcadia without sse4 support even on platforms that support it
     if (cur + 16 <= last && NX86::CachedHaveSSE41()) {
         ::NDetail::UTF8ToWideImplSSE41(cur, last, p);
     }
@@ -606,7 +606,7 @@ namespace NDetail {
 
 #ifdef _sse2_
     inline bool DoIsStringASCIISSE(const unsigned char* first, const unsigned char* last) {
-        //scalar version for short strings
+        // scalar version for short strings
         if (first + 8 > last) {
             return ::NDetail::DoIsStringASCIISlow(first, last);
         }
@@ -637,7 +637,7 @@ namespace NDetail {
 
         return ::NDetail::DoIsStringASCIISlow(first, last);
     }
-#endif //_sse2_
+#endif // _sse2_
 
 }
 
diff --git a/util/charset/wide_sse41.cpp b/util/charset/wide_sse41.cpp
index d1f2a74851..0d86cb95f8 100644
--- a/util/charset/wide_sse41.cpp
+++ b/util/charset/wide_sse41.cpp
@@ -18,17 +18,17 @@ namespace NDetail {
     #include <emmintrin.h>
     #include <smmintrin.h>
 
-//processes to the first error, or until less then 16 bytes left
-//most code taken from https://woboq.com/blog/utf-8-processing-using-simd.html
+// processes to the first error, or until fewer than 16 bytes are left
+// most code taken from https://woboq.com/blog/utf-8-processing-using-simd.html
 
-//return dstAdvance 0 in case of problems
+// return dstAdvance 0 in case of problems
 static Y_FORCE_INLINE ui32 Unpack16BytesIntoUtf16IfNoSurrogats(const unsigned char*& cur, __m128i& utf16Low, __m128i& utf16High) {
     unsigned char curAligned[16];
 
     memcpy(curAligned, cur, sizeof(__m128i));
     __m128i chunk = _mm_load_si128(reinterpret_cast<const __m128i*>(curAligned));
 
-    //only ascii characters - simple copy
+    // only ascii characters - simple copy
     if (!_mm_movemask_epi8(chunk)) {
         utf16Low = _mm_unpacklo_epi8(chunk, _mm_setzero_si128());
         utf16High = _mm_unpackhi_epi8(chunk, _mm_setzero_si128());
@@ -50,9 +50,9 @@ static Y_FORCE_INLINE ui32 Unpack16BytesIntoUtf16IfNoSurrogats(const unsigned ch
     __m128i chunkLow, chunkHigh;
 
     if (Y_LIKELY(!_mm_movemask_epi8(cond3))) {
-        //main case: no bloks of size 3 or 4
+        // main case: no blocks of size 3 or 4
 
-        //rune len for start of multi-byte sequences (0 for b0... and b10..., 2 for b110..., etc.)
+        // rune len for start of multi-byte sequences (0 for b0... and b10..., 2 for b110..., etc.)
         __m128i count = _mm_and_si128(state, _mm_set1_epi8(0x7));
 
         __m128i countSub1 = _mm_subs_epu8(count, _mm_set1_epi8(0x1));
@@ -68,7 +68,7 @@ static Y_FORCE_INLINE ui32 Unpack16BytesIntoUtf16IfNoSurrogats(const unsigned ch
         __m128i isBeginMultibyteMask = _mm_cmpgt_epi8(count, _mm_set1_epi8(0));
         __m128i needNoContinuationMask = _mm_cmpeq_epi8(continuation1, _mm_set1_epi8(0));
         __m128i isBeginMask = _mm_add_epi8(isBeginMultibyteMask, isAsciiMask);
-        //each symbol should be exactly one of ascii, continuation or begin
+        // each symbol should be exactly one of ascii, continuation or begin
         __m128i okMask = _mm_cmpeq_epi8(isBeginMask, needNoContinuationMask);
 
         if (_mm_movemask_epi8(okMask) != 0xFFFF) {
@@ -114,7 +114,7 @@ static Y_FORCE_INLINE ui32 Unpack16BytesIntoUtf16IfNoSurrogats(const unsigned ch
             return 0;
         }
 
-        //rune len for start of multi-byte sequences (0 for b0... and b10..., 2 for b110..., etc.)
+        // rune len for start of multi-byte sequences (0 for b0... and b10..., 2 for b110..., etc.)
         __m128i count = _mm_and_si128(state, _mm_set1_epi8(0x7));
 
         __m128i countSub1 = _mm_subs_epu8(count, _mm_set1_epi8(0x1));
@@ -132,7 +132,7 @@ static Y_FORCE_INLINE ui32 Unpack16BytesIntoUtf16IfNoSurrogats(const unsigned ch
         __m128i isBeginMultibyteMask = _mm_cmpgt_epi8(count, _mm_set1_epi8(0));
         __m128i needNoContinuationMask = _mm_cmpeq_epi8(continuationsRunelen, _mm_set1_epi8(0));
         __m128i isBeginMask = _mm_add_epi8(isBeginMultibyteMask, isAsciiMask);
-        //each symbol should be exactly one of ascii, continuation or begin
+        // each symbol should be exactly one of ascii, continuation or begin
         __m128i okMask = _mm_cmpeq_epi8(isBeginMask, needNoContinuationMask);
 
         if (_mm_movemask_epi8(okMask) != 0xFFFF) {
@@ -209,7 +209,7 @@ namespace NDetail {
             memcpy(dest, destAligned, sizeof(__m128i) * 2);
             dest += dstAdvance;
         }
-        //The rest will be handled sequencially.
+        // The rest will be handled sequentially.
         // Possible improvement: go back to the vectorized processing after the error or the 4 byte sequence
     }
 
@@ -225,7 +225,7 @@ namespace NDetail {
                 break;
             }
 
-            //NOTE: we only work in case without surrogat pairs, so we can make simple copying with zeroes in 2 high bytes
+            // NOTE: we only handle the case without surrogate pairs, so we can simply copy with zeroes in the 2 high bytes
             __m128i utf32_lowlow = _mm_unpacklo_epi16(utf16Low, _mm_set1_epi8(0));
             __m128i utf32_lowhigh = _mm_unpackhi_epi16(utf16Low, _mm_set1_epi8(0));
             __m128i utf32_highlow = _mm_unpacklo_epi16(utf16High, _mm_set1_epi8(0));
@@ -239,7 +239,7 @@ namespace NDetail {
             memcpy(dest, destAligned, sizeof(__m128i) * 4);
             dest += dstAdvance;
         }
-        //The rest will be handled sequencially.
+        // The rest will be handled sequentially.
         // Possible improvement: go back to the vectorized processing after the error or the 4 byte sequence
     }
 }
diff --git a/util/charset/wide_ut.cpp b/util/charset/wide_ut.cpp
index b33dd0c0de..dec843717e 100644
--- a/util/charset/wide_ut.cpp
+++ b/util/charset/wide_ut.cpp
@@ -111,16 +111,16 @@ namespace {
     }
 
     //! use this function to dump UTF8 text into a file in case of any changes
-    //    void DumpUTF8Text() {
-    //        TString s = WideToUTF8(UnicodeText);
-    //        std::ofstream f("utf8.txt");
-    //        f << std::hex;
-    //        for (int i = 0; i < (int)s.size(); ++i) {
-    //            f << "0x" << std::setw(2) << std::setfill('0') << (int)(ui8)s[i] << ", ";
-    //            if ((i + 1) % 16 == 0)
-    //                f << std::endl;
-    //        }
-    //    }
+    // void DumpUTF8Text() {
+    //     TString s = WideToUTF8(UnicodeText);
+    //     std::ofstream f("utf8.txt");
+    //     f << std::hex;
+    //     for (int i = 0; i < (int)s.size(); ++i) {
+    //         f << "0x" << std::setw(2) << std::setfill('0') << (int)(ui8)s[i] << ", ";
+    //         if ((i + 1) % 16 == 0)
+    //             f << std::endl;
+    //     }
+    // }
 
     template <StrictUTF8 strictMode = StrictUTF8::No>
     void CheckRecodeOK(wchar32 expected, unsigned char* first, size_t n) {
author	dmasloff <dmasloff@yandex-team.com>	2024-08-17 23:33:42 +0300
committer	dmasloff <dmasloff@yandex-team.com>	2024-08-17 23:43:45 +0300
commit	69340f4614e853b9319df4b454ab7497711ee3cd (patch)
tree	9902a3e2f58fe0bd9a157e7b51ad1cc52efa5744 /util/charset
parent	a905b53ec410defd5d2c40031ef8b34bb50a29f8 (diff)
download	ydb-69340f4614e853b9319df4b454ab7497711ee3cd.tar.gz