Restoring authorship annotation for <neksard@yandex-team.ru>. Commit 1 of 2.

author: neksard <neksard@yandex-team.ru> 2022-02-10 16:45:23 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:45:23 +0300
commit: 8f7cf138264e0caa318144bf8a2c950e0b0a8593 (patch)
tree: 83bf5c8c8047c42d8475e6095df90ccdc3d1b57f /contrib/libs/icu/i18n/inputext.cpp
parent: d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (diff)
download: ydb-8f7cf138264e0caa318144bf8a2c950e0b0a8593.tar.gz
1 files changed, 163 insertions, 163 deletions
diff --git a/contrib/libs/icu/i18n/inputext.cpp b/contrib/libs/icu/i18n/inputext.cpp
index 2d4f8a388a..0d7d40cb5c 100644
--- a/contrib/libs/icu/i18n/inputext.cpp
+++ b/contrib/libs/icu/i18n/inputext.cpp
@@ -1,164 +1,164 @@
 // © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
- **********************************************************************
- *   Copyright (C) 2005-2016, International Business Machines
- *   Corporation and others.  All Rights Reserved.
- **********************************************************************
- */
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION
-
-#include "inputext.h"
-
-#include "cmemory.h"
-#include "cstring.h"
-
-#include <string.h>
-
-U_NAMESPACE_BEGIN
-
-#define BUFFER_SIZE 8192
-
-#define NEW_ARRAY(type,count) (type *) uprv_malloc((count) * sizeof(type))
-#define DELETE_ARRAY(array) uprv_free((void *) (array))
-
-InputText::InputText(UErrorCode &status)
-    : fInputBytes(NEW_ARRAY(uint8_t, BUFFER_SIZE)), // The text to be checked.  Markup will have been
-                                                 //   removed if appropriate.
-      fByteStats(NEW_ARRAY(int16_t, 256)),       // byte frequency statistics for the input text.
-                                                 //   Value is percent, not absolute.
-      fDeclaredEncoding(0),
-      fRawInput(0),
-      fRawLength(0)
-{
-    if (fInputBytes == NULL || fByteStats == NULL) {
-        status = U_MEMORY_ALLOCATION_ERROR;
-    }
-}
-
-InputText::~InputText()
-{
-    DELETE_ARRAY(fDeclaredEncoding);
-    DELETE_ARRAY(fByteStats);
-    DELETE_ARRAY(fInputBytes);
-}
-
-void InputText::setText(const char *in, int32_t len)
-{
-    fInputLen  = 0;
-    fC1Bytes   = FALSE;
-    fRawInput  = (const uint8_t *) in;
-    fRawLength = len == -1? (int32_t)uprv_strlen(in) : len;
-}
-
-void InputText::setDeclaredEncoding(const char* encoding, int32_t len)
-{
-    if(encoding) {
-        if (len == -1) {
-            len = (int32_t)uprv_strlen(encoding);
-        }
-
-        len += 1;     // to make place for the \0 at the end.
-        uprv_free(fDeclaredEncoding);
-        fDeclaredEncoding = NEW_ARRAY(char, len);
-        uprv_strncpy(fDeclaredEncoding, encoding, len);
-    }
-}
-
-UBool InputText::isSet() const 
-{
-    return fRawInput != NULL;
-}
-
-/**
-*  MungeInput - after getting a set of raw input data to be analyzed, preprocess
-*               it by removing what appears to be html markup.
-* 
-* @internal
-*/
-void InputText::MungeInput(UBool fStripTags) {
-    int     srci = 0;
-    int     dsti = 0;
-    uint8_t b;
-    bool    inMarkup = FALSE;
-    int32_t openTags = 0;
-    int32_t badTags  = 0;
-
-    //
-    //  html / xml markup stripping.
-    //     quick and dirty, not 100% accurate, but hopefully good enough, statistically.
-    //     discard everything within < brackets >
-    //     Count how many total '<' and illegal (nested) '<' occur, so we can make some
-    //     guess as to whether the input was actually marked up at all.
-    // TODO: Think about how this interacts with EBCDIC charsets that are detected.
-    if (fStripTags) {
-        for (srci = 0; srci < fRawLength && dsti < BUFFER_SIZE; srci += 1) {
-            b = fRawInput[srci];
-
-            if (b == (uint8_t)0x3C) { /* Check for the ASCII '<' */
-                if (inMarkup) {
-                    badTags += 1;
-                }
-
-                inMarkup = TRUE;
-                openTags += 1;
-            }
-
-            if (! inMarkup) {
-                fInputBytes[dsti++] = b;
-            }
-
-            if (b == (uint8_t)0x3E) { /* Check for the ASCII '>' */
-                inMarkup = FALSE;
-            }
-        }
-
-        fInputLen = dsti;
-    }
-
-    //
-    //  If it looks like this input wasn't marked up, or if it looks like it's
-    //    essentially nothing but markup abandon the markup stripping.
-    //    Detection will have to work on the unstripped input.
-    //
-    if (openTags<5 || openTags/5 < badTags || 
-        (fInputLen < 100 && fRawLength>600))
-    {
-        int32_t limit = fRawLength;
-
-        if (limit > BUFFER_SIZE) {
-            limit = BUFFER_SIZE;
-        }
-
-        for (srci=0; srci<limit; srci++) {
-            fInputBytes[srci] = fRawInput[srci];
-        }
-
-        fInputLen = srci;
-    }
-
-    //
-    // Tally up the byte occurence statistics.
-    // These are available for use by the various detectors.
-    //
-
-    uprv_memset(fByteStats, 0, (sizeof fByteStats[0]) * 256);
-
-    for (srci = 0; srci < fInputLen; srci += 1) {
-        fByteStats[fInputBytes[srci]] += 1;
-    }
-
-    for (int32_t i = 0x80; i <= 0x9F; i += 1) {
-        if (fByteStats[i] != 0) {
-            fC1Bytes = TRUE;
-            break;
-        }
-    }
-}
-
-U_NAMESPACE_END
-#endif
-
+// License & terms of use: http://www.unicode.org/copyright.html 
+/* 
+ ********************************************************************** 
+ *   Copyright (C) 2005-2016, International Business Machines 
+ *   Corporation and others.  All Rights Reserved. 
+ ********************************************************************** 
+ */ 
+ 
+#include "unicode/utypes.h" 
+ 
+#if !UCONFIG_NO_CONVERSION 
+ 
+#include "inputext.h" 
+ 
+#include "cmemory.h" 
+#include "cstring.h" 
+ 
+#include <string.h> 
+ 
+U_NAMESPACE_BEGIN 
+ 
+// Number of bytes allocated for fInputBytes in the constructor; MungeInput()
+// never stores more than this many bytes of (possibly tag-stripped) input.
+#define BUFFER_SIZE 8192 
+ 
+// Allocates room for `count` objects of `type` with uprv_malloc() and casts the
+// result to `type *`.  No constructors are run; the result must be checked by
+// the caller.
+#define NEW_ARRAY(type,count) (type *) uprv_malloc((count) * sizeof(type)) 
+// Releases a buffer through uprv_free().
+#define DELETE_ARRAY(array) uprv_free((void *) (array)) 
+ 
+/**
+ * Allocates the working buffers and puts every field this file touches into
+ * a defined state.
+ *
+ * @param status Set to U_MEMORY_ALLOCATION_ERROR if either buffer could not
+ *               be allocated; otherwise left untouched.
+ */
+InputText::InputText(UErrorCode &status)
+    : fInputBytes(NEW_ARRAY(uint8_t, BUFFER_SIZE)), // The text to be checked.  Markup will have been
+                                                 //   removed if appropriate.
+      fByteStats(NEW_ARRAY(int16_t, 256)),       // Occurrence count of each byte value in the
+                                                 //   input text; filled in by MungeInput().
+      fDeclaredEncoding(0),
+      fRawInput(0),
+      fRawLength(0)
+{
+    // Assigned here rather than in the initializer list so the list's order
+    // stays exactly as it was.  Previously these two fields held
+    // indeterminate values until setText() was called.
+    fInputLen = 0;
+    fC1Bytes  = FALSE;
+
+    if (fInputBytes == NULL || fByteStats == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+}
+ 
+/**
+ * Releases the three heap buffers this object allocated: the munged input
+ * bytes, the byte statistics table and the copy of the declared encoding
+ * name.  fRawInput is not freed here.
+ */
+InputText::~InputText()
+{
+    DELETE_ARRAY(fInputBytes);
+    DELETE_ARRAY(fByteStats);
+    DELETE_ARRAY(fDeclaredEncoding);
+}
+ 
+/**
+ * Points this object at a new block of raw input and resets the state that
+ * MungeInput() derives from it.
+ *
+ * The bytes are not copied: only the pointer is stored, and MungeInput()
+ * reads through it later.
+ *
+ * @param in  Raw input bytes.  A NULL pointer leaves the object with no
+ *            input (isSet() reports false) and a raw length of 0,
+ *            regardless of `len`.
+ * @param len Number of bytes at `in`, or -1 if `in` is NUL-terminated and
+ *            its length should be measured here.
+ */
+void InputText::setText(const char *in, int32_t len)
+{
+    fInputLen = 0;
+    fC1Bytes  = FALSE;
+    fRawInput = (const uint8_t *) in;
+
+    if (in == NULL) {
+        // Nothing to measure or read.  Never hand NULL to uprv_strlen(), and
+        // never keep a non-zero length paired with a NULL pointer.
+        fRawLength = 0;
+    } else if (len == -1) {
+        fRawLength = (int32_t)uprv_strlen(in);
+    } else {
+        fRawLength = len;
+    }
+}
+ 
+/**
+ * Stores a private, NUL-terminated copy of the encoding name supplied by the
+ * caller, replacing any previously stored name.
+ *
+ * The call is a no-op, and the previously stored name is kept, when
+ * `encoding` is NULL, when `len` is negative and not -1, or when memory for
+ * the copy cannot be allocated (this method has no way to report an error).
+ *
+ * @param encoding Encoding name; may be NULL.
+ * @param len      Number of chars of `encoding` to use, or -1 if `encoding`
+ *                 is NUL-terminated and its length should be measured here.
+ */
+void InputText::setDeclaredEncoding(const char* encoding, int32_t len)
+{
+    if (encoding == NULL) {
+        return;
+    }
+
+    if (len == -1) {
+        len = (int32_t)uprv_strlen(encoding);
+    }
+
+    if (len < 0) {
+        // A negative count would turn into an enormous allocation size.
+        return;
+    }
+
+    // Build the copy before releasing the old buffer, so that a caller
+    // passing a pointer into the current fDeclaredEncoding is still safe.
+    char *copy = NEW_ARRAY(char, len + 1);   // +1 for the terminating \0.
+    if (copy == NULL) {
+        return;
+    }
+
+    // Copy exactly len chars -- encoding[len] is never read -- and terminate
+    // explicitly, since the source need not have a \0 at that position.
+    uprv_strncpy(copy, encoding, len);
+    copy[len] = 0;
+
+    uprv_free(fDeclaredEncoding);
+    fDeclaredEncoding = copy;
+}
+ 
+/**
+ * Reports whether raw input has been supplied, i.e. whether fRawInput is
+ * non-NULL.
+ */
+UBool InputText::isSet() const
+{
+    if (fRawInput == NULL) {
+        return FALSE;
+    }
+    return TRUE;
+}
+ 
+/**
+*  MungeInput - prepare the raw input for the detectors.
+*
+*  Fills fInputBytes (at most BUFFER_SIZE bytes) either with the raw input
+*  minus anything that looks like html/xml markup, or with the raw input
+*  unchanged, then rebuilds the byte statistics and the C1-bytes flag.
+*
+*  @param fStripTags  when true, try removing <...> runs before analysis.
+*
+* @internal
+*/
+void InputText::MungeInput(UBool fStripTags) {
+    int32_t tagCount    = 0;   // every '<' seen
+    int32_t nestedCount = 0;   // '<' seen while already inside a tag
+
+    //
+    //  Markup stripping: quick and dirty, not 100% accurate, but hopefully
+    //  good enough statistically.  Everything from a '<' through the next
+    //  '>' is dropped.  The two counters let us guess afterwards whether the
+    //  input was really marked up.
+    // TODO: Think about how this interacts with EBCDIC charsets that are detected.
+    //
+    if (fStripTags) {
+        bool insideTag = false;
+        int  out       = 0;
+
+        for (int in = 0; in < fRawLength && out < BUFFER_SIZE; ++in) {
+            const uint8_t ch = fRawInput[in];
+
+            if (ch == 0x3C) {          // ASCII '<'
+                if (insideTag) {
+                    ++nestedCount;
+                }
+                insideTag = true;
+                ++tagCount;
+            }
+
+            if (!insideTag) {
+                fInputBytes[out++] = ch;
+            }
+
+            if (ch == 0x3E) {          // ASCII '>'
+                insideTag = false;
+            }
+        }
+
+        fInputLen = out;
+    }
+
+    //
+    //  Give up on the stripped text when there were too few tags, too many
+    //  nested '<', or almost nothing left of a long input.  In those cases
+    //  the detectors work on the raw bytes, truncated to BUFFER_SIZE.
+    //
+    const bool useRawBytes = tagCount < 5
+                          || tagCount / 5 < nestedCount
+                          || (fInputLen < 100 && fRawLength > 600);
+
+    if (useRawBytes) {
+        const int32_t limit = (fRawLength > BUFFER_SIZE) ? BUFFER_SIZE : fRawLength;
+        int32_t copied = 0;
+
+        while (copied < limit) {
+            fInputBytes[copied] = fRawInput[copied];
+            ++copied;
+        }
+
+        fInputLen = copied;
+    }
+
+    //
+    // Tally up the byte occurrence statistics.
+    // These are available for use by the various detectors.
+    //
+    uprv_memset(fByteStats, 0, 256 * (sizeof fByteStats[0]));
+
+    for (int32_t pos = 0; pos < fInputLen; ++pos) {
+        fByteStats[fInputBytes[pos]]++;
+    }
+
+    // Look for the first byte value in 0x80..0x9F that occurred at least
+    // once.  The flag is only ever raised here, never cleared.
+    int32_t c1 = 0x80;
+    while (c1 <= 0x9F && fByteStats[c1] == 0) {
+        ++c1;
+    }
+    if (c1 <= 0x9F) {
+        fC1Bytes = TRUE;
+    }
+}
+ 
+U_NAMESPACE_END 
+#endif 
+
author	neksard <neksard@yandex-team.ru>	2022-02-10 16:45:23 +0300
committer	Daniil Cherednik <dcherednik@yandex-team.ru>	2022-02-10 16:45:23 +0300
commit	8f7cf138264e0caa318144bf8a2c950e0b0a8593 (patch)
tree	83bf5c8c8047c42d8475e6095df90ccdc3d1b57f /contrib/libs/icu/i18n/inputext.cpp
parent	d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (diff)
download	ydb-8f7cf138264e0caa318144bf8a2c950e0b0a8593.tar.gz