Restoring authorship annotation for <neksard@yandex-team.ru>. Commit 1 of 2.

author: neksard <neksard@yandex-team.ru> 2022-02-10 16:45:23 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:45:23 +0300
commit: 8f7cf138264e0caa318144bf8a2c950e0b0a8593 (patch)
tree: 83bf5c8c8047c42d8475e6095df90ccdc3d1b57f /contrib/libs/icu/i18n/csrmbcs.h
parent: d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (diff)
download: ydb-8f7cf138264e0caa318144bf8a2c950e0b0a8593.tar.gz
1 files changed, 206 insertions, 206 deletions
diff --git a/contrib/libs/icu/i18n/csrmbcs.h b/contrib/libs/icu/i18n/csrmbcs.h
index 8ccf1d56a9..ce4f5dbee4 100644
--- a/contrib/libs/icu/i18n/csrmbcs.h
+++ b/contrib/libs/icu/i18n/csrmbcs.h
@@ -1,207 +1,207 @@
 // © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
- **********************************************************************
- *   Copyright (C) 2005-2012, International Business Machines
- *   Corporation and others.  All Rights Reserved.
- **********************************************************************
- */
-
-#ifndef __CSRMBCS_H
-#define __CSRMBCS_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION
-
-#include "csrecog.h"
-
-U_NAMESPACE_BEGIN
-
-// "Character"  iterated character class.
-//    Recognizers for specific mbcs encodings make their "characters" available
-//    by providing a nextChar() function that fills in an instance of IteratedChar
-//    with the next char from the input.
-//    The returned characters are not converted to Unicode, but remain as the raw
-//    bytes (concatenated into an int) from the codepage data.
-//
-//  For Asian charsets, use the raw input rather than the input that has been
-//   stripped of markup.  Detection only considers multi-byte chars, effectively
-//   stripping markup anyway, and double byte chars do occur in markup too.
-//
-class IteratedChar : public UMemory
-{
-public:
-    uint32_t charValue;             // 1-4 bytes from the raw input data
-    int32_t  index;
-    int32_t  nextIndex;
-    UBool    error;
-    UBool    done;
-
-public:
-    IteratedChar();
-    //void reset();
-    int32_t nextByte(InputText* det);
-};
-
-
-class CharsetRecog_mbcs : public CharsetRecognizer {
-
-protected:
-    /**
-     * Test the match of this charset with the input text data
-     *      which is obtained via the CharsetDetector object.
-     *
-     * @param det  The CharsetDetector, which contains the input text
-     *             to be checked for being in this charset.
-     * @return     Two values packed into one int  (Damn java, anyhow)
-     *             <br/>
-     *             bits 0-7:  the match confidence, ranging from 0-100
-     *             <br/>
-     *             bits 8-15: The match reason, an enum-like value.
-     */
-    int32_t match_mbcs(InputText* det, const uint16_t commonChars[], int32_t commonCharsLen) const;
-
-public:
-
-    virtual ~CharsetRecog_mbcs();
-
-    /**
-     * Get the IANA name of this charset.
-     * @return the charset name.
-     */
-
-    const char *getName() const = 0;
-    const char *getLanguage() const = 0;
-    UBool match(InputText* input, CharsetMatch *results) const = 0;
-
-    /**
-     * Get the next character (however many bytes it is) from the input data
-     *    Subclasses for specific charset encodings must implement this function
-     *    to get characters according to the rules of their encoding scheme.
-     *
-     *  This function is not a method of class IteratedChar only because
-     *   that would require a lot of extra derived classes, which is awkward.
-     * @param it  The IteratedChar "struct" into which the returned char is placed.
-     * @param det The charset detector, which is needed to get at the input byte data
-     *            being iterated over.
-     * @return    True if a character was returned, false at end of input.
-     */
-    virtual UBool nextChar(IteratedChar *it, InputText *textIn) const = 0;
-
-};
-
-
-/**
- *   Shift-JIS charset recognizer.
- *
- */
-class CharsetRecog_sjis : public CharsetRecog_mbcs {
-public:
-    virtual ~CharsetRecog_sjis();
-
-    UBool nextChar(IteratedChar *it, InputText *det) const;
-
-    UBool match(InputText* input, CharsetMatch *results) const;
-
-    const char *getName() const;
-    const char *getLanguage() const;
-
-};
-
-
-/**
- *   EUC charset recognizers.  One abstract class that provides the common function
- *             for getting the next character according to the EUC encoding scheme,
- *             and nested derived classes for EUC_KR, EUC_JP, EUC_CN.
- *
- */
-class CharsetRecog_euc : public CharsetRecog_mbcs
-{
-public:
-    virtual ~CharsetRecog_euc();
-
-    const char *getName() const = 0;
-    const char *getLanguage() const = 0;
-
-    UBool match(InputText* input, CharsetMatch *results) const = 0;
-    /*
-     *  (non-Javadoc)
-     *  Get the next character value for EUC based encodings.
-     *  Character "value" is simply the raw bytes that make up the character
-     *     packed into an int.
-     */
-    UBool nextChar(IteratedChar *it, InputText *det) const;
-};
-
-/**
- * The charset recognize for EUC-JP.  A singleton instance of this class
- *    is created and kept by the public CharsetDetector class
- */
-class CharsetRecog_euc_jp : public CharsetRecog_euc
-{
-public:
-    virtual ~CharsetRecog_euc_jp();
-
-    const char *getName() const;
-    const char *getLanguage() const;
-
-    UBool match(InputText* input, CharsetMatch *results) const;
-};
-
-/**
- * The charset recognize for EUC-KR.  A singleton instance of this class
- *    is created and kept by the public CharsetDetector class
- */
-class CharsetRecog_euc_kr : public CharsetRecog_euc
-{
-public:
-    virtual ~CharsetRecog_euc_kr();
-
-    const char *getName() const;
-    const char *getLanguage() const;
-
-    UBool match(InputText* input, CharsetMatch *results) const;
-};
-
-/**
- *
- *   Big5 charset recognizer.
- *
- */
-class CharsetRecog_big5 : public CharsetRecog_mbcs
-{
-public:
-    virtual ~CharsetRecog_big5();
-
-    UBool nextChar(IteratedChar* it, InputText* det) const;
-
-    const char *getName() const;
-    const char *getLanguage() const;
-
-    UBool match(InputText* input, CharsetMatch *results) const;
-};
-
-
-/**
- *
- *   GB-18030 recognizer. Uses simplified Chinese statistics.
- *
- */
-class CharsetRecog_gb_18030 : public CharsetRecog_mbcs
-{
-public:
-    virtual ~CharsetRecog_gb_18030();
-
-    UBool nextChar(IteratedChar* it, InputText* det) const;
-
-    const char *getName() const;
-    const char *getLanguage() const;
-
-    UBool match(InputText* input, CharsetMatch *results) const;
-};
-
-U_NAMESPACE_END
-
-#endif
-#endif /* __CSRMBCS_H */
+// License & terms of use: http://www.unicode.org/copyright.html 
+/* 
+ ********************************************************************** 
+ *   Copyright (C) 2005-2012, International Business Machines 
+ *   Corporation and others.  All Rights Reserved. 
+ ********************************************************************** 
+ */ 
+ 
+#ifndef __CSRMBCS_H 
+#define __CSRMBCS_H 
+ 
+#include "unicode/utypes.h" 
+ 
+#if !UCONFIG_NO_CONVERSION 
+ 
+#include "csrecog.h" 
+ 
+U_NAMESPACE_BEGIN 
+ 
+// "Character"  iterated character class. 
+//    Recognizers for specific mbcs encodings make their "characters" available 
+//    by providing a nextChar() function that fills in an instance of IteratedChar 
+//    with the next char from the input. 
+//    The returned characters are not converted to Unicode, but remain as the raw 
+//    bytes (concatenated into an int) from the codepage data. 
+// 
+//  For Asian charsets, use the raw input rather than the input that has been 
+//   stripped of markup.  Detection only considers multi-byte chars, effectively 
+//   stripping markup anyway, and double byte chars do occur in markup too. 
+// 
+class IteratedChar : public UMemory 
+{ 
+public: 
+    uint32_t charValue;             // 1-4 bytes from the raw input data 
+    int32_t  index; 
+    int32_t  nextIndex; 
+    UBool    error; 
+    UBool    done; 
+ 
+public: 
+    IteratedChar(); 
+    //void reset(); 
+    int32_t nextByte(InputText* det); 
+}; 
+ 
+ 
+class CharsetRecog_mbcs : public CharsetRecognizer { 
+ 
+protected: 
+    /** 
+     * Test the match of this charset with the input text data 
+     *      which is obtained via the CharsetDetector object. 
+     * 
+     * @param det  The CharsetDetector, which contains the input text 
+     *             to be checked for being in this charset. 
+     * @return     Two values packed into one int  (Damn java, anyhow) 
+     *             <br/> 
+     *             bits 0-7:  the match confidence, ranging from 0-100 
+     *             <br/> 
+     *             bits 8-15: The match reason, an enum-like value. 
+     */ 
+    int32_t match_mbcs(InputText* det, const uint16_t commonChars[], int32_t commonCharsLen) const; 
+ 
+public: 
+ 
+    virtual ~CharsetRecog_mbcs(); 
+ 
+    /** 
+     * Get the IANA name of this charset. 
+     * @return the charset name. 
+     */ 
+ 
+    const char *getName() const = 0; 
+    const char *getLanguage() const = 0; 
+    UBool match(InputText* input, CharsetMatch *results) const = 0; 
+ 
+    /** 
+     * Get the next character (however many bytes it is) from the input data 
+     *    Subclasses for specific charset encodings must implement this function 
+     *    to get characters according to the rules of their encoding scheme. 
+     * 
+     *  This function is not a method of class IteratedChar only because 
+     *   that would require a lot of extra derived classes, which is awkward. 
+     * @param it  The IteratedChar "struct" into which the returned char is placed. 
+     * @param textIn The input text object, which is needed to get at the input byte data 
+     *            being iterated over. 
+     * @return    True if a character was returned, false at end of input. 
+     */ 
+    virtual UBool nextChar(IteratedChar *it, InputText *textIn) const = 0; 
+ 
+}; 
+ 
+ 
+/** 
+ *   Shift-JIS charset recognizer. 
+ * 
+ */ 
+class CharsetRecog_sjis : public CharsetRecog_mbcs { 
+public: 
+    virtual ~CharsetRecog_sjis(); 
+ 
+    UBool nextChar(IteratedChar *it, InputText *det) const; 
+ 
+    UBool match(InputText* input, CharsetMatch *results) const; 
+ 
+    const char *getName() const; 
+    const char *getLanguage() const; 
+ 
+}; 
+ 
+ 
+/** 
+ *   EUC charset recognizers.  One abstract class that provides the common function 
+ *             for getting the next character according to the EUC encoding scheme, 
+ *             and derived classes for EUC_JP and EUC_KR. 
+ * 
+ */ 
+class CharsetRecog_euc : public CharsetRecog_mbcs 
+{ 
+public: 
+    virtual ~CharsetRecog_euc(); 
+ 
+    const char *getName() const = 0; 
+    const char *getLanguage() const = 0; 
+ 
+    UBool match(InputText* input, CharsetMatch *results) const = 0; 
+    /* 
+     *  (non-Javadoc) 
+     *  Get the next character value for EUC based encodings. 
+     *  Character "value" is simply the raw bytes that make up the character 
+     *     packed into an int. 
+     */ 
+    UBool nextChar(IteratedChar *it, InputText *det) const; 
+}; 
+ 
+/** 
+ * The charset recognizer for EUC-JP.  A singleton instance of this class 
+ *    is created and kept by the public CharsetDetector class 
+ */ 
+class CharsetRecog_euc_jp : public CharsetRecog_euc 
+{ 
+public: 
+    virtual ~CharsetRecog_euc_jp(); 
+ 
+    const char *getName() const; 
+    const char *getLanguage() const; 
+ 
+    UBool match(InputText* input, CharsetMatch *results) const; 
+}; 
+ 
+/** 
+ * The charset recognizer for EUC-KR.  A singleton instance of this class 
+ *    is created and kept by the public CharsetDetector class 
+ */ 
+class CharsetRecog_euc_kr : public CharsetRecog_euc 
+{ 
+public: 
+    virtual ~CharsetRecog_euc_kr(); 
+ 
+    const char *getName() const; 
+    const char *getLanguage() const; 
+ 
+    UBool match(InputText* input, CharsetMatch *results) const; 
+}; 
+ 
+/** 
+ * 
+ *   Big5 charset recognizer. 
+ * 
+ */ 
+class CharsetRecog_big5 : public CharsetRecog_mbcs 
+{ 
+public: 
+    virtual ~CharsetRecog_big5(); 
+ 
+    UBool nextChar(IteratedChar* it, InputText* det) const; 
+ 
+    const char *getName() const; 
+    const char *getLanguage() const; 
+ 
+    UBool match(InputText* input, CharsetMatch *results) const; 
+}; 
+ 
+ 
+/** 
+ * 
+ *   GB-18030 recognizer. Uses simplified Chinese statistics. 
+ * 
+ */ 
+class CharsetRecog_gb_18030 : public CharsetRecog_mbcs 
+{ 
+public: 
+    virtual ~CharsetRecog_gb_18030(); 
+ 
+    UBool nextChar(IteratedChar* it, InputText* det) const; 
+ 
+    const char *getName() const; 
+    const char *getLanguage() const; 
+ 
+    UBool match(InputText* input, CharsetMatch *results) const; 
+}; 
+ 
+U_NAMESPACE_END 
+ 
+#endif 
+#endif /* __CSRMBCS_H */
author	neksard <neksard@yandex-team.ru>	2022-02-10 16:45:23 +0300
committer	Daniil Cherednik <dcherednik@yandex-team.ru>	2022-02-10 16:45:23 +0300
commit	8f7cf138264e0caa318144bf8a2c950e0b0a8593 (patch)
tree	83bf5c8c8047c42d8475e6095df90ccdc3d1b57f /contrib/libs/icu/i18n/csrmbcs.h
parent	d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (diff)
download	ydb-8f7cf138264e0caa318144bf8a2c950e0b0a8593.tar.gz