aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/poco/Foundation/include/Poco/Unicode.h
diff options
context:
space:
mode:
author orivej <orivej@yandex-team.ru> 2022-02-10 16:44:49 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:44:49 +0300
commit 718c552901d703c502ccbefdfc3c9028d608b947 (patch)
tree 46534a98bbefcd7b1f3faa5b52c138ab27db75b7 /contrib/libs/poco/Foundation/include/Poco/Unicode.h
parent e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (diff)
download ydb-718c552901d703c502ccbefdfc3c9028d608b947.tar.gz
Restoring authorship annotation for <orivej@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/poco/Foundation/include/Poco/Unicode.h')
-rw-r--r-- contrib/libs/poco/Foundation/include/Poco/Unicode.h 654
1 files changed, 327 insertions, 327 deletions
diff --git a/contrib/libs/poco/Foundation/include/Poco/Unicode.h b/contrib/libs/poco/Foundation/include/Poco/Unicode.h
index b6d027685a..f1506edb97 100644
--- a/contrib/libs/poco/Foundation/include/Poco/Unicode.h
+++ b/contrib/libs/poco/Foundation/include/Poco/Unicode.h
@@ -1,327 +1,327 @@
-//
-// Unicode.h
-//
-// Library: Foundation
-// Package: Text
-// Module: Unicode
-//
-// Definition of the Unicode class.
-//
-// Copyright (c) 2007, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef Foundation_Unicode_INCLUDED
-#define Foundation_Unicode_INCLUDED
-
-
-#include "Poco/Foundation.h"
-
-
-namespace Poco {
-
-
-class Foundation_API Unicode
- /// This class contains enumerations and static
- /// utility functions for dealing with Unicode characters
- /// and their properties.
- ///
- /// For more information on Unicode, see <http://www.unicode.org>.
- ///
- /// The implementation is based on the Unicode support
- /// functions in PCRE.
-{
-public:
- // Implementation note: the following definitions must be kept
- // in sync with those from ucp.h (PCRE).
- enum CharacterCategory
- /// Unicode character categories.
- {
- UCP_OTHER,
- UCP_LETTER,
- UCP_MARK,
- UCP_NUMBER,
- UCP_PUNCTUATION,
- UCP_SYMBOL,
- UCP_SEPARATOR
- };
-
- enum CharacterType
- /// Unicode character types.
- {
- UCP_CONTROL,
- UCP_FORMAT,
- UCP_UNASSIGNED,
- UCP_PRIVATE_USE,
- UCP_SURROGATE,
- UCP_LOWER_CASE_LETTER,
- UCP_MODIFIER_LETTER,
- UCP_OTHER_LETTER,
- UCP_TITLE_CASE_LETTER,
- UCP_UPPER_CASE_LETTER,
- UCP_SPACING_MARK,
- UCP_ENCLOSING_MARK,
- UCP_NON_SPACING_MARK,
- UCP_DECIMAL_NUMBER,
- UCP_LETTER_NUMBER,
- UCP_OTHER_NUMBER,
- UCP_CONNECTOR_PUNCTUATION,
- UCP_DASH_PUNCTUATION,
- UCP_CLOSE_PUNCTUATION,
- UCP_FINAL_PUNCTUATION,
- UCP_INITIAL_PUNCTUATION,
- UCP_OTHER_PUNCTUATION,
- UCP_OPEN_PUNCTUATION,
- UCP_CURRENCY_SYMBOL,
- UCP_MODIFIER_SYMBOL,
- UCP_MATHEMATICAL_SYMBOL,
- UCP_OTHER_SYMBOL,
- UCP_LINE_SEPARATOR,
- UCP_PARAGRAPH_SEPARATOR,
- UCP_SPACE_SEPARATOR
- };
-
- enum Script
- /// Unicode 7.0 script identifiers.
- {
- UCP_ARABIC,
- UCP_ARMENIAN,
- UCP_BENGALI,
- UCP_BOPOMOFO,
- UCP_BRAILLE,
- UCP_BUGINESE,
- UCP_BUHID,
- UCP_CANADIAN_ABORIGINAL,
- UCP_CHEROKEE,
- UCP_COMMON,
- UCP_COPTIC,
- UCP_CYPRIOT,
- UCP_CYRILLIC,
- UCP_DESERET,
- UCP_DEVANAGARI,
- UCP_ETHIOPIC,
- UCP_GEORGIAN,
- UCP_GLAGOLITIC,
- UCP_GOTHIC,
- UCP_GREEK,
- UCP_GUJARATI,
- UCP_GURMUKHI,
- UCP_HAN,
- UCP_HANGUL,
- UCP_HANUNOO,
- UCP_HEBREW,
- UCP_HIRAGANA,
- UCP_INHERITED,
- UCP_KANNADA,
- UCP_KATAKANA,
- UCP_KHAROSHTHI,
- UCP_KHMER,
- UCP_LAO,
- UCP_LATIN,
- UCP_LIMBU,
- UCP_LINEAR_B,
- UCP_MALAYALAM,
- UCP_MONGOLIAN,
- UCP_MYANMAR,
- UCP_NEW_TAI_LUE,
- UCP_OGHAM,
- UCP_OLD_ITALIC,
- UCP_OLD_PERSIAN,
- UCP_ORIYA,
- UCP_OSMANYA,
- UCP_RUNIC,
- UCP_SHAVIAN,
- UCP_SINHALA,
- UCP_SYLOTI_NAGRI,
- UCP_SYRIAC,
- UCP_TAGALOG,
- UCP_TAGBANWA,
- UCP_TAI_LE,
- UCP_TAMIL,
- UCP_TELUGU,
- UCP_THAANA,
- UCP_THAI,
- UCP_TIBETAN,
- UCP_TIFINAGH,
- UCP_UGARITIC,
- UCP_YI,
- // Unicode 5.0
- UCP_BALINESE,
- UCP_CUNEIFORM,
- UCP_NKO,
- UCP_PHAGS_PA,
- UCP_PHOENICIAN,
- // Unicode 5.1
- UCP_CARIAN,
- UCP_CHAM,
- UCP_KAYAH_LI,
- UCP_LEPCHA,
- UCP_LYCIAN,
- UCP_LYDIAN,
- UCP_OL_CHIKI,
- UCP_REJANG,
- UCP_SAURASHTRA,
- UCP_SUNDANESE,
- UCP_VAI,
- // Unicode 5.2
- UCP_AVESTAN,
- UCP_BAMUM,
- UCP_EGYPTIAN_HIEROGLYPHS,
- UCP_IMPERIAL_ARAMAIC,
- UCP_INSCRIPTIONAL_PAHLAVI,
- UCP_INSCRIPTIONAL_PARTHIAN,
- UCP_JAVANESE,
- UCP_KAITHI,
- UCP_LISU,
- UCP_MEETEI_MAYEK,
- UCP_OLD_SOUTH_ARABIAN,
- UCP_OLD_TURKIC,
- UCP_SAMARITAN,
- UCP_TAI_THAM,
- UCP_TAI_VIET,
- // Unicode 6.0
- UCP_BATAK,
- UCP_BRAHMI,
- UCP_MANDAIC,
- // Unicode 6.1
- UCP_CHAKMA,
- UCP_MEROITIC_CURSIVE,
- UCP_MEROITIC_HIEROGLYPHS,
- UCP_MIAO,
- UCP_SHARADA,
- UCP_SORA_SOMPENG,
- UCP_TAKRI,
- // Unicode 7.0
- UCP_BASSA_VAH,
- UCP_CAUCASIAN_ALBANIAN,
- UCP_DUPLOYAN,
- UCP_ELBASAN,
- UCP_GRANTHA,
- UCP_KHOJKI,
- UCP_KHUDAWADI,
- UCP_LINEAR_A,
- UCP_MAHAJANI,
- UCP_MANICHAEAN,
- UCP_MENDE_KIKAKUI,
- UCP_MODI,
- UCP_MRO,
- UCP_NABATAEAN,
- UCP_OLD_NORTH_ARABIAN,
- UCP_OLD_PERMIC,
- UCP_PAHAWH_HMONG,
- UCP_PALMYRENE,
- UCP_PSALTER_PAHLAVI,
- UCP_PAU_CIN_HAU,
- UCP_SIDDHAM,
- UCP_TIRHUTA,
- UCP_WARANG_CITI
- };
-
- enum
- {
- UCP_MAX_CODEPOINT = 0x10FFFF
- };
-
- struct CharacterProperties
- /// This structure holds the character properties
- /// of an Unicode character.
- {
- CharacterCategory category;
- CharacterType type;
- Script script;
- };
-
- static void properties(int ch, CharacterProperties& props);
- /// Return the Unicode character properties for the
- /// character with the given Unicode value.
-
- static bool isSpace(int ch);
- /// Returns true iff the given character is a separator.
-
- static bool isDigit(int ch);
- /// Returns true iff the given character is a numeric character.
-
- static bool isPunct(int ch);
- /// Returns true iff the given character is a punctuation character.
-
- static bool isAlpha(int ch);
- /// Returns true iff the given character is a letter.
-
- static bool isLower(int ch);
- /// Returns true iff the given character is a lowercase
- /// character.
-
- static bool isUpper(int ch);
- /// Returns true iff the given character is an uppercase
- /// character.
-
- static int toLower(int ch);
- /// If the given character is an uppercase character,
- /// return its lowercase counterpart, otherwise return
- /// the character.
-
- static int toUpper(int ch);
- /// If the given character is a lowercase character,
- /// return its uppercase counterpart, otherwise return
- /// the character.
-};
-
-
-//
-// inlines
-//
-inline bool Unicode::isSpace(int ch)
-{
- CharacterProperties props;
- properties(ch, props);
- return props.category == UCP_SEPARATOR;
-}
-
-
-inline bool Unicode::isDigit(int ch)
-{
- CharacterProperties props;
- properties(ch, props);
- return props.category == UCP_NUMBER;
-}
-
-
-inline bool Unicode::isPunct(int ch)
-{
- CharacterProperties props;
- properties(ch, props);
- return props.category == UCP_PUNCTUATION;
-}
-
-
-inline bool Unicode::isAlpha(int ch)
-{
- CharacterProperties props;
- properties(ch, props);
- return props.category == UCP_LETTER;
-}
-
-
-inline bool Unicode::isLower(int ch)
-{
- CharacterProperties props;
- properties(ch, props);
- return props.category == UCP_LETTER && props.type == UCP_LOWER_CASE_LETTER;
-}
-
-
-inline bool Unicode::isUpper(int ch)
-{
- CharacterProperties props;
- properties(ch, props);
- return props.category == UCP_LETTER && props.type == UCP_UPPER_CASE_LETTER;
-}
-
-
-} // namespace Poco
-
-
-#endif // Foundation_Unicode_INCLUDED
+//
+// Unicode.h
+//
+// Library: Foundation
+// Package: Text
+// Module: Unicode
+//
+// Definition of the Unicode class.
+//
+// Copyright (c) 2007, Applied Informatics Software Engineering GmbH.
+// and Contributors.
+//
+// SPDX-License-Identifier: BSL-1.0
+//
+
+
+#ifndef Foundation_Unicode_INCLUDED
+#define Foundation_Unicode_INCLUDED
+
+
+#include "Poco/Foundation.h"
+
+
+namespace Poco {
+
+
+class Foundation_API Unicode
+ /// This class contains enumerations and static
+ /// utility functions for dealing with Unicode characters
+ /// and their properties.
+ ///
+ /// For more information on Unicode, see <http://www.unicode.org>.
+ ///
+ /// The implementation is based on the Unicode support
+ /// functions in PCRE.
+{
+public:
+ // Implementation note: the following definitions must be kept
+ // in sync with those from ucp.h (PCRE). The enumerators carry implicit values, so their position determines the numeric value: do not reorder them.
+ enum CharacterCategory
+ /// Unicode character categories (the coarse class stored in CharacterProperties::category).
+ {
+ UCP_OTHER,
+ UCP_LETTER,
+ UCP_MARK,
+ UCP_NUMBER,
+ UCP_PUNCTUATION,
+ UCP_SYMBOL,
+ UCP_SEPARATOR
+ };
+
+ enum CharacterType
+ /// Unicode character types (the finer-grained class stored in CharacterProperties::type).
+ {
+ UCP_CONTROL,
+ UCP_FORMAT,
+ UCP_UNASSIGNED,
+ UCP_PRIVATE_USE,
+ UCP_SURROGATE,
+ UCP_LOWER_CASE_LETTER,
+ UCP_MODIFIER_LETTER,
+ UCP_OTHER_LETTER,
+ UCP_TITLE_CASE_LETTER,
+ UCP_UPPER_CASE_LETTER,
+ UCP_SPACING_MARK,
+ UCP_ENCLOSING_MARK,
+ UCP_NON_SPACING_MARK,
+ UCP_DECIMAL_NUMBER,
+ UCP_LETTER_NUMBER,
+ UCP_OTHER_NUMBER,
+ UCP_CONNECTOR_PUNCTUATION,
+ UCP_DASH_PUNCTUATION,
+ UCP_CLOSE_PUNCTUATION,
+ UCP_FINAL_PUNCTUATION,
+ UCP_INITIAL_PUNCTUATION,
+ UCP_OTHER_PUNCTUATION,
+ UCP_OPEN_PUNCTUATION,
+ UCP_CURRENCY_SYMBOL,
+ UCP_MODIFIER_SYMBOL,
+ UCP_MATHEMATICAL_SYMBOL,
+ UCP_OTHER_SYMBOL,
+ UCP_LINE_SEPARATOR,
+ UCP_PARAGRAPH_SEPARATOR,
+ UCP_SPACE_SEPARATOR
+ };
+
+ enum Script
+ /// Unicode 7.0 script identifiers (the value stored in CharacterProperties::script).
+ {
+ UCP_ARABIC,
+ UCP_ARMENIAN,
+ UCP_BENGALI,
+ UCP_BOPOMOFO,
+ UCP_BRAILLE,
+ UCP_BUGINESE,
+ UCP_BUHID,
+ UCP_CANADIAN_ABORIGINAL,
+ UCP_CHEROKEE,
+ UCP_COMMON,
+ UCP_COPTIC,
+ UCP_CYPRIOT,
+ UCP_CYRILLIC,
+ UCP_DESERET,
+ UCP_DEVANAGARI,
+ UCP_ETHIOPIC,
+ UCP_GEORGIAN,
+ UCP_GLAGOLITIC,
+ UCP_GOTHIC,
+ UCP_GREEK,
+ UCP_GUJARATI,
+ UCP_GURMUKHI,
+ UCP_HAN,
+ UCP_HANGUL,
+ UCP_HANUNOO,
+ UCP_HEBREW,
+ UCP_HIRAGANA,
+ UCP_INHERITED,
+ UCP_KANNADA,
+ UCP_KATAKANA,
+ UCP_KHAROSHTHI,
+ UCP_KHMER,
+ UCP_LAO,
+ UCP_LATIN,
+ UCP_LIMBU,
+ UCP_LINEAR_B,
+ UCP_MALAYALAM,
+ UCP_MONGOLIAN,
+ UCP_MYANMAR,
+ UCP_NEW_TAI_LUE,
+ UCP_OGHAM,
+ UCP_OLD_ITALIC,
+ UCP_OLD_PERSIAN,
+ UCP_ORIYA,
+ UCP_OSMANYA,
+ UCP_RUNIC,
+ UCP_SHAVIAN,
+ UCP_SINHALA,
+ UCP_SYLOTI_NAGRI,
+ UCP_SYRIAC,
+ UCP_TAGALOG,
+ UCP_TAGBANWA,
+ UCP_TAI_LE,
+ UCP_TAMIL,
+ UCP_TELUGU,
+ UCP_THAANA,
+ UCP_THAI,
+ UCP_TIBETAN,
+ UCP_TIFINAGH,
+ UCP_UGARITIC,
+ UCP_YI,
+ // Unicode 5.0
+ UCP_BALINESE,
+ UCP_CUNEIFORM,
+ UCP_NKO,
+ UCP_PHAGS_PA,
+ UCP_PHOENICIAN,
+ // Unicode 5.1
+ UCP_CARIAN,
+ UCP_CHAM,
+ UCP_KAYAH_LI,
+ UCP_LEPCHA,
+ UCP_LYCIAN,
+ UCP_LYDIAN,
+ UCP_OL_CHIKI,
+ UCP_REJANG,
+ UCP_SAURASHTRA,
+ UCP_SUNDANESE,
+ UCP_VAI,
+ // Unicode 5.2
+ UCP_AVESTAN,
+ UCP_BAMUM,
+ UCP_EGYPTIAN_HIEROGLYPHS,
+ UCP_IMPERIAL_ARAMAIC,
+ UCP_INSCRIPTIONAL_PAHLAVI,
+ UCP_INSCRIPTIONAL_PARTHIAN,
+ UCP_JAVANESE,
+ UCP_KAITHI,
+ UCP_LISU,
+ UCP_MEETEI_MAYEK,
+ UCP_OLD_SOUTH_ARABIAN,
+ UCP_OLD_TURKIC,
+ UCP_SAMARITAN,
+ UCP_TAI_THAM,
+ UCP_TAI_VIET,
+ // Unicode 6.0
+ UCP_BATAK,
+ UCP_BRAHMI,
+ UCP_MANDAIC,
+ // Unicode 6.1
+ UCP_CHAKMA,
+ UCP_MEROITIC_CURSIVE,
+ UCP_MEROITIC_HIEROGLYPHS,
+ UCP_MIAO,
+ UCP_SHARADA,
+ UCP_SORA_SOMPENG,
+ UCP_TAKRI,
+ // Unicode 7.0
+ UCP_BASSA_VAH,
+ UCP_CAUCASIAN_ALBANIAN,
+ UCP_DUPLOYAN,
+ UCP_ELBASAN,
+ UCP_GRANTHA,
+ UCP_KHOJKI,
+ UCP_KHUDAWADI,
+ UCP_LINEAR_A,
+ UCP_MAHAJANI,
+ UCP_MANICHAEAN,
+ UCP_MENDE_KIKAKUI,
+ UCP_MODI,
+ UCP_MRO,
+ UCP_NABATAEAN,
+ UCP_OLD_NORTH_ARABIAN,
+ UCP_OLD_PERMIC,
+ UCP_PAHAWH_HMONG,
+ UCP_PALMYRENE,
+ UCP_PSALTER_PAHLAVI,
+ UCP_PAU_CIN_HAU,
+ UCP_SIDDHAM,
+ UCP_TIRHUTA,
+ UCP_WARANG_CITI
+ };
+
+ enum
+ {
+ UCP_MAX_CODEPOINT = 0x10FFFF // highest code point value in the Unicode code space
+ };
+
+ struct CharacterProperties
+ /// This structure holds the character properties
+ /// (category, type and script) of a Unicode character.
+ {
+ CharacterCategory category;
+ CharacterType type;
+ Script script;
+ };
+
+ static void properties(int ch, CharacterProperties& props);
+ /// Stores the Unicode character properties for the
+ /// character with the given Unicode value in props.
+
+ static bool isSpace(int ch);
+ /// Returns true iff the given character's category is UCP_SEPARATOR.
+
+ static bool isDigit(int ch);
+ /// Returns true iff the given character's category is UCP_NUMBER (the character type is not checked).
+
+ static bool isPunct(int ch);
+ /// Returns true iff the given character's category is UCP_PUNCTUATION.
+
+ static bool isAlpha(int ch);
+ /// Returns true iff the given character's category is UCP_LETTER.
+
+ static bool isLower(int ch);
+ /// Returns true iff the given character has category UCP_LETTER
+ /// and type UCP_LOWER_CASE_LETTER.
+
+ static bool isUpper(int ch);
+ /// Returns true iff the given character has category UCP_LETTER
+ /// and type UCP_UPPER_CASE_LETTER.
+
+ static int toLower(int ch);
+ /// If the given character is an uppercase character,
+ /// return its lowercase counterpart, otherwise return
+ /// the character.
+
+ static int toUpper(int ch);
+ /// If the given character is a lowercase character,
+ /// return its uppercase counterpart, otherwise return
+ /// the character.
+};
+
+
+//
+// inlines
+//
+inline bool Unicode::isSpace(int ch)
+{
+ CharacterProperties info; // filled in by properties() on the next line
+ properties(ch, info);
+ return UCP_SEPARATOR == info.category; // only the category is consulted
+}
+
+
+inline bool Unicode::isDigit(int ch)
+{
+ CharacterProperties info; // filled in by properties() on the next line
+ properties(ch, info);
+ return UCP_NUMBER == info.category; // category test only; info.type is not inspected
+}
+
+
+inline bool Unicode::isPunct(int ch)
+{
+ CharacterProperties info; // filled in by properties() on the next line
+ properties(ch, info);
+ return UCP_PUNCTUATION == info.category; // only the category is consulted
+}
+
+
+inline bool Unicode::isAlpha(int ch)
+{
+ CharacterProperties info; // filled in by properties() on the next line
+ properties(ch, info);
+ return UCP_LETTER == info.category; // any letter type qualifies
+}
+
+
+inline bool Unicode::isLower(int ch)
+{
+ CharacterProperties info; // filled in by properties() on the next line
+ properties(ch, info);
+ return info.type == UCP_LOWER_CASE_LETTER && info.category == UCP_LETTER; // both fields must agree
+}
+
+
+inline bool Unicode::isUpper(int ch)
+{
+ CharacterProperties info; // filled in by properties() on the next line
+ properties(ch, info);
+ return info.type == UCP_UPPER_CASE_LETTER && info.category == UCP_LETTER; // both fields must agree
+}
+
+
+} // namespace Poco
+
+
+#endif // Foundation_Unicode_INCLUDED