diff options
author | orivej <orivej@yandex-team.ru> | 2022-02-10 16:44:49 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:49 +0300 |
commit | 718c552901d703c502ccbefdfc3c9028d608b947 (patch) | |
tree | 46534a98bbefcd7b1f3faa5b52c138ab27db75b7 /contrib/libs/poco/Foundation/include/Poco/Unicode.h | |
parent | e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (diff) | |
download | ydb-718c552901d703c502ccbefdfc3c9028d608b947.tar.gz |
Restoring authorship annotation for <orivej@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/poco/Foundation/include/Poco/Unicode.h')
-rw-r--r-- | contrib/libs/poco/Foundation/include/Poco/Unicode.h | 654 |
1 files changed, 327 insertions, 327 deletions
diff --git a/contrib/libs/poco/Foundation/include/Poco/Unicode.h b/contrib/libs/poco/Foundation/include/Poco/Unicode.h index b6d027685a..f1506edb97 100644 --- a/contrib/libs/poco/Foundation/include/Poco/Unicode.h +++ b/contrib/libs/poco/Foundation/include/Poco/Unicode.h @@ -1,327 +1,327 @@ -// -// Unicode.h -// -// Library: Foundation -// Package: Text -// Module: Unicode -// -// Definition of the Unicode class. -// -// Copyright (c) 2007, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_Unicode_INCLUDED -#define Foundation_Unicode_INCLUDED - - -#include "Poco/Foundation.h" - - -namespace Poco { - - -class Foundation_API Unicode - /// This class contains enumerations and static - /// utility functions for dealing with Unicode characters - /// and their properties. - /// - /// For more information on Unicode, see <http://www.unicode.org>. - /// - /// The implementation is based on the Unicode support - /// functions in PCRE. -{ -public: - // Implementation note: the following definitions must be kept - // in sync with those from ucp.h (PCRE). - enum CharacterCategory - /// Unicode character categories. - { - UCP_OTHER, - UCP_LETTER, - UCP_MARK, - UCP_NUMBER, - UCP_PUNCTUATION, - UCP_SYMBOL, - UCP_SEPARATOR - }; - - enum CharacterType - /// Unicode character types. - { - UCP_CONTROL, - UCP_FORMAT, - UCP_UNASSIGNED, - UCP_PRIVATE_USE, - UCP_SURROGATE, - UCP_LOWER_CASE_LETTER, - UCP_MODIFIER_LETTER, - UCP_OTHER_LETTER, - UCP_TITLE_CASE_LETTER, - UCP_UPPER_CASE_LETTER, - UCP_SPACING_MARK, - UCP_ENCLOSING_MARK, - UCP_NON_SPACING_MARK, - UCP_DECIMAL_NUMBER, - UCP_LETTER_NUMBER, - UCP_OTHER_NUMBER, - UCP_CONNECTOR_PUNCTUATION, - UCP_DASH_PUNCTUATION, - UCP_CLOSE_PUNCTUATION, - UCP_FINAL_PUNCTUATION, - UCP_INITIAL_PUNCTUATION, - UCP_OTHER_PUNCTUATION, - UCP_OPEN_PUNCTUATION, - UCP_CURRENCY_SYMBOL, - UCP_MODIFIER_SYMBOL, - UCP_MATHEMATICAL_SYMBOL, - UCP_OTHER_SYMBOL, - UCP_LINE_SEPARATOR, - UCP_PARAGRAPH_SEPARATOR, - UCP_SPACE_SEPARATOR - }; - - enum Script - /// Unicode 7.0 script identifiers. - { - UCP_ARABIC, - UCP_ARMENIAN, - UCP_BENGALI, - UCP_BOPOMOFO, - UCP_BRAILLE, - UCP_BUGINESE, - UCP_BUHID, - UCP_CANADIAN_ABORIGINAL, - UCP_CHEROKEE, - UCP_COMMON, - UCP_COPTIC, - UCP_CYPRIOT, - UCP_CYRILLIC, - UCP_DESERET, - UCP_DEVANAGARI, - UCP_ETHIOPIC, - UCP_GEORGIAN, - UCP_GLAGOLITIC, - UCP_GOTHIC, - UCP_GREEK, - UCP_GUJARATI, - UCP_GURMUKHI, - UCP_HAN, - UCP_HANGUL, - UCP_HANUNOO, - UCP_HEBREW, - UCP_HIRAGANA, - UCP_INHERITED, - UCP_KANNADA, - UCP_KATAKANA, - UCP_KHAROSHTHI, - UCP_KHMER, - UCP_LAO, - UCP_LATIN, - UCP_LIMBU, - UCP_LINEAR_B, - UCP_MALAYALAM, - UCP_MONGOLIAN, - UCP_MYANMAR, - UCP_NEW_TAI_LUE, - UCP_OGHAM, - UCP_OLD_ITALIC, - UCP_OLD_PERSIAN, - UCP_ORIYA, - UCP_OSMANYA, - UCP_RUNIC, - UCP_SHAVIAN, - UCP_SINHALA, - UCP_SYLOTI_NAGRI, - UCP_SYRIAC, - UCP_TAGALOG, - UCP_TAGBANWA, - UCP_TAI_LE, - UCP_TAMIL, - UCP_TELUGU, - UCP_THAANA, - UCP_THAI, - UCP_TIBETAN, - UCP_TIFINAGH, - UCP_UGARITIC, - UCP_YI, - // Unicode 5.0 - UCP_BALINESE, - UCP_CUNEIFORM, - UCP_NKO, - UCP_PHAGS_PA, - UCP_PHOENICIAN, - // Unicode 5.1 - UCP_CARIAN, - UCP_CHAM, - UCP_KAYAH_LI, - UCP_LEPCHA, - UCP_LYCIAN, - UCP_LYDIAN, - UCP_OL_CHIKI, - UCP_REJANG, - UCP_SAURASHTRA, - UCP_SUNDANESE, - UCP_VAI, - // Unicode 5.2 - UCP_AVESTAN, - UCP_BAMUM, - UCP_EGYPTIAN_HIEROGLYPHS, - UCP_IMPERIAL_ARAMAIC, - UCP_INSCRIPTIONAL_PAHLAVI, - UCP_INSCRIPTIONAL_PARTHIAN, - UCP_JAVANESE, - UCP_KAITHI, - UCP_LISU, - UCP_MEETEI_MAYEK, - UCP_OLD_SOUTH_ARABIAN, - UCP_OLD_TURKIC, - UCP_SAMARITAN, - UCP_TAI_THAM, - UCP_TAI_VIET, - // Unicode 6.0 - UCP_BATAK, - UCP_BRAHMI, - UCP_MANDAIC, - // Unicode 6.1 - UCP_CHAKMA, - UCP_MEROITIC_CURSIVE, - UCP_MEROITIC_HIEROGLYPHS, - UCP_MIAO, - UCP_SHARADA, - UCP_SORA_SOMPENG, - UCP_TAKRI, - // Unicode 7.0 - UCP_BASSA_VAH, - UCP_CAUCASIAN_ALBANIAN, - UCP_DUPLOYAN, - UCP_ELBASAN, - UCP_GRANTHA, - UCP_KHOJKI, - UCP_KHUDAWADI, - UCP_LINEAR_A, - UCP_MAHAJANI, - UCP_MANICHAEAN, - UCP_MENDE_KIKAKUI, - UCP_MODI, - UCP_MRO, - UCP_NABATAEAN, - UCP_OLD_NORTH_ARABIAN, - UCP_OLD_PERMIC, - UCP_PAHAWH_HMONG, - UCP_PALMYRENE, - UCP_PSALTER_PAHLAVI, - UCP_PAU_CIN_HAU, - UCP_SIDDHAM, - UCP_TIRHUTA, - UCP_WARANG_CITI - }; - - enum - { - UCP_MAX_CODEPOINT = 0x10FFFF - }; - - struct CharacterProperties - /// This structure holds the character properties - /// of an Unicode character. - { - CharacterCategory category; - CharacterType type; - Script script; - }; - - static void properties(int ch, CharacterProperties& props); - /// Return the Unicode character properties for the - /// character with the given Unicode value. - - static bool isSpace(int ch); - /// Returns true iff the given character is a separator. - - static bool isDigit(int ch); - /// Returns true iff the given character is a numeric character. - - static bool isPunct(int ch); - /// Returns true iff the given character is a punctuation character. - - static bool isAlpha(int ch); - /// Returns true iff the given character is a letter. - - static bool isLower(int ch); - /// Returns true iff the given character is a lowercase - /// character. - - static bool isUpper(int ch); - /// Returns true iff the given character is an uppercase - /// character. - - static int toLower(int ch); - /// If the given character is an uppercase character, - /// return its lowercase counterpart, otherwise return - /// the character. - - static int toUpper(int ch); - /// If the given character is a lowercase character, - /// return its uppercase counterpart, otherwise return - /// the character. -}; - - -// -// inlines -// -inline bool Unicode::isSpace(int ch) -{ - CharacterProperties props; - properties(ch, props); - return props.category == UCP_SEPARATOR; -} - - -inline bool Unicode::isDigit(int ch) -{ - CharacterProperties props; - properties(ch, props); - return props.category == UCP_NUMBER; -} - - -inline bool Unicode::isPunct(int ch) -{ - CharacterProperties props; - properties(ch, props); - return props.category == UCP_PUNCTUATION; -} - - -inline bool Unicode::isAlpha(int ch) -{ - CharacterProperties props; - properties(ch, props); - return props.category == UCP_LETTER; -} - - -inline bool Unicode::isLower(int ch) -{ - CharacterProperties props; - properties(ch, props); - return props.category == UCP_LETTER && props.type == UCP_LOWER_CASE_LETTER; -} - - -inline bool Unicode::isUpper(int ch) -{ - CharacterProperties props; - properties(ch, props); - return props.category == UCP_LETTER && props.type == UCP_UPPER_CASE_LETTER; -} - - -} // namespace Poco - - -#endif // Foundation_Unicode_INCLUDED +// +// Unicode.h +// +// Library: Foundation +// Package: Text +// Module: Unicode +// +// Definition of the Unicode class. +// +// Copyright (c) 2007, Applied Informatics Software Engineering GmbH. +// and Contributors. +// +// SPDX-License-Identifier: BSL-1.0 +// + + +#ifndef Foundation_Unicode_INCLUDED +#define Foundation_Unicode_INCLUDED + + +#include "Poco/Foundation.h" + + +namespace Poco { + + +class Foundation_API Unicode + /// This class contains enumerations and static + /// utility functions for dealing with Unicode characters + /// and their properties. + /// + /// For more information on Unicode, see <http://www.unicode.org>. + /// + /// The implementation is based on the Unicode support + /// functions in PCRE. +{ +public: + // Implementation note: the following definitions must be kept + // in sync with those from ucp.h (PCRE). + enum CharacterCategory + /// Unicode character categories. + { + UCP_OTHER, + UCP_LETTER, + UCP_MARK, + UCP_NUMBER, + UCP_PUNCTUATION, + UCP_SYMBOL, + UCP_SEPARATOR + }; + + enum CharacterType + /// Unicode character types. + { + UCP_CONTROL, + UCP_FORMAT, + UCP_UNASSIGNED, + UCP_PRIVATE_USE, + UCP_SURROGATE, + UCP_LOWER_CASE_LETTER, + UCP_MODIFIER_LETTER, + UCP_OTHER_LETTER, + UCP_TITLE_CASE_LETTER, + UCP_UPPER_CASE_LETTER, + UCP_SPACING_MARK, + UCP_ENCLOSING_MARK, + UCP_NON_SPACING_MARK, + UCP_DECIMAL_NUMBER, + UCP_LETTER_NUMBER, + UCP_OTHER_NUMBER, + UCP_CONNECTOR_PUNCTUATION, + UCP_DASH_PUNCTUATION, + UCP_CLOSE_PUNCTUATION, + UCP_FINAL_PUNCTUATION, + UCP_INITIAL_PUNCTUATION, + UCP_OTHER_PUNCTUATION, + UCP_OPEN_PUNCTUATION, + UCP_CURRENCY_SYMBOL, + UCP_MODIFIER_SYMBOL, + UCP_MATHEMATICAL_SYMBOL, + UCP_OTHER_SYMBOL, + UCP_LINE_SEPARATOR, + UCP_PARAGRAPH_SEPARATOR, + UCP_SPACE_SEPARATOR + }; + + enum Script + /// Unicode 7.0 script identifiers. + { + UCP_ARABIC, + UCP_ARMENIAN, + UCP_BENGALI, + UCP_BOPOMOFO, + UCP_BRAILLE, + UCP_BUGINESE, + UCP_BUHID, + UCP_CANADIAN_ABORIGINAL, + UCP_CHEROKEE, + UCP_COMMON, + UCP_COPTIC, + UCP_CYPRIOT, + UCP_CYRILLIC, + UCP_DESERET, + UCP_DEVANAGARI, + UCP_ETHIOPIC, + UCP_GEORGIAN, + UCP_GLAGOLITIC, + UCP_GOTHIC, + UCP_GREEK, + UCP_GUJARATI, + UCP_GURMUKHI, + UCP_HAN, + UCP_HANGUL, + UCP_HANUNOO, + UCP_HEBREW, + UCP_HIRAGANA, + UCP_INHERITED, + UCP_KANNADA, + UCP_KATAKANA, + UCP_KHAROSHTHI, + UCP_KHMER, + UCP_LAO, + UCP_LATIN, + UCP_LIMBU, + UCP_LINEAR_B, + UCP_MALAYALAM, + UCP_MONGOLIAN, + UCP_MYANMAR, + UCP_NEW_TAI_LUE, + UCP_OGHAM, + UCP_OLD_ITALIC, + UCP_OLD_PERSIAN, + UCP_ORIYA, + UCP_OSMANYA, + UCP_RUNIC, + UCP_SHAVIAN, + UCP_SINHALA, + UCP_SYLOTI_NAGRI, + UCP_SYRIAC, + UCP_TAGALOG, + UCP_TAGBANWA, + UCP_TAI_LE, + UCP_TAMIL, + UCP_TELUGU, + UCP_THAANA, + UCP_THAI, + UCP_TIBETAN, + UCP_TIFINAGH, + UCP_UGARITIC, + UCP_YI, + // Unicode 5.0 + UCP_BALINESE, + UCP_CUNEIFORM, + UCP_NKO, + UCP_PHAGS_PA, + UCP_PHOENICIAN, + // Unicode 5.1 + UCP_CARIAN, + UCP_CHAM, + UCP_KAYAH_LI, + UCP_LEPCHA, + UCP_LYCIAN, + UCP_LYDIAN, + UCP_OL_CHIKI, + UCP_REJANG, + UCP_SAURASHTRA, + UCP_SUNDANESE, + UCP_VAI, + // Unicode 5.2 + UCP_AVESTAN, + UCP_BAMUM, + UCP_EGYPTIAN_HIEROGLYPHS, + UCP_IMPERIAL_ARAMAIC, + UCP_INSCRIPTIONAL_PAHLAVI, + UCP_INSCRIPTIONAL_PARTHIAN, + UCP_JAVANESE, + UCP_KAITHI, + UCP_LISU, + UCP_MEETEI_MAYEK, + UCP_OLD_SOUTH_ARABIAN, + UCP_OLD_TURKIC, + UCP_SAMARITAN, + UCP_TAI_THAM, + UCP_TAI_VIET, + // Unicode 6.0 + UCP_BATAK, + UCP_BRAHMI, + UCP_MANDAIC, + // Unicode 6.1 + UCP_CHAKMA, + UCP_MEROITIC_CURSIVE, + UCP_MEROITIC_HIEROGLYPHS, + UCP_MIAO, + UCP_SHARADA, + UCP_SORA_SOMPENG, + UCP_TAKRI, + // Unicode 7.0 + UCP_BASSA_VAH, + UCP_CAUCASIAN_ALBANIAN, + UCP_DUPLOYAN, + UCP_ELBASAN, + UCP_GRANTHA, + UCP_KHOJKI, + UCP_KHUDAWADI, + UCP_LINEAR_A, + UCP_MAHAJANI, + UCP_MANICHAEAN, + UCP_MENDE_KIKAKUI, + UCP_MODI, + UCP_MRO, + UCP_NABATAEAN, + UCP_OLD_NORTH_ARABIAN, + UCP_OLD_PERMIC, + UCP_PAHAWH_HMONG, + UCP_PALMYRENE, + UCP_PSALTER_PAHLAVI, + UCP_PAU_CIN_HAU, + UCP_SIDDHAM, + UCP_TIRHUTA, + UCP_WARANG_CITI + }; + + enum + { + UCP_MAX_CODEPOINT = 0x10FFFF + }; + + struct CharacterProperties + /// This structure holds the character properties + /// of an Unicode character. + { + CharacterCategory category; + CharacterType type; + Script script; + }; + + static void properties(int ch, CharacterProperties& props); + /// Return the Unicode character properties for the + /// character with the given Unicode value. + + static bool isSpace(int ch); + /// Returns true iff the given character is a separator. + + static bool isDigit(int ch); + /// Returns true iff the given character is a numeric character. + + static bool isPunct(int ch); + /// Returns true iff the given character is a punctuation character. + + static bool isAlpha(int ch); + /// Returns true iff the given character is a letter. + + static bool isLower(int ch); + /// Returns true iff the given character is a lowercase + /// character. + + static bool isUpper(int ch); + /// Returns true iff the given character is an uppercase + /// character. + + static int toLower(int ch); + /// If the given character is an uppercase character, + /// return its lowercase counterpart, otherwise return + /// the character. + + static int toUpper(int ch); + /// If the given character is a lowercase character, + /// return its uppercase counterpart, otherwise return + /// the character. +}; + + +// +// inlines +// +inline bool Unicode::isSpace(int ch) +{ + CharacterProperties props; + properties(ch, props); + return props.category == UCP_SEPARATOR; +} + + +inline bool Unicode::isDigit(int ch) +{ + CharacterProperties props; + properties(ch, props); + return props.category == UCP_NUMBER; +} + + +inline bool Unicode::isPunct(int ch) +{ + CharacterProperties props; + properties(ch, props); + return props.category == UCP_PUNCTUATION; +} + + +inline bool Unicode::isAlpha(int ch) +{ + CharacterProperties props; + properties(ch, props); + return props.category == UCP_LETTER; +} + + +inline bool Unicode::isLower(int ch) +{ + CharacterProperties props; + properties(ch, props); + return props.category == UCP_LETTER && props.type == UCP_LOWER_CASE_LETTER; +} + + +inline bool Unicode::isUpper(int ch) +{ + CharacterProperties props; + properties(ch, props); + return props.category == UCP_LETTER && props.type == UCP_UPPER_CASE_LETTER; +} + + +} // namespace Poco + + +#endif // Foundation_Unicode_INCLUDED |