intermediate changes

ref:cde9a383711a11544ce7e107a78147fb96cc4029
author: Devtools Arcadia <arcadia-devtools@yandex-team.ru> 2022-02-07 18:08:42 +0300
committer: Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> 2022-02-07 18:08:42 +0300
commit: 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree: e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/libs/poco/Foundation/include/Poco/Unicode.h
download: ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
1 files changed, 327 insertions, 0 deletions
diff --git a/contrib/libs/poco/Foundation/include/Poco/Unicode.h b/contrib/libs/poco/Foundation/include/Poco/Unicode.h
new file mode 100644
index 0000000000..b6d027685a
--- /dev/null
+++ b/contrib/libs/poco/Foundation/include/Poco/Unicode.h
@@ -0,0 +1,327 @@
+//
+// Unicode.h
+//
+// Library: Foundation
+// Package: Text
+// Module:  Unicode
+//
+// Definition of the Unicode class.
+//
+// Copyright (c) 2007, Applied Informatics Software Engineering GmbH.
+// and Contributors.
+//
+// SPDX-License-Identifier:	BSL-1.0
+//
+
+
+#ifndef Foundation_Unicode_INCLUDED
+#define Foundation_Unicode_INCLUDED
+
+
+#include "Poco/Foundation.h"
+
+
+namespace Poco {
+
+
+class Foundation_API Unicode
+	/// This class contains enumerations and static
+	/// utility functions for dealing with Unicode characters
+	/// and their properties.
+	///
+	/// For more information on Unicode, see <http://www.unicode.org>.
+	///
+	/// The implementation is based on the Unicode support
+	/// functions in PCRE.
+{
+public:
+	// Implementation note: the enumerator order of the following definitions
+	// must be kept in sync with those from ucp.h (PCRE); do not reorder them.
+	enum CharacterCategory
+		/// Top-level Unicode character categories (see CharacterType for the finer-grained types).
+	{
+		UCP_OTHER,
+		UCP_LETTER,
+		UCP_MARK,
+		UCP_NUMBER,
+		UCP_PUNCTUATION,
+		UCP_SYMBOL,
+		UCP_SEPARATOR
+	};
+
+	enum CharacterType
+		/// Unicode character types (finer-grained than CharacterCategory).
+	{
+		UCP_CONTROL,
+		UCP_FORMAT,
+		UCP_UNASSIGNED,
+		UCP_PRIVATE_USE,
+		UCP_SURROGATE,
+		UCP_LOWER_CASE_LETTER,
+		UCP_MODIFIER_LETTER,
+		UCP_OTHER_LETTER,
+		UCP_TITLE_CASE_LETTER,
+		UCP_UPPER_CASE_LETTER,
+		UCP_SPACING_MARK,
+		UCP_ENCLOSING_MARK,
+		UCP_NON_SPACING_MARK,
+		UCP_DECIMAL_NUMBER,
+		UCP_LETTER_NUMBER,
+		UCP_OTHER_NUMBER,
+		UCP_CONNECTOR_PUNCTUATION,
+		UCP_DASH_PUNCTUATION,
+		UCP_CLOSE_PUNCTUATION,
+		UCP_FINAL_PUNCTUATION,
+		UCP_INITIAL_PUNCTUATION,
+		UCP_OTHER_PUNCTUATION,
+		UCP_OPEN_PUNCTUATION,
+		UCP_CURRENCY_SYMBOL,
+		UCP_MODIFIER_SYMBOL,
+		UCP_MATHEMATICAL_SYMBOL,
+		UCP_OTHER_SYMBOL,
+		UCP_LINE_SEPARATOR,
+		UCP_PARAGRAPH_SEPARATOR,
+		UCP_SPACE_SEPARATOR
+	};
+	
+	enum Script
+		/// Unicode script identifiers, covering scripts up to Unicode 7.0.
+	{
+		UCP_ARABIC,
+		UCP_ARMENIAN,
+		UCP_BENGALI,
+		UCP_BOPOMOFO,
+		UCP_BRAILLE,
+		UCP_BUGINESE,
+		UCP_BUHID,
+		UCP_CANADIAN_ABORIGINAL,
+		UCP_CHEROKEE,
+		UCP_COMMON,
+		UCP_COPTIC,
+		UCP_CYPRIOT,
+		UCP_CYRILLIC,
+		UCP_DESERET,
+		UCP_DEVANAGARI,
+		UCP_ETHIOPIC,
+		UCP_GEORGIAN,
+		UCP_GLAGOLITIC,
+		UCP_GOTHIC,
+		UCP_GREEK,
+		UCP_GUJARATI,
+		UCP_GURMUKHI,
+		UCP_HAN,
+		UCP_HANGUL,
+		UCP_HANUNOO,
+		UCP_HEBREW,
+		UCP_HIRAGANA,
+		UCP_INHERITED,
+		UCP_KANNADA,
+		UCP_KATAKANA,
+		UCP_KHAROSHTHI,
+		UCP_KHMER,
+		UCP_LAO,
+		UCP_LATIN,
+		UCP_LIMBU,
+		UCP_LINEAR_B,
+		UCP_MALAYALAM,
+		UCP_MONGOLIAN,
+		UCP_MYANMAR,
+		UCP_NEW_TAI_LUE,
+		UCP_OGHAM,
+		UCP_OLD_ITALIC,
+		UCP_OLD_PERSIAN,
+		UCP_ORIYA,
+		UCP_OSMANYA,
+		UCP_RUNIC,
+		UCP_SHAVIAN,
+		UCP_SINHALA,
+		UCP_SYLOTI_NAGRI,
+		UCP_SYRIAC,
+		UCP_TAGALOG,
+		UCP_TAGBANWA,
+		UCP_TAI_LE,
+		UCP_TAMIL,
+		UCP_TELUGU,
+		UCP_THAANA,
+		UCP_THAI,
+		UCP_TIBETAN,
+		UCP_TIFINAGH,
+		UCP_UGARITIC,
+		UCP_YI,
+		// Unicode 5.0
+		UCP_BALINESE,
+		UCP_CUNEIFORM,
+		UCP_NKO,
+		UCP_PHAGS_PA,
+		UCP_PHOENICIAN,
+		// Unicode 5.1
+		UCP_CARIAN,
+		UCP_CHAM,
+		UCP_KAYAH_LI,
+		UCP_LEPCHA,
+		UCP_LYCIAN,
+		UCP_LYDIAN,
+		UCP_OL_CHIKI,
+		UCP_REJANG,
+		UCP_SAURASHTRA,
+		UCP_SUNDANESE,
+		UCP_VAI,
+		// Unicode 5.2
+		UCP_AVESTAN,
+		UCP_BAMUM,
+		UCP_EGYPTIAN_HIEROGLYPHS,
+		UCP_IMPERIAL_ARAMAIC,
+		UCP_INSCRIPTIONAL_PAHLAVI,
+		UCP_INSCRIPTIONAL_PARTHIAN,
+		UCP_JAVANESE,
+		UCP_KAITHI,
+		UCP_LISU,
+		UCP_MEETEI_MAYEK,
+		UCP_OLD_SOUTH_ARABIAN,
+		UCP_OLD_TURKIC,
+		UCP_SAMARITAN,
+		UCP_TAI_THAM,
+		UCP_TAI_VIET,
+		// Unicode 6.0
+		UCP_BATAK,
+		UCP_BRAHMI,
+		UCP_MANDAIC,
+		// Unicode 6.1
+		UCP_CHAKMA,
+		UCP_MEROITIC_CURSIVE,
+		UCP_MEROITIC_HIEROGLYPHS,
+		UCP_MIAO,
+		UCP_SHARADA,
+		UCP_SORA_SOMPENG,
+		UCP_TAKRI,
+		// Unicode 7.0
+		UCP_BASSA_VAH,
+		UCP_CAUCASIAN_ALBANIAN,
+		UCP_DUPLOYAN,
+		UCP_ELBASAN,
+		UCP_GRANTHA,
+		UCP_KHOJKI,
+		UCP_KHUDAWADI,
+		UCP_LINEAR_A,
+		UCP_MAHAJANI,
+		UCP_MANICHAEAN,
+		UCP_MENDE_KIKAKUI,
+		UCP_MODI,
+		UCP_MRO,
+		UCP_NABATAEAN,
+		UCP_OLD_NORTH_ARABIAN,
+		UCP_OLD_PERMIC,
+		UCP_PAHAWH_HMONG,
+		UCP_PALMYRENE,
+		UCP_PSALTER_PAHLAVI,
+		UCP_PAU_CIN_HAU,
+		UCP_SIDDHAM,
+		UCP_TIRHUTA,
+		UCP_WARANG_CITI
+	};
+	
+	enum
+	{
+		UCP_MAX_CODEPOINT = 0x10FFFF // Largest Unicode code point value (U+10FFFF).
+	};
+	
+	struct CharacterProperties
+		/// This structure holds the character properties
+		/// (category, type and script) of a Unicode character.
+	{
+		CharacterCategory category;
+		CharacterType     type;
+		Script            script;
+	};
+
+	static void properties(int ch, CharacterProperties& props);
+		/// Stores in props the Unicode character properties for the
+		/// character with the given Unicode value.
+		
+	static bool isSpace(int ch);
+		/// Returns true iff the given character is a separator (category UCP_SEPARATOR).
+		
+	static bool isDigit(int ch);
+		/// Returns true iff the given character is a numeric character (category UCP_NUMBER).
+		
+	static bool isPunct(int ch);
+		/// Returns true iff the given character is a punctuation character (category UCP_PUNCTUATION).
+		
+	static bool isAlpha(int ch);
+		/// Returns true iff the given character is a letter (category UCP_LETTER).
+		
+	static bool isLower(int ch);
+		/// Returns true iff the given character is a lowercase
+		/// character (category UCP_LETTER, type UCP_LOWER_CASE_LETTER).
+		
+	static bool isUpper(int ch);
+		/// Returns true iff the given character is an uppercase
+		/// character (category UCP_LETTER, type UCP_UPPER_CASE_LETTER).
+		
+	static int toLower(int ch);
+		/// If the given character is an uppercase character,
+		/// return its lowercase counterpart, otherwise return
+		/// the character.
+
+	static int toUpper(int ch);
+		/// If the given character is a lowercase character,
+		/// return its uppercase counterpart, otherwise return
+		/// the character.
+};
+
+
+//
+// inlines
+//
+inline bool Unicode::isSpace(int ch)
+{
+	CharacterProperties cp; // receives the properties looked up for ch
+	Unicode::properties(ch, cp);
+	return cp.category == Unicode::UCP_SEPARATOR; // separator category only
+}
+
+
+inline bool Unicode::isDigit(int ch)
+{
+	CharacterProperties cp; // receives the properties looked up for ch
+	Unicode::properties(ch, cp);
+	return cp.category == Unicode::UCP_NUMBER; // any character in the number category
+}
+
+
+inline bool Unicode::isPunct(int ch)
+{
+	CharacterProperties cp; // receives the properties looked up for ch
+	Unicode::properties(ch, cp);
+	return cp.category == Unicode::UCP_PUNCTUATION; // punctuation category only
+}
+
+
+inline bool Unicode::isAlpha(int ch)
+{
+	CharacterProperties cp; // receives the properties looked up for ch
+	Unicode::properties(ch, cp);
+	return cp.category == Unicode::UCP_LETTER; // letter category, regardless of case
+}
+
+
+inline bool Unicode::isLower(int ch)
+{
+	CharacterProperties cp; // receives the properties looked up for ch
+	Unicode::properties(ch, cp);
+	return cp.type == Unicode::UCP_LOWER_CASE_LETTER && cp.category == Unicode::UCP_LETTER;
+}
+
+	
+inline bool Unicode::isUpper(int ch)
+{
+	CharacterProperties cp; // receives the properties looked up for ch
+	Unicode::properties(ch, cp);
+	return cp.type == Unicode::UCP_UPPER_CASE_LETTER && cp.category == Unicode::UCP_LETTER;
+}
+
+
+} // namespace Poco
+
+
+#endif // Foundation_Unicode_INCLUDED
author	Devtools Arcadia <arcadia-devtools@yandex-team.ru>	2022-02-07 18:08:42 +0300
committer	Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>	2022-02-07 18:08:42 +0300
commit	1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree	e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/libs/poco/Foundation/include/Poco/Unicode.h
download	ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz