aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/icu/i18n/regexst.cpp
diff options
context:
space:
mode:
authorneksard <neksard@yandex-team.ru>2022-02-10 16:45:33 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:45:33 +0300
commit1d9c550e7c38e051d7961f576013a482003a70d9 (patch)
treeb2cc84ee7850122e7ccf51d0ea21e4fa7e7a5685 /contrib/libs/icu/i18n/regexst.cpp
parent8f7cf138264e0caa318144bf8a2c950e0b0a8593 (diff)
downloadydb-1d9c550e7c38e051d7961f576013a482003a70d9.tar.gz
Restoring authorship annotation for <neksard@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/icu/i18n/regexst.cpp')
-rw-r--r--contrib/libs/icu/i18n/regexst.cpp216
1 files changed, 108 insertions, 108 deletions
diff --git a/contrib/libs/icu/i18n/regexst.cpp b/contrib/libs/icu/i18n/regexst.cpp
index d9880bfad9..97e417ab5a 100644
--- a/contrib/libs/icu/i18n/regexst.cpp
+++ b/contrib/libs/icu/i18n/regexst.cpp
@@ -1,64 +1,64 @@
// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-//
-// regexst.h
-//
-// Copyright (C) 2004-2015, International Business Machines Corporation and others.
-// All Rights Reserved.
-//
-// This file contains class RegexStaticSets
-//
-// This class is internal to the regular expression implementation.
-// For the public Regular Expression API, see the file "unicode/regex.h"
-//
-// RegexStaticSets groups together the common UnicodeSets that are needed
-// for compiling or executing RegularExpressions. This grouping simplifies
-// the thread safe lazy creation and sharing of these sets across
-// all instances of regular expressions.
-//
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_REGULAR_EXPRESSIONS
-
-#include "unicode/unistr.h"
-#include "unicode/uniset.h"
-#include "unicode/uchar.h"
-#include "unicode/regex.h"
-#include "uprops.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "uassert.h"
-#include "ucln_in.h"
-#include "umutex.h"
-
-#include "regexcst.h" // Contains state table for the regex pattern parser.
- // generated by a Perl script.
-#include "regexst.h"
-
-U_NAMESPACE_BEGIN
-
+// License & terms of use: http://www.unicode.org/copyright.html
+//
+// regexst.h
+//
+// Copyright (C) 2004-2015, International Business Machines Corporation and others.
+// All Rights Reserved.
+//
+// This file contains class RegexStaticSets
+//
+// This class is internal to the regular expression implementation.
+// For the public Regular Expression API, see the file "unicode/regex.h"
+//
+// RegexStaticSets groups together the common UnicodeSets that are needed
+// for compiling or executing RegularExpressions. This grouping simplifies
+// the thread safe lazy creation and sharing of these sets across
+// all instances of regular expressions.
+//
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+
+#include "unicode/unistr.h"
+#include "unicode/uniset.h"
+#include "unicode/uchar.h"
+#include "unicode/regex.h"
+#include "uprops.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "uassert.h"
+#include "ucln_in.h"
+#include "umutex.h"
+
+#include "regexcst.h" // Contains state table for the regex pattern parser.
+ // generated by a Perl script.
+#include "regexst.h"
+
+U_NAMESPACE_BEGIN
+
// "Rule Char" Characters are those with special meaning, and therefore
// need to be escaped to appear as literals in a regexp.
constexpr char16_t const *gRuleSet_rule_chars = u"*?+[(){}^$|\\.";
-
-//
+
+//
// The backslash escape characters that ICU's unescape() function will handle.
-//
+//
constexpr char16_t const *gUnescapeChars = u"acefnrtuUx";
-
-//
+
+//
// Unicode Set pattern for Regular Expression \w
-//
+//
constexpr char16_t const *gIsWordPattern = u"[\\p{Alphabetic}\\p{M}\\p{Nd}\\p{Pc}\\u200c\\u200d]";
-
-//
-// Unicode Set Definitions for Regular Expression \s
-//
+
+//
+// Unicode Set Definitions for Regular Expression \s
+//
constexpr char16_t const *gIsSpacePattern = u"[\\p{WhiteSpace}]";
-
-//
-// UnicodeSets used in implementation of Grapheme Cluster detection, \X
-//
+
+//
+// UnicodeSets used in implementation of Grapheme Cluster detection, \X
+//
constexpr char16_t const *gGC_ControlPattern = u"[[:Zl:][:Zp:][:Cc:][:Cf:]-[:Grapheme_Extend:]]";
constexpr char16_t const *gGC_ExtendPattern = u"[\\p{Grapheme_Extend}]";
constexpr char16_t const *gGC_LPattern = u"[\\p{Hangul_Syllable_Type=L}]";
@@ -66,12 +66,12 @@ constexpr char16_t const *gGC_VPattern = u"[\\p{Hangul_Syllable_Type=V}]";
constexpr char16_t const *gGC_TPattern = u"[\\p{Hangul_Syllable_Type=T}]";
constexpr char16_t const *gGC_LVPattern = u"[\\p{Hangul_Syllable_Type=LV}]";
constexpr char16_t const *gGC_LVTPattern = u"[\\p{Hangul_Syllable_Type=LVT}]";
-
-
+
+
RegexStaticSets *RegexStaticSets::gStaticSets = nullptr;
UInitOnce gStaticSetsInitOnce = U_INITONCE_INITIALIZER;
-
-
+
+
RegexStaticSets::RegexStaticSets(UErrorCode *status) {
// Initialize the shared static sets to their correct values.
fUnescapeCharSet.addAll(UnicodeString(true, gUnescapeChars, -1)).freeze();
@@ -84,12 +84,12 @@ RegexStaticSets::RegexStaticSets(UErrorCode *status) {
fPropSets[URX_GC_T].applyPattern(UnicodeString(TRUE, gGC_TPattern, -1), *status).freeze();
fPropSets[URX_GC_LV].applyPattern(UnicodeString(TRUE, gGC_LVPattern, -1), *status).freeze();
fPropSets[URX_GC_LVT].applyPattern(UnicodeString(TRUE, gGC_LVTPattern, -1), *status).freeze();
-
-
- //
- // "Normal" is the set of characters that don't need special handling
- // when finding grapheme cluster boundaries.
- //
+
+
+ //
+ // "Normal" is the set of characters that don't need special handling
+ // when finding grapheme cluster boundaries.
+ //
fPropSets[URX_GC_NORMAL].complement();
fPropSets[URX_GC_NORMAL].remove(0xac00, 0xd7a4);
fPropSets[URX_GC_NORMAL].removeAll(fPropSets[URX_GC_CONTROL]);
@@ -97,9 +97,9 @@ RegexStaticSets::RegexStaticSets(UErrorCode *status) {
fPropSets[URX_GC_NORMAL].removeAll(fPropSets[URX_GC_V]);
fPropSets[URX_GC_NORMAL].removeAll(fPropSets[URX_GC_T]);
fPropSets[URX_GC_NORMAL].freeze();
-
- // Initialize the 8-bit fast bit sets from the parallel full
- // UnicodeSets.
+
+ // Initialize the 8-bit fast bit sets from the parallel full
+ // UnicodeSets.
//
// TODO: 25 Oct 2019 are these fast 8-bit sets worth keeping?
// Measured 3.5% gain on (non) matching with the pattern "x(?:\\S+)+x"
@@ -112,61 +112,61 @@ RegexStaticSets::RegexStaticSets(UErrorCode *status) {
for (int32_t i=0; i<URX_LAST_SET; i++) {
fPropSets8[i].init(&fPropSets[i]);
- }
-
- // Sets used while parsing rules, but not referenced from the parse state table
+ }
+
+ // Sets used while parsing rules, but not referenced from the parse state table
fRuleSets[kRuleSet_rule_char-128]
.addAll(UnicodeString(gRuleSet_rule_chars)).complement().freeze();
fRuleSets[kRuleSet_digit_char-128].add(u'0', u'9').freeze();
fRuleSets[kRuleSet_ascii_letter-128].add(u'A', u'Z').add(u'a', u'z').freeze();
- fRuleDigitsAlias = &fRuleSets[kRuleSet_digit_char-128];
-
+ fRuleDigitsAlias = &fRuleSets[kRuleSet_digit_char-128];
+
// Finally, initialize an empty UText string for utility purposes
fEmptyText = utext_openUChars(nullptr, nullptr, 0, status);
-
-}
-
-
-RegexStaticSets::~RegexStaticSets() {
+
+}
+
+
+RegexStaticSets::~RegexStaticSets() {
fRuleDigitsAlias = nullptr;
- utext_close(fEmptyText);
-}
-
-
-//------------------------------------------------------------------------------
-//
-// regex_cleanup Memory cleanup function, free/delete all
-// cached memory. Called by ICU's u_cleanup() function.
-//
-//------------------------------------------------------------------------------
-
-U_CDECL_BEGIN
-static UBool U_CALLCONV
-regex_cleanup(void) {
+ utext_close(fEmptyText);
+}
+
+
+//------------------------------------------------------------------------------
+//
+// regex_cleanup Memory cleanup function, free/delete all
+// cached memory. Called by ICU's u_cleanup() function.
+//
+//------------------------------------------------------------------------------
+
+U_CDECL_BEGIN
+static UBool U_CALLCONV
+regex_cleanup(void) {
delete RegexStaticSets::gStaticSets;
RegexStaticSets::gStaticSets = nullptr;
gStaticSetsInitOnce.reset();
return TRUE;
-}
-
-static void U_CALLCONV initStaticSets(UErrorCode &status) {
+}
+
+static void U_CALLCONV initStaticSets(UErrorCode &status) {
U_ASSERT(RegexStaticSets::gStaticSets == nullptr);
- ucln_i18n_registerCleanup(UCLN_I18N_REGEX, regex_cleanup);
- RegexStaticSets::gStaticSets = new RegexStaticSets(&status);
- if (U_FAILURE(status)) {
- delete RegexStaticSets::gStaticSets;
+ ucln_i18n_registerCleanup(UCLN_I18N_REGEX, regex_cleanup);
+ RegexStaticSets::gStaticSets = new RegexStaticSets(&status);
+ if (U_FAILURE(status)) {
+ delete RegexStaticSets::gStaticSets;
RegexStaticSets::gStaticSets = nullptr;
- }
+ }
if (RegexStaticSets::gStaticSets == nullptr && U_SUCCESS(status)) {
- status = U_MEMORY_ALLOCATION_ERROR;
- }
-}
-U_CDECL_END
-
-void RegexStaticSets::initGlobals(UErrorCode *status) {
- umtx_initOnce(gStaticSetsInitOnce, &initStaticSets, *status);
-}
-
-U_NAMESPACE_END
-#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
+ status = U_MEMORY_ALLOCATION_ERROR;
+ }
+}
+U_CDECL_END
+
+void RegexStaticSets::initGlobals(UErrorCode *status) {
+ umtx_initOnce(gStaticSetsInitOnce, &initStaticSets, *status);
+}
+
+U_NAMESPACE_END
+#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS