diff options
author | romankoshelev <romankoshelev@yandex-team.com> | 2023-08-14 19:51:50 +0300 |
---|---|---|
committer | romankoshelev <romankoshelev@yandex-team.com> | 2023-08-15 01:24:11 +0300 |
commit | cfcd865e05c0d0525ea27d1e153a043b32a85138 (patch) | |
tree | 68d3b3b25271e8a4998505897a269ff7ce119b76 /contrib/libs/icu/include/unicode/rbbi.h | |
parent | ccb790c507bd5e8ffe2ef9886ce5ee0a7ce22a15 (diff) | |
download | ydb-cfcd865e05c0d0525ea27d1e153a043b32a85138.tar.gz |
Update ICU to 73.2
Diffstat (limited to 'contrib/libs/icu/include/unicode/rbbi.h')
-rw-r--r-- | contrib/libs/icu/include/unicode/rbbi.h | 99 |
1 files changed, 56 insertions, 43 deletions
diff --git a/contrib/libs/icu/include/unicode/rbbi.h b/contrib/libs/icu/include/unicode/rbbi.h index 0ce93819f5..418b52e41f 100644 --- a/contrib/libs/icu/include/unicode/rbbi.h +++ b/contrib/libs/icu/include/unicode/rbbi.h @@ -54,14 +54,14 @@ class UStack; * * <p>This class is not intended to be subclassed.</p> */ -class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator { +class U_COMMON_API RuleBasedBreakIterator /*final*/ : public BreakIterator { private: /** * The UText through which this BreakIterator accesses the text * @internal (private) */ - UText fText; + UText fText = UTEXT_INITIALIZER; #ifndef U_HIDE_INTERNAL_API public: @@ -71,32 +71,38 @@ public: * Not for general use; Public only for testing purposes. * @internal */ - RBBIDataWrapper *fData; + RBBIDataWrapper *fData = nullptr; + private: + /** + * The saved error code associated with this break iterator. + * This is the value to be returned by copyErrorTo(). + */ + UErrorCode fErrorCode = U_ZERO_ERROR; /** * The current position of the iterator. Pinned, 0 < fPosition <= text.length. * Never has the value UBRK_DONE (-1). */ - int32_t fPosition; + int32_t fPosition = 0; /** * TODO: */ - int32_t fRuleStatusIndex; + int32_t fRuleStatusIndex = 0; /** * Cache of previously determined boundary positions. */ class BreakCache; - BreakCache *fBreakCache; + BreakCache *fBreakCache = nullptr; /** * Cache of boundary positions within a region of text that has been * sub-divided by dictionary based breaking. */ class DictionaryCache; - DictionaryCache *fDictionaryCache; + DictionaryCache *fDictionaryCache = nullptr; /** * @@ -105,7 +111,7 @@ private: * handle a given character. * @internal (private) */ - UStack *fLanguageBreakEngines; + UStack *fLanguageBreakEngines = nullptr; /** * @@ -114,38 +120,43 @@ private: * LanguageBreakEngine. * @internal (private) */ - UnhandledEngine *fUnhandledBreakEngine; + UnhandledEngine *fUnhandledBreakEngine = nullptr; /** * Counter for the number of characters encountered with the "dictionary" * flag set. * @internal (private) */ - uint32_t fDictionaryCharCount; + uint32_t fDictionaryCharCount = 0; /** * A character iterator that refers to the same text as the UText, above. * Only included for compatibility with old API, which was based on CharacterIterators. * Value may be adopted from outside, or one of fSCharIter or fDCharIter, below. */ - CharacterIterator *fCharIter; + CharacterIterator *fCharIter = &fSCharIter; /** * When the input text is provided by a UnicodeString, this will point to * a characterIterator that wraps that data. Needed only for the * implementation of getText(), a backwards compatibility issue. */ - StringCharacterIterator fSCharIter; + UCharCharacterIterator fSCharIter {u"", 0}; /** * True when iteration has run off the end, and iterator functions should return UBRK_DONE. */ - UBool fDone; + bool fDone = false; /** * Array of look-ahead tentative results. */ - int32_t *fLookAheadMatches; + int32_t *fLookAheadMatches = nullptr; + + /** + * A flag to indicate if phrase based breaking is enabled. + */ + UBool fIsPhraseBreaking = false; //======================================================================= // constructors @@ -163,15 +174,39 @@ private: */ RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status); + /** + * This constructor uses the udata interface to create a BreakIterator + * whose internal tables live in a memory-mapped file. "image" is an + * ICU UDataMemory handle for the pre-compiled break iterator tables. + * @param image handle to the memory image for the break iterator data. + * Ownership of the UDataMemory handle passes to the Break Iterator, + * which will be responsible for closing it when it is no longer needed. + * @param status Information on any errors encountered. + * @param isPhraseBreaking true if phrase based breaking is required, otherwise false. + * @see udata_open + * @see #getBinaryRules + * @internal (private) + */ + RuleBasedBreakIterator(UDataMemory* image, UBool isPhraseBreaking, UErrorCode &status); + /** @internal */ friend class RBBIRuleBuilder; /** @internal */ friend class BreakIterator; + /** + * Default constructor with an error code parameter. + * Aside from error handling, otherwise identical to the default constructor. + * Internally, handles common initialization for other constructors. + * @internal (private) + */ + RuleBasedBreakIterator(UErrorCode *status); + public: /** Default constructor. Creates an empty shell of an iterator, with no - * rules or text to iterate over. Object can subsequently be assigned to. + * rules or text to iterate over. Object can subsequently be assigned to, + * but is otherwise unusable. * @stable ICU 2.2 */ RuleBasedBreakIterator(); @@ -269,7 +304,9 @@ public: * @return true if both BreakIterators are not same. * @stable ICU 2.0 */ - inline bool operator!=(const BreakIterator& that) const; + inline bool operator!=(const BreakIterator& that) const { + return !operator==(that); + } /** * Returns a newly-constructed RuleBasedBreakIterator with the same @@ -315,8 +352,7 @@ public: * </p> * <p> * When the break iterator is operating on text supplied via a UText, - * this function will fail. Lacking any way to signal failures, it - * returns an CharacterIterator containing no text. + * this function will fail, returning a CharacterIterator containing no text. * The function getUText() provides similar functionality, * is reliable, and is more efficient. * </p> @@ -336,7 +372,7 @@ public: * access the text without impacting any break iterator operations, * but the underlying text itself must not be altered. * - * @param fillIn A UText to be filled in. If NULL, a new UText will be + * @param fillIn A UText to be filled in. If nullptr, a new UText will be * allocated to hold the result. * @param status receives any error codes. * @return The current UText for this break iterator. If an input @@ -556,7 +592,7 @@ public: * tricky. Use clone() instead. * * @param stackBuffer The pointer to the memory into which the cloned object - * should be placed. If NULL, allocate heap memory + * should be placed. If nullptr, allocate heap memory * for the cloned object. * @param BufferSize The size of the buffer. If zero, return the required * buffer size, but do not clone the object. If the @@ -629,19 +665,6 @@ private: // implementation //======================================================================= /** - * Dumps caches and performs other actions associated with a complete change - * in text or iteration position. - * @internal (private) - */ - void reset(void); - - /** - * Common initialization function, used by constructors and bufferClone. - * @internal (private) - */ - void init(UErrorCode &status); - - /** * Iterate backwards from an arbitrary position in the input text using the * synthesized Safe Reverse rules. * This locates a "Safe Position" from which the forward break rules @@ -713,16 +736,6 @@ private: #endif /* U_HIDE_INTERNAL_API */ }; -//------------------------------------------------------------------------------ -// -// Inline Functions Definitions ... -// -//------------------------------------------------------------------------------ - -inline bool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const { - return !operator==(that); -} - U_NAMESPACE_END #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |