aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/icu/include/unicode/rbbi.h
diff options
context:
space:
mode:
author: romankoshelev <romankoshelev@yandex-team.com> 2023-08-14 19:51:50 +0300
committer: romankoshelev <romankoshelev@yandex-team.com> 2023-08-15 01:24:11 +0300
commit: cfcd865e05c0d0525ea27d1e153a043b32a85138 (patch)
tree: 68d3b3b25271e8a4998505897a269ff7ce119b76 /contrib/libs/icu/include/unicode/rbbi.h
parent: ccb790c507bd5e8ffe2ef9886ce5ee0a7ce22a15 (diff)
download: ydb-cfcd865e05c0d0525ea27d1e153a043b32a85138.tar.gz
Update ICU to 73.2
Diffstat (limited to 'contrib/libs/icu/include/unicode/rbbi.h')
-rw-r--r-- contrib/libs/icu/include/unicode/rbbi.h 99
1 files changed, 56 insertions, 43 deletions
diff --git a/contrib/libs/icu/include/unicode/rbbi.h b/contrib/libs/icu/include/unicode/rbbi.h
index 0ce93819f5..418b52e41f 100644
--- a/contrib/libs/icu/include/unicode/rbbi.h
+++ b/contrib/libs/icu/include/unicode/rbbi.h
@@ -54,14 +54,14 @@ class UStack;
*
* <p>This class is not intended to be subclassed.</p>
*/
-class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator {
+class U_COMMON_API RuleBasedBreakIterator /*final*/ : public BreakIterator {
private:
/**
* The UText through which this BreakIterator accesses the text
* @internal (private)
*/
- UText fText;
+ UText fText = UTEXT_INITIALIZER;
#ifndef U_HIDE_INTERNAL_API
public:
@@ -71,32 +71,38 @@ public:
* Not for general use; Public only for testing purposes.
* @internal
*/
- RBBIDataWrapper *fData;
+ RBBIDataWrapper *fData = nullptr;
+
private:
+ /**
+ * The saved error code associated with this break iterator.
+ * This is the value to be returned by copyErrorTo().
+ */
+ UErrorCode fErrorCode = U_ZERO_ERROR;
/**
* The current position of the iterator. Pinned, 0 <= fPosition <= text.length.
* Never has the value UBRK_DONE (-1).
*/
- int32_t fPosition;
+ int32_t fPosition = 0;
/**
* TODO:
*/
- int32_t fRuleStatusIndex;
+ int32_t fRuleStatusIndex = 0;
/**
* Cache of previously determined boundary positions.
*/
class BreakCache;
- BreakCache *fBreakCache;
+ BreakCache *fBreakCache = nullptr;
/**
* Cache of boundary positions within a region of text that has been
* sub-divided by dictionary based breaking.
*/
class DictionaryCache;
- DictionaryCache *fDictionaryCache;
+ DictionaryCache *fDictionaryCache = nullptr;
/**
*
@@ -105,7 +111,7 @@ private:
* handle a given character.
* @internal (private)
*/
- UStack *fLanguageBreakEngines;
+ UStack *fLanguageBreakEngines = nullptr;
/**
*
@@ -114,38 +120,43 @@ private:
* LanguageBreakEngine.
* @internal (private)
*/
- UnhandledEngine *fUnhandledBreakEngine;
+ UnhandledEngine *fUnhandledBreakEngine = nullptr;
/**
* Counter for the number of characters encountered with the "dictionary"
* flag set.
* @internal (private)
*/
- uint32_t fDictionaryCharCount;
+ uint32_t fDictionaryCharCount = 0;
/**
* A character iterator that refers to the same text as the UText, above.
* Only included for compatibility with old API, which was based on CharacterIterators.
* Value may be adopted from outside, or one of fSCharIter or fDCharIter, below.
*/
- CharacterIterator *fCharIter;
+ CharacterIterator *fCharIter = &fSCharIter;
/**
* When the input text is provided by a UnicodeString, this will point to
* a characterIterator that wraps that data. Needed only for the
* implementation of getText(), a backwards compatibility issue.
*/
- StringCharacterIterator fSCharIter;
+ UCharCharacterIterator fSCharIter {u"", 0};
/**
* True when iteration has run off the end, and iterator functions should return UBRK_DONE.
*/
- UBool fDone;
+ bool fDone = false;
/**
* Array of look-ahead tentative results.
*/
- int32_t *fLookAheadMatches;
+ int32_t *fLookAheadMatches = nullptr;
+
+ /**
+ * A flag to indicate if phrase based breaking is enabled.
+ */
+ UBool fIsPhraseBreaking = false;
//=======================================================================
// constructors
@@ -163,15 +174,39 @@ private:
*/
RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
+ /**
+ * This constructor uses the udata interface to create a BreakIterator
+ * whose internal tables live in a memory-mapped file. "image" is an
+ * ICU UDataMemory handle for the pre-compiled break iterator tables.
+ * @param image handle to the memory image for the break iterator data.
+ * Ownership of the UDataMemory handle passes to the Break Iterator,
+ * which will be responsible for closing it when it is no longer needed.
+ * @param status Information on any errors encountered.
+ * @param isPhraseBreaking true if phrase based breaking is required, otherwise false.
+ * @see udata_open
+ * @see #getBinaryRules
+ * @internal (private)
+ */
+ RuleBasedBreakIterator(UDataMemory* image, UBool isPhraseBreaking, UErrorCode &status);
+
/** @internal */
friend class RBBIRuleBuilder;
/** @internal */
friend class BreakIterator;
+ /**
+ * Default constructor with an error code parameter.
+ * Apart from error handling, identical to the default constructor.
+ * Internally, handles common initialization for other constructors.
+ * @internal (private)
+ */
+ RuleBasedBreakIterator(UErrorCode *status);
+
public:
/** Default constructor. Creates an empty shell of an iterator, with no
- * rules or text to iterate over. Object can subsequently be assigned to.
+ * rules or text to iterate over. Object can subsequently be assigned to,
+ * but is otherwise unusable.
* @stable ICU 2.2
*/
RuleBasedBreakIterator();
@@ -269,7 +304,9 @@ public:
* @return true if the two BreakIterators are not the same.
* @stable ICU 2.0
*/
- inline bool operator!=(const BreakIterator& that) const;
+ inline bool operator!=(const BreakIterator& that) const {
+ return !operator==(that);
+ }
/**
* Returns a newly-constructed RuleBasedBreakIterator with the same
@@ -315,8 +352,7 @@ public:
* </p>
* <p>
* When the break iterator is operating on text supplied via a UText,
- * this function will fail. Lacking any way to signal failures, it
- * returns an CharacterIterator containing no text.
+ * this function will fail, returning a CharacterIterator containing no text.
* The function getUText() provides similar functionality,
* is reliable, and is more efficient.
* </p>
@@ -336,7 +372,7 @@ public:
* access the text without impacting any break iterator operations,
* but the underlying text itself must not be altered.
*
- * @param fillIn A UText to be filled in. If NULL, a new UText will be
+ * @param fillIn A UText to be filled in. If nullptr, a new UText will be
* allocated to hold the result.
* @param status receives any error codes.
* @return The current UText for this break iterator. If an input
@@ -556,7 +592,7 @@ public:
* tricky. Use clone() instead.
*
* @param stackBuffer The pointer to the memory into which the cloned object
- * should be placed. If NULL, allocate heap memory
+ * should be placed. If nullptr, allocate heap memory
* for the cloned object.
* @param BufferSize The size of the buffer. If zero, return the required
* buffer size, but do not clone the object. If the
@@ -629,19 +665,6 @@ private:
// implementation
//=======================================================================
/**
- * Dumps caches and performs other actions associated with a complete change
- * in text or iteration position.
- * @internal (private)
- */
- void reset(void);
-
- /**
- * Common initialization function, used by constructors and bufferClone.
- * @internal (private)
- */
- void init(UErrorCode &status);
-
- /**
* Iterate backwards from an arbitrary position in the input text using the
* synthesized Safe Reverse rules.
* This locates a "Safe Position" from which the forward break rules
@@ -713,16 +736,6 @@ private:
#endif /* U_HIDE_INTERNAL_API */
};
-//------------------------------------------------------------------------------
-//
-// Inline Functions Definitions ...
-//
-//------------------------------------------------------------------------------
-
-inline bool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const {
- return !operator==(that);
-}
-
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */