diff options
author | neksard <neksard@yandex-team.ru> | 2022-02-10 16:45:23 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:23 +0300 |
commit | 8f7cf138264e0caa318144bf8a2c950e0b0a8593 (patch) | |
tree | 83bf5c8c8047c42d8475e6095df90ccdc3d1b57f /contrib/libs/icu/i18n/utf8collationiterator.h | |
parent | d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (diff) | |
download | ydb-8f7cf138264e0caa318144bf8a2c950e0b0a8593.tar.gz |
Restoring authorship annotation for <neksard@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/icu/i18n/utf8collationiterator.h')
-rw-r--r-- | contrib/libs/icu/i18n/utf8collationiterator.h | 346 |
1 files changed, 173 insertions, 173 deletions
diff --git a/contrib/libs/icu/i18n/utf8collationiterator.h b/contrib/libs/icu/i18n/utf8collationiterator.h index 9a3ec45aeb..560ffb38bc 100644 --- a/contrib/libs/icu/i18n/utf8collationiterator.h +++ b/contrib/libs/icu/i18n/utf8collationiterator.h @@ -1,174 +1,174 @@ // © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2012-2016, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* utf8collationiterator.h -* -* created on: 2012nov12 (from utf16collationiterator.h & uitercollationiterator.h) -* created by: Markus W. Scherer -*/ - -#ifndef __UTF8COLLATIONITERATOR_H__ -#define __UTF8COLLATIONITERATOR_H__ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_COLLATION - -#include "cmemory.h" -#include "collation.h" -#include "collationdata.h" -#include "collationiterator.h" -#include "normalizer2impl.h" - -U_NAMESPACE_BEGIN - -/** - * UTF-8 collation element and character iterator. - * Handles normalized UTF-8 text inline, with length or NUL-terminated. - * Unnormalized text is handled by a subclass. - */ -class U_I18N_API UTF8CollationIterator : public CollationIterator { -public: - UTF8CollationIterator(const CollationData *d, UBool numeric, - const uint8_t *s, int32_t p, int32_t len) - : CollationIterator(d, numeric), - u8(s), pos(p), length(len) {} - - virtual ~UTF8CollationIterator(); - - virtual void resetToOffset(int32_t newOffset); - - virtual int32_t getOffset() const; - - virtual UChar32 nextCodePoint(UErrorCode &errorCode); - - virtual UChar32 previousCodePoint(UErrorCode &errorCode); - -protected: - /** - * For byte sequences that are illegal in UTF-8, an error value may be returned - * together with a bogus code point. The caller will ignore that code point. - * - * Special values may be returned for surrogate code points, which are also illegal in UTF-8, - * but the caller will treat them like U+FFFD because forbidSurrogateCodePoints() returns TRUE. - * - * Valid lead surrogates are returned from inside a normalized text segment, - * where handleGetTrailSurrogate() will return the matching trail surrogate. - */ - virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode); - - virtual UBool foundNULTerminator(); - - virtual UBool forbidSurrogateCodePoints() const; - - virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode); - - virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode); - - const uint8_t *u8; - int32_t pos; - int32_t length; // <0 for NUL-terminated strings -}; - -/** - * Incrementally checks the input text for FCD and normalizes where necessary. - */ -class U_I18N_API FCDUTF8CollationIterator : public UTF8CollationIterator { -public: - FCDUTF8CollationIterator(const CollationData *data, UBool numeric, - const uint8_t *s, int32_t p, int32_t len) - : UTF8CollationIterator(data, numeric, s, p, len), - state(CHECK_FWD), start(p), - nfcImpl(data->nfcImpl) {} - - virtual ~FCDUTF8CollationIterator(); - - virtual void resetToOffset(int32_t newOffset); - - virtual int32_t getOffset() const; - - virtual UChar32 nextCodePoint(UErrorCode &errorCode); - - virtual UChar32 previousCodePoint(UErrorCode &errorCode); - -protected: - virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode); - - virtual UChar handleGetTrailSurrogate(); - - virtual UBool foundNULTerminator(); - - virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode); - - virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode); - -private: - UBool nextHasLccc() const; - UBool previousHasTccc() const; - - /** - * Switches to forward checking if possible. - */ - void switchToForward(); - - /** - * Extends the FCD text segment forward or normalizes around pos. - * @return TRUE if success - */ - UBool nextSegment(UErrorCode &errorCode); - - /** - * Switches to backward checking. - */ - void switchToBackward(); - - /** - * Extends the FCD text segment backward or normalizes around pos. - * @return TRUE if success - */ - UBool previousSegment(UErrorCode &errorCode); - - UBool normalize(const UnicodeString &s, UErrorCode &errorCode); - - enum State { - /** - * The input text [start..pos[ passes the FCD check. - * Moving forward checks incrementally. - * limit is undefined. - */ - CHECK_FWD, - /** - * The input text [pos..limit[ passes the FCD check. - * Moving backward checks incrementally. - * start is undefined. - */ - CHECK_BWD, - /** - * The input text [start..limit[ passes the FCD check. - * pos tracks the current text index. - */ - IN_FCD_SEGMENT, - /** - * The input text [start..limit[ failed the FCD check and was normalized. - * pos tracks the current index in the normalized string. - */ - IN_NORMALIZED - }; - - State state; - - int32_t start; - int32_t limit; - - const Normalizer2Impl &nfcImpl; - UnicodeString normalized; -}; - -U_NAMESPACE_END - -#endif // !UCONFIG_NO_COLLATION -#endif // __UTF8COLLATIONITERATOR_H__ +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2012-2016, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* utf8collationiterator.h +* +* created on: 2012nov12 (from utf16collationiterator.h & uitercollationiterator.h) +* created by: Markus W. Scherer +*/ + +#ifndef __UTF8COLLATIONITERATOR_H__ +#define __UTF8COLLATIONITERATOR_H__ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_COLLATION + +#include "cmemory.h" +#include "collation.h" +#include "collationdata.h" +#include "collationiterator.h" +#include "normalizer2impl.h" + +U_NAMESPACE_BEGIN + +/** + * UTF-8 collation element and character iterator. + * Handles normalized UTF-8 text inline, with length or NUL-terminated. + * Unnormalized text is handled by a subclass. + */ +class U_I18N_API UTF8CollationIterator : public CollationIterator { +public: + UTF8CollationIterator(const CollationData *d, UBool numeric, + const uint8_t *s, int32_t p, int32_t len) + : CollationIterator(d, numeric), + u8(s), pos(p), length(len) {} + + virtual ~UTF8CollationIterator(); + + virtual void resetToOffset(int32_t newOffset); + + virtual int32_t getOffset() const; + + virtual UChar32 nextCodePoint(UErrorCode &errorCode); + + virtual UChar32 previousCodePoint(UErrorCode &errorCode); + +protected: + /** + * For byte sequences that are illegal in UTF-8, an error value may be returned + * together with a bogus code point. The caller will ignore that code point. + * + * Special values may be returned for surrogate code points, which are also illegal in UTF-8, + * but the caller will treat them like U+FFFD because forbidSurrogateCodePoints() returns TRUE. + * + * Valid lead surrogates are returned from inside a normalized text segment, + * where handleGetTrailSurrogate() will return the matching trail surrogate. + */ + virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode); + + virtual UBool foundNULTerminator(); + + virtual UBool forbidSurrogateCodePoints() const; + + virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode); + + virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode); + + const uint8_t *u8; + int32_t pos; + int32_t length; // <0 for NUL-terminated strings +}; + +/** + * Incrementally checks the input text for FCD and normalizes where necessary. + */ +class U_I18N_API FCDUTF8CollationIterator : public UTF8CollationIterator { +public: + FCDUTF8CollationIterator(const CollationData *data, UBool numeric, + const uint8_t *s, int32_t p, int32_t len) + : UTF8CollationIterator(data, numeric, s, p, len), + state(CHECK_FWD), start(p), + nfcImpl(data->nfcImpl) {} + + virtual ~FCDUTF8CollationIterator(); + + virtual void resetToOffset(int32_t newOffset); + + virtual int32_t getOffset() const; + + virtual UChar32 nextCodePoint(UErrorCode &errorCode); + + virtual UChar32 previousCodePoint(UErrorCode &errorCode); + +protected: + virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode); + + virtual UChar handleGetTrailSurrogate(); + + virtual UBool foundNULTerminator(); + + virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode); + + virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode); + +private: + UBool nextHasLccc() const; + UBool previousHasTccc() const; + + /** + * Switches to forward checking if possible. + */ + void switchToForward(); + + /** + * Extends the FCD text segment forward or normalizes around pos. + * @return TRUE if success + */ + UBool nextSegment(UErrorCode &errorCode); + + /** + * Switches to backward checking. + */ + void switchToBackward(); + + /** + * Extends the FCD text segment backward or normalizes around pos. + * @return TRUE if success + */ + UBool previousSegment(UErrorCode &errorCode); + + UBool normalize(const UnicodeString &s, UErrorCode &errorCode); + + enum State { + /** + * The input text [start..pos[ passes the FCD check. + * Moving forward checks incrementally. + * limit is undefined. + */ + CHECK_FWD, + /** + * The input text [pos..limit[ passes the FCD check. + * Moving backward checks incrementally. + * start is undefined. + */ + CHECK_BWD, + /** + * The input text [start..limit[ passes the FCD check. + * pos tracks the current text index. + */ + IN_FCD_SEGMENT, + /** + * The input text [start..limit[ failed the FCD check and was normalized. + * pos tracks the current index in the normalized string. + */ + IN_NORMALIZED + }; + + State state; + + int32_t start; + int32_t limit; + + const Normalizer2Impl &nfcImpl; + UnicodeString normalized; +}; + +U_NAMESPACE_END + +#endif // !UCONFIG_NO_COLLATION +#endif // __UTF8COLLATIONITERATOR_H__ |