about | summary | refs | log | tree | commit | diff | stats
path: root/contrib/libs/icu/i18n/utf8collationiterator.h
diff options
context:
space:
mode:
author: neksard <neksard@yandex-team.ru> 2022-02-10 16:45:23 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:45:23 +0300
commit: 8f7cf138264e0caa318144bf8a2c950e0b0a8593 (patch)
tree: 83bf5c8c8047c42d8475e6095df90ccdc3d1b57f /contrib/libs/icu/i18n/utf8collationiterator.h
parent: d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (diff)
download: ydb-8f7cf138264e0caa318144bf8a2c950e0b0a8593.tar.gz
Restoring authorship annotation for <neksard@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/icu/i18n/utf8collationiterator.h')
-rw-r--r-- contrib/libs/icu/i18n/utf8collationiterator.h 346
1 files changed, 173 insertions, 173 deletions
diff --git a/contrib/libs/icu/i18n/utf8collationiterator.h b/contrib/libs/icu/i18n/utf8collationiterator.h
index 9a3ec45aeb..560ffb38bc 100644
--- a/contrib/libs/icu/i18n/utf8collationiterator.h
+++ b/contrib/libs/icu/i18n/utf8collationiterator.h
@@ -1,174 +1,174 @@
// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2012-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* utf8collationiterator.h
-*
-* created on: 2012nov12 (from utf16collationiterator.h & uitercollationiterator.h)
-* created by: Markus W. Scherer
-*/
-
-#ifndef __UTF8COLLATIONITERATOR_H__
-#define __UTF8COLLATIONITERATOR_H__
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_COLLATION
-
-#include "cmemory.h"
-#include "collation.h"
-#include "collationdata.h"
-#include "collationiterator.h"
-#include "normalizer2impl.h"
-
-U_NAMESPACE_BEGIN
-
-/**
- * UTF-8 collation element and character iterator.
- * Handles normalized UTF-8 text inline, with length or NUL-terminated.
- * Unnormalized text is handled by a subclass.
- */
-class U_I18N_API UTF8CollationIterator : public CollationIterator {
-public:
- UTF8CollationIterator(const CollationData *d, UBool numeric,
- const uint8_t *s, int32_t p, int32_t len)
- : CollationIterator(d, numeric),
- u8(s), pos(p), length(len) {}
-
- virtual ~UTF8CollationIterator();
-
- virtual void resetToOffset(int32_t newOffset);
-
- virtual int32_t getOffset() const;
-
- virtual UChar32 nextCodePoint(UErrorCode &errorCode);
-
- virtual UChar32 previousCodePoint(UErrorCode &errorCode);
-
-protected:
- /**
- * For byte sequences that are illegal in UTF-8, an error value may be returned
- * together with a bogus code point. The caller will ignore that code point.
- *
- * Special values may be returned for surrogate code points, which are also illegal in UTF-8,
- * but the caller will treat them like U+FFFD because forbidSurrogateCodePoints() returns TRUE.
- *
- * Valid lead surrogates are returned from inside a normalized text segment,
- * where handleGetTrailSurrogate() will return the matching trail surrogate.
- */
- virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
-
- virtual UBool foundNULTerminator();
-
- virtual UBool forbidSurrogateCodePoints() const;
-
- virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
-
- virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
-
- const uint8_t *u8;
- int32_t pos;
- int32_t length; // <0 for NUL-terminated strings
-};
-
-/**
- * Incrementally checks the input text for FCD and normalizes where necessary.
- */
-class U_I18N_API FCDUTF8CollationIterator : public UTF8CollationIterator {
-public:
- FCDUTF8CollationIterator(const CollationData *data, UBool numeric,
- const uint8_t *s, int32_t p, int32_t len)
- : UTF8CollationIterator(data, numeric, s, p, len),
- state(CHECK_FWD), start(p),
- nfcImpl(data->nfcImpl) {}
-
- virtual ~FCDUTF8CollationIterator();
-
- virtual void resetToOffset(int32_t newOffset);
-
- virtual int32_t getOffset() const;
-
- virtual UChar32 nextCodePoint(UErrorCode &errorCode);
-
- virtual UChar32 previousCodePoint(UErrorCode &errorCode);
-
-protected:
- virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
-
- virtual UChar handleGetTrailSurrogate();
-
- virtual UBool foundNULTerminator();
-
- virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
-
- virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
-
-private:
- UBool nextHasLccc() const;
- UBool previousHasTccc() const;
-
- /**
- * Switches to forward checking if possible.
- */
- void switchToForward();
-
- /**
- * Extends the FCD text segment forward or normalizes around pos.
- * @return TRUE if success
- */
- UBool nextSegment(UErrorCode &errorCode);
-
- /**
- * Switches to backward checking.
- */
- void switchToBackward();
-
- /**
- * Extends the FCD text segment backward or normalizes around pos.
- * @return TRUE if success
- */
- UBool previousSegment(UErrorCode &errorCode);
-
- UBool normalize(const UnicodeString &s, UErrorCode &errorCode);
-
- enum State {
- /**
- * The input text [start..pos[ passes the FCD check.
- * Moving forward checks incrementally.
- * limit is undefined.
- */
- CHECK_FWD,
- /**
- * The input text [pos..limit[ passes the FCD check.
- * Moving backward checks incrementally.
- * start is undefined.
- */
- CHECK_BWD,
- /**
- * The input text [start..limit[ passes the FCD check.
- * pos tracks the current text index.
- */
- IN_FCD_SEGMENT,
- /**
- * The input text [start..limit[ failed the FCD check and was normalized.
- * pos tracks the current index in the normalized string.
- */
- IN_NORMALIZED
- };
-
- State state;
-
- int32_t start;
- int32_t limit;
-
- const Normalizer2Impl &nfcImpl;
- UnicodeString normalized;
-};
-
-U_NAMESPACE_END
-
-#endif // !UCONFIG_NO_COLLATION
-#endif // __UTF8COLLATIONITERATOR_H__
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2012-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* utf8collationiterator.h
+*
+* created on: 2012nov12 (from utf16collationiterator.h & uitercollationiterator.h)
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UTF8COLLATIONITERATOR_H__
+#define __UTF8COLLATIONITERATOR_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+#include "cmemory.h"
+#include "collation.h"
+#include "collationdata.h"
+#include "collationiterator.h"
+#include "normalizer2impl.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * UTF-8 collation element and character iterator.
+ * Handles normalized UTF-8 text inline, with length or NUL-terminated.
+ * Unnormalized text is handled by a subclass.
+ */
+class U_I18N_API UTF8CollationIterator : public CollationIterator {
+public:
+ UTF8CollationIterator(const CollationData *d, UBool numeric,
+ const uint8_t *s, int32_t p, int32_t len)
+ : CollationIterator(d, numeric),
+ u8(s), pos(p), length(len) {}
+
+ virtual ~UTF8CollationIterator();
+
+ virtual void resetToOffset(int32_t newOffset);
+
+ virtual int32_t getOffset() const;
+
+ virtual UChar32 nextCodePoint(UErrorCode &errorCode);
+
+ virtual UChar32 previousCodePoint(UErrorCode &errorCode);
+
+protected:
+ /**
+ * For byte sequences that are illegal in UTF-8, an error value may be returned
+ * together with a bogus code point. The caller will ignore that code point.
+ *
+ * Special values may be returned for surrogate code points, which are also illegal in UTF-8,
+ * but the caller will treat them like U+FFFD because forbidSurrogateCodePoints() returns TRUE.
+ *
+ * Valid lead surrogates are returned from inside a normalized text segment,
+ * where handleGetTrailSurrogate() will return the matching trail surrogate.
+ */
+ virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
+
+ virtual UBool foundNULTerminator();
+
+ virtual UBool forbidSurrogateCodePoints() const;
+
+ virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
+
+ virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
+
+ const uint8_t *u8;
+ int32_t pos;
+ int32_t length; // <0 for NUL-terminated strings
+};
+
+/**
+ * Incrementally checks the input text for FCD and normalizes where necessary.
+ */
+class U_I18N_API FCDUTF8CollationIterator : public UTF8CollationIterator {
+public:
+ FCDUTF8CollationIterator(const CollationData *data, UBool numeric,
+ const uint8_t *s, int32_t p, int32_t len)
+ : UTF8CollationIterator(data, numeric, s, p, len),
+ state(CHECK_FWD), start(p),
+ nfcImpl(data->nfcImpl) {}
+
+ virtual ~FCDUTF8CollationIterator();
+
+ virtual void resetToOffset(int32_t newOffset);
+
+ virtual int32_t getOffset() const;
+
+ virtual UChar32 nextCodePoint(UErrorCode &errorCode);
+
+ virtual UChar32 previousCodePoint(UErrorCode &errorCode);
+
+protected:
+ virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
+
+ virtual UChar handleGetTrailSurrogate();
+
+ virtual UBool foundNULTerminator();
+
+ virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
+
+ virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
+
+private:
+ UBool nextHasLccc() const;
+ UBool previousHasTccc() const;
+
+ /**
+ * Switches to forward checking if possible.
+ */
+ void switchToForward();
+
+ /**
+ * Extends the FCD text segment forward or normalizes around pos.
+ * @return TRUE if success
+ */
+ UBool nextSegment(UErrorCode &errorCode);
+
+ /**
+ * Switches to backward checking.
+ */
+ void switchToBackward();
+
+ /**
+ * Extends the FCD text segment backward or normalizes around pos.
+ * @return TRUE if success
+ */
+ UBool previousSegment(UErrorCode &errorCode);
+
+ UBool normalize(const UnicodeString &s, UErrorCode &errorCode);
+
+ enum State {
+ /**
+ * The input text [start..pos[ passes the FCD check.
+ * Moving forward checks incrementally.
+ * limit is undefined.
+ */
+ CHECK_FWD,
+ /**
+ * The input text [pos..limit[ passes the FCD check.
+ * Moving backward checks incrementally.
+ * start is undefined.
+ */
+ CHECK_BWD,
+ /**
+ * The input text [start..limit[ passes the FCD check.
+ * pos tracks the current text index.
+ */
+ IN_FCD_SEGMENT,
+ /**
+ * The input text [start..limit[ failed the FCD check and was normalized.
+ * pos tracks the current index in the normalized string.
+ */
+ IN_NORMALIZED
+ };
+
+ State state;
+
+ int32_t start;
+ int32_t limit;
+
+ const Normalizer2Impl &nfcImpl;
+ UnicodeString normalized;
+};
+
+U_NAMESPACE_END
+
+#endif // !UCONFIG_NO_COLLATION
+#endif // __UTF8COLLATIONITERATOR_H__