about | summary | refs | log | tree | commit | diff | stats
path: root/contrib/libs/icu/include/unicode/uiter.h
diff options
context:
space:
mode:
author neksard <neksard@yandex-team.ru> 2022-02-10 16:45:33 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:45:33 +0300
commit 1d9c550e7c38e051d7961f576013a482003a70d9 (patch)
tree b2cc84ee7850122e7ccf51d0ea21e4fa7e7a5685 /contrib/libs/icu/include/unicode/uiter.h
parent 8f7cf138264e0caa318144bf8a2c950e0b0a8593 (diff)
download ydb-1d9c550e7c38e051d7961f576013a482003a70d9.tar.gz
Restoring authorship annotation for <neksard@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/icu/include/unicode/uiter.h')
-rw-r--r-- contrib/libs/icu/include/unicode/uiter.h 1414
1 files changed, 707 insertions, 707 deletions
diff --git a/contrib/libs/icu/include/unicode/uiter.h b/contrib/libs/icu/include/unicode/uiter.h
index 03997b339d..11ad75acd5 100644
--- a/contrib/libs/icu/include/unicode/uiter.h
+++ b/contrib/libs/icu/include/unicode/uiter.h
@@ -1,709 +1,709 @@
// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2002-2011 International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: uiter.h
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2011 International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uiter.h
* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2002jan18
-* created by: Markus W. Scherer
-*/
-
-#ifndef __UITER_H__
-#define __UITER_H__
-
-/**
- * \file
- * \brief C API: Unicode Character Iteration
- *
- * @see UCharIterator
- */
-
-#include "unicode/utypes.h"
-
-#if U_SHOW_CPLUSPLUS_API
- U_NAMESPACE_BEGIN
-
- class CharacterIterator;
- class Replaceable;
-
- U_NAMESPACE_END
-#endif
-
-U_CDECL_BEGIN
-
-struct UCharIterator;
-typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
-
-/**
- * Origin constants for UCharIterator.getIndex() and UCharIterator.move().
- * @see UCharIteratorMove
- * @see UCharIterator
- * @stable ICU 2.1
- */
-typedef enum UCharIteratorOrigin {
- UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH
-} UCharIteratorOrigin;
-
-/** Constants for UCharIterator. @stable ICU 2.6 */
-enum {
- /**
- * Constant value that may be returned by UCharIteratorMove
- * indicating that the final UTF-16 index is not known, but that the move succeeded.
- * This can occur when moving relative to limit or length, or
- * when moving relative to the current index after a setState()
- * when the current UTF-16 index is not known.
- *
- * It would be very inefficient to have to count from the beginning of the text
- * just to get the current/limit/length index after moving relative to it.
- * The actual index can be determined with getIndex(UITER_CURRENT)
- * which will count the UChars if necessary.
- *
- * @stable ICU 2.6
- */
- UITER_UNKNOWN_INDEX=-2
-};
-
-
-/**
- * Constant for UCharIterator getState() indicating an error or
- * an unknown state.
- * Returned by uiter_getState()/UCharIteratorGetState
- * when an error occurs.
- * Also, some UCharIterator implementations may not be able to return
- * a valid state for each position. This will be clearly documented
- * for each such iterator (none of the public ones here).
- *
- * @stable ICU 2.6
- */
-#define UITER_NO_STATE ((uint32_t)0xffffffff)
-
-/**
- * Function type declaration for UCharIterator.getIndex().
- *
- * Gets the current position, or the start or limit of the
- * iteration range.
- *
- * This function may perform slowly for UITER_CURRENT after setState() was called,
- * or for UITER_LENGTH, because an iterator implementation may have to count
- * UChars if the underlying storage is not UTF-16.
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @param origin get the 0, start, limit, length, or current index
- * @return the requested index, or U_SENTINEL in an error condition
- *
- * @see UCharIteratorOrigin
- * @see UCharIterator
- * @stable ICU 2.1
- */
-typedef int32_t U_CALLCONV
-UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin);
-
-/**
- * Function type declaration for UCharIterator.move().
- *
- * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index).
- *
- * Moves the current position relative to the start or limit of the
- * iteration range, or relative to the current position itself.
- * The movement is expressed in numbers of code units forward
- * or backward by specifying a positive or negative delta.
- * Out of bounds movement will be pinned to the start or limit.
- *
- * This function may perform slowly for moving relative to UITER_LENGTH
- * because an iterator implementation may have to count the rest of the
- * UChars if the native storage is not UTF-16.
- *
- * When moving relative to the limit or length, or
- * relative to the current position after setState() was called,
- * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient
- * determination of the actual UTF-16 index.
- * The actual index can be determined with getIndex(UITER_CURRENT)
- * which will count the UChars if necessary.
- * See UITER_UNKNOWN_INDEX for details.
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @param delta can be positive, zero, or negative
- * @param origin move relative to the 0, start, limit, length, or current index
- * @return the new index, or U_SENTINEL on an error condition,
- * or UITER_UNKNOWN_INDEX when the index is not known.
- *
- * @see UCharIteratorOrigin
- * @see UCharIterator
- * @see UITER_UNKNOWN_INDEX
- * @stable ICU 2.1
- */
-typedef int32_t U_CALLCONV
-UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin);
-
-/**
- * Function type declaration for UCharIterator.hasNext().
- *
- * Check if current() and next() can still
- * return another code unit.
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @return boolean value for whether current() and next() can still return another code unit
- *
- * @see UCharIterator
- * @stable ICU 2.1
- */
-typedef UBool U_CALLCONV
-UCharIteratorHasNext(UCharIterator *iter);
-
-/**
- * Function type declaration for UCharIterator.hasPrevious().
- *
- * Check if previous() can still return another code unit.
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @return boolean value for whether previous() can still return another code unit
- *
- * @see UCharIterator
- * @stable ICU 2.1
- */
-typedef UBool U_CALLCONV
-UCharIteratorHasPrevious(UCharIterator *iter);
-
-/**
- * Function type declaration for UCharIterator.current().
- *
- * Return the code unit at the current position,
- * or U_SENTINEL if there is none (index is at the limit).
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @return the current code unit
- *
- * @see UCharIterator
- * @stable ICU 2.1
- */
-typedef UChar32 U_CALLCONV
-UCharIteratorCurrent(UCharIterator *iter);
-
-/**
- * Function type declaration for UCharIterator.next().
- *
- * Return the code unit at the current index and increment
- * the index (post-increment, like s[i++]),
- * or return U_SENTINEL if there is none (index is at the limit).
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @return the current code unit (and post-increment the current index)
- *
- * @see UCharIterator
- * @stable ICU 2.1
- */
-typedef UChar32 U_CALLCONV
-UCharIteratorNext(UCharIterator *iter);
-
-/**
- * Function type declaration for UCharIterator.previous().
- *
- * Decrement the index and return the code unit from there
- * (pre-decrement, like s[--i]),
- * or return U_SENTINEL if there is none (index is at the start).
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @return the previous code unit (after pre-decrementing the current index)
- *
- * @see UCharIterator
- * @stable ICU 2.1
- */
-typedef UChar32 U_CALLCONV
-UCharIteratorPrevious(UCharIterator *iter);
-
-/**
- * Function type declaration for UCharIterator.reservedFn().
- * Reserved for future use.
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @param something some integer argument
- * @return some integer
- *
- * @see UCharIterator
- * @stable ICU 2.1
- */
-typedef int32_t U_CALLCONV
-UCharIteratorReserved(UCharIterator *iter, int32_t something);
-
-/**
- * Function type declaration for UCharIterator.getState().
- *
- * Get the "state" of the iterator in the form of a single 32-bit word.
- * It is recommended that the state value be calculated to be as small as
- * is feasible. For strings with limited lengths, fewer than 32 bits may
- * be sufficient.
- *
- * This is used together with setState()/UCharIteratorSetState
- * to save and restore the iterator position more efficiently than with
- * getIndex()/move().
- *
- * The iterator state is defined as a uint32_t value because it is designed
- * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state
- * of the character iterator.
- *
- * With some UCharIterator implementations (e.g., UTF-8),
- * getting and setting the UTF-16 index with existing functions
- * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but
- * relatively slow because the iterator has to "walk" from a known index
- * to the requested one.
- * This takes more time the farther it needs to go.
- *
- * An opaque state value allows an iterator implementation to provide
- * an internal index (UTF-8: the source byte array index) for
- * fast, constant-time restoration.
- *
- * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
- * the UTF-16 index may not be restored as well, but the iterator can deliver
- * the correct text contents and move relative to the current position
- * without performance degradation.
- *
- * Some UCharIterator implementations may not be able to return
- * a valid state for each position, in which case they return UITER_NO_STATE instead.
- * This will be clearly documented for each such iterator (none of the public ones here).
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @return the state word
- *
- * @see UCharIterator
- * @see UCharIteratorSetState
- * @see UITER_NO_STATE
- * @stable ICU 2.6
- */
-typedef uint32_t U_CALLCONV
-UCharIteratorGetState(const UCharIterator *iter);
-
-/**
- * Function type declaration for UCharIterator.setState().
- *
- * Restore the "state" of the iterator using a state word from a getState() call.
- * The iterator object need not be the same one as for which getState() was called,
- * but it must be of the same type (set up using the same uiter_setXYZ function)
- * and it must iterate over the same string
- * (binary identical regardless of memory address).
- * For more about the state word see UCharIteratorGetState.
- *
- * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
- * the UTF-16 index may not be restored as well, but the iterator can deliver
- * the correct text contents and move relative to the current position
- * without performance degradation.
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @param state the state word from a getState() call
- * on a same-type, same-string iterator
- * @param pErrorCode Must be a valid pointer to an error code value,
- * which must not indicate a failure before the function call.
- *
- * @see UCharIterator
- * @see UCharIteratorGetState
- * @stable ICU 2.6
- */
-typedef void U_CALLCONV
-UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
-
-
-/**
- * C API for code unit iteration.
- * This can be used as a C wrapper around
- * CharacterIterator, Replaceable, or implemented using simple strings, etc.
- *
- * There are two roles for using UCharIterator:
- *
- * A "provider" sets the necessary function pointers and controls the "protected"
- * fields of the UCharIterator structure. A "provider" passes a UCharIterator
- * into C APIs that need a UCharIterator as an abstract, flexible string interface.
- *
- * Implementations of such C APIs are "callers" of UCharIterator functions;
- * they only use the "public" function pointers and never access the "protected"
- * fields directly.
- *
- * The current() and next() functions only check the current index against the
- * limit, and previous() only checks the current index against the start,
- * to see if the iterator already reached the end of the iteration range.
- *
- * The assumption - in all iterators - is that the index is moved via the API,
- * which means it won't go out of bounds, or the index is modified by
- * user code that knows enough about the iterator implementation to set valid
- * index values.
- *
- * UCharIterator functions return code unit values 0..0xffff,
- * or U_SENTINEL if the iteration bounds are reached.
- *
- * @stable ICU 2.1
- */
-struct UCharIterator {
- /**
- * (protected) Pointer to string or wrapped object or similar.
- * Not used by caller.
- * @stable ICU 2.1
- */
- const void *context;
-
- /**
- * (protected) Length of string or similar.
- * Not used by caller.
- * @stable ICU 2.1
- */
- int32_t length;
-
- /**
- * (protected) Start index or similar.
- * Not used by caller.
- * @stable ICU 2.1
- */
- int32_t start;
-
- /**
- * (protected) Current index or similar.
- * Not used by caller.
- * @stable ICU 2.1
- */
- int32_t index;
-
- /**
- * (protected) Limit index or similar.
- * Not used by caller.
- * @stable ICU 2.1
- */
- int32_t limit;
-
- /**
- * (protected) Used by UTF-8 iterators and possibly others.
- * @stable ICU 2.1
- */
- int32_t reservedField;
-
- /**
- * (public) Returns the current position or the
- * start or limit index of the iteration range.
- *
- * @see UCharIteratorGetIndex
- * @stable ICU 2.1
- */
- UCharIteratorGetIndex *getIndex;
-
- /**
- * (public) Moves the current position relative to the start or limit of the
- * iteration range, or relative to the current position itself.
- * The movement is expressed in numbers of code units forward
- * or backward by specifying a positive or negative delta.
- *
- * @see UCharIteratorMove
- * @stable ICU 2.1
- */
- UCharIteratorMove *move;
-
- /**
- * (public) Check if current() and next() can still
- * return another code unit.
- *
- * @see UCharIteratorHasNext
- * @stable ICU 2.1
- */
- UCharIteratorHasNext *hasNext;
-
- /**
- * (public) Check if previous() can still return another code unit.
- *
- * @see UCharIteratorHasPrevious
- * @stable ICU 2.1
- */
- UCharIteratorHasPrevious *hasPrevious;
-
- /**
- * (public) Return the code unit at the current position,
- * or U_SENTINEL if there is none (index is at the limit).
- *
- * @see UCharIteratorCurrent
- * @stable ICU 2.1
- */
- UCharIteratorCurrent *current;
-
- /**
- * (public) Return the code unit at the current index and increment
- * the index (post-increment, like s[i++]),
- * or return U_SENTINEL if there is none (index is at the limit).
- *
- * @see UCharIteratorNext
- * @stable ICU 2.1
- */
- UCharIteratorNext *next;
-
- /**
- * (public) Decrement the index and return the code unit from there
- * (pre-decrement, like s[--i]),
- * or return U_SENTINEL if there is none (index is at the start).
- *
- * @see UCharIteratorPrevious
- * @stable ICU 2.1
- */
- UCharIteratorPrevious *previous;
-
- /**
- * (public) Reserved for future use. Currently NULL.
- *
- * @see UCharIteratorReserved
- * @stable ICU 2.1
- */
- UCharIteratorReserved *reservedFn;
-
- /**
- * (public) Return the state of the iterator, to be restored later with setState().
- * This function pointer is NULL if the iterator does not implement it.
- *
- * @see UCharIteratorGet
- * @stable ICU 2.6
- */
- UCharIteratorGetState *getState;
-
- /**
- * (public) Restore the iterator state from the state word from a call
- * to getState().
- * This function pointer is NULL if the iterator does not implement it.
- *
- * @see UCharIteratorSet
- * @stable ICU 2.6
- */
- UCharIteratorSetState *setState;
-};
-
-/**
- * Helper function for UCharIterator to get the code point
- * at the current index.
- *
- * Return the code point that includes the code unit at the current position,
- * or U_SENTINEL if there is none (index is at the limit).
- * If the current code unit is a lead or trail surrogate,
- * then the following or preceding surrogate is used to form
- * the code point value.
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @return the current code point
- *
- * @see UCharIterator
- * @see U16_GET
- * @see UnicodeString::char32At()
- * @stable ICU 2.1
- */
-U_STABLE UChar32 U_EXPORT2
-uiter_current32(UCharIterator *iter);
-
-/**
- * Helper function for UCharIterator to get the next code point.
- *
- * Return the code point at the current index and increment
- * the index (post-increment, like s[i++]),
- * or return U_SENTINEL if there is none (index is at the limit).
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @return the current code point (and post-increment the current index)
- *
- * @see UCharIterator
- * @see U16_NEXT
- * @stable ICU 2.1
- */
-U_STABLE UChar32 U_EXPORT2
-uiter_next32(UCharIterator *iter);
-
-/**
- * Helper function for UCharIterator to get the previous code point.
- *
- * Decrement the index and return the code point from there
- * (pre-decrement, like s[--i]),
- * or return U_SENTINEL if there is none (index is at the start).
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @return the previous code point (after pre-decrementing the current index)
- *
- * @see UCharIterator
- * @see U16_PREV
- * @stable ICU 2.1
- */
-U_STABLE UChar32 U_EXPORT2
-uiter_previous32(UCharIterator *iter);
-
-/**
- * Get the "state" of the iterator in the form of a single 32-bit word.
- * This is a convenience function that calls iter->getState(iter)
- * if iter->getState is not NULL;
- * if it is NULL or any other error occurs, then UITER_NO_STATE is returned.
- *
- * Some UCharIterator implementations may not be able to return
- * a valid state for each position, in which case they return UITER_NO_STATE instead.
- * This will be clearly documented for each such iterator (none of the public ones here).
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @return the state word
- *
- * @see UCharIterator
- * @see UCharIteratorGetState
- * @see UITER_NO_STATE
- * @stable ICU 2.6
- */
-U_STABLE uint32_t U_EXPORT2
-uiter_getState(const UCharIterator *iter);
-
-/**
- * Restore the "state" of the iterator using a state word from a getState() call.
- * This is a convenience function that calls iter->setState(iter, state, pErrorCode)
- * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set.
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @param state the state word from a getState() call
- * on a same-type, same-string iterator
- * @param pErrorCode Must be a valid pointer to an error code value,
- * which must not indicate a failure before the function call.
- *
- * @see UCharIterator
- * @see UCharIteratorSetState
- * @stable ICU 2.6
- */
-U_STABLE void U_EXPORT2
-uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
-
-/**
- * Set up a UCharIterator to iterate over a string.
- *
- * Sets the UCharIterator function pointers for iteration over the string s
- * with iteration boundaries start=index=0 and length=limit=string length.
- * The "provider" may set the start, index, and limit values at any time
- * within the range 0..length.
- * The length field will be ignored.
- *
- * The string pointer s is set into UCharIterator.context without copying
- * or reallocating the string contents.
- *
- * getState() simply returns the current index.
- * move() will always return the final index.
- *
- * @param iter UCharIterator structure to be set for iteration
- * @param s String to iterate over
- * @param length Length of s, or -1 if NUL-terminated
- *
- * @see UCharIterator
- * @stable ICU 2.1
- */
-U_STABLE void U_EXPORT2
-uiter_setString(UCharIterator *iter, const UChar *s, int32_t length);
-
-/**
- * Set up a UCharIterator to iterate over a UTF-16BE string
- * (byte vector with a big-endian pair of bytes per UChar).
- *
- * Everything works just like with a normal UChar iterator (uiter_setString),
- * except that UChars are assembled from byte pairs,
- * and that the length argument here indicates an even number of bytes.
- *
- * getState() simply returns the current index.
- * move() will always return the final index.
- *
- * @param iter UCharIterator structure to be set for iteration
- * @param s UTF-16BE string to iterate over
- * @param length Length of s as an even number of bytes, or -1 if NUL-terminated
- * (NUL means pair of 0 bytes at even index from s)
- *
- * @see UCharIterator
- * @see uiter_setString
- * @stable ICU 2.6
- */
-U_STABLE void U_EXPORT2
-uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length);
-
-/**
- * Set up a UCharIterator to iterate over a UTF-8 string.
- *
- * Sets the UCharIterator function pointers for iteration over the UTF-8 string s
- * with UTF-8 iteration boundaries 0 and length.
- * The implementation counts the UTF-16 index on the fly and
- * lazily evaluates the UTF-16 length of the text.
- *
- * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length.
- * When the reservedField is not 0, then it contains a supplementary code point
- * and the UTF-16 index is between the two corresponding surrogates.
- * At that point, the UTF-8 index is behind that code point.
- *
- * The UTF-8 string pointer s is set into UCharIterator.context without copying
- * or reallocating the string contents.
- *
- * getState() returns a state value consisting of
- * - the current UTF-8 source byte index (bits 31..1)
- * - a flag (bit 0) that indicates whether the UChar position is in the middle
- * of a surrogate pair
- * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point)
- *
- * getState() cannot also encode the UTF-16 index in the state value.
- * move(relative to limit or length), or
- * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX.
- *
- * @param iter UCharIterator structure to be set for iteration
- * @param s UTF-8 string to iterate over
- * @param length Length of s in bytes, or -1 if NUL-terminated
- *
- * @see UCharIterator
- * @stable ICU 2.6
- */
-U_STABLE void U_EXPORT2
-uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length);
-
-#if U_SHOW_CPLUSPLUS_API
-
-/**
- * Set up a UCharIterator to wrap around a C++ CharacterIterator.
- *
- * Sets the UCharIterator function pointers for iteration using the
- * CharacterIterator charIter.
- *
- * The CharacterIterator pointer charIter is set into UCharIterator.context
- * without copying or cloning the CharacterIterator object.
- * The other "protected" UCharIterator fields are set to 0 and will be ignored.
- * The iteration index and boundaries are controlled by the CharacterIterator.
- *
- * getState() simply returns the current index.
- * move() will always return the final index.
- *
- * @param iter UCharIterator structure to be set for iteration
- * @param charIter CharacterIterator to wrap
- *
- * @see UCharIterator
- * @stable ICU 2.1
- */
-U_STABLE void U_EXPORT2
-uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter);
-
-/**
- * Set up a UCharIterator to iterate over a C++ Replaceable.
- *
- * Sets the UCharIterator function pointers for iteration over the
- * Replaceable rep with iteration boundaries start=index=0 and
- * length=limit=rep->length().
- * The "provider" may set the start, index, and limit values at any time
- * within the range 0..length=rep->length().
- * The length field will be ignored.
- *
- * The Replaceable pointer rep is set into UCharIterator.context without copying
- * or cloning/reallocating the Replaceable object.
- *
- * getState() simply returns the current index.
- * move() will always return the final index.
- *
- * @param iter UCharIterator structure to be set for iteration
- * @param rep Replaceable to iterate over
- *
- * @see UCharIterator
- * @stable ICU 2.1
- */
-U_STABLE void U_EXPORT2
-uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep);
-
-#endif
-
-U_CDECL_END
-
-#endif
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002jan18
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UITER_H__
+#define __UITER_H__
+
+/**
+ * \file
+ * \brief C API: Unicode Character Iteration
+ *
+ * @see UCharIterator
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+ U_NAMESPACE_BEGIN
+
+ class CharacterIterator;
+ class Replaceable;
+
+ U_NAMESPACE_END
+#endif
+
+U_CDECL_BEGIN
+
+struct UCharIterator;
+typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
+
+/**
+ * Origin constants for UCharIterator.getIndex() and UCharIterator.move().
+ * @see UCharIteratorMove
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef enum UCharIteratorOrigin {
+ UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH
+} UCharIteratorOrigin;
+
+/** Constants for UCharIterator. @stable ICU 2.6 */
+enum {
+ /**
+ * Constant value that may be returned by UCharIteratorMove
+ * indicating that the final UTF-16 index is not known, but that the move succeeded.
+ * This can occur when moving relative to limit or length, or
+ * when moving relative to the current index after a setState()
+ * when the current UTF-16 index is not known.
+ *
+ * It would be very inefficient to have to count from the beginning of the text
+ * just to get the current/limit/length index after moving relative to it.
+ * The actual index can be determined with getIndex(UITER_CURRENT)
+ * which will count the UChars if necessary.
+ *
+ * @stable ICU 2.6
+ */
+ UITER_UNKNOWN_INDEX=-2
+};
+
+
+/**
+ * Constant for UCharIterator getState() indicating an error or
+ * an unknown state.
+ * Returned by uiter_getState()/UCharIteratorGetState
+ * when an error occurs.
+ * Also, some UCharIterator implementations may not be able to return
+ * a valid state for each position. This will be clearly documented
+ * for each such iterator (none of the public ones here).
+ *
+ * @stable ICU 2.6
+ */
+#define UITER_NO_STATE ((uint32_t)0xffffffff)
+
+/**
+ * Function type declaration for UCharIterator.getIndex().
+ *
+ * Gets the current position, or the start or limit of the
+ * iteration range.
+ *
+ * This function may perform slowly for UITER_CURRENT after setState() was called,
+ * or for UITER_LENGTH, because an iterator implementation may have to count
+ * UChars if the underlying storage is not UTF-16.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param origin get the 0, start, limit, length, or current index
+ * @return the requested index, or U_SENTINEL in an error condition
+ *
+ * @see UCharIteratorOrigin
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef int32_t U_CALLCONV
+UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin);
+
+/**
+ * Function type declaration for UCharIterator.move().
+ *
+ * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index).
+ *
+ * Moves the current position relative to the start or limit of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code units forward
+ * or backward by specifying a positive or negative delta.
+ * Out of bounds movement will be pinned to the start or limit.
+ *
+ * This function may perform slowly for moving relative to UITER_LENGTH
+ * because an iterator implementation may have to count the rest of the
+ * UChars if the native storage is not UTF-16.
+ *
+ * When moving relative to the limit or length, or
+ * relative to the current position after setState() was called,
+ * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient
+ * determination of the actual UTF-16 index.
+ * The actual index can be determined with getIndex(UITER_CURRENT)
+ * which will count the UChars if necessary.
+ * See UITER_UNKNOWN_INDEX for details.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param delta can be positive, zero, or negative
+ * @param origin move relative to the 0, start, limit, length, or current index
+ * @return the new index, or U_SENTINEL on an error condition,
+ * or UITER_UNKNOWN_INDEX when the index is not known.
+ *
+ * @see UCharIteratorOrigin
+ * @see UCharIterator
+ * @see UITER_UNKNOWN_INDEX
+ * @stable ICU 2.1
+ */
+typedef int32_t U_CALLCONV
+UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin);
+
+/**
+ * Function type declaration for UCharIterator.hasNext().
+ *
+ * Check if current() and next() can still
+ * return another code unit.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return boolean value for whether current() and next() can still return another code unit
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UBool U_CALLCONV
+UCharIteratorHasNext(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.hasPrevious().
+ *
+ * Check if previous() can still return another code unit.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return boolean value for whether previous() can still return another code unit
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UBool U_CALLCONV
+UCharIteratorHasPrevious(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.current().
+ *
+ * Return the code unit at the current position,
+ * or U_SENTINEL if there is none (index is at the limit).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code unit
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UChar32 U_CALLCONV
+UCharIteratorCurrent(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.next().
+ *
+ * Return the code unit at the current index and increment
+ * the index (post-increment, like s[i++]),
+ * or return U_SENTINEL if there is none (index is at the limit).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code unit (and post-increment the current index)
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UChar32 U_CALLCONV
+UCharIteratorNext(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.previous().
+ *
+ * Decrement the index and return the code unit from there
+ * (pre-decrement, like s[--i]),
+ * or return U_SENTINEL if there is none (index is at the start).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the previous code unit (after pre-decrementing the current index)
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UChar32 U_CALLCONV
+UCharIteratorPrevious(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.reservedFn().
+ * Reserved for future use.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param something some integer argument
+ * @return some integer
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef int32_t U_CALLCONV
+UCharIteratorReserved(UCharIterator *iter, int32_t something);
+
+/**
+ * Function type declaration for UCharIterator.getState().
+ *
+ * Get the "state" of the iterator in the form of a single 32-bit word.
+ * It is recommended that the state value be calculated to be as small as
+ * is feasible. For strings with limited lengths, fewer than 32 bits may
+ * be sufficient.
+ *
+ * This is used together with setState()/UCharIteratorSetState
+ * to save and restore the iterator position more efficiently than with
+ * getIndex()/move().
+ *
+ * The iterator state is defined as a uint32_t value because it is designed
+ * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state
+ * of the character iterator.
+ *
+ * With some UCharIterator implementations (e.g., UTF-8),
+ * getting and setting the UTF-16 index with existing functions
+ * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but
+ * relatively slow because the iterator has to "walk" from a known index
+ * to the requested one.
+ * This takes more time the farther it needs to go.
+ *
+ * An opaque state value allows an iterator implementation to provide
+ * an internal index (UTF-8: the source byte array index) for
+ * fast, constant-time restoration.
+ *
+ * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
+ * the UTF-16 index may not be restored as well, but the iterator can deliver
+ * the correct text contents and move relative to the current position
+ * without performance degradation.
+ *
+ * Some UCharIterator implementations may not be able to return
+ * a valid state for each position, in which case they return UITER_NO_STATE instead.
+ * This will be clearly documented for each such iterator (none of the public ones here).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the state word
+ *
+ * @see UCharIterator
+ * @see UCharIteratorSetState
+ * @see UITER_NO_STATE
+ * @stable ICU 2.6
+ */
+typedef uint32_t U_CALLCONV
+UCharIteratorGetState(const UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.setState().
+ *
+ * Restore the "state" of the iterator using a state word from a getState() call.
+ * The iterator object need not be the same one on which getState() was called,
+ * but it must be of the same type (set up using the same uiter_setXYZ function)
+ * and it must iterate over the same string
+ * (binary identical regardless of memory address).
+ * For more about the state word see UCharIteratorGetState.
+ *
+ * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
+ * the UTF-16 index may not be restored as well, but the iterator can deliver
+ * the correct text contents and move relative to the current position
+ * without performance degradation.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param state the state word from a getState() call
+ * on a same-type, same-string iterator
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @see UCharIterator
+ * @see UCharIteratorGetState
+ * @stable ICU 2.6
+ */
+typedef void U_CALLCONV
+UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
+
+
+/**
+ * C API for code unit iteration.
+ * This can be used as a C wrapper around
+ * CharacterIterator, Replaceable, or implemented using simple strings, etc.
+ *
+ * There are two roles for using UCharIterator:
+ *
+ * A "provider" sets the necessary function pointers and controls the "protected"
+ * fields of the UCharIterator structure. A "provider" passes a UCharIterator
+ * into C APIs that need a UCharIterator as an abstract, flexible string interface.
+ *
+ * Implementations of such C APIs are "callers" of UCharIterator functions;
+ * they only use the "public" function pointers and never access the "protected"
+ * fields directly.
+ *
+ * The current() and next() functions only check the current index against the
+ * limit, and previous() only checks the current index against the start,
+ * to see if the iterator already reached the end of the iteration range.
+ *
+ * The assumption - in all iterators - is that the index is moved via the API,
+ * which means it won't go out of bounds, or the index is modified by
+ * user code that knows enough about the iterator implementation to set valid
+ * index values.
+ *
+ * UCharIterator functions return code unit values 0..0xffff,
+ * or U_SENTINEL if the iteration bounds are reached.
+ *
+ * @stable ICU 2.1
+ */
+struct UCharIterator {
+ /**
+ * (protected) Pointer to string or wrapped object or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ const void *context;
+
+ /**
+ * (protected) Length of string or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ int32_t length;
+
+ /**
+ * (protected) Start index or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ int32_t start;
+
+ /**
+ * (protected) Current index or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ int32_t index;
+
+ /**
+ * (protected) Limit index or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ int32_t limit;
+
+ /**
+ * (protected) Used by UTF-8 iterators and possibly others.
+ * @stable ICU 2.1
+ */
+ int32_t reservedField;
+
+ /**
+ * (public) Returns the current position or the
+ * start or limit index of the iteration range.
+ *
+ * @see UCharIteratorGetIndex
+ * @stable ICU 2.1
+ */
+ UCharIteratorGetIndex *getIndex;
+
+ /**
+ * (public) Moves the current position relative to the start or limit of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code units forward
+ * or backward by specifying a positive or negative delta.
+ *
+ * @see UCharIteratorMove
+ * @stable ICU 2.1
+ */
+ UCharIteratorMove *move;
+
+ /**
+ * (public) Check if current() and next() can still
+ * return another code unit.
+ *
+ * @see UCharIteratorHasNext
+ * @stable ICU 2.1
+ */
+ UCharIteratorHasNext *hasNext;
+
+ /**
+ * (public) Check if previous() can still return another code unit.
+ *
+ * @see UCharIteratorHasPrevious
+ * @stable ICU 2.1
+ */
+ UCharIteratorHasPrevious *hasPrevious;
+
+ /**
+ * (public) Return the code unit at the current position,
+ * or U_SENTINEL if there is none (index is at the limit).
+ *
+ * @see UCharIteratorCurrent
+ * @stable ICU 2.1
+ */
+ UCharIteratorCurrent *current;
+
+ /**
+ * (public) Return the code unit at the current index and increment
+ * the index (post-increment, like s[i++]),
+ * or return U_SENTINEL if there is none (index is at the limit).
+ *
+ * @see UCharIteratorNext
+ * @stable ICU 2.1
+ */
+ UCharIteratorNext *next;
+
+ /**
+ * (public) Decrement the index and return the code unit from there
+ * (pre-decrement, like s[--i]),
+ * or return U_SENTINEL if there is none (index is at the start).
+ *
+ * @see UCharIteratorPrevious
+ * @stable ICU 2.1
+ */
+ UCharIteratorPrevious *previous;
+
+ /**
+ * (public) Reserved for future use. Currently NULL.
+ *
+ * @see UCharIteratorReserved
+ * @stable ICU 2.1
+ */
+ UCharIteratorReserved *reservedFn;
+
+ /**
+ * (public) Return the state of the iterator, to be restored later with setState().
+ * This function pointer is NULL if the iterator does not implement it.
+ *
+ * @see UCharIteratorGetState
+ * @stable ICU 2.6
+ */
+ UCharIteratorGetState *getState;
+
+ /**
+ * (public) Restore the iterator state from the state word from a call
+ * to getState().
+ * This function pointer is NULL if the iterator does not implement it.
+ *
+ * @see UCharIteratorSetState
+ * @stable ICU 2.6
+ */
+ UCharIteratorSetState *setState;
+};
+
+/**
+ * Helper function for UCharIterator to get the code point
+ * at the current index.
+ *
+ * Return the code point that includes the code unit at the current position,
+ * or U_SENTINEL if there is none (index is at the limit).
+ * If the current code unit is a lead or trail surrogate,
+ * then the following or preceding surrogate is used to form
+ * the code point value.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code point
+ *
+ * @see UCharIterator
+ * @see U16_GET
+ * @see UnicodeString::char32At()
+ * @stable ICU 2.1
+ */
+U_STABLE UChar32 U_EXPORT2
+uiter_current32(UCharIterator *iter);
+
+/**
+ * Helper function for UCharIterator to get the next code point.
+ *
+ * Return the code point at the current index and increment
+ * the index (post-increment, like s[i++]),
+ * or return U_SENTINEL if there is none (index is at the limit).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code point (and post-increment the current index)
+ *
+ * @see UCharIterator
+ * @see U16_NEXT
+ * @stable ICU 2.1
+ */
+U_STABLE UChar32 U_EXPORT2
+uiter_next32(UCharIterator *iter);
+
+/**
+ * Helper function for UCharIterator to get the previous code point.
+ *
+ * Decrement the index and return the code point from there
+ * (pre-decrement, like s[--i]),
+ * or return U_SENTINEL if there is none (index is at the start).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the previous code point (after pre-decrementing the current index)
+ *
+ * @see UCharIterator
+ * @see U16_PREV
+ * @stable ICU 2.1
+ */
+U_STABLE UChar32 U_EXPORT2
+uiter_previous32(UCharIterator *iter);
+
+/**
+ * Get the "state" of the iterator in the form of a single 32-bit word.
+ * This is a convenience function that calls iter->getState(iter)
+ * if iter->getState is not NULL;
+ * if it is NULL or any other error occurs, then UITER_NO_STATE is returned.
+ *
+ * Some UCharIterator implementations may not be able to return
+ * a valid state for each position, in which case they return UITER_NO_STATE instead.
+ * This will be clearly documented for each such iterator (none of the public ones here).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the state word
+ *
+ * @see UCharIterator
+ * @see UCharIteratorGetState
+ * @see UITER_NO_STATE
+ * @stable ICU 2.6
+ */
+U_STABLE uint32_t U_EXPORT2
+uiter_getState(const UCharIterator *iter);
+
+/**
+ * Restore the "state" of the iterator using a state word from a getState() call.
+ * This is a convenience function that calls iter->setState(iter, state, pErrorCode)
+ * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param state the state word from a getState() call
+ * on a same-type, same-string iterator
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @see UCharIterator
+ * @see UCharIteratorSetState
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
+
+/**
+ * Set up a UCharIterator to iterate over a string.
+ *
+ * Sets the UCharIterator function pointers for iteration over the string s
+ * with iteration boundaries start=index=0 and length=limit=string length.
+ * The "provider" may set the start, index, and limit values at any time
+ * within the range 0..length.
+ * The length field will be ignored.
+ *
+ * The string pointer s is set into UCharIterator.context without copying
+ * or reallocating the string contents.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param s String to iterate over
+ * @param length Length of s, or -1 if NUL-terminated
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+U_STABLE void U_EXPORT2
+uiter_setString(UCharIterator *iter, const UChar *s, int32_t length);
+
+/**
+ * Set up a UCharIterator to iterate over a UTF-16BE string
+ * (byte vector with a big-endian pair of bytes per UChar).
+ *
+ * Everything works just like with a normal UChar iterator (uiter_setString),
+ * except that UChars are assembled from byte pairs,
+ * and that the length argument here indicates an even number of bytes.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param s UTF-16BE string to iterate over
+ * @param length Length of s as an even number of bytes, or -1 if NUL-terminated
+ * (NUL means pair of 0 bytes at even index from s)
+ *
+ * @see UCharIterator
+ * @see uiter_setString
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length);
+
+/**
+ * Set up a UCharIterator to iterate over a UTF-8 string.
+ *
+ * Sets the UCharIterator function pointers for iteration over the UTF-8 string s
+ * with UTF-8 iteration boundaries 0 and length.
+ * The implementation counts the UTF-16 index on the fly and
+ * lazily evaluates the UTF-16 length of the text.
+ *
+ * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length.
+ * When the reservedField is not 0, then it contains a supplementary code point
+ * and the UTF-16 index is between the two corresponding surrogates.
+ * At that point, the UTF-8 index is behind that code point.
+ *
+ * The UTF-8 string pointer s is set into UCharIterator.context without copying
+ * or reallocating the string contents.
+ *
+ * getState() returns a state value consisting of
+ * - the current UTF-8 source byte index (bits 31..1)
+ * - a flag (bit 0) that indicates whether the UChar position is in the middle
+ * of a surrogate pair
+ * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point)
+ *
+ * getState() cannot also encode the UTF-16 index in the state value.
+ * move(relative to limit or length), or
+ * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param s UTF-8 string to iterate over
+ * @param length Length of s in bytes, or -1 if NUL-terminated
+ *
+ * @see UCharIterator
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length);
+
+#if U_SHOW_CPLUSPLUS_API
+
+/**
+ * Set up a UCharIterator to wrap around a C++ CharacterIterator.
+ *
+ * Sets the UCharIterator function pointers for iteration using the
+ * CharacterIterator charIter.
+ *
+ * The CharacterIterator pointer charIter is set into UCharIterator.context
+ * without copying or cloning the CharacterIterator object.
+ * The other "protected" UCharIterator fields are set to 0 and will be ignored.
+ * The iteration index and boundaries are controlled by the CharacterIterator.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param charIter CharacterIterator to wrap
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+U_STABLE void U_EXPORT2
+uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter);
+
+/**
+ * Set up a UCharIterator to iterate over a C++ Replaceable.
+ *
+ * Sets the UCharIterator function pointers for iteration over the
+ * Replaceable rep with iteration boundaries start=index=0 and
+ * length=limit=rep->length().
+ * The "provider" may set the start, index, and limit values at any time
+ * within the range 0..length=rep->length().
+ * The length field will be ignored.
+ *
+ * The Replaceable pointer rep is set into UCharIterator.context without copying
+ * or cloning/reallocating the Replaceable object.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param rep Replaceable to iterate over
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+U_STABLE void U_EXPORT2
+uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep);
+
+#endif
+
+U_CDECL_END
+
+#endif