diff options
author | neksard <neksard@yandex-team.ru> | 2022-02-10 16:45:23 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:23 +0300 |
commit | 8f7cf138264e0caa318144bf8a2c950e0b0a8593 (patch) | |
tree | 83bf5c8c8047c42d8475e6095df90ccdc3d1b57f /contrib/libs/icu/include/unicode/caniter.h | |
parent | d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (diff) | |
download | ydb-8f7cf138264e0caa318144bf8a2c950e0b0a8593.tar.gz |
Restoring authorship annotation for <neksard@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/icu/include/unicode/caniter.h')
-rw-r--r-- | contrib/libs/icu/include/unicode/caniter.h | 410 |
1 files changed, 205 insertions, 205 deletions
diff --git a/contrib/libs/icu/include/unicode/caniter.h b/contrib/libs/icu/include/unicode/caniter.h index 13e524ff6a..724821f2b4 100644 --- a/contrib/libs/icu/include/unicode/caniter.h +++ b/contrib/libs/icu/include/unicode/caniter.h @@ -1,214 +1,214 @@ // © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* - ******************************************************************************* - * Copyright (C) 1996-2014, International Business Machines Corporation and - * others. All Rights Reserved. - ******************************************************************************* - */ - -#ifndef CANITER_H -#define CANITER_H - -#include "unicode/utypes.h" - +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************************* + * Copyright (C) 1996-2014, International Business Machines Corporation and + * others. All Rights Reserved. + ******************************************************************************* + */ + +#ifndef CANITER_H +#define CANITER_H + +#include "unicode/utypes.h" + #if U_SHOW_CPLUSPLUS_API -#if !UCONFIG_NO_NORMALIZATION - -#include "unicode/uobject.h" -#include "unicode/unistr.h" - -/** - * \file - * \brief C++ API: Canonical Iterator - */ - -/** Should permutation skip characters with combining class zero - * Should be either TRUE or FALSE. This is a compile time option - * @stable ICU 2.4 - */ -#ifndef CANITER_SKIP_ZEROES -#define CANITER_SKIP_ZEROES TRUE -#endif - -U_NAMESPACE_BEGIN - -class Hashtable; -class Normalizer2; -class Normalizer2Impl; - -/** - * This class allows one to iterate through all the strings that are canonically equivalent to a given - * string. For example, here are some sample results: -Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} -1: \\u0041\\u030A\\u0064\\u0307\\u0327 - = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} -2: \\u0041\\u030A\\u0064\\u0327\\u0307 - = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE} -3: \\u0041\\u030A\\u1E0B\\u0327 - = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA} -4: \\u0041\\u030A\\u1E11\\u0307 - = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE} -5: \\u00C5\\u0064\\u0307\\u0327 - = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} -6: \\u00C5\\u0064\\u0327\\u0307 - = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE} -7: \\u00C5\\u1E0B\\u0327 - = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA} -8: \\u00C5\\u1E11\\u0307 - = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE} -9: \\u212B\\u0064\\u0307\\u0327 - = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} -10: \\u212B\\u0064\\u0327\\u0307 - = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE} -11: \\u212B\\u1E0B\\u0327 - = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA} -12: \\u212B\\u1E11\\u0307 - = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE} - *<br>Note: the code is intended for use with small strings, and is not suitable for larger ones, - * since it has not been optimized for that situation. - * Note, CanonicalIterator is not intended to be subclassed. - * @author M. Davis - * @author C++ port by V. Weinstein - * @stable ICU 2.4 - */ -class U_COMMON_API CanonicalIterator U_FINAL : public UObject { -public: - /** - * Construct a CanonicalIterator object - * @param source string to get results for - * @param status Fill-in parameter which receives the status of this operation. - * @stable ICU 2.4 - */ - CanonicalIterator(const UnicodeString &source, UErrorCode &status); - - /** Destructor - * Cleans pieces - * @stable ICU 2.4 - */ - virtual ~CanonicalIterator(); - - /** - * Gets the NFD form of the current source we are iterating over. - * @return gets the source: NOTE: it is the NFD form of source - * @stable ICU 2.4 - */ - UnicodeString getSource(); - - /** - * Resets the iterator so that one can start again from the beginning. - * @stable ICU 2.4 - */ - void reset(); - - /** - * Get the next canonically equivalent string. - * <br><b>Warning: The strings are not guaranteed to be in any particular order.</b> - * @return the next string that is canonically equivalent. A bogus string is returned when - * the iteration is done. - * @stable ICU 2.4 - */ - UnicodeString next(); - - /** - * Set a new source for this iterator. Allows object reuse. - * @param newSource the source string to iterate against. This allows the same iterator to be used - * while changing the source string, saving object creation. - * @param status Fill-in parameter which receives the status of this operation. - * @stable ICU 2.4 - */ - void setSource(const UnicodeString &newSource, UErrorCode &status); - -#ifndef U_HIDE_INTERNAL_API - /** - * Dumb recursive implementation of permutation. - * TODO: optimize - * @param source the string to find permutations for - * @param skipZeros determine if skip zeros - * @param result the results in a set. - * @param status Fill-in parameter which receives the status of this operation. - * @internal - */ - static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status); -#endif /* U_HIDE_INTERNAL_API */ - - /** - * ICU "poor man's RTTI", returns a UClassID for this class. - * - * @stable ICU 2.2 - */ - static UClassID U_EXPORT2 getStaticClassID(); - - /** - * ICU "poor man's RTTI", returns a UClassID for the actual class. - * - * @stable ICU 2.2 - */ - virtual UClassID getDynamicClassID() const; - -private: - // ===================== PRIVATES ============================== - // private default constructor - CanonicalIterator(); - - - /** - * Copy constructor. Private for now. +#if !UCONFIG_NO_NORMALIZATION + +#include "unicode/uobject.h" +#include "unicode/unistr.h" + +/** + * \file + * \brief C++ API: Canonical Iterator + */ + +/** Should permutation skip characters with combining class zero + * Should be either TRUE or FALSE. This is a compile time option + * @stable ICU 2.4 + */ +#ifndef CANITER_SKIP_ZEROES +#define CANITER_SKIP_ZEROES TRUE +#endif + +U_NAMESPACE_BEGIN + +class Hashtable; +class Normalizer2; +class Normalizer2Impl; + +/** + * This class allows one to iterate through all the strings that are canonically equivalent to a given + * string. For example, here are some sample results: +Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} +1: \\u0041\\u030A\\u0064\\u0307\\u0327 + = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} +2: \\u0041\\u030A\\u0064\\u0327\\u0307 + = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE} +3: \\u0041\\u030A\\u1E0B\\u0327 + = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA} +4: \\u0041\\u030A\\u1E11\\u0307 + = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE} +5: \\u00C5\\u0064\\u0307\\u0327 + = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} +6: \\u00C5\\u0064\\u0327\\u0307 + = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE} +7: \\u00C5\\u1E0B\\u0327 + = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA} +8: \\u00C5\\u1E11\\u0307 + = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE} +9: \\u212B\\u0064\\u0307\\u0327 + = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} +10: \\u212B\\u0064\\u0327\\u0307 + = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE} +11: \\u212B\\u1E0B\\u0327 + = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA} +12: \\u212B\\u1E11\\u0307 + = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE} + *<br>Note: the code is intended for use with small strings, and is not suitable for larger ones, + * since it has not been optimized for that situation. + * Note, CanonicalIterator is not intended to be subclassed. + * @author M. Davis + * @author C++ port by V. Weinstein + * @stable ICU 2.4 + */ +class U_COMMON_API CanonicalIterator U_FINAL : public UObject { +public: + /** + * Construct a CanonicalIterator object + * @param source string to get results for + * @param status Fill-in parameter which receives the status of this operation. + * @stable ICU 2.4 + */ + CanonicalIterator(const UnicodeString &source, UErrorCode &status); + + /** Destructor + * Cleans pieces + * @stable ICU 2.4 + */ + virtual ~CanonicalIterator(); + + /** + * Gets the NFD form of the current source we are iterating over. + * @return gets the source: NOTE: it is the NFD form of source + * @stable ICU 2.4 + */ + UnicodeString getSource(); + + /** + * Resets the iterator so that one can start again from the beginning. + * @stable ICU 2.4 + */ + void reset(); + + /** + * Get the next canonically equivalent string. + * <br><b>Warning: The strings are not guaranteed to be in any particular order.</b> + * @return the next string that is canonically equivalent. A bogus string is returned when + * the iteration is done. + * @stable ICU 2.4 + */ + UnicodeString next(); + + /** + * Set a new source for this iterator. Allows object reuse. + * @param newSource the source string to iterate against. This allows the same iterator to be used + * while changing the source string, saving object creation. + * @param status Fill-in parameter which receives the status of this operation. + * @stable ICU 2.4 + */ + void setSource(const UnicodeString &newSource, UErrorCode &status); + +#ifndef U_HIDE_INTERNAL_API + /** + * Dumb recursive implementation of permutation. + * TODO: optimize + * @param source the string to find permutations for + * @param skipZeros determine if skip zeros + * @param result the results in a set. + * @param status Fill-in parameter which receives the status of this operation. + * @internal + */ + static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status); +#endif /* U_HIDE_INTERNAL_API */ + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 2.2 + */ + static UClassID U_EXPORT2 getStaticClassID(); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * + * @stable ICU 2.2 + */ + virtual UClassID getDynamicClassID() const; + +private: + // ===================== PRIVATES ============================== + // private default constructor + CanonicalIterator(); + + + /** + * Copy constructor. Private for now. * @internal (private) - */ - CanonicalIterator(const CanonicalIterator& other); - - /** - * Assignment operator. Private for now. + */ + CanonicalIterator(const CanonicalIterator& other); + + /** + * Assignment operator. Private for now. * @internal (private) - */ - CanonicalIterator& operator=(const CanonicalIterator& other); - - // fields - UnicodeString source; - UBool done; - - // 2 dimensional array holds the pieces of the string with - // their different canonically equivalent representations - UnicodeString **pieces; - int32_t pieces_length; - int32_t *pieces_lengths; - - // current is used in iterating to combine pieces - int32_t *current; - int32_t current_length; - - // transient fields - UnicodeString buffer; - - const Normalizer2 &nfd; - const Normalizer2Impl &nfcImpl; - - // we have a segment, in NFD. Find all the strings that are canonically equivalent to it. - UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment) - - //Set getEquivalents2(String segment); + */ + CanonicalIterator& operator=(const CanonicalIterator& other); + + // fields + UnicodeString source; + UBool done; + + // 2 dimensional array holds the pieces of the string with + // their different canonically equivalent representations + UnicodeString **pieces; + int32_t pieces_length; + int32_t *pieces_lengths; + + // current is used in iterating to combine pieces + int32_t *current; + int32_t current_length; + + // transient fields + UnicodeString buffer; + + const Normalizer2 &nfd; + const Normalizer2Impl &nfcImpl; + + // we have a segment, in NFD. Find all the strings that are canonically equivalent to it. + UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment) + + //Set getEquivalents2(String segment); Hashtable *getEquivalents2(Hashtable *fillinResult, const char16_t *segment, int32_t segLen, UErrorCode &status); - //Hashtable *getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status); - - /** - * See if the decomposition of cp2 is at segment starting at segmentPos - * (with canonical rearrangment!) - * If so, take the remainder, and return the equivalents - */ - //Set extract(int comp, String segment, int segmentPos, StringBuffer buffer); + //Hashtable *getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status); + + /** + * See if the decomposition of cp2 is at segment starting at segmentPos + * (with canonical rearrangment!) + * If so, take the remainder, and return the equivalents + */ + //Set extract(int comp, String segment, int segmentPos, StringBuffer buffer); Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const char16_t *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status); - //Hashtable *extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status); - - void cleanPieces(); - -}; - -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_NORMALIZATION */ - + //Hashtable *extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status); + + void cleanPieces(); + +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_NORMALIZATION */ + #endif /* U_SHOW_CPLUSPLUS_API */ -#endif +#endif |