aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/icu/i18n/rbt_pars.h
diff options
context:
space:
mode:
author neksard <neksard@yandex-team.ru> 2022-02-10 16:45:23 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:45:23 +0300
commit 8f7cf138264e0caa318144bf8a2c950e0b0a8593 (patch)
tree 83bf5c8c8047c42d8475e6095df90ccdc3d1b57f /contrib/libs/icu/i18n/rbt_pars.h
parent d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (diff)
download ydb-8f7cf138264e0caa318144bf8a2c950e0b0a8593.tar.gz
Restoring authorship annotation for <neksard@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/icu/i18n/rbt_pars.h')
-rw-r--r-- contrib/libs/icu/i18n/rbt_pars.h 712
1 files changed, 356 insertions, 356 deletions
diff --git a/contrib/libs/icu/i18n/rbt_pars.h b/contrib/libs/icu/i18n/rbt_pars.h
index 61ce9727e0..214152077d 100644
--- a/contrib/libs/icu/i18n/rbt_pars.h
+++ b/contrib/libs/icu/i18n/rbt_pars.h
@@ -1,357 +1,357 @@
// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 1999-2011, International Business Machines Corporation
-* and others. All Rights Reserved.
-**********************************************************************
-* Date Name Description
-* 11/17/99 aliu Creation.
-**********************************************************************
-*/
-#ifndef RBT_PARS_H
-#define RBT_PARS_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_TRANSLITERATION
-#ifdef __cplusplus
-
-#include "unicode/uobject.h"
-#include "unicode/parseerr.h"
-#include "unicode/unorm.h"
-#include "rbt.h"
-#include "hash.h"
-#include "uvector.h"
-
-U_NAMESPACE_BEGIN
-
-class TransliterationRuleData;
-class UnicodeFunctor;
-class ParseData;
-class RuleHalf;
-class ParsePosition;
-class StringMatcher;
-
-class TransliteratorParser : public UMemory {
-
- public:
-
- /**
- * A Vector of TransliterationRuleData objects, one for each discrete group
- * of rules in the rule set
- */
- UVector dataVector;
-
- /**
- * PUBLIC data member.
- * A Vector of UnicodeStrings containing all of the ID blocks in the rule set
- */
- UVector idBlockVector;
-
- /**
- * PUBLIC data member containing the parsed compound filter, if any.
- */
- UnicodeSet* compoundFilter;
-
- private:
-
- /**
- * The current data object for which we are parsing rules
- */
- TransliterationRuleData* curData;
-
- UTransDirection direction;
-
- /**
- * Parse error information.
- */
- UParseError parseError;
-
- /**
- * Temporary symbol table used during parsing.
- */
- ParseData* parseData;
-
- /**
- * Temporary vector of matcher variables. When parsing is complete, this
- * is copied into the array data.variables. As with data.variables,
- * element 0 corresponds to character data.variablesBase.
- */
- UVector variablesVector;
-
- /**
- * Temporary table of variable names. When parsing is complete, this is
- * copied into data.variableNames.
- */
- Hashtable variableNames;
-
- /**
- * String of standins for segments. Used during the parsing of a single
- * rule. segmentStandins.charAt(0) is the standin for "$1" and corresponds
- * to StringMatcher object segmentObjects.elementAt(0), etc.
- */
- UnicodeString segmentStandins;
-
- /**
- * Vector of StringMatcher objects for segments. Used during the
- * parsing of a single rule.
- * segmentStandins.charAt(0) is the standin for "$1" and corresponds
- * to StringMatcher object segmentObjects.elementAt(0), etc.
- */
- UVector segmentObjects;
-
- /**
- * The next available stand-in for variables. This starts at some point in
- * the private use area (discovered dynamically) and increments up toward
- * <code>variableLimit</code>. At any point during parsing, available
- * variables are <code>variableNext..variableLimit-1</code>.
- */
- UChar variableNext;
-
- /**
- * The last available stand-in for variables. This is discovered
- * dynamically. At any point during parsing, available variables are
- * <code>variableNext..variableLimit-1</code>.
- */
- UChar variableLimit;
-
- /**
- * When we encounter an undefined variable, we do not immediately signal
- * an error, in case we are defining this variable, e.g., "$a = [a-z];".
- * Instead, we save the name of the undefined variable, and substitute
- * in the placeholder char variableLimit - 1, and decrement
- * variableLimit.
- */
- UnicodeString undefinedVariableName;
-
- /**
- * The stand-in character for the 'dot' set, represented by '.' in
- * patterns. This is allocated the first time it is needed, and
- * reused thereafter.
- */
- UChar dotStandIn;
-
-public:
-
- /**
- * Constructor.
- */
- TransliteratorParser(UErrorCode &statusReturn);
-
- /**
- * Destructor.
- */
- ~TransliteratorParser();
-
- /**
- * Parse the given string as a sequence of rules, separated by newline
- * characters ('\n'), and cause this object to implement those rules. Any
- * previous rules are discarded. Typically this method is called exactly
- * once after construction.
- *
- * Parse the given rules, in the given direction. After this call
- * returns, query the public data members for results. The caller
- * owns the 'data' and 'compoundFilter' data members after this
- * call returns.
- * @param rules rules, separated by ';'
- * @param direction either FORWARD or REVERSE.
- * @param pe Struct to recieve information on position
- * of error if an error is encountered
- * @param ec Output param set to success/failure code.
- */
- void parse(const UnicodeString& rules,
- UTransDirection direction,
- UParseError& pe,
- UErrorCode& ec);
-
- /**
- * Return the compound filter parsed by parse(). Caller owns result.
- * @return the compound filter parsed by parse().
- */
- UnicodeSet* orphanCompoundFilter();
-
-private:
-
- /**
- * Return a representation of this transliterator as source rules.
- * @param rules Output param to receive the rules.
- * @param direction either FORWARD or REVERSE.
- */
- void parseRules(const UnicodeString& rules,
- UTransDirection direction,
- UErrorCode& status);
-
- /**
- * MAIN PARSER. Parse the next rule in the given rule string, starting
- * at pos. Return the index after the last character parsed. Do not
- * parse characters at or after limit.
- *
- * Important: The character at pos must be a non-whitespace character
- * that is not the comment character.
- *
- * This method handles quoting, escaping, and whitespace removal. It
- * parses the end-of-rule character. It recognizes context and cursor
- * indicators. Once it does a lexical breakdown of the rule at pos, it
- * creates a rule object and adds it to our rule list.
- * @param rules Output param to receive the rules.
- * @param pos the starting position.
- * @param limit pointer past the last character of the rule.
- * @return the index after the last character parsed.
- */
- int32_t parseRule(const UnicodeString& rule, int32_t pos, int32_t limit, UErrorCode& status);
-
- /**
- * Set the variable range to [start, end] (inclusive).
- * @param start the start value of the range.
- * @param end the end value of the range.
- */
- void setVariableRange(int32_t start, int32_t end, UErrorCode& status);
-
- /**
- * Assert that the given character is NOT within the variable range.
- * If it is, return FALSE. This is neccesary to ensure that the
- * variable range does not overlap characters used in a rule.
- * @param ch the given character.
- * @return True, if the given character is NOT within the variable range.
- */
- UBool checkVariableRange(UChar32 ch) const;
-
- /**
- * Set the maximum backup to 'backup', in response to a pragma
- * statement.
- * @param backup the new value to be set.
- */
- void pragmaMaximumBackup(int32_t backup);
-
- /**
- * Begin normalizing all rules using the given mode, in response
- * to a pragma statement.
- * @param mode the given mode.
- */
- void pragmaNormalizeRules(UNormalizationMode mode);
-
- /**
- * Return true if the given rule looks like a pragma.
- * @param pos offset to the first non-whitespace character
- * of the rule.
- * @param limit pointer past the last character of the rule.
- * @return true if the given rule looks like a pragma.
- */
- static UBool resemblesPragma(const UnicodeString& rule, int32_t pos, int32_t limit);
-
- /**
- * Parse a pragma. This method assumes resemblesPragma() has
- * already returned true.
- * @param pos offset to the first non-whitespace character
- * of the rule.
- * @param limit pointer past the last character of the rule.
- * @return the position index after the final ';' of the pragma,
- * or -1 on failure.
- */
- int32_t parsePragma(const UnicodeString& rule, int32_t pos, int32_t limit, UErrorCode& status);
-
- /**
- * Called by main parser upon syntax error. Search the rule string
- * for the probable end of the rule. Of course, if the error is that
- * the end of rule marker is missing, then the rule end will not be found.
- * In any case the rule start will be correctly reported.
- * @param parseErrorCode error code.
- * @param msg error description.
- * @param start position of first character of current rule.
- * @return start position of first character of current rule.
- */
- int32_t syntaxError(UErrorCode parseErrorCode, const UnicodeString&, int32_t start,
- UErrorCode& status);
-
- /**
- * Parse a UnicodeSet out, store it, and return the stand-in character
- * used to represent it.
- *
- * @param rule the rule for UnicodeSet.
- * @param pos the position in pattern at which to start parsing.
- * @return the stand-in character used to represent it.
- */
- UChar parseSet(const UnicodeString& rule,
- ParsePosition& pos,
- UErrorCode& status);
-
- /**
- * Generate and return a stand-in for a new UnicodeFunctor. Store
- * the matcher (adopt it).
- * @param adopted the UnicodeFunctor to be adopted.
- * @return a stand-in for a new UnicodeFunctor.
- */
- UChar generateStandInFor(UnicodeFunctor* adopted, UErrorCode& status);
-
- /**
- * Return the standin for segment seg (1-based).
- * @param seg the given segment.
- * @return the standIn character for the given segment.
- */
- UChar getSegmentStandin(int32_t seg, UErrorCode& status);
-
- /**
- * Set the object for segment seg (1-based).
- * @param seg the given segment.
- * @param adopted the StringMatcher to be adopted.
- */
- void setSegmentObject(int32_t seg, StringMatcher* adopted, UErrorCode& status);
-
- /**
- * Return the stand-in for the dot set. It is allocated the first
- * time and reused thereafter.
- * @return the stand-in for the dot set.
- */
- UChar getDotStandIn(UErrorCode& status);
-
- /**
- * Append the value of the given variable name to the given
- * UnicodeString.
- * @param name the variable name to be appended.
- * @param buf the given UnicodeString to append to.
- */
- void appendVariableDef(const UnicodeString& name,
- UnicodeString& buf,
- UErrorCode& status);
-
- /**
- * Glue method to get around access restrictions in C++.
- */
- /*static Transliterator* createBasicInstance(const UnicodeString& id,
- const UnicodeString* canonID);*/
-
- friend class RuleHalf;
-
- // Disallowed methods; no impl.
- /**
- * Copy constructor
- */
- TransliteratorParser(const TransliteratorParser&);
-
- /**
- * Assignment operator
- */
- TransliteratorParser& operator=(const TransliteratorParser&);
-};
-
-U_NAMESPACE_END
-
-#endif /* #ifdef __cplusplus */
-
-/**
- * Strip/convert the following from the transliterator rules:
- * comments
- * newlines
- * white space at the beginning and end of a line
- * unescape \u notation
- *
- * The target must be equal in size as the source.
- * @internal
- */
-U_CAPI int32_t
-utrans_stripRules(const UChar *source, int32_t sourceLen, UChar *target, UErrorCode *status);
-
-#endif /* #if !UCONFIG_NO_TRANSLITERATION */
-
-#endif
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1999-2011, International Business Machines Corporation
+* and others. All Rights Reserved.
+**********************************************************************
+* Date Name Description
+* 11/17/99 aliu Creation.
+**********************************************************************
+*/
+#ifndef RBT_PARS_H
+#define RBT_PARS_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_TRANSLITERATION
+#ifdef __cplusplus
+
+#include "unicode/uobject.h"
+#include "unicode/parseerr.h"
+#include "unicode/unorm.h"
+#include "rbt.h"
+#include "hash.h"
+#include "uvector.h"
+
+U_NAMESPACE_BEGIN
+
+class TransliterationRuleData;
+class UnicodeFunctor;
+class ParseData;
+class RuleHalf;
+class ParsePosition;
+class StringMatcher;
+
+class TransliteratorParser : public UMemory {
+
+ public:
+
+ /**
+ * A Vector of TransliterationRuleData objects, one for each discrete group
+ * of rules in the rule set
+ */
+ UVector dataVector;
+
+ /**
+ * PUBLIC data member.
+ * A Vector of UnicodeStrings containing all of the ID blocks in the rule set
+ */
+ UVector idBlockVector;
+
+ /**
+ * PUBLIC data member containing the parsed compound filter, if any.
+ */
+ UnicodeSet* compoundFilter;
+
+ private:
+
+ /**
+ * The current data object for which we are parsing rules
+ */
+ TransliterationRuleData* curData;
+
+ UTransDirection direction;
+
+ /**
+ * Parse error information.
+ */
+ UParseError parseError;
+
+ /**
+ * Temporary symbol table used during parsing.
+ */
+ ParseData* parseData;
+
+ /**
+ * Temporary vector of matcher variables. When parsing is complete, this
+ * is copied into the array data.variables. As with data.variables,
+ * element 0 corresponds to character data.variablesBase.
+ */
+ UVector variablesVector;
+
+ /**
+ * Temporary table of variable names. When parsing is complete, this is
+ * copied into data.variableNames.
+ */
+ Hashtable variableNames;
+
+ /**
+ * String of standins for segments. Used during the parsing of a single
+ * rule. segmentStandins.charAt(0) is the standin for "$1" and corresponds
+ * to StringMatcher object segmentObjects.elementAt(0), etc.
+ */
+ UnicodeString segmentStandins;
+
+ /**
+ * Vector of StringMatcher objects for segments. Used during the
+ * parsing of a single rule.
+ * segmentStandins.charAt(0) is the standin for "$1" and corresponds
+ * to StringMatcher object segmentObjects.elementAt(0), etc.
+ */
+ UVector segmentObjects;
+
+ /**
+ * The next available stand-in for variables. This starts at some point in
+ * the private use area (discovered dynamically) and increments up toward
+ * <code>variableLimit</code>. At any point during parsing, available
+ * variables are <code>variableNext..variableLimit-1</code>.
+ */
+ UChar variableNext;
+
+ /**
+ * The last available stand-in for variables. This is discovered
+ * dynamically. At any point during parsing, available variables are
+ * <code>variableNext..variableLimit-1</code>.
+ */
+ UChar variableLimit;
+
+ /**
+ * When we encounter an undefined variable, we do not immediately signal
+ * an error, in case we are defining this variable, e.g., "$a = [a-z];".
+ * Instead, we save the name of the undefined variable, and substitute
+ * in the placeholder char variableLimit - 1, and decrement
+ * variableLimit.
+ */
+ UnicodeString undefinedVariableName;
+
+ /**
+ * The stand-in character for the 'dot' set, represented by '.' in
+ * patterns. This is allocated the first time it is needed, and
+ * reused thereafter.
+ */
+ UChar dotStandIn;
+
+public:
+
+ /**
+ * Constructor.
+ */
+ TransliteratorParser(UErrorCode &statusReturn);
+
+ /**
+ * Destructor.
+ */
+ ~TransliteratorParser();
+
+ /**
+ * Parse the given string as a sequence of rules, separated by newline
+ * characters ('\n'), and cause this object to implement those rules. Any
+ * previous rules are discarded. Typically this method is called exactly
+ * once after construction.
+ *
+ * Parse the given rules, in the given direction. After this call
+ * returns, query the public data members for results. The caller
+ * owns the 'data' and 'compoundFilter' data members after this
+ * call returns.
+ * @param rules rules, separated by ';'
+ * @param direction either FORWARD or REVERSE.
+ * @param pe Struct to receive information on position
+ * of error if an error is encountered
+ * @param ec Output param set to success/failure code.
+ */
+ void parse(const UnicodeString& rules,
+ UTransDirection direction,
+ UParseError& pe,
+ UErrorCode& ec);
+
+ /**
+ * Return the compound filter parsed by parse(). Caller owns result.
+ * @return the compound filter parsed by parse().
+ */
+ UnicodeSet* orphanCompoundFilter();
+
+private:
+
+ /**
+ * Parse the given rule string in the given direction.
+ * @param rules the rules to parse (input only; passed by const reference).
+ * @param direction either FORWARD or REVERSE.
+ */
+ void parseRules(const UnicodeString& rules,
+ UTransDirection direction,
+ UErrorCode& status);
+
+ /**
+ * MAIN PARSER. Parse the next rule in the given rule string, starting
+ * at pos. Return the index after the last character parsed. Do not
+ * parse characters at or after limit.
+ *
+ * Important: The character at pos must be a non-whitespace character
+ * that is not the comment character.
+ *
+ * This method handles quoting, escaping, and whitespace removal. It
+ * parses the end-of-rule character. It recognizes context and cursor
+ * indicators. Once it does a lexical breakdown of the rule at pos, it
+ * creates a rule object and adds it to our rule list.
+ * @param rule the rule string being parsed (input only).
+ * @param pos the starting position.
+ * @param limit pointer past the last character of the rule.
+ * @return the index after the last character parsed.
+ */
+ int32_t parseRule(const UnicodeString& rule, int32_t pos, int32_t limit, UErrorCode& status);
+
+ /**
+ * Set the variable range to [start, end] (inclusive).
+ * @param start the start value of the range.
+ * @param end the end value of the range.
+ */
+ void setVariableRange(int32_t start, int32_t end, UErrorCode& status);
+
+ /**
+ * Assert that the given character is NOT within the variable range.
+ * If it is, return FALSE. This is necessary to ensure that the
+ * variable range does not overlap characters used in a rule.
+ * @param ch the given character.
+ * @return True, if the given character is NOT within the variable range.
+ */
+ UBool checkVariableRange(UChar32 ch) const;
+
+ /**
+ * Set the maximum backup to 'backup', in response to a pragma
+ * statement.
+ * @param backup the new value to be set.
+ */
+ void pragmaMaximumBackup(int32_t backup);
+
+ /**
+ * Begin normalizing all rules using the given mode, in response
+ * to a pragma statement.
+ * @param mode the given mode.
+ */
+ void pragmaNormalizeRules(UNormalizationMode mode);
+
+ /**
+ * Return true if the given rule looks like a pragma.
+ * @param pos offset to the first non-whitespace character
+ * of the rule.
+ * @param limit pointer past the last character of the rule.
+ * @return true if the given rule looks like a pragma.
+ */
+ static UBool resemblesPragma(const UnicodeString& rule, int32_t pos, int32_t limit);
+
+ /**
+ * Parse a pragma. This method assumes resemblesPragma() has
+ * already returned true.
+ * @param pos offset to the first non-whitespace character
+ * of the rule.
+ * @param limit pointer past the last character of the rule.
+ * @return the position index after the final ';' of the pragma,
+ * or -1 on failure.
+ */
+ int32_t parsePragma(const UnicodeString& rule, int32_t pos, int32_t limit, UErrorCode& status);
+
+ /**
+ * Called by main parser upon syntax error. Search the rule string
+ * for the probable end of the rule. Of course, if the error is that
+ * the end of rule marker is missing, then the rule end will not be found.
+ * In any case the rule start will be correctly reported.
+ * @param parseErrorCode error code.
+ * @param msg error description.
+ * @param start position of first character of current rule.
+ * @return start position of first character of current rule.
+ */
+ int32_t syntaxError(UErrorCode parseErrorCode, const UnicodeString&, int32_t start,
+ UErrorCode& status);
+
+ /**
+ * Parse a UnicodeSet out, store it, and return the stand-in character
+ * used to represent it.
+ *
+ * @param rule the rule for UnicodeSet.
+ * @param pos the position in pattern at which to start parsing.
+ * @return the stand-in character used to represent it.
+ */
+ UChar parseSet(const UnicodeString& rule,
+ ParsePosition& pos,
+ UErrorCode& status);
+
+ /**
+ * Generate and return a stand-in for a new UnicodeFunctor. Store
+ * the matcher (adopt it).
+ * @param adopted the UnicodeFunctor to be adopted.
+ * @return a stand-in for a new UnicodeFunctor.
+ */
+ UChar generateStandInFor(UnicodeFunctor* adopted, UErrorCode& status);
+
+ /**
+ * Return the standin for segment seg (1-based).
+ * @param seg the given segment.
+ * @return the standIn character for the given segment.
+ */
+ UChar getSegmentStandin(int32_t seg, UErrorCode& status);
+
+ /**
+ * Set the object for segment seg (1-based).
+ * @param seg the given segment.
+ * @param adopted the StringMatcher to be adopted.
+ */
+ void setSegmentObject(int32_t seg, StringMatcher* adopted, UErrorCode& status);
+
+ /**
+ * Return the stand-in for the dot set. It is allocated the first
+ * time and reused thereafter.
+ * @return the stand-in for the dot set.
+ */
+ UChar getDotStandIn(UErrorCode& status);
+
+ /**
+ * Append the value of the given variable name to the given
+ * UnicodeString.
+ * @param name the variable name to be appended.
+ * @param buf the given UnicodeString to append to.
+ */
+ void appendVariableDef(const UnicodeString& name,
+ UnicodeString& buf,
+ UErrorCode& status);
+
+ /**
+ * Glue method to get around access restrictions in C++.
+ */
+ /*static Transliterator* createBasicInstance(const UnicodeString& id,
+ const UnicodeString* canonID);*/
+
+ friend class RuleHalf;
+
+ // Disallowed methods; no impl.
+ /**
+ * Copy constructor
+ */
+ TransliteratorParser(const TransliteratorParser&);
+
+ /**
+ * Assignment operator
+ */
+ TransliteratorParser& operator=(const TransliteratorParser&);
+};
+
+U_NAMESPACE_END
+
+#endif /* #ifdef __cplusplus */
+
+/**
+ * Strip/convert the following from the transliterator rules:
+ * comments
+ * newlines
+ * white space at the beginning and end of a line
+ * unescape \u notation
+ *
+ * The target must be equal in size to the source.
+ * @internal
+ */
+U_CAPI int32_t
+utrans_stripRules(const UChar *source, int32_t sourceLen, UChar *target, UErrorCode *status);
+
+#endif /* #if !UCONFIG_NO_TRANSLITERATION */
+
+#endif