diff options
author | neksard <[email protected]> | 2022-02-10 16:45:23 +0300 |
---|---|---|
committer | Daniil Cherednik <[email protected]> | 2022-02-10 16:45:23 +0300 |
commit | 8f7cf138264e0caa318144bf8a2c950e0b0a8593 (patch) | |
tree | 83bf5c8c8047c42d8475e6095df90ccdc3d1b57f /contrib/libs/icu/common/messagepattern.cpp | |
parent | d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (diff) |
Restoring authorship annotation for <[email protected]>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/icu/common/messagepattern.cpp')
-rw-r--r-- | contrib/libs/icu/common/messagepattern.cpp | 2462 |
1 files changed, 1231 insertions, 1231 deletions
diff --git a/contrib/libs/icu/common/messagepattern.cpp b/contrib/libs/icu/common/messagepattern.cpp index f223d067116..160c9c395c4 100644 --- a/contrib/libs/icu/common/messagepattern.cpp +++ b/contrib/libs/icu/common/messagepattern.cpp @@ -1,1233 +1,1233 @@ // © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2011-2012, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: messagepattern.cpp +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2011-2012, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: messagepattern.cpp * encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2011mar14 -* created by: Markus W. Scherer -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_FORMATTING - -#include "unicode/messagepattern.h" -#include "unicode/unistr.h" -#include "unicode/utf16.h" -#include "cmemory.h" -#include "cstring.h" -#include "messageimpl.h" -#include "patternprops.h" -#include "putilimp.h" -#include "uassert.h" - -U_NAMESPACE_BEGIN - -// Unicode character/code point constants ---------------------------------- *** - -static const UChar u_pound=0x23; -static const UChar u_apos=0x27; -static const UChar u_plus=0x2B; -static const UChar u_comma=0x2C; -static const UChar u_minus=0x2D; -static const UChar u_dot=0x2E; -static const UChar u_colon=0x3A; -static const UChar u_lessThan=0x3C; -static const UChar u_equal=0x3D; -static const UChar u_A=0x41; -static const UChar u_C=0x43; -static const UChar u_D=0x44; -static const UChar u_E=0x45; -static const UChar u_H=0x48; -static const UChar u_I=0x49; -static const UChar u_L=0x4C; -static const UChar u_N=0x4E; -static const UChar u_O=0x4F; -static const UChar u_P=0x50; -static const UChar u_R=0x52; -static const UChar u_S=0x53; -static const UChar u_T=0x54; -static const UChar u_U=0x55; -static const UChar u_Z=0x5A; -static const UChar u_a=0x61; -static const UChar u_c=0x63; -static const UChar u_d=0x64; -static const UChar u_e=0x65; -static const UChar u_f=0x66; -static const UChar u_h=0x68; -static const UChar u_i=0x69; -static const UChar u_l=0x6C; -static const UChar u_n=0x6E; -static const UChar u_o=0x6F; -static const UChar u_p=0x70; -static const UChar u_r=0x72; -static const UChar u_s=0x73; -static const UChar u_t=0x74; -static const UChar u_u=0x75; -static const UChar u_z=0x7A; -static const UChar u_leftCurlyBrace=0x7B; -static const UChar u_pipe=0x7C; -static const UChar u_rightCurlyBrace=0x7D; -static const UChar u_lessOrEqual=0x2264; // U+2264 is <= - -static const UChar kOffsetColon[]={ // "offset:" - u_o, u_f, u_f, u_s, u_e, u_t, u_colon -}; - -static const UChar kOther[]={ // "other" - u_o, u_t, u_h, u_e, u_r -}; - -// MessagePatternList ------------------------------------------------------ *** - -template<typename T, int32_t stackCapacity> -class MessagePatternList : public UMemory { -public: - MessagePatternList() {} - void copyFrom(const MessagePatternList<T, stackCapacity> &other, - int32_t length, - UErrorCode &errorCode); - UBool ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode); - UBool equals(const MessagePatternList<T, stackCapacity> &other, int32_t length) const { - for(int32_t i=0; i<length; ++i) { - if(a[i]!=other.a[i]) { return FALSE; } - } - return TRUE; - } - - MaybeStackArray<T, stackCapacity> a; -}; - -template<typename T, int32_t stackCapacity> -void -MessagePatternList<T, stackCapacity>::copyFrom( - const MessagePatternList<T, stackCapacity> &other, - int32_t length, - UErrorCode &errorCode) { - if(U_SUCCESS(errorCode) && length>0) { - if(length>a.getCapacity() && NULL==a.resize(length)) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - uprv_memcpy(a.getAlias(), other.a.getAlias(), (size_t)length*sizeof(T)); - } -} - -template<typename T, int32_t stackCapacity> -UBool -MessagePatternList<T, stackCapacity>::ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return FALSE; - } - if(a.getCapacity()>oldLength || a.resize(2*oldLength, oldLength)!=NULL) { - return TRUE; - } - errorCode=U_MEMORY_ALLOCATION_ERROR; - return FALSE; -} - -// MessagePatternList specializations -------------------------------------- *** - -class MessagePatternDoubleList : public MessagePatternList<double, 8> { -}; - -class MessagePatternPartsList : public MessagePatternList<MessagePattern::Part, 32> { -}; - -// MessagePattern constructors etc. ---------------------------------------- *** - -MessagePattern::MessagePattern(UErrorCode &errorCode) - : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE), - partsList(NULL), parts(NULL), partsLength(0), - numericValuesList(NULL), numericValues(NULL), numericValuesLength(0), - hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) { - init(errorCode); -} - -MessagePattern::MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode) - : aposMode(mode), - partsList(NULL), parts(NULL), partsLength(0), - numericValuesList(NULL), numericValues(NULL), numericValuesLength(0), - hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) { - init(errorCode); -} - -MessagePattern::MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) - : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE), - partsList(NULL), parts(NULL), partsLength(0), - numericValuesList(NULL), numericValues(NULL), numericValuesLength(0), - hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) { - if(init(errorCode)) { - parse(pattern, parseError, errorCode); - } -} - -UBool -MessagePattern::init(UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return FALSE; - } - partsList=new MessagePatternPartsList(); - if(partsList==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return FALSE; - } - parts=partsList->a.getAlias(); - return TRUE; -} - -MessagePattern::MessagePattern(const MessagePattern &other) - : UObject(other), aposMode(other.aposMode), msg(other.msg), - partsList(NULL), parts(NULL), partsLength(0), - numericValuesList(NULL), numericValues(NULL), numericValuesLength(0), - hasArgNames(other.hasArgNames), hasArgNumbers(other.hasArgNumbers), - needsAutoQuoting(other.needsAutoQuoting) { - UErrorCode errorCode=U_ZERO_ERROR; - if(!copyStorage(other, errorCode)) { - clear(); - } -} - -MessagePattern & -MessagePattern::operator=(const MessagePattern &other) { - if(this==&other) { - return *this; - } - aposMode=other.aposMode; - msg=other.msg; - hasArgNames=other.hasArgNames; - hasArgNumbers=other.hasArgNumbers; - needsAutoQuoting=other.needsAutoQuoting; - UErrorCode errorCode=U_ZERO_ERROR; - if(!copyStorage(other, errorCode)) { - clear(); - } - return *this; -} - -UBool -MessagePattern::copyStorage(const MessagePattern &other, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return FALSE; - } - parts=NULL; - partsLength=0; - numericValues=NULL; - numericValuesLength=0; - if(partsList==NULL) { - partsList=new MessagePatternPartsList(); - if(partsList==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return FALSE; - } - parts=partsList->a.getAlias(); - } - if(other.partsLength>0) { - partsList->copyFrom(*other.partsList, other.partsLength, errorCode); - if(U_FAILURE(errorCode)) { - return FALSE; - } - parts=partsList->a.getAlias(); - partsLength=other.partsLength; - } - if(other.numericValuesLength>0) { - if(numericValuesList==NULL) { - numericValuesList=new MessagePatternDoubleList(); - if(numericValuesList==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return FALSE; - } - numericValues=numericValuesList->a.getAlias(); - } - numericValuesList->copyFrom( - *other.numericValuesList, other.numericValuesLength, errorCode); - if(U_FAILURE(errorCode)) { - return FALSE; - } - numericValues=numericValuesList->a.getAlias(); - numericValuesLength=other.numericValuesLength; - } - return TRUE; -} - -MessagePattern::~MessagePattern() { - delete partsList; - delete numericValuesList; -} - -// MessagePattern API ------------------------------------------------------ *** - -MessagePattern & -MessagePattern::parse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) { - preParse(pattern, parseError, errorCode); - parseMessage(0, 0, 0, UMSGPAT_ARG_TYPE_NONE, parseError, errorCode); - postParse(); - return *this; -} - -MessagePattern & -MessagePattern::parseChoiceStyle(const UnicodeString &pattern, - UParseError *parseError, UErrorCode &errorCode) { - preParse(pattern, parseError, errorCode); - parseChoiceStyle(0, 0, parseError, errorCode); - postParse(); - return *this; -} - -MessagePattern & -MessagePattern::parsePluralStyle(const UnicodeString &pattern, - UParseError *parseError, UErrorCode &errorCode) { - preParse(pattern, parseError, errorCode); - parsePluralOrSelectStyle(UMSGPAT_ARG_TYPE_PLURAL, 0, 0, parseError, errorCode); - postParse(); - return *this; -} - -MessagePattern & -MessagePattern::parseSelectStyle(const UnicodeString &pattern, - UParseError *parseError, UErrorCode &errorCode) { - preParse(pattern, parseError, errorCode); - parsePluralOrSelectStyle(UMSGPAT_ARG_TYPE_SELECT, 0, 0, parseError, errorCode); - postParse(); - return *this; -} - -void -MessagePattern::clear() { - // Mostly the same as preParse(). - msg.remove(); - hasArgNames=hasArgNumbers=FALSE; - needsAutoQuoting=FALSE; - partsLength=0; - numericValuesLength=0; -} - -UBool -MessagePattern::operator==(const MessagePattern &other) const { - if(this==&other) { - return TRUE; - } - return - aposMode==other.aposMode && - msg==other.msg && - // parts.equals(o.parts) - partsLength==other.partsLength && - (partsLength==0 || partsList->equals(*other.partsList, partsLength)); - // No need to compare numericValues if msg and parts are the same. -} - -int32_t -MessagePattern::hashCode() const { - int32_t hash=(aposMode*37+msg.hashCode())*37+partsLength; - for(int32_t i=0; i<partsLength; ++i) { - hash=hash*37+parts[i].hashCode(); - } - return hash; -} - -int32_t -MessagePattern::validateArgumentName(const UnicodeString &name) { - if(!PatternProps::isIdentifier(name.getBuffer(), name.length())) { - return UMSGPAT_ARG_NAME_NOT_VALID; - } - return parseArgNumber(name, 0, name.length()); -} - -UnicodeString -MessagePattern::autoQuoteApostropheDeep() const { - if(!needsAutoQuoting) { - return msg; - } - UnicodeString modified(msg); - // Iterate backward so that the insertion indexes do not change. - int32_t count=countParts(); - for(int32_t i=count; i>0;) { - const Part &part=getPart(--i); - if(part.getType()==UMSGPAT_PART_TYPE_INSERT_CHAR) { - modified.insert(part.index, (UChar)part.value); - } - } - return modified; -} - -double -MessagePattern::getNumericValue(const Part &part) const { - UMessagePatternPartType type=part.type; - if(type==UMSGPAT_PART_TYPE_ARG_INT) { - return part.value; - } else if(type==UMSGPAT_PART_TYPE_ARG_DOUBLE) { - return numericValues[part.value]; - } else { - return UMSGPAT_NO_NUMERIC_VALUE; - } -} - -/** - * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified. - * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1) - * @return the "offset:" value. - * @draft ICU 4.8 - */ -double -MessagePattern::getPluralOffset(int32_t pluralStart) const { - const Part &part=getPart(pluralStart); - if(Part::hasNumericValue(part.type)) { - return getNumericValue(part); - } else { - return 0; - } -} - -// MessagePattern::Part ---------------------------------------------------- *** - -UBool -MessagePattern::Part::operator==(const Part &other) const { - if(this==&other) { - return TRUE; - } - return - type==other.type && - index==other.index && - length==other.length && - value==other.value && - limitPartIndex==other.limitPartIndex; -} - -// MessagePattern parser --------------------------------------------------- *** - -void -MessagePattern::preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return; - } - if(parseError!=NULL) { - parseError->line=0; - parseError->offset=0; - parseError->preContext[0]=0; - parseError->postContext[0]=0; - } - msg=pattern; - hasArgNames=hasArgNumbers=FALSE; - needsAutoQuoting=FALSE; - partsLength=0; - numericValuesLength=0; -} - -void -MessagePattern::postParse() { - if(partsList!=NULL) { - parts=partsList->a.getAlias(); - } - if(numericValuesList!=NULL) { - numericValues=numericValuesList->a.getAlias(); - } -} - -int32_t -MessagePattern::parseMessage(int32_t index, int32_t msgStartLength, - int32_t nestingLevel, UMessagePatternArgType parentType, - UParseError *parseError, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return 0; - } - if(nestingLevel>Part::MAX_VALUE) { - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - int32_t msgStart=partsLength; - addPart(UMSGPAT_PART_TYPE_MSG_START, index, msgStartLength, nestingLevel, errorCode); - index+=msgStartLength; - for(;;) { // while(index<msg.length()) with U_FAILURE(errorCode) check - if(U_FAILURE(errorCode)) { - return 0; - } - if(index>=msg.length()) { - break; - } - UChar c=msg.charAt(index++); - if(c==u_apos) { - if(index==msg.length()) { - // The apostrophe is the last character in the pattern. - // Add a Part for auto-quoting. - addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0, - u_apos, errorCode); // value=char to be inserted - needsAutoQuoting=TRUE; - } else { - c=msg.charAt(index); - if(c==u_apos) { - // double apostrophe, skip the second one - addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index++, 1, 0, errorCode); - } else if( - aposMode==UMSGPAT_APOS_DOUBLE_REQUIRED || - c==u_leftCurlyBrace || c==u_rightCurlyBrace || - (parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_pipe) || - (UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(parentType) && c==u_pound) - ) { - // skip the quote-starting apostrophe - addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index-1, 1, 0, errorCode); - // find the end of the quoted literal text - for(;;) { - index=msg.indexOf(u_apos, index+1); - if(index>=0) { - if(/*(index+1)<msg.length() &&*/ msg.charAt(index+1)==u_apos) { - // double apostrophe inside quoted literal text - // still encodes a single apostrophe, skip the second one - addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, ++index, 1, 0, errorCode); - } else { - // skip the quote-ending apostrophe - addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index++, 1, 0, errorCode); - break; - } - } else { - // The quoted text reaches to the end of the of the message. - index=msg.length(); - // Add a Part for auto-quoting. - addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0, - u_apos, errorCode); // value=char to be inserted - needsAutoQuoting=TRUE; - break; - } - } - } else { - // Interpret the apostrophe as literal text. - // Add a Part for auto-quoting. - addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0, - u_apos, errorCode); // value=char to be inserted - needsAutoQuoting=TRUE; - } - } - } else if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(parentType) && c==u_pound) { - // The unquoted # in a plural message fragment will be replaced - // with the (number-offset). - addPart(UMSGPAT_PART_TYPE_REPLACE_NUMBER, index-1, 1, 0, errorCode); - } else if(c==u_leftCurlyBrace) { - index=parseArg(index-1, 1, nestingLevel, parseError, errorCode); - } else if((nestingLevel>0 && c==u_rightCurlyBrace) || - (parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_pipe)) { - // Finish the message before the terminator. - // In a choice style, report the "}" substring only for the following ARG_LIMIT, - // not for this MSG_LIMIT. - int32_t limitLength=(parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_rightCurlyBrace) ? 0 : 1; - addLimitPart(msgStart, UMSGPAT_PART_TYPE_MSG_LIMIT, index-1, limitLength, - nestingLevel, errorCode); - if(parentType==UMSGPAT_ARG_TYPE_CHOICE) { - // Let the choice style parser see the '}' or '|'. - return index-1; - } else { - // continue parsing after the '}' - return index; - } - } // else: c is part of literal text - } - if(nestingLevel>0 && !inTopLevelChoiceMessage(nestingLevel, parentType)) { - setParseError(parseError, 0); // Unmatched '{' braces in message. - errorCode=U_UNMATCHED_BRACES; - return 0; - } - addLimitPart(msgStart, UMSGPAT_PART_TYPE_MSG_LIMIT, index, 0, nestingLevel, errorCode); - return index; -} - -int32_t -MessagePattern::parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel, - UParseError *parseError, UErrorCode &errorCode) { - int32_t argStart=partsLength; - UMessagePatternArgType argType=UMSGPAT_ARG_TYPE_NONE; - addPart(UMSGPAT_PART_TYPE_ARG_START, index, argStartLength, argType, errorCode); - if(U_FAILURE(errorCode)) { - return 0; - } - int32_t nameIndex=index=skipWhiteSpace(index+argStartLength); - if(index==msg.length()) { - setParseError(parseError, 0); // Unmatched '{' braces in message. - errorCode=U_UNMATCHED_BRACES; - return 0; - } - // parse argument name or number - index=skipIdentifier(index); - int32_t number=parseArgNumber(nameIndex, index); - if(number>=0) { - int32_t length=index-nameIndex; - if(length>Part::MAX_LENGTH || number>Part::MAX_VALUE) { - setParseError(parseError, nameIndex); // Argument number too large. - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - hasArgNumbers=TRUE; - addPart(UMSGPAT_PART_TYPE_ARG_NUMBER, nameIndex, length, number, errorCode); - } else if(number==UMSGPAT_ARG_NAME_NOT_NUMBER) { - int32_t length=index-nameIndex; - if(length>Part::MAX_LENGTH) { - setParseError(parseError, nameIndex); // Argument name too long. - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - hasArgNames=TRUE; - addPart(UMSGPAT_PART_TYPE_ARG_NAME, nameIndex, length, 0, errorCode); - } else { // number<-1 (ARG_NAME_NOT_VALID) - setParseError(parseError, nameIndex); // Bad argument syntax. - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } - index=skipWhiteSpace(index); - if(index==msg.length()) { - setParseError(parseError, 0); // Unmatched '{' braces in message. - errorCode=U_UNMATCHED_BRACES; - return 0; - } - UChar c=msg.charAt(index); - if(c==u_rightCurlyBrace) { - // all done - } else if(c!=u_comma) { - setParseError(parseError, nameIndex); // Bad argument syntax. - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } else /* ',' */ { - // parse argument type: case-sensitive a-zA-Z - int32_t typeIndex=index=skipWhiteSpace(index+1); - while(index<msg.length() && isArgTypeChar(msg.charAt(index))) { - ++index; - } - int32_t length=index-typeIndex; - index=skipWhiteSpace(index); - if(index==msg.length()) { - setParseError(parseError, 0); // Unmatched '{' braces in message. - errorCode=U_UNMATCHED_BRACES; - return 0; - } - if(length==0 || ((c=msg.charAt(index))!=u_comma && c!=u_rightCurlyBrace)) { - setParseError(parseError, nameIndex); // Bad argument syntax. - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } - if(length>Part::MAX_LENGTH) { - setParseError(parseError, nameIndex); // Argument type name too long. - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - argType=UMSGPAT_ARG_TYPE_SIMPLE; - if(length==6) { - // case-insensitive comparisons for complex-type names - if(isChoice(typeIndex)) { - argType=UMSGPAT_ARG_TYPE_CHOICE; - } else if(isPlural(typeIndex)) { - argType=UMSGPAT_ARG_TYPE_PLURAL; - } else if(isSelect(typeIndex)) { - argType=UMSGPAT_ARG_TYPE_SELECT; - } - } else if(length==13) { - if(isSelect(typeIndex) && isOrdinal(typeIndex+6)) { - argType=UMSGPAT_ARG_TYPE_SELECTORDINAL; - } - } - // change the ARG_START type from NONE to argType - partsList->a[argStart].value=(int16_t)argType; - if(argType==UMSGPAT_ARG_TYPE_SIMPLE) { - addPart(UMSGPAT_PART_TYPE_ARG_TYPE, typeIndex, length, 0, errorCode); - } - // look for an argument style (pattern) - if(c==u_rightCurlyBrace) { - if(argType!=UMSGPAT_ARG_TYPE_SIMPLE) { - setParseError(parseError, nameIndex); // No style field for complex argument. - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } - } else /* ',' */ { - ++index; - if(argType==UMSGPAT_ARG_TYPE_SIMPLE) { - index=parseSimpleStyle(index, parseError, errorCode); - } else if(argType==UMSGPAT_ARG_TYPE_CHOICE) { - index=parseChoiceStyle(index, nestingLevel, parseError, errorCode); - } else { - index=parsePluralOrSelectStyle(argType, index, nestingLevel, parseError, errorCode); - } - } - } - // Argument parsing stopped on the '}'. - addLimitPart(argStart, UMSGPAT_PART_TYPE_ARG_LIMIT, index, 1, argType, errorCode); - return index+1; -} - -int32_t -MessagePattern::parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return 0; - } - int32_t start=index; - int32_t nestedBraces=0; - while(index<msg.length()) { - UChar c=msg.charAt(index++); - if(c==u_apos) { - // Treat apostrophe as quoting but include it in the style part. - // Find the end of the quoted literal text. - index=msg.indexOf(u_apos, index); - if(index<0) { - // Quoted literal argument style text reaches to the end of the message. - setParseError(parseError, start); - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } - // skip the quote-ending apostrophe - ++index; - } else if(c==u_leftCurlyBrace) { - ++nestedBraces; - } else if(c==u_rightCurlyBrace) { - if(nestedBraces>0) { - --nestedBraces; - } else { - int32_t length=--index-start; - if(length>Part::MAX_LENGTH) { - setParseError(parseError, start); // Argument style text too long. - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - addPart(UMSGPAT_PART_TYPE_ARG_STYLE, start, length, 0, errorCode); - return index; - } - } // c is part of literal text - } - setParseError(parseError, 0); // Unmatched '{' braces in message. - errorCode=U_UNMATCHED_BRACES; - return 0; -} - -int32_t -MessagePattern::parseChoiceStyle(int32_t index, int32_t nestingLevel, - UParseError *parseError, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return 0; - } - int32_t start=index; - index=skipWhiteSpace(index); - if(index==msg.length() || msg.charAt(index)==u_rightCurlyBrace) { - setParseError(parseError, 0); // Missing choice argument pattern. - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } - for(;;) { - // The choice argument style contains |-separated (number, separator, message) triples. - // Parse the number. - int32_t numberIndex=index; - index=skipDouble(index); - int32_t length=index-numberIndex; - if(length==0) { - setParseError(parseError, start); // Bad choice pattern syntax. - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } - if(length>Part::MAX_LENGTH) { - setParseError(parseError, numberIndex); // Choice number too long. - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - parseDouble(numberIndex, index, TRUE, parseError, errorCode); // adds ARG_INT or ARG_DOUBLE - if(U_FAILURE(errorCode)) { - return 0; - } - // Parse the separator. - index=skipWhiteSpace(index); - if(index==msg.length()) { - setParseError(parseError, start); // Bad choice pattern syntax. - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } - UChar c=msg.charAt(index); - if(!(c==u_pound || c==u_lessThan || c==u_lessOrEqual)) { // U+2264 is <= - setParseError(parseError, start); // Expected choice separator (#<\u2264) instead of c. - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } - addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, index, 1, 0, errorCode); - // Parse the message fragment. - index=parseMessage(++index, 0, nestingLevel+1, UMSGPAT_ARG_TYPE_CHOICE, parseError, errorCode); - if(U_FAILURE(errorCode)) { - return 0; - } - // parseMessage(..., CHOICE) returns the index of the terminator, or msg.length(). - if(index==msg.length()) { - return index; - } - if(msg.charAt(index)==u_rightCurlyBrace) { - if(!inMessageFormatPattern(nestingLevel)) { - setParseError(parseError, start); // Bad choice pattern syntax. - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } - return index; - } // else the terminator is '|' - index=skipWhiteSpace(index+1); - } -} - -int32_t -MessagePattern::parsePluralOrSelectStyle(UMessagePatternArgType argType, - int32_t index, int32_t nestingLevel, - UParseError *parseError, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return 0; - } - int32_t start=index; - UBool isEmpty=TRUE; - UBool hasOther=FALSE; - for(;;) { - // First, collect the selector looking for a small set of terminators. - // It would be a little faster to consider the syntax of each possible - // token right here, but that makes the code too complicated. - index=skipWhiteSpace(index); - UBool eos=index==msg.length(); - if(eos || msg.charAt(index)==u_rightCurlyBrace) { - if(eos==inMessageFormatPattern(nestingLevel)) { - setParseError(parseError, start); // Bad plural/select pattern syntax. - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } - if(!hasOther) { - setParseError(parseError, 0); // Missing 'other' keyword in plural/select pattern. - errorCode=U_DEFAULT_KEYWORD_MISSING; - return 0; - } - return index; - } - int32_t selectorIndex=index; - if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) && msg.charAt(selectorIndex)==u_equal) { - // explicit-value plural selector: =double - index=skipDouble(index+1); - int32_t length=index-selectorIndex; - if(length==1) { - setParseError(parseError, start); // Bad plural/select pattern syntax. - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } - if(length>Part::MAX_LENGTH) { - setParseError(parseError, selectorIndex); // Argument selector too long. - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorIndex, length, 0, errorCode); - parseDouble(selectorIndex+1, index, FALSE, - parseError, errorCode); // adds ARG_INT or ARG_DOUBLE - } else { - index=skipIdentifier(index); - int32_t length=index-selectorIndex; - if(length==0) { - setParseError(parseError, start); // Bad plural/select pattern syntax. - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } - // Note: The ':' in "offset:" is just beyond the skipIdentifier() range. - if( UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) && length==6 && index<msg.length() && - 0==msg.compare(selectorIndex, 7, kOffsetColon, 0, 7) - ) { - // plural offset, not a selector - if(!isEmpty) { - // Plural argument 'offset:' (if present) must precede key-message pairs. - setParseError(parseError, start); - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } - // allow whitespace between offset: and its value - int32_t valueIndex=skipWhiteSpace(index+1); // The ':' is at index. - index=skipDouble(valueIndex); - if(index==valueIndex) { - setParseError(parseError, start); // Missing value for plural 'offset:'. - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } - if((index-valueIndex)>Part::MAX_LENGTH) { - setParseError(parseError, valueIndex); // Plural offset value too long. - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - parseDouble(valueIndex, index, FALSE, - parseError, errorCode); // adds ARG_INT or ARG_DOUBLE - if(U_FAILURE(errorCode)) { - return 0; - } - isEmpty=FALSE; - continue; // no message fragment after the offset - } else { - // normal selector word - if(length>Part::MAX_LENGTH) { - setParseError(parseError, selectorIndex); // Argument selector too long. - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorIndex, length, 0, errorCode); - if(0==msg.compare(selectorIndex, length, kOther, 0, 5)) { - hasOther=TRUE; - } - } - } - if(U_FAILURE(errorCode)) { - return 0; - } - - // parse the message fragment following the selector - index=skipWhiteSpace(index); - if(index==msg.length() || msg.charAt(index)!=u_leftCurlyBrace) { - setParseError(parseError, selectorIndex); // No message fragment after plural/select selector. - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } - index=parseMessage(index, 1, nestingLevel+1, argType, parseError, errorCode); - if(U_FAILURE(errorCode)) { - return 0; - } - isEmpty=FALSE; - } -} - -int32_t -MessagePattern::parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit) { - // If the identifier contains only ASCII digits, then it is an argument _number_ - // and must not have leading zeros (except "0" itself). - // Otherwise it is an argument _name_. - if(start>=limit) { - return UMSGPAT_ARG_NAME_NOT_VALID; - } - int32_t number; - // Defer numeric errors until we know there are only digits. - UBool badNumber; - UChar c=s.charAt(start++); - if(c==0x30) { - if(start==limit) { - return 0; - } else { - number=0; - badNumber=TRUE; // leading zero - } - } else if(0x31<=c && c<=0x39) { - number=c-0x30; - badNumber=FALSE; - } else { - return UMSGPAT_ARG_NAME_NOT_NUMBER; - } - while(start<limit) { - c=s.charAt(start++); - if(0x30<=c && c<=0x39) { - if(number>=INT32_MAX/10) { - badNumber=TRUE; // overflow - } - number=number*10+(c-0x30); - } else { - return UMSGPAT_ARG_NAME_NOT_NUMBER; - } - } - // There are only ASCII digits. - if(badNumber) { - return UMSGPAT_ARG_NAME_NOT_VALID; - } else { - return number; - } -} - -void -MessagePattern::parseDouble(int32_t start, int32_t limit, UBool allowInfinity, - UParseError *parseError, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return; - } - U_ASSERT(start<limit); - // fake loop for easy exit and single throw statement - for(;;) { /*loop doesn't iterate*/ - // fast path for small integers and infinity - int32_t value=0; - int32_t isNegative=0; // not boolean so that we can easily add it to value - int32_t index=start; - UChar c=msg.charAt(index++); - if(c==u_minus) { - isNegative=1; - if(index==limit) { - break; // no number - } - c=msg.charAt(index++); - } else if(c==u_plus) { - if(index==limit) { - break; // no number - } - c=msg.charAt(index++); - } - if(c==0x221e) { // infinity - if(allowInfinity && index==limit) { - double infinity=uprv_getInfinity(); - addArgDoublePart( - isNegative!=0 ? -infinity : infinity, - start, limit-start, errorCode); - return; - } else { - break; - } - } - // try to parse the number as a small integer but fall back to a double - while('0'<=c && c<='9') { - value=value*10+(c-'0'); - if(value>(Part::MAX_VALUE+isNegative)) { - break; // not a small-enough integer - } - if(index==limit) { - addPart(UMSGPAT_PART_TYPE_ARG_INT, start, limit-start, - isNegative!=0 ? -value : value, errorCode); - return; - } - c=msg.charAt(index++); - } - // Let Double.parseDouble() throw a NumberFormatException. - char numberChars[128]; - int32_t capacity=(int32_t)sizeof(numberChars); - int32_t length=limit-start; - if(length>=capacity) { - break; // number too long - } - msg.extract(start, length, numberChars, capacity, US_INV); - if((int32_t)uprv_strlen(numberChars)<length) { - break; // contains non-invariant character that was turned into NUL - } - char *end; - double numericValue=uprv_strtod(numberChars, &end); - if(end!=(numberChars+length)) { - break; // parsing error - } - addArgDoublePart(numericValue, start, length, errorCode); - return; - } - setParseError(parseError, start /*, limit*/); // Bad syntax for numeric value. - errorCode=U_PATTERN_SYNTAX_ERROR; - return; -} - -int32_t -MessagePattern::skipWhiteSpace(int32_t index) { - const UChar *s=msg.getBuffer(); - int32_t msgLength=msg.length(); - const UChar *t=PatternProps::skipWhiteSpace(s+index, msgLength-index); - return (int32_t)(t-s); -} - -int32_t -MessagePattern::skipIdentifier(int32_t index) { - const UChar *s=msg.getBuffer(); - int32_t msgLength=msg.length(); - const UChar *t=PatternProps::skipIdentifier(s+index, msgLength-index); - return (int32_t)(t-s); -} - -int32_t -MessagePattern::skipDouble(int32_t index) { - int32_t msgLength=msg.length(); - while(index<msgLength) { - UChar c=msg.charAt(index); - // U+221E: Allow the infinity symbol, for ChoiceFormat patterns. - if((c<0x30 && c!=u_plus && c!=u_minus && c!=u_dot) || (c>0x39 && c!=u_e && c!=u_E && c!=0x221e)) { - break; - } - ++index; - } - return index; -} - -UBool -MessagePattern::isArgTypeChar(UChar32 c) { - return (u_a<=c && c<=u_z) || (u_A<=c && c<=u_Z); -} - -UBool -MessagePattern::isChoice(int32_t index) { - UChar c; - return - ((c=msg.charAt(index++))==u_c || c==u_C) && - ((c=msg.charAt(index++))==u_h || c==u_H) && - ((c=msg.charAt(index++))==u_o || c==u_O) && - ((c=msg.charAt(index++))==u_i || c==u_I) && - ((c=msg.charAt(index++))==u_c || c==u_C) && - ((c=msg.charAt(index))==u_e || c==u_E); -} - -UBool -MessagePattern::isPlural(int32_t index) { - UChar c; - return - ((c=msg.charAt(index++))==u_p || c==u_P) && - ((c=msg.charAt(index++))==u_l || c==u_L) && - ((c=msg.charAt(index++))==u_u || c==u_U) && - ((c=msg.charAt(index++))==u_r || c==u_R) && - ((c=msg.charAt(index++))==u_a || c==u_A) && - ((c=msg.charAt(index))==u_l || c==u_L); -} - -UBool -MessagePattern::isSelect(int32_t index) { - UChar c; - return - ((c=msg.charAt(index++))==u_s || c==u_S) && - ((c=msg.charAt(index++))==u_e || c==u_E) && - ((c=msg.charAt(index++))==u_l || c==u_L) && - ((c=msg.charAt(index++))==u_e || c==u_E) && - ((c=msg.charAt(index++))==u_c || c==u_C) && - ((c=msg.charAt(index))==u_t || c==u_T); -} - -UBool -MessagePattern::isOrdinal(int32_t index) { - UChar c; - return - ((c=msg.charAt(index++))==u_o || c==u_O) && - ((c=msg.charAt(index++))==u_r || c==u_R) && - ((c=msg.charAt(index++))==u_d || c==u_D) && - ((c=msg.charAt(index++))==u_i || c==u_I) && - ((c=msg.charAt(index++))==u_n || c==u_N) && - ((c=msg.charAt(index++))==u_a || c==u_A) && - ((c=msg.charAt(index))==u_l || c==u_L); -} - -UBool -MessagePattern::inMessageFormatPattern(int32_t nestingLevel) { - return nestingLevel>0 || partsList->a[0].type==UMSGPAT_PART_TYPE_MSG_START; -} - -UBool -MessagePattern::inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType) { - return - nestingLevel==1 && - parentType==UMSGPAT_ARG_TYPE_CHOICE && - partsList->a[0].type!=UMSGPAT_PART_TYPE_MSG_START; -} - -void -MessagePattern::addPart(UMessagePatternPartType type, int32_t index, int32_t length, - int32_t value, UErrorCode &errorCode) { - if(partsList->ensureCapacityForOneMore(partsLength, errorCode)) { - Part &part=partsList->a[partsLength++]; - part.type=type; - part.index=index; - part.length=(uint16_t)length; - part.value=(int16_t)value; - part.limitPartIndex=0; - } -} - -void -MessagePattern::addLimitPart(int32_t start, - UMessagePatternPartType type, int32_t index, int32_t length, - int32_t value, UErrorCode &errorCode) { - partsList->a[start].limitPartIndex=partsLength; - addPart(type, index, length, value, errorCode); -} - -void -MessagePattern::addArgDoublePart(double numericValue, int32_t start, int32_t length, - UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return; - } - int32_t numericIndex=numericValuesLength; - if(numericValuesList==NULL) { - numericValuesList=new MessagePatternDoubleList(); - if(numericValuesList==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - } else if(!numericValuesList->ensureCapacityForOneMore(numericValuesLength, errorCode)) { - return; - } else { - if(numericIndex>Part::MAX_VALUE) { - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return; - } - } - numericValuesList->a[numericValuesLength++]=numericValue; - addPart(UMSGPAT_PART_TYPE_ARG_DOUBLE, start, length, numericIndex, errorCode); -} - -void -MessagePattern::setParseError(UParseError *parseError, int32_t index) { - if(parseError==NULL) { - return; - } - parseError->offset=index; - - // Set preContext to some of msg before index. - // Avoid splitting a surrogate pair. - int32_t length=index; - if(length>=U_PARSE_CONTEXT_LEN) { - length=U_PARSE_CONTEXT_LEN-1; - if(length>0 && U16_IS_TRAIL(msg[index-length])) { - --length; - } - } - msg.extract(index-length, length, parseError->preContext); - parseError->preContext[length]=0; - - // Set postContext to some of msg starting at index. - length=msg.length()-index; - if(length>=U_PARSE_CONTEXT_LEN) { - length=U_PARSE_CONTEXT_LEN-1; - if(length>0 && U16_IS_LEAD(msg[index+length-1])) { - --length; - } - } - msg.extract(index, length, parseError->postContext); - parseError->postContext[length]=0; -} - -// MessageImpl ------------------------------------------------------------- *** - -void -MessageImpl::appendReducedApostrophes(const UnicodeString &s, int32_t start, int32_t limit, - UnicodeString &sb) { - int32_t doubleApos=-1; - for(;;) { - int32_t i=s.indexOf(u_apos, start); - if(i<0 || i>=limit) { - sb.append(s, start, limit-start); - break; - } - if(i==doubleApos) { - // Double apostrophe at start-1 and start==i, append one. - sb.append(u_apos); - ++start; - doubleApos=-1; - } else { - // Append text between apostrophes and skip this one. - sb.append(s, start, i-start); - doubleApos=start=i+1; - } - } -} - -// Ported from second half of ICU4J SelectFormat.format(String). -UnicodeString & -MessageImpl::appendSubMessageWithoutSkipSyntax(const MessagePattern &msgPattern, - int32_t msgStart, - UnicodeString &result) { - const UnicodeString &msgString=msgPattern.getPatternString(); - int32_t prevIndex=msgPattern.getPart(msgStart).getLimit(); - for(int32_t i=msgStart;;) { - const MessagePattern::Part &part=msgPattern.getPart(++i); - UMessagePatternPartType type=part.getType(); - int32_t index=part.getIndex(); - if(type==UMSGPAT_PART_TYPE_MSG_LIMIT) { - return result.append(msgString, prevIndex, index-prevIndex); - } else if(type==UMSGPAT_PART_TYPE_SKIP_SYNTAX) { - result.append(msgString, prevIndex, index-prevIndex); - prevIndex=part.getLimit(); - } else if(type==UMSGPAT_PART_TYPE_ARG_START) { - result.append(msgString, prevIndex, index-prevIndex); - prevIndex=index; - i=msgPattern.getLimitPartIndex(i); - index=msgPattern.getPart(i).getLimit(); - appendReducedApostrophes(msgString, prevIndex, index, result); - prevIndex=index; - } - } -} - -U_NAMESPACE_END - -#endif // !UCONFIG_NO_FORMATTING +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2011mar14 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/messagepattern.h" +#include "unicode/unistr.h" +#include "unicode/utf16.h" +#include "cmemory.h" +#include "cstring.h" +#include "messageimpl.h" +#include "patternprops.h" +#include "putilimp.h" +#include "uassert.h" + +U_NAMESPACE_BEGIN + +// Unicode character/code point constants ---------------------------------- *** + +static const UChar u_pound=0x23; +static const UChar u_apos=0x27; +static const UChar u_plus=0x2B; +static const UChar u_comma=0x2C; +static const UChar u_minus=0x2D; +static const UChar u_dot=0x2E; +static const UChar u_colon=0x3A; +static const UChar u_lessThan=0x3C; +static const UChar u_equal=0x3D; +static const UChar u_A=0x41; +static const UChar u_C=0x43; +static const UChar u_D=0x44; +static const UChar u_E=0x45; +static const UChar u_H=0x48; +static const UChar u_I=0x49; +static const UChar u_L=0x4C; +static const UChar u_N=0x4E; +static const UChar u_O=0x4F; +static const UChar u_P=0x50; +static const UChar u_R=0x52; +static const UChar u_S=0x53; +static const UChar u_T=0x54; +static const UChar u_U=0x55; +static const UChar u_Z=0x5A; +static const UChar u_a=0x61; +static const UChar u_c=0x63; +static const UChar u_d=0x64; +static const UChar u_e=0x65; +static const UChar u_f=0x66; +static const UChar u_h=0x68; +static const UChar u_i=0x69; +static const UChar u_l=0x6C; +static const UChar u_n=0x6E; +static const UChar u_o=0x6F; +static const UChar u_p=0x70; +static const UChar u_r=0x72; +static const UChar u_s=0x73; +static const UChar u_t=0x74; +static const UChar u_u=0x75; +static const UChar u_z=0x7A; +static const UChar u_leftCurlyBrace=0x7B; +static const UChar u_pipe=0x7C; +static const UChar u_rightCurlyBrace=0x7D; +static const UChar u_lessOrEqual=0x2264; // U+2264 is <= + +static const UChar kOffsetColon[]={ // "offset:" + u_o, u_f, u_f, u_s, u_e, u_t, u_colon +}; + +static const UChar kOther[]={ // "other" + u_o, u_t, u_h, u_e, u_r +}; + +// MessagePatternList ------------------------------------------------------ *** + +template<typename T, int32_t stackCapacity> +class MessagePatternList : public UMemory { +public: + MessagePatternList() {} + void copyFrom(const MessagePatternList<T, stackCapacity> &other, + int32_t length, + UErrorCode &errorCode); + UBool ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode); + UBool equals(const MessagePatternList<T, stackCapacity> &other, int32_t length) const { + for(int32_t i=0; i<length; ++i) { + if(a[i]!=other.a[i]) { return FALSE; } + } + return TRUE; + } + + MaybeStackArray<T, stackCapacity> a; +}; + +template<typename T, int32_t stackCapacity> +void +MessagePatternList<T, stackCapacity>::copyFrom( + const MessagePatternList<T, stackCapacity> &other, + int32_t length, + UErrorCode &errorCode) { + if(U_SUCCESS(errorCode) && length>0) { + if(length>a.getCapacity() && NULL==a.resize(length)) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + uprv_memcpy(a.getAlias(), other.a.getAlias(), (size_t)length*sizeof(T)); + } +} + +template<typename T, int32_t stackCapacity> +UBool +MessagePatternList<T, stackCapacity>::ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return FALSE; + } + if(a.getCapacity()>oldLength || a.resize(2*oldLength, oldLength)!=NULL) { + return TRUE; + } + errorCode=U_MEMORY_ALLOCATION_ERROR; + return FALSE; +} + +// MessagePatternList specializations -------------------------------------- *** + +class MessagePatternDoubleList : public MessagePatternList<double, 8> { +}; + +class MessagePatternPartsList : public MessagePatternList<MessagePattern::Part, 32> { +}; + +// MessagePattern constructors etc. ---------------------------------------- *** + +MessagePattern::MessagePattern(UErrorCode &errorCode) + : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE), + partsList(NULL), parts(NULL), partsLength(0), + numericValuesList(NULL), numericValues(NULL), numericValuesLength(0), + hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) { + init(errorCode); +} + +MessagePattern::MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode) + : aposMode(mode), + partsList(NULL), parts(NULL), partsLength(0), + numericValuesList(NULL), numericValues(NULL), numericValuesLength(0), + hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) { + init(errorCode); +} + +MessagePattern::MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) + : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE), + partsList(NULL), parts(NULL), partsLength(0), + numericValuesList(NULL), numericValues(NULL), numericValuesLength(0), + hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) { + if(init(errorCode)) { + parse(pattern, parseError, errorCode); + } +} + +UBool +MessagePattern::init(UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return FALSE; + } + partsList=new MessagePatternPartsList(); + if(partsList==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return FALSE; + } + parts=partsList->a.getAlias(); + return TRUE; +} + +MessagePattern::MessagePattern(const MessagePattern &other) + : UObject(other), aposMode(other.aposMode), msg(other.msg), + partsList(NULL), parts(NULL), partsLength(0), + numericValuesList(NULL), numericValues(NULL), numericValuesLength(0), + hasArgNames(other.hasArgNames), hasArgNumbers(other.hasArgNumbers), + needsAutoQuoting(other.needsAutoQuoting) { + UErrorCode errorCode=U_ZERO_ERROR; + if(!copyStorage(other, errorCode)) { + clear(); + } +} + +MessagePattern & +MessagePattern::operator=(const MessagePattern &other) { + if(this==&other) { + return *this; + } + aposMode=other.aposMode; + msg=other.msg; + hasArgNames=other.hasArgNames; + hasArgNumbers=other.hasArgNumbers; + needsAutoQuoting=other.needsAutoQuoting; + UErrorCode errorCode=U_ZERO_ERROR; + if(!copyStorage(other, errorCode)) { + clear(); + } + return *this; +} + +UBool +MessagePattern::copyStorage(const MessagePattern &other, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return FALSE; + } + parts=NULL; + partsLength=0; + numericValues=NULL; + numericValuesLength=0; + if(partsList==NULL) { + partsList=new MessagePatternPartsList(); + if(partsList==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return FALSE; + } + parts=partsList->a.getAlias(); + } + if(other.partsLength>0) { + partsList->copyFrom(*other.partsList, other.partsLength, errorCode); + if(U_FAILURE(errorCode)) { + return FALSE; + } + parts=partsList->a.getAlias(); + partsLength=other.partsLength; + } + if(other.numericValuesLength>0) { + if(numericValuesList==NULL) { + numericValuesList=new MessagePatternDoubleList(); + if(numericValuesList==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return FALSE; + } + numericValues=numericValuesList->a.getAlias(); + } + numericValuesList->copyFrom( + *other.numericValuesList, other.numericValuesLength, errorCode); + if(U_FAILURE(errorCode)) { + return FALSE; + } + numericValues=numericValuesList->a.getAlias(); + numericValuesLength=other.numericValuesLength; + } + return TRUE; +} + +MessagePattern::~MessagePattern() { + delete partsList; + delete numericValuesList; +} + +// MessagePattern API ------------------------------------------------------ *** + +MessagePattern & +MessagePattern::parse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) { + preParse(pattern, parseError, errorCode); + parseMessage(0, 0, 0, UMSGPAT_ARG_TYPE_NONE, parseError, errorCode); + postParse(); + return *this; +} + +MessagePattern & +MessagePattern::parseChoiceStyle(const UnicodeString &pattern, + UParseError *parseError, UErrorCode &errorCode) { + preParse(pattern, parseError, errorCode); + parseChoiceStyle(0, 0, parseError, errorCode); + postParse(); + return *this; +} + +MessagePattern & +MessagePattern::parsePluralStyle(const UnicodeString &pattern, + UParseError *parseError, UErrorCode &errorCode) { + preParse(pattern, parseError, errorCode); + parsePluralOrSelectStyle(UMSGPAT_ARG_TYPE_PLURAL, 0, 0, parseError, errorCode); + postParse(); + return *this; +} + +MessagePattern & +MessagePattern::parseSelectStyle(const UnicodeString &pattern, + UParseError *parseError, UErrorCode &errorCode) { + preParse(pattern, parseError, errorCode); + parsePluralOrSelectStyle(UMSGPAT_ARG_TYPE_SELECT, 0, 0, parseError, errorCode); + postParse(); + return *this; +} + +void +MessagePattern::clear() { + // Mostly the same as preParse(). + msg.remove(); + hasArgNames=hasArgNumbers=FALSE; + needsAutoQuoting=FALSE; + partsLength=0; + numericValuesLength=0; +} + +UBool +MessagePattern::operator==(const MessagePattern &other) const { + if(this==&other) { + return TRUE; + } + return + aposMode==other.aposMode && + msg==other.msg && + // parts.equals(o.parts) + partsLength==other.partsLength && + (partsLength==0 || partsList->equals(*other.partsList, partsLength)); + // No need to compare numericValues if msg and parts are the same. +} + +int32_t +MessagePattern::hashCode() const { + int32_t hash=(aposMode*37+msg.hashCode())*37+partsLength; + for(int32_t i=0; i<partsLength; ++i) { + hash=hash*37+parts[i].hashCode(); + } + return hash; +} + +int32_t +MessagePattern::validateArgumentName(const UnicodeString &name) { + if(!PatternProps::isIdentifier(name.getBuffer(), name.length())) { + return UMSGPAT_ARG_NAME_NOT_VALID; + } + return parseArgNumber(name, 0, name.length()); +} + +UnicodeString +MessagePattern::autoQuoteApostropheDeep() const { + if(!needsAutoQuoting) { + return msg; + } + UnicodeString modified(msg); + // Iterate backward so that the insertion indexes do not change. + int32_t count=countParts(); + for(int32_t i=count; i>0;) { + const Part &part=getPart(--i); + if(part.getType()==UMSGPAT_PART_TYPE_INSERT_CHAR) { + modified.insert(part.index, (UChar)part.value); + } + } + return modified; +} + +double +MessagePattern::getNumericValue(const Part &part) const { + UMessagePatternPartType type=part.type; + if(type==UMSGPAT_PART_TYPE_ARG_INT) { + return part.value; + } else if(type==UMSGPAT_PART_TYPE_ARG_DOUBLE) { + return numericValues[part.value]; + } else { + return UMSGPAT_NO_NUMERIC_VALUE; + } +} + +/** + * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified. + * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1) + * @return the "offset:" value. + * @draft ICU 4.8 + */ +double +MessagePattern::getPluralOffset(int32_t pluralStart) const { + const Part &part=getPart(pluralStart); + if(Part::hasNumericValue(part.type)) { + return getNumericValue(part); + } else { + return 0; + } +} + +// MessagePattern::Part ---------------------------------------------------- *** + +UBool +MessagePattern::Part::operator==(const Part &other) const { + if(this==&other) { + return TRUE; + } + return + type==other.type && + index==other.index && + length==other.length && + value==other.value && + limitPartIndex==other.limitPartIndex; +} + +// MessagePattern parser --------------------------------------------------- *** + +void +MessagePattern::preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return; + } + if(parseError!=NULL) { + parseError->line=0; + parseError->offset=0; + parseError->preContext[0]=0; + parseError->postContext[0]=0; + } + msg=pattern; + hasArgNames=hasArgNumbers=FALSE; + needsAutoQuoting=FALSE; + partsLength=0; + numericValuesLength=0; +} + +void +MessagePattern::postParse() { + if(partsList!=NULL) { + parts=partsList->a.getAlias(); + } + if(numericValuesList!=NULL) { + numericValues=numericValuesList->a.getAlias(); + } +} + +int32_t +MessagePattern::parseMessage(int32_t index, int32_t msgStartLength, + int32_t nestingLevel, UMessagePatternArgType parentType, + UParseError *parseError, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return 0; + } + if(nestingLevel>Part::MAX_VALUE) { + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + int32_t msgStart=partsLength; + addPart(UMSGPAT_PART_TYPE_MSG_START, index, msgStartLength, nestingLevel, errorCode); + index+=msgStartLength; + for(;;) { // while(index<msg.length()) with U_FAILURE(errorCode) check + if(U_FAILURE(errorCode)) { + return 0; + } + if(index>=msg.length()) { + break; + } + UChar c=msg.charAt(index++); + if(c==u_apos) { + if(index==msg.length()) { + // The apostrophe is the last character in the pattern. + // Add a Part for auto-quoting. + addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0, + u_apos, errorCode); // value=char to be inserted + needsAutoQuoting=TRUE; + } else { + c=msg.charAt(index); + if(c==u_apos) { + // double apostrophe, skip the second one + addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index++, 1, 0, errorCode); + } else if( + aposMode==UMSGPAT_APOS_DOUBLE_REQUIRED || + c==u_leftCurlyBrace || c==u_rightCurlyBrace || + (parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_pipe) || + (UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(parentType) && c==u_pound) + ) { + // skip the quote-starting apostrophe + addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index-1, 1, 0, errorCode); + // find the end of the quoted literal text + for(;;) { + index=msg.indexOf(u_apos, index+1); + if(index>=0) { + if(/*(index+1)<msg.length() &&*/ msg.charAt(index+1)==u_apos) { + // double apostrophe inside quoted literal text + // still encodes a single apostrophe, skip the second one + addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, ++index, 1, 0, errorCode); + } else { + // skip the quote-ending apostrophe + addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index++, 1, 0, errorCode); + break; + } + } else { + // The quoted text reaches to the end of the of the message. + index=msg.length(); + // Add a Part for auto-quoting. + addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0, + u_apos, errorCode); // value=char to be inserted + needsAutoQuoting=TRUE; + break; + } + } + } else { + // Interpret the apostrophe as literal text. + // Add a Part for auto-quoting. + addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0, + u_apos, errorCode); // value=char to be inserted + needsAutoQuoting=TRUE; + } + } + } else if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(parentType) && c==u_pound) { + // The unquoted # in a plural message fragment will be replaced + // with the (number-offset). + addPart(UMSGPAT_PART_TYPE_REPLACE_NUMBER, index-1, 1, 0, errorCode); + } else if(c==u_leftCurlyBrace) { + index=parseArg(index-1, 1, nestingLevel, parseError, errorCode); + } else if((nestingLevel>0 && c==u_rightCurlyBrace) || + (parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_pipe)) { + // Finish the message before the terminator. + // In a choice style, report the "}" substring only for the following ARG_LIMIT, + // not for this MSG_LIMIT. + int32_t limitLength=(parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_rightCurlyBrace) ? 0 : 1; + addLimitPart(msgStart, UMSGPAT_PART_TYPE_MSG_LIMIT, index-1, limitLength, + nestingLevel, errorCode); + if(parentType==UMSGPAT_ARG_TYPE_CHOICE) { + // Let the choice style parser see the '}' or '|'. + return index-1; + } else { + // continue parsing after the '}' + return index; + } + } // else: c is part of literal text + } + if(nestingLevel>0 && !inTopLevelChoiceMessage(nestingLevel, parentType)) { + setParseError(parseError, 0); // Unmatched '{' braces in message. + errorCode=U_UNMATCHED_BRACES; + return 0; + } + addLimitPart(msgStart, UMSGPAT_PART_TYPE_MSG_LIMIT, index, 0, nestingLevel, errorCode); + return index; +} + +int32_t +MessagePattern::parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel, + UParseError *parseError, UErrorCode &errorCode) { + int32_t argStart=partsLength; + UMessagePatternArgType argType=UMSGPAT_ARG_TYPE_NONE; + addPart(UMSGPAT_PART_TYPE_ARG_START, index, argStartLength, argType, errorCode); + if(U_FAILURE(errorCode)) { + return 0; + } + int32_t nameIndex=index=skipWhiteSpace(index+argStartLength); + if(index==msg.length()) { + setParseError(parseError, 0); // Unmatched '{' braces in message. + errorCode=U_UNMATCHED_BRACES; + return 0; + } + // parse argument name or number + index=skipIdentifier(index); + int32_t number=parseArgNumber(nameIndex, index); + if(number>=0) { + int32_t length=index-nameIndex; + if(length>Part::MAX_LENGTH || number>Part::MAX_VALUE) { + setParseError(parseError, nameIndex); // Argument number too large. + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + hasArgNumbers=TRUE; + addPart(UMSGPAT_PART_TYPE_ARG_NUMBER, nameIndex, length, number, errorCode); + } else if(number==UMSGPAT_ARG_NAME_NOT_NUMBER) { + int32_t length=index-nameIndex; + if(length>Part::MAX_LENGTH) { + setParseError(parseError, nameIndex); // Argument name too long. + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + hasArgNames=TRUE; + addPart(UMSGPAT_PART_TYPE_ARG_NAME, nameIndex, length, 0, errorCode); + } else { // number<-1 (ARG_NAME_NOT_VALID) + setParseError(parseError, nameIndex); // Bad argument syntax. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + index=skipWhiteSpace(index); + if(index==msg.length()) { + setParseError(parseError, 0); // Unmatched '{' braces in message. + errorCode=U_UNMATCHED_BRACES; + return 0; + } + UChar c=msg.charAt(index); + if(c==u_rightCurlyBrace) { + // all done + } else if(c!=u_comma) { + setParseError(parseError, nameIndex); // Bad argument syntax. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } else /* ',' */ { + // parse argument type: case-sensitive a-zA-Z + int32_t typeIndex=index=skipWhiteSpace(index+1); + while(index<msg.length() && isArgTypeChar(msg.charAt(index))) { + ++index; + } + int32_t length=index-typeIndex; + index=skipWhiteSpace(index); + if(index==msg.length()) { + setParseError(parseError, 0); // Unmatched '{' braces in message. + errorCode=U_UNMATCHED_BRACES; + return 0; + } + if(length==0 || ((c=msg.charAt(index))!=u_comma && c!=u_rightCurlyBrace)) { + setParseError(parseError, nameIndex); // Bad argument syntax. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + if(length>Part::MAX_LENGTH) { + setParseError(parseError, nameIndex); // Argument type name too long. + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + argType=UMSGPAT_ARG_TYPE_SIMPLE; + if(length==6) { + // case-insensitive comparisons for complex-type names + if(isChoice(typeIndex)) { + argType=UMSGPAT_ARG_TYPE_CHOICE; + } else if(isPlural(typeIndex)) { + argType=UMSGPAT_ARG_TYPE_PLURAL; + } else if(isSelect(typeIndex)) { + argType=UMSGPAT_ARG_TYPE_SELECT; + } + } else if(length==13) { + if(isSelect(typeIndex) && isOrdinal(typeIndex+6)) { + argType=UMSGPAT_ARG_TYPE_SELECTORDINAL; + } + } + // change the ARG_START type from NONE to argType + partsList->a[argStart].value=(int16_t)argType; + if(argType==UMSGPAT_ARG_TYPE_SIMPLE) { + addPart(UMSGPAT_PART_TYPE_ARG_TYPE, typeIndex, length, 0, errorCode); + } + // look for an argument style (pattern) + if(c==u_rightCurlyBrace) { + if(argType!=UMSGPAT_ARG_TYPE_SIMPLE) { + setParseError(parseError, nameIndex); // No style field for complex argument. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + } else /* ',' */ { + ++index; + if(argType==UMSGPAT_ARG_TYPE_SIMPLE) { + index=parseSimpleStyle(index, parseError, errorCode); + } else if(argType==UMSGPAT_ARG_TYPE_CHOICE) { + index=parseChoiceStyle(index, nestingLevel, parseError, errorCode); + } else { + index=parsePluralOrSelectStyle(argType, index, nestingLevel, parseError, errorCode); + } + } + } + // Argument parsing stopped on the '}'. + addLimitPart(argStart, UMSGPAT_PART_TYPE_ARG_LIMIT, index, 1, argType, errorCode); + return index+1; +} + +int32_t +MessagePattern::parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return 0; + } + int32_t start=index; + int32_t nestedBraces=0; + while(index<msg.length()) { + UChar c=msg.charAt(index++); + if(c==u_apos) { + // Treat apostrophe as quoting but include it in the style part. + // Find the end of the quoted literal text. + index=msg.indexOf(u_apos, index); + if(index<0) { + // Quoted literal argument style text reaches to the end of the message. + setParseError(parseError, start); + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + // skip the quote-ending apostrophe + ++index; + } else if(c==u_leftCurlyBrace) { + ++nestedBraces; + } else if(c==u_rightCurlyBrace) { + if(nestedBraces>0) { + --nestedBraces; + } else { + int32_t length=--index-start; + if(length>Part::MAX_LENGTH) { + setParseError(parseError, start); // Argument style text too long. + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + addPart(UMSGPAT_PART_TYPE_ARG_STYLE, start, length, 0, errorCode); + return index; + } + } // c is part of literal text + } + setParseError(parseError, 0); // Unmatched '{' braces in message. + errorCode=U_UNMATCHED_BRACES; + return 0; +} + +int32_t +MessagePattern::parseChoiceStyle(int32_t index, int32_t nestingLevel, + UParseError *parseError, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return 0; + } + int32_t start=index; + index=skipWhiteSpace(index); + if(index==msg.length() || msg.charAt(index)==u_rightCurlyBrace) { + setParseError(parseError, 0); // Missing choice argument pattern. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + for(;;) { + // The choice argument style contains |-separated (number, separator, message) triples. + // Parse the number. + int32_t numberIndex=index; + index=skipDouble(index); + int32_t length=index-numberIndex; + if(length==0) { + setParseError(parseError, start); // Bad choice pattern syntax. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + if(length>Part::MAX_LENGTH) { + setParseError(parseError, numberIndex); // Choice number too long. + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + parseDouble(numberIndex, index, TRUE, parseError, errorCode); // adds ARG_INT or ARG_DOUBLE + if(U_FAILURE(errorCode)) { + return 0; + } + // Parse the separator. + index=skipWhiteSpace(index); + if(index==msg.length()) { + setParseError(parseError, start); // Bad choice pattern syntax. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + UChar c=msg.charAt(index); + if(!(c==u_pound || c==u_lessThan || c==u_lessOrEqual)) { // U+2264 is <= + setParseError(parseError, start); // Expected choice separator (#<\u2264) instead of c. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, index, 1, 0, errorCode); + // Parse the message fragment. + index=parseMessage(++index, 0, nestingLevel+1, UMSGPAT_ARG_TYPE_CHOICE, parseError, errorCode); + if(U_FAILURE(errorCode)) { + return 0; + } + // parseMessage(..., CHOICE) returns the index of the terminator, or msg.length(). + if(index==msg.length()) { + return index; + } + if(msg.charAt(index)==u_rightCurlyBrace) { + if(!inMessageFormatPattern(nestingLevel)) { + setParseError(parseError, start); // Bad choice pattern syntax. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + return index; + } // else the terminator is '|' + index=skipWhiteSpace(index+1); + } +} + +int32_t +MessagePattern::parsePluralOrSelectStyle(UMessagePatternArgType argType, + int32_t index, int32_t nestingLevel, + UParseError *parseError, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return 0; + } + int32_t start=index; + UBool isEmpty=TRUE; + UBool hasOther=FALSE; + for(;;) { + // First, collect the selector looking for a small set of terminators. + // It would be a little faster to consider the syntax of each possible + // token right here, but that makes the code too complicated. + index=skipWhiteSpace(index); + UBool eos=index==msg.length(); + if(eos || msg.charAt(index)==u_rightCurlyBrace) { + if(eos==inMessageFormatPattern(nestingLevel)) { + setParseError(parseError, start); // Bad plural/select pattern syntax. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + if(!hasOther) { + setParseError(parseError, 0); // Missing 'other' keyword in plural/select pattern. + errorCode=U_DEFAULT_KEYWORD_MISSING; + return 0; + } + return index; + } + int32_t selectorIndex=index; + if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) && msg.charAt(selectorIndex)==u_equal) { + // explicit-value plural selector: =double + index=skipDouble(index+1); + int32_t length=index-selectorIndex; + if(length==1) { + setParseError(parseError, start); // Bad plural/select pattern syntax. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + if(length>Part::MAX_LENGTH) { + setParseError(parseError, selectorIndex); // Argument selector too long. + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorIndex, length, 0, errorCode); + parseDouble(selectorIndex+1, index, FALSE, + parseError, errorCode); // adds ARG_INT or ARG_DOUBLE + } else { + index=skipIdentifier(index); + int32_t length=index-selectorIndex; + if(length==0) { + setParseError(parseError, start); // Bad plural/select pattern syntax. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + // Note: The ':' in "offset:" is just beyond the skipIdentifier() range. + if( UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) && length==6 && index<msg.length() && + 0==msg.compare(selectorIndex, 7, kOffsetColon, 0, 7) + ) { + // plural offset, not a selector + if(!isEmpty) { + // Plural argument 'offset:' (if present) must precede key-message pairs. + setParseError(parseError, start); + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + // allow whitespace between offset: and its value + int32_t valueIndex=skipWhiteSpace(index+1); // The ':' is at index. + index=skipDouble(valueIndex); + if(index==valueIndex) { + setParseError(parseError, start); // Missing value for plural 'offset:'. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + if((index-valueIndex)>Part::MAX_LENGTH) { + setParseError(parseError, valueIndex); // Plural offset value too long. + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + parseDouble(valueIndex, index, FALSE, + parseError, errorCode); // adds ARG_INT or ARG_DOUBLE + if(U_FAILURE(errorCode)) { + return 0; + } + isEmpty=FALSE; + continue; // no message fragment after the offset + } else { + // normal selector word + if(length>Part::MAX_LENGTH) { + setParseError(parseError, selectorIndex); // Argument selector too long. + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorIndex, length, 0, errorCode); + if(0==msg.compare(selectorIndex, length, kOther, 0, 5)) { + hasOther=TRUE; + } + } + } + if(U_FAILURE(errorCode)) { + return 0; + } + + // parse the message fragment following the selector + index=skipWhiteSpace(index); + if(index==msg.length() || msg.charAt(index)!=u_leftCurlyBrace) { + setParseError(parseError, selectorIndex); // No message fragment after plural/select selector. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + index=parseMessage(index, 1, nestingLevel+1, argType, parseError, errorCode); + if(U_FAILURE(errorCode)) { + return 0; + } + isEmpty=FALSE; + } +} + +int32_t +MessagePattern::parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit) { + // If the identifier contains only ASCII digits, then it is an argument _number_ + // and must not have leading zeros (except "0" itself). + // Otherwise it is an argument _name_. + if(start>=limit) { + return UMSGPAT_ARG_NAME_NOT_VALID; + } + int32_t number; + // Defer numeric errors until we know there are only digits. + UBool badNumber; + UChar c=s.charAt(start++); + if(c==0x30) { + if(start==limit) { + return 0; + } else { + number=0; + badNumber=TRUE; // leading zero + } + } else if(0x31<=c && c<=0x39) { + number=c-0x30; + badNumber=FALSE; + } else { + return UMSGPAT_ARG_NAME_NOT_NUMBER; + } + while(start<limit) { + c=s.charAt(start++); + if(0x30<=c && c<=0x39) { + if(number>=INT32_MAX/10) { + badNumber=TRUE; // overflow + } + number=number*10+(c-0x30); + } else { + return UMSGPAT_ARG_NAME_NOT_NUMBER; + } + } + // There are only ASCII digits. + if(badNumber) { + return UMSGPAT_ARG_NAME_NOT_VALID; + } else { + return number; + } +} + +void +MessagePattern::parseDouble(int32_t start, int32_t limit, UBool allowInfinity, + UParseError *parseError, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return; + } + U_ASSERT(start<limit); + // fake loop for easy exit and single throw statement + for(;;) { /*loop doesn't iterate*/ + // fast path for small integers and infinity + int32_t value=0; + int32_t isNegative=0; // not boolean so that we can easily add it to value + int32_t index=start; + UChar c=msg.charAt(index++); + if(c==u_minus) { + isNegative=1; + if(index==limit) { + break; // no number + } + c=msg.charAt(index++); + } else if(c==u_plus) { + if(index==limit) { + break; // no number + } + c=msg.charAt(index++); + } + if(c==0x221e) { // infinity + if(allowInfinity && index==limit) { + double infinity=uprv_getInfinity(); + addArgDoublePart( + isNegative!=0 ? -infinity : infinity, + start, limit-start, errorCode); + return; + } else { + break; + } + } + // try to parse the number as a small integer but fall back to a double + while('0'<=c && c<='9') { + value=value*10+(c-'0'); + if(value>(Part::MAX_VALUE+isNegative)) { + break; // not a small-enough integer + } + if(index==limit) { + addPart(UMSGPAT_PART_TYPE_ARG_INT, start, limit-start, + isNegative!=0 ? -value : value, errorCode); + return; + } + c=msg.charAt(index++); + } + // Let Double.parseDouble() throw a NumberFormatException. + char numberChars[128]; + int32_t capacity=(int32_t)sizeof(numberChars); + int32_t length=limit-start; + if(length>=capacity) { + break; // number too long + } + msg.extract(start, length, numberChars, capacity, US_INV); + if((int32_t)uprv_strlen(numberChars)<length) { + break; // contains non-invariant character that was turned into NUL + } + char *end; + double numericValue=uprv_strtod(numberChars, &end); + if(end!=(numberChars+length)) { + break; // parsing error + } + addArgDoublePart(numericValue, start, length, errorCode); + return; + } + setParseError(parseError, start /*, limit*/); // Bad syntax for numeric value. + errorCode=U_PATTERN_SYNTAX_ERROR; + return; +} + +int32_t +MessagePattern::skipWhiteSpace(int32_t index) { + const UChar *s=msg.getBuffer(); + int32_t msgLength=msg.length(); + const UChar *t=PatternProps::skipWhiteSpace(s+index, msgLength-index); + return (int32_t)(t-s); +} + +int32_t +MessagePattern::skipIdentifier(int32_t index) { + const UChar *s=msg.getBuffer(); + int32_t msgLength=msg.length(); + const UChar *t=PatternProps::skipIdentifier(s+index, msgLength-index); + return (int32_t)(t-s); +} + +int32_t +MessagePattern::skipDouble(int32_t index) { + int32_t msgLength=msg.length(); + while(index<msgLength) { + UChar c=msg.charAt(index); + // U+221E: Allow the infinity symbol, for ChoiceFormat patterns. + if((c<0x30 && c!=u_plus && c!=u_minus && c!=u_dot) || (c>0x39 && c!=u_e && c!=u_E && c!=0x221e)) { + break; + } + ++index; + } + return index; +} + +UBool +MessagePattern::isArgTypeChar(UChar32 c) { + return (u_a<=c && c<=u_z) || (u_A<=c && c<=u_Z); +} + +UBool +MessagePattern::isChoice(int32_t index) { + UChar c; + return + ((c=msg.charAt(index++))==u_c || c==u_C) && + ((c=msg.charAt(index++))==u_h || c==u_H) && + ((c=msg.charAt(index++))==u_o || c==u_O) && + ((c=msg.charAt(index++))==u_i || c==u_I) && + ((c=msg.charAt(index++))==u_c || c==u_C) && + ((c=msg.charAt(index))==u_e || c==u_E); +} + +UBool +MessagePattern::isPlural(int32_t index) { + UChar c; + return + ((c=msg.charAt(index++))==u_p || c==u_P) && + ((c=msg.charAt(index++))==u_l || c==u_L) && + ((c=msg.charAt(index++))==u_u || c==u_U) && + ((c=msg.charAt(index++))==u_r || c==u_R) && + ((c=msg.charAt(index++))==u_a || c==u_A) && + ((c=msg.charAt(index))==u_l || c==u_L); +} + +UBool +MessagePattern::isSelect(int32_t index) { + UChar c; + return + ((c=msg.charAt(index++))==u_s || c==u_S) && + ((c=msg.charAt(index++))==u_e || c==u_E) && + ((c=msg.charAt(index++))==u_l || c==u_L) && + ((c=msg.charAt(index++))==u_e || c==u_E) && + ((c=msg.charAt(index++))==u_c || c==u_C) && + ((c=msg.charAt(index))==u_t || c==u_T); +} + +UBool +MessagePattern::isOrdinal(int32_t index) { + UChar c; + return + ((c=msg.charAt(index++))==u_o || c==u_O) && + ((c=msg.charAt(index++))==u_r || c==u_R) && + ((c=msg.charAt(index++))==u_d || c==u_D) && + ((c=msg.charAt(index++))==u_i || c==u_I) && + ((c=msg.charAt(index++))==u_n || c==u_N) && + ((c=msg.charAt(index++))==u_a || c==u_A) && + ((c=msg.charAt(index))==u_l || c==u_L); +} + +UBool +MessagePattern::inMessageFormatPattern(int32_t nestingLevel) { + return nestingLevel>0 || partsList->a[0].type==UMSGPAT_PART_TYPE_MSG_START; +} + +UBool +MessagePattern::inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType) { + return + nestingLevel==1 && + parentType==UMSGPAT_ARG_TYPE_CHOICE && + partsList->a[0].type!=UMSGPAT_PART_TYPE_MSG_START; +} + +void +MessagePattern::addPart(UMessagePatternPartType type, int32_t index, int32_t length, + int32_t value, UErrorCode &errorCode) { + if(partsList->ensureCapacityForOneMore(partsLength, errorCode)) { + Part &part=partsList->a[partsLength++]; + part.type=type; + part.index=index; + part.length=(uint16_t)length; + part.value=(int16_t)value; + part.limitPartIndex=0; + } +} + +void +MessagePattern::addLimitPart(int32_t start, + UMessagePatternPartType type, int32_t index, int32_t length, + int32_t value, UErrorCode &errorCode) { + partsList->a[start].limitPartIndex=partsLength; + addPart(type, index, length, value, errorCode); +} + +void +MessagePattern::addArgDoublePart(double numericValue, int32_t start, int32_t length, + UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return; + } + int32_t numericIndex=numericValuesLength; + if(numericValuesList==NULL) { + numericValuesList=new MessagePatternDoubleList(); + if(numericValuesList==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + } else if(!numericValuesList->ensureCapacityForOneMore(numericValuesLength, errorCode)) { + return; + } else { + if(numericIndex>Part::MAX_VALUE) { + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + } + numericValuesList->a[numericValuesLength++]=numericValue; + addPart(UMSGPAT_PART_TYPE_ARG_DOUBLE, start, length, numericIndex, errorCode); +} + +void +MessagePattern::setParseError(UParseError *parseError, int32_t index) { + if(parseError==NULL) { + return; + } + parseError->offset=index; + + // Set preContext to some of msg before index. + // Avoid splitting a surrogate pair. + int32_t length=index; + if(length>=U_PARSE_CONTEXT_LEN) { + length=U_PARSE_CONTEXT_LEN-1; + if(length>0 && U16_IS_TRAIL(msg[index-length])) { + --length; + } + } + msg.extract(index-length, length, parseError->preContext); + parseError->preContext[length]=0; + + // Set postContext to some of msg starting at index. + length=msg.length()-index; + if(length>=U_PARSE_CONTEXT_LEN) { + length=U_PARSE_CONTEXT_LEN-1; + if(length>0 && U16_IS_LEAD(msg[index+length-1])) { + --length; + } + } + msg.extract(index, length, parseError->postContext); + parseError->postContext[length]=0; +} + +// MessageImpl ------------------------------------------------------------- *** + +void +MessageImpl::appendReducedApostrophes(const UnicodeString &s, int32_t start, int32_t limit, + UnicodeString &sb) { + int32_t doubleApos=-1; + for(;;) { + int32_t i=s.indexOf(u_apos, start); + if(i<0 || i>=limit) { + sb.append(s, start, limit-start); + break; + } + if(i==doubleApos) { + // Double apostrophe at start-1 and start==i, append one. + sb.append(u_apos); + ++start; + doubleApos=-1; + } else { + // Append text between apostrophes and skip this one. + sb.append(s, start, i-start); + doubleApos=start=i+1; + } + } +} + +// Ported from second half of ICU4J SelectFormat.format(String). +UnicodeString & +MessageImpl::appendSubMessageWithoutSkipSyntax(const MessagePattern &msgPattern, + int32_t msgStart, + UnicodeString &result) { + const UnicodeString &msgString=msgPattern.getPatternString(); + int32_t prevIndex=msgPattern.getPart(msgStart).getLimit(); + for(int32_t i=msgStart;;) { + const MessagePattern::Part &part=msgPattern.getPart(++i); + UMessagePatternPartType type=part.getType(); + int32_t index=part.getIndex(); + if(type==UMSGPAT_PART_TYPE_MSG_LIMIT) { + return result.append(msgString, prevIndex, index-prevIndex); + } else if(type==UMSGPAT_PART_TYPE_SKIP_SYNTAX) { + result.append(msgString, prevIndex, index-prevIndex); + prevIndex=part.getLimit(); + } else if(type==UMSGPAT_PART_TYPE_ARG_START) { + result.append(msgString, prevIndex, index-prevIndex); + prevIndex=index; + i=msgPattern.getLimitPartIndex(i); + index=msgPattern.getPart(i).getLimit(); + appendReducedApostrophes(msgString, prevIndex, index, result); + prevIndex=index; + } + } +} + +U_NAMESPACE_END + +#endif // !UCONFIG_NO_FORMATTING |