diff options
author | mcheshkov <mcheshkov@yandex-team.ru> | 2022-02-10 16:46:15 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:15 +0300 |
commit | e9d19cec64684c9c1e6b0c98297e5b895cf904fe (patch) | |
tree | 2768b1223e96a8a0610a93d18425d9647c1123c8 /contrib/libs/icu/io/uscanf_p.cpp | |
parent | 60040c91ffe701a84689b2c6310ff845e65cff42 (diff) | |
download | ydb-e9d19cec64684c9c1e6b0c98297e5b895cf904fe.tar.gz |
Restoring authorship annotation for <mcheshkov@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/icu/io/uscanf_p.cpp')
-rw-r--r-- | contrib/libs/icu/io/uscanf_p.cpp | 2896 |
1 files changed, 1448 insertions, 1448 deletions
diff --git a/contrib/libs/icu/io/uscanf_p.cpp b/contrib/libs/icu/io/uscanf_p.cpp index 6dc1c09dee..73c4da73dc 100644 --- a/contrib/libs/icu/io/uscanf_p.cpp +++ b/contrib/libs/icu/io/uscanf_p.cpp @@ -1,1450 +1,1450 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1998-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* -* File uscnnf_p.c -* -* Modification History: -* -* Date Name Description -* 12/02/98 stephen Creation. -* 03/13/99 stephen Modified for new C API. -******************************************************************************* -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION - -#include "unicode/uchar.h" -#include "unicode/ustring.h" -#include "unicode/unum.h" -#include "unicode/udat.h" -#include "unicode/uset.h" -#include "uscanf.h" -#include "ufmt_cmn.h" -#include "ufile.h" -#include "locbund.h" - -#include "cmemory.h" -#include "ustr_cnv.h" - -/* flag characters for u_scanf */ -#define FLAG_ASTERISK 0x002A -#define FLAG_PAREN 0x0028 - -#define ISFLAG(s) (s) == FLAG_ASTERISK || \ - (s) == FLAG_PAREN - -/* special characters for u_scanf */ -#define SPEC_DOLLARSIGN 0x0024 - -/* unicode digits */ -#define DIGIT_ZERO 0x0030 -#define DIGIT_ONE 0x0031 -#define DIGIT_TWO 0x0032 -#define DIGIT_THREE 0x0033 -#define DIGIT_FOUR 0x0034 -#define DIGIT_FIVE 0x0035 -#define DIGIT_SIX 0x0036 -#define DIGIT_SEVEN 0x0037 -#define DIGIT_EIGHT 0x0038 -#define DIGIT_NINE 0x0039 - -#define ISDIGIT(s) (s) == DIGIT_ZERO || \ - (s) == DIGIT_ONE || \ - (s) == DIGIT_TWO || \ - (s) == DIGIT_THREE || \ - (s) == DIGIT_FOUR || \ - (s) == DIGIT_FIVE || \ - (s) == DIGIT_SIX || \ - (s) == DIGIT_SEVEN || \ - (s) == DIGIT_EIGHT || \ - (s) == DIGIT_NINE - -/* u_scanf modifiers */ -#define MOD_H 0x0068 -#define MOD_LOWERL 0x006C -#define MOD_L 0x004C - -#define ISMOD(s) (s) == MOD_H || \ - (s) == MOD_LOWERL || \ - (s) == MOD_L - -/** - * Struct encapsulating a single uscanf format specification. - */ -typedef struct u_scanf_spec_info { - int32_t fWidth; /* Width */ - - UChar fSpec; /* Format specification */ - - UChar fPadChar; /* Padding character */ - - UBool fSkipArg; /* TRUE if arg should be skipped */ - UBool fIsLongDouble; /* L flag */ - UBool fIsShort; /* h flag */ - UBool fIsLong; /* l flag */ - UBool fIsLongLong; /* ll flag */ - UBool fIsString; /* TRUE if this is a NULL-terminated string. */ -} u_scanf_spec_info; - - -/** - * Struct encapsulating a single u_scanf format specification. - */ -typedef struct u_scanf_spec { - u_scanf_spec_info fInfo; /* Information on this spec */ - int32_t fArgPos; /* Position of data in arg list */ -} u_scanf_spec; - -/** - * Parse a single u_scanf format specifier in Unicode. - * @param fmt A pointer to a '%' character in a u_scanf format specification. - * @param spec A pointer to a <TT>u_scanf_spec</TT> to receive the parsed - * format specifier. - * @return The number of characters contained in this specifier. - */ -static int32_t -u_scanf_parse_spec (const UChar *fmt, - u_scanf_spec *spec) -{ - const UChar *s = fmt; - const UChar *backup; - u_scanf_spec_info *info = &(spec->fInfo); - - /* initialize spec to default values */ - spec->fArgPos = -1; - - info->fWidth = -1; - info->fSpec = 0x0000; - info->fPadChar = 0x0020; - info->fSkipArg = FALSE; - info->fIsLongDouble = FALSE; - info->fIsShort = FALSE; - info->fIsLong = FALSE; - info->fIsLongLong = FALSE; - info->fIsString = TRUE; - - - /* skip over the initial '%' */ - s++; - - /* Check for positional argument */ - if(ISDIGIT(*s)) { - - /* Save the current position */ - backup = s; - - /* handle positional parameters */ - if(ISDIGIT(*s)) { - spec->fArgPos = (int) (*s++ - DIGIT_ZERO); - - while(ISDIGIT(*s)) { - spec->fArgPos *= 10; - spec->fArgPos += (int) (*s++ - DIGIT_ZERO); - } - } - - /* if there is no '$', don't read anything */ - if(*s != SPEC_DOLLARSIGN) { - spec->fArgPos = -1; - s = backup; - } - /* munge the '$' */ - else - s++; - } - - /* Get any format flags */ - while(ISFLAG(*s)) { - switch(*s++) { - - /* skip argument */ - case FLAG_ASTERISK: - info->fSkipArg = TRUE; - break; - - /* pad character specified */ - case FLAG_PAREN: - - /* first four characters are hex values for pad char */ - info->fPadChar = (UChar)ufmt_digitvalue(*s++); - info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); - info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); - info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); - - /* final character is ignored */ - s++; - - break; - } - } - - /* Get the width */ - if(ISDIGIT(*s)){ - info->fWidth = (int) (*s++ - DIGIT_ZERO); - - while(ISDIGIT(*s)) { - info->fWidth *= 10; - info->fWidth += (int) (*s++ - DIGIT_ZERO); - } - } - - /* Get any modifiers */ - if(ISMOD(*s)) { - switch(*s++) { - - /* short */ - case MOD_H: - info->fIsShort = TRUE; - break; - - /* long or long long */ - case MOD_LOWERL: - if(*s == MOD_LOWERL) { - info->fIsLongLong = TRUE; - /* skip over the next 'l' */ - s++; - } - else - info->fIsLong = TRUE; - break; - - /* long double */ - case MOD_L: - info->fIsLongDouble = TRUE; - break; - } - } - - /* finally, get the specifier letter */ - info->fSpec = *s++; - - /* return # of characters in this specifier */ - return (int32_t)(s - fmt); -} - -#define UP_PERCENT 0x0025 - - -/* ANSI style formatting */ -/* Use US-ASCII characters only for formatting */ - -/* % */ -#define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler} -/* s */ -#define UFMT_STRING {ufmt_string, u_scanf_string_handler} -/* c */ -#define UFMT_CHAR {ufmt_string, u_scanf_char_handler} -/* d, i */ -#define UFMT_INT {ufmt_int, u_scanf_integer_handler} -/* u */ -#define UFMT_UINT {ufmt_int, u_scanf_uinteger_handler} -/* o */ -#define UFMT_OCTAL {ufmt_int, u_scanf_octal_handler} -/* x, X */ -#define UFMT_HEX {ufmt_int, u_scanf_hex_handler} -/* f */ -#define UFMT_DOUBLE {ufmt_double, u_scanf_double_handler} -/* e, E */ -#define UFMT_SCIENTIFIC {ufmt_double, u_scanf_scientific_handler} -/* g, G */ -#define UFMT_SCIDBL {ufmt_double, u_scanf_scidbl_handler} -/* n */ -#define UFMT_COUNT {ufmt_count, u_scanf_count_handler} -/* [ */ -#define UFMT_SCANSET {ufmt_string, u_scanf_scanset_handler} - -/* non-ANSI extensions */ -/* Use US-ASCII characters only for formatting */ - -/* p */ -#define UFMT_POINTER {ufmt_pointer, u_scanf_pointer_handler} -/* V */ -#define UFMT_SPELLOUT {ufmt_double, u_scanf_spellout_handler} -/* P */ -#define UFMT_PERCENT {ufmt_double, u_scanf_percent_handler} -/* C K is old format */ -#define UFMT_UCHAR {ufmt_uchar, u_scanf_uchar_handler} -/* S U is old format */ -#define UFMT_USTRING {ufmt_ustring, u_scanf_ustring_handler} - - -#define UFMT_EMPTY {ufmt_empty, NULL} - -/** - * A u_scanf handler function. - * A u_scanf handler is responsible for handling a single u_scanf - * format specification, for example 'd' or 's'. - * @param stream The UFILE to which to write output. - * @param info A pointer to a <TT>u_scanf_spec_info</TT> struct containing - * information on the format specification. - * @param args A pointer to the argument data - * @param fmt A pointer to the first character in the format string - * following the spec. - * @param fmtConsumed On output, set to the number of characters consumed - * in <TT>fmt</TT>. Do nothing, if the argument isn't variable width. - * @param argConverted The number of arguments converted and assigned, or -1 if an - * error occurred. - * @return The number of code points consumed during reading. - */ -typedef int32_t (*u_scanf_handler) (UFILE *stream, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted); - -typedef struct u_scanf_info { - ufmt_type_info info; - u_scanf_handler handler; -} u_scanf_info; - -#define USCANF_NUM_FMT_HANDLERS 108 -#define USCANF_SYMBOL_BUFFER_SIZE 8 - -/* We do not use handlers for 0-0x1f */ -#define USCANF_BASE_FMT_HANDLERS 0x20 - - -static int32_t -u_scanf_skip_leading_ws(UFILE *input, - UChar pad) -{ - UChar c; - int32_t count = 0; - UBool isNotEOF; - - /* skip all leading ws in the input */ - while( ((isNotEOF = ufile_getch(input, &c)) == TRUE) && (c == pad || u_isWhitespace(c)) ) - { - count++; - } - - /* put the final character back on the input */ - if(isNotEOF) - u_fungetc(c, input); - - return count; -} - -/* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? */ -static int32_t -u_scanf_skip_leading_positive_sign(UFILE *input, - UNumberFormat *format, - UErrorCode *status) -{ - UChar c; - int32_t count = 0; - UBool isNotEOF; - UChar plusSymbol[USCANF_SYMBOL_BUFFER_SIZE]; - int32_t symbolLen; - UErrorCode localStatus = U_ZERO_ERROR; - - if (U_SUCCESS(*status)) { - symbolLen = unum_getSymbol(format, - UNUM_PLUS_SIGN_SYMBOL, - plusSymbol, - UPRV_LENGTHOF(plusSymbol), - &localStatus); - - if (U_SUCCESS(localStatus)) { - /* skip all leading ws in the input */ - while( ((isNotEOF = ufile_getch(input, &c)) == TRUE) && (count < symbolLen && c == plusSymbol[count]) ) - { - count++; - } - - /* put the final character back on the input */ - if(isNotEOF) { - u_fungetc(c, input); - } - } - } - - return count; -} - +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1998-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* +* File uscnnf_p.c +* +* Modification History: +* +* Date Name Description +* 12/02/98 stephen Creation. +* 03/13/99 stephen Modified for new C API. +******************************************************************************* +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION + +#include "unicode/uchar.h" +#include "unicode/ustring.h" +#include "unicode/unum.h" +#include "unicode/udat.h" +#include "unicode/uset.h" +#include "uscanf.h" +#include "ufmt_cmn.h" +#include "ufile.h" +#include "locbund.h" + +#include "cmemory.h" +#include "ustr_cnv.h" + +/* flag characters for u_scanf */ +#define FLAG_ASTERISK 0x002A +#define FLAG_PAREN 0x0028 + +#define ISFLAG(s) (s) == FLAG_ASTERISK || \ + (s) == FLAG_PAREN + +/* special characters for u_scanf */ +#define SPEC_DOLLARSIGN 0x0024 + +/* unicode digits */ +#define DIGIT_ZERO 0x0030 +#define DIGIT_ONE 0x0031 +#define DIGIT_TWO 0x0032 +#define DIGIT_THREE 0x0033 +#define DIGIT_FOUR 0x0034 +#define DIGIT_FIVE 0x0035 +#define DIGIT_SIX 0x0036 +#define DIGIT_SEVEN 0x0037 +#define DIGIT_EIGHT 0x0038 +#define DIGIT_NINE 0x0039 + +#define ISDIGIT(s) (s) == DIGIT_ZERO || \ + (s) == DIGIT_ONE || \ + (s) == DIGIT_TWO || \ + (s) == DIGIT_THREE || \ + (s) == DIGIT_FOUR || \ + (s) == DIGIT_FIVE || \ + (s) == DIGIT_SIX || \ + (s) == DIGIT_SEVEN || \ + (s) == DIGIT_EIGHT || \ + (s) == DIGIT_NINE + +/* u_scanf modifiers */ +#define MOD_H 0x0068 +#define MOD_LOWERL 0x006C +#define MOD_L 0x004C + +#define ISMOD(s) (s) == MOD_H || \ + (s) == MOD_LOWERL || \ + (s) == MOD_L + +/** + * Struct encapsulating a single uscanf format specification. + */ +typedef struct u_scanf_spec_info { + int32_t fWidth; /* Width */ + + UChar fSpec; /* Format specification */ + + UChar fPadChar; /* Padding character */ + + UBool fSkipArg; /* TRUE if arg should be skipped */ + UBool fIsLongDouble; /* L flag */ + UBool fIsShort; /* h flag */ + UBool fIsLong; /* l flag */ + UBool fIsLongLong; /* ll flag */ + UBool fIsString; /* TRUE if this is a NULL-terminated string. */ +} u_scanf_spec_info; + + +/** + * Struct encapsulating a single u_scanf format specification. + */ +typedef struct u_scanf_spec { + u_scanf_spec_info fInfo; /* Information on this spec */ + int32_t fArgPos; /* Position of data in arg list */ +} u_scanf_spec; + +/** + * Parse a single u_scanf format specifier in Unicode. + * @param fmt A pointer to a '%' character in a u_scanf format specification. + * @param spec A pointer to a <TT>u_scanf_spec</TT> to receive the parsed + * format specifier. + * @return The number of characters contained in this specifier. + */ static int32_t -u_scanf_simple_percent_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - (void)info; - (void)args; - (void)fmt; - (void)fmtConsumed; - - /* make sure the next character in the input is a percent */ - *argConverted = 0; - if(u_fgetc(input) != 0x0025) { - *argConverted = -1; - } - return 1; -} - -static int32_t -u_scanf_count_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - (void)input; - (void)fmt; - (void)fmtConsumed; - - /* in the special case of count, the u_scanf_spec_info's width */ - /* will contain the # of items converted thus far */ - if (!info->fSkipArg) { - if (info->fIsShort) - *(int16_t*)(args[0].ptrValue) = (int16_t)(UINT16_MAX & info->fWidth); - else if (info->fIsLongLong) - *(int64_t*)(args[0].ptrValue) = info->fWidth; - else - *(int32_t*)(args[0].ptrValue) = (int32_t)(UINT32_MAX & info->fWidth); - } - *argConverted = 0; - - /* we converted 0 args */ - return 0; -} - -static int32_t -u_scanf_double_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - (void)fmt; - (void)fmtConsumed; - - int32_t len; - double num; - UNumberFormat *format; - int32_t parsePos = 0; - int32_t skipped; - UErrorCode status = U_ZERO_ERROR; - - - /* skip all ws in the input */ - skipped = u_scanf_skip_leading_ws(input, info->fPadChar); - - /* fill the input's internal buffer */ - ufile_fill_uchar_buffer(input); - - /* determine the size of the input's buffer */ - len = (int32_t)(input->str.fLimit - input->str.fPos); - - /* truncate to the width, if specified */ - if(info->fWidth != -1) - len = ufmt_min(len, info->fWidth); - - /* get the formatter */ - format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); - - /* handle error */ - if(format == 0) - return 0; - - /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ - skipped += u_scanf_skip_leading_positive_sign(input, format, &status); - - /* parse the number */ - num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); - - if (!info->fSkipArg) { - if (info->fIsLong) - *(double*)(args[0].ptrValue) = num; - else if (info->fIsLongDouble) - *(long double*)(args[0].ptrValue) = num; - else - *(float*)(args[0].ptrValue) = (float)num; - } - - /* mask off any necessary bits */ - /* if(! info->fIsLong_double) - num &= DBL_MAX;*/ - - /* update the input's position to reflect consumed data */ - input->str.fPos += parsePos; - - /* we converted 1 arg */ - *argConverted = !info->fSkipArg; - return parsePos + skipped; -} - -#define UPRINTF_SYMBOL_BUFFER_SIZE 8 - -static int32_t -u_scanf_scientific_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - (void)fmt; - (void)fmtConsumed; - - int32_t len; - double num; - UNumberFormat *format; - int32_t parsePos = 0; - int32_t skipped; - UErrorCode status = U_ZERO_ERROR; - UChar srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE]; - int32_t srcLen, expLen; - UChar expBuf[UPRINTF_SYMBOL_BUFFER_SIZE]; - - - /* skip all ws in the input */ - skipped = u_scanf_skip_leading_ws(input, info->fPadChar); - - /* fill the input's internal buffer */ - ufile_fill_uchar_buffer(input); - - /* determine the size of the input's buffer */ - len = (int32_t)(input->str.fLimit - input->str.fPos); - - /* truncate to the width, if specified */ - if(info->fWidth != -1) - len = ufmt_min(len, info->fWidth); - - /* get the formatter */ - format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC); - - /* handle error */ - if(format == 0) - return 0; - - /* set the appropriate flags on the formatter */ - - srcLen = unum_getSymbol(format, - UNUM_EXPONENTIAL_SYMBOL, - srcExpBuf, - sizeof(srcExpBuf), - &status); - - /* Upper/lower case the e */ - if (info->fSpec == (UChar)0x65 /* e */) { - expLen = u_strToLower(expBuf, (int32_t)sizeof(expBuf), - srcExpBuf, srcLen, - input->str.fBundle.fLocale, - &status); - } - else { - expLen = u_strToUpper(expBuf, (int32_t)sizeof(expBuf), - srcExpBuf, srcLen, - input->str.fBundle.fLocale, - &status); - } - - unum_setSymbol(format, - UNUM_EXPONENTIAL_SYMBOL, - expBuf, - expLen, - &status); - - - - - /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ - skipped += u_scanf_skip_leading_positive_sign(input, format, &status); - - /* parse the number */ - num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); - - if (!info->fSkipArg) { - if (info->fIsLong) - *(double*)(args[0].ptrValue) = num; - else if (info->fIsLongDouble) - *(long double*)(args[0].ptrValue) = num; - else - *(float*)(args[0].ptrValue) = (float)num; - } - - /* mask off any necessary bits */ - /* if(! info->fIsLong_double) - num &= DBL_MAX;*/ - - /* update the input's position to reflect consumed data */ - input->str.fPos += parsePos; - - /* we converted 1 arg */ - *argConverted = !info->fSkipArg; - return parsePos + skipped; -} - -static int32_t -u_scanf_scidbl_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - (void)fmt; - (void)fmtConsumed; - - int32_t len; - double num; - UNumberFormat *scientificFormat, *genericFormat; - /*int32_t scientificResult, genericResult;*/ - double scientificResult, genericResult; - int32_t scientificParsePos = 0, genericParsePos = 0, parsePos = 0; - int32_t skipped; - UErrorCode scientificStatus = U_ZERO_ERROR; - UErrorCode genericStatus = U_ZERO_ERROR; - - - /* since we can't determine by scanning the characters whether */ - /* a number was formatted in the 'f' or 'g' styles, parse the */ - /* string with both formatters, and assume whichever one */ - /* parsed the most is the correct formatter to use */ - - - /* skip all ws in the input */ - skipped = u_scanf_skip_leading_ws(input, info->fPadChar); - - /* fill the input's internal buffer */ - ufile_fill_uchar_buffer(input); - - /* determine the size of the input's buffer */ - len = (int32_t)(input->str.fLimit - input->str.fPos); - - /* truncate to the width, if specified */ - if(info->fWidth != -1) - len = ufmt_min(len, info->fWidth); - - /* get the formatters */ - scientificFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC); - genericFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); - - /* handle error */ - if(scientificFormat == 0 || genericFormat == 0) - return 0; - - /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ - skipped += u_scanf_skip_leading_positive_sign(input, genericFormat, &genericStatus); - - /* parse the number using each format*/ - - scientificResult = unum_parseDouble(scientificFormat, input->str.fPos, len, - &scientificParsePos, &scientificStatus); - - genericResult = unum_parseDouble(genericFormat, input->str.fPos, len, - &genericParsePos, &genericStatus); - - /* determine which parse made it farther */ - if(scientificParsePos > genericParsePos) { - /* stash the result in num */ - num = scientificResult; - /* update the input's position to reflect consumed data */ - parsePos += scientificParsePos; - } - else { - /* stash the result in num */ - num = genericResult; - /* update the input's position to reflect consumed data */ - parsePos += genericParsePos; - } - input->str.fPos += parsePos; - - if (!info->fSkipArg) { - if (info->fIsLong) - *(double*)(args[0].ptrValue) = num; - else if (info->fIsLongDouble) - *(long double*)(args[0].ptrValue) = num; - else - *(float*)(args[0].ptrValue) = (float)num; - } - - /* mask off any necessary bits */ - /* if(! info->fIsLong_double) - num &= DBL_MAX;*/ - - /* we converted 1 arg */ - *argConverted = !info->fSkipArg; - return parsePos + skipped; -} - -static int32_t -u_scanf_integer_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - (void)fmt; - (void)fmtConsumed; - - int32_t len; - void *num = (void*) (args[0].ptrValue); - UNumberFormat *format; - int32_t parsePos = 0; - int32_t skipped; - UErrorCode status = U_ZERO_ERROR; - int64_t result; - - - /* skip all ws in the input */ - skipped = u_scanf_skip_leading_ws(input, info->fPadChar); - - /* fill the input's internal buffer */ - ufile_fill_uchar_buffer(input); - - /* determine the size of the input's buffer */ - len = (int32_t)(input->str.fLimit - input->str.fPos); - - /* truncate to the width, if specified */ - if(info->fWidth != -1) - len = ufmt_min(len, info->fWidth); - - /* get the formatter */ - format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); - - /* handle error */ - if(format == 0) - return 0; - - /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ - skipped += u_scanf_skip_leading_positive_sign(input, format, &status); - - /* parse the number */ - result = unum_parseInt64(format, input->str.fPos, len, &parsePos, &status); - - /* mask off any necessary bits */ - if (!info->fSkipArg) { - if (info->fIsShort) - *(int16_t*)num = (int16_t)(UINT16_MAX & result); - else if (info->fIsLongLong) - *(int64_t*)num = result; - else - *(int32_t*)num = (int32_t)(UINT32_MAX & result); - } - - /* update the input's position to reflect consumed data */ - input->str.fPos += parsePos; - - /* we converted 1 arg */ - *argConverted = !info->fSkipArg; - return parsePos + skipped; -} - -static int32_t -u_scanf_uinteger_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - /* TODO Fix this when Numberformat handles uint64_t */ - return u_scanf_integer_handler(input, info, args, fmt, fmtConsumed, argConverted); -} - -static int32_t -u_scanf_percent_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - (void)fmt; - (void)fmtConsumed; - - int32_t len; - double num; - UNumberFormat *format; - int32_t parsePos = 0; - UErrorCode status = U_ZERO_ERROR; - - - /* skip all ws in the input */ - u_scanf_skip_leading_ws(input, info->fPadChar); - - /* fill the input's internal buffer */ - ufile_fill_uchar_buffer(input); - - /* determine the size of the input's buffer */ - len = (int32_t)(input->str.fLimit - input->str.fPos); - - /* truncate to the width, if specified */ - if(info->fWidth != -1) - len = ufmt_min(len, info->fWidth); - - /* get the formatter */ - format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_PERCENT); - - /* handle error */ - if(format == 0) - return 0; - - /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ - u_scanf_skip_leading_positive_sign(input, format, &status); - - /* parse the number */ - num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); - - if (!info->fSkipArg) { - *(double*)(args[0].ptrValue) = num; - } - - /* mask off any necessary bits */ - /* if(! info->fIsLong_double) - num &= DBL_MAX;*/ - - /* update the input's position to reflect consumed data */ - input->str.fPos += parsePos; - - /* we converted 1 arg */ - *argConverted = !info->fSkipArg; - return parsePos; -} - -static int32_t -u_scanf_string_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - (void)fmt; - (void)fmtConsumed; - - const UChar *source; - UConverter *conv; - char *arg = (char*)(args[0].ptrValue); - char *alias = arg; - char *limit; - UErrorCode status = U_ZERO_ERROR; - int32_t count; - int32_t skipped = 0; - UChar c; - UBool isNotEOF = FALSE; - - /* skip all ws in the input */ - if (info->fIsString) { - skipped = u_scanf_skip_leading_ws(input, info->fPadChar); - } - - /* get the string one character at a time, truncating to the width */ - count = 0; - - /* open the default converter */ - conv = u_getDefaultConverter(&status); - - if(U_FAILURE(status)) - return -1; - +u_scanf_parse_spec (const UChar *fmt, + u_scanf_spec *spec) +{ + const UChar *s = fmt; + const UChar *backup; + u_scanf_spec_info *info = &(spec->fInfo); + + /* initialize spec to default values */ + spec->fArgPos = -1; + + info->fWidth = -1; + info->fSpec = 0x0000; + info->fPadChar = 0x0020; + info->fSkipArg = FALSE; + info->fIsLongDouble = FALSE; + info->fIsShort = FALSE; + info->fIsLong = FALSE; + info->fIsLongLong = FALSE; + info->fIsString = TRUE; + + + /* skip over the initial '%' */ + s++; + + /* Check for positional argument */ + if(ISDIGIT(*s)) { + + /* Save the current position */ + backup = s; + + /* handle positional parameters */ + if(ISDIGIT(*s)) { + spec->fArgPos = (int) (*s++ - DIGIT_ZERO); + + while(ISDIGIT(*s)) { + spec->fArgPos *= 10; + spec->fArgPos += (int) (*s++ - DIGIT_ZERO); + } + } + + /* if there is no '$', don't read anything */ + if(*s != SPEC_DOLLARSIGN) { + spec->fArgPos = -1; + s = backup; + } + /* munge the '$' */ + else + s++; + } + + /* Get any format flags */ + while(ISFLAG(*s)) { + switch(*s++) { + + /* skip argument */ + case FLAG_ASTERISK: + info->fSkipArg = TRUE; + break; + + /* pad character specified */ + case FLAG_PAREN: + + /* first four characters are hex values for pad char */ + info->fPadChar = (UChar)ufmt_digitvalue(*s++); + info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); + info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); + info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); + + /* final character is ignored */ + s++; + + break; + } + } + + /* Get the width */ + if(ISDIGIT(*s)){ + info->fWidth = (int) (*s++ - DIGIT_ZERO); + + while(ISDIGIT(*s)) { + info->fWidth *= 10; + info->fWidth += (int) (*s++ - DIGIT_ZERO); + } + } + + /* Get any modifiers */ + if(ISMOD(*s)) { + switch(*s++) { + + /* short */ + case MOD_H: + info->fIsShort = TRUE; + break; + + /* long or long long */ + case MOD_LOWERL: + if(*s == MOD_LOWERL) { + info->fIsLongLong = TRUE; + /* skip over the next 'l' */ + s++; + } + else + info->fIsLong = TRUE; + break; + + /* long double */ + case MOD_L: + info->fIsLongDouble = TRUE; + break; + } + } + + /* finally, get the specifier letter */ + info->fSpec = *s++; + + /* return # of characters in this specifier */ + return (int32_t)(s - fmt); +} + +#define UP_PERCENT 0x0025 + + +/* ANSI style formatting */ +/* Use US-ASCII characters only for formatting */ + +/* % */ +#define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler} +/* s */ +#define UFMT_STRING {ufmt_string, u_scanf_string_handler} +/* c */ +#define UFMT_CHAR {ufmt_string, u_scanf_char_handler} +/* d, i */ +#define UFMT_INT {ufmt_int, u_scanf_integer_handler} +/* u */ +#define UFMT_UINT {ufmt_int, u_scanf_uinteger_handler} +/* o */ +#define UFMT_OCTAL {ufmt_int, u_scanf_octal_handler} +/* x, X */ +#define UFMT_HEX {ufmt_int, u_scanf_hex_handler} +/* f */ +#define UFMT_DOUBLE {ufmt_double, u_scanf_double_handler} +/* e, E */ +#define UFMT_SCIENTIFIC {ufmt_double, u_scanf_scientific_handler} +/* g, G */ +#define UFMT_SCIDBL {ufmt_double, u_scanf_scidbl_handler} +/* n */ +#define UFMT_COUNT {ufmt_count, u_scanf_count_handler} +/* [ */ +#define UFMT_SCANSET {ufmt_string, u_scanf_scanset_handler} + +/* non-ANSI extensions */ +/* Use US-ASCII characters only for formatting */ + +/* p */ +#define UFMT_POINTER {ufmt_pointer, u_scanf_pointer_handler} +/* V */ +#define UFMT_SPELLOUT {ufmt_double, u_scanf_spellout_handler} +/* P */ +#define UFMT_PERCENT {ufmt_double, u_scanf_percent_handler} +/* C K is old format */ +#define UFMT_UCHAR {ufmt_uchar, u_scanf_uchar_handler} +/* S U is old format */ +#define UFMT_USTRING {ufmt_ustring, u_scanf_ustring_handler} + + +#define UFMT_EMPTY {ufmt_empty, NULL} + +/** + * A u_scanf handler function. + * A u_scanf handler is responsible for handling a single u_scanf + * format specification, for example 'd' or 's'. + * @param stream The UFILE to which to write output. + * @param info A pointer to a <TT>u_scanf_spec_info</TT> struct containing + * information on the format specification. + * @param args A pointer to the argument data + * @param fmt A pointer to the first character in the format string + * following the spec. + * @param fmtConsumed On output, set to the number of characters consumed + * in <TT>fmt</TT>. Do nothing, if the argument isn't variable width. + * @param argConverted The number of arguments converted and assigned, or -1 if an + * error occurred. + * @return The number of code points consumed during reading. + */ +typedef int32_t (*u_scanf_handler) (UFILE *stream, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted); + +typedef struct u_scanf_info { + ufmt_type_info info; + u_scanf_handler handler; +} u_scanf_info; + +#define USCANF_NUM_FMT_HANDLERS 108 +#define USCANF_SYMBOL_BUFFER_SIZE 8 + +/* We do not use handlers for 0-0x1f */ +#define USCANF_BASE_FMT_HANDLERS 0x20 + + +static int32_t +u_scanf_skip_leading_ws(UFILE *input, + UChar pad) +{ + UChar c; + int32_t count = 0; + UBool isNotEOF; + + /* skip all leading ws in the input */ + while( ((isNotEOF = ufile_getch(input, &c)) == TRUE) && (c == pad || u_isWhitespace(c)) ) + { + count++; + } + + /* put the final character back on the input */ + if(isNotEOF) + u_fungetc(c, input); + + return count; +} + +/* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? */ +static int32_t +u_scanf_skip_leading_positive_sign(UFILE *input, + UNumberFormat *format, + UErrorCode *status) +{ + UChar c; + int32_t count = 0; + UBool isNotEOF; + UChar plusSymbol[USCANF_SYMBOL_BUFFER_SIZE]; + int32_t symbolLen; + UErrorCode localStatus = U_ZERO_ERROR; + + if (U_SUCCESS(*status)) { + symbolLen = unum_getSymbol(format, + UNUM_PLUS_SIGN_SYMBOL, + plusSymbol, + UPRV_LENGTHOF(plusSymbol), + &localStatus); + + if (U_SUCCESS(localStatus)) { + /* skip all leading ws in the input */ + while( ((isNotEOF = ufile_getch(input, &c)) == TRUE) && (count < symbolLen && c == plusSymbol[count]) ) + { + count++; + } + + /* put the final character back on the input */ + if(isNotEOF) { + u_fungetc(c, input); + } + } + } + + return count; +} + +static int32_t +u_scanf_simple_percent_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)info; + (void)args; + (void)fmt; + (void)fmtConsumed; + + /* make sure the next character in the input is a percent */ + *argConverted = 0; + if(u_fgetc(input) != 0x0025) { + *argConverted = -1; + } + return 1; +} + +static int32_t +u_scanf_count_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)input; + (void)fmt; + (void)fmtConsumed; + + /* in the special case of count, the u_scanf_spec_info's width */ + /* will contain the # of items converted thus far */ + if (!info->fSkipArg) { + if (info->fIsShort) + *(int16_t*)(args[0].ptrValue) = (int16_t)(UINT16_MAX & info->fWidth); + else if (info->fIsLongLong) + *(int64_t*)(args[0].ptrValue) = info->fWidth; + else + *(int32_t*)(args[0].ptrValue) = (int32_t)(UINT32_MAX & info->fWidth); + } + *argConverted = 0; + + /* we converted 0 args */ + return 0; +} + +static int32_t +u_scanf_double_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + double num; + UNumberFormat *format; + int32_t parsePos = 0; + int32_t skipped; + UErrorCode status = U_ZERO_ERROR; + + + /* skip all ws in the input */ + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) + len = ufmt_min(len, info->fWidth); + + /* get the formatter */ + format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); + + /* handle error */ + if(format == 0) + return 0; + + /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ + skipped += u_scanf_skip_leading_positive_sign(input, format, &status); + + /* parse the number */ + num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); + + if (!info->fSkipArg) { + if (info->fIsLong) + *(double*)(args[0].ptrValue) = num; + else if (info->fIsLongDouble) + *(long double*)(args[0].ptrValue) = num; + else + *(float*)(args[0].ptrValue) = (float)num; + } + + /* mask off any necessary bits */ + /* if(! info->fIsLong_double) + num &= DBL_MAX;*/ + + /* update the input's position to reflect consumed data */ + input->str.fPos += parsePos; + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return parsePos + skipped; +} + +#define UPRINTF_SYMBOL_BUFFER_SIZE 8 + +static int32_t +u_scanf_scientific_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + double num; + UNumberFormat *format; + int32_t parsePos = 0; + int32_t skipped; + UErrorCode status = U_ZERO_ERROR; + UChar srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE]; + int32_t srcLen, expLen; + UChar expBuf[UPRINTF_SYMBOL_BUFFER_SIZE]; + + + /* skip all ws in the input */ + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) + len = ufmt_min(len, info->fWidth); + + /* get the formatter */ + format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC); + + /* handle error */ + if(format == 0) + return 0; + + /* set the appropriate flags on the formatter */ + + srcLen = unum_getSymbol(format, + UNUM_EXPONENTIAL_SYMBOL, + srcExpBuf, + sizeof(srcExpBuf), + &status); + + /* Upper/lower case the e */ + if (info->fSpec == (UChar)0x65 /* e */) { + expLen = u_strToLower(expBuf, (int32_t)sizeof(expBuf), + srcExpBuf, srcLen, + input->str.fBundle.fLocale, + &status); + } + else { + expLen = u_strToUpper(expBuf, (int32_t)sizeof(expBuf), + srcExpBuf, srcLen, + input->str.fBundle.fLocale, + &status); + } + + unum_setSymbol(format, + UNUM_EXPONENTIAL_SYMBOL, + expBuf, + expLen, + &status); + + + + + /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ + skipped += u_scanf_skip_leading_positive_sign(input, format, &status); + + /* parse the number */ + num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); + + if (!info->fSkipArg) { + if (info->fIsLong) + *(double*)(args[0].ptrValue) = num; + else if (info->fIsLongDouble) + *(long double*)(args[0].ptrValue) = num; + else + *(float*)(args[0].ptrValue) = (float)num; + } + + /* mask off any necessary bits */ + /* if(! info->fIsLong_double) + num &= DBL_MAX;*/ + + /* update the input's position to reflect consumed data */ + input->str.fPos += parsePos; + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return parsePos + skipped; +} + +static int32_t +u_scanf_scidbl_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + double num; + UNumberFormat *scientificFormat, *genericFormat; + /*int32_t scientificResult, genericResult;*/ + double scientificResult, genericResult; + int32_t scientificParsePos = 0, genericParsePos = 0, parsePos = 0; + int32_t skipped; + UErrorCode scientificStatus = U_ZERO_ERROR; + UErrorCode genericStatus = U_ZERO_ERROR; + + + /* since we can't determine by scanning the characters whether */ + /* a number was formatted in the 'f' or 'g' styles, parse the */ + /* string with both formatters, and assume whichever one */ + /* parsed the most is the correct formatter to use */ + + + /* skip all ws in the input */ + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) + len = ufmt_min(len, info->fWidth); + + /* get the formatters */ + scientificFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC); + genericFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); + + /* handle error */ + if(scientificFormat == 0 || genericFormat == 0) + return 0; + + /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ + skipped += u_scanf_skip_leading_positive_sign(input, genericFormat, &genericStatus); + + /* parse the number using each format*/ + + scientificResult = unum_parseDouble(scientificFormat, input->str.fPos, len, + &scientificParsePos, &scientificStatus); + + genericResult = unum_parseDouble(genericFormat, input->str.fPos, len, + &genericParsePos, &genericStatus); + + /* determine which parse made it farther */ + if(scientificParsePos > genericParsePos) { + /* stash the result in num */ + num = scientificResult; + /* update the input's position to reflect consumed data */ + parsePos += scientificParsePos; + } + else { + /* stash the result in num */ + num = genericResult; + /* update the input's position to reflect consumed data */ + parsePos += genericParsePos; + } + input->str.fPos += parsePos; + + if (!info->fSkipArg) { + if (info->fIsLong) + *(double*)(args[0].ptrValue) = num; + else if (info->fIsLongDouble) + *(long double*)(args[0].ptrValue) = num; + else + *(float*)(args[0].ptrValue) = (float)num; + } + + /* mask off any necessary bits */ + /* if(! info->fIsLong_double) + num &= DBL_MAX;*/ + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return parsePos + skipped; +} + +static int32_t +u_scanf_integer_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + void *num = (void*) (args[0].ptrValue); + UNumberFormat *format; + int32_t parsePos = 0; + int32_t skipped; + UErrorCode status = U_ZERO_ERROR; + int64_t result; + + + /* skip all ws in the input */ + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) + len = ufmt_min(len, info->fWidth); + + /* get the formatter */ + format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); + + /* handle error */ + if(format == 0) + return 0; + + /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ + skipped += u_scanf_skip_leading_positive_sign(input, format, &status); + + /* parse the number */ + result = unum_parseInt64(format, input->str.fPos, len, &parsePos, &status); + + /* mask off any necessary bits */ + if (!info->fSkipArg) { + if (info->fIsShort) + *(int16_t*)num = (int16_t)(UINT16_MAX & result); + else if (info->fIsLongLong) + *(int64_t*)num = result; + else + *(int32_t*)num = (int32_t)(UINT32_MAX & result); + } + + /* update the input's position to reflect consumed data */ + input->str.fPos += parsePos; + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return parsePos + skipped; +} + +static int32_t +u_scanf_uinteger_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + /* TODO Fix this when Numberformat handles uint64_t */ + return u_scanf_integer_handler(input, info, args, fmt, fmtConsumed, argConverted); +} + +static int32_t +u_scanf_percent_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + double num; + UNumberFormat *format; + int32_t parsePos = 0; + UErrorCode status = U_ZERO_ERROR; + + + /* skip all ws in the input */ + u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) + len = ufmt_min(len, info->fWidth); + + /* get the formatter */ + format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_PERCENT); + + /* handle error */ + if(format == 0) + return 0; + + /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ + u_scanf_skip_leading_positive_sign(input, format, &status); + + /* parse the number */ + num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); + + if (!info->fSkipArg) { + *(double*)(args[0].ptrValue) = num; + } + + /* mask off any necessary bits */ + /* if(! info->fIsLong_double) + num &= DBL_MAX;*/ + + /* update the input's position to reflect consumed data */ + input->str.fPos += parsePos; + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return parsePos; +} + +static int32_t +u_scanf_string_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + const UChar *source; + UConverter *conv; + char *arg = (char*)(args[0].ptrValue); + char *alias = arg; + char *limit; + UErrorCode status = U_ZERO_ERROR; + int32_t count; + int32_t skipped = 0; + UChar c; + UBool isNotEOF = FALSE; + + /* skip all ws in the input */ + if (info->fIsString) { + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + } + + /* get the string one character at a time, truncating to the width */ + count = 0; + + /* open the default converter */ + conv = u_getDefaultConverter(&status); + + if(U_FAILURE(status)) + return -1; + + while( (info->fWidth == -1 || count < info->fWidth) + && ((isNotEOF = ufile_getch(input, &c)) == TRUE) + && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c)))) + { + + if (!info->fSkipArg) { + /* put the character from the input onto the target */ + source = &c; + /* Since we do this one character at a time, do it this way. */ + if (info->fWidth > 0) { + limit = alias + info->fWidth - count; + } + else { + limit = alias + ucnv_getMaxCharSize(conv); + } + + /* convert the character to the default codepage */ + ucnv_fromUnicode(conv, &alias, limit, &source, source + 1, + NULL, TRUE, &status); + + if(U_FAILURE(status)) { + /* clean up */ + u_releaseDefaultConverter(conv); + return -1; + } + } + + /* increment the count */ + ++count; + } + + /* put the final character we read back on the input */ + if (!info->fSkipArg) { + if ((info->fWidth == -1 || count < info->fWidth) && isNotEOF) + u_fungetc(c, input); + + /* add the terminator */ + if (info->fIsString) { + *alias = 0x00; + } + } + + /* clean up */ + u_releaseDefaultConverter(conv); + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return count + skipped; +} + +static int32_t +u_scanf_char_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + if (info->fWidth < 0) { + info->fWidth = 1; + } + info->fIsString = FALSE; + return u_scanf_string_handler(input, info, args, fmt, fmtConsumed, argConverted); +} + +static int32_t +u_scanf_ustring_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + UChar *arg = (UChar*)(args[0].ptrValue); + UChar *alias = arg; + int32_t count; + int32_t skipped = 0; + UChar c; + UBool isNotEOF = FALSE; + + /* skip all ws in the input */ + if (info->fIsString) { + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + } + + /* get the string one character at a time, truncating to the width */ + count = 0; + while( (info->fWidth == -1 || count < info->fWidth) - && ((isNotEOF = ufile_getch(input, &c)) == TRUE) - && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c)))) - { - - if (!info->fSkipArg) { - /* put the character from the input onto the target */ - source = &c; - /* Since we do this one character at a time, do it this way. */ - if (info->fWidth > 0) { - limit = alias + info->fWidth - count; - } - else { - limit = alias + ucnv_getMaxCharSize(conv); - } - - /* convert the character to the default codepage */ - ucnv_fromUnicode(conv, &alias, limit, &source, source + 1, - NULL, TRUE, &status); - - if(U_FAILURE(status)) { - /* clean up */ - u_releaseDefaultConverter(conv); - return -1; - } - } - - /* increment the count */ - ++count; - } - - /* put the final character we read back on the input */ - if (!info->fSkipArg) { - if ((info->fWidth == -1 || count < info->fWidth) && isNotEOF) - u_fungetc(c, input); - - /* add the terminator */ - if (info->fIsString) { - *alias = 0x00; - } - } - - /* clean up */ - u_releaseDefaultConverter(conv); - - /* we converted 1 arg */ - *argConverted = !info->fSkipArg; - return count + skipped; -} - -static int32_t -u_scanf_char_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - if (info->fWidth < 0) { - info->fWidth = 1; - } - info->fIsString = FALSE; - return u_scanf_string_handler(input, info, args, fmt, fmtConsumed, argConverted); -} - -static int32_t -u_scanf_ustring_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - (void)fmt; - (void)fmtConsumed; - - UChar *arg = (UChar*)(args[0].ptrValue); - UChar *alias = arg; - int32_t count; - int32_t skipped = 0; - UChar c; - UBool isNotEOF = FALSE; - - /* skip all ws in the input */ - if (info->fIsString) { - skipped = u_scanf_skip_leading_ws(input, info->fPadChar); - } - - /* get the string one character at a time, truncating to the width */ - count = 0; - - while( (info->fWidth == -1 || count < info->fWidth) - && ((isNotEOF = ufile_getch(input, &c)) == TRUE) - && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c)))) - { - - /* put the character from the input onto the target */ - if (!info->fSkipArg) { - *alias++ = c; - } - - /* increment the count */ - ++count; - } - - /* put the final character we read back on the input */ - if (!info->fSkipArg) { - if((info->fWidth == -1 || count < info->fWidth) && isNotEOF) { - u_fungetc(c, input); - } - - /* add the terminator */ - if (info->fIsString) { - *alias = 0x0000; - } - } - - /* we converted 1 arg */ - *argConverted = !info->fSkipArg; - return count + skipped; -} - -static int32_t -u_scanf_uchar_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - if (info->fWidth < 0) { - info->fWidth = 1; - } - info->fIsString = FALSE; - return u_scanf_ustring_handler(input, info, args, fmt, fmtConsumed, argConverted); -} - -static int32_t -u_scanf_spellout_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - (void)fmt; - (void)fmtConsumed; - - int32_t len; - double num; - UNumberFormat *format; - int32_t parsePos = 0; - int32_t skipped; - UErrorCode status = U_ZERO_ERROR; - - - /* skip all ws in the input */ - skipped = u_scanf_skip_leading_ws(input, info->fPadChar); - - /* fill the input's internal buffer */ - ufile_fill_uchar_buffer(input); - - /* determine the size of the input's buffer */ - len = (int32_t)(input->str.fLimit - input->str.fPos); - - /* truncate to the width, if specified */ - if(info->fWidth != -1) - len = ufmt_min(len, info->fWidth); - - /* get the formatter */ - format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SPELLOUT); - - /* handle error */ - if(format == 0) - return 0; - - /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ - /* This is not applicable to RBNF. */ - /*skipped += u_scanf_skip_leading_positive_sign(input, format, &status);*/ - - /* parse the number */ - num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); - - if (!info->fSkipArg) { - *(double*)(args[0].ptrValue) = num; - } - - /* mask off any necessary bits */ - /* if(! info->fIsLong_double) - num &= DBL_MAX;*/ - - /* update the input's position to reflect consumed data */ - input->str.fPos += parsePos; - - /* we converted 1 arg */ - *argConverted = !info->fSkipArg; - return parsePos + skipped; -} - -static int32_t -u_scanf_hex_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - (void)fmt; - (void)fmtConsumed; - - int32_t len; - int32_t skipped; - void *num = (void*) (args[0].ptrValue); - int64_t result; - - /* skip all ws in the input */ - skipped = u_scanf_skip_leading_ws(input, info->fPadChar); - - /* fill the input's internal buffer */ - ufile_fill_uchar_buffer(input); - - /* determine the size of the input's buffer */ - len = (int32_t)(input->str.fLimit - input->str.fPos); - - /* truncate to the width, if specified */ - if(info->fWidth != -1) - len = ufmt_min(len, info->fWidth); - - /* check for alternate form */ - if( *(input->str.fPos) == 0x0030 && - (*(input->str.fPos + 1) == 0x0078 || *(input->str.fPos + 1) == 0x0058) ) { - - /* skip the '0' and 'x' or 'X' if present */ - input->str.fPos += 2; - len -= 2; - } - - /* parse the number */ - result = ufmt_uto64(input->str.fPos, &len, 16); - - /* update the input's position to reflect consumed data */ - input->str.fPos += len; - - /* mask off any necessary bits */ - if (!info->fSkipArg) { - if (info->fIsShort) - *(int16_t*)num = (int16_t)(UINT16_MAX & result); - else if (info->fIsLongLong) - *(int64_t*)num = result; - else - *(int32_t*)num = (int32_t)(UINT32_MAX & result); - } - - /* we converted 1 arg */ - *argConverted = !info->fSkipArg; - return len + skipped; -} - -static int32_t -u_scanf_octal_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - (void)fmt; - (void)fmtConsumed; - - int32_t len; - int32_t skipped; - void *num = (void*) (args[0].ptrValue); - int64_t result; - - /* skip all ws in the input */ - skipped = u_scanf_skip_leading_ws(input, info->fPadChar); - - /* fill the input's internal buffer */ - ufile_fill_uchar_buffer(input); - - /* determine the size of the input's buffer */ - len = (int32_t)(input->str.fLimit - input->str.fPos); - - /* truncate to the width, if specified */ - if(info->fWidth != -1) - len = ufmt_min(len, info->fWidth); - - /* parse the number */ - result = ufmt_uto64(input->str.fPos, &len, 8); - - /* update the input's position to reflect consumed data */ - input->str.fPos += len; - - /* mask off any necessary bits */ - if (!info->fSkipArg) { - if (info->fIsShort) - *(int16_t*)num = (int16_t)(UINT16_MAX & result); - else if (info->fIsLongLong) - *(int64_t*)num = result; - else - *(int32_t*)num = (int32_t)(UINT32_MAX & result); - } - - /* we converted 1 arg */ - *argConverted = !info->fSkipArg; - return len + skipped; -} - -static int32_t -u_scanf_pointer_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - (void)fmt; - (void)fmtConsumed; - - int32_t len; - int32_t skipped; - void *result; - void **p = (void**)(args[0].ptrValue); - - - /* skip all ws in the input */ - skipped = u_scanf_skip_leading_ws(input, info->fPadChar); - - /* fill the input's internal buffer */ - ufile_fill_uchar_buffer(input); - - /* determine the size of the input's buffer */ - len = (int32_t)(input->str.fLimit - input->str.fPos); - - /* truncate to the width, if specified */ - if(info->fWidth != -1) { - len = ufmt_min(len, info->fWidth); - } - - /* Make sure that we don't consume too much */ - if (len > (int32_t)(sizeof(void*)*2)) { - len = (int32_t)(sizeof(void*)*2); - } - - /* parse the pointer - assign to temporary value */ - result = ufmt_utop(input->str.fPos, &len); - - if (!info->fSkipArg) { - *p = result; - } - - /* update the input's position to reflect consumed data */ - input->str.fPos += len; - - /* we converted 1 arg */ - *argConverted = !info->fSkipArg; - return len + skipped; -} - -static int32_t -u_scanf_scanset_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - USet *scanset; - UErrorCode status = U_ZERO_ERROR; - int32_t chLeft = INT32_MAX; - UChar32 c; - UChar *alias = (UChar*) (args[0].ptrValue); - UBool isNotEOF = FALSE; - UBool readCharacter = FALSE; - - /* Create an empty set */ - scanset = uset_open(0, -1); - - /* Back up one to get the [ */ - fmt--; - - /* truncate to the width, if specified and alias the target */ - if(info->fWidth >= 0) { - chLeft = info->fWidth; - } - - /* parse the scanset from the fmt string */ - *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status); - - /* verify that the parse was successful */ - if (U_SUCCESS(status)) { - c=0; - - /* grab characters one at a time and make sure they are in the scanset */ - while(chLeft > 0) { - if ( ((isNotEOF = ufile_getch32(input, &c)) == TRUE) && uset_contains(scanset, c) ) { - readCharacter = TRUE; - if (!info->fSkipArg) { - int32_t idx = 0; - UBool isError = FALSE; - - U16_APPEND(alias, idx, chLeft, c, isError); - if (isError) { - break; - } - alias += idx; - } - chLeft -= (1 + U_IS_SUPPLEMENTARY(c)); - } - else { - /* if the character's not in the scanset, break out */ - break; - } - } - - /* put the final character we read back on the input */ - if(isNotEOF && chLeft > 0) { - u_fungetc(c, input); - } - } - - uset_close(scanset); - - /* if we didn't match at least 1 character, fail */ - if(!readCharacter) - return -1; - /* otherwise, add the terminator */ - else if (!info->fSkipArg) { - *alias = 0x00; - } - - /* we converted 1 arg */ - *argConverted = !info->fSkipArg; - return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft; -} - -/* Use US-ASCII characters only for formatting. Most codepages have - characters 20-7F from Unicode. Using any other codepage specific - characters will make it very difficult to format the string on - non-Unicode machines */ -static const u_scanf_info g_u_scanf_infos[USCANF_NUM_FMT_HANDLERS] = { -/* 0x20 */ - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - UFMT_EMPTY, UFMT_SIMPLE_PERCENT,UFMT_EMPTY, UFMT_EMPTY, - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - -/* 0x30 */ - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - -/* 0x40 */ - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR, - UFMT_EMPTY, UFMT_SCIENTIFIC, UFMT_EMPTY, UFMT_SCIDBL, -#ifdef U_USE_OBSOLETE_IO_FORMATTING - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR/*deprecated*/, -#else - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, -#endif - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - -/* 0x50 */ - UFMT_PERCENT, UFMT_EMPTY, UFMT_EMPTY, UFMT_USTRING, -#ifdef U_USE_OBSOLETE_IO_FORMATTING - UFMT_EMPTY, UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT, UFMT_EMPTY, -#else - UFMT_EMPTY, UFMT_EMPTY, UFMT_SPELLOUT, UFMT_EMPTY, -#endif - UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_SCANSET, - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - -/* 0x60 */ - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_CHAR, - UFMT_INT, UFMT_SCIENTIFIC, UFMT_DOUBLE, UFMT_SCIDBL, - UFMT_EMPTY, UFMT_INT, UFMT_EMPTY, UFMT_EMPTY, - UFMT_EMPTY, UFMT_EMPTY, UFMT_COUNT, UFMT_OCTAL, - -/* 0x70 */ - UFMT_POINTER, UFMT_EMPTY, UFMT_EMPTY, UFMT_STRING, - UFMT_EMPTY, UFMT_UINT, UFMT_EMPTY, UFMT_EMPTY, - UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, -}; - -U_CFUNC int32_t -u_scanf_parse(UFILE *f, - const UChar *patternSpecification, - va_list ap) -{ - const UChar *alias; - int32_t count, converted, argConsumed, cpConsumed; - uint16_t handlerNum; - - ufmt_args args; - u_scanf_spec spec; - ufmt_type_info info; - u_scanf_handler handler; - - /* alias the pattern */ - alias = patternSpecification; - - /* haven't converted anything yet */ - argConsumed = 0; - converted = 0; - cpConsumed = 0; - - /* iterate through the pattern */ - for(;;) { - - /* match any characters up to the next '%' */ - while(*alias != UP_PERCENT && *alias != 0x0000 && u_fgetc(f) == *alias) { - alias++; - } - - /* if we aren't at a '%', or if we're at end of string, break*/ - if(*alias != UP_PERCENT || *alias == 0x0000) - break; - - /* parse the specifier */ - count = u_scanf_parse_spec(alias, &spec); - - /* update the pointer in pattern */ - alias += count; - - handlerNum = (uint16_t)(spec.fInfo.fSpec - USCANF_BASE_FMT_HANDLERS); - if (handlerNum < USCANF_NUM_FMT_HANDLERS) { - /* skip the argument, if necessary */ - /* query the info function for argument information */ - info = g_u_scanf_infos[ handlerNum ].info; - if (info != ufmt_count && u_feof(f)) { - break; - } - else if(spec.fInfo.fSkipArg) { - args.ptrValue = NULL; - } - else { - switch(info) { - case ufmt_count: - /* set the spec's width to the # of items converted */ - spec.fInfo.fWidth = cpConsumed; - U_FALLTHROUGH; - case ufmt_char: - case ufmt_uchar: - case ufmt_int: - case ufmt_string: - case ufmt_ustring: - case ufmt_pointer: - case ufmt_float: - case ufmt_double: - args.ptrValue = va_arg(ap, void*); - break; - - default: - /* else args is ignored */ - args.ptrValue = NULL; - break; - } - } - - /* call the handler function */ - handler = g_u_scanf_infos[ handlerNum ].handler; - if(handler != 0) { - - /* reset count to 1 so that += for alias works. */ - count = 1; - - cpConsumed += (*handler)(f, &spec.fInfo, &args, alias, &count, &argConsumed); - - /* if the handler encountered an error condition, break */ - if(argConsumed < 0) { - converted = -1; - break; - } - - /* add to the # of items converted */ - converted += argConsumed; - - /* update the pointer in pattern */ - alias += count-1; - } - /* else do nothing */ - } - /* else do nothing */ - - /* just ignore unknown tags */ - } - - /* return # of items converted */ - return converted; -} - -#endif /* #if !UCONFIG_NO_FORMATTING */ + && ((isNotEOF = ufile_getch(input, &c)) == TRUE) + && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c)))) + { + + /* put the character from the input onto the target */ + if (!info->fSkipArg) { + *alias++ = c; + } + + /* increment the count */ + ++count; + } + + /* put the final character we read back on the input */ + if (!info->fSkipArg) { + if((info->fWidth == -1 || count < info->fWidth) && isNotEOF) { + u_fungetc(c, input); + } + + /* add the terminator */ + if (info->fIsString) { + *alias = 0x0000; + } + } + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return count + skipped; +} + +static int32_t +u_scanf_uchar_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + if (info->fWidth < 0) { + info->fWidth = 1; + } + info->fIsString = FALSE; + return u_scanf_ustring_handler(input, info, args, fmt, fmtConsumed, argConverted); +} + +static int32_t +u_scanf_spellout_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + double num; + UNumberFormat *format; + int32_t parsePos = 0; + int32_t skipped; + UErrorCode status = U_ZERO_ERROR; + + + /* skip all ws in the input */ + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) + len = ufmt_min(len, info->fWidth); + + /* get the formatter */ + format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SPELLOUT); + + /* handle error */ + if(format == 0) + return 0; + + /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ + /* This is not applicable to RBNF. */ + /*skipped += u_scanf_skip_leading_positive_sign(input, format, &status);*/ + + /* parse the number */ + num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); + + if (!info->fSkipArg) { + *(double*)(args[0].ptrValue) = num; + } + + /* mask off any necessary bits */ + /* if(! info->fIsLong_double) + num &= DBL_MAX;*/ + + /* update the input's position to reflect consumed data */ + input->str.fPos += parsePos; + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return parsePos + skipped; +} + +static int32_t +u_scanf_hex_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + int32_t skipped; + void *num = (void*) (args[0].ptrValue); + int64_t result; + + /* skip all ws in the input */ + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) + len = ufmt_min(len, info->fWidth); + + /* check for alternate form */ + if( *(input->str.fPos) == 0x0030 && + (*(input->str.fPos + 1) == 0x0078 || *(input->str.fPos + 1) == 0x0058) ) { + + /* skip the '0' and 'x' or 'X' if present */ + input->str.fPos += 2; + len -= 2; + } + + /* parse the number */ + result = ufmt_uto64(input->str.fPos, &len, 16); + + /* update the input's position to reflect consumed data */ + input->str.fPos += len; + + /* mask off any necessary bits */ + if (!info->fSkipArg) { + if (info->fIsShort) + *(int16_t*)num = (int16_t)(UINT16_MAX & result); + else if (info->fIsLongLong) + *(int64_t*)num = result; + else + *(int32_t*)num = (int32_t)(UINT32_MAX & result); + } + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return len + skipped; +} + +static int32_t +u_scanf_octal_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + int32_t skipped; + void *num = (void*) (args[0].ptrValue); + int64_t result; + + /* skip all ws in the input */ + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) + len = ufmt_min(len, info->fWidth); + + /* parse the number */ + result = ufmt_uto64(input->str.fPos, &len, 8); + + /* update the input's position to reflect consumed data */ + input->str.fPos += len; + + /* mask off any necessary bits */ + if (!info->fSkipArg) { + if (info->fIsShort) + *(int16_t*)num = (int16_t)(UINT16_MAX & result); + else if (info->fIsLongLong) + *(int64_t*)num = result; + else + *(int32_t*)num = (int32_t)(UINT32_MAX & result); + } + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return len + skipped; +} + +static int32_t +u_scanf_pointer_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + int32_t skipped; + void *result; + void **p = (void**)(args[0].ptrValue); + + + /* skip all ws in the input */ + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) { + len = ufmt_min(len, info->fWidth); + } + + /* Make sure that we don't consume too much */ + if (len > (int32_t)(sizeof(void*)*2)) { + len = (int32_t)(sizeof(void*)*2); + } + + /* parse the pointer - assign to temporary value */ + result = ufmt_utop(input->str.fPos, &len); + + if (!info->fSkipArg) { + *p = result; + } + + /* update the input's position to reflect consumed data */ + input->str.fPos += len; + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return len + skipped; +} + +static int32_t +u_scanf_scanset_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + USet *scanset; + UErrorCode status = U_ZERO_ERROR; + int32_t chLeft = INT32_MAX; + UChar32 c; + UChar *alias = (UChar*) (args[0].ptrValue); + UBool isNotEOF = FALSE; + UBool readCharacter = FALSE; + + /* Create an empty set */ + scanset = uset_open(0, -1); + + /* Back up one to get the [ */ + fmt--; + + /* truncate to the width, if specified and alias the target */ + if(info->fWidth >= 0) { + chLeft = info->fWidth; + } + + /* parse the scanset from the fmt string */ + *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status); + + /* verify that the parse was successful */ + if (U_SUCCESS(status)) { + c=0; + + /* grab characters one at a time and make sure they are in the scanset */ + while(chLeft > 0) { + if ( ((isNotEOF = ufile_getch32(input, &c)) == TRUE) && uset_contains(scanset, c) ) { + readCharacter = TRUE; + if (!info->fSkipArg) { + int32_t idx = 0; + UBool isError = FALSE; + + U16_APPEND(alias, idx, chLeft, c, isError); + if (isError) { + break; + } + alias += idx; + } + chLeft -= (1 + U_IS_SUPPLEMENTARY(c)); + } + else { + /* if the character's not in the scanset, break out */ + break; + } + } + + /* put the final character we read back on the input */ + if(isNotEOF && chLeft > 0) { + u_fungetc(c, input); + } + } + + uset_close(scanset); + + /* if we didn't match at least 1 character, fail */ + if(!readCharacter) + return -1; + /* otherwise, add the terminator */ + else if (!info->fSkipArg) { + *alias = 0x00; + } + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft; +} + +/* Use US-ASCII characters only for formatting. Most codepages have + characters 20-7F from Unicode. Using any other codepage specific + characters will make it very difficult to format the string on + non-Unicode machines */ +static const u_scanf_info g_u_scanf_infos[USCANF_NUM_FMT_HANDLERS] = { +/* 0x20 */ + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_SIMPLE_PERCENT,UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + +/* 0x30 */ + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + +/* 0x40 */ + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR, + UFMT_EMPTY, UFMT_SCIENTIFIC, UFMT_EMPTY, UFMT_SCIDBL, +#ifdef U_USE_OBSOLETE_IO_FORMATTING + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR/*deprecated*/, +#else + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, +#endif + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + +/* 0x50 */ + UFMT_PERCENT, UFMT_EMPTY, UFMT_EMPTY, UFMT_USTRING, +#ifdef U_USE_OBSOLETE_IO_FORMATTING + UFMT_EMPTY, UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT, UFMT_EMPTY, +#else + UFMT_EMPTY, UFMT_EMPTY, UFMT_SPELLOUT, UFMT_EMPTY, +#endif + UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_SCANSET, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + +/* 0x60 */ + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_CHAR, + UFMT_INT, UFMT_SCIENTIFIC, UFMT_DOUBLE, UFMT_SCIDBL, + UFMT_EMPTY, UFMT_INT, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_COUNT, UFMT_OCTAL, + +/* 0x70 */ + UFMT_POINTER, UFMT_EMPTY, UFMT_EMPTY, UFMT_STRING, + UFMT_EMPTY, UFMT_UINT, UFMT_EMPTY, UFMT_EMPTY, + UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, +}; + +U_CFUNC int32_t +u_scanf_parse(UFILE *f, + const UChar *patternSpecification, + va_list ap) +{ + const UChar *alias; + int32_t count, converted, argConsumed, cpConsumed; + uint16_t handlerNum; + + ufmt_args args; + u_scanf_spec spec; + ufmt_type_info info; + u_scanf_handler handler; + + /* alias the pattern */ + alias = patternSpecification; + + /* haven't converted anything yet */ + argConsumed = 0; + converted = 0; + cpConsumed = 0; + + /* iterate through the pattern */ + for(;;) { + + /* match any characters up to the next '%' */ + while(*alias != UP_PERCENT && *alias != 0x0000 && u_fgetc(f) == *alias) { + alias++; + } + + /* if we aren't at a '%', or if we're at end of string, break*/ + if(*alias != UP_PERCENT || *alias == 0x0000) + break; + + /* parse the specifier */ + count = u_scanf_parse_spec(alias, &spec); + + /* update the pointer in pattern */ + alias += count; + + handlerNum = (uint16_t)(spec.fInfo.fSpec - USCANF_BASE_FMT_HANDLERS); + if (handlerNum < USCANF_NUM_FMT_HANDLERS) { + /* skip the argument, if necessary */ + /* query the info function for argument information */ + info = g_u_scanf_infos[ handlerNum ].info; + if (info != ufmt_count && u_feof(f)) { + break; + } + else if(spec.fInfo.fSkipArg) { + args.ptrValue = NULL; + } + else { + switch(info) { + case ufmt_count: + /* set the spec's width to the # of items converted */ + spec.fInfo.fWidth = cpConsumed; + U_FALLTHROUGH; + case ufmt_char: + case ufmt_uchar: + case ufmt_int: + case ufmt_string: + case ufmt_ustring: + case ufmt_pointer: + case ufmt_float: + case ufmt_double: + args.ptrValue = va_arg(ap, void*); + break; + + default: + /* else args is ignored */ + args.ptrValue = NULL; + break; + } + } + + /* call the handler function */ + handler = g_u_scanf_infos[ handlerNum ].handler; + if(handler != 0) { + + /* reset count to 1 so that += for alias works. */ + count = 1; + + cpConsumed += (*handler)(f, &spec.fInfo, &args, alias, &count, &argConsumed); + + /* if the handler encountered an error condition, break */ + if(argConsumed < 0) { + converted = -1; + break; + } + + /* add to the # of items converted */ + converted += argConsumed; + + /* update the pointer in pattern */ + alias += count-1; + } + /* else do nothing */ + } + /* else do nothing */ + + /* just ignore unknown tags */ + } + + /* return # of items converted */ + return converted; +} + +#endif /* #if !UCONFIG_NO_FORMATTING */ |