diff options
author | mcheshkov <mcheshkov@yandex-team.ru> | 2022-02-10 16:46:16 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:16 +0300 |
commit | 1312621288956f199a5bd5342b0133d4395fa725 (patch) | |
tree | 1a2c5ffcf89eb53ecd79dbc9bc0a195c27404d0c /contrib/libs/icu/common/propsvec.cpp | |
parent | e9d19cec64684c9c1e6b0c98297e5b895cf904fe (diff) | |
download | ydb-1312621288956f199a5bd5342b0133d4395fa725.tar.gz |
Restoring authorship annotation for <mcheshkov@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/icu/common/propsvec.cpp')
-rw-r--r-- | contrib/libs/icu/common/propsvec.cpp | 1058 |
1 files changed, 529 insertions, 529 deletions
diff --git a/contrib/libs/icu/common/propsvec.cpp b/contrib/libs/icu/common/propsvec.cpp index ff0010eea5..056fcda9cf 100644 --- a/contrib/libs/icu/common/propsvec.cpp +++ b/contrib/libs/icu/common/propsvec.cpp @@ -1,529 +1,529 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2002-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: propsvec.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002feb22 -* created by: Markus W. Scherer -* -* Store bits (Unicode character properties) in bit set vectors. -*/ - -#include <stdlib.h> -#include "unicode/utypes.h" -#include "cmemory.h" -#include "utrie.h" -#include "utrie2.h" -#include "uarrsort.h" -#include "propsvec.h" -#include "uassert.h" - -struct UPropsVectors { - uint32_t *v; - int32_t columns; /* number of columns, plus two for start & limit values */ - int32_t maxRows; - int32_t rows; - int32_t prevRow; /* search optimization: remember last row seen */ - UBool isCompacted; -}; - -#define UPVEC_INITIAL_ROWS (1<<12) -#define UPVEC_MEDIUM_ROWS ((int32_t)1<<16) -#define UPVEC_MAX_ROWS (UPVEC_MAX_CP+1) - -U_CAPI UPropsVectors * U_EXPORT2 -upvec_open(int32_t columns, UErrorCode *pErrorCode) { - UPropsVectors *pv; - uint32_t *v, *row; - uint32_t cp; - - if(U_FAILURE(*pErrorCode)) { - return NULL; - } - if(columns<1) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - columns+=2; /* count range start and limit columns */ - - pv=(UPropsVectors *)uprv_malloc(sizeof(UPropsVectors)); - v=(uint32_t *)uprv_malloc(UPVEC_INITIAL_ROWS*columns*4); - if(pv==NULL || v==NULL) { - uprv_free(pv); - uprv_free(v); - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - uprv_memset(pv, 0, sizeof(UPropsVectors)); - pv->v=v; - pv->columns=columns; - pv->maxRows=UPVEC_INITIAL_ROWS; - pv->rows=2+(UPVEC_MAX_CP-UPVEC_FIRST_SPECIAL_CP); - - /* set the all-Unicode row and the special-value rows */ - row=pv->v; - uprv_memset(row, 0, pv->rows*columns*4); - row[0]=0; - row[1]=0x110000; - row+=columns; - for(cp=UPVEC_FIRST_SPECIAL_CP; cp<=UPVEC_MAX_CP; ++cp) { - row[0]=cp; - row[1]=cp+1; - row+=columns; - } - return pv; -} - -U_CAPI void U_EXPORT2 -upvec_close(UPropsVectors *pv) { - if(pv!=NULL) { - uprv_free(pv->v); - uprv_free(pv); - } -} - -static uint32_t * -_findRow(UPropsVectors *pv, UChar32 rangeStart) { - uint32_t *row; - int32_t columns, i, start, limit, prevRow; - - columns=pv->columns; - limit=pv->rows; - prevRow=pv->prevRow; - - /* check the vicinity of the last-seen row (start searching with an unrolled loop) */ - row=pv->v+prevRow*columns; - if(rangeStart>=(UChar32)row[0]) { - if(rangeStart<(UChar32)row[1]) { - /* same row as last seen */ - return row; - } else if(rangeStart<(UChar32)(row+=columns)[1]) { - /* next row after the last one */ - pv->prevRow=prevRow+1; - return row; - } else if(rangeStart<(UChar32)(row+=columns)[1]) { - /* second row after the last one */ - pv->prevRow=prevRow+2; - return row; - } else if((rangeStart-(UChar32)row[1])<10) { - /* we are close, continue looping */ - prevRow+=2; - do { - ++prevRow; - row+=columns; - } while(rangeStart>=(UChar32)row[1]); - pv->prevRow=prevRow; - return row; - } - } else if(rangeStart<(UChar32)pv->v[1]) { - /* the very first row */ - pv->prevRow=0; - return pv->v; - } - - /* do a binary search for the start of the range */ - start=0; - while(start<limit-1) { - i=(start+limit)/2; - row=pv->v+i*columns; - if(rangeStart<(UChar32)row[0]) { - limit=i; - } else if(rangeStart<(UChar32)row[1]) { - pv->prevRow=i; - return row; - } else { - start=i; - } - } - - /* must be found because all ranges together always cover all of Unicode */ - pv->prevRow=start; - return pv->v+start*columns; -} - -U_CAPI void U_EXPORT2 -upvec_setValue(UPropsVectors *pv, - UChar32 start, UChar32 end, - int32_t column, - uint32_t value, uint32_t mask, - UErrorCode *pErrorCode) { - uint32_t *firstRow, *lastRow; - int32_t columns; - UChar32 limit; - UBool splitFirstRow, splitLastRow; - - /* argument checking */ - if(U_FAILURE(*pErrorCode)) { - return; - } - if( pv==NULL || - start<0 || start>end || end>UPVEC_MAX_CP || - column<0 || column>=(pv->columns-2) - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - if(pv->isCompacted) { - *pErrorCode=U_NO_WRITE_PERMISSION; - return; - } - limit=end+1; - - /* initialize */ - columns=pv->columns; - column+=2; /* skip range start and limit columns */ - value&=mask; - - /* find the rows whose ranges overlap with the input range */ - - /* find the first and last rows, always successful */ - firstRow=_findRow(pv, start); - lastRow=_findRow(pv, end); - - /* - * Rows need to be split if they partially overlap with the - * input range (only possible for the first and last rows) - * and if their value differs from the input value. - */ - splitFirstRow= (UBool)(start!=(UChar32)firstRow[0] && value!=(firstRow[column]&mask)); - splitLastRow= (UBool)(limit!=(UChar32)lastRow[1] && value!=(lastRow[column]&mask)); - - /* split first/last rows if necessary */ - if(splitFirstRow || splitLastRow) { - int32_t count, rows; - - rows=pv->rows; - if((rows+splitFirstRow+splitLastRow)>pv->maxRows) { - uint32_t *newVectors; - int32_t newMaxRows; - - if(pv->maxRows<UPVEC_MEDIUM_ROWS) { - newMaxRows=UPVEC_MEDIUM_ROWS; - } else if(pv->maxRows<UPVEC_MAX_ROWS) { - newMaxRows=UPVEC_MAX_ROWS; - } else { - /* Implementation bug, or UPVEC_MAX_ROWS too low. */ - *pErrorCode=U_INTERNAL_PROGRAM_ERROR; - return; - } - newVectors=(uint32_t *)uprv_malloc(newMaxRows*columns*4); - if(newVectors==NULL) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - uprv_memcpy(newVectors, pv->v, (size_t)rows*columns*4); - firstRow=newVectors+(firstRow-pv->v); - lastRow=newVectors+(lastRow-pv->v); - uprv_free(pv->v); - pv->v=newVectors; - pv->maxRows=newMaxRows; - } - - /* count the number of row cells to move after the last row, and move them */ - count = (int32_t)((pv->v+rows*columns)-(lastRow+columns)); - if(count>0) { - uprv_memmove( - lastRow+(1+splitFirstRow+splitLastRow)*columns, - lastRow+columns, - count*4); - } - pv->rows=rows+splitFirstRow+splitLastRow; - - /* split the first row, and move the firstRow pointer to the second part */ - if(splitFirstRow) { - /* copy all affected rows up one and move the lastRow pointer */ - count = (int32_t)((lastRow-firstRow)+columns); - uprv_memmove(firstRow+columns, firstRow, (size_t)count*4); - lastRow+=columns; - - /* split the range and move the firstRow pointer */ - firstRow[1]=firstRow[columns]=(uint32_t)start; - firstRow+=columns; - } - - /* split the last row */ - if(splitLastRow) { - /* copy the last row data */ - uprv_memcpy(lastRow+columns, lastRow, (size_t)columns*4); - - /* split the range and move the firstRow pointer */ - lastRow[1]=lastRow[columns]=(uint32_t)limit; - } - } - - /* set the "row last seen" to the last row for the range */ - pv->prevRow=(int32_t)((lastRow-(pv->v))/columns); - - /* set the input value in all remaining rows */ - firstRow+=column; - lastRow+=column; - mask=~mask; - for(;;) { - *firstRow=(*firstRow&mask)|value; - if(firstRow==lastRow) { - break; - } - firstRow+=columns; - } -} - -U_CAPI uint32_t U_EXPORT2 -upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column) { - uint32_t *row; - UPropsVectors *ncpv; - - if(pv->isCompacted || c<0 || c>UPVEC_MAX_CP || column<0 || column>=(pv->columns-2)) { - return 0; - } - ncpv=(UPropsVectors *)pv; - row=_findRow(ncpv, c); - return row[2+column]; -} - -U_CAPI uint32_t * U_EXPORT2 -upvec_getRow(const UPropsVectors *pv, int32_t rowIndex, - UChar32 *pRangeStart, UChar32 *pRangeEnd) { - uint32_t *row; - int32_t columns; - - if(pv->isCompacted || rowIndex<0 || rowIndex>=pv->rows) { - return NULL; - } - - columns=pv->columns; - row=pv->v+rowIndex*columns; - if(pRangeStart!=NULL) { - *pRangeStart=(UChar32)row[0]; - } - if(pRangeEnd!=NULL) { - *pRangeEnd=(UChar32)row[1]-1; - } - return row+2; -} - -static int32_t U_CALLCONV -upvec_compareRows(const void *context, const void *l, const void *r) { - const uint32_t *left=(const uint32_t *)l, *right=(const uint32_t *)r; - const UPropsVectors *pv=(const UPropsVectors *)context; - int32_t i, count, columns; - - count=columns=pv->columns; /* includes start/limit columns */ - - /* start comparing after start/limit but wrap around to them */ - i=2; - do { - if(left[i]!=right[i]) { - return left[i]<right[i] ? -1 : 1; - } - if(++i==columns) { - i=0; - } - } while(--count>0); - - return 0; -} - -U_CAPI void U_EXPORT2 -upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) { - uint32_t *row; - int32_t i, columns, valueColumns, rows, count; - UChar32 start, limit; - - /* argument checking */ - if(U_FAILURE(*pErrorCode)) { - return; - } - if(handler==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - if(pv->isCompacted) { - return; - } - - /* Set the flag now: Sorting and compacting destroys the builder data structure. */ - pv->isCompacted=TRUE; - - rows=pv->rows; - columns=pv->columns; - U_ASSERT(columns>=3); /* upvec_open asserts this */ - valueColumns=columns-2; /* not counting start & limit */ - - /* sort the properties vectors to find unique vector values */ - uprv_sortArray(pv->v, rows, columns*4, - upvec_compareRows, pv, FALSE, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return; - } - - /* - * Find and set the special values. - * This has to do almost the same work as the compaction below, - * to find the indexes where the special-value rows will move. - */ - row=pv->v; - count=-valueColumns; - for(i=0; i<rows; ++i) { - start=(UChar32)row[0]; - - /* count a new values vector if it is different from the current one */ - if(count<0 || 0!=uprv_memcmp(row+2, row-valueColumns, valueColumns*4)) { - count+=valueColumns; - } - - if(start>=UPVEC_FIRST_SPECIAL_CP) { - handler(context, start, start, count, row+2, valueColumns, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return; - } - } - - row+=columns; - } - - /* count is at the beginning of the last vector, add valueColumns to include that last vector */ - count+=valueColumns; - - /* Call the handler once more to signal the start of delivering real values. */ - handler(context, UPVEC_START_REAL_VALUES_CP, UPVEC_START_REAL_VALUES_CP, - count, row-valueColumns, valueColumns, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return; - } - - /* - * Move vector contents up to a contiguous array with only unique - * vector values, and call the handler function for each vector. - * - * This destroys the Properties Vector structure and replaces it - * with an array of just vector values. - */ - row=pv->v; - count=-valueColumns; - for(i=0; i<rows; ++i) { - /* fetch these first before memmove() may overwrite them */ - start=(UChar32)row[0]; - limit=(UChar32)row[1]; - - /* add a new values vector if it is different from the current one */ - if(count<0 || 0!=uprv_memcmp(row+2, pv->v+count, valueColumns*4)) { - count+=valueColumns; - uprv_memmove(pv->v+count, row+2, (size_t)valueColumns*4); - } - - if(start<UPVEC_FIRST_SPECIAL_CP) { - handler(context, start, limit-1, count, pv->v+count, valueColumns, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return; - } - } - - row+=columns; - } - - /* count is at the beginning of the last vector, add one to include that last vector */ - pv->rows=count/valueColumns+1; -} - -U_CAPI const uint32_t * U_EXPORT2 -upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns) { - if(!pv->isCompacted) { - return NULL; - } - if(pRows!=NULL) { - *pRows=pv->rows; - } - if(pColumns!=NULL) { - *pColumns=pv->columns-2; - } - return pv->v; -} - -U_CAPI uint32_t * U_EXPORT2 -upvec_cloneArray(const UPropsVectors *pv, - int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode) { - uint32_t *clonedArray; - int32_t byteLength; - - if(U_FAILURE(*pErrorCode)) { - return NULL; - } - if(!pv->isCompacted) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - byteLength=pv->rows*(pv->columns-2)*4; - clonedArray=(uint32_t *)uprv_malloc(byteLength); - if(clonedArray==NULL) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - uprv_memcpy(clonedArray, pv->v, byteLength); - if(pRows!=NULL) { - *pRows=pv->rows; - } - if(pColumns!=NULL) { - *pColumns=pv->columns-2; - } - return clonedArray; -} - -U_CAPI UTrie2 * U_EXPORT2 -upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode) { - UPVecToUTrie2Context toUTrie2={ NULL, 0, 0, 0 }; - upvec_compact(pv, upvec_compactToUTrie2Handler, &toUTrie2, pErrorCode); - utrie2_freeze(toUTrie2.trie, UTRIE2_16_VALUE_BITS, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - utrie2_close(toUTrie2.trie); - toUTrie2.trie=NULL; - } - return toUTrie2.trie; -} - -/* - * TODO(markus): Add upvec_16BitsToUTrie2() function that enumerates all rows, extracts - * some 16-bit field and builds and returns a UTrie2. - */ - -U_CAPI void U_CALLCONV -upvec_compactToUTrie2Handler(void *context, - UChar32 start, UChar32 end, - int32_t rowIndex, uint32_t *row, int32_t columns, - UErrorCode *pErrorCode) { - (void)row; - (void)columns; - UPVecToUTrie2Context *toUTrie2=(UPVecToUTrie2Context *)context; - if(start<UPVEC_FIRST_SPECIAL_CP) { - utrie2_setRange32(toUTrie2->trie, start, end, (uint32_t)rowIndex, TRUE, pErrorCode); - } else { - switch(start) { - case UPVEC_INITIAL_VALUE_CP: - toUTrie2->initialValue=rowIndex; - break; - case UPVEC_ERROR_VALUE_CP: - toUTrie2->errorValue=rowIndex; - break; - case UPVEC_START_REAL_VALUES_CP: - toUTrie2->maxValue=rowIndex; - if(rowIndex>0xffff) { - /* too many rows for a 16-bit trie */ - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - } else { - toUTrie2->trie=utrie2_open(toUTrie2->initialValue, - toUTrie2->errorValue, pErrorCode); - } - break; - default: - break; - } - } -} +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2002-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: propsvec.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002feb22 +* created by: Markus W. Scherer +* +* Store bits (Unicode character properties) in bit set vectors. +*/ + +#include <stdlib.h> +#include "unicode/utypes.h" +#include "cmemory.h" +#include "utrie.h" +#include "utrie2.h" +#include "uarrsort.h" +#include "propsvec.h" +#include "uassert.h" + +struct UPropsVectors { + uint32_t *v; + int32_t columns; /* number of columns, plus two for start & limit values */ + int32_t maxRows; + int32_t rows; + int32_t prevRow; /* search optimization: remember last row seen */ + UBool isCompacted; +}; + +#define UPVEC_INITIAL_ROWS (1<<12) +#define UPVEC_MEDIUM_ROWS ((int32_t)1<<16) +#define UPVEC_MAX_ROWS (UPVEC_MAX_CP+1) + +U_CAPI UPropsVectors * U_EXPORT2 +upvec_open(int32_t columns, UErrorCode *pErrorCode) { + UPropsVectors *pv; + uint32_t *v, *row; + uint32_t cp; + + if(U_FAILURE(*pErrorCode)) { + return NULL; + } + if(columns<1) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + columns+=2; /* count range start and limit columns */ + + pv=(UPropsVectors *)uprv_malloc(sizeof(UPropsVectors)); + v=(uint32_t *)uprv_malloc(UPVEC_INITIAL_ROWS*columns*4); + if(pv==NULL || v==NULL) { + uprv_free(pv); + uprv_free(v); + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uprv_memset(pv, 0, sizeof(UPropsVectors)); + pv->v=v; + pv->columns=columns; + pv->maxRows=UPVEC_INITIAL_ROWS; + pv->rows=2+(UPVEC_MAX_CP-UPVEC_FIRST_SPECIAL_CP); + + /* set the all-Unicode row and the special-value rows */ + row=pv->v; + uprv_memset(row, 0, pv->rows*columns*4); + row[0]=0; + row[1]=0x110000; + row+=columns; + for(cp=UPVEC_FIRST_SPECIAL_CP; cp<=UPVEC_MAX_CP; ++cp) { + row[0]=cp; + row[1]=cp+1; + row+=columns; + } + return pv; +} + +U_CAPI void U_EXPORT2 +upvec_close(UPropsVectors *pv) { + if(pv!=NULL) { + uprv_free(pv->v); + uprv_free(pv); + } +} + +static uint32_t * +_findRow(UPropsVectors *pv, UChar32 rangeStart) { + uint32_t *row; + int32_t columns, i, start, limit, prevRow; + + columns=pv->columns; + limit=pv->rows; + prevRow=pv->prevRow; + + /* check the vicinity of the last-seen row (start searching with an unrolled loop) */ + row=pv->v+prevRow*columns; + if(rangeStart>=(UChar32)row[0]) { + if(rangeStart<(UChar32)row[1]) { + /* same row as last seen */ + return row; + } else if(rangeStart<(UChar32)(row+=columns)[1]) { + /* next row after the last one */ + pv->prevRow=prevRow+1; + return row; + } else if(rangeStart<(UChar32)(row+=columns)[1]) { + /* second row after the last one */ + pv->prevRow=prevRow+2; + return row; + } else if((rangeStart-(UChar32)row[1])<10) { + /* we are close, continue looping */ + prevRow+=2; + do { + ++prevRow; + row+=columns; + } while(rangeStart>=(UChar32)row[1]); + pv->prevRow=prevRow; + return row; + } + } else if(rangeStart<(UChar32)pv->v[1]) { + /* the very first row */ + pv->prevRow=0; + return pv->v; + } + + /* do a binary search for the start of the range */ + start=0; + while(start<limit-1) { + i=(start+limit)/2; + row=pv->v+i*columns; + if(rangeStart<(UChar32)row[0]) { + limit=i; + } else if(rangeStart<(UChar32)row[1]) { + pv->prevRow=i; + return row; + } else { + start=i; + } + } + + /* must be found because all ranges together always cover all of Unicode */ + pv->prevRow=start; + return pv->v+start*columns; +} + +U_CAPI void U_EXPORT2 +upvec_setValue(UPropsVectors *pv, + UChar32 start, UChar32 end, + int32_t column, + uint32_t value, uint32_t mask, + UErrorCode *pErrorCode) { + uint32_t *firstRow, *lastRow; + int32_t columns; + UChar32 limit; + UBool splitFirstRow, splitLastRow; + + /* argument checking */ + if(U_FAILURE(*pErrorCode)) { + return; + } + if( pv==NULL || + start<0 || start>end || end>UPVEC_MAX_CP || + column<0 || column>=(pv->columns-2) + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + if(pv->isCompacted) { + *pErrorCode=U_NO_WRITE_PERMISSION; + return; + } + limit=end+1; + + /* initialize */ + columns=pv->columns; + column+=2; /* skip range start and limit columns */ + value&=mask; + + /* find the rows whose ranges overlap with the input range */ + + /* find the first and last rows, always successful */ + firstRow=_findRow(pv, start); + lastRow=_findRow(pv, end); + + /* + * Rows need to be split if they partially overlap with the + * input range (only possible for the first and last rows) + * and if their value differs from the input value. + */ + splitFirstRow= (UBool)(start!=(UChar32)firstRow[0] && value!=(firstRow[column]&mask)); + splitLastRow= (UBool)(limit!=(UChar32)lastRow[1] && value!=(lastRow[column]&mask)); + + /* split first/last rows if necessary */ + if(splitFirstRow || splitLastRow) { + int32_t count, rows; + + rows=pv->rows; + if((rows+splitFirstRow+splitLastRow)>pv->maxRows) { + uint32_t *newVectors; + int32_t newMaxRows; + + if(pv->maxRows<UPVEC_MEDIUM_ROWS) { + newMaxRows=UPVEC_MEDIUM_ROWS; + } else if(pv->maxRows<UPVEC_MAX_ROWS) { + newMaxRows=UPVEC_MAX_ROWS; + } else { + /* Implementation bug, or UPVEC_MAX_ROWS too low. */ + *pErrorCode=U_INTERNAL_PROGRAM_ERROR; + return; + } + newVectors=(uint32_t *)uprv_malloc(newMaxRows*columns*4); + if(newVectors==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + uprv_memcpy(newVectors, pv->v, (size_t)rows*columns*4); + firstRow=newVectors+(firstRow-pv->v); + lastRow=newVectors+(lastRow-pv->v); + uprv_free(pv->v); + pv->v=newVectors; + pv->maxRows=newMaxRows; + } + + /* count the number of row cells to move after the last row, and move them */ + count = (int32_t)((pv->v+rows*columns)-(lastRow+columns)); + if(count>0) { + uprv_memmove( + lastRow+(1+splitFirstRow+splitLastRow)*columns, + lastRow+columns, + count*4); + } + pv->rows=rows+splitFirstRow+splitLastRow; + + /* split the first row, and move the firstRow pointer to the second part */ + if(splitFirstRow) { + /* copy all affected rows up one and move the lastRow pointer */ + count = (int32_t)((lastRow-firstRow)+columns); + uprv_memmove(firstRow+columns, firstRow, (size_t)count*4); + lastRow+=columns; + + /* split the range and move the firstRow pointer */ + firstRow[1]=firstRow[columns]=(uint32_t)start; + firstRow+=columns; + } + + /* split the last row */ + if(splitLastRow) { + /* copy the last row data */ + uprv_memcpy(lastRow+columns, lastRow, (size_t)columns*4); + + /* split the range and move the firstRow pointer */ + lastRow[1]=lastRow[columns]=(uint32_t)limit; + } + } + + /* set the "row last seen" to the last row for the range */ + pv->prevRow=(int32_t)((lastRow-(pv->v))/columns); + + /* set the input value in all remaining rows */ + firstRow+=column; + lastRow+=column; + mask=~mask; + for(;;) { + *firstRow=(*firstRow&mask)|value; + if(firstRow==lastRow) { + break; + } + firstRow+=columns; + } +} + +U_CAPI uint32_t U_EXPORT2 +upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column) { + uint32_t *row; + UPropsVectors *ncpv; + + if(pv->isCompacted || c<0 || c>UPVEC_MAX_CP || column<0 || column>=(pv->columns-2)) { + return 0; + } + ncpv=(UPropsVectors *)pv; + row=_findRow(ncpv, c); + return row[2+column]; +} + +U_CAPI uint32_t * U_EXPORT2 +upvec_getRow(const UPropsVectors *pv, int32_t rowIndex, + UChar32 *pRangeStart, UChar32 *pRangeEnd) { + uint32_t *row; + int32_t columns; + + if(pv->isCompacted || rowIndex<0 || rowIndex>=pv->rows) { + return NULL; + } + + columns=pv->columns; + row=pv->v+rowIndex*columns; + if(pRangeStart!=NULL) { + *pRangeStart=(UChar32)row[0]; + } + if(pRangeEnd!=NULL) { + *pRangeEnd=(UChar32)row[1]-1; + } + return row+2; +} + +static int32_t U_CALLCONV +upvec_compareRows(const void *context, const void *l, const void *r) { + const uint32_t *left=(const uint32_t *)l, *right=(const uint32_t *)r; + const UPropsVectors *pv=(const UPropsVectors *)context; + int32_t i, count, columns; + + count=columns=pv->columns; /* includes start/limit columns */ + + /* start comparing after start/limit but wrap around to them */ + i=2; + do { + if(left[i]!=right[i]) { + return left[i]<right[i] ? -1 : 1; + } + if(++i==columns) { + i=0; + } + } while(--count>0); + + return 0; +} + +U_CAPI void U_EXPORT2 +upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) { + uint32_t *row; + int32_t i, columns, valueColumns, rows, count; + UChar32 start, limit; + + /* argument checking */ + if(U_FAILURE(*pErrorCode)) { + return; + } + if(handler==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + if(pv->isCompacted) { + return; + } + + /* Set the flag now: Sorting and compacting destroys the builder data structure. */ + pv->isCompacted=TRUE; + + rows=pv->rows; + columns=pv->columns; + U_ASSERT(columns>=3); /* upvec_open asserts this */ + valueColumns=columns-2; /* not counting start & limit */ + + /* sort the properties vectors to find unique vector values */ + uprv_sortArray(pv->v, rows, columns*4, + upvec_compareRows, pv, FALSE, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return; + } + + /* + * Find and set the special values. + * This has to do almost the same work as the compaction below, + * to find the indexes where the special-value rows will move. + */ + row=pv->v; + count=-valueColumns; + for(i=0; i<rows; ++i) { + start=(UChar32)row[0]; + + /* count a new values vector if it is different from the current one */ + if(count<0 || 0!=uprv_memcmp(row+2, row-valueColumns, valueColumns*4)) { + count+=valueColumns; + } + + if(start>=UPVEC_FIRST_SPECIAL_CP) { + handler(context, start, start, count, row+2, valueColumns, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return; + } + } + + row+=columns; + } + + /* count is at the beginning of the last vector, add valueColumns to include that last vector */ + count+=valueColumns; + + /* Call the handler once more to signal the start of delivering real values. */ + handler(context, UPVEC_START_REAL_VALUES_CP, UPVEC_START_REAL_VALUES_CP, + count, row-valueColumns, valueColumns, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return; + } + + /* + * Move vector contents up to a contiguous array with only unique + * vector values, and call the handler function for each vector. + * + * This destroys the Properties Vector structure and replaces it + * with an array of just vector values. + */ + row=pv->v; + count=-valueColumns; + for(i=0; i<rows; ++i) { + /* fetch these first before memmove() may overwrite them */ + start=(UChar32)row[0]; + limit=(UChar32)row[1]; + + /* add a new values vector if it is different from the current one */ + if(count<0 || 0!=uprv_memcmp(row+2, pv->v+count, valueColumns*4)) { + count+=valueColumns; + uprv_memmove(pv->v+count, row+2, (size_t)valueColumns*4); + } + + if(start<UPVEC_FIRST_SPECIAL_CP) { + handler(context, start, limit-1, count, pv->v+count, valueColumns, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return; + } + } + + row+=columns; + } + + /* count is at the beginning of the last vector, add one to include that last vector */ + pv->rows=count/valueColumns+1; +} + +U_CAPI const uint32_t * U_EXPORT2 +upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns) { + if(!pv->isCompacted) { + return NULL; + } + if(pRows!=NULL) { + *pRows=pv->rows; + } + if(pColumns!=NULL) { + *pColumns=pv->columns-2; + } + return pv->v; +} + +U_CAPI uint32_t * U_EXPORT2 +upvec_cloneArray(const UPropsVectors *pv, + int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode) { + uint32_t *clonedArray; + int32_t byteLength; + + if(U_FAILURE(*pErrorCode)) { + return NULL; + } + if(!pv->isCompacted) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + byteLength=pv->rows*(pv->columns-2)*4; + clonedArray=(uint32_t *)uprv_malloc(byteLength); + if(clonedArray==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uprv_memcpy(clonedArray, pv->v, byteLength); + if(pRows!=NULL) { + *pRows=pv->rows; + } + if(pColumns!=NULL) { + *pColumns=pv->columns-2; + } + return clonedArray; +} + +U_CAPI UTrie2 * U_EXPORT2 +upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode) { + UPVecToUTrie2Context toUTrie2={ NULL, 0, 0, 0 }; + upvec_compact(pv, upvec_compactToUTrie2Handler, &toUTrie2, pErrorCode); + utrie2_freeze(toUTrie2.trie, UTRIE2_16_VALUE_BITS, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + utrie2_close(toUTrie2.trie); + toUTrie2.trie=NULL; + } + return toUTrie2.trie; +} + +/* + * TODO(markus): Add upvec_16BitsToUTrie2() function that enumerates all rows, extracts + * some 16-bit field and builds and returns a UTrie2. + */ + +U_CAPI void U_CALLCONV +upvec_compactToUTrie2Handler(void *context, + UChar32 start, UChar32 end, + int32_t rowIndex, uint32_t *row, int32_t columns, + UErrorCode *pErrorCode) { + (void)row; + (void)columns; + UPVecToUTrie2Context *toUTrie2=(UPVecToUTrie2Context *)context; + if(start<UPVEC_FIRST_SPECIAL_CP) { + utrie2_setRange32(toUTrie2->trie, start, end, (uint32_t)rowIndex, TRUE, pErrorCode); + } else { + switch(start) { + case UPVEC_INITIAL_VALUE_CP: + toUTrie2->initialValue=rowIndex; + break; + case UPVEC_ERROR_VALUE_CP: + toUTrie2->errorValue=rowIndex; + break; + case UPVEC_START_REAL_VALUES_CP: + toUTrie2->maxValue=rowIndex; + if(rowIndex>0xffff) { + /* too many rows for a 16-bit trie */ + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + } else { + toUTrie2->trie=utrie2_open(toUTrie2->initialValue, + toUTrie2->errorValue, pErrorCode); + } + break; + default: + break; + } + } +} |