aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/icu/common/stringtriebuilder.cpp
diff options
context:
space:
mode:
authorneksard <neksard@yandex-team.ru>2022-02-10 16:45:33 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:45:33 +0300
commit1d9c550e7c38e051d7961f576013a482003a70d9 (patch)
treeb2cc84ee7850122e7ccf51d0ea21e4fa7e7a5685 /contrib/libs/icu/common/stringtriebuilder.cpp
parent8f7cf138264e0caa318144bf8a2c950e0b0a8593 (diff)
downloadydb-1d9c550e7c38e051d7961f576013a482003a70d9.tar.gz
Restoring authorship annotation for <neksard@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/icu/common/stringtriebuilder.cpp')
-rw-r--r--contrib/libs/icu/common/stringtriebuilder.cpp1230
1 files changed, 615 insertions, 615 deletions
diff --git a/contrib/libs/icu/common/stringtriebuilder.cpp b/contrib/libs/icu/common/stringtriebuilder.cpp
index 632bf528bf..6f9cc2e5c2 100644
--- a/contrib/libs/icu/common/stringtriebuilder.cpp
+++ b/contrib/libs/icu/common/stringtriebuilder.cpp
@@ -1,618 +1,618 @@
// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2010-2012, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: stringtriebuilder.cpp
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: stringtriebuilder.cpp
* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2010dec24
-* created by: Markus W. Scherer
-*/
-
-#include "utypeinfo.h" // for 'typeid' to work
-#include "unicode/utypes.h"
-#include "unicode/stringtriebuilder.h"
-#include "uassert.h"
-#include "uhash.h"
-
-U_CDECL_BEGIN
-
-static int32_t U_CALLCONV
-hashStringTrieNode(const UHashTok key) {
- return icu::StringTrieBuilder::hashNode(key.pointer);
-}
-
-static UBool U_CALLCONV
-equalStringTrieNodes(const UHashTok key1, const UHashTok key2) {
- return icu::StringTrieBuilder::equalNodes(key1.pointer, key2.pointer);
-}
-
-U_CDECL_END
-
-U_NAMESPACE_BEGIN
-
-StringTrieBuilder::StringTrieBuilder() : nodes(NULL) {}
-
-StringTrieBuilder::~StringTrieBuilder() {
- deleteCompactBuilder();
-}
-
-void
-StringTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return;
- }
- nodes=uhash_openSize(hashStringTrieNode, equalStringTrieNodes, NULL,
- sizeGuess, &errorCode);
- if(U_SUCCESS(errorCode)) {
- if(nodes==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- } else {
- uhash_setKeyDeleter(nodes, uprv_deleteUObject);
- }
- }
-}
-
-void
-StringTrieBuilder::deleteCompactBuilder() {
- uhash_close(nodes);
- nodes=NULL;
-}
-
-void
-StringTrieBuilder::build(UStringTrieBuildOption buildOption, int32_t elementsLength,
- UErrorCode &errorCode) {
- if(buildOption==USTRINGTRIE_BUILD_FAST) {
- writeNode(0, elementsLength, 0);
- } else /* USTRINGTRIE_BUILD_SMALL */ {
- createCompactBuilder(2*elementsLength, errorCode);
- Node *root=makeNode(0, elementsLength, 0, errorCode);
- if(U_SUCCESS(errorCode)) {
- root->markRightEdgesFirst(-1);
- root->write(*this);
- }
- deleteCompactBuilder();
- }
-}
-
-// Requires start<limit,
-// and all strings of the [start..limit[ elements must be sorted and
-// have a common prefix of length unitIndex.
-int32_t
-StringTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t unitIndex) {
- UBool hasValue=FALSE;
- int32_t value=0;
- int32_t type;
- if(unitIndex==getElementStringLength(start)) {
- // An intermediate or final value.
- value=getElementValue(start++);
- if(start==limit) {
- return writeValueAndFinal(value, TRUE); // final-value node
- }
- hasValue=TRUE;
- }
- // Now all [start..limit[ strings are longer than unitIndex.
- int32_t minUnit=getElementUnit(start, unitIndex);
- int32_t maxUnit=getElementUnit(limit-1, unitIndex);
- if(minUnit==maxUnit) {
- // Linear-match node: All strings have the same character at unitIndex.
- int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex);
- writeNode(start, limit, lastUnitIndex);
- // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength.
- int32_t length=lastUnitIndex-unitIndex;
- int32_t maxLinearMatchLength=getMaxLinearMatchLength();
- while(length>maxLinearMatchLength) {
- lastUnitIndex-=maxLinearMatchLength;
- length-=maxLinearMatchLength;
- writeElementUnits(start, lastUnitIndex, maxLinearMatchLength);
- write(getMinLinearMatch()+maxLinearMatchLength-1);
- }
- writeElementUnits(start, unitIndex, length);
- type=getMinLinearMatch()+length-1;
- } else {
- // Branch node.
- int32_t length=countElementUnits(start, limit, unitIndex);
- // length>=2 because minUnit!=maxUnit.
- writeBranchSubNode(start, limit, unitIndex, length);
- if(--length<getMinLinearMatch()) {
- type=length;
- } else {
- write(length);
- type=0;
- }
- }
- return writeValueAndType(hasValue, value, type);
-}
-
-// start<limit && all strings longer than unitIndex &&
-// length different units at unitIndex
-int32_t
-StringTrieBuilder::writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length) {
- UChar middleUnits[kMaxSplitBranchLevels];
- int32_t lessThan[kMaxSplitBranchLevels];
- int32_t ltLength=0;
- while(length>getMaxBranchLinearSubNodeLength()) {
- // Branch on the middle unit.
- // First, find the middle unit.
- int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2);
- // Encode the less-than branch first.
- middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit
- lessThan[ltLength]=writeBranchSubNode(start, i, unitIndex, length/2);
- ++ltLength;
- // Continue for the greater-or-equal branch.
- start=i;
- length=length-length/2;
- }
- // For each unit, find its elements array start and whether it has a final value.
- int32_t starts[kMaxBranchLinearSubNodeLength];
- UBool isFinal[kMaxBranchLinearSubNodeLength-1];
- int32_t unitNumber=0;
- do {
- int32_t i=starts[unitNumber]=start;
- UChar unit=getElementUnit(i++, unitIndex);
- i=indexOfElementWithNextUnit(i, unitIndex, unit);
- isFinal[unitNumber]= start==i-1 && unitIndex+1==getElementStringLength(start);
- start=i;
- } while(++unitNumber<length-1);
- // unitNumber==length-1, and the maxUnit elements range is [start..limit[
- starts[unitNumber]=start;
-
- // Write the sub-nodes in reverse order: The jump lengths are deltas from
- // after their own positions, so if we wrote the minUnit sub-node first,
- // then its jump delta would be larger.
- // Instead we write the minUnit sub-node last, for a shorter delta.
- int32_t jumpTargets[kMaxBranchLinearSubNodeLength-1];
- do {
- --unitNumber;
- if(!isFinal[unitNumber]) {
- jumpTargets[unitNumber]=writeNode(starts[unitNumber], starts[unitNumber+1], unitIndex+1);
- }
- } while(unitNumber>0);
- // The maxUnit sub-node is written as the very last one because we do
- // not jump for it at all.
- unitNumber=length-1;
- writeNode(start, limit, unitIndex+1);
- int32_t offset=write(getElementUnit(start, unitIndex));
- // Write the rest of this node's unit-value pairs.
- while(--unitNumber>=0) {
- start=starts[unitNumber];
- int32_t value;
- if(isFinal[unitNumber]) {
- // Write the final value for the one string ending with this unit.
- value=getElementValue(start);
- } else {
- // Write the delta to the start position of the sub-node.
- value=offset-jumpTargets[unitNumber];
- }
- writeValueAndFinal(value, isFinal[unitNumber]);
- offset=write(getElementUnit(start, unitIndex));
- }
- // Write the split-branch nodes.
- while(ltLength>0) {
- --ltLength;
- writeDeltaTo(lessThan[ltLength]);
- offset=write(middleUnits[ltLength]);
- }
- return offset;
-}
-
-// Requires start<limit,
-// and all strings of the [start..limit[ elements must be sorted and
-// have a common prefix of length unitIndex.
-StringTrieBuilder::Node *
-StringTrieBuilder::makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return NULL;
- }
- UBool hasValue=FALSE;
- int32_t value=0;
- if(unitIndex==getElementStringLength(start)) {
- // An intermediate or final value.
- value=getElementValue(start++);
- if(start==limit) {
- return registerFinalValue(value, errorCode);
- }
- hasValue=TRUE;
- }
- Node *node;
- // Now all [start..limit[ strings are longer than unitIndex.
- int32_t minUnit=getElementUnit(start, unitIndex);
- int32_t maxUnit=getElementUnit(limit-1, unitIndex);
- if(minUnit==maxUnit) {
- // Linear-match node: All strings have the same character at unitIndex.
- int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex);
- Node *nextNode=makeNode(start, limit, lastUnitIndex, errorCode);
- // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength.
- int32_t length=lastUnitIndex-unitIndex;
- int32_t maxLinearMatchLength=getMaxLinearMatchLength();
- while(length>maxLinearMatchLength) {
- lastUnitIndex-=maxLinearMatchLength;
- length-=maxLinearMatchLength;
- node=createLinearMatchNode(start, lastUnitIndex, maxLinearMatchLength, nextNode);
- nextNode=registerNode(node, errorCode);
- }
- node=createLinearMatchNode(start, unitIndex, length, nextNode);
- } else {
- // Branch node.
- int32_t length=countElementUnits(start, limit, unitIndex);
- // length>=2 because minUnit!=maxUnit.
- Node *subNode=makeBranchSubNode(start, limit, unitIndex, length, errorCode);
- node=new BranchHeadNode(length, subNode);
- }
- if(hasValue && node!=NULL) {
- if(matchNodesCanHaveValues()) {
- ((ValueNode *)node)->setValue(value);
- } else {
- node=new IntermediateValueNode(value, registerNode(node, errorCode));
- }
- }
- return registerNode(node, errorCode);
-}
-
-// start<limit && all strings longer than unitIndex &&
-// length different units at unitIndex
-StringTrieBuilder::Node *
-StringTrieBuilder::makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
- int32_t length, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return NULL;
- }
- UChar middleUnits[kMaxSplitBranchLevels];
- Node *lessThan[kMaxSplitBranchLevels];
- int32_t ltLength=0;
- while(length>getMaxBranchLinearSubNodeLength()) {
- // Branch on the middle unit.
- // First, find the middle unit.
- int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2);
- // Create the less-than branch.
- middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit
- lessThan[ltLength]=makeBranchSubNode(start, i, unitIndex, length/2, errorCode);
- ++ltLength;
- // Continue for the greater-or-equal branch.
- start=i;
- length=length-length/2;
- }
- if(U_FAILURE(errorCode)) {
- return NULL;
- }
- ListBranchNode *listNode=new ListBranchNode();
- if(listNode==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- // For each unit, find its elements array start and whether it has a final value.
- int32_t unitNumber=0;
- do {
- int32_t i=start;
- UChar unit=getElementUnit(i++, unitIndex);
- i=indexOfElementWithNextUnit(i, unitIndex, unit);
- if(start==i-1 && unitIndex+1==getElementStringLength(start)) {
- listNode->add(unit, getElementValue(start));
- } else {
- listNode->add(unit, makeNode(start, i, unitIndex+1, errorCode));
- }
- start=i;
- } while(++unitNumber<length-1);
- // unitNumber==length-1, and the maxUnit elements range is [start..limit[
- UChar unit=getElementUnit(start, unitIndex);
- if(start==limit-1 && unitIndex+1==getElementStringLength(start)) {
- listNode->add(unit, getElementValue(start));
- } else {
- listNode->add(unit, makeNode(start, limit, unitIndex+1, errorCode));
- }
- Node *node=registerNode(listNode, errorCode);
- // Create the split-branch nodes.
- while(ltLength>0) {
- --ltLength;
- node=registerNode(
- new SplitBranchNode(middleUnits[ltLength], lessThan[ltLength], node), errorCode);
- }
- return node;
-}
-
-StringTrieBuilder::Node *
-StringTrieBuilder::registerNode(Node *newNode, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- delete newNode;
- return NULL;
- }
- if(newNode==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- const UHashElement *old=uhash_find(nodes, newNode);
- if(old!=NULL) {
- delete newNode;
- return (Node *)old->key.pointer;
- }
- // If uhash_puti() returns a non-zero value from an equivalent, previously
- // registered node, then uhash_find() failed to find that and we will leak newNode.
-#if U_DEBUG
- int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue.
-#endif
- uhash_puti(nodes, newNode, 1, &errorCode);
- U_ASSERT(oldValue==0);
- if(U_FAILURE(errorCode)) {
- delete newNode;
- return NULL;
- }
- return newNode;
-}
-
-StringTrieBuilder::Node *
-StringTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return NULL;
- }
- FinalValueNode key(value);
- const UHashElement *old=uhash_find(nodes, &key);
- if(old!=NULL) {
- return (Node *)old->key.pointer;
- }
- Node *newNode=new FinalValueNode(value);
- if(newNode==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- // If uhash_puti() returns a non-zero value from an equivalent, previously
- // registered node, then uhash_find() failed to find that and we will leak newNode.
-#if U_DEBUG
- int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue.
-#endif
- uhash_puti(nodes, newNode, 1, &errorCode);
- U_ASSERT(oldValue==0);
- if(U_FAILURE(errorCode)) {
- delete newNode;
- return NULL;
- }
- return newNode;
-}
-
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010dec24
+* created by: Markus W. Scherer
+*/
+
+#include "utypeinfo.h" // for 'typeid' to work
+#include "unicode/utypes.h"
+#include "unicode/stringtriebuilder.h"
+#include "uassert.h"
+#include "uhash.h"
+
+U_CDECL_BEGIN
+
+static int32_t U_CALLCONV
+hashStringTrieNode(const UHashTok key) {
+ return icu::StringTrieBuilder::hashNode(key.pointer);
+}
+
+static UBool U_CALLCONV
+equalStringTrieNodes(const UHashTok key1, const UHashTok key2) {
+ return icu::StringTrieBuilder::equalNodes(key1.pointer, key2.pointer);
+}
+
+U_CDECL_END
+
+U_NAMESPACE_BEGIN
+
+StringTrieBuilder::StringTrieBuilder() : nodes(NULL) {}
+
+StringTrieBuilder::~StringTrieBuilder() {
+ deleteCompactBuilder();
+}
+
+void
+StringTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) {
+ return;
+ }
+ nodes=uhash_openSize(hashStringTrieNode, equalStringTrieNodes, NULL,
+ sizeGuess, &errorCode);
+ if(U_SUCCESS(errorCode)) {
+ if(nodes==NULL) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ } else {
+ uhash_setKeyDeleter(nodes, uprv_deleteUObject);
+ }
+ }
+}
+
+void
+StringTrieBuilder::deleteCompactBuilder() {
+ uhash_close(nodes);
+ nodes=NULL;
+}
+
+void
+StringTrieBuilder::build(UStringTrieBuildOption buildOption, int32_t elementsLength,
+ UErrorCode &errorCode) {
+ if(buildOption==USTRINGTRIE_BUILD_FAST) {
+ writeNode(0, elementsLength, 0);
+ } else /* USTRINGTRIE_BUILD_SMALL */ {
+ createCompactBuilder(2*elementsLength, errorCode);
+ Node *root=makeNode(0, elementsLength, 0, errorCode);
+ if(U_SUCCESS(errorCode)) {
+ root->markRightEdgesFirst(-1);
+ root->write(*this);
+ }
+ deleteCompactBuilder();
+ }
+}
+
+// Requires start<limit,
+// and all strings of the [start..limit[ elements must be sorted and
+// have a common prefix of length unitIndex.
int32_t
-StringTrieBuilder::hashNode(const void *node) {
- return ((const Node *)node)->hashCode();
-}
-
-UBool
-StringTrieBuilder::equalNodes(const void *left, const void *right) {
- return *(const Node *)left==*(const Node *)right;
-}
-
-UBool
-StringTrieBuilder::Node::operator==(const Node &other) const {
- return this==&other || (typeid(*this)==typeid(other) && hash==other.hash);
-}
-
-int32_t
-StringTrieBuilder::Node::markRightEdgesFirst(int32_t edgeNumber) {
- if(offset==0) {
- offset=edgeNumber;
- }
- return edgeNumber;
-}
-
-UBool
-StringTrieBuilder::FinalValueNode::operator==(const Node &other) const {
- if(this==&other) {
- return TRUE;
- }
- if(!Node::operator==(other)) {
- return FALSE;
- }
- const FinalValueNode &o=(const FinalValueNode &)other;
- return value==o.value;
-}
-
-void
-StringTrieBuilder::FinalValueNode::write(StringTrieBuilder &builder) {
- offset=builder.writeValueAndFinal(value, TRUE);
-}
-
-UBool
-StringTrieBuilder::ValueNode::operator==(const Node &other) const {
- if(this==&other) {
- return TRUE;
- }
- if(!Node::operator==(other)) {
- return FALSE;
- }
- const ValueNode &o=(const ValueNode &)other;
- return hasValue==o.hasValue && (!hasValue || value==o.value);
-}
-
-UBool
-StringTrieBuilder::IntermediateValueNode::operator==(const Node &other) const {
- if(this==&other) {
- return TRUE;
- }
- if(!ValueNode::operator==(other)) {
- return FALSE;
- }
- const IntermediateValueNode &o=(const IntermediateValueNode &)other;
- return next==o.next;
-}
-
-int32_t
-StringTrieBuilder::IntermediateValueNode::markRightEdgesFirst(int32_t edgeNumber) {
- if(offset==0) {
- offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
- }
- return edgeNumber;
-}
-
-void
-StringTrieBuilder::IntermediateValueNode::write(StringTrieBuilder &builder) {
- next->write(builder);
- offset=builder.writeValueAndFinal(value, FALSE);
-}
-
-UBool
-StringTrieBuilder::LinearMatchNode::operator==(const Node &other) const {
- if(this==&other) {
- return TRUE;
- }
- if(!ValueNode::operator==(other)) {
- return FALSE;
- }
- const LinearMatchNode &o=(const LinearMatchNode &)other;
- return length==o.length && next==o.next;
-}
-
-int32_t
-StringTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) {
- if(offset==0) {
- offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
- }
- return edgeNumber;
-}
-
-UBool
-StringTrieBuilder::ListBranchNode::operator==(const Node &other) const {
- if(this==&other) {
- return TRUE;
- }
- if(!Node::operator==(other)) {
- return FALSE;
- }
- const ListBranchNode &o=(const ListBranchNode &)other;
- for(int32_t i=0; i<length; ++i) {
- if(units[i]!=o.units[i] || values[i]!=o.values[i] || equal[i]!=o.equal[i]) {
- return FALSE;
- }
- }
- return TRUE;
-}
-
-int32_t
-StringTrieBuilder::ListBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
- if(offset==0) {
- firstEdgeNumber=edgeNumber;
- int32_t step=0;
- int32_t i=length;
- do {
- Node *edge=equal[--i];
- if(edge!=NULL) {
- edgeNumber=edge->markRightEdgesFirst(edgeNumber-step);
- }
- // For all but the rightmost edge, decrement the edge number.
- step=1;
- } while(i>0);
- offset=edgeNumber;
- }
- return edgeNumber;
-}
-
-void
-StringTrieBuilder::ListBranchNode::write(StringTrieBuilder &builder) {
- // Write the sub-nodes in reverse order: The jump lengths are deltas from
- // after their own positions, so if we wrote the minUnit sub-node first,
- // then its jump delta would be larger.
- // Instead we write the minUnit sub-node last, for a shorter delta.
- int32_t unitNumber=length-1;
- Node *rightEdge=equal[unitNumber];
- int32_t rightEdgeNumber= rightEdge==NULL ? firstEdgeNumber : rightEdge->getOffset();
- do {
- --unitNumber;
- if(equal[unitNumber]!=NULL) {
- equal[unitNumber]->writeUnlessInsideRightEdge(firstEdgeNumber, rightEdgeNumber, builder);
- }
- } while(unitNumber>0);
- // The maxUnit sub-node is written as the very last one because we do
- // not jump for it at all.
- unitNumber=length-1;
- if(rightEdge==NULL) {
- builder.writeValueAndFinal(values[unitNumber], TRUE);
- } else {
- rightEdge->write(builder);
- }
- offset=builder.write(units[unitNumber]);
- // Write the rest of this node's unit-value pairs.
- while(--unitNumber>=0) {
- int32_t value;
- UBool isFinal;
- if(equal[unitNumber]==NULL) {
- // Write the final value for the one string ending with this unit.
- value=values[unitNumber];
- isFinal=TRUE;
- } else {
- // Write the delta to the start position of the sub-node.
- U_ASSERT(equal[unitNumber]->getOffset()>0);
- value=offset-equal[unitNumber]->getOffset();
- isFinal=FALSE;
- }
- builder.writeValueAndFinal(value, isFinal);
- offset=builder.write(units[unitNumber]);
- }
-}
-
-UBool
-StringTrieBuilder::SplitBranchNode::operator==(const Node &other) const {
- if(this==&other) {
- return TRUE;
- }
- if(!Node::operator==(other)) {
- return FALSE;
- }
- const SplitBranchNode &o=(const SplitBranchNode &)other;
- return unit==o.unit && lessThan==o.lessThan && greaterOrEqual==o.greaterOrEqual;
-}
-
-int32_t
-StringTrieBuilder::SplitBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
- if(offset==0) {
- firstEdgeNumber=edgeNumber;
- edgeNumber=greaterOrEqual->markRightEdgesFirst(edgeNumber);
- offset=edgeNumber=lessThan->markRightEdgesFirst(edgeNumber-1);
- }
- return edgeNumber;
-}
-
-void
-StringTrieBuilder::SplitBranchNode::write(StringTrieBuilder &builder) {
- // Encode the less-than branch first.
- lessThan->writeUnlessInsideRightEdge(firstEdgeNumber, greaterOrEqual->getOffset(), builder);
- // Encode the greater-or-equal branch last because we do not jump for it at all.
- greaterOrEqual->write(builder);
- // Write this node.
- U_ASSERT(lessThan->getOffset()>0);
- builder.writeDeltaTo(lessThan->getOffset()); // less-than
- offset=builder.write(unit);
-}
-
-UBool
-StringTrieBuilder::BranchHeadNode::operator==(const Node &other) const {
- if(this==&other) {
- return TRUE;
- }
- if(!ValueNode::operator==(other)) {
- return FALSE;
- }
- const BranchHeadNode &o=(const BranchHeadNode &)other;
- return length==o.length && next==o.next;
-}
-
-int32_t
-StringTrieBuilder::BranchHeadNode::markRightEdgesFirst(int32_t edgeNumber) {
- if(offset==0) {
- offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
- }
- return edgeNumber;
-}
-
-void
-StringTrieBuilder::BranchHeadNode::write(StringTrieBuilder &builder) {
- next->write(builder);
- if(length<=builder.getMinLinearMatch()) {
- offset=builder.writeValueAndType(hasValue, value, length-1);
- } else {
- builder.write(length-1);
- offset=builder.writeValueAndType(hasValue, value, 0);
- }
-}
-
-U_NAMESPACE_END
+StringTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t unitIndex) {
+ UBool hasValue=FALSE;
+ int32_t value=0;
+ int32_t type;
+ if(unitIndex==getElementStringLength(start)) {
+ // An intermediate or final value.
+ value=getElementValue(start++);
+ if(start==limit) {
+ return writeValueAndFinal(value, TRUE); // final-value node
+ }
+ hasValue=TRUE;
+ }
+ // Now all [start..limit[ strings are longer than unitIndex.
+ int32_t minUnit=getElementUnit(start, unitIndex);
+ int32_t maxUnit=getElementUnit(limit-1, unitIndex);
+ if(minUnit==maxUnit) {
+ // Linear-match node: All strings have the same character at unitIndex.
+ int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex);
+ writeNode(start, limit, lastUnitIndex);
+ // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength.
+ int32_t length=lastUnitIndex-unitIndex;
+ int32_t maxLinearMatchLength=getMaxLinearMatchLength();
+ while(length>maxLinearMatchLength) {
+ lastUnitIndex-=maxLinearMatchLength;
+ length-=maxLinearMatchLength;
+ writeElementUnits(start, lastUnitIndex, maxLinearMatchLength);
+ write(getMinLinearMatch()+maxLinearMatchLength-1);
+ }
+ writeElementUnits(start, unitIndex, length);
+ type=getMinLinearMatch()+length-1;
+ } else {
+ // Branch node.
+ int32_t length=countElementUnits(start, limit, unitIndex);
+ // length>=2 because minUnit!=maxUnit.
+ writeBranchSubNode(start, limit, unitIndex, length);
+ if(--length<getMinLinearMatch()) {
+ type=length;
+ } else {
+ write(length);
+ type=0;
+ }
+ }
+ return writeValueAndType(hasValue, value, type);
+}
+
+// start<limit && all strings longer than unitIndex &&
+// length different units at unitIndex
+int32_t
+StringTrieBuilder::writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length) {
+ UChar middleUnits[kMaxSplitBranchLevels];
+ int32_t lessThan[kMaxSplitBranchLevels];
+ int32_t ltLength=0;
+ while(length>getMaxBranchLinearSubNodeLength()) {
+ // Branch on the middle unit.
+ // First, find the middle unit.
+ int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2);
+ // Encode the less-than branch first.
+ middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit
+ lessThan[ltLength]=writeBranchSubNode(start, i, unitIndex, length/2);
+ ++ltLength;
+ // Continue for the greater-or-equal branch.
+ start=i;
+ length=length-length/2;
+ }
+ // For each unit, find its elements array start and whether it has a final value.
+ int32_t starts[kMaxBranchLinearSubNodeLength];
+ UBool isFinal[kMaxBranchLinearSubNodeLength-1];
+ int32_t unitNumber=0;
+ do {
+ int32_t i=starts[unitNumber]=start;
+ UChar unit=getElementUnit(i++, unitIndex);
+ i=indexOfElementWithNextUnit(i, unitIndex, unit);
+ isFinal[unitNumber]= start==i-1 && unitIndex+1==getElementStringLength(start);
+ start=i;
+ } while(++unitNumber<length-1);
+ // unitNumber==length-1, and the maxUnit elements range is [start..limit[
+ starts[unitNumber]=start;
+
+ // Write the sub-nodes in reverse order: The jump lengths are deltas from
+ // after their own positions, so if we wrote the minUnit sub-node first,
+ // then its jump delta would be larger.
+ // Instead we write the minUnit sub-node last, for a shorter delta.
+ int32_t jumpTargets[kMaxBranchLinearSubNodeLength-1];
+ do {
+ --unitNumber;
+ if(!isFinal[unitNumber]) {
+ jumpTargets[unitNumber]=writeNode(starts[unitNumber], starts[unitNumber+1], unitIndex+1);
+ }
+ } while(unitNumber>0);
+ // The maxUnit sub-node is written as the very last one because we do
+ // not jump for it at all.
+ unitNumber=length-1;
+ writeNode(start, limit, unitIndex+1);
+ int32_t offset=write(getElementUnit(start, unitIndex));
+ // Write the rest of this node's unit-value pairs.
+ while(--unitNumber>=0) {
+ start=starts[unitNumber];
+ int32_t value;
+ if(isFinal[unitNumber]) {
+ // Write the final value for the one string ending with this unit.
+ value=getElementValue(start);
+ } else {
+ // Write the delta to the start position of the sub-node.
+ value=offset-jumpTargets[unitNumber];
+ }
+ writeValueAndFinal(value, isFinal[unitNumber]);
+ offset=write(getElementUnit(start, unitIndex));
+ }
+ // Write the split-branch nodes.
+ while(ltLength>0) {
+ --ltLength;
+ writeDeltaTo(lessThan[ltLength]);
+ offset=write(middleUnits[ltLength]);
+ }
+ return offset;
+}
+
+// Requires start<limit,
+// and all strings of the [start..limit[ elements must be sorted and
+// have a common prefix of length unitIndex.
+StringTrieBuilder::Node *
+StringTrieBuilder::makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) {
+ return NULL;
+ }
+ UBool hasValue=FALSE;
+ int32_t value=0;
+ if(unitIndex==getElementStringLength(start)) {
+ // An intermediate or final value.
+ value=getElementValue(start++);
+ if(start==limit) {
+ return registerFinalValue(value, errorCode);
+ }
+ hasValue=TRUE;
+ }
+ Node *node;
+ // Now all [start..limit[ strings are longer than unitIndex.
+ int32_t minUnit=getElementUnit(start, unitIndex);
+ int32_t maxUnit=getElementUnit(limit-1, unitIndex);
+ if(minUnit==maxUnit) {
+ // Linear-match node: All strings have the same character at unitIndex.
+ int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex);
+ Node *nextNode=makeNode(start, limit, lastUnitIndex, errorCode);
+ // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength.
+ int32_t length=lastUnitIndex-unitIndex;
+ int32_t maxLinearMatchLength=getMaxLinearMatchLength();
+ while(length>maxLinearMatchLength) {
+ lastUnitIndex-=maxLinearMatchLength;
+ length-=maxLinearMatchLength;
+ node=createLinearMatchNode(start, lastUnitIndex, maxLinearMatchLength, nextNode);
+ nextNode=registerNode(node, errorCode);
+ }
+ node=createLinearMatchNode(start, unitIndex, length, nextNode);
+ } else {
+ // Branch node.
+ int32_t length=countElementUnits(start, limit, unitIndex);
+ // length>=2 because minUnit!=maxUnit.
+ Node *subNode=makeBranchSubNode(start, limit, unitIndex, length, errorCode);
+ node=new BranchHeadNode(length, subNode);
+ }
+ if(hasValue && node!=NULL) {
+ if(matchNodesCanHaveValues()) {
+ ((ValueNode *)node)->setValue(value);
+ } else {
+ node=new IntermediateValueNode(value, registerNode(node, errorCode));
+ }
+ }
+ return registerNode(node, errorCode);
+}
+
+// start<limit && all strings longer than unitIndex &&
+// length different units at unitIndex
+StringTrieBuilder::Node *
+StringTrieBuilder::makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
+ int32_t length, UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) {
+ return NULL;
+ }
+ UChar middleUnits[kMaxSplitBranchLevels];
+ Node *lessThan[kMaxSplitBranchLevels];
+ int32_t ltLength=0;
+ while(length>getMaxBranchLinearSubNodeLength()) {
+ // Branch on the middle unit.
+ // First, find the middle unit.
+ int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2);
+ // Create the less-than branch.
+ middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit
+ lessThan[ltLength]=makeBranchSubNode(start, i, unitIndex, length/2, errorCode);
+ ++ltLength;
+ // Continue for the greater-or-equal branch.
+ start=i;
+ length=length-length/2;
+ }
+ if(U_FAILURE(errorCode)) {
+ return NULL;
+ }
+ ListBranchNode *listNode=new ListBranchNode();
+ if(listNode==NULL) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return NULL;
+ }
+ // For each unit, find its elements array start and whether it has a final value.
+ int32_t unitNumber=0;
+ do {
+ int32_t i=start;
+ UChar unit=getElementUnit(i++, unitIndex);
+ i=indexOfElementWithNextUnit(i, unitIndex, unit);
+ if(start==i-1 && unitIndex+1==getElementStringLength(start)) {
+ listNode->add(unit, getElementValue(start));
+ } else {
+ listNode->add(unit, makeNode(start, i, unitIndex+1, errorCode));
+ }
+ start=i;
+ } while(++unitNumber<length-1);
+ // unitNumber==length-1, and the maxUnit elements range is [start..limit[
+ UChar unit=getElementUnit(start, unitIndex);
+ if(start==limit-1 && unitIndex+1==getElementStringLength(start)) {
+ listNode->add(unit, getElementValue(start));
+ } else {
+ listNode->add(unit, makeNode(start, limit, unitIndex+1, errorCode));
+ }
+ Node *node=registerNode(listNode, errorCode);
+ // Create the split-branch nodes.
+ while(ltLength>0) {
+ --ltLength;
+ node=registerNode(
+ new SplitBranchNode(middleUnits[ltLength], lessThan[ltLength], node), errorCode);
+ }
+ return node;
+}
+
+StringTrieBuilder::Node *
+StringTrieBuilder::registerNode(Node *newNode, UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) {
+ delete newNode;
+ return NULL;
+ }
+ if(newNode==NULL) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return NULL;
+ }
+ const UHashElement *old=uhash_find(nodes, newNode);
+ if(old!=NULL) {
+ delete newNode;
+ return (Node *)old->key.pointer;
+ }
+ // If uhash_puti() returns a non-zero value from an equivalent, previously
+ // registered node, then uhash_find() failed to find that and we will leak newNode.
+#if U_DEBUG
+ int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue.
+#endif
+ uhash_puti(nodes, newNode, 1, &errorCode);
+ U_ASSERT(oldValue==0);
+ if(U_FAILURE(errorCode)) {
+ delete newNode;
+ return NULL;
+ }
+ return newNode;
+}
+
+StringTrieBuilder::Node *
+StringTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) {
+ return NULL;
+ }
+ FinalValueNode key(value);
+ const UHashElement *old=uhash_find(nodes, &key);
+ if(old!=NULL) {
+ return (Node *)old->key.pointer;
+ }
+ Node *newNode=new FinalValueNode(value);
+ if(newNode==NULL) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return NULL;
+ }
+ // If uhash_puti() returns a non-zero value from an equivalent, previously
+ // registered node, then uhash_find() failed to find that and we will leak newNode.
+#if U_DEBUG
+ int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue.
+#endif
+ uhash_puti(nodes, newNode, 1, &errorCode);
+ U_ASSERT(oldValue==0);
+ if(U_FAILURE(errorCode)) {
+ delete newNode;
+ return NULL;
+ }
+ return newNode;
+}
+
+int32_t
+StringTrieBuilder::hashNode(const void *node) {
+ return ((const Node *)node)->hashCode();
+}
+
+UBool
+StringTrieBuilder::equalNodes(const void *left, const void *right) {
+ return *(const Node *)left==*(const Node *)right;
+}
+
+UBool
+StringTrieBuilder::Node::operator==(const Node &other) const {
+ return this==&other || (typeid(*this)==typeid(other) && hash==other.hash);
+}
+
+int32_t
+StringTrieBuilder::Node::markRightEdgesFirst(int32_t edgeNumber) {
+ if(offset==0) {
+ offset=edgeNumber;
+ }
+ return edgeNumber;
+}
+
+UBool
+StringTrieBuilder::FinalValueNode::operator==(const Node &other) const {
+ if(this==&other) {
+ return TRUE;
+ }
+ if(!Node::operator==(other)) {
+ return FALSE;
+ }
+ const FinalValueNode &o=(const FinalValueNode &)other;
+ return value==o.value;
+}
+
+void
+StringTrieBuilder::FinalValueNode::write(StringTrieBuilder &builder) {
+ offset=builder.writeValueAndFinal(value, TRUE);
+}
+
+UBool
+StringTrieBuilder::ValueNode::operator==(const Node &other) const {
+ if(this==&other) {
+ return TRUE;
+ }
+ if(!Node::operator==(other)) {
+ return FALSE;
+ }
+ const ValueNode &o=(const ValueNode &)other;
+ return hasValue==o.hasValue && (!hasValue || value==o.value);
+}
+
+UBool
+StringTrieBuilder::IntermediateValueNode::operator==(const Node &other) const {
+ if(this==&other) {
+ return TRUE;
+ }
+ if(!ValueNode::operator==(other)) {
+ return FALSE;
+ }
+ const IntermediateValueNode &o=(const IntermediateValueNode &)other;
+ return next==o.next;
+}
+
+int32_t
+StringTrieBuilder::IntermediateValueNode::markRightEdgesFirst(int32_t edgeNumber) {
+ if(offset==0) {
+ offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
+ }
+ return edgeNumber;
+}
+
+void
+StringTrieBuilder::IntermediateValueNode::write(StringTrieBuilder &builder) {
+ next->write(builder);
+ offset=builder.writeValueAndFinal(value, FALSE);
+}
+
+UBool
+StringTrieBuilder::LinearMatchNode::operator==(const Node &other) const {
+ if(this==&other) {
+ return TRUE;
+ }
+ if(!ValueNode::operator==(other)) {
+ return FALSE;
+ }
+ const LinearMatchNode &o=(const LinearMatchNode &)other;
+ return length==o.length && next==o.next;
+}
+
+int32_t
+StringTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) {
+ if(offset==0) {
+ offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
+ }
+ return edgeNumber;
+}
+
+UBool
+StringTrieBuilder::ListBranchNode::operator==(const Node &other) const {
+ if(this==&other) {
+ return TRUE;
+ }
+ if(!Node::operator==(other)) {
+ return FALSE;
+ }
+ const ListBranchNode &o=(const ListBranchNode &)other;
+ for(int32_t i=0; i<length; ++i) {
+ if(units[i]!=o.units[i] || values[i]!=o.values[i] || equal[i]!=o.equal[i]) {
+ return FALSE;
+ }
+ }
+ return TRUE;
+}
+
+int32_t
+StringTrieBuilder::ListBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
+ if(offset==0) {
+ firstEdgeNumber=edgeNumber;
+ int32_t step=0;
+ int32_t i=length;
+ do {
+ Node *edge=equal[--i];
+ if(edge!=NULL) {
+ edgeNumber=edge->markRightEdgesFirst(edgeNumber-step);
+ }
+ // For all but the rightmost edge, decrement the edge number.
+ step=1;
+ } while(i>0);
+ offset=edgeNumber;
+ }
+ return edgeNumber;
+}
+
+void
+StringTrieBuilder::ListBranchNode::write(StringTrieBuilder &builder) {
+ // Write the sub-nodes in reverse order: The jump lengths are deltas from
+ // after their own positions, so if we wrote the minUnit sub-node first,
+ // then its jump delta would be larger.
+ // Instead we write the minUnit sub-node last, for a shorter delta.
+ int32_t unitNumber=length-1;
+ Node *rightEdge=equal[unitNumber];
+ int32_t rightEdgeNumber= rightEdge==NULL ? firstEdgeNumber : rightEdge->getOffset();
+ do {
+ --unitNumber;
+ if(equal[unitNumber]!=NULL) {
+ equal[unitNumber]->writeUnlessInsideRightEdge(firstEdgeNumber, rightEdgeNumber, builder);
+ }
+ } while(unitNumber>0);
+ // The maxUnit sub-node is written as the very last one because we do
+ // not jump for it at all.
+ unitNumber=length-1;
+ if(rightEdge==NULL) {
+ builder.writeValueAndFinal(values[unitNumber], TRUE);
+ } else {
+ rightEdge->write(builder);
+ }
+ offset=builder.write(units[unitNumber]);
+ // Write the rest of this node's unit-value pairs.
+ while(--unitNumber>=0) {
+ int32_t value;
+ UBool isFinal;
+ if(equal[unitNumber]==NULL) {
+ // Write the final value for the one string ending with this unit.
+ value=values[unitNumber];
+ isFinal=TRUE;
+ } else {
+ // Write the delta to the start position of the sub-node.
+ U_ASSERT(equal[unitNumber]->getOffset()>0);
+ value=offset-equal[unitNumber]->getOffset();
+ isFinal=FALSE;
+ }
+ builder.writeValueAndFinal(value, isFinal);
+ offset=builder.write(units[unitNumber]);
+ }
+}
+
+UBool
+StringTrieBuilder::SplitBranchNode::operator==(const Node &other) const {
+ if(this==&other) {
+ return TRUE;
+ }
+ if(!Node::operator==(other)) {
+ return FALSE;
+ }
+ const SplitBranchNode &o=(const SplitBranchNode &)other;
+ return unit==o.unit && lessThan==o.lessThan && greaterOrEqual==o.greaterOrEqual;
+}
+
+int32_t
+StringTrieBuilder::SplitBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
+ if(offset==0) {
+ firstEdgeNumber=edgeNumber;
+ edgeNumber=greaterOrEqual->markRightEdgesFirst(edgeNumber);
+ offset=edgeNumber=lessThan->markRightEdgesFirst(edgeNumber-1);
+ }
+ return edgeNumber;
+}
+
+void
+StringTrieBuilder::SplitBranchNode::write(StringTrieBuilder &builder) {
+ // Encode the less-than branch first.
+ lessThan->writeUnlessInsideRightEdge(firstEdgeNumber, greaterOrEqual->getOffset(), builder);
+ // Encode the greater-or-equal branch last because we do not jump for it at all.
+ greaterOrEqual->write(builder);
+ // Write this node.
+ U_ASSERT(lessThan->getOffset()>0);
+ builder.writeDeltaTo(lessThan->getOffset()); // less-than
+ offset=builder.write(unit);
+}
+
+UBool
+StringTrieBuilder::BranchHeadNode::operator==(const Node &other) const {
+ if(this==&other) {
+ return TRUE;
+ }
+ if(!ValueNode::operator==(other)) {
+ return FALSE;
+ }
+ const BranchHeadNode &o=(const BranchHeadNode &)other;
+ return length==o.length && next==o.next;
+}
+
+int32_t
+StringTrieBuilder::BranchHeadNode::markRightEdgesFirst(int32_t edgeNumber) {
+ if(offset==0) {
+ offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
+ }
+ return edgeNumber;
+}
+
+void
+StringTrieBuilder::BranchHeadNode::write(StringTrieBuilder &builder) {
+ next->write(builder);
+ if(length<=builder.getMinLinearMatch()) {
+ offset=builder.writeValueAndType(hasValue, value, length-1);
+ } else {
+ builder.write(length-1);
+ offset=builder.writeValueAndType(hasValue, value, 0);
+ }
+}
+
+U_NAMESPACE_END