aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/icu/common/rbbirb.cpp
diff options
context:
space:
mode:
authorneksard <neksard@yandex-team.ru>2022-02-10 16:45:23 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:45:23 +0300
commit8f7cf138264e0caa318144bf8a2c950e0b0a8593 (patch)
tree83bf5c8c8047c42d8475e6095df90ccdc3d1b57f /contrib/libs/icu/common/rbbirb.cpp
parentd3a398281c6fd1d3672036cb2d63f842d2cb28c5 (diff)
downloadydb-8f7cf138264e0caa318144bf8a2c950e0b0a8593.tar.gz
Restoring authorship annotation for <neksard@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/icu/common/rbbirb.cpp')
-rw-r--r--contrib/libs/icu/common/rbbirb.cpp468
1 files changed, 234 insertions, 234 deletions
diff --git a/contrib/libs/icu/common/rbbirb.cpp b/contrib/libs/icu/common/rbbirb.cpp
index 68ded32e1d..8439583133 100644
--- a/contrib/libs/icu/common/rbbirb.cpp
+++ b/contrib/libs/icu/common/rbbirb.cpp
@@ -1,166 +1,166 @@
// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-//
-// file: rbbirb.cpp
-//
-// Copyright (C) 2002-2011, International Business Machines Corporation and others.
-// All Rights Reserved.
-//
-// This file contains the RBBIRuleBuilder class implementation. This is the main class for
-// building (compiling) break rules into the tables required by the runtime
-// RBBI engine.
-//
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/brkiter.h"
-#include "unicode/rbbi.h"
-#include "unicode/ubrk.h"
-#include "unicode/unistr.h"
-#include "unicode/uniset.h"
-#include "unicode/uchar.h"
-#include "unicode/uchriter.h"
-#include "unicode/parsepos.h"
-#include "unicode/parseerr.h"
-
-#include "cmemory.h"
-#include "cstring.h"
-#include "rbbirb.h"
-#include "rbbinode.h"
-#include "rbbiscan.h"
-#include "rbbisetb.h"
-#include "rbbitblb.h"
-#include "rbbidata.h"
+// License & terms of use: http://www.unicode.org/copyright.html
+//
+// file: rbbirb.cpp
+//
+// Copyright (C) 2002-2011, International Business Machines Corporation and others.
+// All Rights Reserved.
+//
+// This file contains the RBBIRuleBuilder class implementation. This is the main class for
+// building (compiling) break rules into the tables required by the runtime
+// RBBI engine.
+//
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/brkiter.h"
+#include "unicode/rbbi.h"
+#include "unicode/ubrk.h"
+#include "unicode/unistr.h"
+#include "unicode/uniset.h"
+#include "unicode/uchar.h"
+#include "unicode/uchriter.h"
+#include "unicode/parsepos.h"
+#include "unicode/parseerr.h"
+
+#include "cmemory.h"
+#include "cstring.h"
+#include "rbbirb.h"
+#include "rbbinode.h"
+#include "rbbiscan.h"
+#include "rbbisetb.h"
+#include "rbbitblb.h"
+#include "rbbidata.h"
#include "uassert.h"
-
-
-U_NAMESPACE_BEGIN
-
-
-//----------------------------------------------------------------------------------------
-//
-// Constructor.
-//
-//----------------------------------------------------------------------------------------
-RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules,
- UParseError *parseErr,
- UErrorCode &status)
+
+
+U_NAMESPACE_BEGIN
+
+
+//----------------------------------------------------------------------------------------
+//
+// Constructor.
+//
+//----------------------------------------------------------------------------------------
+RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules,
+ UParseError *parseErr,
+ UErrorCode &status)
: fRules(rules), fStrippedRules(rules)
-{
- fStatus = &status; // status is checked below
- fParseError = parseErr;
- fDebugEnv = NULL;
-#ifdef RBBI_DEBUG
- fDebugEnv = getenv("U_RBBIDEBUG");
-#endif
-
-
- fForwardTree = NULL;
- fReverseTree = NULL;
- fSafeFwdTree = NULL;
- fSafeRevTree = NULL;
- fDefaultTree = &fForwardTree;
+{
+ fStatus = &status; // status is checked below
+ fParseError = parseErr;
+ fDebugEnv = NULL;
+#ifdef RBBI_DEBUG
+ fDebugEnv = getenv("U_RBBIDEBUG");
+#endif
+
+
+ fForwardTree = NULL;
+ fReverseTree = NULL;
+ fSafeFwdTree = NULL;
+ fSafeRevTree = NULL;
+ fDefaultTree = &fForwardTree;
fForwardTable = NULL;
- fRuleStatusVals = NULL;
- fChainRules = FALSE;
- fLBCMNoChain = FALSE;
- fLookAheadHardBreak = FALSE;
- fUSetNodes = NULL;
- fRuleStatusVals = NULL;
- fScanner = NULL;
- fSetBuilder = NULL;
- if (parseErr) {
- uprv_memset(parseErr, 0, sizeof(UParseError));
- }
-
- if (U_FAILURE(status)) {
- return;
- }
-
- fUSetNodes = new UVector(status); // bcos status gets overwritten here
- fRuleStatusVals = new UVector(status);
- fScanner = new RBBIRuleScanner(this);
- fSetBuilder = new RBBISetBuilder(this);
- if (U_FAILURE(status)) {
- return;
- }
- if(fSetBuilder == 0 || fScanner == 0 || fUSetNodes == 0 || fRuleStatusVals == 0) {
- status = U_MEMORY_ALLOCATION_ERROR;
- }
-}
-
-
-
-//----------------------------------------------------------------------------------------
-//
-// Destructor
-//
-//----------------------------------------------------------------------------------------
-RBBIRuleBuilder::~RBBIRuleBuilder() {
-
- int i;
- for (i=0; ; i++) {
- RBBINode *n = (RBBINode *)fUSetNodes->elementAt(i);
- if (n==NULL) {
- break;
- }
- delete n;
- }
-
- delete fUSetNodes;
- delete fSetBuilder;
+ fRuleStatusVals = NULL;
+ fChainRules = FALSE;
+ fLBCMNoChain = FALSE;
+ fLookAheadHardBreak = FALSE;
+ fUSetNodes = NULL;
+ fRuleStatusVals = NULL;
+ fScanner = NULL;
+ fSetBuilder = NULL;
+ if (parseErr) {
+ uprv_memset(parseErr, 0, sizeof(UParseError));
+ }
+
+ if (U_FAILURE(status)) {
+ return;
+ }
+
+ fUSetNodes = new UVector(status); // bcos status gets overwritten here
+ fRuleStatusVals = new UVector(status);
+ fScanner = new RBBIRuleScanner(this);
+ fSetBuilder = new RBBISetBuilder(this);
+ if (U_FAILURE(status)) {
+ return;
+ }
+ if(fSetBuilder == 0 || fScanner == 0 || fUSetNodes == 0 || fRuleStatusVals == 0) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ }
+}
+
+
+
+//----------------------------------------------------------------------------------------
+//
+// Destructor
+//
+//----------------------------------------------------------------------------------------
+RBBIRuleBuilder::~RBBIRuleBuilder() {
+
+ int i;
+ for (i=0; ; i++) {
+ RBBINode *n = (RBBINode *)fUSetNodes->elementAt(i);
+ if (n==NULL) {
+ break;
+ }
+ delete n;
+ }
+
+ delete fUSetNodes;
+ delete fSetBuilder;
delete fForwardTable;
- delete fForwardTree;
- delete fReverseTree;
- delete fSafeFwdTree;
- delete fSafeRevTree;
- delete fScanner;
- delete fRuleStatusVals;
-}
-
-
-
-
-
-//----------------------------------------------------------------------------------------
-//
-// flattenData() - Collect up the compiled RBBI rule data and put it into
-// the format for saving in ICU data files,
-// which is also the format needed by the RBBI runtime engine.
-//
-//----------------------------------------------------------------------------------------
-static int32_t align8(int32_t i) {return (i+7) & 0xfffffff8;}
-
-RBBIDataHeader *RBBIRuleBuilder::flattenData() {
- int32_t i;
-
- if (U_FAILURE(*fStatus)) {
- return NULL;
- }
-
+ delete fForwardTree;
+ delete fReverseTree;
+ delete fSafeFwdTree;
+ delete fSafeRevTree;
+ delete fScanner;
+ delete fRuleStatusVals;
+}
+
+
+
+
+
+//----------------------------------------------------------------------------------------
+//
+// flattenData() - Collect up the compiled RBBI rule data and put it into
+// the format for saving in ICU data files,
+// which is also the format needed by the RBBI runtime engine.
+//
+//----------------------------------------------------------------------------------------
+static int32_t align8(int32_t i) {return (i+7) & 0xfffffff8;}
+
+RBBIDataHeader *RBBIRuleBuilder::flattenData() {
+ int32_t i;
+
+ if (U_FAILURE(*fStatus)) {
+ return NULL;
+ }
+
// Remove whitespace from the rules to make it smaller.
// The rule parser has already removed comments.
fStrippedRules = fScanner->stripRules(fStrippedRules);
-
- // Calculate the size of each section in the data.
- // Sizes here are padded up to a multiple of 8 for better memory alignment.
- // Sections sizes actually stored in the header are for the actual data
- // without the padding.
- //
- int32_t headerSize = align8(sizeof(RBBIDataHeader));
+
+ // Calculate the size of each section in the data.
+ // Sizes here are padded up to a multiple of 8 for better memory alignment.
+ // Sections sizes actually stored in the header are for the actual data
+ // without the padding.
+ //
+ int32_t headerSize = align8(sizeof(RBBIDataHeader));
int32_t forwardTableSize = align8(fForwardTable->getTableSize());
int32_t reverseTableSize = align8(fForwardTable->getSafeTableSize());
- int32_t trieSize = align8(fSetBuilder->getTrieSize());
- int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t));
+ int32_t trieSize = align8(fSetBuilder->getTrieSize());
+ int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t));
int32_t rulesSize = align8((fStrippedRules.length()+1) * sizeof(UChar));
-
+
int32_t totalSize = headerSize
+ forwardTableSize
+ reverseTableSize
- + statusTableSize + trieSize + rulesSize;
-
+ + statusTableSize + trieSize + rulesSize;
+
#ifdef RBBI_DEBUG
if (fDebugEnv && uprv_strstr(fDebugEnv, "size")) {
RBBIDebugPrintf("Header Size: %8d\n", headerSize);
@@ -174,79 +174,79 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
}
#endif
- RBBIDataHeader *data = (RBBIDataHeader *)uprv_malloc(totalSize);
- if (data == NULL) {
- *fStatus = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- uprv_memset(data, 0, totalSize);
-
-
- data->fMagic = 0xb1a0;
+ RBBIDataHeader *data = (RBBIDataHeader *)uprv_malloc(totalSize);
+ if (data == NULL) {
+ *fStatus = U_MEMORY_ALLOCATION_ERROR;
+ return NULL;
+ }
+ uprv_memset(data, 0, totalSize);
+
+
+ data->fMagic = 0xb1a0;
data->fFormatVersion[0] = RBBI_DATA_FORMAT_VERSION[0];
data->fFormatVersion[1] = RBBI_DATA_FORMAT_VERSION[1];
data->fFormatVersion[2] = RBBI_DATA_FORMAT_VERSION[2];
data->fFormatVersion[3] = RBBI_DATA_FORMAT_VERSION[3];
- data->fLength = totalSize;
- data->fCatCount = fSetBuilder->getNumCharCategories();
-
- data->fFTable = headerSize;
- data->fFTableLen = forwardTableSize;
+ data->fLength = totalSize;
+ data->fCatCount = fSetBuilder->getNumCharCategories();
+
+ data->fFTable = headerSize;
+ data->fFTableLen = forwardTableSize;
data->fRTable = data->fFTable + data->fFTableLen;
- data->fRTableLen = reverseTableSize;
-
+ data->fRTableLen = reverseTableSize;
+
data->fTrie = data->fRTable + data->fRTableLen;
- data->fTrieLen = fSetBuilder->getTrieSize();
- data->fStatusTable = data->fTrie + trieSize;
- data->fStatusTableLen= statusTableSize;
- data->fRuleSource = data->fStatusTable + statusTableSize;
+ data->fTrieLen = fSetBuilder->getTrieSize();
+ data->fStatusTable = data->fTrie + trieSize;
+ data->fStatusTableLen= statusTableSize;
+ data->fRuleSource = data->fStatusTable + statusTableSize;
data->fRuleSourceLen = fStrippedRules.length() * sizeof(UChar);
-
- uprv_memset(data->fReserved, 0, sizeof(data->fReserved));
-
+
+ uprv_memset(data->fReserved, 0, sizeof(data->fReserved));
+
fForwardTable->exportTable((uint8_t *)data + data->fFTable);
fForwardTable->exportSafeTable((uint8_t *)data + data->fRTable);
- fSetBuilder->serializeTrie ((uint8_t *)data + data->fTrie);
-
- int32_t *ruleStatusTable = (int32_t *)((uint8_t *)data + data->fStatusTable);
- for (i=0; i<fRuleStatusVals->size(); i++) {
- ruleStatusTable[i] = fRuleStatusVals->elementAti(i);
- }
-
+ fSetBuilder->serializeTrie ((uint8_t *)data + data->fTrie);
+
+ int32_t *ruleStatusTable = (int32_t *)((uint8_t *)data + data->fStatusTable);
+ for (i=0; i<fRuleStatusVals->size(); i++) {
+ ruleStatusTable[i] = fRuleStatusVals->elementAti(i);
+ }
+
fStrippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSize/2+1, *fStatus);
-
- return data;
-}
-
-
-//----------------------------------------------------------------------------------------
-//
-// createRuleBasedBreakIterator construct from source rules that are passed in
-// in a UnicodeString
-//
-//----------------------------------------------------------------------------------------
-BreakIterator *
-RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
- UParseError *parseError,
- UErrorCode &status)
-{
- //
- // Read the input rules, generate a parse tree, symbol table,
- // and list of all Unicode Sets referenced by the rules.
- //
- RBBIRuleBuilder builder(rules, parseError, status);
- if (U_FAILURE(status)) { // status checked here bcos build below doesn't
- return NULL;
- }
-
+
+ return data;
+}
+
+
+//----------------------------------------------------------------------------------------
+//
+// createRuleBasedBreakIterator construct from source rules that are passed in
+// in a UnicodeString
+//
+//----------------------------------------------------------------------------------------
+BreakIterator *
+RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
+ UParseError *parseError,
+ UErrorCode &status)
+{
+ //
+ // Read the input rules, generate a parse tree, symbol table,
+ // and list of all Unicode Sets referenced by the rules.
+ //
+ RBBIRuleBuilder builder(rules, parseError, status);
+ if (U_FAILURE(status)) { // status checked here bcos build below doesn't
+ return NULL;
+ }
+
RBBIDataHeader *data = builder.build(status);
if (U_FAILURE(status)) {
return nullptr;
}
- //
+ //
// Create a break iterator from the compiled rules.
// (Identical to creation from stored pre-compiled rules)
//
@@ -273,53 +273,53 @@ RBBIDataHeader *RBBIRuleBuilder::build(UErrorCode &status) {
}
//
- // UnicodeSet processing.
- // Munge the Unicode Sets to create a set of character categories.
+ // UnicodeSet processing.
+ // Munge the Unicode Sets to create a set of character categories.
// Generate the mapping tables (TRIE) from input code points to
- // the character categories.
- //
+ // the character categories.
+ //
fSetBuilder->buildRanges();
-
- //
- // Generate the DFA state transition table.
- //
+
+ //
+ // Generate the DFA state transition table.
+ //
fForwardTable = new RBBITableBuilder(this, &fForwardTree, status);
if (fForwardTable == nullptr) {
- status = U_MEMORY_ALLOCATION_ERROR;
+ status = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
- }
-
+ }
+
fForwardTable->buildForwardTable();
optimizeTables();
fForwardTable->buildSafeReverseTable(status);
+
-
-#ifdef RBBI_DEBUG
+#ifdef RBBI_DEBUG
if (fDebugEnv && uprv_strstr(fDebugEnv, "states")) {
fForwardTable->printStates();
fForwardTable->printRuleStatusTable();
fForwardTable->printReverseTable();
- }
-#endif
-
+ }
+#endif
+
fSetBuilder->buildTrie();
- //
- // Package up the compiled data into a memory image
- // in the run-time format.
- //
+ //
+ // Package up the compiled data into a memory image
+ // in the run-time format.
+ //
RBBIDataHeader *data = flattenData(); // returns NULL if error
if (U_FAILURE(status)) {
return nullptr;
- }
+ }
return data;
}
-
+
void RBBIRuleBuilder::optimizeTables() {
bool didSomething;
do {
didSomething = false;
-
+
// Begin looking for duplicates with char class 3.
// Classes 0, 1 and 2 are special; they are unused, {bof} and {eof} respectively,
// and should not have other categories merged into them.
@@ -329,13 +329,13 @@ void RBBIRuleBuilder::optimizeTables() {
fForwardTable->removeColumn(duplPair.second);
didSomething = true;
}
-
+
while (fForwardTable->removeDuplicateStates() > 0) {
didSomething = true;
}
} while (didSomething);
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */