Restoring authorship annotation for <neksard@yandex-team.ru>. Commit 1 of 2.

author: neksard <neksard@yandex-team.ru> 2022-02-10 16:45:23 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:45:23 +0300
commit: 8f7cf138264e0caa318144bf8a2c950e0b0a8593 (patch)
tree: 83bf5c8c8047c42d8475e6095df90ccdc3d1b57f /contrib/libs/icu/common/uniset.cpp
parent: d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (diff)
download: ydb-8f7cf138264e0caa318144bf8a2c950e0b0a8593.tar.gz
1 files changed, 2103 insertions, 2103 deletions
diff --git a/contrib/libs/icu/common/uniset.cpp b/contrib/libs/icu/common/uniset.cpp
index b73d612f24..405f734411 100644
--- a/contrib/libs/icu/common/uniset.cpp
+++ b/contrib/libs/icu/common/uniset.cpp
@@ -1,140 +1,140 @@
 // © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-*   Copyright (C) 1999-2015, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-**********************************************************************
-*   Date        Name        Description
-*   10/20/99    alan        Creation.
-**********************************************************************
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/parsepos.h"
-#include "unicode/symtable.h"
-#include "unicode/uniset.h"
+// License & terms of use: http://www.unicode.org/copyright.html 
+/* 
+********************************************************************** 
+*   Copyright (C) 1999-2015, International Business Machines 
+*   Corporation and others.  All Rights Reserved. 
+********************************************************************** 
+*   Date        Name        Description 
+*   10/20/99    alan        Creation. 
+********************************************************************** 
+*/ 
+ 
+#include "unicode/utypes.h" 
+#include "unicode/parsepos.h" 
+#include "unicode/symtable.h" 
+#include "unicode/uniset.h" 
 #include "unicode/ustring.h"
-#include "unicode/utf8.h"
-#include "unicode/utf16.h"
-#include "ruleiter.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "patternprops.h"
-#include "uelement.h"
-#include "util.h"
-#include "uvector.h"
-#include "charstr.h"
-#include "ustrfmt.h"
-#include "uassert.h"
-#include "bmpset.h"
-#include "unisetspan.h"
-
-// Define UChar constants using hex for EBCDIC compatibility
-// Used #define to reduce private static exports and memory access time.
-#define SET_OPEN        ((UChar)0x005B) /*[*/
-#define SET_CLOSE       ((UChar)0x005D) /*]*/
-#define HYPHEN          ((UChar)0x002D) /*-*/
-#define COMPLEMENT      ((UChar)0x005E) /*^*/
-#define COLON           ((UChar)0x003A) /*:*/
-#define BACKSLASH       ((UChar)0x005C) /*\*/
-#define INTERSECTION    ((UChar)0x0026) /*&*/
-#define UPPER_U         ((UChar)0x0055) /*U*/
-#define LOWER_U         ((UChar)0x0075) /*u*/
-#define OPEN_BRACE      ((UChar)123)    /*{*/
-#define CLOSE_BRACE     ((UChar)125)    /*}*/
-#define UPPER_P         ((UChar)0x0050) /*P*/
-#define LOWER_P         ((UChar)0x0070) /*p*/
-#define UPPER_N         ((UChar)78)     /*N*/
-#define EQUALS          ((UChar)0x003D) /*=*/
-
-// HIGH_VALUE > all valid values. 110000 for codepoints
-#define UNICODESET_HIGH 0x0110000
-
-// LOW <= all valid values. ZERO for codepoints
-#define UNICODESET_LOW 0x000000
-
+#include "unicode/utf8.h" 
+#include "unicode/utf16.h" 
+#include "ruleiter.h" 
+#include "cmemory.h" 
+#include "cstring.h" 
+#include "patternprops.h" 
+#include "uelement.h" 
+#include "util.h" 
+#include "uvector.h" 
+#include "charstr.h" 
+#include "ustrfmt.h" 
+#include "uassert.h" 
+#include "bmpset.h" 
+#include "unisetspan.h" 
+ 
+// Define UChar constants using hex for EBCDIC compatibility 
+// Used #define to reduce private static exports and memory access time. 
+#define SET_OPEN        ((UChar)0x005B) /*[*/ 
+#define SET_CLOSE       ((UChar)0x005D) /*]*/ 
+#define HYPHEN          ((UChar)0x002D) /*-*/ 
+#define COMPLEMENT      ((UChar)0x005E) /*^*/ 
+#define COLON           ((UChar)0x003A) /*:*/ 
+#define BACKSLASH       ((UChar)0x005C) /*\*/ 
+#define INTERSECTION    ((UChar)0x0026) /*&*/ 
+#define UPPER_U         ((UChar)0x0055) /*U*/ 
+#define LOWER_U         ((UChar)0x0075) /*u*/ 
+#define OPEN_BRACE      ((UChar)123)    /*{*/ 
+#define CLOSE_BRACE     ((UChar)125)    /*}*/ 
+#define UPPER_P         ((UChar)0x0050) /*P*/ 
+#define LOWER_P         ((UChar)0x0070) /*p*/ 
+#define UPPER_N         ((UChar)78)     /*N*/ 
+#define EQUALS          ((UChar)0x003D) /*=*/ 
+ 
+// HIGH_VALUE > all valid values. 110000 for codepoints 
+#define UNICODESET_HIGH 0x0110000 
+ 
+// LOW <= all valid values. ZERO for codepoints 
+#define UNICODESET_LOW 0x000000 
+ 
 /** Max list [0, 1, 2, ..., max code point, HIGH] */
 constexpr int32_t MAX_LENGTH = UNICODESET_HIGH + 1;
-
-U_NAMESPACE_BEGIN
-
-SymbolTable::~SymbolTable() {}
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeSet)
-
-/**
- * Modify the given UChar32 variable so that it is in range, by
- * pinning values < UNICODESET_LOW to UNICODESET_LOW, and
- * pinning values > UNICODESET_HIGH-1 to UNICODESET_HIGH-1.
- * It modifies its argument in-place and also returns it.
- */
-static inline UChar32 pinCodePoint(UChar32& c) {
-    if (c < UNICODESET_LOW) {
-        c = UNICODESET_LOW;
-    } else if (c > (UNICODESET_HIGH-1)) {
-        c = (UNICODESET_HIGH-1);
-    }
-    return c;
-}
-
-//----------------------------------------------------------------
-// Debugging
-//----------------------------------------------------------------
-
-// DO NOT DELETE THIS CODE.  This code is used to debug memory leaks.
-// To enable the debugging, define the symbol DEBUG_MEM in the line
-// below.  This will result in text being sent to stdout that looks
-// like this:
-//   DEBUG UnicodeSet: ct 0x00A39B20; 397 [\u0A81-\u0A83\u0A85-
-//   DEBUG UnicodeSet: dt 0x00A39B20; 396 [\u0A81-\u0A83\u0A85-
-// Each line lists a construction (ct) or destruction (dt) event, the
-// object address, the number of outstanding objects after the event,
-// and the pattern of the object in question.
-
-// #define DEBUG_MEM
-
-#ifdef DEBUG_MEM
-#include <stdio.h>
-static int32_t _dbgCount = 0;
-
-static inline void _dbgct(UnicodeSet* set) {
-    UnicodeString str;
-    set->toPattern(str, TRUE);
-    char buf[40];
-    str.extract(0, 39, buf, "");
-    printf("DEBUG UnicodeSet: ct 0x%08X; %d %s\n", set, ++_dbgCount, buf);
-}
-
-static inline void _dbgdt(UnicodeSet* set) {
-    UnicodeString str;
-    set->toPattern(str, TRUE);
-    char buf[40];
-    str.extract(0, 39, buf, "");
-    printf("DEBUG UnicodeSet: dt 0x%08X; %d %s\n", set, --_dbgCount, buf);
-}
-
-#else
-
-#define _dbgct(set)
-#define _dbgdt(set)
-
-#endif
-
-//----------------------------------------------------------------
-// UnicodeString in UVector support
-//----------------------------------------------------------------
-
-static void U_CALLCONV cloneUnicodeString(UElement *dst, UElement *src) {
-    dst->pointer = new UnicodeString(*(UnicodeString*)src->pointer);
-}
-
-static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
-    const UnicodeString &a = *(const UnicodeString*)t1.pointer;
-    const UnicodeString &b = *(const UnicodeString*)t2.pointer;
-    return a.compare(b);
-}
-
+ 
+U_NAMESPACE_BEGIN 
+ 
+SymbolTable::~SymbolTable() {} 
+ 
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeSet) 
+ 
+/** 
+ * Modify the given UChar32 variable so that it is in range, by 
+ * pinning values < UNICODESET_LOW to UNICODESET_LOW, and 
+ * pinning values > UNICODESET_HIGH-1 to UNICODESET_HIGH-1. 
+ * It modifies its argument in-place and also returns it. 
+ */ 
+static inline UChar32 pinCodePoint(UChar32& c) { 
+    if (c < UNICODESET_LOW) { 
+        c = UNICODESET_LOW; 
+    } else if (c > (UNICODESET_HIGH-1)) { 
+        c = (UNICODESET_HIGH-1); 
+    } 
+    return c; 
+} 
+ 
+//---------------------------------------------------------------- 
+// Debugging 
+//---------------------------------------------------------------- 
+ 
+// DO NOT DELETE THIS CODE.  This code is used to debug memory leaks. 
+// To enable the debugging, define the symbol DEBUG_MEM in the line 
+// below.  This will result in text being sent to stdout that looks 
+// like this: 
+//   DEBUG UnicodeSet: ct 0x00A39B20; 397 [\u0A81-\u0A83\u0A85- 
+//   DEBUG UnicodeSet: dt 0x00A39B20; 396 [\u0A81-\u0A83\u0A85- 
+// Each line lists a construction (ct) or destruction (dt) event, the 
+// object address, the number of outstanding objects after the event, 
+// and the pattern of the object in question. 
+ 
+// #define DEBUG_MEM 
+ 
+#ifdef DEBUG_MEM 
+#include <stdio.h> 
+static int32_t _dbgCount = 0; 
+ 
+static inline void _dbgct(UnicodeSet* set) { 
+    UnicodeString str; 
+    set->toPattern(str, TRUE); 
+    char buf[40]; 
+    str.extract(0, 39, buf, ""); 
+    printf("DEBUG UnicodeSet: ct 0x%08X; %d %s\n", set, ++_dbgCount, buf); 
+} 
+ 
+static inline void _dbgdt(UnicodeSet* set) { 
+    UnicodeString str; 
+    set->toPattern(str, TRUE); 
+    char buf[40]; 
+    str.extract(0, 39, buf, ""); 
+    printf("DEBUG UnicodeSet: dt 0x%08X; %d %s\n", set, --_dbgCount, buf); 
+} 
+ 
+#else 
+ 
+#define _dbgct(set) 
+#define _dbgdt(set) 
+ 
+#endif 
+ 
+//---------------------------------------------------------------- 
+// UnicodeString in UVector support 
+//---------------------------------------------------------------- 
+ 
+static void U_CALLCONV cloneUnicodeString(UElement *dst, UElement *src) { 
+    dst->pointer = new UnicodeString(*(UnicodeString*)src->pointer); 
+} 
+ 
+static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) { 
+    const UnicodeString &a = *(const UnicodeString*)t1.pointer; 
+    const UnicodeString &b = *(const UnicodeString*)t2.pointer; 
+    return a.compare(b); 
+} 
+ 
 UBool UnicodeSet::hasStrings() const {
     return strings != nullptr && !strings->isEmpty();
 }
@@ -147,45 +147,45 @@ UBool UnicodeSet::stringsContains(const UnicodeString &s) const {
     return strings != nullptr && strings->contains((void*) &s);
 }
 
-//----------------------------------------------------------------
-// Constructors &c
-//----------------------------------------------------------------
-
-/**
- * Constructs an empty set.
- */
+//---------------------------------------------------------------- 
+// Constructors &c 
+//---------------------------------------------------------------- 
+ 
+/** 
+ * Constructs an empty set. 
+ */ 
 UnicodeSet::UnicodeSet() {
     list[0] = UNICODESET_HIGH;
-    _dbgct(this);
-}
-
-/**
- * Constructs a set containing the given range. If <code>end >
- * start</code> then an empty set is created.
- *
- * @param start first character, inclusive, of range
- * @param end last character, inclusive, of range
- */
+    _dbgct(this); 
+} 
+ 
+/** 
+ * Constructs a set containing the given range. If <code>end > 
+ * start</code> then an empty set is created. 
+ * 
+ * @param start first character, inclusive, of range 
+ * @param end last character, inclusive, of range 
+ */ 
 UnicodeSet::UnicodeSet(UChar32 start, UChar32 end) {
     list[0] = UNICODESET_HIGH;
     add(start, end);
-    _dbgct(this);
-}
-
-/**
- * Constructs a set that is identical to the given UnicodeSet.
- */
+    _dbgct(this); 
+} 
+ 
+/** 
+ * Constructs a set that is identical to the given UnicodeSet. 
+ */ 
 UnicodeSet::UnicodeSet(const UnicodeSet& o) : UnicodeFilter(o) {
     *this = o;
-    _dbgct(this);
-}
-
-// Copy-construct as thawed.
+    _dbgct(this); 
+} 
+ 
+// Copy-construct as thawed. 
 UnicodeSet::UnicodeSet(const UnicodeSet& o, UBool /* asThawed */) : UnicodeFilter(o) {
     if (ensureCapacity(o.len)) {
-        // *this = o except for bmpSet and stringSpan
-        len = o.len;
-        uprv_memcpy(list, o.list, (size_t)len*sizeof(UChar32));
+        // *this = o except for bmpSet and stringSpan 
+        len = o.len; 
+        uprv_memcpy(list, o.list, (size_t)len*sizeof(UChar32)); 
         if (o.hasStrings()) {
             UErrorCode status = U_ZERO_ERROR;
             if (!allocateStrings(status) ||
@@ -193,62 +193,62 @@ UnicodeSet::UnicodeSet(const UnicodeSet& o, UBool /* asThawed */) : UnicodeFilte
                 setToBogus();
                 return;
             }
-        }
-        if (o.pat) {
+        } 
+        if (o.pat) { 
             setPattern(o.pat, o.patLen);
-        }
+        } 
         _dbgct(this);
-    }
-}
-
-/**
- * Destructs the set.
- */
-UnicodeSet::~UnicodeSet() {
-    _dbgdt(this); // first!
+    } 
+} 
+ 
+/** 
+ * Destructs the set. 
+ */ 
+UnicodeSet::~UnicodeSet() { 
+    _dbgdt(this); // first! 
     if (list != stackList) {
         uprv_free(list);
     }
-    delete bmpSet;
+    delete bmpSet; 
     if (buffer != stackList) {
-        uprv_free(buffer);
-    }
-    delete strings;
-    delete stringSpan;
-    releasePattern();
-}
-
-/**
- * Assigns this object to be a copy of another.
- */
-UnicodeSet& UnicodeSet::operator=(const UnicodeSet& o) {
+        uprv_free(buffer); 
+    } 
+    delete strings; 
+    delete stringSpan; 
+    releasePattern(); 
+} 
+ 
+/** 
+ * Assigns this object to be a copy of another. 
+ */ 
+UnicodeSet& UnicodeSet::operator=(const UnicodeSet& o) { 
     return copyFrom(o, FALSE);
 }
 
 UnicodeSet& UnicodeSet::copyFrom(const UnicodeSet& o, UBool asThawed) {
-    if (this == &o) {
-        return *this;
-    }
-    if (isFrozen()) {
-        return *this;
-    }
-    if (o.isBogus()) {
-        setToBogus();
-        return *this;
-    }
+    if (this == &o) { 
+        return *this; 
+    } 
+    if (isFrozen()) { 
+        return *this; 
+    } 
+    if (o.isBogus()) { 
+        setToBogus(); 
+        return *this; 
+    } 
     if (!ensureCapacity(o.len)) {
         // ensureCapacity will mark the UnicodeSet as Bogus if OOM failure happens.
         return *this;
-    }
-    len = o.len;
-    uprv_memcpy(list, o.list, (size_t)len*sizeof(UChar32));
+    } 
+    len = o.len; 
+    uprv_memcpy(list, o.list, (size_t)len*sizeof(UChar32)); 
     if (o.bmpSet != nullptr && !asThawed) {
-        bmpSet = new BMPSet(*o.bmpSet, list, len);
-        if (bmpSet == NULL) { // Check for memory allocation error.
-            setToBogus();
-            return *this;
-        }
-    }
+        bmpSet = new BMPSet(*o.bmpSet, list, len); 
+        if (bmpSet == NULL) { // Check for memory allocation error. 
+            setToBogus(); 
+            return *this; 
+        } 
+    } 
     if (o.hasStrings()) {
         UErrorCode status = U_ZERO_ERROR;
         if ((strings == nullptr && !allocateStrings(status)) ||
@@ -258,540 +258,540 @@ UnicodeSet& UnicodeSet::copyFrom(const UnicodeSet& o, UBool asThawed) {
         }
     } else if (hasStrings()) {
         strings->removeAllElements();
-    }
+    } 
     if (o.stringSpan != nullptr && !asThawed) {
-        stringSpan = new UnicodeSetStringSpan(*o.stringSpan, *strings);
-        if (stringSpan == NULL) { // Check for memory allocation error.
-            setToBogus();
-            return *this;
-        }
-    }
-    releasePattern();
-    if (o.pat) {
+        stringSpan = new UnicodeSetStringSpan(*o.stringSpan, *strings); 
+        if (stringSpan == NULL) { // Check for memory allocation error. 
+            setToBogus(); 
+            return *this; 
+        } 
+    } 
+    releasePattern(); 
+    if (o.pat) { 
         setPattern(o.pat, o.patLen);
-    }
-    return *this;
-}
-
-/**
- * Returns a copy of this object.  All UnicodeMatcher objects have
- * to support cloning in order to allow classes using
- * UnicodeMatchers, such as Transliterator, to implement cloning.
- */
+    } 
+    return *this; 
+} 
+ 
+/** 
+ * Returns a copy of this object.  All UnicodeMatcher objects have 
+ * to support cloning in order to allow classes using 
+ * UnicodeMatchers, such as Transliterator, to implement cloning. 
+ */ 
 UnicodeSet* UnicodeSet::clone() const {
-    return new UnicodeSet(*this);
-}
-
+    return new UnicodeSet(*this); 
+} 
+ 
 UnicodeSet *UnicodeSet::cloneAsThawed() const {
-    return new UnicodeSet(*this, TRUE);
-}
-
-/**
- * Compares the specified object with this set for equality.  Returns
- * <tt>true</tt> if the two sets
- * have the same size, and every member of the specified set is
- * contained in this set (or equivalently, every member of this set is
- * contained in the specified set).
- *
- * @param o set to be compared for equality with this set.
- * @return <tt>true</tt> if the specified set is equal to this set.
- */
-UBool UnicodeSet::operator==(const UnicodeSet& o) const {
-    if (len != o.len) return FALSE;
-    for (int32_t i = 0; i < len; ++i) {
-        if (list[i] != o.list[i]) return FALSE;
-    }
+    return new UnicodeSet(*this, TRUE); 
+} 
+ 
+/** 
+ * Compares the specified object with this set for equality.  Returns 
+ * <tt>true</tt> if the two sets 
+ * have the same size, and every member of the specified set is 
+ * contained in this set (or equivalently, every member of this set is 
+ * contained in the specified set). 
+ * 
+ * @param o set to be compared for equality with this set. 
+ * @return <tt>true</tt> if the specified set is equal to this set. 
+ */ 
+UBool UnicodeSet::operator==(const UnicodeSet& o) const { 
+    if (len != o.len) return FALSE; 
+    for (int32_t i = 0; i < len; ++i) { 
+        if (list[i] != o.list[i]) return FALSE; 
+    } 
     if (hasStrings() != o.hasStrings()) { return FALSE; }
     if (hasStrings() && *strings != *o.strings) return FALSE;
-    return TRUE;
-}
-
-/**
- * Returns the hash code value for this set.
- *
- * @return the hash code value for this set.
- * @see Object#hashCode()
- */
-int32_t UnicodeSet::hashCode(void) const {
+    return TRUE; 
+} 
+ 
+/** 
+ * Returns the hash code value for this set. 
+ * 
+ * @return the hash code value for this set. 
+ * @see Object#hashCode() 
+ */ 
+int32_t UnicodeSet::hashCode(void) const { 
     uint32_t result = static_cast<uint32_t>(len);
-    for (int32_t i = 0; i < len; ++i) {
+    for (int32_t i = 0; i < len; ++i) { 
         result *= 1000003u;
-        result += list[i];
-    }
+        result += list[i]; 
+    } 
     return static_cast<int32_t>(result);
-}
-
-//----------------------------------------------------------------
-// Public API
-//----------------------------------------------------------------
-
-/**
- * Returns the number of elements in this set (its cardinality),
- * Note than the elements of a set may include both individual
- * codepoints and strings.
- *
- * @return the number of elements in this set (its cardinality).
- */
-int32_t UnicodeSet::size(void) const {
-    int32_t n = 0;
-    int32_t count = getRangeCount();
-    for (int32_t i = 0; i < count; ++i) {
-        n += getRangeEnd(i) - getRangeStart(i) + 1;
-    }
+} 
+ 
+//---------------------------------------------------------------- 
+// Public API 
+//---------------------------------------------------------------- 
+ 
+/** 
+ * Returns the number of elements in this set (its cardinality), 
+ * Note than the elements of a set may include both individual 
+ * codepoints and strings. 
+ * 
+ * @return the number of elements in this set (its cardinality). 
+ */ 
+int32_t UnicodeSet::size(void) const { 
+    int32_t n = 0; 
+    int32_t count = getRangeCount(); 
+    for (int32_t i = 0; i < count; ++i) { 
+        n += getRangeEnd(i) - getRangeStart(i) + 1; 
+    } 
     return n + stringsSize();
-}
-
-/**
- * Returns <tt>true</tt> if this set contains no elements.
- *
- * @return <tt>true</tt> if this set contains no elements.
- */
-UBool UnicodeSet::isEmpty(void) const {
+} 
+ 
+/** 
+ * Returns <tt>true</tt> if this set contains no elements. 
+ * 
+ * @return <tt>true</tt> if this set contains no elements. 
+ */ 
+UBool UnicodeSet::isEmpty(void) const { 
     return len == 1 && !hasStrings();
-}
-
-/**
- * Returns true if this set contains the given character.
- * @param c character to be checked for containment
- * @return true if the test condition is met
- */
-UBool UnicodeSet::contains(UChar32 c) const {
-    // Set i to the index of the start item greater than ch
-    // We know we will terminate without length test!
-    // LATER: for large sets, add binary search
-    //int32_t i = -1;
-    //for (;;) {
-    //    if (c < list[++i]) break;
-    //}
-    if (bmpSet != NULL) {
-        return bmpSet->contains(c);
-    }
-    if (stringSpan != NULL) {
-        return stringSpan->contains(c);
-    }
-    if (c >= UNICODESET_HIGH) { // Don't need to check LOW bound
-        return FALSE;
-    }
-    int32_t i = findCodePoint(c);
-    return (UBool)(i & 1); // return true if odd
-}
-
-/**
- * Returns the smallest value i such that c < list[i].  Caller
- * must ensure that c is a legal value or this method will enter
- * an infinite loop.  This method performs a binary search.
- * @param c a character in the range MIN_VALUE..MAX_VALUE
- * inclusive
- * @return the smallest integer i in the range 0..len-1,
- * inclusive, such that c < list[i]
- */
-int32_t UnicodeSet::findCodePoint(UChar32 c) const {
-    /* Examples:
-                                       findCodePoint(c)
-       set              list[]         c=0 1 3 4 7 8
-       ===              ==============   ===========
-       []               [110000]         0 0 0 0 0 0
-       [\u0000-\u0003]  [0, 4, 110000]   1 1 1 2 2 2
-       [\u0004-\u0007]  [4, 8, 110000]   0 0 0 1 1 2
-       [:Any:]          [0, 110000]      1 1 1 1 1 1
-     */
-
-    // Return the smallest i such that c < list[i].  Assume
-    // list[len - 1] == HIGH and that c is legal (0..HIGH-1).
-    if (c < list[0])
-        return 0;
-    // High runner test.  c is often after the last range, so an
-    // initial check for this condition pays off.
-    int32_t lo = 0;
-    int32_t hi = len - 1;
-    if (lo >= hi || c >= list[hi-1])
-        return hi;
-    // invariant: c >= list[lo]
-    // invariant: c < list[hi]
-    for (;;) {
-        int32_t i = (lo + hi) >> 1;
-        if (i == lo) {
-            break; // Found!
-        } else if (c < list[i]) {
-            hi = i;
-        } else {
-            lo = i;
-        }
-    }
-    return hi;
-}
-
-/**
- * Returns true if this set contains every character
- * of the given range.
- * @param start first character, inclusive, of the range
- * @param end last character, inclusive, of the range
- * @return true if the test condition is met
- */
-UBool UnicodeSet::contains(UChar32 start, UChar32 end) const {
-    //int32_t i = -1;
-    //for (;;) {
-    //    if (start < list[++i]) break;
-    //}
-    int32_t i = findCodePoint(start);
-    return ((i & 1) != 0 && end < list[i]);
-}
-
-/**
- * Returns <tt>true</tt> if this set contains the given
- * multicharacter string.
- * @param s string to be checked for containment
- * @return <tt>true</tt> if this set contains the specified string
- */
-UBool UnicodeSet::contains(const UnicodeString& s) const {
-    if (s.length() == 0) return FALSE;
-    int32_t cp = getSingleCP(s);
-    if (cp < 0) {
+} 
+ 
+/** 
+ * Returns true if this set contains the given character. 
+ * @param c character to be checked for containment 
+ * @return true if the test condition is met 
+ */ 
+UBool UnicodeSet::contains(UChar32 c) const { 
+    // Set i to the index of the start item greater than ch 
+    // We know we will terminate without length test! 
+    // LATER: for large sets, add binary search 
+    //int32_t i = -1; 
+    //for (;;) { 
+    //    if (c < list[++i]) break; 
+    //} 
+    if (bmpSet != NULL) { 
+        return bmpSet->contains(c); 
+    } 
+    if (stringSpan != NULL) { 
+        return stringSpan->contains(c); 
+    } 
+    if (c >= UNICODESET_HIGH) { // Don't need to check LOW bound 
+        return FALSE; 
+    } 
+    int32_t i = findCodePoint(c); 
+    return (UBool)(i & 1); // return true if odd 
+} 
+ 
+/** 
+ * Returns the smallest value i such that c < list[i].  Caller 
+ * must ensure that c is a legal value or this method will enter 
+ * an infinite loop.  This method performs a binary search. 
+ * @param c a character in the range MIN_VALUE..MAX_VALUE 
+ * inclusive 
+ * @return the smallest integer i in the range 0..len-1, 
+ * inclusive, such that c < list[i] 
+ */ 
+int32_t UnicodeSet::findCodePoint(UChar32 c) const { 
+    /* Examples: 
+                                       findCodePoint(c) 
+       set              list[]         c=0 1 3 4 7 8 
+       ===              ==============   =========== 
+       []               [110000]         0 0 0 0 0 0 
+       [\u0000-\u0003]  [0, 4, 110000]   1 1 1 2 2 2 
+       [\u0004-\u0007]  [4, 8, 110000]   0 0 0 1 1 2 
+       [:Any:]          [0, 110000]      1 1 1 1 1 1 
+     */ 
+ 
+    // Return the smallest i such that c < list[i].  Assume 
+    // list[len - 1] == HIGH and that c is legal (0..HIGH-1). 
+    if (c < list[0]) 
+        return 0; 
+    // High runner test.  c is often after the last range, so an 
+    // initial check for this condition pays off. 
+    int32_t lo = 0; 
+    int32_t hi = len - 1; 
+    if (lo >= hi || c >= list[hi-1]) 
+        return hi; 
+    // invariant: c >= list[lo] 
+    // invariant: c < list[hi] 
+    for (;;) { 
+        int32_t i = (lo + hi) >> 1; 
+        if (i == lo) { 
+            break; // Found! 
+        } else if (c < list[i]) { 
+            hi = i; 
+        } else { 
+            lo = i; 
+        } 
+    } 
+    return hi; 
+} 
+ 
+/** 
+ * Returns true if this set contains every character 
+ * of the given range. 
+ * @param start first character, inclusive, of the range 
+ * @param end last character, inclusive, of the range 
+ * @return true if the test condition is met 
+ */ 
+UBool UnicodeSet::contains(UChar32 start, UChar32 end) const { 
+    //int32_t i = -1; 
+    //for (;;) { 
+    //    if (start < list[++i]) break; 
+    //} 
+    int32_t i = findCodePoint(start); 
+    return ((i & 1) != 0 && end < list[i]); 
+} 
+ 
+/** 
+ * Returns <tt>true</tt> if this set contains the given 
+ * multicharacter string. 
+ * @param s string to be checked for containment 
+ * @return <tt>true</tt> if this set contains the specified string 
+ */ 
+UBool UnicodeSet::contains(const UnicodeString& s) const { 
+    if (s.length() == 0) return FALSE; 
+    int32_t cp = getSingleCP(s); 
+    if (cp < 0) { 
         return stringsContains(s);
-    } else {
-        return contains((UChar32) cp);
-    }
-}
-
-/**
- * Returns true if this set contains all the characters and strings
- * of the given set.
- * @param c set to be checked for containment
- * @return true if the test condition is met
- */
-UBool UnicodeSet::containsAll(const UnicodeSet& c) const {
-    // The specified set is a subset if all of its pairs are contained in
-    // this set.  It's possible to code this more efficiently in terms of
-    // direct manipulation of the inversion lists if the need arises.
-    int32_t n = c.getRangeCount();
-    for (int i=0; i<n; ++i) {
-        if (!contains(c.getRangeStart(i), c.getRangeEnd(i))) {
-            return FALSE;
-        }
-    }
+    } else { 
+        return contains((UChar32) cp); 
+    } 
+} 
+ 
+/** 
+ * Returns true if this set contains all the characters and strings 
+ * of the given set. 
+ * @param c set to be checked for containment 
+ * @return true if the test condition is met 
+ */ 
+UBool UnicodeSet::containsAll(const UnicodeSet& c) const { 
+    // The specified set is a subset if all of its pairs are contained in 
+    // this set.  It's possible to code this more efficiently in terms of 
+    // direct manipulation of the inversion lists if the need arises. 
+    int32_t n = c.getRangeCount(); 
+    for (int i=0; i<n; ++i) { 
+        if (!contains(c.getRangeStart(i), c.getRangeEnd(i))) { 
+            return FALSE; 
+        } 
+    } 
     return !c.hasStrings() || (strings != nullptr && strings->containsAll(*c.strings));
-}
-
-/**
- * Returns true if this set contains all the characters
- * of the given string.
- * @param s string containing characters to be checked for containment
- * @return true if the test condition is met
- */
-UBool UnicodeSet::containsAll(const UnicodeString& s) const {
-    return (UBool)(span(s.getBuffer(), s.length(), USET_SPAN_CONTAINED) ==
-                   s.length());
-}
-
-/**
- * Returns true if this set contains none of the characters
- * of the given range.
- * @param start first character, inclusive, of the range
- * @param end last character, inclusive, of the range
- * @return true if the test condition is met
- */
-UBool UnicodeSet::containsNone(UChar32 start, UChar32 end) const {
-    //int32_t i = -1;
-    //for (;;) {
-    //    if (start < list[++i]) break;
-    //}
-    int32_t i = findCodePoint(start);
-    return ((i & 1) == 0 && end < list[i]);
-}
-
-/**
- * Returns true if this set contains none of the characters and strings
- * of the given set.
- * @param c set to be checked for containment
- * @return true if the test condition is met
- */
-UBool UnicodeSet::containsNone(const UnicodeSet& c) const {
-    // The specified set is a subset if all of its pairs are contained in
-    // this set.  It's possible to code this more efficiently in terms of
-    // direct manipulation of the inversion lists if the need arises.
-    int32_t n = c.getRangeCount();
-    for (int32_t i=0; i<n; ++i) {
-        if (!containsNone(c.getRangeStart(i), c.getRangeEnd(i))) {
-            return FALSE;
-        }
-    }
+} 
+ 
+/** 
+ * Returns true if this set contains all the characters 
+ * of the given string. 
+ * @param s string containing characters to be checked for containment 
+ * @return true if the test condition is met 
+ */ 
+UBool UnicodeSet::containsAll(const UnicodeString& s) const { 
+    return (UBool)(span(s.getBuffer(), s.length(), USET_SPAN_CONTAINED) == 
+                   s.length()); 
+} 
+ 
+/** 
+ * Returns true if this set contains none of the characters 
+ * of the given range. 
+ * @param start first character, inclusive, of the range 
+ * @param end last character, inclusive, of the range 
+ * @return true if the test condition is met 
+ */ 
+UBool UnicodeSet::containsNone(UChar32 start, UChar32 end) const { 
+    //int32_t i = -1; 
+    //for (;;) { 
+    //    if (start < list[++i]) break; 
+    //} 
+    int32_t i = findCodePoint(start); 
+    return ((i & 1) == 0 && end < list[i]); 
+} 
+ 
+/** 
+ * Returns true if this set contains none of the characters and strings 
+ * of the given set. 
+ * @param c set to be checked for containment 
+ * @return true if the test condition is met 
+ */ 
+UBool UnicodeSet::containsNone(const UnicodeSet& c) const { 
+    // The specified set is a subset if all of its pairs are contained in 
+    // this set.  It's possible to code this more efficiently in terms of 
+    // direct manipulation of the inversion lists if the need arises. 
+    int32_t n = c.getRangeCount(); 
+    for (int32_t i=0; i<n; ++i) { 
+        if (!containsNone(c.getRangeStart(i), c.getRangeEnd(i))) { 
+            return FALSE; 
+        } 
+    } 
     return strings == nullptr || !c.hasStrings() || strings->containsNone(*c.strings);
-}
-
-/**
- * Returns true if this set contains none of the characters
- * of the given string.
- * @param s string containing characters to be checked for containment
- * @return true if the test condition is met
- */
-UBool UnicodeSet::containsNone(const UnicodeString& s) const {
-    return (UBool)(span(s.getBuffer(), s.length(), USET_SPAN_NOT_CONTAINED) ==
-                   s.length());
-}
-
-/**
- * Returns <tt>true</tt> if this set contains any character whose low byte
- * is the given value.  This is used by <tt>RuleBasedTransliterator</tt> for
- * indexing.
- */
-UBool UnicodeSet::matchesIndexValue(uint8_t v) const {
-    /* The index value v, in the range [0,255], is contained in this set if
-     * it is contained in any pair of this set.  Pairs either have the high
-     * bytes equal, or unequal.  If the high bytes are equal, then we have
-     * aaxx..aayy, where aa is the high byte.  Then v is contained if xx <=
-     * v <= yy.  If the high bytes are unequal we have aaxx..bbyy, bb>aa.
-     * Then v is contained if xx <= v || v <= yy.  (This is identical to the
-     * time zone month containment logic.)
-     */
-    int32_t i;
-    int32_t rangeCount=getRangeCount();
-    for (i=0; i<rangeCount; ++i) {
-        UChar32 low = getRangeStart(i);
-        UChar32 high = getRangeEnd(i);
-        if ((low & ~0xFF) == (high & ~0xFF)) {
-            if ((low & 0xFF) <= v && v <= (high & 0xFF)) {
-                return TRUE;
-            }
-        } else if ((low & 0xFF) <= v || v <= (high & 0xFF)) {
-            return TRUE;
-        }
-    }
+} 
+ 
+/** 
+ * Returns true if this set contains none of the characters 
+ * of the given string. 
+ * @param s string containing characters to be checked for containment 
+ * @return true if the test condition is met 
+ */ 
+UBool UnicodeSet::containsNone(const UnicodeString& s) const { 
+    return (UBool)(span(s.getBuffer(), s.length(), USET_SPAN_NOT_CONTAINED) == 
+                   s.length()); 
+} 
+ 
+/** 
+ * Returns <tt>true</tt> if this set contains any character whose low byte 
+ * is the given value.  This is used by <tt>RuleBasedTransliterator</tt> for 
+ * indexing. 
+ */ 
+UBool UnicodeSet::matchesIndexValue(uint8_t v) const { 
+    /* The index value v, in the range [0,255], is contained in this set if 
+     * it is contained in any pair of this set.  Pairs either have the high 
+     * bytes equal, or unequal.  If the high bytes are equal, then we have 
+     * aaxx..aayy, where aa is the high byte.  Then v is contained if xx <= 
+     * v <= yy.  If the high bytes are unequal we have aaxx..bbyy, bb>aa. 
+     * Then v is contained if xx <= v || v <= yy.  (This is identical to the 
+     * time zone month containment logic.) 
+     */ 
+    int32_t i; 
+    int32_t rangeCount=getRangeCount(); 
+    for (i=0; i<rangeCount; ++i) { 
+        UChar32 low = getRangeStart(i); 
+        UChar32 high = getRangeEnd(i); 
+        if ((low & ~0xFF) == (high & ~0xFF)) { 
+            if ((low & 0xFF) <= v && v <= (high & 0xFF)) { 
+                return TRUE; 
+            } 
+        } else if ((low & 0xFF) <= v || v <= (high & 0xFF)) { 
+            return TRUE; 
+        } 
+    } 
     if (hasStrings()) {
-        for (i=0; i<strings->size(); ++i) {
-            const UnicodeString& s = *(const UnicodeString*)strings->elementAt(i);
-            //if (s.length() == 0) {
-            //    // Empty strings match everything
-            //    return TRUE;
-            //}
-            // assert(s.length() != 0); // We enforce this elsewhere
-            UChar32 c = s.char32At(0);
-            if ((c & 0xFF) == v) {
-                return TRUE;
-            }
-        }
-    }
-    return FALSE;
-}
-
-/**
- * Implementation of UnicodeMatcher::matches().  Always matches the
- * longest possible multichar string.
- */
-UMatchDegree UnicodeSet::matches(const Replaceable& text,
-                                 int32_t& offset,
-                                 int32_t limit,
-                                 UBool incremental) {
-    if (offset == limit) {
-        // Strings, if any, have length != 0, so we don't worry
-        // about them here.  If we ever allow zero-length strings
-        // we much check for them here.
-        if (contains(U_ETHER)) {
-            return incremental ? U_PARTIAL_MATCH : U_MATCH;
-        } else {
-            return U_MISMATCH;
-        }
-    } else {
+        for (i=0; i<strings->size(); ++i) { 
+            const UnicodeString& s = *(const UnicodeString*)strings->elementAt(i); 
+            //if (s.length() == 0) { 
+            //    // Empty strings match everything 
+            //    return TRUE; 
+            //} 
+            // assert(s.length() != 0); // We enforce this elsewhere 
+            UChar32 c = s.char32At(0); 
+            if ((c & 0xFF) == v) { 
+                return TRUE; 
+            } 
+        } 
+    } 
+    return FALSE; 
+} 
+ 
+/** 
+ * Implementation of UnicodeMatcher::matches().  Always matches the 
+ * longest possible multichar string. 
+ */ 
+UMatchDegree UnicodeSet::matches(const Replaceable& text, 
+                                 int32_t& offset, 
+                                 int32_t limit, 
+                                 UBool incremental) { 
+    if (offset == limit) { 
+        // Strings, if any, have length != 0, so we don't worry 
+        // about them here.  If we ever allow zero-length strings 
+        // we much check for them here. 
+        if (contains(U_ETHER)) { 
+            return incremental ? U_PARTIAL_MATCH : U_MATCH; 
+        } else { 
+            return U_MISMATCH; 
+        } 
+    } else { 
         if (hasStrings()) { // try strings first
-
-            // might separate forward and backward loops later
-            // for now they are combined
-
-            // TODO Improve efficiency of this, at least in the forward
-            // direction, if not in both.  In the forward direction we
-            // can assume the strings are sorted.
-
-            int32_t i;
-            UBool forward = offset < limit;
-
-            // firstChar is the leftmost char to match in the
-            // forward direction or the rightmost char to match in
-            // the reverse direction.
-            UChar firstChar = text.charAt(offset);
-
-            // If there are multiple strings that can match we
-            // return the longest match.
-            int32_t highWaterLength = 0;
-
-            for (i=0; i<strings->size(); ++i) {
-                const UnicodeString& trial = *(const UnicodeString*)strings->elementAt(i);
-
-                //if (trial.length() == 0) {
-                //    return U_MATCH; // null-string always matches
-                //}
-                // assert(trial.length() != 0); // We ensure this elsewhere
-
-                UChar c = trial.charAt(forward ? 0 : trial.length() - 1);
-
-                // Strings are sorted, so we can optimize in the
-                // forward direction.
-                if (forward && c > firstChar) break;
-                if (c != firstChar) continue;
-
-                int32_t matchLen = matchRest(text, offset, limit, trial);
-
-                if (incremental) {
-                    int32_t maxLen = forward ? limit-offset : offset-limit;
-                    if (matchLen == maxLen) {
-                        // We have successfully matched but only up to limit.
-                        return U_PARTIAL_MATCH;
-                    }
-                }
-
-                if (matchLen == trial.length()) {
-                    // We have successfully matched the whole string.
-                    if (matchLen > highWaterLength) {
-                        highWaterLength = matchLen;
-                    }
-                    // In the forward direction we know strings
-                    // are sorted so we can bail early.
-                    if (forward && matchLen < highWaterLength) {
-                        break;
-                    }
-                    continue;
-                }
-            }
-
-            // We've checked all strings without a partial match.
-            // If we have full matches, return the longest one.
-            if (highWaterLength != 0) {
-                offset += forward ? highWaterLength : -highWaterLength;
-                return U_MATCH;
-            }
-        }
-        return UnicodeFilter::matches(text, offset, limit, incremental);
-    }
-}
-
-/**
- * Returns the longest match for s in text at the given position.
- * If limit > start then match forward from start+1 to limit
- * matching all characters except s.charAt(0).  If limit < start,
- * go backward starting from start-1 matching all characters
- * except s.charAt(s.length()-1).  This method assumes that the
- * first character, text.charAt(start), matches s, so it does not
- * check it.
- * @param text the text to match
- * @param start the first character to match.  In the forward
- * direction, text.charAt(start) is matched against s.charAt(0).
- * In the reverse direction, it is matched against
- * s.charAt(s.length()-1).
- * @param limit the limit offset for matching, either last+1 in
- * the forward direction, or last-1 in the reverse direction,
- * where last is the index of the last character to match.
- * @return If part of s matches up to the limit, return |limit -
- * start|.  If all of s matches before reaching the limit, return
- * s.length().  If there is a mismatch between s and text, return
- * 0
- */
-int32_t UnicodeSet::matchRest(const Replaceable& text,
-                              int32_t start, int32_t limit,
-                              const UnicodeString& s) {
-    int32_t i;
-    int32_t maxLen;
-    int32_t slen = s.length();
-    if (start < limit) {
-        maxLen = limit - start;
-        if (maxLen > slen) maxLen = slen;
-        for (i = 1; i < maxLen; ++i) {
-            if (text.charAt(start + i) != s.charAt(i)) return 0;
-        }
-    } else {
-        maxLen = start - limit;
-        if (maxLen > slen) maxLen = slen;
-        --slen; // <=> slen = s.length() - 1;
-        for (i = 1; i < maxLen; ++i) {
-            if (text.charAt(start - i) != s.charAt(slen - i)) return 0;
-        }
-    }
-    return maxLen;
-}
-
-/**
- * Implement of UnicodeMatcher
- */
-void UnicodeSet::addMatchSetTo(UnicodeSet& toUnionTo) const {
-    toUnionTo.addAll(*this);
-}
-
-/**
- * Returns the index of the given character within this set, where
- * the set is ordered by ascending code point.  If the character
- * is not in this set, return -1.  The inverse of this method is
- * <code>charAt()</code>.
- * @return an index from 0..size()-1, or -1
- */
-int32_t UnicodeSet::indexOf(UChar32 c) const {
-    if (c < MIN_VALUE || c > MAX_VALUE) {
-        return -1;
-    }
-    int32_t i = 0;
-    int32_t n = 0;
-    for (;;) {
-        UChar32 start = list[i++];
-        if (c < start) {
-            return -1;
-        }
-        UChar32 limit = list[i++];
-        if (c < limit) {
-            return n + c - start;
-        }
-        n += limit - start;
-    }
-}
-
-/**
- * Returns the character at the given index within this set, where
- * the set is ordered by ascending code point.  If the index is
- * out of range, return (UChar32)-1.  The inverse of this method is
- * <code>indexOf()</code>.
- * @param index an index from 0..size()-1
- * @return the character at the given index, or (UChar32)-1.
- */
-UChar32 UnicodeSet::charAt(int32_t index) const {
-    if (index >= 0) {
-        // len2 is the largest even integer <= len, that is, it is len
-        // for even values and len-1 for odd values.  With odd values
-        // the last entry is UNICODESET_HIGH.
-        int32_t len2 = len & ~1;
-        for (int32_t i=0; i < len2;) {
-            UChar32 start = list[i++];
-            int32_t count = list[i++] - start;
-            if (index < count) {
-                return (UChar32)(start + index);
-            }
-            index -= count;
-        }
-    }
-    return (UChar32)-1;
-}
-
-/**
- * Make this object represent the range <code>start - end</code>.
- * If <code>end > start</code> then this object is set to an
- * an empty range.
- *
- * @param start first character in the set, inclusive
- * @rparam end last character in the set, inclusive
- */
-UnicodeSet& UnicodeSet::set(UChar32 start, UChar32 end) {
-    clear();
-    complement(start, end);
-    return *this;
-}
-
-/**
- * Adds the specified range to this set if it is not already
- * present.  If this set already contains the specified range,
- * the call leaves this set unchanged.  If <code>end > start</code>
- * then an empty range is added, leaving the set unchanged.
- *
- * @param start first character, inclusive, of range to be added
- * to this set.
- * @param end last character, inclusive, of range to be added
- * to this set.
- */
-UnicodeSet& UnicodeSet::add(UChar32 start, UChar32 end) {
-    if (pinCodePoint(start) < pinCodePoint(end)) {
+ 
+            // might separate forward and backward loops later 
+            // for now they are combined 
+ 
+            // TODO Improve efficiency of this, at least in the forward 
+            // direction, if not in both.  In the forward direction we 
+            // can assume the strings are sorted. 
+ 
+            int32_t i; 
+            UBool forward = offset < limit; 
+ 
+            // firstChar is the leftmost char to match in the 
+            // forward direction or the rightmost char to match in 
+            // the reverse direction. 
+            UChar firstChar = text.charAt(offset); 
+ 
+            // If there are multiple strings that can match we 
+            // return the longest match. 
+            int32_t highWaterLength = 0; 
+ 
+            for (i=0; i<strings->size(); ++i) { 
+                const UnicodeString& trial = *(const UnicodeString*)strings->elementAt(i); 
+ 
+                //if (trial.length() == 0) { 
+                //    return U_MATCH; // null-string always matches 
+                //} 
+                // assert(trial.length() != 0); // We ensure this elsewhere 
+ 
+                UChar c = trial.charAt(forward ? 0 : trial.length() - 1); 
+ 
+                // Strings are sorted, so we can optimize in the 
+                // forward direction. 
+                if (forward && c > firstChar) break; 
+                if (c != firstChar) continue; 
+ 
+                int32_t matchLen = matchRest(text, offset, limit, trial); 
+ 
+                if (incremental) { 
+                    int32_t maxLen = forward ? limit-offset : offset-limit; 
+                    if (matchLen == maxLen) { 
+                        // We have successfully matched but only up to limit. 
+                        return U_PARTIAL_MATCH; 
+                    } 
+                } 
+ 
+                if (matchLen == trial.length()) { 
+                    // We have successfully matched the whole string. 
+                    if (matchLen > highWaterLength) { 
+                        highWaterLength = matchLen; 
+                    } 
+                    // In the forward direction we know strings 
+                    // are sorted so we can bail early. 
+                    if (forward && matchLen < highWaterLength) { 
+                        break; 
+                    } 
+                    continue; 
+                } 
+            } 
+ 
+            // We've checked all strings without a partial match. 
+            // If we have full matches, return the longest one. 
+            if (highWaterLength != 0) { 
+                offset += forward ? highWaterLength : -highWaterLength; 
+                return U_MATCH; 
+            } 
+        } 
+        return UnicodeFilter::matches(text, offset, limit, incremental); 
+    } 
+} 
+ 
+/** 
+ * Returns the longest match for s in text at the given position. 
+ * If limit > start then match forward from start+1 to limit 
+ * matching all characters except s.charAt(0).  If limit < start, 
+ * go backward starting from start-1 matching all characters 
+ * except s.charAt(s.length()-1).  This method assumes that the 
+ * first character, text.charAt(start), matches s, so it does not 
+ * check it. 
+ * @param text the text to match 
+ * @param start the first character to match.  In the forward 
+ * direction, text.charAt(start) is matched against s.charAt(0). 
+ * In the reverse direction, it is matched against 
+ * s.charAt(s.length()-1). 
+ * @param limit the limit offset for matching, either last+1 in 
+ * the forward direction, or last-1 in the reverse direction, 
+ * where last is the index of the last character to match. 
+ * @return If part of s matches up to the limit, return |limit - 
+ * start|.  If all of s matches before reaching the limit, return 
+ * s.length().  If there is a mismatch between s and text, return 
+ * 0 
+ */ 
+int32_t UnicodeSet::matchRest(const Replaceable& text, 
+                              int32_t start, int32_t limit, 
+                              const UnicodeString& s) { 
+    int32_t i; 
+    int32_t maxLen; 
+    int32_t slen = s.length(); 
+    if (start < limit) { 
+        maxLen = limit - start; 
+        if (maxLen > slen) maxLen = slen; 
+        for (i = 1; i < maxLen; ++i) { 
+            if (text.charAt(start + i) != s.charAt(i)) return 0; 
+        } 
+    } else { 
+        maxLen = start - limit; 
+        if (maxLen > slen) maxLen = slen; 
+        --slen; // <=> slen = s.length() - 1; 
+        for (i = 1; i < maxLen; ++i) { 
+            if (text.charAt(start - i) != s.charAt(slen - i)) return 0; 
+        } 
+    } 
+    return maxLen; 
+} 
+ 
+/** 
+ * Implement of UnicodeMatcher 
+ */ 
+void UnicodeSet::addMatchSetTo(UnicodeSet& toUnionTo) const { 
+    toUnionTo.addAll(*this); 
+} 
+ 
+/** 
+ * Returns the index of the given character within this set, where 
+ * the set is ordered by ascending code point.  If the character 
+ * is not in this set, return -1.  The inverse of this method is 
+ * <code>charAt()</code>. 
+ * @return an index from 0..size()-1, or -1 
+ */ 
+int32_t UnicodeSet::indexOf(UChar32 c) const { 
+    if (c < MIN_VALUE || c > MAX_VALUE) { 
+        return -1; 
+    } 
+    int32_t i = 0; 
+    int32_t n = 0; 
+    for (;;) { 
+        UChar32 start = list[i++]; 
+        if (c < start) { 
+            return -1; 
+        } 
+        UChar32 limit = list[i++]; 
+        if (c < limit) { 
+            return n + c - start; 
+        } 
+        n += limit - start; 
+    } 
+} 
+ 
+/** 
+ * Returns the character at the given index within this set, where 
+ * the set is ordered by ascending code point.  If the index is 
+ * out of range, return (UChar32)-1.  The inverse of this method is 
+ * <code>indexOf()</code>. 
+ * @param index an index from 0..size()-1 
+ * @return the character at the given index, or (UChar32)-1. 
+ */ 
+UChar32 UnicodeSet::charAt(int32_t index) const { 
+    if (index >= 0) { 
+        // len2 is the largest even integer <= len, that is, it is len 
+        // for even values and len-1 for odd values.  With odd values 
+        // the last entry is UNICODESET_HIGH. 
+        int32_t len2 = len & ~1; 
+        for (int32_t i=0; i < len2;) { 
+            UChar32 start = list[i++]; 
+            int32_t count = list[i++] - start; 
+            if (index < count) { 
+                return (UChar32)(start + index); 
+            } 
+            index -= count; 
+        } 
+    } 
+    return (UChar32)-1; 
+} 
+ 
+/** 
+ * Make this object represent the range <code>start - end</code>. 
+ * If <code>end > start</code> then this object is set to an 
+ * an empty range. 
+ * 
+ * @param start first character in the set, inclusive 
+ * @rparam end last character in the set, inclusive 
+ */ 
+UnicodeSet& UnicodeSet::set(UChar32 start, UChar32 end) { 
+    clear(); 
+    complement(start, end); 
+    return *this; 
+} 
+ 
+/** 
+ * Adds the specified range to this set if it is not already 
+ * present.  If this set already contains the specified range, 
+ * the call leaves this set unchanged.  If <code>end > start</code> 
+ * then an empty range is added, leaving the set unchanged. 
+ * 
+ * @param start first character, inclusive, of range to be added 
+ * to this set. 
+ * @param end last character, inclusive, of range to be added 
+ * to this set. 
+ */ 
+UnicodeSet& UnicodeSet::add(UChar32 start, UChar32 end) { 
+    if (pinCodePoint(start) < pinCodePoint(end)) { 
         UChar32 limit = end + 1;
         // Fast path for adding a new range after the last one.
         // Odd list length: [..., lastStart, lastLimit, HIGH]
@@ -825,494 +825,494 @@ UnicodeSet& UnicodeSet::add(UChar32 start, UChar32 end) {
         // This is slow. Could be much faster using findCodePoint(start)
         // and modifying the list, dealing with adjacent & overlapping ranges.
         UChar32 range[3] = { start, limit, UNICODESET_HIGH };
-        add(range, 2, 0);
-    } else if (start == end) {
-        add(start);
-    }
-    return *this;
-}
-
-// #define DEBUG_US_ADD
-
-#ifdef DEBUG_US_ADD
-#include <stdio.h>
-void dump(UChar32 c) {
-    if (c <= 0xFF) {
-        printf("%c", (char)c);
-    } else {
-        printf("U+%04X", c);
-    }
-}
-void dump(const UChar32* list, int32_t len) {
-    printf("[");
-    for (int32_t i=0; i<len; ++i) {
-        if (i != 0) printf(", ");
-        dump(list[i]);
-    }
-    printf("]");
-}
-#endif
-
-/**
- * Adds the specified character to this set if it is not already
- * present.  If this set already contains the specified character,
- * the call leaves this set unchanged.
- */
-UnicodeSet& UnicodeSet::add(UChar32 c) {
-    // find smallest i such that c < list[i]
-    // if odd, then it is IN the set
-    // if even, then it is OUT of the set
-    int32_t i = findCodePoint(pinCodePoint(c));
-
-    // already in set?
-    if ((i & 1) != 0  || isFrozen() || isBogus()) return *this;
-
-    // HIGH is 0x110000
-    // assert(list[len-1] == HIGH);
-
-    // empty = [HIGH]
-    // [start_0, limit_0, start_1, limit_1, HIGH]
-
-    // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH]
-    //                             ^
-    //                             list[i]
-
-    // i == 0 means c is before the first range
-
-#ifdef DEBUG_US_ADD
-    printf("Add of ");
-    dump(c);
-    printf(" found at %d", i);
-    printf(": ");
-    dump(list, len);
-    printf(" => ");
-#endif
-
-    if (c == list[i]-1) {
-        // c is before start of next range
-        list[i] = c;
-        // if we touched the HIGH mark, then add a new one
-        if (c == (UNICODESET_HIGH - 1)) {
+        add(range, 2, 0); 
+    } else if (start == end) { 
+        add(start); 
+    } 
+    return *this; 
+} 
+ 
+// #define DEBUG_US_ADD 
+ 
+#ifdef DEBUG_US_ADD 
+#include <stdio.h> 
+void dump(UChar32 c) { 
+    if (c <= 0xFF) { 
+        printf("%c", (char)c); 
+    } else { 
+        printf("U+%04X", c); 
+    } 
+} 
+void dump(const UChar32* list, int32_t len) { 
+    printf("["); 
+    for (int32_t i=0; i<len; ++i) { 
+        if (i != 0) printf(", "); 
+        dump(list[i]); 
+    } 
+    printf("]"); 
+} 
+#endif 
+ 
+/** 
+ * Adds the specified character to this set if it is not already 
+ * present.  If this set already contains the specified character, 
+ * the call leaves this set unchanged. 
+ */ 
+UnicodeSet& UnicodeSet::add(UChar32 c) { 
+    // find smallest i such that c < list[i] 
+    // if odd, then it is IN the set 
+    // if even, then it is OUT of the set 
+    int32_t i = findCodePoint(pinCodePoint(c)); 
+ 
+    // already in set? 
+    if ((i & 1) != 0  || isFrozen() || isBogus()) return *this; 
+ 
+    // HIGH is 0x110000 
+    // assert(list[len-1] == HIGH); 
+ 
+    // empty = [HIGH] 
+    // [start_0, limit_0, start_1, limit_1, HIGH] 
+ 
+    // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH] 
+    //                             ^ 
+    //                             list[i] 
+ 
+    // i == 0 means c is before the first range 
+ 
+#ifdef DEBUG_US_ADD 
+    printf("Add of "); 
+    dump(c); 
+    printf(" found at %d", i); 
+    printf(": "); 
+    dump(list, len); 
+    printf(" => "); 
+#endif 
+ 
+    if (c == list[i]-1) { 
+        // c is before start of next range 
+        list[i] = c; 
+        // if we touched the HIGH mark, then add a new one 
+        if (c == (UNICODESET_HIGH - 1)) { 
             if (!ensureCapacity(len+1)) {
                 // ensureCapacity will mark the object as Bogus if OOM failure happens.
                 return *this;
-            }
-            list[len++] = UNICODESET_HIGH;
-        }
-        if (i > 0 && c == list[i-1]) {
-            // collapse adjacent ranges
-
-            // [..., start_k-1, c, c, limit_k, ..., HIGH]
-            //                     ^
-            //                     list[i]
-
-            //for (int32_t k=i-1; k<len-2; ++k) {
-            //    list[k] = list[k+2];
-            //}
-            UChar32* dst = list + i - 1;
-            UChar32* src = dst + 2;
-            UChar32* srclimit = list + len;
-            while (src < srclimit) *(dst++) = *(src++);
-
-            len -= 2;
-        }
-    }
-
-    else if (i > 0 && c == list[i-1]) {
-        // c is after end of prior range
-        list[i-1]++;
-        // no need to check for collapse here
-    }
-
-    else {
-        // At this point we know the new char is not adjacent to
-        // any existing ranges, and it is not 10FFFF.
-
-
-        // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH]
-        //                             ^
-        //                             list[i]
-
-        // [..., start_k-1, limit_k-1, c, c+1, start_k, limit_k, ..., HIGH]
-        //                             ^
-        //                             list[i]
-
+            } 
+            list[len++] = UNICODESET_HIGH; 
+        } 
+        if (i > 0 && c == list[i-1]) { 
+            // collapse adjacent ranges 
+ 
+            // [..., start_k-1, c, c, limit_k, ..., HIGH] 
+            //                     ^ 
+            //                     list[i] 
+ 
+            //for (int32_t k=i-1; k<len-2; ++k) { 
+            //    list[k] = list[k+2]; 
+            //} 
+            UChar32* dst = list + i - 1; 
+            UChar32* src = dst + 2; 
+            UChar32* srclimit = list + len; 
+            while (src < srclimit) *(dst++) = *(src++); 
+ 
+            len -= 2; 
+        } 
+    } 
+ 
+    else if (i > 0 && c == list[i-1]) { 
+        // c is after end of prior range 
+        list[i-1]++; 
+        // no need to check for collapse here 
+    } 
+ 
+    else { 
+        // At this point we know the new char is not adjacent to 
+        // any existing ranges, and it is not 10FFFF. 
+ 
+ 
+        // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH] 
+        //                             ^ 
+        //                             list[i] 
+ 
+        // [..., start_k-1, limit_k-1, c, c+1, start_k, limit_k, ..., HIGH] 
+        //                             ^ 
+        //                             list[i] 
+ 
         if (!ensureCapacity(len+2)) {
             // ensureCapacity will mark the object as Bogus if OOM failure happens.
             return *this;
-        }
-
+        } 
+ 
         UChar32 *p = list + i;
         uprv_memmove(p + 2, p, (len - i) * sizeof(*p));
-        list[i] = c;
-        list[i+1] = c+1;
-        len += 2;
-    }
-
-#ifdef DEBUG_US_ADD
-    dump(list, len);
-    printf("\n");
-
-    for (i=1; i<len; ++i) {
-        if (list[i] <= list[i-1]) {
-            // Corrupt array!
-            printf("ERROR: list has been corrupted\n");
-            exit(1);
-        }
-    }
-#endif
-
-    releasePattern();
-    return *this;
-}
-
-/**
- * Adds the specified multicharacter to this set if it is not already
- * present.  If this set already contains the multicharacter,
- * the call leaves this set unchanged.
- * Thus "ch" => {"ch"}
- * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
- * @param s the source string
- * @return the modified set, for chaining
- */
-UnicodeSet& UnicodeSet::add(const UnicodeString& s) {
-    if (s.length() == 0 || isFrozen() || isBogus()) return *this;
-    int32_t cp = getSingleCP(s);
-    if (cp < 0) {
+        list[i] = c; 
+        list[i+1] = c+1; 
+        len += 2; 
+    } 
+ 
+#ifdef DEBUG_US_ADD 
+    dump(list, len); 
+    printf("\n"); 
+ 
+    for (i=1; i<len; ++i) { 
+        if (list[i] <= list[i-1]) { 
+            // Corrupt array! 
+            printf("ERROR: list has been corrupted\n"); 
+            exit(1); 
+        } 
+    } 
+#endif 
+ 
+    releasePattern(); 
+    return *this; 
+} 
+ 
+/** 
+ * Adds the specified multicharacter to this set if it is not already 
+ * present.  If this set already contains the multicharacter, 
+ * the call leaves this set unchanged. 
+ * Thus "ch" => {"ch"} 
+ * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b> 
+ * @param s the source string 
+ * @return the modified set, for chaining 
+ */ 
+UnicodeSet& UnicodeSet::add(const UnicodeString& s) { 
+    if (s.length() == 0 || isFrozen() || isBogus()) return *this; 
+    int32_t cp = getSingleCP(s); 
+    if (cp < 0) { 
         if (!stringsContains(s)) {
-            _add(s);
-            releasePattern();
-        }
-    } else {
-        add((UChar32)cp);
-    }
-    return *this;
-}
-
-/**
- * Adds the given string, in order, to 'strings'.  The given string
- * must have been checked by the caller to not be empty and to not
- * already be in 'strings'.
- */
-void UnicodeSet::_add(const UnicodeString& s) {
-    if (isFrozen() || isBogus()) {
-        return;
-    }
+            _add(s); 
+            releasePattern(); 
+        } 
+    } else { 
+        add((UChar32)cp); 
+    } 
+    return *this; 
+} 
+ 
+/** 
+ * Adds the given string, in order, to 'strings'.  The given string 
+ * must have been checked by the caller to not be empty and to not 
+ * already be in 'strings'. 
+ */ 
+void UnicodeSet::_add(const UnicodeString& s) { 
+    if (isFrozen() || isBogus()) { 
+        return; 
+    } 
     UErrorCode ec = U_ZERO_ERROR;
     if (strings == nullptr && !allocateStrings(ec)) {
         setToBogus();
         return;
     }
-    UnicodeString* t = new UnicodeString(s);
-    if (t == NULL) { // Check for memory allocation error.
-        setToBogus();
-        return;
-    }
-    strings->sortedInsert(t, compareUnicodeString, ec);
-    if (U_FAILURE(ec)) {
-        setToBogus();
-        delete t;
-    }
-}
-
-/**
- * @return a code point IF the string consists of a single one.
- * otherwise returns -1.
- * @param string to test
- */
-int32_t UnicodeSet::getSingleCP(const UnicodeString& s) {
-    //if (s.length() < 1) {
-    //    throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet");
-    //}
-    if (s.length() > 2) return -1;
-    if (s.length() == 1) return s.charAt(0);
-
-    // at this point, len = 2
-    UChar32 cp = s.char32At(0);
-    if (cp > 0xFFFF) { // is surrogate pair
-        return cp;
-    }
-    return -1;
-}
-
-/**
- * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
- * If this set already any particular character, it has no effect on that character.
- * @param the source string
- * @return the modified set, for chaining
- */
-UnicodeSet& UnicodeSet::addAll(const UnicodeString& s) {
-    UChar32 cp;
-    for (int32_t i = 0; i < s.length(); i += U16_LENGTH(cp)) {
-        cp = s.char32At(i);
-        add(cp);
-    }
-    return *this;
-}
-
-/**
- * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
- * If this set already any particular character, it has no effect on that character.
- * @param the source string
- * @return the modified set, for chaining
- */
-UnicodeSet& UnicodeSet::retainAll(const UnicodeString& s) {
-    UnicodeSet set;
-    set.addAll(s);
-    retainAll(set);
-    return *this;
-}
-
-/**
- * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"}
- * If this set already any particular character, it has no effect on that character.
- * @param the source string
- * @return the modified set, for chaining
- */
-UnicodeSet& UnicodeSet::complementAll(const UnicodeString& s) {
-    UnicodeSet set;
-    set.addAll(s);
-    complementAll(set);
-    return *this;
-}
-
-/**
- * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"}
- * If this set already any particular character, it has no effect on that character.
- * @param the source string
- * @return the modified set, for chaining
- */
-UnicodeSet& UnicodeSet::removeAll(const UnicodeString& s) {
-    UnicodeSet set;
-    set.addAll(s);
-    removeAll(set);
-    return *this;
-}
-
-UnicodeSet& UnicodeSet::removeAllStrings() {
+    UnicodeString* t = new UnicodeString(s); 
+    if (t == NULL) { // Check for memory allocation error. 
+        setToBogus(); 
+        return; 
+    } 
+    strings->sortedInsert(t, compareUnicodeString, ec); 
+    if (U_FAILURE(ec)) { 
+        setToBogus(); 
+        delete t; 
+    } 
+} 
+ 
+/** 
+ * @return a code point IF the string consists of a single one. 
+ * otherwise returns -1. 
+ * @param string to test 
+ */ 
+int32_t UnicodeSet::getSingleCP(const UnicodeString& s) { 
+    //if (s.length() < 1) { 
+    //    throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet"); 
+    //} 
+    if (s.length() > 2) return -1; 
+    if (s.length() == 1) return s.charAt(0); 
+ 
+    // at this point, len = 2 
+    UChar32 cp = s.char32At(0); 
+    if (cp > 0xFFFF) { // is surrogate pair 
+        return cp; 
+    } 
+    return -1; 
+} 
+ 
+/** 
+ * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"} 
+ * If this set already any particular character, it has no effect on that character. 
+ * @param the source string 
+ * @return the modified set, for chaining 
+ */ 
+UnicodeSet& UnicodeSet::addAll(const UnicodeString& s) { 
+    UChar32 cp; 
+    for (int32_t i = 0; i < s.length(); i += U16_LENGTH(cp)) { 
+        cp = s.char32At(i); 
+        add(cp); 
+    } 
+    return *this; 
+} 
+ 
+/** 
+ * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"} 
+ * If this set already any particular character, it has no effect on that character. 
+ * @param the source string 
+ * @return the modified set, for chaining 
+ */ 
+UnicodeSet& UnicodeSet::retainAll(const UnicodeString& s) { 
+    UnicodeSet set; 
+    set.addAll(s); 
+    retainAll(set); 
+    return *this; 
+} 
+ 
+/** 
+ * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"} 
+ * If this set already any particular character, it has no effect on that character. 
+ * @param the source string 
+ * @return the modified set, for chaining 
+ */ 
+UnicodeSet& UnicodeSet::complementAll(const UnicodeString& s) { 
+    UnicodeSet set; 
+    set.addAll(s); 
+    complementAll(set); 
+    return *this; 
+} 
+ 
+/** 
+ * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"} 
+ * If this set already any particular character, it has no effect on that character. 
+ * @param the source string 
+ * @return the modified set, for chaining 
+ */ 
+UnicodeSet& UnicodeSet::removeAll(const UnicodeString& s) { 
+    UnicodeSet set; 
+    set.addAll(s); 
+    removeAll(set); 
+    return *this; 
+} 
+ 
+UnicodeSet& UnicodeSet::removeAllStrings() { 
     if (!isFrozen() && hasStrings()) {
         strings->removeAllElements();
         releasePattern();
     }
-    return *this;
-}
-
-
-/**
- * Makes a set from a multicharacter string. Thus "ch" => {"ch"}
- * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
- * @param the source string
- * @return a newly created set containing the given string
- */
-UnicodeSet* U_EXPORT2 UnicodeSet::createFrom(const UnicodeString& s) {
-    UnicodeSet *set = new UnicodeSet();
-    if (set != NULL) { // Check for memory allocation error.
-        set->add(s);
-    }
-    return set;
-}
-
-
-/**
- * Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"}
- * @param the source string
- * @return a newly created set containing the given characters
- */
-UnicodeSet* U_EXPORT2 UnicodeSet::createFromAll(const UnicodeString& s) {
-    UnicodeSet *set = new UnicodeSet();
-    if (set != NULL) { // Check for memory allocation error.
-        set->addAll(s);
-    }
-    return set;
-}
-
-/**
- * Retain only the elements in this set that are contained in the
- * specified range.  If <code>end > start</code> then an empty range is
- * retained, leaving the set empty.
- *
- * @param start first character, inclusive, of range to be retained
- * to this set.
- * @param end last character, inclusive, of range to be retained
- * to this set.
- */
-UnicodeSet& UnicodeSet::retain(UChar32 start, UChar32 end) {
-    if (pinCodePoint(start) <= pinCodePoint(end)) {
-        UChar32 range[3] = { start, end+1, UNICODESET_HIGH };
-        retain(range, 2, 0);
-    } else {
-        clear();
-    }
-    return *this;
-}
-
-UnicodeSet& UnicodeSet::retain(UChar32 c) {
-    return retain(c, c);
-}
-
-/**
- * Removes the specified range from this set if it is present.
- * The set will not contain the specified range once the call
- * returns.  If <code>end > start</code> then an empty range is
- * removed, leaving the set unchanged.
- *
- * @param start first character, inclusive, of range to be removed
- * from this set.
- * @param end last character, inclusive, of range to be removed
- * from this set.
- */
-UnicodeSet& UnicodeSet::remove(UChar32 start, UChar32 end) {
-    if (pinCodePoint(start) <= pinCodePoint(end)) {
-        UChar32 range[3] = { start, end+1, UNICODESET_HIGH };
-        retain(range, 2, 2);
-    }
-    return *this;
-}
-
-/**
- * Removes the specified character from this set if it is present.
- * The set will not contain the specified range once the call
- * returns.
- */
-UnicodeSet& UnicodeSet::remove(UChar32 c) {
-    return remove(c, c);
-}
-
-/**
- * Removes the specified string from this set if it is present.
- * The set will not contain the specified character once the call
- * returns.
- * @param the source string
- * @return the modified set, for chaining
- */
-UnicodeSet& UnicodeSet::remove(const UnicodeString& s) {
-    if (s.length() == 0 || isFrozen() || isBogus()) return *this;
-    int32_t cp = getSingleCP(s);
-    if (cp < 0) {
+    return *this; 
+} 
+ 
+ 
+/** 
+ * Makes a set from a multicharacter string. Thus "ch" => {"ch"} 
+ * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b> 
+ * @param the source string 
+ * @return a newly created set containing the given string 
+ */ 
+UnicodeSet* U_EXPORT2 UnicodeSet::createFrom(const UnicodeString& s) { 
+    UnicodeSet *set = new UnicodeSet(); 
+    if (set != NULL) { // Check for memory allocation error. 
+        set->add(s); 
+    } 
+    return set; 
+} 
+ 
+ 
+/** 
+ * Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"} 
+ * @param the source string 
+ * @return a newly created set containing the given characters 
+ */ 
+UnicodeSet* U_EXPORT2 UnicodeSet::createFromAll(const UnicodeString& s) { 
+    UnicodeSet *set = new UnicodeSet(); 
+    if (set != NULL) { // Check for memory allocation error. 
+        set->addAll(s); 
+    } 
+    return set; 
+} 
+ 
+/** 
+ * Retain only the elements in this set that are contained in the 
+ * specified range.  If <code>end > start</code> then an empty range is 
+ * retained, leaving the set empty. 
+ * 
+ * @param start first character, inclusive, of range to be retained 
+ * to this set. 
+ * @param end last character, inclusive, of range to be retained 
+ * to this set. 
+ */ 
+UnicodeSet& UnicodeSet::retain(UChar32 start, UChar32 end) { 
+    if (pinCodePoint(start) <= pinCodePoint(end)) { 
+        UChar32 range[3] = { start, end+1, UNICODESET_HIGH }; 
+        retain(range, 2, 0); 
+    } else { 
+        clear(); 
+    } 
+    return *this; 
+} 
+ 
+UnicodeSet& UnicodeSet::retain(UChar32 c) { 
+    return retain(c, c); 
+} 
+ 
+/** 
+ * Removes the specified range from this set if it is present. 
+ * The set will not contain the specified range once the call 
+ * returns.  If <code>end > start</code> then an empty range is 
+ * removed, leaving the set unchanged. 
+ * 
+ * @param start first character, inclusive, of range to be removed 
+ * from this set. 
+ * @param end last character, inclusive, of range to be removed 
+ * from this set. 
+ */ 
+UnicodeSet& UnicodeSet::remove(UChar32 start, UChar32 end) { 
+    if (pinCodePoint(start) <= pinCodePoint(end)) { 
+        UChar32 range[3] = { start, end+1, UNICODESET_HIGH }; 
+        retain(range, 2, 2); 
+    } 
+    return *this; 
+} 
+ 
+/** 
+ * Removes the specified character from this set if it is present. 
+ * The set will not contain the specified range once the call 
+ * returns. 
+ */ 
+UnicodeSet& UnicodeSet::remove(UChar32 c) { 
+    return remove(c, c); 
+} 
+ 
+/** 
+ * Removes the specified string from this set if it is present. 
+ * The set will not contain the specified character once the call 
+ * returns. 
+ * @param the source string 
+ * @return the modified set, for chaining 
+ */ 
+UnicodeSet& UnicodeSet::remove(const UnicodeString& s) { 
+    if (s.length() == 0 || isFrozen() || isBogus()) return *this; 
+    int32_t cp = getSingleCP(s); 
+    if (cp < 0) { 
         if (strings != nullptr && strings->removeElement((void*) &s)) {
             releasePattern();
         }
-    } else {
-        remove((UChar32)cp, (UChar32)cp);
-    }
-    return *this;
-}
-
-/**
- * Complements the specified range in this set.  Any character in
- * the range will be removed if it is in this set, or will be
- * added if it is not in this set.  If <code>end > start</code>
- * then an empty range is xor'ed, leaving the set unchanged.
- *
- * @param start first character, inclusive, of range to be removed
- * from this set.
- * @param end last character, inclusive, of range to be removed
- * from this set.
- */
-UnicodeSet& UnicodeSet::complement(UChar32 start, UChar32 end) {
-    if (isFrozen() || isBogus()) {
-        return *this;
-    }
-    if (pinCodePoint(start) <= pinCodePoint(end)) {
-        UChar32 range[3] = { start, end+1, UNICODESET_HIGH };
-        exclusiveOr(range, 2, 0);
-    }
-    releasePattern();
-    return *this;
-}
-
-UnicodeSet& UnicodeSet::complement(UChar32 c) {
-    return complement(c, c);
-}
-
-/**
- * This is equivalent to
- * <code>complement(MIN_VALUE, MAX_VALUE)</code>.
- */
-UnicodeSet& UnicodeSet::complement(void) {
-    if (isFrozen() || isBogus()) {
-        return *this;
-    }
-    if (list[0] == UNICODESET_LOW) {
+    } else { 
+        remove((UChar32)cp, (UChar32)cp); 
+    } 
+    return *this; 
+} 
+ 
+/** 
+ * Complements the specified range in this set.  Any character in 
+ * the range will be removed if it is in this set, or will be 
+ * added if it is not in this set.  If <code>end > start</code> 
+ * then an empty range is xor'ed, leaving the set unchanged. 
+ * 
+ * @param start first character, inclusive, of range to be removed 
+ * from this set. 
+ * @param end last character, inclusive, of range to be removed 
+ * from this set. 
+ */ 
+UnicodeSet& UnicodeSet::complement(UChar32 start, UChar32 end) { 
+    if (isFrozen() || isBogus()) { 
+        return *this; 
+    } 
+    if (pinCodePoint(start) <= pinCodePoint(end)) { 
+        UChar32 range[3] = { start, end+1, UNICODESET_HIGH }; 
+        exclusiveOr(range, 2, 0); 
+    } 
+    releasePattern(); 
+    return *this; 
+} 
+ 
+UnicodeSet& UnicodeSet::complement(UChar32 c) { 
+    return complement(c, c); 
+} 
+ 
+/** 
+ * This is equivalent to 
+ * <code>complement(MIN_VALUE, MAX_VALUE)</code>. 
+ */ 
+UnicodeSet& UnicodeSet::complement(void) { 
+    if (isFrozen() || isBogus()) { 
+        return *this; 
+    } 
+    if (list[0] == UNICODESET_LOW) { 
         uprv_memmove(list, list + 1, (size_t)(len-1)*sizeof(UChar32));
-        --len;
-    } else {
+        --len; 
+    } else { 
         if (!ensureCapacity(len+1)) {
-            return *this;
-        }
+            return *this; 
+        } 
         uprv_memmove(list + 1, list, (size_t)len*sizeof(UChar32));
         list[0] = UNICODESET_LOW;
-        ++len;
-    }
-    releasePattern();
-    return *this;
-}
-
-/**
- * Complement the specified string in this set.
- * The set will not contain the specified string once the call
- * returns.
- * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
- * @param s the string to complement
- * @return this object, for chaining
- */
-UnicodeSet& UnicodeSet::complement(const UnicodeString& s) {
-    if (s.length() == 0 || isFrozen() || isBogus()) return *this;
-    int32_t cp = getSingleCP(s);
-    if (cp < 0) {
+        ++len; 
+    } 
+    releasePattern(); 
+    return *this; 
+} 
+ 
+/** 
+ * Complement the specified string in this set. 
+ * The set will not contain the specified string once the call 
+ * returns. 
+ * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b> 
+ * @param s the string to complement 
+ * @return this object, for chaining 
+ */ 
+UnicodeSet& UnicodeSet::complement(const UnicodeString& s) { 
+    if (s.length() == 0 || isFrozen() || isBogus()) return *this; 
+    int32_t cp = getSingleCP(s); 
+    if (cp < 0) { 
         if (stringsContains(s)) {
-            strings->removeElement((void*) &s);
-        } else {
-            _add(s);
-        }
-        releasePattern();
-    } else {
-        complement((UChar32)cp, (UChar32)cp);
-    }
-    return *this;
-}
-
-/**
- * Adds all of the elements in the specified set to this set if
- * they're not already present.  This operation effectively
- * modifies this set so that its value is the <i>union</i> of the two
- * sets.  The behavior of this operation is unspecified if the specified
- * collection is modified while the operation is in progress.
- *
- * @param c set whose elements are to be added to this set.
- * @see #add(char, char)
- */
-UnicodeSet& UnicodeSet::addAll(const UnicodeSet& c) {
-    if ( c.len>0 && c.list!=NULL ) {
-        add(c.list, c.len, 0);
-    }
-
-    // Add strings in order
-    if ( c.strings!=NULL ) {
-        for (int32_t i=0; i<c.strings->size(); ++i) {
-            const UnicodeString* s = (const UnicodeString*)c.strings->elementAt(i);
+            strings->removeElement((void*) &s); 
+        } else { 
+            _add(s); 
+        } 
+        releasePattern(); 
+    } else { 
+        complement((UChar32)cp, (UChar32)cp); 
+    } 
+    return *this; 
+} 
+ 
+/** 
+ * Adds all of the elements in the specified set to this set if 
+ * they're not already present.  This operation effectively 
+ * modifies this set so that its value is the <i>union</i> of the two 
+ * sets.  The behavior of this operation is unspecified if the specified 
+ * collection is modified while the operation is in progress. 
+ * 
+ * @param c set whose elements are to be added to this set. 
+ * @see #add(char, char) 
+ */ 
+UnicodeSet& UnicodeSet::addAll(const UnicodeSet& c) { 
+    if ( c.len>0 && c.list!=NULL ) { 
+        add(c.list, c.len, 0); 
+    } 
+ 
+    // Add strings in order 
+    if ( c.strings!=NULL ) { 
+        for (int32_t i=0; i<c.strings->size(); ++i) { 
+            const UnicodeString* s = (const UnicodeString*)c.strings->elementAt(i); 
             if (!stringsContains(*s)) {
-                _add(*s);
-            }
-        }
-    }
-    return *this;
-}
-
-/**
- * Retains only the elements in this set that are contained in the
- * specified set.  In other words, removes from this set all of
- * its elements that are not contained in the specified set.  This
- * operation effectively modifies this set so that its value is
- * the <i>intersection</i> of the two sets.
- *
- * @param c set that defines which elements this set will retain.
- */
-UnicodeSet& UnicodeSet::retainAll(const UnicodeSet& c) {
-    if (isFrozen() || isBogus()) {
-        return *this;
-    }
-    retain(c.list, c.len, 0);
+                _add(*s); 
+            } 
+        } 
+    } 
+    return *this; 
+} 
+ 
+/** 
+ * Retains only the elements in this set that are contained in the 
+ * specified set.  In other words, removes from this set all of 
+ * its elements that are not contained in the specified set.  This 
+ * operation effectively modifies this set so that its value is 
+ * the <i>intersection</i> of the two sets. 
+ * 
+ * @param c set that defines which elements this set will retain. 
+ */ 
+UnicodeSet& UnicodeSet::retainAll(const UnicodeSet& c) { 
+    if (isFrozen() || isBogus()) { 
+        return *this; 
+    } 
+    retain(c.list, c.len, 0); 
     if (hasStrings()) {
         if (!c.hasStrings()) {
             strings->removeAllElements();
@@ -1320,121 +1320,121 @@ UnicodeSet& UnicodeSet::retainAll(const UnicodeSet& c) {
             strings->retainAll(*c.strings);
         }
     }
-    return *this;
-}
-
-/**
- * Removes from this set all of its elements that are contained in the
- * specified set.  This operation effectively modifies this
- * set so that its value is the <i>asymmetric set difference</i> of
- * the two sets.
- *
- * @param c set that defines which elements will be removed from
- *          this set.
- */
-UnicodeSet& UnicodeSet::removeAll(const UnicodeSet& c) {
-    if (isFrozen() || isBogus()) {
-        return *this;
-    }
-    retain(c.list, c.len, 2);
+    return *this; 
+} 
+ 
+/** 
+ * Removes from this set all of its elements that are contained in the 
+ * specified set.  This operation effectively modifies this 
+ * set so that its value is the <i>asymmetric set difference</i> of 
+ * the two sets. 
+ * 
+ * @param c set that defines which elements will be removed from 
+ *          this set. 
+ */ 
+UnicodeSet& UnicodeSet::removeAll(const UnicodeSet& c) { 
+    if (isFrozen() || isBogus()) { 
+        return *this; 
+    } 
+    retain(c.list, c.len, 2); 
     if (hasStrings() && c.hasStrings()) {
         strings->removeAll(*c.strings);
     }
-    return *this;
-}
-
-/**
- * Complements in this set all elements contained in the specified
- * set.  Any character in the other set will be removed if it is
- * in this set, or will be added if it is not in this set.
- *
- * @param c set that defines which elements will be xor'ed from
- *          this set.
- */
-UnicodeSet& UnicodeSet::complementAll(const UnicodeSet& c) {
-    if (isFrozen() || isBogus()) {
-        return *this;
-    }
-    exclusiveOr(c.list, c.len, 0);
-
+    return *this; 
+} 
+ 
+/** 
+ * Complements in this set all elements contained in the specified 
+ * set.  Any character in the other set will be removed if it is 
+ * in this set, or will be added if it is not in this set. 
+ * 
+ * @param c set that defines which elements will be xor'ed from 
+ *          this set. 
+ */ 
+UnicodeSet& UnicodeSet::complementAll(const UnicodeSet& c) { 
+    if (isFrozen() || isBogus()) { 
+        return *this; 
+    } 
+    exclusiveOr(c.list, c.len, 0); 
+ 
     if (c.strings != nullptr) {
         for (int32_t i=0; i<c.strings->size(); ++i) {
             void* e = c.strings->elementAt(i);
             if (strings == nullptr || !strings->removeElement(e)) {
                 _add(*(const UnicodeString*)e);
             }
-        }
-    }
-    return *this;
-}
-
-/**
- * Removes all of the elements from this set.  This set will be
- * empty after this call returns.
- */
-UnicodeSet& UnicodeSet::clear(void) {
-    if (isFrozen()) {
-        return *this;
-    }
+        } 
+    } 
+    return *this; 
+} 
+ 
+/** 
+ * Removes all of the elements from this set.  This set will be 
+ * empty after this call returns. 
+ */ 
+UnicodeSet& UnicodeSet::clear(void) { 
+    if (isFrozen()) { 
+        return *this; 
+    } 
     list[0] = UNICODESET_HIGH;
-    len = 1;
-    releasePattern();
-    if (strings != NULL) {
-        strings->removeAllElements();
-    }
+    len = 1; 
+    releasePattern(); 
+    if (strings != NULL) { 
+        strings->removeAllElements(); 
+    } 
     // Remove bogus
     fFlags = 0;
-    return *this;
-}
-
-/**
- * Iteration method that returns the number of ranges contained in
- * this set.
- * @see #getRangeStart
- * @see #getRangeEnd
- */
-int32_t UnicodeSet::getRangeCount() const {
-    return len/2;
-}
-
-/**
- * Iteration method that returns the first character in the
- * specified range of this set.
- * @see #getRangeCount
- * @see #getRangeEnd
- */
-UChar32 UnicodeSet::getRangeStart(int32_t index) const {
-    return list[index*2];
-}
-
-/**
- * Iteration method that returns the last character in the
- * specified range of this set.
- * @see #getRangeStart
- * @see #getRangeEnd
- */
-UChar32 UnicodeSet::getRangeEnd(int32_t index) const {
-    return list[index*2 + 1] - 1;
-}
-
-const UnicodeString* UnicodeSet::getString(int32_t index) const {
-    return (const UnicodeString*) strings->elementAt(index);
-}
-
-/**
- * Reallocate this objects internal structures to take up the least
- * possible space, without changing this object's value.
- */
-UnicodeSet& UnicodeSet::compact() {
-    if (isFrozen() || isBogus()) {
-        return *this;
-    }
-    // Delete buffer first to defragment memory less.
+    return *this; 
+} 
+ 
+/** 
+ * Iteration method that returns the number of ranges contained in 
+ * this set. 
+ * @see #getRangeStart 
+ * @see #getRangeEnd 
+ */ 
+int32_t UnicodeSet::getRangeCount() const { 
+    return len/2; 
+} 
+ 
+/** 
+ * Iteration method that returns the first character in the 
+ * specified range of this set. 
+ * @see #getRangeCount 
+ * @see #getRangeEnd 
+ */ 
+UChar32 UnicodeSet::getRangeStart(int32_t index) const { 
+    return list[index*2]; 
+} 
+ 
+/** 
+ * Iteration method that returns the last character in the 
+ * specified range of this set. 
+ * @see #getRangeStart 
+ * @see #getRangeEnd 
+ */ 
+UChar32 UnicodeSet::getRangeEnd(int32_t index) const { 
+    return list[index*2 + 1] - 1; 
+} 
+ 
+const UnicodeString* UnicodeSet::getString(int32_t index) const { 
+    return (const UnicodeString*) strings->elementAt(index); 
+} 
+ 
+/** 
+ * Reallocate this objects internal structures to take up the least 
+ * possible space, without changing this object's value. 
+ */ 
+UnicodeSet& UnicodeSet::compact() { 
+    if (isFrozen() || isBogus()) { 
+        return *this; 
+    } 
+    // Delete buffer first to defragment memory less. 
     if (buffer != stackList) {
-        uprv_free(buffer);
-        buffer = NULL;
+        uprv_free(buffer); 
+        buffer = NULL; 
         bufferCapacity = 0;
-    }
+    } 
     if (list == stackList) {
         // pass
     } else if (len <= INITIAL_CAPACITY) {
@@ -1445,195 +1445,195 @@ UnicodeSet& UnicodeSet::compact() {
     } else if ((len + 7) < capacity) {
         // If we have more than a little unused capacity, shrink it to len.
         UChar32* temp = (UChar32*) uprv_realloc(list, sizeof(UChar32) * len);
-        if (temp) {
-            list = temp;
+        if (temp) { 
+            list = temp; 
             capacity = len;
-        }
-        // else what the heck happened?! We allocated less memory!
-        // Oh well. We'll keep our original array.
-    }
+        } 
+        // else what the heck happened?! We allocated less memory! 
+        // Oh well. We'll keep our original array. 
+    } 
     if (strings != nullptr && strings->isEmpty()) {
         delete strings;
         strings = nullptr;
     }
-    return *this;
-}
-
-#ifdef DEBUG_SERIALIZE
-#include <stdio.h>
-#endif
-
-/**
- * Deserialize constructor.
- */
+    return *this; 
+} 
+ 
+#ifdef DEBUG_SERIALIZE 
+#include <stdio.h> 
+#endif 
+ 
+/** 
+ * Deserialize constructor. 
+ */ 
 UnicodeSet::UnicodeSet(const uint16_t data[], int32_t dataLen, ESerialization serialization,
                        UErrorCode &ec) {
-
-  if(U_FAILURE(ec)) {
-    setToBogus();
-    return;
-  }
-
-  if( (serialization != kSerialized)
-      || (data==NULL)
-      || (dataLen < 1)) {
-    ec = U_ILLEGAL_ARGUMENT_ERROR;
-    setToBogus();
-    return;
-  }
-
-  // bmp?
-  int32_t headerSize = ((data[0]&0x8000)) ?2:1;
-  int32_t bmpLength = (headerSize==1)?data[0]:data[1];
-
+ 
+  if(U_FAILURE(ec)) { 
+    setToBogus(); 
+    return; 
+  } 
+ 
+  if( (serialization != kSerialized) 
+      || (data==NULL) 
+      || (dataLen < 1)) { 
+    ec = U_ILLEGAL_ARGUMENT_ERROR; 
+    setToBogus(); 
+    return; 
+  } 
+ 
+  // bmp? 
+  int32_t headerSize = ((data[0]&0x8000)) ?2:1; 
+  int32_t bmpLength = (headerSize==1)?data[0]:data[1]; 
+ 
   int32_t newLength = (((data[0]&0x7FFF)-bmpLength)/2)+bmpLength;
-#ifdef DEBUG_SERIALIZE
+#ifdef DEBUG_SERIALIZE 
   printf("dataLen %d headerSize %d bmpLen %d len %d. data[0]=%X/%X/%X/%X\n", dataLen,headerSize,bmpLength,newLength, data[0],data[1],data[2],data[3]);
-#endif
+#endif 
   if(!ensureCapacity(newLength + 1)) {  // +1 for HIGH
-    return;
-  }
-  // copy bmp
-  int32_t i;
-  for(i = 0; i< bmpLength;i++) {
-    list[i] = data[i+headerSize];
-#ifdef DEBUG_SERIALIZE
-    printf("<<16@%d[%d] %X\n", i+headerSize, i, list[i]);
-#endif
-  }
-  // copy smp
+    return; 
+  } 
+  // copy bmp 
+  int32_t i; 
+  for(i = 0; i< bmpLength;i++) { 
+    list[i] = data[i+headerSize]; 
+#ifdef DEBUG_SERIALIZE 
+    printf("<<16@%d[%d] %X\n", i+headerSize, i, list[i]); 
+#endif 
+  } 
+  // copy smp 
   for(i=bmpLength;i<newLength;i++) {
-    list[i] = ((UChar32)data[headerSize+bmpLength+(i-bmpLength)*2+0] << 16) +
-              ((UChar32)data[headerSize+bmpLength+(i-bmpLength)*2+1]);
-#ifdef DEBUG_SERIALIZE
-    printf("<<32@%d+[%d] %lX\n", headerSize+bmpLength+i, i, list[i]);
-#endif
-  }
+    list[i] = ((UChar32)data[headerSize+bmpLength+(i-bmpLength)*2+0] << 16) + 
+              ((UChar32)data[headerSize+bmpLength+(i-bmpLength)*2+1]); 
+#ifdef DEBUG_SERIALIZE 
+    printf("<<32@%d+[%d] %lX\n", headerSize+bmpLength+i, i, list[i]); 
+#endif 
+  } 
   U_ASSERT(i == newLength);
   if (i == 0 || list[i - 1] != UNICODESET_HIGH) {
     list[i++] = UNICODESET_HIGH;
   }
   len = i;
-}
-
-
-int32_t UnicodeSet::serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const {
-    int32_t bmpLength, length, destLength;
-
-    if (U_FAILURE(ec)) {
-        return 0;
-    }
-
-    if (destCapacity<0 || (destCapacity>0 && dest==NULL)) {
-        ec=U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-
-    /* count necessary 16-bit units */
-    length=this->len-1; // Subtract 1 to ignore final UNICODESET_HIGH
-    // assert(length>=0);
-    if (length==0) {
-        /* empty set */
-        if (destCapacity>0) {
-            *dest=0;
-        } else {
-            ec=U_BUFFER_OVERFLOW_ERROR;
-        }
-        return 1;
-    }
-    /* now length>0 */
-
-    if (this->list[length-1]<=0xffff) {
-        /* all BMP */
-        bmpLength=length;
-    } else if (this->list[0]>=0x10000) {
-        /* all supplementary */
-        bmpLength=0;
-        length*=2;
-    } else {
-        /* some BMP, some supplementary */
-        for (bmpLength=0; bmpLength<length && this->list[bmpLength]<=0xffff; ++bmpLength) {}
-        length=bmpLength+2*(length-bmpLength);
-    }
-#ifdef DEBUG_SERIALIZE
-    printf(">> bmpLength%d length%d len%d\n", bmpLength, length, len);
-#endif
-    /* length: number of 16-bit array units */
-    if (length>0x7fff) {
-        /* there are only 15 bits for the length in the first serialized word */
-        ec=U_INDEX_OUTOFBOUNDS_ERROR;
-        return 0;
-    }
-
-    /*
-     * total serialized length:
-     * number of 16-bit array units (length) +
-     * 1 length unit (always) +
-     * 1 bmpLength unit (if there are supplementary values)
-     */
-    destLength=length+((length>bmpLength)?2:1);
-    if (destLength<=destCapacity) {
-        const UChar32 *p;
-        int32_t i;
-
-#ifdef DEBUG_SERIALIZE
-        printf("writeHdr\n");
-#endif
-        *dest=(uint16_t)length;
-        if (length>bmpLength) {
-            *dest|=0x8000;
-            *++dest=(uint16_t)bmpLength;
-        }
-        ++dest;
-
-        /* write the BMP part of the array */
-        p=this->list;
-        for (i=0; i<bmpLength; ++i) {
-#ifdef DEBUG_SERIALIZE
-          printf("writebmp: %x\n", (int)*p);
-#endif
-            *dest++=(uint16_t)*p++;
-        }
-
-        /* write the supplementary part of the array */
-        for (; i<length; i+=2) {
-#ifdef DEBUG_SERIALIZE
-          printf("write32: %x\n", (int)*p);
-#endif
-            *dest++=(uint16_t)(*p>>16);
-            *dest++=(uint16_t)*p++;
-        }
-    } else {
-        ec=U_BUFFER_OVERFLOW_ERROR;
-    }
-    return destLength;
-}
-
-//----------------------------------------------------------------
-// Implementation: Utility methods
-//----------------------------------------------------------------
-
-/**
- * Allocate our strings vector and return TRUE if successful.
- */
-UBool UnicodeSet::allocateStrings(UErrorCode &status) {
-    if (U_FAILURE(status)) {
-        return FALSE;
-    }
-    strings = new UVector(uprv_deleteUObject,
-                          uhash_compareUnicodeString, 1, status);
-    if (strings == NULL) { // Check for memory allocation error.
-        status = U_MEMORY_ALLOCATION_ERROR;
-        return FALSE;
-    }
-    if (U_FAILURE(status)) {
-        delete strings;
-        strings = NULL;
-        return FALSE;
+} 
+ 
+ 
+int32_t UnicodeSet::serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const { 
+    int32_t bmpLength, length, destLength; 
+ 
+    if (U_FAILURE(ec)) { 
+        return 0; 
     } 
-    return TRUE;
-}
-
+ 
+    if (destCapacity<0 || (destCapacity>0 && dest==NULL)) { 
+        ec=U_ILLEGAL_ARGUMENT_ERROR; 
+        return 0; 
+    } 
+ 
+    /* count necessary 16-bit units */ 
+    length=this->len-1; // Subtract 1 to ignore final UNICODESET_HIGH 
+    // assert(length>=0); 
+    if (length==0) { 
+        /* empty set */ 
+        if (destCapacity>0) { 
+            *dest=0; 
+        } else { 
+            ec=U_BUFFER_OVERFLOW_ERROR; 
+        } 
+        return 1; 
+    } 
+    /* now length>0 */ 
+ 
+    if (this->list[length-1]<=0xffff) { 
+        /* all BMP */ 
+        bmpLength=length; 
+    } else if (this->list[0]>=0x10000) { 
+        /* all supplementary */ 
+        bmpLength=0; 
+        length*=2; 
+    } else { 
+        /* some BMP, some supplementary */ 
+        for (bmpLength=0; bmpLength<length && this->list[bmpLength]<=0xffff; ++bmpLength) {} 
+        length=bmpLength+2*(length-bmpLength); 
+    } 
+#ifdef DEBUG_SERIALIZE 
+    printf(">> bmpLength%d length%d len%d\n", bmpLength, length, len); 
+#endif 
+    /* length: number of 16-bit array units */ 
+    if (length>0x7fff) { 
+        /* there are only 15 bits for the length in the first serialized word */ 
+        ec=U_INDEX_OUTOFBOUNDS_ERROR; 
+        return 0; 
+    } 
+ 
+    /* 
+     * total serialized length: 
+     * number of 16-bit array units (length) + 
+     * 1 length unit (always) + 
+     * 1 bmpLength unit (if there are supplementary values) 
+     */ 
+    destLength=length+((length>bmpLength)?2:1); 
+    if (destLength<=destCapacity) { 
+        const UChar32 *p; 
+        int32_t i; 
+ 
+#ifdef DEBUG_SERIALIZE 
+        printf("writeHdr\n"); 
+#endif 
+        *dest=(uint16_t)length; 
+        if (length>bmpLength) { 
+            *dest|=0x8000; 
+            *++dest=(uint16_t)bmpLength; 
+        } 
+        ++dest; 
+ 
+        /* write the BMP part of the array */ 
+        p=this->list; 
+        for (i=0; i<bmpLength; ++i) { 
+#ifdef DEBUG_SERIALIZE 
+          printf("writebmp: %x\n", (int)*p); 
+#endif 
+            *dest++=(uint16_t)*p++; 
+        } 
+ 
+        /* write the supplementary part of the array */ 
+        for (; i<length; i+=2) { 
+#ifdef DEBUG_SERIALIZE 
+          printf("write32: %x\n", (int)*p); 
+#endif 
+            *dest++=(uint16_t)(*p>>16); 
+            *dest++=(uint16_t)*p++; 
+        } 
+    } else { 
+        ec=U_BUFFER_OVERFLOW_ERROR; 
+    } 
+    return destLength; 
+} 
+ 
+//---------------------------------------------------------------- 
+// Implementation: Utility methods 
+//---------------------------------------------------------------- 
+ 
+/** 
+ * Allocate our strings vector and return TRUE if successful. 
+ */ 
+UBool UnicodeSet::allocateStrings(UErrorCode &status) { 
+    if (U_FAILURE(status)) { 
+        return FALSE; 
+    } 
+    strings = new UVector(uprv_deleteUObject, 
+                          uhash_compareUnicodeString, 1, status); 
+    if (strings == NULL) { // Check for memory allocation error. 
+        status = U_MEMORY_ALLOCATION_ERROR; 
+        return FALSE; 
+    } 
+    if (U_FAILURE(status)) { 
+        delete strings; 
+        strings = NULL; 
+        return FALSE; 
+    }  
+    return TRUE; 
+} 
+ 
 int32_t UnicodeSet::nextCapacity(int32_t minCapacity) {
     // Grow exponentially to reduce the frequency of allocations.
     if (minCapacity < INITIAL_CAPACITY) {
@@ -1658,20 +1658,20 @@ bool UnicodeSet::ensureCapacity(int32_t newLen) {
     }
     int32_t newCapacity = nextCapacity(newLen);
     UChar32* temp = (UChar32*) uprv_malloc(newCapacity * sizeof(UChar32));
-    if (temp == NULL) {
+    if (temp == NULL) { 
         setToBogus(); // set the object to bogus state if an OOM failure occurred.
         return false;
-    }
+    } 
     // Copy only the actual contents.
     uprv_memcpy(temp, list, len * sizeof(UChar32));
     if (list != stackList) {
         uprv_free(list);
     }
-    list = temp;
+    list = temp; 
     capacity = newCapacity;
     return true;
-}
-
+} 
+ 
 bool UnicodeSet::ensureBufferCapacity(int32_t newLen) {
     if (newLen > MAX_LENGTH) {
         newLen = MAX_LENGTH;
@@ -1681,459 +1681,459 @@ bool UnicodeSet::ensureBufferCapacity(int32_t newLen) {
     }
     int32_t newCapacity = nextCapacity(newLen);
     UChar32* temp = (UChar32*) uprv_malloc(newCapacity * sizeof(UChar32));
-    if (temp == NULL) {
-        setToBogus();
+    if (temp == NULL) { 
+        setToBogus(); 
         return false;
-    }
+    } 
     // The buffer has no contents to be copied.
     // It is always filled from scratch after this call.
     if (buffer != stackList) {
         uprv_free(buffer);
     }
-    buffer = temp;
+    buffer = temp; 
     bufferCapacity = newCapacity;
     return true;
-}
-
-/**
- * Swap list and buffer.
- */
-void UnicodeSet::swapBuffers(void) {
-    // swap list and buffer
-    UChar32* temp = list;
-    list = buffer;
-    buffer = temp;
-
-    int32_t c = capacity;
-    capacity = bufferCapacity;
-    bufferCapacity = c;
-}
-
-void UnicodeSet::setToBogus() {
-    clear(); // Remove everything in the set.
-    fFlags = kIsBogus;
-}
-
-//----------------------------------------------------------------
-// Implementation: Fundamental operators
-//----------------------------------------------------------------
-
-static inline UChar32 max(UChar32 a, UChar32 b) {
-    return (a > b) ? a : b;
-}
-
-// polarity = 0, 3 is normal: x xor y
-// polarity = 1, 2: x xor ~y == x === y
-
-void UnicodeSet::exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity) {
-    if (isFrozen() || isBogus()) {
-        return;
-    }
+} 
+ 
+/** 
+ * Swap list and buffer. 
+ */ 
+void UnicodeSet::swapBuffers(void) { 
+    // swap list and buffer 
+    UChar32* temp = list; 
+    list = buffer; 
+    buffer = temp; 
+ 
+    int32_t c = capacity; 
+    capacity = bufferCapacity; 
+    bufferCapacity = c; 
+} 
+ 
+void UnicodeSet::setToBogus() { 
+    clear(); // Remove everything in the set. 
+    fFlags = kIsBogus; 
+} 
+ 
+//---------------------------------------------------------------- 
+// Implementation: Fundamental operators 
+//---------------------------------------------------------------- 
+ 
+static inline UChar32 max(UChar32 a, UChar32 b) { 
+    return (a > b) ? a : b; 
+} 
+ 
+// polarity = 0, 3 is normal: x xor y 
+// polarity = 1, 2: x xor ~y == x === y 
+ 
+void UnicodeSet::exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity) { 
+    if (isFrozen() || isBogus()) { 
+        return; 
+    } 
     if (!ensureBufferCapacity(len + otherLen)) {
-        return;
-    }
-
-    int32_t i = 0, j = 0, k = 0;
-    UChar32 a = list[i++];
-    UChar32 b;
-    if (polarity == 1 || polarity == 2) {
-        b = UNICODESET_LOW;
-        if (other[j] == UNICODESET_LOW) { // skip base if already LOW
-            ++j;
-            b = other[j];
-        }
-    } else {
-        b = other[j++];
-    }
-    // simplest of all the routines
-    // sort the values, discarding identicals!
-    for (;;) {
-        if (a < b) {
-            buffer[k++] = a;
-            a = list[i++];
-        } else if (b < a) {
-            buffer[k++] = b;
-            b = other[j++];
-        } else if (a != UNICODESET_HIGH) { // at this point, a == b
-            // discard both values!
-            a = list[i++];
-            b = other[j++];
-        } else { // DONE!
-            buffer[k++] = UNICODESET_HIGH;
-            len = k;
-            break;
-        }
-    }
-    swapBuffers();
-    releasePattern();
-}
-
-// polarity = 0 is normal: x union y
-// polarity = 2: x union ~y
-// polarity = 1: ~x union y
-// polarity = 3: ~x union ~y
-
-void UnicodeSet::add(const UChar32* other, int32_t otherLen, int8_t polarity) {
-    if (isFrozen() || isBogus() || other==NULL) {
-        return;
-    }
+        return; 
+    } 
+ 
+    int32_t i = 0, j = 0, k = 0; 
+    UChar32 a = list[i++]; 
+    UChar32 b; 
+    if (polarity == 1 || polarity == 2) { 
+        b = UNICODESET_LOW; 
+        if (other[j] == UNICODESET_LOW) { // skip base if already LOW 
+            ++j; 
+            b = other[j]; 
+        } 
+    } else { 
+        b = other[j++]; 
+    } 
+    // simplest of all the routines 
+    // sort the values, discarding identicals! 
+    for (;;) { 
+        if (a < b) { 
+            buffer[k++] = a; 
+            a = list[i++]; 
+        } else if (b < a) { 
+            buffer[k++] = b; 
+            b = other[j++]; 
+        } else if (a != UNICODESET_HIGH) { // at this point, a == b 
+            // discard both values! 
+            a = list[i++]; 
+            b = other[j++]; 
+        } else { // DONE! 
+            buffer[k++] = UNICODESET_HIGH; 
+            len = k; 
+            break; 
+        } 
+    } 
+    swapBuffers(); 
+    releasePattern(); 
+} 
+ 
+// polarity = 0 is normal: x union y 
+// polarity = 2: x union ~y 
+// polarity = 1: ~x union y 
+// polarity = 3: ~x union ~y 
+ 
+void UnicodeSet::add(const UChar32* other, int32_t otherLen, int8_t polarity) { 
+    if (isFrozen() || isBogus() || other==NULL) { 
+        return; 
+    } 
     if (!ensureBufferCapacity(len + otherLen)) {
-        return;
-    }
-
-    int32_t i = 0, j = 0, k = 0;
-    UChar32 a = list[i++];
-    UChar32 b = other[j++];
-    // change from xor is that we have to check overlapping pairs
-    // polarity bit 1 means a is second, bit 2 means b is.
-    for (;;) {
-        switch (polarity) {
-          case 0: // both first; take lower if unequal
-            if (a < b) { // take a
-                // Back up over overlapping ranges in buffer[]
-                if (k > 0 && a <= buffer[k-1]) {
-                    // Pick latter end value in buffer[] vs. list[]
-                    a = max(list[i], buffer[--k]);
-                } else {
-                    // No overlap
-                    buffer[k++] = a;
-                    a = list[i];
-                }
-                i++; // Common if/else code factored out
-                polarity ^= 1;
-            } else if (b < a) { // take b
-                if (k > 0 && b <= buffer[k-1]) {
-                    b = max(other[j], buffer[--k]);
-                } else {
-                    buffer[k++] = b;
-                    b = other[j];
-                }
-                j++;
-                polarity ^= 2;
-            } else { // a == b, take a, drop b
-                if (a == UNICODESET_HIGH) goto loop_end;
-                // This is symmetrical; it doesn't matter if
-                // we backtrack with a or b. - liu
-                if (k > 0 && a <= buffer[k-1]) {
-                    a = max(list[i], buffer[--k]);
-                } else {
-                    // No overlap
-                    buffer[k++] = a;
-                    a = list[i];
-                }
-                i++;
-                polarity ^= 1;
-                b = other[j++];
-                polarity ^= 2;
-            }
-            break;
-          case 3: // both second; take higher if unequal, and drop other
-            if (b <= a) { // take a
-                if (a == UNICODESET_HIGH) goto loop_end;
-                buffer[k++] = a;
-            } else { // take b
-                if (b == UNICODESET_HIGH) goto loop_end;
-                buffer[k++] = b;
-            }
-            a = list[i++];
-            polarity ^= 1;   // factored common code
-            b = other[j++];
-            polarity ^= 2;
-            break;
-          case 1: // a second, b first; if b < a, overlap
-            if (a < b) { // no overlap, take a
-                buffer[k++] = a; a = list[i++]; polarity ^= 1;
-            } else if (b < a) { // OVERLAP, drop b
-                b = other[j++];
-                polarity ^= 2;
-            } else { // a == b, drop both!
-                if (a == UNICODESET_HIGH) goto loop_end;
-                a = list[i++];
-                polarity ^= 1;
-                b = other[j++];
-                polarity ^= 2;
-            }
-            break;
-          case 2: // a first, b second; if a < b, overlap
-            if (b < a) { // no overlap, take b
-                buffer[k++] = b;
-                b = other[j++];
-                polarity ^= 2;
-            } else  if (a < b) { // OVERLAP, drop a
-                a = list[i++];
-                polarity ^= 1;
-            } else { // a == b, drop both!
-                if (a == UNICODESET_HIGH) goto loop_end;
-                a = list[i++];
-                polarity ^= 1;
-                b = other[j++];
-                polarity ^= 2;
-            }
-            break;
-        }
-    }
- loop_end:
-    buffer[k++] = UNICODESET_HIGH;    // terminate
-    len = k;
-    swapBuffers();
-    releasePattern();
-}
-
-// polarity = 0 is normal: x intersect y
-// polarity = 2: x intersect ~y == set-minus
-// polarity = 1: ~x intersect y
-// polarity = 3: ~x intersect ~y
-
-void UnicodeSet::retain(const UChar32* other, int32_t otherLen, int8_t polarity) {
-    if (isFrozen() || isBogus()) {
-        return;
-    }
+        return; 
+    } 
+ 
+    int32_t i = 0, j = 0, k = 0; 
+    UChar32 a = list[i++]; 
+    UChar32 b = other[j++]; 
+    // change from xor is that we have to check overlapping pairs 
+    // polarity bit 1 means a is second, bit 2 means b is. 
+    for (;;) { 
+        switch (polarity) { 
+          case 0: // both first; take lower if unequal 
+            if (a < b) { // take a 
+                // Back up over overlapping ranges in buffer[] 
+                if (k > 0 && a <= buffer[k-1]) { 
+                    // Pick latter end value in buffer[] vs. list[] 
+                    a = max(list[i], buffer[--k]); 
+                } else { 
+                    // No overlap 
+                    buffer[k++] = a; 
+                    a = list[i]; 
+                } 
+                i++; // Common if/else code factored out 
+                polarity ^= 1; 
+            } else if (b < a) { // take b 
+                if (k > 0 && b <= buffer[k-1]) { 
+                    b = max(other[j], buffer[--k]); 
+                } else { 
+                    buffer[k++] = b; 
+                    b = other[j]; 
+                } 
+                j++; 
+                polarity ^= 2; 
+            } else { // a == b, take a, drop b 
+                if (a == UNICODESET_HIGH) goto loop_end; 
+                // This is symmetrical; it doesn't matter if 
+                // we backtrack with a or b. - liu 
+                if (k > 0 && a <= buffer[k-1]) { 
+                    a = max(list[i], buffer[--k]); 
+                } else { 
+                    // No overlap 
+                    buffer[k++] = a; 
+                    a = list[i]; 
+                } 
+                i++; 
+                polarity ^= 1; 
+                b = other[j++]; 
+                polarity ^= 2; 
+            } 
+            break; 
+          case 3: // both second; take higher if unequal, and drop other 
+            if (b <= a) { // take a 
+                if (a == UNICODESET_HIGH) goto loop_end; 
+                buffer[k++] = a; 
+            } else { // take b 
+                if (b == UNICODESET_HIGH) goto loop_end; 
+                buffer[k++] = b; 
+            } 
+            a = list[i++]; 
+            polarity ^= 1;   // factored common code 
+            b = other[j++]; 
+            polarity ^= 2; 
+            break; 
+          case 1: // a second, b first; if b < a, overlap 
+            if (a < b) { // no overlap, take a 
+                buffer[k++] = a; a = list[i++]; polarity ^= 1; 
+            } else if (b < a) { // OVERLAP, drop b 
+                b = other[j++]; 
+                polarity ^= 2; 
+            } else { // a == b, drop both! 
+                if (a == UNICODESET_HIGH) goto loop_end; 
+                a = list[i++]; 
+                polarity ^= 1; 
+                b = other[j++]; 
+                polarity ^= 2; 
+            } 
+            break; 
+          case 2: // a first, b second; if a < b, overlap 
+            if (b < a) { // no overlap, take b 
+                buffer[k++] = b; 
+                b = other[j++]; 
+                polarity ^= 2; 
+            } else  if (a < b) { // OVERLAP, drop a 
+                a = list[i++]; 
+                polarity ^= 1; 
+            } else { // a == b, drop both! 
+                if (a == UNICODESET_HIGH) goto loop_end; 
+                a = list[i++]; 
+                polarity ^= 1; 
+                b = other[j++]; 
+                polarity ^= 2; 
+            } 
+            break; 
+        } 
+    } 
+ loop_end: 
+    buffer[k++] = UNICODESET_HIGH;    // terminate 
+    len = k; 
+    swapBuffers(); 
+    releasePattern(); 
+} 
+ 
+// polarity = 0 is normal: x intersect y 
+// polarity = 2: x intersect ~y == set-minus 
+// polarity = 1: ~x intersect y 
+// polarity = 3: ~x intersect ~y 
+ 
+void UnicodeSet::retain(const UChar32* other, int32_t otherLen, int8_t polarity) { 
+    if (isFrozen() || isBogus()) { 
+        return; 
+    } 
     if (!ensureBufferCapacity(len + otherLen)) {
-        return;
-    }
-
-    int32_t i = 0, j = 0, k = 0;
-    UChar32 a = list[i++];
-    UChar32 b = other[j++];
-    // change from xor is that we have to check overlapping pairs
-    // polarity bit 1 means a is second, bit 2 means b is.
-    for (;;) {
-        switch (polarity) {
-          case 0: // both first; drop the smaller
-            if (a < b) { // drop a
-                a = list[i++];
-                polarity ^= 1;
-            } else if (b < a) { // drop b
-                b = other[j++];
-                polarity ^= 2;
-            } else { // a == b, take one, drop other
-                if (a == UNICODESET_HIGH) goto loop_end;
-                buffer[k++] = a;
-                a = list[i++];
-                polarity ^= 1;
-                b = other[j++];
-                polarity ^= 2;
-            }
-            break;
-          case 3: // both second; take lower if unequal
-            if (a < b) { // take a
-                buffer[k++] = a;
-                a = list[i++];
-                polarity ^= 1;
-            } else if (b < a) { // take b
-                buffer[k++] = b;
-                b = other[j++];
-                polarity ^= 2;
-            } else { // a == b, take one, drop other
-                if (a == UNICODESET_HIGH) goto loop_end;
-                buffer[k++] = a;
-                a = list[i++];
-                polarity ^= 1;
-                b = other[j++];
-                polarity ^= 2;
-            }
-            break;
-          case 1: // a second, b first;
-            if (a < b) { // NO OVERLAP, drop a
-                a = list[i++];
-                polarity ^= 1;
-            } else if (b < a) { // OVERLAP, take b
-                buffer[k++] = b;
-                b = other[j++];
-                polarity ^= 2;
-            } else { // a == b, drop both!
-                if (a == UNICODESET_HIGH) goto loop_end;
-                a = list[i++];
-                polarity ^= 1;
-                b = other[j++];
-                polarity ^= 2;
-            }
-            break;
-          case 2: // a first, b second; if a < b, overlap
-            if (b < a) { // no overlap, drop b
-                b = other[j++];
-                polarity ^= 2;
-            } else  if (a < b) { // OVERLAP, take a
-                buffer[k++] = a;
-                a = list[i++];
-                polarity ^= 1;
-            } else { // a == b, drop both!
-                if (a == UNICODESET_HIGH) goto loop_end;
-                a = list[i++];
-                polarity ^= 1;
-                b = other[j++];
-                polarity ^= 2;
-            }
-            break;
-        }
-    }
- loop_end:
-    buffer[k++] = UNICODESET_HIGH;    // terminate
-    len = k;
-    swapBuffers();
-    releasePattern();
-}
-
-/**
- * Append the <code>toPattern()</code> representation of a
- * string to the given <code>StringBuffer</code>.
- */
-void UnicodeSet::_appendToPat(UnicodeString& buf, const UnicodeString& s, UBool
-escapeUnprintable) {
-    UChar32 cp;
-    for (int32_t i = 0; i < s.length(); i += U16_LENGTH(cp)) {
-        _appendToPat(buf, cp = s.char32At(i), escapeUnprintable);
-    }
-}
-
-/**
- * Append the <code>toPattern()</code> representation of a
- * character to the given <code>StringBuffer</code>.
- */
-void UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c, UBool
-escapeUnprintable) {
-    if (escapeUnprintable && ICU_Utility::isUnprintable(c)) {
-        // Use hex escape notation (\uxxxx or \Uxxxxxxxx) for anything
-        // unprintable
-        if (ICU_Utility::escapeUnprintable(buf, c)) {
-            return;
-        }
-    }
-    // Okay to let ':' pass through
-    switch (c) {
-    case SET_OPEN:
-    case SET_CLOSE:
-    case HYPHEN:
-    case COMPLEMENT:
-    case INTERSECTION:
-    case BACKSLASH:
-    case OPEN_BRACE:
-    case CLOSE_BRACE:
-    case COLON:
-    case SymbolTable::SYMBOL_REF:
-        buf.append(BACKSLASH);
-        break;
-    default:
-        // Escape whitespace
-        if (PatternProps::isWhiteSpace(c)) {
-            buf.append(BACKSLASH);
-        }
-        break;
-    }
-    buf.append(c);
-}
-
-/**
- * Append a string representation of this set to result.  This will be
- * a cleaned version of the string passed to applyPattern(), if there
- * is one.  Otherwise it will be generated.
- */
-UnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
-                                      UBool escapeUnprintable) const
-{
-    if (pat != NULL) {
-        int32_t i;
-        int32_t backslashCount = 0;
-        for (i=0; i<patLen; ) {
-            UChar32 c;
-            U16_NEXT(pat, i, patLen, c);
-            if (escapeUnprintable && ICU_Utility::isUnprintable(c)) {
-                // If the unprintable character is preceded by an odd
-                // number of backslashes, then it has been escaped.
-                // Before unescaping it, we delete the final
-                // backslash.
-                if ((backslashCount % 2) == 1) {
-                    result.truncate(result.length() - 1);
-                }
-                ICU_Utility::escapeUnprintable(result, c);
-                backslashCount = 0;
-            } else {
-                result.append(c);
-                if (c == BACKSLASH) {
-                    ++backslashCount;
-                } else {
-                    backslashCount = 0;
-                }
-            }
-        }
-        return result;
-    }
-
-    return _generatePattern(result, escapeUnprintable);
-}
-
-/**
- * Returns a string representation of this set.  If the result of
- * calling this function is passed to a UnicodeSet constructor, it
- * will produce another set that is equal to this one.
- */
-UnicodeString& UnicodeSet::toPattern(UnicodeString& result,
-                                     UBool escapeUnprintable) const
-{
-    result.truncate(0);
-    return _toPattern(result, escapeUnprintable);
-}
-
-/**
- * Generate and append a string representation of this set to result.
- * This does not use this.pat, the cleaned up copy of the string
- * passed to applyPattern().
- */
-UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
-                                            UBool escapeUnprintable) const
-{
-    result.append(SET_OPEN);
-
-//  // Check against the predefined categories.  We implicitly build
-//  // up ALL category sets the first time toPattern() is called.
-//  for (int8_t cat=0; cat<Unicode::GENERAL_TYPES_COUNT; ++cat) {
-//      if (*this == getCategorySet(cat)) {
-//          result.append(COLON);
-//          result.append(CATEGORY_NAMES, cat*2, 2);
-//          return result.append(CATEGORY_CLOSE);
-//      }
-//  }
-
-    int32_t count = getRangeCount();
-
-    // If the set contains at least 2 intervals and includes both
-    // MIN_VALUE and MAX_VALUE, then the inverse representation will
-    // be more economical.
-    if (count > 1 &&
-        getRangeStart(0) == MIN_VALUE &&
-        getRangeEnd(count-1) == MAX_VALUE) {
-
-        // Emit the inverse
-        result.append(COMPLEMENT);
-
-        for (int32_t i = 1; i < count; ++i) {
-            UChar32 start = getRangeEnd(i-1)+1;
-            UChar32 end = getRangeStart(i)-1;
-            _appendToPat(result, start, escapeUnprintable);
-            if (start != end) {
-                if ((start+1) != end) {
-                    result.append(HYPHEN);
-                }
-                _appendToPat(result, end, escapeUnprintable);
-            }
-        }
-    }
-
-    // Default; emit the ranges as pairs
-    else {
-        for (int32_t i = 0; i < count; ++i) {
-            UChar32 start = getRangeStart(i);
-            UChar32 end = getRangeEnd(i);
-            _appendToPat(result, start, escapeUnprintable);
-            if (start != end) {
-                if ((start+1) != end) {
-                    result.append(HYPHEN);
-                }
-                _appendToPat(result, end, escapeUnprintable);
-            }
-        }
-    }
-
+        return; 
+    } 
+ 
+    int32_t i = 0, j = 0, k = 0; 
+    UChar32 a = list[i++]; 
+    UChar32 b = other[j++]; 
+    // change from xor is that we have to check overlapping pairs 
+    // polarity bit 1 means a is second, bit 2 means b is. 
+    for (;;) { 
+        switch (polarity) { 
+          case 0: // both first; drop the smaller 
+            if (a < b) { // drop a 
+                a = list[i++]; 
+                polarity ^= 1; 
+            } else if (b < a) { // drop b 
+                b = other[j++]; 
+                polarity ^= 2; 
+            } else { // a == b, take one, drop other 
+                if (a == UNICODESET_HIGH) goto loop_end; 
+                buffer[k++] = a; 
+                a = list[i++]; 
+                polarity ^= 1; 
+                b = other[j++]; 
+                polarity ^= 2; 
+            } 
+            break; 
+          case 3: // both second; take lower if unequal 
+            if (a < b) { // take a 
+                buffer[k++] = a; 
+                a = list[i++]; 
+                polarity ^= 1; 
+            } else if (b < a) { // take b 
+                buffer[k++] = b; 
+                b = other[j++]; 
+                polarity ^= 2; 
+            } else { // a == b, take one, drop other 
+                if (a == UNICODESET_HIGH) goto loop_end; 
+                buffer[k++] = a; 
+                a = list[i++]; 
+                polarity ^= 1; 
+                b = other[j++]; 
+                polarity ^= 2; 
+            } 
+            break; 
+          case 1: // a second, b first; 
+            if (a < b) { // NO OVERLAP, drop a 
+                a = list[i++]; 
+                polarity ^= 1; 
+            } else if (b < a) { // OVERLAP, take b 
+                buffer[k++] = b; 
+                b = other[j++]; 
+                polarity ^= 2; 
+            } else { // a == b, drop both! 
+                if (a == UNICODESET_HIGH) goto loop_end; 
+                a = list[i++]; 
+                polarity ^= 1; 
+                b = other[j++]; 
+                polarity ^= 2; 
+            } 
+            break; 
+          case 2: // a first, b second; if a < b, overlap 
+            if (b < a) { // no overlap, drop b 
+                b = other[j++]; 
+                polarity ^= 2; 
+            } else  if (a < b) { // OVERLAP, take a 
+                buffer[k++] = a; 
+                a = list[i++]; 
+                polarity ^= 1; 
+            } else { // a == b, drop both! 
+                if (a == UNICODESET_HIGH) goto loop_end; 
+                a = list[i++]; 
+                polarity ^= 1; 
+                b = other[j++]; 
+                polarity ^= 2; 
+            } 
+            break; 
+        } 
+    } 
+ loop_end: 
+    buffer[k++] = UNICODESET_HIGH;    // terminate 
+    len = k; 
+    swapBuffers(); 
+    releasePattern(); 
+} 
+ 
+/** 
+ * Append the <code>toPattern()</code> representation of a 
+ * string to the given <code>StringBuffer</code>. 
+ */ 
+void UnicodeSet::_appendToPat(UnicodeString& buf, const UnicodeString& s, UBool 
+escapeUnprintable) { 
+    UChar32 cp; 
+    for (int32_t i = 0; i < s.length(); i += U16_LENGTH(cp)) { 
+        _appendToPat(buf, cp = s.char32At(i), escapeUnprintable); 
+    } 
+} 
+ 
+/** 
+ * Append the <code>toPattern()</code> representation of a 
+ * character to the given <code>StringBuffer</code>. 
+ */ 
+void UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c, UBool 
+escapeUnprintable) { 
+    if (escapeUnprintable && ICU_Utility::isUnprintable(c)) { 
+        // Use hex escape notation (\uxxxx or \Uxxxxxxxx) for anything 
+        // unprintable 
+        if (ICU_Utility::escapeUnprintable(buf, c)) { 
+            return; 
+        } 
+    } 
+    // Okay to let ':' pass through 
+    switch (c) { 
+    case SET_OPEN: 
+    case SET_CLOSE: 
+    case HYPHEN: 
+    case COMPLEMENT: 
+    case INTERSECTION: 
+    case BACKSLASH: 
+    case OPEN_BRACE: 
+    case CLOSE_BRACE: 
+    case COLON: 
+    case SymbolTable::SYMBOL_REF: 
+        buf.append(BACKSLASH); 
+        break; 
+    default: 
+        // Escape whitespace 
+        if (PatternProps::isWhiteSpace(c)) { 
+            buf.append(BACKSLASH); 
+        } 
+        break; 
+    } 
+    buf.append(c); 
+} 
+ 
+/** 
+ * Append a string representation of this set to result.  This will be 
+ * a cleaned version of the string passed to applyPattern(), if there 
+ * is one.  Otherwise it will be generated. 
+ */ 
+UnicodeString& UnicodeSet::_toPattern(UnicodeString& result, 
+                                      UBool escapeUnprintable) const 
+{ 
+    if (pat != NULL) { 
+        int32_t i; 
+        int32_t backslashCount = 0; 
+        for (i=0; i<patLen; ) { 
+            UChar32 c; 
+            U16_NEXT(pat, i, patLen, c); 
+            if (escapeUnprintable && ICU_Utility::isUnprintable(c)) { 
+                // If the unprintable character is preceded by an odd 
+                // number of backslashes, then it has been escaped. 
+                // Before unescaping it, we delete the final 
+                // backslash. 
+                if ((backslashCount % 2) == 1) { 
+                    result.truncate(result.length() - 1); 
+                } 
+                ICU_Utility::escapeUnprintable(result, c); 
+                backslashCount = 0; 
+            } else { 
+                result.append(c); 
+                if (c == BACKSLASH) { 
+                    ++backslashCount; 
+                } else { 
+                    backslashCount = 0; 
+                } 
+            } 
+        } 
+        return result; 
+    } 
+ 
+    return _generatePattern(result, escapeUnprintable); 
+} 
+ 
+/** 
+ * Returns a string representation of this set.  If the result of 
+ * calling this function is passed to a UnicodeSet constructor, it 
+ * will produce another set that is equal to this one. 
+ */ 
+UnicodeString& UnicodeSet::toPattern(UnicodeString& result, 
+                                     UBool escapeUnprintable) const 
+{ 
+    result.truncate(0); 
+    return _toPattern(result, escapeUnprintable); 
+} 
+ 
+/** 
+ * Generate and append a string representation of this set to result. 
+ * This does not use this.pat, the cleaned up copy of the string 
+ * passed to applyPattern(). 
+ */ 
+UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result, 
+                                            UBool escapeUnprintable) const 
+{ 
+    result.append(SET_OPEN); 
+ 
+//  // Check against the predefined categories.  We implicitly build 
+//  // up ALL category sets the first time toPattern() is called. 
+//  for (int8_t cat=0; cat<Unicode::GENERAL_TYPES_COUNT; ++cat) { 
+//      if (*this == getCategorySet(cat)) { 
+//          result.append(COLON); 
+//          result.append(CATEGORY_NAMES, cat*2, 2); 
+//          return result.append(CATEGORY_CLOSE); 
+//      } 
+//  } 
+ 
+    int32_t count = getRangeCount(); 
+ 
+    // If the set contains at least 2 intervals and includes both 
+    // MIN_VALUE and MAX_VALUE, then the inverse representation will 
+    // be more economical. 
+    if (count > 1 && 
+        getRangeStart(0) == MIN_VALUE && 
+        getRangeEnd(count-1) == MAX_VALUE) { 
+ 
+        // Emit the inverse 
+        result.append(COMPLEMENT); 
+ 
+        for (int32_t i = 1; i < count; ++i) { 
+            UChar32 start = getRangeEnd(i-1)+1; 
+            UChar32 end = getRangeStart(i)-1; 
+            _appendToPat(result, start, escapeUnprintable); 
+            if (start != end) { 
+                if ((start+1) != end) { 
+                    result.append(HYPHEN); 
+                } 
+                _appendToPat(result, end, escapeUnprintable); 
+            } 
+        } 
+    } 
+ 
+    // Default; emit the ranges as pairs 
+    else { 
+        for (int32_t i = 0; i < count; ++i) { 
+            UChar32 start = getRangeStart(i); 
+            UChar32 end = getRangeEnd(i); 
+            _appendToPat(result, start, escapeUnprintable); 
+            if (start != end) { 
+                if ((start+1) != end) { 
+                    result.append(HYPHEN); 
+                } 
+                _appendToPat(result, end, escapeUnprintable); 
+            } 
+        } 
+    } 
+ 
     if (strings != nullptr) {
         for (int32_t i = 0; i<strings->size(); ++i) {
             result.append(OPEN_BRACE);
@@ -2142,215 +2142,215 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
                          escapeUnprintable);
             result.append(CLOSE_BRACE);
         }
-    }
-    return result.append(SET_CLOSE);
-}
-
-/**
-* Release existing cached pattern
-*/
-void UnicodeSet::releasePattern() {
-    if (pat) {
-        uprv_free(pat);
-        pat = NULL;
-        patLen = 0;
-    }
-}
-
-/**
-* Set the new pattern to cache.
-*/
+    } 
+    return result.append(SET_CLOSE); 
+} 
+ 
+/** 
+* Release existing cached pattern 
+*/ 
+void UnicodeSet::releasePattern() { 
+    if (pat) { 
+        uprv_free(pat); 
+        pat = NULL; 
+        patLen = 0; 
+    } 
+} 
+ 
+/** 
+* Set the new pattern to cache. 
+*/ 
 void UnicodeSet::setPattern(const char16_t *newPat, int32_t newPatLen) {
-    releasePattern();
-    pat = (UChar *)uprv_malloc((newPatLen + 1) * sizeof(UChar));
-    if (pat) {
-        patLen = newPatLen;
+    releasePattern(); 
+    pat = (UChar *)uprv_malloc((newPatLen + 1) * sizeof(UChar)); 
+    if (pat) { 
+        patLen = newPatLen; 
         u_memcpy(pat, newPat, patLen);
-        pat[patLen] = 0;
-    }
-    // else we don't care if malloc failed. This was just a nice cache.
-    // We can regenerate an equivalent pattern later when requested.
-}
-
+        pat[patLen] = 0; 
+    } 
+    // else we don't care if malloc failed. This was just a nice cache. 
+    // We can regenerate an equivalent pattern later when requested. 
+} 
+ 
 UnicodeSet *UnicodeSet::freeze() {
-    if(!isFrozen() && !isBogus()) {
+    if(!isFrozen() && !isBogus()) { 
         compact();
-
+ 
         // Optimize contains() and span() and similar functions.
         if (hasStrings()) {
             stringSpan = new UnicodeSetStringSpan(*this, *strings, UnicodeSetStringSpan::ALL);
             if (stringSpan == nullptr) {
-                setToBogus();
-                return this;
+                setToBogus(); 
+                return this; 
             } else if (!stringSpan->needsStringSpanUTF16()) {
-                // All strings are irrelevant for span() etc. because
-                // all of each string's code points are contained in this set.
-                // Do not check needsStringSpanUTF8() because UTF-8 has at most as
-                // many relevant strings as UTF-16.
-                // (Thus needsStringSpanUTF8() implies needsStringSpanUTF16().)
-                delete stringSpan;
-                stringSpan = NULL;
-            }
-        }
-        if (stringSpan == NULL) {
-            // No span-relevant strings: Optimize for code point spans.
-            bmpSet=new BMPSet(list, len);
-            if (bmpSet == NULL) { // Check for memory allocation error.
-                setToBogus();
-            }
-        }
-    }
-    return this;
-}
-
-int32_t UnicodeSet::span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const {
-    if(length>0 && bmpSet!=NULL) {
-        return (int32_t)(bmpSet->span(s, s+length, spanCondition)-s);
-    }
-    if(length<0) {
-        length=u_strlen(s);
-    }
-    if(length==0) {
-        return 0;
-    }
-    if(stringSpan!=NULL) {
-        return stringSpan->span(s, length, spanCondition);
+                // All strings are irrelevant for span() etc. because 
+                // all of each string's code points are contained in this set. 
+                // Do not check needsStringSpanUTF8() because UTF-8 has at most as 
+                // many relevant strings as UTF-16. 
+                // (Thus needsStringSpanUTF8() implies needsStringSpanUTF16().) 
+                delete stringSpan; 
+                stringSpan = NULL; 
+            } 
+        } 
+        if (stringSpan == NULL) { 
+            // No span-relevant strings: Optimize for code point spans. 
+            bmpSet=new BMPSet(list, len); 
+            if (bmpSet == NULL) { // Check for memory allocation error. 
+                setToBogus(); 
+            } 
+        } 
+    } 
+    return this; 
+} 
+ 
+int32_t UnicodeSet::span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const { 
+    if(length>0 && bmpSet!=NULL) { 
+        return (int32_t)(bmpSet->span(s, s+length, spanCondition)-s); 
+    } 
+    if(length<0) { 
+        length=u_strlen(s); 
+    } 
+    if(length==0) { 
+        return 0; 
+    } 
+    if(stringSpan!=NULL) { 
+        return stringSpan->span(s, length, spanCondition); 
     } else if(hasStrings()) {
-        uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
-                            UnicodeSetStringSpan::FWD_UTF16_NOT_CONTAINED :
-                            UnicodeSetStringSpan::FWD_UTF16_CONTAINED;
-        UnicodeSetStringSpan strSpan(*this, *strings, which);
-        if(strSpan.needsStringSpanUTF16()) {
-            return strSpan.span(s, length, spanCondition);
-        }
-    }
-
-    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
-        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
-    }
-
-    UChar32 c;
-    int32_t start=0, prev=0;
-    do {
-        U16_NEXT(s, start, length, c);
-        if(spanCondition!=contains(c)) {
-            break;
-        }
-    } while((prev=start)<length);
-    return prev;
-}
-
-int32_t UnicodeSet::spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const {
-    if(length>0 && bmpSet!=NULL) {
-        return (int32_t)(bmpSet->spanBack(s, s+length, spanCondition)-s);
-    }
-    if(length<0) {
-        length=u_strlen(s);
-    }
-    if(length==0) {
-        return 0;
-    }
-    if(stringSpan!=NULL) {
-        return stringSpan->spanBack(s, length, spanCondition);
+        uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ? 
+                            UnicodeSetStringSpan::FWD_UTF16_NOT_CONTAINED : 
+                            UnicodeSetStringSpan::FWD_UTF16_CONTAINED; 
+        UnicodeSetStringSpan strSpan(*this, *strings, which); 
+        if(strSpan.needsStringSpanUTF16()) { 
+            return strSpan.span(s, length, spanCondition); 
+        } 
+    } 
+ 
+    if(spanCondition!=USET_SPAN_NOT_CONTAINED) { 
+        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values. 
+    } 
+ 
+    UChar32 c; 
+    int32_t start=0, prev=0; 
+    do { 
+        U16_NEXT(s, start, length, c); 
+        if(spanCondition!=contains(c)) { 
+            break; 
+        } 
+    } while((prev=start)<length); 
+    return prev; 
+} 
+ 
+int32_t UnicodeSet::spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const { 
+    if(length>0 && bmpSet!=NULL) { 
+        return (int32_t)(bmpSet->spanBack(s, s+length, spanCondition)-s); 
+    } 
+    if(length<0) { 
+        length=u_strlen(s); 
+    } 
+    if(length==0) { 
+        return 0; 
+    } 
+    if(stringSpan!=NULL) { 
+        return stringSpan->spanBack(s, length, spanCondition); 
     } else if(hasStrings()) {
-        uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
-                            UnicodeSetStringSpan::BACK_UTF16_NOT_CONTAINED :
-                            UnicodeSetStringSpan::BACK_UTF16_CONTAINED;
-        UnicodeSetStringSpan strSpan(*this, *strings, which);
-        if(strSpan.needsStringSpanUTF16()) {
-            return strSpan.spanBack(s, length, spanCondition);
-        }
-    }
-
-    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
-        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
-    }
-
-    UChar32 c;
-    int32_t prev=length;
-    do {
-        U16_PREV(s, 0, length, c);
-        if(spanCondition!=contains(c)) {
-            break;
-        }
-    } while((prev=length)>0);
-    return prev;
-}
-
-int32_t UnicodeSet::spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const {
-    if(length>0 && bmpSet!=NULL) {
-        const uint8_t *s0=(const uint8_t *)s;
-        return (int32_t)(bmpSet->spanUTF8(s0, length, spanCondition)-s0);
-    }
-    if(length<0) {
-        length=(int32_t)uprv_strlen(s);
-    }
-    if(length==0) {
-        return 0;
-    }
-    if(stringSpan!=NULL) {
-        return stringSpan->spanUTF8((const uint8_t *)s, length, spanCondition);
+        uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ? 
+                            UnicodeSetStringSpan::BACK_UTF16_NOT_CONTAINED : 
+                            UnicodeSetStringSpan::BACK_UTF16_CONTAINED; 
+        UnicodeSetStringSpan strSpan(*this, *strings, which); 
+        if(strSpan.needsStringSpanUTF16()) { 
+            return strSpan.spanBack(s, length, spanCondition); 
+        } 
+    } 
+ 
+    if(spanCondition!=USET_SPAN_NOT_CONTAINED) { 
+        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values. 
+    } 
+ 
+    UChar32 c; 
+    int32_t prev=length; 
+    do { 
+        U16_PREV(s, 0, length, c); 
+        if(spanCondition!=contains(c)) { 
+            break; 
+        } 
+    } while((prev=length)>0); 
+    return prev; 
+} 
+ 
+int32_t UnicodeSet::spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const { 
+    if(length>0 && bmpSet!=NULL) { 
+        const uint8_t *s0=(const uint8_t *)s; 
+        return (int32_t)(bmpSet->spanUTF8(s0, length, spanCondition)-s0); 
+    } 
+    if(length<0) { 
+        length=(int32_t)uprv_strlen(s); 
+    } 
+    if(length==0) { 
+        return 0; 
+    } 
+    if(stringSpan!=NULL) { 
+        return stringSpan->spanUTF8((const uint8_t *)s, length, spanCondition); 
     } else if(hasStrings()) {
-        uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
-                            UnicodeSetStringSpan::FWD_UTF8_NOT_CONTAINED :
-                            UnicodeSetStringSpan::FWD_UTF8_CONTAINED;
-        UnicodeSetStringSpan strSpan(*this, *strings, which);
-        if(strSpan.needsStringSpanUTF8()) {
-            return strSpan.spanUTF8((const uint8_t *)s, length, spanCondition);
-        }
-    }
-
-    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
-        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
-    }
-
-    UChar32 c;
-    int32_t start=0, prev=0;
-    do {
-        U8_NEXT_OR_FFFD(s, start, length, c);
-        if(spanCondition!=contains(c)) {
-            break;
-        }
-    } while((prev=start)<length);
-    return prev;
-}
-
-int32_t UnicodeSet::spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const {
-    if(length>0 && bmpSet!=NULL) {
-        const uint8_t *s0=(const uint8_t *)s;
-        return bmpSet->spanBackUTF8(s0, length, spanCondition);
-    }
-    if(length<0) {
-        length=(int32_t)uprv_strlen(s);
-    }
-    if(length==0) {
-        return 0;
-    }
-    if(stringSpan!=NULL) {
-        return stringSpan->spanBackUTF8((const uint8_t *)s, length, spanCondition);
+        uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ? 
+                            UnicodeSetStringSpan::FWD_UTF8_NOT_CONTAINED : 
+                            UnicodeSetStringSpan::FWD_UTF8_CONTAINED; 
+        UnicodeSetStringSpan strSpan(*this, *strings, which); 
+        if(strSpan.needsStringSpanUTF8()) { 
+            return strSpan.spanUTF8((const uint8_t *)s, length, spanCondition); 
+        } 
+    } 
+ 
+    if(spanCondition!=USET_SPAN_NOT_CONTAINED) { 
+        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values. 
+    } 
+ 
+    UChar32 c; 
+    int32_t start=0, prev=0; 
+    do { 
+        U8_NEXT_OR_FFFD(s, start, length, c); 
+        if(spanCondition!=contains(c)) { 
+            break; 
+        } 
+    } while((prev=start)<length); 
+    return prev; 
+} 
+ 
+int32_t UnicodeSet::spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const { 
+    if(length>0 && bmpSet!=NULL) { 
+        const uint8_t *s0=(const uint8_t *)s; 
+        return bmpSet->spanBackUTF8(s0, length, spanCondition); 
+    } 
+    if(length<0) { 
+        length=(int32_t)uprv_strlen(s); 
+    } 
+    if(length==0) { 
+        return 0; 
+    } 
+    if(stringSpan!=NULL) { 
+        return stringSpan->spanBackUTF8((const uint8_t *)s, length, spanCondition); 
     } else if(hasStrings()) {
-        uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
-                            UnicodeSetStringSpan::BACK_UTF8_NOT_CONTAINED :
-                            UnicodeSetStringSpan::BACK_UTF8_CONTAINED;
-        UnicodeSetStringSpan strSpan(*this, *strings, which);
-        if(strSpan.needsStringSpanUTF8()) {
-            return strSpan.spanBackUTF8((const uint8_t *)s, length, spanCondition);
-        }
-    }
-
-    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
-        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
-    }
-
-    UChar32 c;
-    int32_t prev=length;
-    do {
-        U8_PREV_OR_FFFD(s, 0, length, c);
-        if(spanCondition!=contains(c)) {
-            break;
-        }
-    } while((prev=length)>0);
-    return prev;
-}
-
-U_NAMESPACE_END
+        uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ? 
+                            UnicodeSetStringSpan::BACK_UTF8_NOT_CONTAINED : 
+                            UnicodeSetStringSpan::BACK_UTF8_CONTAINED; 
+        UnicodeSetStringSpan strSpan(*this, *strings, which); 
+        if(strSpan.needsStringSpanUTF8()) { 
+            return strSpan.spanBackUTF8((const uint8_t *)s, length, spanCondition); 
+        } 
+    } 
+ 
+    if(spanCondition!=USET_SPAN_NOT_CONTAINED) { 
+        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values. 
+    } 
+ 
+    UChar32 c; 
+    int32_t prev=length; 
+    do { 
+        U8_PREV_OR_FFFD(s, 0, length, c); 
+        if(spanCondition!=contains(c)) { 
+            break; 
+        } 
+    } while((prev=length)>0); 
+    return prev; 
+} 
+ 
+U_NAMESPACE_END
author	neksard <neksard@yandex-team.ru>	2022-02-10 16:45:23 +0300
committer	Daniil Cherednik <dcherednik@yandex-team.ru>	2022-02-10 16:45:23 +0300
commit	8f7cf138264e0caa318144bf8a2c950e0b0a8593 (patch)
tree	83bf5c8c8047c42d8475e6095df90ccdc3d1b57f /contrib/libs/icu/common/uniset.cpp
parent	d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (diff)
download	ydb-8f7cf138264e0caa318144bf8a2c950e0b0a8593.tar.gz